[Bese-devel] arnesi http patch

Chris Dean ctdean at sokitomi.com
Sat Apr 5 01:44:25 UTC 2008


Below is a patch that slightly extends HTML entity handling.

- We add more HTML entity mappings (the named entities from &nbsp; through &diams;).
- We handle the conversion of numeric HTML entities, both decimal (&#233;) and hexadecimal (&#xE9;).

Cheers,
Chris Dean

diff -rN -u old-arnesi_dev/src/http.lisp new-arnesi_dev/src/http.lisp
--- old-arnesi_dev/src/http.lisp	2008-04-04 17:16:16.000000000 -0700
+++ new-arnesi_dev/src/http.lisp	2008-04-04 17:16:16.000000000 -0700
@@ -126,9 +126,12 @@
 
 (defun make-html-entities ()
   (let ((ht (make-hash-table :test 'equalp)))
-    (flet ((add-mapping (char escaped)
-             (setf (gethash char ht) escaped
-                   (gethash escaped ht) char)))
+    (flet ((add-mapping (char-or-code escaped)
+             (let ((char (if (numberp char-or-code)
+                             (code-char char-or-code)
+                             char-or-code)))
+               (setf (gethash char ht) escaped
+                     (gethash escaped ht) char))))
       (add-mapping #\< "&lt;")
       (add-mapping #\> "&gt;")
       (add-mapping #\& "&amp;")
@@ -143,18 +146,283 @@
       (add-mapping "o`" "&#242;")
       (add-mapping "o'" "&#243;")
       (add-mapping "u`" "&#249;")
-      (add-mapping "u'" "&#250;"))
+      (add-mapping "u'" "&#250;")
+      (add-mapping  160 "&nbsp;")
+      (add-mapping  161 "&iexcl;")
+      (add-mapping  162 "&cent;")
+      (add-mapping  163 "&pound;")
+      (add-mapping  164 "&curren;")
+      (add-mapping  165 "&yen;")
+      (add-mapping  166 "&brvbar;")
+      (add-mapping  167 "&sect;")
+      (add-mapping  168 "&uml;")
+      (add-mapping  169 "&copy;")
+      (add-mapping  170 "&ordf;")
+      (add-mapping  171 "&laquo;")
+      (add-mapping  172 "&not;")
+      (add-mapping  173 "&shy;")
+      (add-mapping  174 "&reg;")
+      (add-mapping  175 "&macr;")
+      (add-mapping  176 "&deg;")
+      (add-mapping  177 "&plusmn;")
+      (add-mapping  178 "&sup2;")
+      (add-mapping  179 "&sup3;")
+      (add-mapping  180 "&acute;")
+      (add-mapping  181 "&micro;")
+      (add-mapping  182 "&para;")
+      (add-mapping  183 "&middot;")
+      (add-mapping  184 "&cedil;")
+      (add-mapping  185 "&sup1;")
+      (add-mapping  186 "&ordm;")
+      (add-mapping  187 "&raquo;")
+      (add-mapping  188 "&frac14;")
+      (add-mapping  189 "&frac12;")
+      (add-mapping  190 "&frac34;")
+      (add-mapping  191 "&iquest;")
+      (add-mapping  192 "&Agrave;")
+      (add-mapping  193 "&Aacute;")
+      (add-mapping  194 "&Acirc;")
+      (add-mapping  195 "&Atilde;")
+      (add-mapping  196 "&Auml;")
+      (add-mapping  197 "&Aring;")
+      (add-mapping  198 "&AElig;")
+      (add-mapping  199 "&Ccedil;")
+      (add-mapping  200 "&Egrave;")
+      (add-mapping  201 "&Eacute;")
+      (add-mapping  202 "&Ecirc;")
+      (add-mapping  203 "&Euml;")
+      (add-mapping  204 "&Igrave;")
+      (add-mapping  205 "&Iacute;")
+      (add-mapping  206 "&Icirc;")
+      (add-mapping  207 "&Iuml;")
+      (add-mapping  208 "&ETH;")
+      (add-mapping  209 "&Ntilde;")
+      (add-mapping  210 "&Ograve;")
+      (add-mapping  211 "&Oacute;")
+      (add-mapping  212 "&Ocirc;")
+      (add-mapping  213 "&Otilde;")
+      (add-mapping  214 "&Ouml;")
+      (add-mapping  215 "&times;")
+      (add-mapping  216 "&Oslash;")
+      (add-mapping  217 "&Ugrave;")
+      (add-mapping  218 "&Uacute;")
+      (add-mapping  219 "&Ucirc;")
+      (add-mapping  220 "&Uuml;")
+      (add-mapping  221 "&Yacute;")
+      (add-mapping  222 "&THORN;")
+      (add-mapping  223 "&szlig;")
+      (add-mapping  224 "&agrave;")
+      (add-mapping  225 "&aacute;")
+      (add-mapping  226 "&acirc;")
+      (add-mapping  227 "&atilde;")
+      (add-mapping  228 "&auml;")
+      (add-mapping  229 "&aring;")
+      (add-mapping  230 "&aelig;")
+      (add-mapping  231 "&ccedil;")
+      (add-mapping  232 "&egrave;")
+      (add-mapping  233 "&eacute;")
+      (add-mapping  234 "&ecirc;")
+      (add-mapping  235 "&euml;")
+      (add-mapping  236 "&igrave;")
+      (add-mapping  237 "&iacute;")
+      (add-mapping  238 "&icirc;")
+      (add-mapping  239 "&iuml;")
+      (add-mapping  240 "&eth;")
+      (add-mapping  241 "&ntilde;")
+      (add-mapping  242 "&ograve;")
+      (add-mapping  243 "&oacute;")
+      (add-mapping  244 "&ocirc;")
+      (add-mapping  245 "&otilde;")
+      (add-mapping  246 "&ouml;")
+      (add-mapping  247 "&divide;")
+      (add-mapping  248 "&oslash;")
+      (add-mapping  249 "&ugrave;")
+      (add-mapping  250 "&uacute;")
+      (add-mapping  251 "&ucirc;")
+      (add-mapping  252 "&uuml;")
+      (add-mapping  253 "&yacute;")
+      (add-mapping  254 "&thorn;")
+      (add-mapping  255 "&yuml;")
+      (add-mapping  338 "&OElig;")
+      (add-mapping  339 "&oelig;")
+      (add-mapping  352 "&Scaron;")
+      (add-mapping  353 "&scaron;")
+      (add-mapping  376 "&Yuml;")
+      (add-mapping  402 "&fnof;")
+      (add-mapping  710 "&circ;")
+      (add-mapping  732 "&tilde;")
+      (add-mapping  913 "&Alpha;")
+      (add-mapping  914 "&Beta;")
+      (add-mapping  915 "&Gamma;")
+      (add-mapping  916 "&Delta;")
+      (add-mapping  917 "&Epsilon;")
+      (add-mapping  918 "&Zeta;")
+      (add-mapping  919 "&Eta;")
+      (add-mapping  920 "&Theta;")
+      (add-mapping  921 "&Iota;")
+      (add-mapping  922 "&Kappa;")
+      (add-mapping  923 "&Lambda;")
+      (add-mapping  924 "&Mu;")
+      (add-mapping  925 "&Nu;")
+      (add-mapping  926 "&Xi;")
+      (add-mapping  927 "&Omicron;")
+      (add-mapping  928 "&Pi;")
+      (add-mapping  929 "&Rho;")
+      (add-mapping  931 "&Sigma;")
+      (add-mapping  932 "&Tau;")
+      (add-mapping  933 "&Upsilon;")
+      (add-mapping  934 "&Phi;")
+      (add-mapping  935 "&Chi;")
+      (add-mapping  936 "&Psi;")
+      (add-mapping  937 "&Omega;")
+      (add-mapping  945 "&alpha;")
+      (add-mapping  946 "&beta;")
+      (add-mapping  947 "&gamma;")
+      (add-mapping  948 "&delta;")
+      (add-mapping  949 "&epsilon;")
+      (add-mapping  950 "&zeta;")
+      (add-mapping  951 "&eta;")
+      (add-mapping  952 "&theta;")
+      (add-mapping  953 "&iota;")
+      (add-mapping  954 "&kappa;")
+      (add-mapping  955 "&lambda;")
+      (add-mapping  956 "&mu;")
+      (add-mapping  957 "&nu;")
+      (add-mapping  958 "&xi;")
+      (add-mapping  959 "&omicron;")
+      (add-mapping  960 "&pi;")
+      (add-mapping  961 "&rho;")
+      (add-mapping  962 "&sigmaf;")
+      (add-mapping  963 "&sigma;")
+      (add-mapping  964 "&tau;")
+      (add-mapping  965 "&upsilon;")
+      (add-mapping  966 "&phi;")
+      (add-mapping  967 "&chi;")
+      (add-mapping  968 "&psi;")
+      (add-mapping  969 "&omega;")
+      (add-mapping  977 "&thetasym;")
+      (add-mapping  978 "&upsih;")
+      (add-mapping  982 "&piv;")
+      (add-mapping 8194 "&ensp;")
+      (add-mapping 8195 "&emsp;")
+      (add-mapping 8201 "&thinsp;")
+      (add-mapping 8204 "&zwnj;")
+      (add-mapping 8205 "&zwj;")
+      (add-mapping 8206 "&lrm;")
+      (add-mapping 8207 "&rlm;")
+      (add-mapping 8211 "&ndash;")
+      (add-mapping 8212 "&mdash;")
+      (add-mapping 8216 "&lsquo;")
+      (add-mapping 8217 "&rsquo;")
+      (add-mapping 8218 "&sbquo;")
+      (add-mapping 8220 "&ldquo;")
+      (add-mapping 8221 "&rdquo;")
+      (add-mapping 8222 "&bdquo;")
+      (add-mapping 8224 "&dagger;")
+      (add-mapping 8225 "&Dagger;")
+      (add-mapping 8226 "&bull;")
+      (add-mapping 8230 "&hellip;")
+      (add-mapping 8240 "&permil;")
+      (add-mapping 8242 "&prime;")
+      (add-mapping 8243 "&Prime;")
+      (add-mapping 8249 "&lsaquo;")
+      (add-mapping 8250 "&rsaquo;")
+      (add-mapping 8254 "&oline;")
+      (add-mapping 8260 "&frasl;")
+      (add-mapping 8364 "&euro;")
+      (add-mapping 8465 "&image;")
+      (add-mapping 8472 "&weierp;")
+      (add-mapping 8476 "&real;")
+      (add-mapping 8482 "&trade;")
+      (add-mapping 8501 "&alefsym;")
+      (add-mapping 8592 "&larr;")
+      (add-mapping 8593 "&uarr;")
+      (add-mapping 8594 "&rarr;")
+      (add-mapping 8595 "&darr;")
+      (add-mapping 8596 "&harr;")
+      (add-mapping 8629 "&crarr;")
+      (add-mapping 8656 "&lArr;")
+      (add-mapping 8657 "&uArr;")
+      (add-mapping 8658 "&rArr;")
+      (add-mapping 8659 "&dArr;")
+      (add-mapping 8660 "&hArr;")
+      (add-mapping 8704 "&forall;")
+      (add-mapping 8706 "&part;")
+      (add-mapping 8707 "&exist;")
+      (add-mapping 8709 "&empty;")
+      (add-mapping 8711 "&nabla;")
+      (add-mapping 8712 "&isin;")
+      (add-mapping 8713 "&notin;")
+      (add-mapping 8715 "&ni;")
+      (add-mapping 8719 "&prod;")
+      (add-mapping 8721 "&sum;")
+      (add-mapping 8722 "&minus;")
+      (add-mapping 8727 "&lowast;")
+      (add-mapping 8730 "&radic;")
+      (add-mapping 8733 "&prop;")
+      (add-mapping 8734 "&infin;")
+      (add-mapping 8736 "&ang;")
+      (add-mapping 8743 "&and;")
+      (add-mapping 8744 "&or;")
+      (add-mapping 8745 "&cap;")
+      (add-mapping 8746 "&cup;")
+      (add-mapping 8747 "&int;")
+      (add-mapping 8756 "&there4;")
+      (add-mapping 8764 "&sim;")
+      (add-mapping 8773 "&cong;")
+      (add-mapping 8776 "&asymp;")
+      (add-mapping 8800 "&ne;")
+      (add-mapping 8801 "&equiv;")
+      (add-mapping 8804 "&le;")
+      (add-mapping 8805 "&ge;")
+      (add-mapping 8834 "&sub;")
+      (add-mapping 8835 "&sup;")
+      (add-mapping 8836 "&nsub;")
+      (add-mapping 8838 "&sube;")
+      (add-mapping 8839 "&supe;")
+      (add-mapping 8853 "&oplus;")
+      (add-mapping 8855 "&otimes;")
+      (add-mapping 8869 "&perp;")
+      (add-mapping 8901 "&sdot;")
+      (add-mapping 8968 "&lceil;")
+      (add-mapping 8969 "&rceil;")
+      (add-mapping 8970 "&lfloor;")
+      (add-mapping 8971 "&rfloor;")
+      (add-mapping 9001 "&lang;")
+      (add-mapping 9002 "&rang;")
+      (add-mapping 9674 "&loz;")
+      (add-mapping 9824 "&spades;")
+      (add-mapping 9827 "&clubs;")
+      (add-mapping 9829 "&hearts;")
+      (add-mapping 9830 "&diams;"))
+
     ht))
 
 (defparameter *html-entites* (make-html-entities))
 
+(defun numeric-html-entity-value (s)
+  "Code point of numeric entity S (\"&#233;\" or \"&#xE9;\"); NIL if S is not one."
+  (let* ((len (length s))
+         (hexp (and (> len 3) (char-equal (char s 2) #\x)))
+         (start (if hexp 3 2))        ; index of the first digit
+         (end (1- len))               ; index of the closing #\;
+         (radix (if hexp 16 10)))
+    (and (> len 3)
+         (char= (char s 0) #\&) (char= (char s 1) #\#) (char= (char s end) #\;)
+         ;; Reject an empty digit run ("&#x;"): PARSE-INTEGER would signal on it.
+         (< start end)
+         (every (lambda (ch) (digit-char-p ch radix)) (subseq s start end))
+         (parse-integer s :start start :end end :radix radix))))
+
 (defun html-entity->char (entity &optional (default #\?))
   (let ((res (gethash entity *html-entites*)))
     (if res
         (if (stringp res)
             (char res 0)
             res)
-        default)))
+        (let ((code (numeric-html-entity-value entity)))
+          ;; CODE-CHAR needs CODE below CHAR-CODE-LIMIT and may still return NIL.
+          (or (and code (< code char-code-limit) (code-char code)) default)))))
 
 (defun write-as-html (string &key (stream t) (escape-whitespace nil))
   (loop




More information about the bese-devel mailing list