;;;; -- HTML Pretty Printer ------------------------------------------------------------------- (defvar *pp-html-ascii-p* nil "Stick to US-ASCII, hence 7 bit.") (defvar *pp-true-html-p* nil "Use SGML features of HTML, like .") (defvar *pp-numeric-entities-p* nil "Besides <, > and & use only numeric entity references.") (defvar *pp-html-escape-quot-p* nil "Also escape the #\\\" characters; set when grinding attribute values.") (defun pprint-pcdata (string stream &key (space-handler nil)) (loop for c across string do (cond ((eql #\space c) (princ c stream) (when space-handler (funcall space-handler stream))) ((eql #\& c) (princ "&" stream)) ((eql #\< c) (princ "<" stream)) ((eql #\> c) (princ ">" stream)) ((and (eql #\" c) *pp-html-escape-quot-p*) (princ """ stream)) ;; Escape any control characters and NBSP and the ;; characters, which differ between 8859-1 and -15 ;; Further note that Microsofts code page 1252 agrees to ;; 8859-1 in positions #xA0 to #xFF. ((or ;; strict ASCII, recommended when generating pages: (and *pp-html-ascii-p* (>= (char-code c) 127)) (>= (char-code c) 256) (<= 128 (char-code c) 160) ;; Skip incompatibilities between Latin-1 and Latin-15 (member (char-code c) '(#xA4 #xA6 #xA8 #xB4 #xB8 #xBC #xBD #xBE))) ;; (let ((ent (and (not *pp-numeric-entities-p*) (find-character-entity c)))) (cond ((null ent) (format stream "&#~D;" (char-code c))) (t (format stream "&~A;" ent))))) (t (princ c stream))))) (defun find-character-entity (char) "When there is a character entity, which expands to the character /char/ returns its name (without &;) as a string." (car (rassoc (string char) (sgml::dtd-entities closure-html:*html-dtd*) :test #'string=))) (defparameter +block-elements+ '(:p :h1 :h2 :h3 :h4 :h5 :h6 :ul :ol :li :dl :dd :dt :table :tbody :tr :td :center :div :body :html :head :meta :link :table :tbody :tr :td :th :form :select :option :script :style)) (defun pprint-html (node stream) (labels ((pr-start (node stream) "Prints the start tag" (let ((element-definition (gethash (gi node) (sgml::dtd-elements CLOSURE-HTML:*HTML-DTD*)))) (pprint-logical-block (stream (attrs node) :prefix (format nil "<~A~A" (gi node) (if (attrs node) " " "")) :suffix ">") (loop for i from 0 do (pprint-exit-if-list-exhausted) (unless (zerop i) (princ " " stream) (pprint-newline :linear stream)) (let ((next (pprint-pop))) (destructuring-bind (attr value) next (let (it) (cond ((and *pp-true-html-p* (setf it (find-value-short-hand element-definition value))) (format stream "~A" it)) (t (format stream "~A=~S" attr (cond ((stringp value) (with-output-to-string (bag) (let ((*pp-html-escape-quot-p* t)) (pprint-pcdata (cadr next) bag)))) (t value)))))))))))) ;; (find-value-short-hand (element-definition value) (and element-definition (and (find-if (lambda (attr-def) (and (consp (cadr attr-def)) (member value (cadr attr-def) :test (lambda (x y) (and (or (stringp x) (symbolp x)) (or (stringp y) (symbolp y)) (string-equal x y)))))) (sgml::element-attlist element-definition)) (string-upcase (string value))))) ;; (end-tag-p (node) (or (not (sgml::element-oend? (gethash (gi node) (sgml::dtd-elements CLOSURE-HTML:*HTML-DTD*)))) (member (gi node) '(:html :body :tr)))) ;; (pr-end (node stream) (when (end-tag-p node) ;; (pprint-newline :linear stream) (format stream "" (gi node)))) ;; (collect-inline-runs (node) (let ((bag nil) (res nil)) (loop for child in (children node) do (cond ((or (member (gi child) +block-elements+) (member (gi child) '(:li :ul :script :style))) (when bag (push (list* :inline-run nil (reverse bag)) res)) (setf bag nil) (push child res)) (t (push child bag)))) (when bag (push (list* :inline-run nil (reverse bag)) res)) (reverse res)))) (cond ((null node)) ;### ;; #PCDATA ((eq :pcdata (gi node)) (pprint-pcdata node stream :space-handler (lambda (stream) (pprint-newline :fill stream)))) ((eq :?entity (gi node)) (format stream "&~A;" (cadr node))) ((member (gi node) '(:style)) (pr-start node stream) (loop for child in (children node) do (princ child stream)) (format stream "" (gi node))) ;; preformated stuff ((member (gi node) '(:textarea :pre :script :style)) (pr-start node stream) (loop for child in (children node) do (pprint-pcdata child stream)) (format stream "" (gi node))) ;; :BR is special ((member (gi node) '(:br)) (pr-start node stream) (pprint-newline :mandatory stream)) ((eq (gi node) :splice) (map nil #'(lambda (x) (pprint-html x stream)) (cdr node))) ((eq (gi node) :verbatim) (princ (cadr node) stream)) ;; block level elements ((member (gi node) +block-elements+) (let ((style (case (gi node) ((:p :div :body :head :html :ul :ol :dl :table :tbody :tr :li) :mandatory) (t :linear)))) (pprint-newline :mandatory stream) ;;(fresh-line stream) ;; OK. We collect runs of non block level elements. (let ((children* #+NIL (children node) #-NIL (collect-inline-runs node))) (pprint-logical-block (stream children* :prefix "" :suffix "") (pr-start node stream) (pprint-indent :block 2 stream) (unwind-protect (loop for i from 0 do (pprint-exit-if-list-exhausted) (let ((next (pprint-pop))) ;;(princ style stream) ;;(format stream "" style) (pprint-newline style stream) (pprint-html next stream))) (pprint-indent :block 0 stream) (when (end-tag-p node) '(princ "$2" stream) (pprint-newline style stream) (pr-end node stream))))))) ;; ((eq (gi node) :inline-run) (pprint-logical-block (stream (children node)) (unwind-protect (loop (pprint-exit-if-list-exhausted) (pprint-html (pprint-pop) stream)) (pprint-indent :block 0 stream)))) ;; otherwise (t (pprint-logical-block (stream (children node)) (pr-start node stream) (pprint-indent :block 2 stream) (unwind-protect (loop (pprint-exit-if-list-exhausted) (pprint-html (pprint-pop) stream)) (pprint-indent :block 0 stream) (pr-end node stream)))))))