;;;; -- HTML Pretty Printer -------------------------------------------------------------------
(defvar *pp-html-ascii-p*
nil
"Stick to US-ASCII, hence 7 bit.")
(defvar *pp-true-html-p*
nil
"Use SGML features of HTML, like
.")
(defvar *pp-numeric-entities-p*
nil
"Besides <, > and & use only numeric entity references.")
(defvar *pp-html-escape-quot-p*
nil
"Also escape the #\\\" characters; set when grinding attribute values.")
(defun pprint-pcdata (string stream &key (space-handler nil))
(loop for c across string do
(cond ((eql #\space c)
(princ c stream)
(when space-handler
(funcall space-handler stream)))
((eql #\& c) (princ "&" stream))
((eql #\< c) (princ "<" stream))
((eql #\> c) (princ ">" stream))
((and (eql #\" c) *pp-html-escape-quot-p*)
(princ """ stream))
;; Escape any control characters and NBSP and the
;; characters, which differ between 8859-1 and -15
;; Further note that Microsofts code page 1252 agrees to
;; 8859-1 in positions #xA0 to #xFF.
((or
;; strict ASCII, recommended when generating pages:
(and *pp-html-ascii-p* (>= (char-code c) 127))
(>= (char-code c) 256)
(<= 128 (char-code c) 160)
;; Skip incompatibilities between Latin-1 and Latin-15
(member (char-code c) '(#xA4 #xA6 #xA8 #xB4 #xB8 #xBC #xBD #xBE)))
;;
(let ((ent (and (not *pp-numeric-entities-p*) (find-character-entity c))))
(cond ((null ent)
(format stream "~D;" (char-code c)))
(t
(format stream "&~A;" ent)))))
(t
(princ c stream)))))
(defun find-character-entity (char)
"When there is a character entity, which expands to the character
/char/ returns its name (without &;) as a string."
(car (rassoc (string char) (sgml::dtd-entities closure-html:*html-dtd*) :test #'string=)))
(defparameter +block-elements+
'(:p :h1 :h2 :h3 :h4 :h5 :h6
:ul :ol :li :dl :dd :dt
:table :tbody :tr :td :center
:div :body :html :head :meta :link :table :tbody :tr :td :th :form :select :option :script :style))
(defun pprint-html (node stream)
(labels ((pr-start (node stream)
"Prints the start tag"
(let ((element-definition (gethash (gi node) (sgml::dtd-elements CLOSURE-HTML:*HTML-DTD*))))
(pprint-logical-block (stream (attrs node)
:prefix (format nil "<~A~A" (gi node) (if (attrs node) " " ""))
:suffix ">")
(loop for i from 0 do
(pprint-exit-if-list-exhausted)
(unless (zerop i)
(princ " " stream)
(pprint-newline :linear stream))
(let ((next (pprint-pop)))
(destructuring-bind (attr value) next
(let (it)
(cond ((and *pp-true-html-p*
(setf it (find-value-short-hand element-definition value)))
(format stream "~A" it))
(t
(format stream "~A=~S"
attr
(cond ((stringp value)
(with-output-to-string (bag)
(let ((*pp-html-escape-quot-p* t))
(pprint-pcdata (cadr next) bag))))
(t
value))))))))))))
;;
(find-value-short-hand (element-definition value)
(and element-definition
(and (find-if (lambda (attr-def)
(and (consp (cadr attr-def))
(member value (cadr attr-def)
:test (lambda (x y)
(and (or (stringp x) (symbolp x))
(or (stringp y) (symbolp y))
(string-equal x y))))))
(sgml::element-attlist element-definition))
(string-upcase (string value)))))
;;
(end-tag-p (node)
(or (not (sgml::element-oend? (gethash (gi node) (sgml::dtd-elements CLOSURE-HTML:*HTML-DTD*))))
(member (gi node) '(:html :body :tr))))
;;
(pr-end (node stream)
(when (end-tag-p node)
;; (pprint-newline :linear stream)
(format stream "~A>" (gi node))))
;;
(collect-inline-runs (node)
(let ((bag nil)
(res nil))
(loop for child in (children node) do
(cond ((or (member (gi child) +block-elements+)
(member (gi child) '(:li :ul :script :style)))
(when bag
(push (list* :inline-run nil (reverse bag)) res))
(setf bag nil)
(push child res))
(t
(push child bag))))
(when bag
(push (list* :inline-run nil (reverse bag)) res))
(reverse res))))
(cond
((null node)) ;###
;; #PCDATA
((eq :pcdata (gi node))
(pprint-pcdata node stream
:space-handler (lambda (stream)
(pprint-newline :fill stream))))
((eq :?entity (gi node))
(format stream "&~A;" (cadr node)))
((member (gi node) '(:style))
(pr-start node stream)
(loop for child in (children node) do
(princ child stream))
(format stream "~A>" (gi node)))
;; preformated stuff
((member (gi node) '(:textarea :pre :script :style))
(pr-start node stream)
(loop for child in (children node) do
(pprint-pcdata child stream))
(format stream "~A>" (gi node)))
;; :BR is special
((member (gi node) '(:br))
(pr-start node stream)
(pprint-newline :mandatory stream))
((eq (gi node) :splice)
(map nil #'(lambda (x) (pprint-html x stream)) (cdr node)))
((eq (gi node) :verbatim)
(princ (cadr node) stream))
;; block level elements
((member (gi node) +block-elements+)
(let ((style
(case (gi node)
((:p :div :body :head :html :ul :ol :dl :table :tbody :tr :li)
:mandatory)
(t
:linear))))
(pprint-newline :mandatory stream)
;;(fresh-line stream)
;; OK. We collect runs of non block level elements.
(let ((children* #+NIL (children node) #-NIL (collect-inline-runs node)))
(pprint-logical-block (stream children*
:prefix ""
:suffix "")
(pr-start node stream)
(pprint-indent :block 2 stream)
(unwind-protect
(loop for i from 0 do
(pprint-exit-if-list-exhausted)
(let ((next (pprint-pop)))
;;(princ style stream)
;;(format stream "" style)
(pprint-newline style stream)
(pprint-html next stream)))
(pprint-indent :block 0 stream)
(when (end-tag-p node)
'(princ "$2" stream)
(pprint-newline style stream)
(pr-end node stream)))))))
;;
((eq (gi node) :inline-run)
(pprint-logical-block (stream (children node))
(unwind-protect
(loop
(pprint-exit-if-list-exhausted)
(pprint-html (pprint-pop) stream))
(pprint-indent :block 0 stream))))
;; otherwise
(t
(pprint-logical-block (stream (children node))
(pr-start node stream)
(pprint-indent :block 2 stream)
(unwind-protect
(loop
(pprint-exit-if-list-exhausted)
(pprint-html (pprint-pop) stream))
(pprint-indent :block 0 stream)
(pr-end node stream)))))))
|