(in-package :parse2)

(defun foo (input)
  "Parse INPUT using a grammar whose start symbol TOP is one :STRING-LITERAL token.

A literal is delimited by a pair of double quotes or a pair of single quotes.
While a literal is being scanned, its decoded text is accumulated in a string
output stream; when the closing quote is seen, the accumulated string is
returned from the :STRING-LITERAL rule (presumably becoming the semantic
value of the token).

LEX-ERROR is called for an unrecognised escape, for a hexadecimal escape that
does not denote a character, and for end of input inside a literal.

The return value is whatever the PARSE form yields."
  (let (bag)                  ; string output stream for the literal in progress
    (parse (input)
      (top -> :string-literal)

      ;; Scanner
      ;;
      ;; When seeing #\" or #\' we switch the scanner "condition" to either
      ;; :DQ-LITERAL or :SQ-LITERAL.  The effect is that only the scanner
      ;; rules mentioned within (:in ...) clauses apply.  While in one of
      ;; those conditions the text of the literal at hand is collected into
      ;; the string output stream BAG.
      ;;
      (-> #\" => (setq bag (make-string-output-stream)) (clex2:begin :dq-literal))
      (-> #\' => (setq bag (make-string-output-stream)) (clex2:begin :sq-literal))
      ;; No action here: presumably whitespace between tokens is just skipped.
      (-> "\\s+")

      ;; Literals
      (:in :dq-literal
        ;; On #\" we're done.  Switch back to the :INITIAL condition and
        ;; arrange for a :STRING-LITERAL token to be reported.
        (:string-literal -> #\" => (clex2:begin :initial) (get-output-stream-string bag))
        ;; A backslash-escaped double quote stands for the quote itself.
        (-> #\\ #\" => (write-char #\" bag))
        ;; Try to match some long run without interesting characters.
        ;; NOTE(review): (* ...) can also match the empty string; confirm the
        ;; scanner never selects a zero-length match (e.g. for a lone
        ;; backslash directly before end of input).
        (-> (* (- t #\\ #\"))
            => (write-string (:buffer $$) bag :start (:start $$) :end (:end $$))))

      (:in :sq-literal
        ;; Same three rules as for :DQ-LITERAL, with #\' as the delimiter.
        (:string-literal -> #\' => (clex2:begin :initial) (get-output-stream-string bag))
        (-> #\\ #\' => (write-char #\' bag))
        (-> (* (- t #\\ #\'))
            => (write-string (:buffer $$) bag :start (:start $$) :end (:end $$))))

      ;; Common Escapes
      ;;
      ;; NOTE(review): there is no rule for an escaped backslash, so a
      ;; backslash followed by a backslash appears to end up in the
      ;; "Bad escape" rule below -- confirm that this is intended.
      (:in (:dq-literal :sq-literal)
        (-> '"\\n" => (write-char #\Newline bag))
        (-> '"\\f" => (write-char #\Page bag))
        ;; \uXXXX and \UXXXXXXXX: the hex digits are captured as IT.
        (-> (or (and '"\\u" (= it "[0-9A-Fa-f]{4}"))
                (and '"\\U" (= it "[0-9A-Fa-f]{8}")))
            => (let ((code (parse-integer (:buffer it)
                                          :start (:start it) :end (:end it)
                                          :radix 16)))
                 ;; Eight hex digits can exceed CHAR-CODE-LIMIT.  CODE-CHAR
                 ;; is only specified for valid character codes (some
                 ;; implementations signal a TYPE-ERROR otherwise), so
                 ;; range-check first and report every bad value uniformly
                 ;; through LEX-ERROR.
                 (write-char (or (and (< code char-code-limit)
                                      (code-char code))
                                 (lex-error "Bad code point: ~A" $$))
                             bag)))
        ;; Any other backslash sequence is an error.  Written with the same
        ;; -> / => markers as every other scanner rule.
        (-> (and #\\ t) => (lex-error "Bad escape: ~A" $$))
        (-> :eof => (lex-error "Unterminated string literal"))))))