;; -*- Mode: Lisp; -*- ;; --------------------------------------------------------------------------- ;; Title: Small JSON Parser ;; Created: 2020-03-25 ;; Author: Gilbert Baumann ;; --------------------------------------------------------------------------- ;; (c) copyright 2020 by Gilbert Baumann (defpackage :de.bauhh.json (:use :common-lisp) (:export #:read-json #:parse-json #:read-json-file #:json-parse-error)) (in-package :de.bauhh.json) ;;;; -- TODO ---------------------------------------------------------------------------------- ;; - When seeing "123x" we might want to report a bad digit. ;; - UTF-16? Do we care? ;; - docu. ;;;; -- Implementation ------------------------------------------------------------------------ (define-condition json-parse-error (simple-error) ()) (defun read-json (stream &key (interactive-p (interactive-stream-p stream)) (camel-escape-char #\-) (max-nest 1000)) (check-type stream stream) (read-json-1 stream :interactive-p interactive-p :camel-escape-char camel-escape-char :max-nest max-nest)) (defun read-json-1 (stream &key (interactive-p (interactive-stream-p stream)) (camel-escape-char #\-) (max-nest 1000) (from-string nil) from-start from-end) (check-type camel-escape-char character) (check-type max-nest unsigned-byte) (locally (declare (optimize (speed 3) (safety 1))) (let ((last-nl-fp 0) (last-nl-iptr (or from-start 0)) (lineno 1) (buffer (make-array 20 :element-type 'character :adjustable t :fill-pointer 0)) (array-buffer (make-array 0 :adjustable t :fill-pointer 0)) (la nil) (ibuf-fp 0) ;the FILE-POSITION at which ; IBUF was last filled. (ibuf (or from-string (make-array (if interactive-p 1 512) :element-type 'character))) (iend (if from-string from-end 0)) (iptr (or from-start 0)) (nest 0)) (declare (special nest)) (declare (type (or null character) la) (type (simple-array character (*)) ibuf) (type fixnum iend iptr)) (labels ((read-element () (skip-ws) (prog1 (read-element-1) (skip-ws))) ;; (read-element-1 () (skip-ws) (case la ((nil) (parse-error 1 "Premature EOF")) (#\{ (read-object)) (#\[ (read-array)) (#\" (read-string)) (otherwise (cond ((or (char<= #\0 la #\9) (char= #\- la)) (read-number)) ((char<= #\a la #\z) (read-symbol)) (t (parse-error 0 "Unexpected character - ~S" la)))))) ;; (read-object () (let ((nest (1+ nest))) (declare (special nest)) (when (> nest max-nest) (parse-error 0 "Maximum nesting, ~D, exceeded." max-nest)) ;; (consume) ; { (skip-ws) (cond ((eql #\} la) (consume) '()) (t (prog1 (read-members) (expect #\} "#\\, or #\\} expected") (consume)))))) ;; (read-members () (let (head tail) (loop (multiple-value-bind (key val) (read-member) (let ((q (list key val))) (if tail (rplacd tail q) (setq head q)) (setq tail (cdr q))) (unless (eql #\, la) (return head)) (consume))))) ;; (read-member () (skip-ws) (expect #\" "#\\\" or #\\} expected") (values (intern (read-string t) :keyword) (progn (skip-ws) (expect #\:) (consume) (read-element)))) ;; (read-string (&optional upcasep) (setf (fill-pointer buffer) 0) (consume) (loop (cond ((null la) (parse-error -1 "Premature EOF in string.")) ((char= #\" la) (consume) (return (copy-seq buffer))) ((char= #\\ la) (consume) (case la ((nil) (parse-error -1 "Premature EOF in escape sequence.")) (#\b (consume) (vector-push-extend (code-char 8) buffer)) (#\f (consume) (vector-push-extend (code-char 12) buffer)) (#\n (consume) (vector-push-extend #\newline buffer)) (#\r (consume) (vector-push-extend (code-char 13) buffer)) (#\t (consume) (vector-push-extend (code-char 9) buffer)) (#\u (consume) (let ((r 0)) (loop repeat 4 do (let (v) (cond ((and la (setq v (digit-char-p la 16))) (consume) (setq r (+ (* 16 r) v))) ((null la) (parse-error -1 "Premature end of \\uHHHH escape.")) (t (parse-error 0 "Illegal digit in \\uHHHH escape."))))) (vector-push-extend (code-char r) buffer))) ((#\" #\\ #\/) (vector-push-extend la buffer) (consume)) (otherwise (parse-error 0 "Illegal character in escape sequence - ~S" la)) )) ((char< la #\space) (parse-error 0 "Unescaped control character - ~S" la)) (t (when (and upcasep (or (char<= #\A la #\Z) (char= la camel-escape-char))) (vector-push-extend camel-escape-char buffer)) (when (and upcasep (char<= #\a la #\z)) (setq la (code-char (- (char-code la) 32)))) (vector-push-extend la buffer) (consume))))) ;; (read-array () (let ((nest (1+ nest))) (declare (special nest)) (when (> nest max-nest) (parse-error 1 "Maximum nesting, ~D, exceeded." max-nest)) (consume) ;#\[ (skip-ws) (cond ((eql #\] la) (consume) #()) (t (setf (fill-pointer array-buffer) 0) (read-elements) (expect #\] "#\\, or #\\] expected") (consume) (copy-seq array-buffer))))) ;; (read-elements () (loop (vector-push-extend (read-element) array-buffer) (unless (eql #\, la) (return)) (consume))) ;; (read-number (&aux (res 0) (sign 1)) ;; optional sign (when (eql #\- la) (setq sign -1) (consume)) ;; integer part (let ((leading-with-zero-p (eql la #\0))) (multiple-value-bind (int n) (read-digits) (when (and leading-with-zero-p (> n 1)) (parse-error n "Leading zero in number.")) (setq res int))) ;; opt. fraction (when (eql #\. la) (consume) (multiple-value-bind (frac n) (read-digits) (incf res (* (expt 10d0 (- n)) frac))) ) ;; opt. exponent (when (or (eql #\e la) (eql #\E la)) (consume) (let ((exp (* (case la (#\+ (consume) 1) (#\- (consume) -1) (t 1)) (read-digits)))) (setq res (* res (expt 10d0 exp))))) ;; done (* sign res)) ;; (read-digits () (let ((s 0) (n 0)) (loop for v = (and la (digit-char-p la)) while v do (setq s (+ (* 10 s) v) n (1+ n)) (consume)) (when (zerop n) (parse-error 0 "Digit expected, got ~S." (subst :eof nil la))) (values s n))) ;; (read-symbol () (setf (fill-pointer buffer) 0) (loop while (and la (char<= #\a la #\z)) do (vector-push-extend (consume) buffer)) (cond ((string= "true" buffer) :true) ((string= "false" buffer) :false) ((string= "null" buffer) nil) (t (parse-error (length buffer) "Unknown JSON identifier - ~S" buffer)))) ;; ;; (expect (char &optional message) (unless (eql char la) (if message (parse-error 0 "~A, saw ~S" message (subst :eof nil la)) (parse-error 0 "~S expected, got ~S." char (subst :eof nil la))))) ;; (skip-ws () (loop for c = la while (ws-p c) do (when (eql #\newline c) (setq last-nl-fp ibuf-fp last-nl-iptr iptr lineno (+ lineno 1))) (consume))) ;; (ws-p (char) (or (eql #\space char) (eql #\newline char) (eql #\return char) (eql #\tab char))) ;; (consume () (prog1 la (when (= iptr iend) (setq ibuf-fp (and stream (file-position stream))) (setq iend (if stream (read-sequence ibuf stream) 0) iptr 0)) (cond ((= iptr iend) (setq la nil)) (t (setq la (char ibuf iptr)) (incf iptr))))) ;; (parse-error (offset format &rest args &aux (context-format "") context-args) (let (line col) (cond (from-string (let ((p (or (position #\newline from-string :start last-nl-iptr :end from-end) from-end))) (setq line (subseq from-string last-nl-iptr p) col (- iptr last-nl-iptr)))) ((and ibuf-fp (file-position stream last-nl-fp)) ;; Get to the last beginning of a line, if we can. (loop repeat last-nl-iptr do (read-char stream)) ;; (let ((save (file-position stream))) (setq line (read-line stream)) (file-position stream save)) ;; Figure out the column. When the last #|newline appears ;; in our current buffer, whe could just look at ;; IPTR. Otherwise we read characters until we hit where ;; our current IBUF was read. The sum of the number of read ;; characters and the IPTR then is the column. (setq col (cond ((= last-nl-fp ibuf-fp) (- iptr last-nl-iptr)) (t (+ (loop until (>= (file-position stream) ibuf-fp) do (read-char stream) count 1) iptr)))))) ;; (when line (setq context-format "~%~%~A~%~v<~>^" context-args (list line (- col offset (if la 1 0))))) ;; (error 'json-parse-error :format-control "JSON parse error: ~?~%On stream ~S, line ~D~?" :format-arguments (list format args stream lineno context-format context-args))))) ;; (declare (inline consume ws-p skip-ws expect read-string read-member)) (consume) (let ((res (read-element-1))) (unless interactive-p (skip-ws) (unless (null la) (parse-error 0 "Trailing garbage"))) (when (and interactive-p stream la) (unread-char la stream)) (if (and interactive-p from-string) (values res (if la (1- iptr) (length from-string))) res)))))) (defun parse-json (string &rest args &key start end interactive-p camel-escape-char max-nest) (declare (ignore camel-escape-char max-nest)) (remf args :start) (remf args :end) (unless (typep string '(simple-array character (*))) (let ((new-string (make-array (length string) :element-type 'character))) (replace new-string string) (setq string new-string))) (apply #'read-json-1 nil :from-string string :from-start (or start 0) :from-end (or end (length string)) :interactive-p interactive-p args)) (defun read-json-file (pathname &rest rest &key (external-format :utf-8) max-nest camel-escape-char) (declare (ignore max-nest camel-escape-char)) (remf rest external-format) (with-open-file (input pathname :external-format external-format) (apply #'read-json input rest))) ;;;; -- Test Suite ---------------------------------------------------------------------------- ;; https://github.com/nst/JSONTestSuite (defvar *json-parser* 'read-json) (defun run-test-1 (pathname &key fail-expected) (let ((short-filename (namestring (make-pathname :directory nil :defaults pathname)))) (with-open-file (input pathname :external-format :utf-8) (multiple-value-bind (res cond) (ignore-errors (funcall *json-parser* input)) (declare (ignore res)) (when (if fail-expected (not cond) cond) (format t "~&;; ~60A~A~&" short-filename (type-of cond)) (when cond (princ cond) (terpri)) ))))) (defun test-suite-y () (let ((*read-default-float-format* 'double-float)) (loop for p in (directory "other/JSONTestSuite/test_parsing/y_*.json") do (run-test-1 p)))) (defun test-suite-i () (let ((*read-default-float-format* 'double-float)) (loop for p in (directory "other/JSONTestSuite/test_parsing/i_*.json") do (run-test-1 p)))) (defun test-suite-n () (let ((*read-default-float-format* 'double-float)) (loop for p in (directory "other/JSONTestSuite/test_parsing/n_*.json") do (run-test-1 p :fail-expected t)))) (defun test-suite () (test-suite-y) (test-suite-n))