(declaim (inline decode-utf-8-char)) (defun decode-utf-8-char (first-octet &key peek-octet read-octet (bad-action nil) (bad-value (code-char #xFFFD))) (macrolet ((one-step () `(progn (let ((char (funcall peek-octet))) (when (null char) (go :bad)) (setq c (- char #x80)) (unless (<= 0 c #x3F) (go :bad)) (funcall read-octet) (setq u (logior (ash u 6) c)))))) ;; (prog ((c 0) (u 0) (m 0)) (declare (type (unsigned-byte 32) u m) (type (signed-byte 9) c)) (setq c first-octet) (cond ((<= c #x7F) (return (code-char c))) ((< c #xC0) (go :bad)) ((< c #xE0) (setq u (logand #x1F c) m #x80) (go :one)) ((< c #xF0) (setq u (logand #x0F c) m #x800) (go :two)) ((< c #xF8) (setq u (logand #x07 c) m #x10000) (go :three)) ((< c #xFC) (setq u (logand #x03 c) m #x200000) (go :four)) ((< c #xFE) (setq u (logand #x01 c) m #x4000000) (go :five)) (t (go :bad))) ;; :five (one-step) :four (one-step) :three (one-step) :two (one-step) :one (one-step) ;; (when (or (< u m) (<= #xd800 u #xdfff) (>= u char-code-limit)) (go :bad)) (return (code-char u)) ;; :bad (when bad-action (funcall bad-action)) (return bad-value))))