;;;; -- Interval Sums ------------------------------------------------------------------------- ;; To support large character sets, we need an implemention of a set of ;; characters. Traditional scanner generators would at some place just ;; enumerate the alphabet \Sigma, which is not feasible with large character ;; sets like Unicode. ;; We handle all transitions in the automaton as a set of of the codes of ;; characters, expressed by an ISUM. The representation of such a set is ;; best defined by the ISUM-MEMBER function, but here is an overview to get ;; the idea: ;; NIL is the empty set ;; (a b) is the set [a, b) ;; (a b c d) is the set [a, b) u [c, d) ;; (nil) is everything ;; (nil a b) is everything but [a, b) ;; An ISUM is a sequence of stricly monotonic increasing integers. The idea ;; is that when you sweep a pointer over the list at each element found the ;; membership in the set changes. Like (1 10 12 15). You start outside the ;; set, find 1 and say "above or equal 1 is in the set" and then find 10 and ;; say "above or equal 10 is not in the set" and so on. This way it is very ;; easy to implement Boolean operations on sets. (define-constant +isum-nothing+ nil "The empty set.") (define-constant +isum-everything+ '(nil) "The set that contains every integer.") (define-constant +isum-any-character+ (list 0 char-code-limit) "The set that contains every character by its code point.") (define-constant +isum-ascii-printable+ (list 32 128)) (defun isum-singleton (x) "Returns the ISUM, that contains only /x/." (list x (1+ x))) (defun isum-range (from below) "Returns the ISUM, that contains every code point that is in [from, below)" (list from below)) ;;; Membership (defun isum-member (x isum) "Determines, whether /x/ is member of the ISUM /isum/. Returns non-NIL if so and NIL otherwise." (declare (type fixnum x) (optimize (speed 3) (safety 0))) (loop for i of-type fixnum from 0 for y in isum when (and y (< x (the fixnum y))) return (oddp i) finally (return (oddp i)))) ;;; Boolean operation on ISUMs (defmacro isum-op (op A B) "Combine the sets A and B by the Boolean operator op, which should be a valid argument to the BOOLE function. An integer x is member of the resulting set iff (logbitp 0 (boole op (if (isum-member x A) 1 0) (if (isum-member x B) 1 0))) is non-NIL. That way e.g. boole-ior denotes the union." `((lambda (A B) (declare (optimize (speed 3) (safety 0))) (let* ((Ain 0) (Bin 0) (Cin 0) (s nil) (res (cons nil nil)) (resf res)) (declare (type fixnum Ain Bin Cin) (type cons res)) ;; Get rid of an initial NIL, which indicates a complemented set. (when (and A (null (car A))) (pop A) (setq Ain (- 1 Ain))) (when (and B (null (car B))) (pop B) (setq Bin (- 1 Bin))) ;; Now traverse A and B in paralell and generate the resulting sequence. (loop (when (/= Cin (the fixnum (ldb (byte 1 0) (the fixnum (boole ,op (the fixnum Ain) (the fixnum Bin)))))) (setf resf (setf (cdr resf) (cons s nil))) (setf Cin (the fixnum (- 1 Cin)))) (cond ((null A) (cond ((null B) (return)) (t (setq s (pop B)) (setq Bin (the fixnum (- 1 Bin)))))) ((null B) (setq s (pop A)) (setq Ain (the fixnum (- 1 Ain)))) ((< (the fixnum (car A)) (the fixnum (car B))) (setq s (pop A)) (setq Ain (the fixnum (- 1 Ain)))) ((< (the fixnum (car B)) (the fixnum (car A))) (setq s (pop B)) (setq Bin (the fixnum (- 1 Bin)))) (t (setq s (pop A)) (setq Ain (the fixnum (- 1 Ain))) (pop B) (setq Bin (the fixnum (- 1 Bin)))) )) (cdr res))) ,A ,B)) ;;; Boiler plate ;; Now we could define interesting set operations in terms of ISUM-OP. (defun isum-union (a b) (isum-op boole-ior a b)) (defun isum-intersection (a b) (isum-op boole-and a b)) (defun isum-difference (a b) (isum-op boole-andc2 a b)) (defun isum-complement (a) (isum-op boole-c1 a nil)) ;;; Misc (defun isum-witness (isum) "Returns a witness of the set /isum/, which must be non empty." (cond ((null isum) (error "The empty set has no witnesses.")) ((null (car isum)) (if (cdr isum) (1- (cadr isum)) 42)) ;you get it (t (car isum)))) (defun isum-empty-p (isum) (null isum))