mirror of
https://github.com/mozilla/gecko-dev.git
synced 2025-04-03 04:52:54 +00:00
Added support for lookahead constraints
This commit is contained in:
parent
a7ae00c0dd
commit
78be7c3fee
@ -28,6 +28,7 @@
|
||||
(deftype terminalset () 'integer)
|
||||
|
||||
(defconstant *empty-terminalset* 0)
|
||||
(defconstant *full-terminalset* -1)
|
||||
|
||||
; Return true if terminalset is empty.
|
||||
(declaim (inline terminalset-empty?))
|
||||
@ -53,12 +54,35 @@
|
||||
(zerop (logand terminalset1 terminalset2)))
|
||||
|
||||
|
||||
; Merge two sets of lookaheads sorted by increasing terminal numbers, eliminating
|
||||
; duplicates. Return the combined set.
|
||||
; Return the complement of the terminalset.
|
||||
(declaim (inline terminalset-complement))
|
||||
(defun terminalset-complement (terminalset)
|
||||
(lognot terminalset))
|
||||
|
||||
|
||||
; Return the intersection of the two terminalsets.
|
||||
(declaim (inline terminalset-intersection))
|
||||
(defun terminalset-intersection (terminalset1 terminalset2)
|
||||
(logand terminalset1 terminalset2))
|
||||
|
||||
(define-modify-macro terminalset-intersection-f (terminalset2) terminalset-intersection)
|
||||
|
||||
|
||||
; Return the union of the two terminalsets.
|
||||
(declaim (inline terminalset-union))
|
||||
(defun terminalset-union (terminalset1 terminalset2)
|
||||
(logior terminalset1 terminalset2))
|
||||
|
||||
(define-modify-macro terminalset-union-f (terminalset2) terminalset-union)
|
||||
|
||||
|
||||
; Return the elements in terminalset1 that are not in terminalset2.
|
||||
(declaim (inline terminalset-difference))
|
||||
(defun terminalset-difference (terminalset1 terminalset2)
|
||||
(logandc2 terminalset1 terminalset2))
|
||||
|
||||
(define-modify-macro terminalset-difference-f (terminalset2) terminalset-difference)
|
||||
|
||||
|
||||
; Return a unique serial number for the given terminal.
|
||||
(declaim (inline terminal-number))
|
||||
@ -119,6 +143,41 @@
|
||||
(code nil)) ;The generated lisp source code that performs the action
|
||||
|
||||
|
||||
;;; ------------------------------------------------------------------------------------------------------
|
||||
;;; CONSTRAINTS
|
||||
|
||||
;;; A constraint modifies the rhs of a production. The constraint applies just before the pos-th
|
||||
;;; general grammar symbol on the rhs of the production.
|
||||
(defstruct (constraint (:constructor nil) (:copier nil) (:predicate constraint?))
|
||||
(pos nil :type integer :read-only t)) ;Position of this constraint; ranges between 0 and length(general-production-rhs), inclusive.
|
||||
|
||||
|
||||
;;; A lookahead-constraint imposes a restriction on the current lookahead terminal when matching
|
||||
;;; against the production. The match succeeds only if there is no following terminal or the following
|
||||
;;; terminal is not present in the lookahead-constraint's list of forbidden terminals.
|
||||
(defstruct (lookahead-constraint (:include constraint)
|
||||
(:constructor make-lookahead-constraint (pos forbidden-terminals source))
|
||||
(:predicate lookahead-constraint?))
|
||||
(forbidden-terminals nil :type list :read-only t) ;List of forbidden terminals
|
||||
(source nil :type list :read-only t) ;List of grammar symbols (terminals or nonterminals) that produced the list of forbidden terminals
|
||||
(terminalset nil :type (or null terminalset))) ;Set of allowed terminals (complement of forbidden-terminals); null until terminals are numbered
|
||||
|
||||
|
||||
; Emit markup for a lookahead-constraint.
|
||||
(defun depict-lookahead-constraint (markup-stream lookahead-constraint)
|
||||
(depict markup-stream :begin-negative-lookahead)
|
||||
(depict-list markup-stream
|
||||
#'depict-production-rhs-component
|
||||
(lookahead-constraint-source lookahead-constraint)
|
||||
:separator ", ")
|
||||
(depict markup-stream :end-negative-lookahead))
|
||||
|
||||
|
||||
(defmethod print-object ((lookahead-constraint lookahead-constraint) stream)
|
||||
(print-unreadable-object (lookahead-constraint stream)
|
||||
(format stream "-~{ ~:_~W~}" (lookahead-constraint-source lookahead-constraint))))
|
||||
|
||||
|
||||
;;; ------------------------------------------------------------------------------------------------------
|
||||
;;; GENERALIZED PRODUCTIONS
|
||||
|
||||
@ -126,6 +185,7 @@
|
||||
(defstruct (general-production (:constructor nil) (:copier nil) (:predicate general-production?))
|
||||
(lhs nil :type general-nonterminal :read-only t) ;The general-nonterminal on the left-hand side of this general-production
|
||||
(rhs nil :type list :read-only t) ;List of general grammar symbols to which that general-nonterminal expands
|
||||
(constraints nil :type list :read-only t) ;List of constraints applying to rhs, sorted by increasing pos values
|
||||
(name nil :read-only t)) ;This general-production's name that will be used to name the parse tree node
|
||||
|
||||
|
||||
@ -137,6 +197,34 @@
|
||||
(generic-production-productions general-production)))
|
||||
|
||||
|
||||
; Return the general-production's rhs with constraints interleaved with the rhs general grammar symbols.
|
||||
(defun general-production-rhs-components (general-production)
|
||||
(labels
|
||||
((merge-constraints (rhs constraints pos)
|
||||
(if constraints
|
||||
(let* ((constraint (first constraints))
|
||||
(constraint-pos (constraint-pos constraint)))
|
||||
(cond
|
||||
((= constraint-pos pos)
|
||||
(cons constraint (merge-constraints rhs (rest constraints) pos)))
|
||||
(rhs
|
||||
(cons (first rhs) (merge-constraints (rest rhs) constraints (1+ pos))))
|
||||
(t (error "Bad constraint list"))))
|
||||
rhs)))
|
||||
|
||||
(merge-constraints (general-production-rhs general-production)
|
||||
(general-production-constraints general-production)
|
||||
0)))
|
||||
|
||||
|
||||
; Return the general-production's lookahead-constraint's terminalset at the given position.
|
||||
(defun general-production-lookahead-constraint (general-production pos)
|
||||
(let ((constraint (find pos (general-production-constraints general-production) :key #'constraint-pos :test #'=)))
|
||||
(if constraint
|
||||
(lookahead-constraint-terminalset constraint)
|
||||
*full-terminalset*)))
|
||||
|
||||
|
||||
; Emit a markup paragraph for the left-hand-side of a general production.
|
||||
(defun depict-general-production-lhs (markup-stream lhs-general-nonterminal)
|
||||
(depict-paragraph (markup-stream ':grammar-lhs)
|
||||
@ -144,6 +232,13 @@
|
||||
(depict markup-stream " " ':derives-10)))
|
||||
|
||||
|
||||
; Emit markup for a production right-hand-side component.
|
||||
(defun depict-production-rhs-component (markup-stream production-rhs-component &optional subscript)
|
||||
(if (lookahead-constraint? production-rhs-component)
|
||||
(depict-lookahead-constraint markup-stream production-rhs-component)
|
||||
(depict-general-grammar-symbol markup-stream production-rhs-component :reference subscript)))
|
||||
|
||||
|
||||
; Emit a markup paragraph for the right-hand-side of a general production.
|
||||
; first is true if this is the first production in a rule.
|
||||
; last is true if this is the last production in a rule.
|
||||
@ -152,45 +247,48 @@
|
||||
(if first
|
||||
(depict markup-stream ':tab3)
|
||||
(depict markup-stream "|" ':tab2))
|
||||
(let ((rhs (general-production-rhs general-production)))
|
||||
(let ((rhs-components (general-production-rhs-components general-production)))
|
||||
(depict-list markup-stream
|
||||
#'(lambda (markup-stream general-grammar-symbol)
|
||||
(depict-general-grammar-symbol markup-stream general-grammar-symbol :reference))
|
||||
rhs
|
||||
#'depict-production-rhs-component
|
||||
rhs-components
|
||||
:separator " "
|
||||
:empty '(:left-angle-quote "empty" :right-angle-quote)))))
|
||||
|
||||
|
||||
; Emit the general production, including both its left and right-hand sides.
|
||||
; Include serial number subscripts on all rhs grammar symbols that both
|
||||
; appear more than once in the rhs or appear in the lhs; and
|
||||
; appear more than once in the rhs or appear in the lhs, and
|
||||
; have symbols that are present in the symbols-with-subscripts list.
|
||||
; link is the lhs's link type.
|
||||
(defun depict-general-production (markup-stream general-production link &optional symbols-with-subscripts)
|
||||
(let ((lhs (general-production-lhs general-production))
|
||||
(rhs (general-production-rhs general-production)))
|
||||
(rhs-components (general-production-rhs-components general-production)))
|
||||
(depict-general-nonterminal markup-stream lhs link)
|
||||
(depict markup-stream " " ':derives-10)
|
||||
(if rhs
|
||||
(if rhs-components
|
||||
(let ((counts-hash (make-hash-table :test *grammar-symbol-=*)))
|
||||
(when symbols-with-subscripts
|
||||
(dolist (symbol symbols-with-subscripts)
|
||||
(setf (gethash symbol counts-hash) 0))
|
||||
(dolist (general-grammar-symbol (cons lhs rhs))
|
||||
(let ((symbol (general-grammar-symbol-symbol general-grammar-symbol)))
|
||||
(when (gethash symbol counts-hash)
|
||||
(incf (gethash symbol counts-hash)))))
|
||||
(dolist (production-rhs-component (cons lhs (general-production-rhs general-production)))
|
||||
(when (general-grammar-symbol? production-rhs-component)
|
||||
(let ((symbol (general-grammar-symbol-symbol production-rhs-component)))
|
||||
(when (gethash symbol counts-hash)
|
||||
(incf (gethash symbol counts-hash))))))
|
||||
(maphash #'(lambda (symbol count)
|
||||
(assert-true (> count 0))
|
||||
(if (> count 1)
|
||||
(setf (gethash symbol counts-hash) 0)
|
||||
(remhash symbol counts-hash)))
|
||||
counts-hash))
|
||||
(dolist (general-grammar-symbol rhs)
|
||||
(let* ((symbol (general-grammar-symbol-symbol general-grammar-symbol))
|
||||
(subscript (and (gethash symbol counts-hash) (incf (gethash symbol counts-hash)))))
|
||||
(dolist (production-rhs-component rhs-components)
|
||||
(let ((subscript nil))
|
||||
(when (general-grammar-symbol? production-rhs-component)
|
||||
(let ((symbol (general-grammar-symbol-symbol production-rhs-component)))
|
||||
(when (gethash symbol counts-hash)
|
||||
(setq subscript (incf (gethash symbol counts-hash))))))
|
||||
(depict-space markup-stream)
|
||||
(depict-general-grammar-symbol markup-stream general-grammar-symbol :reference subscript))))
|
||||
(depict-production-rhs-component markup-stream production-rhs-component subscript))))
|
||||
(depict markup-stream " " ':left-angle-quote "empty" :right-angle-quote))))
|
||||
|
||||
|
||||
@ -198,7 +296,7 @@
|
||||
;;; PRODUCTIONS
|
||||
|
||||
;;; A production describes the expansion of a nonterminal (the lhs) into
|
||||
;;; a string of zero or more grammar symbols (the rhs).
|
||||
;;; a string of zero or more grammar symbols (the rhs); the rhs may also have constraints attached.
|
||||
;;; Each production has a unique number. Earlier productions have smaller numbers.
|
||||
;;; There is exactly one production structure for a given production, so eq can be
|
||||
;;; used to test for production equality.
|
||||
@ -206,7 +304,7 @@
|
||||
;;; The evaluator is a lisp form that evaluates to a function f that takes one argument --
|
||||
;;; the old state of the parser's value stack -- and returns the new state of that stack.
|
||||
(defstruct (production (:include general-production (lhs nil :type nonterminal :read-only t))
|
||||
(:constructor make-production (lhs rhs name rhs-length number))
|
||||
(:constructor make-production (lhs rhs constraints name rhs-length number))
|
||||
(:copier nil) (:predicate production?))
|
||||
(rhs-length nil :type integer :read-only t) ;Number of grammar symbols in the rhs
|
||||
(number nil :type integer :read-only t) ;This production's serial number
|
||||
@ -234,7 +332,7 @@
|
||||
|
||||
(defun print-production (production &optional (stream t))
|
||||
(format stream "~<~W -> ~:I~_~:[<epsilon> ~;~:*~{~W ~:_~}~]~:> ~:_[~W]"
|
||||
(list (production-lhs production) (production-rhs production))
|
||||
(list (production-lhs production) (general-production-rhs-components production))
|
||||
(production-name production)))
|
||||
|
||||
|
||||
@ -251,7 +349,7 @@
|
||||
;;; A generic production is not a production and does not have a number or actions.
|
||||
|
||||
(defstruct (generic-production (:include general-production (lhs nil :type generic-nonterminal :read-only t))
|
||||
(:constructor make-generic-production (lhs rhs name productions))
|
||||
(:constructor make-generic-production (lhs rhs constraints name productions))
|
||||
(:copier nil)
|
||||
(:predicate generic-production?))
|
||||
(productions nil :type list :read-only t)) ;List of instantiations of this generic production
|
||||
@ -265,14 +363,15 @@
|
||||
(if (generic-nonterminal? new-lhs)
|
||||
(make-generic-production
|
||||
new-lhs
|
||||
(mapcar #'(lambda (rhs-general-grammar-symbol)
|
||||
(general-grammar-symbol-substitute attribute argument rhs-general-grammar-symbol))
|
||||
(mapcar #'(lambda (grammar-symbol)
|
||||
(general-grammar-symbol-substitute attribute argument grammar-symbol))
|
||||
(generic-production-rhs generic-production))
|
||||
(general-production-constraints generic-production)
|
||||
(generic-production-name generic-production)
|
||||
(remove-if #'(lambda (production)
|
||||
(not (general-nonterminal-is-instance? grammar-parametrization new-lhs (production-lhs production))))
|
||||
productions))
|
||||
|
||||
|
||||
(assert-non-null (find new-lhs productions :key #'production-lhs :test #'eq)))))
|
||||
|
||||
|
||||
@ -317,14 +416,19 @@
|
||||
;;; A rule is the set of all productions with the same lhs nonterminal.
|
||||
;;; There is exactly one rule structure for a given nonterminal lhs, so eq can be
|
||||
;;; used to test for rule equality.
|
||||
;;;
|
||||
;;; If the rule cannot somehow produce an empty expansion, then passthrough-terminals is empty.
|
||||
;;; Otherwise, passthrough-terminals summarizes the constraints imposed on the next lookahead terminal
|
||||
;;; imposed by all empty expansions of this rule. If these empty expansions do not impose any
|
||||
;;; lookahead-constraints, then passthrough-terminals will be the full set.
|
||||
(defstruct (rule (:include general-rule (productions nil :type list :read-only t))
|
||||
(:constructor make-rule (productions))
|
||||
(:copier nil)
|
||||
(:predicate rule?))
|
||||
(:copier nil)
|
||||
(:predicate rule?))
|
||||
;productions ;The list of all productions for this rule's nonterminal lhs
|
||||
(number nil :type (or null integer)) ;This nonterminal's serial number
|
||||
(derives-epsilon nil :type bool) ;True if some direct or indirect expansion of this nonterminal can return epsilon
|
||||
(initial-terminals *empty-terminalset* :type terminalset)) ;Set of all terminals that can begin some expansion of this nonterminal
|
||||
(passthrough-terminals *empty-terminalset* :type terminalset) ;See above
|
||||
(initial-terminals *empty-terminalset* :type terminalset)) ;Set of all terminals that can begin some expansion of this nonterminal
|
||||
|
||||
|
||||
; Return a list of nonterminals in this rule's rhs.
|
||||
@ -394,6 +498,12 @@
|
||||
(production-lhs (item-production item)))
|
||||
|
||||
|
||||
; Return the constraints of the item's production.
|
||||
(declaim (inline item-constraints))
|
||||
(defun item-constraints (item)
|
||||
(production-constraints (item-production item)))
|
||||
|
||||
|
||||
; Make an item with the given production and dot location (which must be an integer
|
||||
; between 0 and length(rhs(production)), inclusive. Reuse an existing item in the
|
||||
; grammar if possible.
|
||||
@ -412,18 +522,27 @@
|
||||
(allocate-item production dot unseen number next)))))))
|
||||
|
||||
|
||||
(defun print-item (item &optional (stream t))
|
||||
(defun print-item (item &optional (stream t) after-dot)
|
||||
(let ((production (item-production item)))
|
||||
(format stream "~W -> ~:_" (production-lhs production))
|
||||
(pprint-logical-block (stream (production-rhs production))
|
||||
(do ((n (item-dot item) (1- n))
|
||||
(do ((pos 0 (1+ pos))
|
||||
(constraints (production-constraints production))
|
||||
(first t))
|
||||
()
|
||||
(when (zerop n)
|
||||
(when (= pos (item-dot item))
|
||||
(if first
|
||||
(setq first nil)
|
||||
(format stream " ~:_"))
|
||||
(write-char #\. stream))
|
||||
(write-char #\. stream)
|
||||
(when after-dot
|
||||
(format stream " ~:_~W" after-dot)))
|
||||
(do ()
|
||||
((or (endp constraints) (/= (constraint-pos (first constraints)) pos)))
|
||||
(if first
|
||||
(setq first nil)
|
||||
(format stream " ~:_"))
|
||||
(write (pop constraints) :stream stream))
|
||||
(pprint-exit-if-list-exhausted)
|
||||
(if first
|
||||
(setq first nil)
|
||||
@ -441,27 +560,46 @@
|
||||
|
||||
;;; A laitem is an item with associated lookahead information.
|
||||
;;; Unlike items, laitem structures are not shared among the states.
|
||||
(defstruct (laitem (:constructor allocate-laitem (grammar item lookaheads))
|
||||
(defstruct (laitem (:constructor allocate-laitem (grammar item forbidden lookaheads))
|
||||
(:copier nil)
|
||||
(:predicate laitem?))
|
||||
(grammar nil :type grammar :read-only t) ;The grammar to which this laitem belongs
|
||||
(item nil :type item :read-only t) ;The item to which this laitem corresponds
|
||||
(forbidden nil :type terminalset :read-only t) ;A set of terminals that must not occur after the dot because of lookahead-constraints
|
||||
(lookaheads nil :type terminalset) ;Set of lookahead terminals
|
||||
(propagates nil :type list)) ;List of laitems to which lookaheads propagate from this laitem (see note below)
|
||||
;When parsing a LALR(1) grammar, propagates is the list of all laitems (in this and other states)
|
||||
;to which lookaheads propagate from this laitem.
|
||||
(propagates nil :type list)) ;List of (laitem . mask) to which lookaheads propagate from this laitem (see note below)
|
||||
;When parsing a LALR(1) grammar, propagates contains all laitems (in this and other states)
|
||||
;to which lookaheads propagate from this laitem. The mask in each entry of propagates
|
||||
;is a terminalset that indicates which lookaheads can propagate; this is usually *full-terminalset*
|
||||
;but can be smaller in the presence of constraints.
|
||||
;When parsing a LR(1) grammar, propagates is the list of all laitems to which lookaheads propagate from this laitem
|
||||
;without following a shift transition. Such laitems must necessarily be in the same state. In the LR(1) case each
|
||||
;laitem listed in the propagates list must come after this laitem in this state's laitems list.
|
||||
|
||||
|
||||
; Add or modify a propagation entry in src-laitem to point to dst-laitem with
|
||||
; the given mask.
|
||||
(defun laitem-add-propagation (src-laitem dst-laitem mask)
|
||||
(let ((propagation-entry (assoc dst-laitem (laitem-propagates src-laitem))))
|
||||
(if propagation-entry
|
||||
(terminalset-union-f (cdr propagation-entry) mask)
|
||||
(push (cons dst-laitem mask) (laitem-propagates src-laitem)))))
|
||||
|
||||
|
||||
(defvar *lookahead-print-column* 70)
|
||||
|
||||
(defun print-laitem (laitem &optional (stream t))
|
||||
(print-item (laitem-item laitem) stream)
|
||||
(format stream " ~vT~_" *lookahead-print-column*)
|
||||
(pprint-logical-block (stream nil :prefix "{" :suffix "}")
|
||||
(print-terminalset (laitem-grammar laitem) (laitem-lookaheads laitem) stream)))
|
||||
(let* ((grammar (laitem-grammar laitem))
|
||||
(item (laitem-item laitem))
|
||||
(forbidden (laitem-forbidden laitem))
|
||||
(forbidden-as-constraint
|
||||
(and (not (terminalset-empty? forbidden))
|
||||
(let ((forbidden-terminals (terminalset-list grammar forbidden)))
|
||||
(make-lookahead-constraint (item-dot item) forbidden-terminals forbidden-terminals)))))
|
||||
(print-item item stream forbidden-as-constraint)
|
||||
(format stream " ~vT~_" *lookahead-print-column*)
|
||||
(pprint-logical-block (stream nil :prefix "{" :suffix "}")
|
||||
(print-terminalset grammar (laitem-lookaheads laitem) stream))))
|
||||
|
||||
(defmethod print-object ((laitem laitem) stream)
|
||||
(print-unreadable-object (laitem stream)
|
||||
@ -695,14 +833,14 @@
|
||||
((substitute-argument-with (attribute subtree)
|
||||
(ecase (first subtree)
|
||||
(:rule
|
||||
(let* ((general-rule (second subtree))
|
||||
(lhs (general-rule-lhs general-rule))
|
||||
(new-lhs (general-grammar-symbol-substitute attribute argument lhs)))
|
||||
(assert-true (generic-rule? general-rule))
|
||||
(list ':rule
|
||||
(if (generic-nonterminal? new-lhs)
|
||||
(generic-rule-substitute grammar attribute argument general-rule)
|
||||
(grammar-rule grammar new-lhs)))))
|
||||
(let* ((general-rule (second subtree))
|
||||
(lhs (general-rule-lhs general-rule))
|
||||
(new-lhs (general-grammar-symbol-substitute attribute argument lhs)))
|
||||
(assert-true (generic-rule? general-rule))
|
||||
(list ':rule
|
||||
(if (generic-nonterminal? new-lhs)
|
||||
(generic-rule-substitute grammar attribute argument general-rule)
|
||||
(grammar-rule grammar new-lhs)))))
|
||||
(:argument
|
||||
(list ':argument
|
||||
(second subtree)
|
||||
@ -841,7 +979,7 @@
|
||||
(terminal-actions nil :type hash-table :read-only t) ;Hash table of terminal -> list of (action-symbol . action-function-or-nil)
|
||||
(rules nil :type hash-table :read-only t) ;Hash table of nonterminal -> rule
|
||||
(parameter-trees nil :type hash-table :read-only t) ;Hash table of nonterminal-symbol -> parameter-tree
|
||||
(max-production-length nil :type integer :read-only t) ;Maximum number of grammar symbols on the rhs of a production
|
||||
(max-production-length nil :type integer :read-only t) ;Maximum number of grammar symbols in the rhs of a production
|
||||
(general-productions nil :type hash-table :read-only t);Hash table of production-name -> general-production
|
||||
(n-productions nil :type integer :read-only t) ;Number of productions in the grammar
|
||||
;The following fields are used for the parser.
|
||||
@ -904,41 +1042,57 @@
|
||||
(assert-non-null (nth n (grammar-states grammar))))
|
||||
|
||||
|
||||
; Return true if symbol ==>* epsilon.
|
||||
(defun symbol-derives-epsilon (grammar symbol)
|
||||
(assert-type symbol grammar-symbol)
|
||||
(and (nonterminal? symbol)
|
||||
(rule-derives-epsilon (grammar-rule grammar symbol))))
|
||||
|
||||
|
||||
; Return the terminalset of all terminals a that satisfy
|
||||
; symbol ==>* a rest,
|
||||
; Return two values:
|
||||
; a generate terminalset G;
|
||||
; a passthrough terminalset P.
|
||||
;
|
||||
; G is the terminalset of all terminals x that satisfy
|
||||
; symbol ==>* x rest,
|
||||
; where rest is an arbitrary string of grammar symbols.
|
||||
; P is the terminalset of all terminals x that satisfy
|
||||
; symbol x ==> x
|
||||
(defun symbol-initial-terminals (grammar symbol)
|
||||
(assert-type symbol grammar-symbol)
|
||||
(if (nonterminal? symbol)
|
||||
(rule-initial-terminals (grammar-rule grammar symbol))
|
||||
(make-terminalset grammar symbol)))
|
||||
(let ((rule (grammar-rule grammar symbol)))
|
||||
(values (rule-initial-terminals rule) (rule-passthrough-terminals rule)))
|
||||
(values (make-terminalset grammar symbol) *empty-terminalset*)))
|
||||
|
||||
|
||||
; Given symbol-string, an arbitrary string of grammar symbols,
|
||||
; return two values: a terminalset S and a boolean B.
|
||||
; S is the terminalset of all terminals a that satisfy
|
||||
; symbol-string ==>* a rest,
|
||||
; Given an arbitrary string of grammar symbols, a list of constraints, and an initial position,
|
||||
; return two values:
|
||||
; a generate terminalset G;
|
||||
; a passthrough terminalset P.
|
||||
;
|
||||
; G is the terminalset of all terminals x that satisfy
|
||||
; symbol-string ==>* x rest,
|
||||
; where rest is an arbitrary string of grammar symbols.
|
||||
; B is true if symbol-string ==>* epsilon.
|
||||
(defun string-initial-terminals (grammar symbol-string)
|
||||
(let ((initial-terminals *empty-terminalset*)
|
||||
(derives-epsilon nil))
|
||||
(dolist (element symbol-string (setq derives-epsilon t))
|
||||
(setq initial-terminals (terminalset-union initial-terminals (symbol-initial-terminals grammar element)))
|
||||
(unless (symbol-derives-epsilon grammar element)
|
||||
(return)))
|
||||
(values initial-terminals derives-epsilon)))
|
||||
; P is the terminalset of all terminals x that satisfy
|
||||
; symbol-string x ==> x
|
||||
;
|
||||
; The constraints' positions are relative to the given initial position, which specifies the position
|
||||
; of the first grammar-symbol in the symbol-string. The constraints must be listed in order of
|
||||
; increasing positions.
|
||||
(defun string-initial-terminals (grammar symbol-string constraints pos)
|
||||
(do ((symbol-string symbol-string (cdr symbol-string))
|
||||
(initial-terminals *empty-terminalset*)
|
||||
(passthrough-terminals *full-terminalset*))
|
||||
((terminalset-empty? passthrough-terminals) (values initial-terminals *empty-terminalset*))
|
||||
(let ((constraint (find pos constraints :key #'constraint-pos :test #'=)))
|
||||
(when constraint
|
||||
(terminalset-intersection-f passthrough-terminals (lookahead-constraint-terminalset constraint))))
|
||||
(if symbol-string
|
||||
(multiple-value-bind (generate passthrough) (symbol-initial-terminals grammar (first symbol-string))
|
||||
(terminalset-union-f initial-terminals (terminalset-intersection passthrough-terminals generate))
|
||||
(terminalset-intersection-f passthrough-terminals passthrough))
|
||||
(return (values initial-terminals passthrough-terminals)))))
|
||||
|
||||
|
||||
; Intern attributed or generic nonterminals in the production's lhs and rhs. Return the
|
||||
; resulting production source.
|
||||
; Intern attributed or generic nonterminals in the production's lhs and rhs. Also replace
|
||||
; (:- <terminal> ... <terminal>) or (:-- (<grammar-symbol> ... <grammar-symbol>) <terminal> ... <terminal>)
|
||||
; sublists in the rhs with lookahead-constraints and put these, in order, after the third element of
|
||||
; the returned list.
|
||||
; Return the resulting production source.
|
||||
(defun intern-production-source (grammar-parametrization production-source)
|
||||
(assert-type production-source (tuple (or user-nonterminal cons) (list (or user-grammar-symbol cons)) identifier))
|
||||
(let ((production-lhs-source (first production-source))
|
||||
@ -946,11 +1100,24 @@
|
||||
(production-name (third production-source)))
|
||||
(if (or (consp production-lhs-source) (some #'consp production-rhs-source))
|
||||
(multiple-value-bind (lhs-nonterminal lhs-arguments) (grammar-parametrization-intern grammar-parametrization production-lhs-source)
|
||||
(list lhs-nonterminal
|
||||
(mapcar #'(lambda (grammar-symbol-source)
|
||||
(grammar-parametrization-intern grammar-parametrization grammar-symbol-source lhs-arguments))
|
||||
production-rhs-source)
|
||||
production-name))
|
||||
(let ((rhs nil)
|
||||
(constraints nil)
|
||||
(pos 0))
|
||||
(dolist (component-source production-rhs-source)
|
||||
(cond
|
||||
((and (consp component-source) (eq (first component-source) ':-))
|
||||
(let ((lookaheads (rest component-source)))
|
||||
(push
|
||||
(make-lookahead-constraint pos (assert-non-null lookaheads) lookaheads)
|
||||
constraints)))
|
||||
((and (consp component-source) (eq (first component-source) ':--))
|
||||
(let ((lookaheads (rest component-source)))
|
||||
(push
|
||||
(make-lookahead-constraint pos (assert-non-null (rest lookaheads)) (assert-non-null (first lookaheads)))
|
||||
constraints)))
|
||||
(t (push (grammar-parametrization-intern grammar-parametrization component-source lhs-arguments) rhs)))
|
||||
(incf pos))
|
||||
(list* lhs-nonterminal (nreverse rhs) production-name (nreverse constraints))))
|
||||
production-source)))
|
||||
|
||||
|
||||
@ -966,6 +1133,13 @@
|
||||
; nonterminal can have attributes, thereby designating a specialization instead of a fully
|
||||
; generic production.
|
||||
;
|
||||
; The rhs can also contain lookahead constraints of the form
|
||||
; (:- <terminal> ... <terminal>)
|
||||
; which indicate that the following terminal must not be one of the listed terminals. The form
|
||||
; (:-- (<grammar-symbol> ... <grammar-symbol>) <terminal> ... <terminal>)
|
||||
; does the same thing except that it prints the grammar-symbols instead of the terminals when
|
||||
; the production is printed.
|
||||
;
|
||||
; excluded-nonterminals-source is a list of nonterminals not used in the grammar. Productions,
|
||||
; including productions expanded from generic productions, that have one of these nonterminals
|
||||
; on the lhs are ignored.
|
||||
@ -979,7 +1153,8 @@
|
||||
(general-productions (make-hash-table :test #'equal))
|
||||
(production-number 0)
|
||||
(max-production-length 1)
|
||||
(excluded-nonterminals-hash (make-hash-table :test *grammar-symbol-=*)))
|
||||
(excluded-nonterminals-hash (make-hash-table :test *grammar-symbol-=*))
|
||||
(lookahead-constraints nil))
|
||||
|
||||
;Set up excluded-nonterminals-hash. The values of the hash table are either :seen or :unseen
|
||||
;depending on whether a production with the particular nonterminal has been seen yet.
|
||||
@ -989,12 +1164,12 @@
|
||||
|
||||
;Create the starting production: *start-nonterminal* ==> start-symbol
|
||||
(setf (gethash *start-nonterminal* rules)
|
||||
(list (make-production *start-nonterminal* (list start-symbol) nil 1 0)))
|
||||
(list (make-production *start-nonterminal* (list start-symbol) nil nil 1 0)))
|
||||
|
||||
;Create the rest of the productions.
|
||||
(flet
|
||||
((create-production (lhs rhs name)
|
||||
(let ((production (make-production lhs rhs name (length rhs) (incf production-number))))
|
||||
((create-production (lhs rhs constraints name)
|
||||
(let ((production (make-production lhs rhs constraints name (length rhs) (incf production-number))))
|
||||
(push production (gethash lhs rules))
|
||||
(dolist (rhs-terminal (production-terminals production))
|
||||
(setf (gethash rhs-terminal terminals-hash) t))
|
||||
@ -1008,6 +1183,7 @@
|
||||
(let* ((production-lhs (first production-source))
|
||||
(production-rhs (second production-source))
|
||||
(production-name (third production-source))
|
||||
(production-constraints (cdddr production-source))
|
||||
(lhs-arguments (general-grammar-symbol-arguments production-lhs)))
|
||||
(setq max-production-length (max max-production-length (length production-rhs)))
|
||||
(when (gethash production-name general-productions)
|
||||
@ -1024,16 +1200,17 @@
|
||||
(mapcar #'(lambda (general-grammar-symbol)
|
||||
(instantiate-general-grammar-symbol bound-argument-alist general-grammar-symbol))
|
||||
production-rhs)
|
||||
production-constraints
|
||||
production-name)
|
||||
productions))))
|
||||
lhs-arguments)
|
||||
(when productions
|
||||
(setf (gethash production-name general-productions)
|
||||
(make-generic-production production-lhs production-rhs production-name (nreverse productions)))))
|
||||
(make-generic-production production-lhs production-rhs production-constraints production-name (nreverse productions)))))
|
||||
|
||||
(unless (nonterminal-excluded production-lhs)
|
||||
(setf (gethash production-name general-productions)
|
||||
(create-production production-lhs production-rhs production-name)))))))
|
||||
(create-production production-lhs production-rhs production-constraints production-name)))))))
|
||||
|
||||
|
||||
;Change all values of the rules hash table to contain rule structures
|
||||
@ -1042,9 +1219,16 @@
|
||||
(maphash
|
||||
#'(lambda (rule-lhs rule-productions)
|
||||
(dolist (rule-production rule-productions)
|
||||
(dolist (rhs-nonterminal (production-nonterminals rule-production))
|
||||
(unless (gethash rhs-nonterminal rules)
|
||||
(error "Nonterminal ~S used but not defined" rhs-nonterminal))))
|
||||
(dolist (grammar-symbol (production-rhs rule-production))
|
||||
(when (nonterminal? grammar-symbol)
|
||||
(unless (gethash grammar-symbol rules)
|
||||
(error "Nonterminal ~S used but not defined" grammar-symbol))))
|
||||
(dolist (constraint (production-constraints rule-production))
|
||||
(push constraint lookahead-constraints)
|
||||
(dolist (lookahead-terminal (lookahead-constraint-forbidden-terminals constraint))
|
||||
(unless (gethash lookahead-terminal terminals-hash)
|
||||
(warn "Lookahead terminal ~S not used in main grammar" lookahead-terminal)
|
||||
(setf (gethash lookahead-terminal terminals-hash) t)))))
|
||||
(setf (gethash rule-lhs rules)
|
||||
(make-rule (nreverse rule-productions))))
|
||||
rules)
|
||||
@ -1086,27 +1270,33 @@
|
||||
:n-productions production-number
|
||||
:items-hash (make-hash-table :test #'equal))))
|
||||
|
||||
;Compute the values of derives-epsilon and initial-terminals in each rule.
|
||||
;Compute the terminalsets in the lookahead-constraints.
|
||||
(dolist (lookahead-constraint lookahead-constraints)
|
||||
(let ((terminalset *full-terminalset*))
|
||||
(dolist (forbidden-terminal (lookahead-constraint-forbidden-terminals lookahead-constraint))
|
||||
(terminalset-difference-f terminalset (make-terminalset grammar forbidden-terminal)))
|
||||
(setf (lookahead-constraint-terminalset lookahead-constraint) terminalset)))
|
||||
|
||||
;Compute the values of passthrough-terminals and initial-terminals in each rule.
|
||||
(do ((changed t))
|
||||
((not changed))
|
||||
(setq changed nil)
|
||||
(dolist (nonterminal (grammar-nonterminals-list grammar))
|
||||
(let ((rule (grammar-rule grammar nonterminal))
|
||||
(new-initial-terminals *empty-terminalset*)
|
||||
(new-derives-epsilon nil))
|
||||
(new-passthrough-terminals *empty-terminalset*))
|
||||
(dolist (production (rule-productions rule))
|
||||
(multiple-value-bind (production-initial-terminals production-derives-epsilon)
|
||||
(string-initial-terminals grammar (production-rhs production))
|
||||
(setq new-initial-terminals (terminalset-union new-initial-terminals production-initial-terminals))
|
||||
(when production-derives-epsilon
|
||||
(setq new-derives-epsilon t))))
|
||||
(assert-true (or new-derives-epsilon (not (rule-derives-epsilon rule))))
|
||||
(assert-true (terminalset-<= (rule-initial-terminals rule) new-initial-terminals))
|
||||
(multiple-value-bind (production-initial-terminals production-passthrough-terminals)
|
||||
(string-initial-terminals grammar (production-rhs production) (production-constraints production) 0)
|
||||
(terminalset-union-f new-initial-terminals production-initial-terminals)
|
||||
(terminalset-union-f new-passthrough-terminals production-passthrough-terminals)))
|
||||
(unless (terminalset-= new-initial-terminals (rule-initial-terminals rule))
|
||||
(assert-true (terminalset-<= (rule-initial-terminals rule) new-initial-terminals))
|
||||
(setf (rule-initial-terminals rule) new-initial-terminals)
|
||||
(setq changed t))
|
||||
(unless (eq new-derives-epsilon (rule-derives-epsilon rule))
|
||||
(setf (rule-derives-epsilon rule) t)
|
||||
(unless (terminalset-= new-passthrough-terminals (rule-passthrough-terminals rule))
|
||||
(assert-true (terminalset-<= (rule-passthrough-terminals rule) new-passthrough-terminals))
|
||||
(setf (rule-passthrough-terminals rule) new-passthrough-terminals)
|
||||
(setq changed t)))))
|
||||
|
||||
;Compute the parameter-trees entries.
|
||||
@ -1154,7 +1344,7 @@
|
||||
(loop
|
||||
(let ((production (pprint-pop)))
|
||||
(format stream "~@<~:[<epsilon> ~;~:*~{~W ~:_~}~] ~_~vT~vA [~W]~:>"
|
||||
(production-rhs production) *name-print-column*
|
||||
(general-production-rhs-components production) *name-print-column*
|
||||
production-number-width (print-production-number (production-number production))
|
||||
(production-name production)))
|
||||
(pprint-exit-if-list-exhausted)
|
||||
@ -1162,7 +1352,7 @@
|
||||
(pprint-indent :block 2 stream)
|
||||
(when details
|
||||
(format stream "~:@_Initial terminals: ~@_~@<~:[~;<epsilon> ~:_~]~{~W ~:_~}~:>"
|
||||
(rule-derives-epsilon rule)
|
||||
(not (terminalset-empty? (rule-passthrough-terminals rule)))
|
||||
(terminalset-list grammar (rule-initial-terminals rule)))))
|
||||
(pprint-newline :mandatory stream))
|
||||
(pprint-exit-if-list-exhausted)
|
||||
|
@ -378,6 +378,7 @@
|
||||
((:intersection-10 1) (:script "document.write(U_cap)")) ;#x2229
|
||||
((:union-10 1) (:script "document.write(U_cup)")) ;#x222A
|
||||
((:member-10 2) (:script "document.write(U_isin)")) ;#x2208
|
||||
((:not-member-10 2) (:script "document.write(U_notin)")) ;#x2209
|
||||
((:derives-10 2) (:script "document.write(U_rArr)")) ;#x21D2
|
||||
((:left-triangle-bracket-10 1) (:script "document.write(U_lang)")) ;#x2329
|
||||
((:right-triangle-bracket-10 1) (:script "document.write(U_rang)")) ;#x232A
|
||||
@ -443,6 +444,8 @@
|
||||
;Specials
|
||||
(:invisible del)
|
||||
((:but-not 6) (b "except"))
|
||||
((:begin-negative-lookahead 13) "[lookahead" :not-member-10 "{")
|
||||
((:end-negative-lookahead 2) "}]")
|
||||
(:subscript sub)
|
||||
(:superscript sup)
|
||||
(:plain-subscript :subscript)
|
||||
|
@ -388,10 +388,19 @@
|
||||
(defun grammar-singletons (grammar-source)
|
||||
(assert-type grammar-source (list (tuple t (list t) identifier)))
|
||||
(let ((singletons 0))
|
||||
(dolist (production-source grammar-source)
|
||||
(dolist (grammar-symbol (second production-source))
|
||||
(when (characterp grammar-symbol)
|
||||
(setq singletons (charset-add-char singletons grammar-symbol)))))
|
||||
(labels
|
||||
((scan-for-singletons (list)
|
||||
(dolist (element list)
|
||||
(cond
|
||||
((characterp element)
|
||||
(setq singletons (charset-add-char singletons element)))
|
||||
((consp element)
|
||||
(case (first element)
|
||||
(:- (scan-for-singletons (rest element)))
|
||||
(:-- (scan-for-singletons (cddr element)))))))))
|
||||
|
||||
(dolist (production-source grammar-source)
|
||||
(scan-for-singletons (second production-source))))
|
||||
singletons))
|
||||
|
||||
|
||||
@ -551,12 +560,64 @@
|
||||
;;; ------------------------------------------------------------------------------------------------------
|
||||
|
||||
|
||||
; Return a freshly consed list of partitions for the given charclass.
|
||||
(defun charclass-partitions (lexer charclass)
|
||||
(do ((partitions nil)
|
||||
(charset (charclass-charset charclass)))
|
||||
((charset-empty? charset) partitions)
|
||||
(let* ((partition-name (if (charset-infinite? charset)
|
||||
*default-partition-name*
|
||||
(gethash (charset-highest-char charset) (lexer-char-tokens lexer))))
|
||||
(partition-charset (if (characterp partition-name)
|
||||
(char-charset partition-name)
|
||||
(partition-charset (gethash partition-name (lexer-partitions lexer))))))
|
||||
(push partition-name partitions)
|
||||
(setq charset (charset-difference charset partition-charset)))))
|
||||
|
||||
|
||||
; Return an updated grammar-source whose character class nonterminals replaced with sets of
|
||||
; terminals inside :- and :-- constraints.
|
||||
(defun update-constraint-nonterminals (lexer grammar-source)
|
||||
(mapcar
|
||||
#'(lambda (production-source)
|
||||
(let ((rhs (second production-source)))
|
||||
(if (some #'(lambda (rhs-component)
|
||||
(and (consp rhs-component)
|
||||
(member (first rhs-component) '(:- :--))))
|
||||
rhs)
|
||||
(list*
|
||||
(first production-source)
|
||||
(mapcar
|
||||
#'(lambda (component)
|
||||
(when (consp component)
|
||||
(let ((tag (first component)))
|
||||
(when (eq tag ':-)
|
||||
(setq component (list* ':-- (rest component) (rest component)))
|
||||
(setq tag ':--))
|
||||
(when (eq tag ':--)
|
||||
(setq component
|
||||
(list* tag
|
||||
(second component)
|
||||
(mapcan #'(lambda (grammar-symbol)
|
||||
(if (nonterminal? grammar-symbol)
|
||||
(charclass-partitions lexer (assert-non-null (lexer-charclass lexer grammar-symbol)))
|
||||
(list grammar-symbol)))
|
||||
(cddr component)))))))
|
||||
component)
|
||||
rhs)
|
||||
(cddr production-source))
|
||||
production-source)))
|
||||
grammar-source))
|
||||
|
||||
|
||||
; Return two values:
|
||||
; extra grammar productions that define the character class nonterminals out of characters and tokens;
|
||||
; extra commands that:
|
||||
; An updated grammar-source that includes:
|
||||
; grammar productions that define the character class nonterminals out of characters and tokens;
|
||||
; character class nonterminals replaced with sets of terminals inside :- and :-- constraints.
|
||||
; Extra commands that:
|
||||
; define the partitions used in this lexer;
|
||||
; define the actions of these productions.
|
||||
(defun lexer-grammar-and-commands (lexer)
|
||||
(defun lexer-grammar-and-commands (lexer grammar-source)
|
||||
(labels
|
||||
((component-partitions (charset partitions)
|
||||
(if (charset-empty? charset)
|
||||
@ -616,7 +677,7 @@
|
||||
(partition-lexer-actions (gethash partition-name (lexer-partitions lexer)))))
|
||||
(lexer-partition-names lexer))))
|
||||
(values
|
||||
(nreverse productions)
|
||||
(nreconc productions (update-constraint-nonterminals lexer grammar-source))
|
||||
(nconc partition-commands (nreverse commands)))))))
|
||||
|
||||
|
||||
@ -632,9 +693,9 @@
|
||||
; define the actions of these productions.
|
||||
(defun make-lexer-and-grammar (kind charclasses-source lexer-actions-source parametrization start-symbol grammar-source &optional excluded-nonterminals-source)
|
||||
(let ((lexer (make-lexer parametrization charclasses-source lexer-actions-source grammar-source)))
|
||||
(multiple-value-bind (extra-grammar-source extra-commands) (lexer-grammar-and-commands lexer)
|
||||
(multiple-value-bind (lexer-grammar-source extra-commands) (lexer-grammar-and-commands lexer grammar-source)
|
||||
(let ((grammar (make-and-compile-grammar kind parametrization start-symbol
|
||||
(append extra-grammar-source grammar-source) excluded-nonterminals-source)))
|
||||
lexer-grammar-source excluded-nonterminals-source)))
|
||||
(setf (lexer-grammar lexer) grammar)
|
||||
(values lexer extra-commands)))))
|
||||
|
||||
|
@ -35,38 +35,63 @@
|
||||
;Create a laitem for this item and add the association item->laitem to the laitems-hash
|
||||
;hash table if it's not there already. Regardless of whether a new laitem was created,
|
||||
;update the laitem's lookaheads to also include the given lookaheads.
|
||||
;If prev is non-null, update (laitem-propagates prev) to include the laitem if it's not
|
||||
;already included there.
|
||||
;forbidden is a terminalset of terminals that must not occur immediately after the dot in this
|
||||
;laitem. The forbidden set is inherited from constraints in parent laitems in the same state.
|
||||
;If prev is non-null, update (laitem-propagates prev) to include the laitem and the given
|
||||
;passthrough terminalset if it's not already included there.
|
||||
;If a new laitem was created and its first symbol after the dot exists and is a
|
||||
;nonterminal A, recursively close items A->.rhs corresponding to all rhs's in the
|
||||
;grammar's rule for A.
|
||||
((close-item (item lookaheads prev)
|
||||
(let ((laitem (gethash item laitems-hash)))
|
||||
((close-item (item forbidden lookaheads prev passthroughs)
|
||||
(let ((production (item-production item))
|
||||
(laitem (gethash item laitems-hash)))
|
||||
(unless (terminalset-empty? forbidden)
|
||||
(multiple-value-bind (dot-lookaheads dot-passthroughs)
|
||||
(string-initial-terminals grammar (item-unseen item) (production-constraints production) (item-dot item))
|
||||
(let ((dot-initial (terminalset-union dot-lookaheads dot-passthroughs)))
|
||||
;Check whether any terminal can start this item. If not, skip this item altogether.
|
||||
(when (terminalset-empty? (terminalset-difference dot-initial forbidden))
|
||||
;Mark skipped items in the laitems-hash table.
|
||||
(when (and laitem (not (eq laitem 'forbidden)))
|
||||
(error "Two laitems in the same state differing only in forbidden initial terminal constraints: ~S" laitem))
|
||||
(setf (gethash item laitems-hash) 'forbidden)
|
||||
(return-from close-item))
|
||||
;Convert forbidden into a canonical format by removing terminals that cannot begin this item's expansion anyway.
|
||||
(terminalset-intersection-f forbidden dot-initial))))
|
||||
(if laitem
|
||||
(setf (laitem-lookaheads laitem)
|
||||
(terminalset-union (laitem-lookaheads laitem) lookaheads))
|
||||
(progn
|
||||
(unless (terminalset-= forbidden (laitem-forbidden laitem))
|
||||
(error "Two laitems in the same state differing only in forbidden initial terminal constraints: ~S" laitem))
|
||||
(terminalset-union-f (laitem-lookaheads laitem) lookaheads))
|
||||
(let ((item-next-symbol (item-next-symbol item)))
|
||||
(setq laitem (allocate-laitem grammar item lookaheads))
|
||||
(setq laitem (allocate-laitem grammar item forbidden lookaheads))
|
||||
(push laitem laitems)
|
||||
(setf (gethash item laitems-hash) laitem)
|
||||
(when (nonterminal? item-next-symbol)
|
||||
(multiple-value-bind (next-lookaheads epsilon-lookahead)
|
||||
(string-initial-terminals grammar (rest (item-unseen item)))
|
||||
(let ((next-prev (and epsilon-lookahead laitem)))
|
||||
(dolist (production (rule-productions (grammar-rule grammar item-next-symbol)))
|
||||
(close-item (make-item grammar production 0) next-lookaheads next-prev)))))))
|
||||
(let* ((dot (item-dot item))
|
||||
(next-forbidden (terminalset-union forbidden
|
||||
(terminalset-complement (general-production-lookahead-constraint production dot)))))
|
||||
(multiple-value-bind (next-lookaheads next-passthroughs)
|
||||
(string-initial-terminals grammar (rest (item-unseen item)) (production-constraints production) (1+ dot))
|
||||
(let ((next-prev (and (not (terminalset-empty? next-passthroughs)) laitem)))
|
||||
(dolist (production (rule-productions (grammar-rule grammar item-next-symbol)))
|
||||
(close-item (make-item grammar production 0) next-forbidden next-lookaheads next-prev next-passthroughs))))))))
|
||||
(when prev
|
||||
(pushnew laitem (laitem-propagates prev))))))
|
||||
(laitem-add-propagation prev laitem passthroughs)))))
|
||||
|
||||
(dolist (acons kernel-item-alist)
|
||||
(close-item (car acons) initial-lookaheads (and update-propagates (cdr acons))))
|
||||
(close-item (car acons)
|
||||
*empty-terminalset*
|
||||
initial-lookaheads
|
||||
(and update-propagates (cdr acons))
|
||||
*full-terminalset*))
|
||||
(allocate-state number kernel (nreverse laitems)))))
|
||||
|
||||
|
||||
; f is a function that takes two arguments:
|
||||
; a grammar symbol, and
|
||||
; a list of kernel items in order of increasing item number.
|
||||
; a list of pairs (item . prev), where item is a kernel item and prev is a laitem;
|
||||
; f is a function that takes three arguments:
|
||||
; a grammar symbol;
|
||||
; a list of kernel items in order of increasing item number;
|
||||
; a list of pairs (item . prev), where item is a kernel item and prev is a laitem.
|
||||
; For each possible symbol X that can be shifted while in the given state S, call
|
||||
; f giving it S and the list of items that constitute the kernel of that shift's destination
|
||||
; state. The prev's are the sources of the corresponding shifted items.
|
||||
@ -130,9 +155,10 @@
|
||||
(defun propagate-internal-lookaheads (state)
|
||||
(dolist (src-laitem (state-laitems state))
|
||||
(let ((src-lookaheads (laitem-lookaheads src-laitem)))
|
||||
(dolist (dst-laitem (laitem-propagates src-laitem))
|
||||
(setf (laitem-lookaheads dst-laitem)
|
||||
(terminalset-union (laitem-lookaheads dst-laitem) src-lookaheads))))))
|
||||
(dolist (propagation (laitem-propagates src-laitem))
|
||||
(let ((dst-laitem (car propagation))
|
||||
(mask (cdr propagation)))
|
||||
(terminalset-union-f (laitem-lookaheads dst-laitem) (terminalset-intersection src-lookaheads mask)))))))
|
||||
|
||||
|
||||
; Propagate all lookaheads in kernel-item-alist, which must target destination-state.
|
||||
@ -141,8 +167,7 @@
|
||||
(dolist (acons kernel-item-alist)
|
||||
(let ((dest-laitem (state-laitem destination-state (car acons)))
|
||||
(src-laitem (cdr acons)))
|
||||
(setf (laitem-lookaheads dest-laitem)
|
||||
(terminalset-union (laitem-lookaheads dest-laitem) (laitem-lookaheads src-laitem)))))
|
||||
(terminalset-union-f (laitem-lookaheads dest-laitem) (laitem-lookaheads src-laitem))))
|
||||
(setf (gethash destination-state dirty-states) t))
|
||||
|
||||
|
||||
@ -249,7 +274,7 @@
|
||||
(let ((destination-state (gethash kernel lalr-states-hash)))
|
||||
(if destination-state
|
||||
(dolist (acons kernel-item-alist)
|
||||
(pushnew (state-laitem destination-state (car acons)) (laitem-propagates (cdr acons))))
|
||||
(laitem-add-propagation (cdr acons) (state-laitem destination-state (car acons)) *full-terminalset*))
|
||||
(progn
|
||||
(setq destination-state (make-state grammar kernel kernel-item-alist t next-state-number *empty-terminalset*))
|
||||
(setf (gethash kernel lalr-states-hash) destination-state)
|
||||
@ -278,12 +303,14 @@
|
||||
(dolist (dirty-laitem (hash-table-keys dirty-laitems))
|
||||
(remhash dirty-laitem dirty-laitems)
|
||||
(let ((src-lookaheads (laitem-lookaheads dirty-laitem)))
|
||||
(dolist (dst-laitem (laitem-propagates dirty-laitem))
|
||||
(let* ((old-dst-lookaheads (laitem-lookaheads dst-laitem))
|
||||
(new-dst-lookaheads (terminalset-union old-dst-lookaheads src-lookaheads)))
|
||||
(unless (terminalset-= old-dst-lookaheads new-dst-lookaheads)
|
||||
(setf (laitem-lookaheads dst-laitem) new-dst-lookaheads)
|
||||
(setf (gethash dst-laitem dirty-laitems) t)))))))
|
||||
(dolist (propagation (laitem-propagates dirty-laitem))
|
||||
(let ((dst-laitem (car propagation))
|
||||
(mask (cdr propagation)))
|
||||
(let* ((old-dst-lookaheads (laitem-lookaheads dst-laitem))
|
||||
(new-dst-lookaheads (terminalset-union old-dst-lookaheads (terminalset-intersection src-lookaheads mask))))
|
||||
(unless (terminalset-= old-dst-lookaheads new-dst-lookaheads)
|
||||
(setf (laitem-lookaheads dst-laitem) new-dst-lookaheads)
|
||||
(setf (gethash dst-laitem dirty-laitems) t))))))))
|
||||
|
||||
;Erase the propagates chains in all laitems.
|
||||
(dolist (state (grammar-states grammar))
|
||||
@ -306,16 +333,21 @@
|
||||
(dolist (laitem (state-laitems state))
|
||||
(let ((item (laitem-item laitem)))
|
||||
(unless (item-next-symbol item)
|
||||
(if (grammar-symbol-= (item-lhs item) *start-nonterminal*)
|
||||
(when (terminal-in-terminalset grammar *end-marker* (laitem-lookaheads laitem))
|
||||
(push (cons *end-marker* (make-accept-transition))
|
||||
(state-transitions state)))
|
||||
(map-terminalset-reverse
|
||||
#'(lambda (lookahead)
|
||||
(push (cons lookahead (make-reduce-transition (item-production item)))
|
||||
(state-transitions state)))
|
||||
grammar
|
||||
(laitem-lookaheads laitem))))))
|
||||
(let ((lookaheads (terminalset-difference
|
||||
(terminalset-intersection
|
||||
(laitem-lookaheads laitem)
|
||||
(general-production-lookahead-constraint (item-production item) (item-dot item)))
|
||||
(laitem-forbidden laitem))))
|
||||
(if (grammar-symbol-= (item-lhs item) *start-nonterminal*)
|
||||
(when (terminal-in-terminalset grammar *end-marker* lookaheads)
|
||||
(push (cons *end-marker* (make-accept-transition))
|
||||
(state-transitions state)))
|
||||
(map-terminalset-reverse
|
||||
#'(lambda (lookahead)
|
||||
(push (cons lookahead (make-reduce-transition (item-production item)))
|
||||
(state-transitions state)))
|
||||
grammar
|
||||
lookaheads))))))
|
||||
(setf (state-gotos state)
|
||||
(sort (state-gotos state) #'< :key #'(lambda (goto-cons) (state-number (cdr goto-cons)))))
|
||||
(setf (state-transitions state)
|
||||
|
@ -131,6 +131,7 @@
|
||||
((:intersection-10 1) (field (* fldinst "SYMBOL 199 \\f \"Symbol\" \\s 10") (fldrslt :symbol :10-pt)))
|
||||
((:union-10 1) (field (* fldinst "SYMBOL 200 \\f \"Symbol\" \\s 10") (fldrslt :symbol :10-pt)))
|
||||
((:member-10 2) (field (* fldinst "SYMBOL 206 \\f \"Symbol\" \\s 10") (fldrslt :symbol :10-pt)))
|
||||
((:not-member-10 2) (field (* fldinst "SYMBOL 207 \\f \"Symbol\" \\s 10") (fldrslt :symbol :10-pt)))
|
||||
((:derives-10 2) (field (* fldinst "SYMBOL 222 \\f \"Symbol\" \\s 10") (fldrslt :symbol :10-pt)))
|
||||
((:left-triangle-bracket-10 1) (field (* fldinst "SYMBOL 225 \\f \"Symbol\" \\s 10") (fldrslt :symbol :10-pt)))
|
||||
((:right-triangle-bracket-10 1) (field (* fldinst "SYMBOL 241 \\f \"Symbol\" \\s 10") (fldrslt :symbol :10-pt)))
|
||||
@ -315,6 +316,8 @@
|
||||
(:text :english)
|
||||
(:invisible v)
|
||||
((:but-not 6) (b "except"))
|
||||
((:begin-negative-lookahead 13) "[lookahead" :not-member-10 "{")
|
||||
((:end-negative-lookahead 2) "}]")
|
||||
(:subscript sub)
|
||||
(:superscript super)
|
||||
(:plain-subscript b 0 i 0 :subscript)
|
||||
|
@ -28,6 +28,7 @@
|
||||
(deftype terminalset () 'integer)
|
||||
|
||||
(defconstant *empty-terminalset* 0)
|
||||
(defconstant *full-terminalset* -1)
|
||||
|
||||
; Return true if terminalset is empty.
|
||||
(declaim (inline terminalset-empty?))
|
||||
@ -53,12 +54,35 @@
|
||||
(zerop (logand terminalset1 terminalset2)))
|
||||
|
||||
|
||||
; Merge two sets of lookaheads sorted by increasing terminal numbers, eliminating
|
||||
; duplicates. Return the combined set.
|
||||
; Return the complement of the terminalset.
|
||||
(declaim (inline terminalset-complement))
|
||||
(defun terminalset-complement (terminalset)
|
||||
(lognot terminalset))
|
||||
|
||||
|
||||
; Return the intersection of the two terminalsets.
|
||||
(declaim (inline terminalset-intersection))
|
||||
(defun terminalset-intersection (terminalset1 terminalset2)
|
||||
(logand terminalset1 terminalset2))
|
||||
|
||||
(define-modify-macro terminalset-intersection-f (terminalset2) terminalset-intersection)
|
||||
|
||||
|
||||
; Return the union of the two terminalsets.
|
||||
(declaim (inline terminalset-union))
|
||||
(defun terminalset-union (terminalset1 terminalset2)
|
||||
(logior terminalset1 terminalset2))
|
||||
|
||||
(define-modify-macro terminalset-union-f (terminalset2) terminalset-union)
|
||||
|
||||
|
||||
; Return the elements in terminalset1 that are not in terminalset2.
|
||||
(declaim (inline terminalset-difference))
|
||||
(defun terminalset-difference (terminalset1 terminalset2)
|
||||
(logandc2 terminalset1 terminalset2))
|
||||
|
||||
(define-modify-macro terminalset-difference-f (terminalset2) terminalset-difference)
|
||||
|
||||
|
||||
; Return a unique serial number for the given terminal.
|
||||
(declaim (inline terminal-number))
|
||||
@ -119,6 +143,41 @@
|
||||
(code nil)) ;The generated lisp source code that performs the action
|
||||
|
||||
|
||||
;;; ------------------------------------------------------------------------------------------------------
|
||||
;;; CONSTRAINTS
|
||||
|
||||
;;; A constraint modifies the rhs of a production. The constraint applies just before the pos-th
|
||||
;;; general grammar symbol on the rhs of the production.
|
||||
(defstruct (constraint (:constructor nil) (:copier nil) (:predicate constraint?))
|
||||
(pos nil :type integer :read-only t)) ;Position of this constraint; ranges between 0 and length(general-production-rhs), inclusive.
|
||||
|
||||
|
||||
;;; A lookahead-constraint imposes a restriction on the current lookahead terminal when matching
|
||||
;;; against the production. The match succeeds only if there is no following terminal or the following
|
||||
;;; terminal is not present in the lookahead-constraint's list of forbidden terminals.
|
||||
(defstruct (lookahead-constraint (:include constraint)
|
||||
(:constructor make-lookahead-constraint (pos forbidden-terminals source))
|
||||
(:predicate lookahead-constraint?))
|
||||
(forbidden-terminals nil :type list :read-only t) ;List of forbidden terminals
|
||||
(source nil :type list :read-only t) ;List of grammar symbols (terminals or nonterminals) that produced the list of forbidden terminals
|
||||
(terminalset nil :type (or null terminalset))) ;Set of allowed terminals (complement of forbidden-terminals); null until terminals are numbered
|
||||
|
||||
|
||||
; Emit markup for a lookahead-constraint.
|
||||
(defun depict-lookahead-constraint (markup-stream lookahead-constraint)
|
||||
(depict markup-stream :begin-negative-lookahead)
|
||||
(depict-list markup-stream
|
||||
#'depict-production-rhs-component
|
||||
(lookahead-constraint-source lookahead-constraint)
|
||||
:separator ", ")
|
||||
(depict markup-stream :end-negative-lookahead))
|
||||
|
||||
|
||||
(defmethod print-object ((lookahead-constraint lookahead-constraint) stream)
|
||||
(print-unreadable-object (lookahead-constraint stream)
|
||||
(format stream "-~{ ~:_~W~}" (lookahead-constraint-source lookahead-constraint))))
|
||||
|
||||
|
||||
;;; ------------------------------------------------------------------------------------------------------
|
||||
;;; GENERALIZED PRODUCTIONS
|
||||
|
||||
@ -126,6 +185,7 @@
|
||||
(defstruct (general-production (:constructor nil) (:copier nil) (:predicate general-production?))
|
||||
(lhs nil :type general-nonterminal :read-only t) ;The general-nonterminal on the left-hand side of this general-production
|
||||
(rhs nil :type list :read-only t) ;List of general grammar symbols to which that general-nonterminal expands
|
||||
(constraints nil :type list :read-only t) ;List of constraints applying to rhs, sorted by increasing pos values
|
||||
(name nil :read-only t)) ;This general-production's name that will be used to name the parse tree node
|
||||
|
||||
|
||||
@ -137,6 +197,34 @@
|
||||
(generic-production-productions general-production)))
|
||||
|
||||
|
||||
; Return the general-production's rhs with constraints interleaved with the rhs general grammar symbols.
|
||||
(defun general-production-rhs-components (general-production)
|
||||
(labels
|
||||
((merge-constraints (rhs constraints pos)
|
||||
(if constraints
|
||||
(let* ((constraint (first constraints))
|
||||
(constraint-pos (constraint-pos constraint)))
|
||||
(cond
|
||||
((= constraint-pos pos)
|
||||
(cons constraint (merge-constraints rhs (rest constraints) pos)))
|
||||
(rhs
|
||||
(cons (first rhs) (merge-constraints (rest rhs) constraints (1+ pos))))
|
||||
(t (error "Bad constraint list"))))
|
||||
rhs)))
|
||||
|
||||
(merge-constraints (general-production-rhs general-production)
|
||||
(general-production-constraints general-production)
|
||||
0)))
|
||||
|
||||
|
||||
; Return the general-production's lookahead-constraint's terminalset at the given position.
|
||||
(defun general-production-lookahead-constraint (general-production pos)
|
||||
(let ((constraint (find pos (general-production-constraints general-production) :key #'constraint-pos :test #'=)))
|
||||
(if constraint
|
||||
(lookahead-constraint-terminalset constraint)
|
||||
*full-terminalset*)))
|
||||
|
||||
|
||||
; Emit a markup paragraph for the left-hand-side of a general production.
|
||||
(defun depict-general-production-lhs (markup-stream lhs-general-nonterminal)
|
||||
(depict-paragraph (markup-stream ':grammar-lhs)
|
||||
@ -144,6 +232,13 @@
|
||||
(depict markup-stream " " ':derives-10)))
|
||||
|
||||
|
||||
; Emit markup for a production right-hand-side component.
|
||||
(defun depict-production-rhs-component (markup-stream production-rhs-component &optional subscript)
|
||||
(if (lookahead-constraint? production-rhs-component)
|
||||
(depict-lookahead-constraint markup-stream production-rhs-component)
|
||||
(depict-general-grammar-symbol markup-stream production-rhs-component :reference subscript)))
|
||||
|
||||
|
||||
; Emit a markup paragraph for the right-hand-side of a general production.
|
||||
; first is true if this is the first production in a rule.
|
||||
; last is true if this is the last production in a rule.
|
||||
@ -152,45 +247,48 @@
|
||||
(if first
|
||||
(depict markup-stream ':tab3)
|
||||
(depict markup-stream "|" ':tab2))
|
||||
(let ((rhs (general-production-rhs general-production)))
|
||||
(let ((rhs-components (general-production-rhs-components general-production)))
|
||||
(depict-list markup-stream
|
||||
#'(lambda (markup-stream general-grammar-symbol)
|
||||
(depict-general-grammar-symbol markup-stream general-grammar-symbol :reference))
|
||||
rhs
|
||||
#'depict-production-rhs-component
|
||||
rhs-components
|
||||
:separator " "
|
||||
:empty '(:left-angle-quote "empty" :right-angle-quote)))))
|
||||
|
||||
|
||||
; Emit the general production, including both its left and right-hand sides.
|
||||
; Include serial number subscripts on all rhs grammar symbols that both
|
||||
; appear more than once in the rhs or appear in the lhs; and
|
||||
; appear more than once in the rhs or appear in the lhs, and
|
||||
; have symbols that are present in the symbols-with-subscripts list.
|
||||
; link is the lhs's link type.
|
||||
(defun depict-general-production (markup-stream general-production link &optional symbols-with-subscripts)
|
||||
(let ((lhs (general-production-lhs general-production))
|
||||
(rhs (general-production-rhs general-production)))
|
||||
(rhs-components (general-production-rhs-components general-production)))
|
||||
(depict-general-nonterminal markup-stream lhs link)
|
||||
(depict markup-stream " " ':derives-10)
|
||||
(if rhs
|
||||
(if rhs-components
|
||||
(let ((counts-hash (make-hash-table :test *grammar-symbol-=*)))
|
||||
(when symbols-with-subscripts
|
||||
(dolist (symbol symbols-with-subscripts)
|
||||
(setf (gethash symbol counts-hash) 0))
|
||||
(dolist (general-grammar-symbol (cons lhs rhs))
|
||||
(let ((symbol (general-grammar-symbol-symbol general-grammar-symbol)))
|
||||
(when (gethash symbol counts-hash)
|
||||
(incf (gethash symbol counts-hash)))))
|
||||
(dolist (production-rhs-component (cons lhs (general-production-rhs general-production)))
|
||||
(when (general-grammar-symbol? production-rhs-component)
|
||||
(let ((symbol (general-grammar-symbol-symbol production-rhs-component)))
|
||||
(when (gethash symbol counts-hash)
|
||||
(incf (gethash symbol counts-hash))))))
|
||||
(maphash #'(lambda (symbol count)
|
||||
(assert-true (> count 0))
|
||||
(if (> count 1)
|
||||
(setf (gethash symbol counts-hash) 0)
|
||||
(remhash symbol counts-hash)))
|
||||
counts-hash))
|
||||
(dolist (general-grammar-symbol rhs)
|
||||
(let* ((symbol (general-grammar-symbol-symbol general-grammar-symbol))
|
||||
(subscript (and (gethash symbol counts-hash) (incf (gethash symbol counts-hash)))))
|
||||
(dolist (production-rhs-component rhs-components)
|
||||
(let ((subscript nil))
|
||||
(when (general-grammar-symbol? production-rhs-component)
|
||||
(let ((symbol (general-grammar-symbol-symbol production-rhs-component)))
|
||||
(when (gethash symbol counts-hash)
|
||||
(setq subscript (incf (gethash symbol counts-hash))))))
|
||||
(depict-space markup-stream)
|
||||
(depict-general-grammar-symbol markup-stream general-grammar-symbol :reference subscript))))
|
||||
(depict-production-rhs-component markup-stream production-rhs-component subscript))))
|
||||
(depict markup-stream " " ':left-angle-quote "empty" :right-angle-quote))))
|
||||
|
||||
|
||||
@ -198,7 +296,7 @@
|
||||
;;; PRODUCTIONS
|
||||
|
||||
;;; A production describes the expansion of a nonterminal (the lhs) into
|
||||
;;; a string of zero or more grammar symbols (the rhs).
|
||||
;;; a string of zero or more grammar symbols (the rhs); the rhs may also have constraints attached.
|
||||
;;; Each production has a unique number. Earlier productions have smaller numbers.
|
||||
;;; There is exactly one production structure for a given production, so eq can be
|
||||
;;; used to test for production equality.
|
||||
@ -206,7 +304,7 @@
|
||||
;;; The evaluator is a lisp form that evaluates to a function f that takes one argument --
|
||||
;;; the old state of the parser's value stack -- and returns the new state of that stack.
|
||||
(defstruct (production (:include general-production (lhs nil :type nonterminal :read-only t))
|
||||
(:constructor make-production (lhs rhs name rhs-length number))
|
||||
(:constructor make-production (lhs rhs constraints name rhs-length number))
|
||||
(:copier nil) (:predicate production?))
|
||||
(rhs-length nil :type integer :read-only t) ;Number of grammar symbols in the rhs
|
||||
(number nil :type integer :read-only t) ;This production's serial number
|
||||
@ -234,7 +332,7 @@
|
||||
|
||||
(defun print-production (production &optional (stream t))
|
||||
(format stream "~<~W -> ~:I~_~:[<epsilon> ~;~:*~{~W ~:_~}~]~:> ~:_[~W]"
|
||||
(list (production-lhs production) (production-rhs production))
|
||||
(list (production-lhs production) (general-production-rhs-components production))
|
||||
(production-name production)))
|
||||
|
||||
|
||||
@ -251,7 +349,7 @@
|
||||
;;; A generic production is not a production and does not have a number or actions.
|
||||
|
||||
(defstruct (generic-production (:include general-production (lhs nil :type generic-nonterminal :read-only t))
|
||||
(:constructor make-generic-production (lhs rhs name productions))
|
||||
(:constructor make-generic-production (lhs rhs constraints name productions))
|
||||
(:copier nil)
|
||||
(:predicate generic-production?))
|
||||
(productions nil :type list :read-only t)) ;List of instantiations of this generic production
|
||||
@ -265,14 +363,15 @@
|
||||
(if (generic-nonterminal? new-lhs)
|
||||
(make-generic-production
|
||||
new-lhs
|
||||
(mapcar #'(lambda (rhs-general-grammar-symbol)
|
||||
(general-grammar-symbol-substitute attribute argument rhs-general-grammar-symbol))
|
||||
(mapcar #'(lambda (grammar-symbol)
|
||||
(general-grammar-symbol-substitute attribute argument grammar-symbol))
|
||||
(generic-production-rhs generic-production))
|
||||
(general-production-constraints generic-production)
|
||||
(generic-production-name generic-production)
|
||||
(remove-if #'(lambda (production)
|
||||
(not (general-nonterminal-is-instance? grammar-parametrization new-lhs (production-lhs production))))
|
||||
productions))
|
||||
|
||||
|
||||
(assert-non-null (find new-lhs productions :key #'production-lhs :test #'eq)))))
|
||||
|
||||
|
||||
@ -317,14 +416,19 @@
|
||||
;;; A rule is the set of all productions with the same lhs nonterminal.
|
||||
;;; There is exactly one rule structure for a given nonterminal lhs, so eq can be
|
||||
;;; used to test for rule equality.
|
||||
;;;
|
||||
;;; If the rule cannot somehow produce an empty expansion, then passthrough-terminals is empty.
|
||||
;;; Otherwise, passthrough-terminals summarizes the constraints imposed on the next lookahead terminal
|
||||
;;; imposed by all empty expansions of this rule. If these empty expansions do not impose any
|
||||
;;; lookahead-constraints, then passthrough-terminals will be the full set.
|
||||
(defstruct (rule (:include general-rule (productions nil :type list :read-only t))
|
||||
(:constructor make-rule (productions))
|
||||
(:copier nil)
|
||||
(:predicate rule?))
|
||||
(:copier nil)
|
||||
(:predicate rule?))
|
||||
;productions ;The list of all productions for this rule's nonterminal lhs
|
||||
(number nil :type (or null integer)) ;This nonterminal's serial number
|
||||
(derives-epsilon nil :type bool) ;True if some direct or indirect expansion of this nonterminal can return epsilon
|
||||
(initial-terminals *empty-terminalset* :type terminalset)) ;Set of all terminals that can begin some expansion of this nonterminal
|
||||
(passthrough-terminals *empty-terminalset* :type terminalset) ;See above
|
||||
(initial-terminals *empty-terminalset* :type terminalset)) ;Set of all terminals that can begin some expansion of this nonterminal
|
||||
|
||||
|
||||
; Return a list of nonterminals in this rule's rhs.
|
||||
@ -394,6 +498,12 @@
|
||||
(production-lhs (item-production item)))
|
||||
|
||||
|
||||
; Return the constraints of the item's production.
|
||||
(declaim (inline item-constraints))
|
||||
(defun item-constraints (item)
|
||||
(production-constraints (item-production item)))
|
||||
|
||||
|
||||
; Make an item with the given production and dot location (which must be an integer
|
||||
; between 0 and length(rhs(production)), inclusive. Reuse an existing item in the
|
||||
; grammar if possible.
|
||||
@ -412,18 +522,27 @@
|
||||
(allocate-item production dot unseen number next)))))))
|
||||
|
||||
|
||||
(defun print-item (item &optional (stream t))
|
||||
(defun print-item (item &optional (stream t) after-dot)
|
||||
(let ((production (item-production item)))
|
||||
(format stream "~W -> ~:_" (production-lhs production))
|
||||
(pprint-logical-block (stream (production-rhs production))
|
||||
(do ((n (item-dot item) (1- n))
|
||||
(do ((pos 0 (1+ pos))
|
||||
(constraints (production-constraints production))
|
||||
(first t))
|
||||
()
|
||||
(when (zerop n)
|
||||
(when (= pos (item-dot item))
|
||||
(if first
|
||||
(setq first nil)
|
||||
(format stream " ~:_"))
|
||||
(write-char #\. stream))
|
||||
(write-char #\. stream)
|
||||
(when after-dot
|
||||
(format stream " ~:_~W" after-dot)))
|
||||
(do ()
|
||||
((or (endp constraints) (/= (constraint-pos (first constraints)) pos)))
|
||||
(if first
|
||||
(setq first nil)
|
||||
(format stream " ~:_"))
|
||||
(write (pop constraints) :stream stream))
|
||||
(pprint-exit-if-list-exhausted)
|
||||
(if first
|
||||
(setq first nil)
|
||||
@ -441,27 +560,46 @@
|
||||
|
||||
;;; A laitem is an item with associated lookahead information.
|
||||
;;; Unlike items, laitem structures are not shared among the states.
|
||||
(defstruct (laitem (:constructor allocate-laitem (grammar item lookaheads))
|
||||
(defstruct (laitem (:constructor allocate-laitem (grammar item forbidden lookaheads))
|
||||
(:copier nil)
|
||||
(:predicate laitem?))
|
||||
(grammar nil :type grammar :read-only t) ;The grammar to which this laitem belongs
|
||||
(item nil :type item :read-only t) ;The item to which this laitem corresponds
|
||||
(forbidden nil :type terminalset :read-only t) ;A set of terminals that must not occur after the dot because of lookahead-constraints
|
||||
(lookaheads nil :type terminalset) ;Set of lookahead terminals
|
||||
(propagates nil :type list)) ;List of laitems to which lookaheads propagate from this laitem (see note below)
|
||||
;When parsing a LALR(1) grammar, propagates is the list of all laitems (in this and other states)
|
||||
;to which lookaheads propagate from this laitem.
|
||||
(propagates nil :type list)) ;List of (laitem . mask) to which lookaheads propagate from this laitem (see note below)
|
||||
;When parsing a LALR(1) grammar, propagates contains all laitems (in this and other states)
|
||||
;to which lookaheads propagate from this laitem. The mask in each entry of propagates
|
||||
;is a terminalset that indicates which lookaheads can propagate; this is usually *full-terminalset*
|
||||
;but can be smaller in the presence of constraints.
|
||||
;When parsing a LR(1) grammar, propagates is the list of all laitems to which lookaheads propagate from this laitem
|
||||
;without following a shift transition. Such laitems must necessarily be in the same state. In the LR(1) case each
|
||||
;laitem listed in the propagates list must come after this laitem in this state's laitems list.
|
||||
|
||||
|
||||
; Add or modify a propagation entry in src-laitem to point to dst-laitem with
|
||||
; the given mask.
|
||||
(defun laitem-add-propagation (src-laitem dst-laitem mask)
|
||||
(let ((propagation-entry (assoc dst-laitem (laitem-propagates src-laitem))))
|
||||
(if propagation-entry
|
||||
(terminalset-union-f (cdr propagation-entry) mask)
|
||||
(push (cons dst-laitem mask) (laitem-propagates src-laitem)))))
|
||||
|
||||
|
||||
(defvar *lookahead-print-column* 70)
|
||||
|
||||
(defun print-laitem (laitem &optional (stream t))
|
||||
(print-item (laitem-item laitem) stream)
|
||||
(format stream " ~vT~_" *lookahead-print-column*)
|
||||
(pprint-logical-block (stream nil :prefix "{" :suffix "}")
|
||||
(print-terminalset (laitem-grammar laitem) (laitem-lookaheads laitem) stream)))
|
||||
(let* ((grammar (laitem-grammar laitem))
|
||||
(item (laitem-item laitem))
|
||||
(forbidden (laitem-forbidden laitem))
|
||||
(forbidden-as-constraint
|
||||
(and (not (terminalset-empty? forbidden))
|
||||
(let ((forbidden-terminals (terminalset-list grammar forbidden)))
|
||||
(make-lookahead-constraint (item-dot item) forbidden-terminals forbidden-terminals)))))
|
||||
(print-item item stream forbidden-as-constraint)
|
||||
(format stream " ~vT~_" *lookahead-print-column*)
|
||||
(pprint-logical-block (stream nil :prefix "{" :suffix "}")
|
||||
(print-terminalset grammar (laitem-lookaheads laitem) stream))))
|
||||
|
||||
(defmethod print-object ((laitem laitem) stream)
|
||||
(print-unreadable-object (laitem stream)
|
||||
@ -695,14 +833,14 @@
|
||||
((substitute-argument-with (attribute subtree)
|
||||
(ecase (first subtree)
|
||||
(:rule
|
||||
(let* ((general-rule (second subtree))
|
||||
(lhs (general-rule-lhs general-rule))
|
||||
(new-lhs (general-grammar-symbol-substitute attribute argument lhs)))
|
||||
(assert-true (generic-rule? general-rule))
|
||||
(list ':rule
|
||||
(if (generic-nonterminal? new-lhs)
|
||||
(generic-rule-substitute grammar attribute argument general-rule)
|
||||
(grammar-rule grammar new-lhs)))))
|
||||
(let* ((general-rule (second subtree))
|
||||
(lhs (general-rule-lhs general-rule))
|
||||
(new-lhs (general-grammar-symbol-substitute attribute argument lhs)))
|
||||
(assert-true (generic-rule? general-rule))
|
||||
(list ':rule
|
||||
(if (generic-nonterminal? new-lhs)
|
||||
(generic-rule-substitute grammar attribute argument general-rule)
|
||||
(grammar-rule grammar new-lhs)))))
|
||||
(:argument
|
||||
(list ':argument
|
||||
(second subtree)
|
||||
@ -841,7 +979,7 @@
|
||||
(terminal-actions nil :type hash-table :read-only t) ;Hash table of terminal -> list of (action-symbol . action-function-or-nil)
|
||||
(rules nil :type hash-table :read-only t) ;Hash table of nonterminal -> rule
|
||||
(parameter-trees nil :type hash-table :read-only t) ;Hash table of nonterminal-symbol -> parameter-tree
|
||||
(max-production-length nil :type integer :read-only t) ;Maximum number of grammar symbols on the rhs of a production
|
||||
(max-production-length nil :type integer :read-only t) ;Maximum number of grammar symbols in the rhs of a production
|
||||
(general-productions nil :type hash-table :read-only t);Hash table of production-name -> general-production
|
||||
(n-productions nil :type integer :read-only t) ;Number of productions in the grammar
|
||||
;The following fields are used for the parser.
|
||||
@ -904,41 +1042,57 @@
|
||||
(assert-non-null (nth n (grammar-states grammar))))
|
||||
|
||||
|
||||
; Return true if symbol ==>* epsilon.
|
||||
(defun symbol-derives-epsilon (grammar symbol)
|
||||
(assert-type symbol grammar-symbol)
|
||||
(and (nonterminal? symbol)
|
||||
(rule-derives-epsilon (grammar-rule grammar symbol))))
|
||||
|
||||
|
||||
; Return the terminalset of all terminals a that satisfy
|
||||
; symbol ==>* a rest,
|
||||
; Return two values:
|
||||
; a generate terminalset G;
|
||||
; a passthrough terminalset P.
|
||||
;
|
||||
; G is the terminalset of all terminals x that satisfy
|
||||
; symbol ==>* x rest,
|
||||
; where rest is an arbitrary string of grammar symbols.
|
||||
; P is the terminalset of all terminals x that satisfy
|
||||
; symbol x ==> x
|
||||
(defun symbol-initial-terminals (grammar symbol)
|
||||
(assert-type symbol grammar-symbol)
|
||||
(if (nonterminal? symbol)
|
||||
(rule-initial-terminals (grammar-rule grammar symbol))
|
||||
(make-terminalset grammar symbol)))
|
||||
(let ((rule (grammar-rule grammar symbol)))
|
||||
(values (rule-initial-terminals rule) (rule-passthrough-terminals rule)))
|
||||
(values (make-terminalset grammar symbol) *empty-terminalset*)))
|
||||
|
||||
|
||||
; Given symbol-string, an arbitrary string of grammar symbols,
|
||||
; return two values: a terminalset S and a boolean B.
|
||||
; S is the terminalset of all terminals a that satisfy
|
||||
; symbol-string ==>* a rest,
|
||||
; Given an arbitrary string of grammar symbols, a list of constraints, and an initial position,
|
||||
; return two values:
|
||||
; a generate terminalset G;
|
||||
; a passthrough terminalset P.
|
||||
;
|
||||
; G is the terminalset of all terminals x that satisfy
|
||||
; symbol-string ==>* x rest,
|
||||
; where rest is an arbitrary string of grammar symbols.
|
||||
; B is true if symbol-string ==>* epsilon.
|
||||
(defun string-initial-terminals (grammar symbol-string)
|
||||
(let ((initial-terminals *empty-terminalset*)
|
||||
(derives-epsilon nil))
|
||||
(dolist (element symbol-string (setq derives-epsilon t))
|
||||
(setq initial-terminals (terminalset-union initial-terminals (symbol-initial-terminals grammar element)))
|
||||
(unless (symbol-derives-epsilon grammar element)
|
||||
(return)))
|
||||
(values initial-terminals derives-epsilon)))
|
||||
; P is the terminalset of all terminals x that satisfy
|
||||
; symbol-string x ==> x
|
||||
;
|
||||
; The constraints' positions are relative to the given initial position, which specifies the position
|
||||
; of the first grammar-symbol in the symbol-string. The constraints must be listed in order of
|
||||
; increasing positions.
|
||||
(defun string-initial-terminals (grammar symbol-string constraints pos)
|
||||
(do ((symbol-string symbol-string (cdr symbol-string))
|
||||
(initial-terminals *empty-terminalset*)
|
||||
(passthrough-terminals *full-terminalset*))
|
||||
((terminalset-empty? passthrough-terminals) (values initial-terminals *empty-terminalset*))
|
||||
(let ((constraint (find pos constraints :key #'constraint-pos :test #'=)))
|
||||
(when constraint
|
||||
(terminalset-intersection-f passthrough-terminals (lookahead-constraint-terminalset constraint))))
|
||||
(if symbol-string
|
||||
(multiple-value-bind (generate passthrough) (symbol-initial-terminals grammar (first symbol-string))
|
||||
(terminalset-union-f initial-terminals (terminalset-intersection passthrough-terminals generate))
|
||||
(terminalset-intersection-f passthrough-terminals passthrough))
|
||||
(return (values initial-terminals passthrough-terminals)))))
|
||||
|
||||
|
||||
; Intern attributed or generic nonterminals in the production's lhs and rhs. Return the
|
||||
; resulting production source.
|
||||
; Intern attributed or generic nonterminals in the production's lhs and rhs. Also replace
|
||||
; (:- <terminal> ... <terminal>) or (:-- (<grammar-symbol> ... <grammar-symbol>) <terminal> ... <terminal>)
|
||||
; sublists in the rhs with lookahead-constraints and put these, in order, after the third element of
|
||||
; the returned list.
|
||||
; Return the resulting production source.
|
||||
(defun intern-production-source (grammar-parametrization production-source)
|
||||
(assert-type production-source (tuple (or user-nonterminal cons) (list (or user-grammar-symbol cons)) identifier))
|
||||
(let ((production-lhs-source (first production-source))
|
||||
@ -946,11 +1100,24 @@
|
||||
(production-name (third production-source)))
|
||||
(if (or (consp production-lhs-source) (some #'consp production-rhs-source))
|
||||
(multiple-value-bind (lhs-nonterminal lhs-arguments) (grammar-parametrization-intern grammar-parametrization production-lhs-source)
|
||||
(list lhs-nonterminal
|
||||
(mapcar #'(lambda (grammar-symbol-source)
|
||||
(grammar-parametrization-intern grammar-parametrization grammar-symbol-source lhs-arguments))
|
||||
production-rhs-source)
|
||||
production-name))
|
||||
(let ((rhs nil)
|
||||
(constraints nil)
|
||||
(pos 0))
|
||||
(dolist (component-source production-rhs-source)
|
||||
(cond
|
||||
((and (consp component-source) (eq (first component-source) ':-))
|
||||
(let ((lookaheads (rest component-source)))
|
||||
(push
|
||||
(make-lookahead-constraint pos (assert-non-null lookaheads) lookaheads)
|
||||
constraints)))
|
||||
((and (consp component-source) (eq (first component-source) ':--))
|
||||
(let ((lookaheads (rest component-source)))
|
||||
(push
|
||||
(make-lookahead-constraint pos (assert-non-null (rest lookaheads)) (assert-non-null (first lookaheads)))
|
||||
constraints)))
|
||||
(t (push (grammar-parametrization-intern grammar-parametrization component-source lhs-arguments) rhs)))
|
||||
(incf pos))
|
||||
(list* lhs-nonterminal (nreverse rhs) production-name (nreverse constraints))))
|
||||
production-source)))
|
||||
|
||||
|
||||
@ -966,6 +1133,13 @@
|
||||
; nonterminal can have attributes, thereby designating a specialization instead of a fully
|
||||
; generic production.
|
||||
;
|
||||
; The rhs can also contain lookahead constraints of the form
|
||||
; (:- <terminal> ... <terminal>)
|
||||
; which indicate that the following terminal must not be one of the listed terminals. The form
|
||||
; (:-- (<grammar-symbol> ... <grammar-symbol>) <terminal> ... <terminal>)
|
||||
; does the same thing except that it prints the grammar-symbols instead of the terminals when
|
||||
; the production is printed.
|
||||
;
|
||||
; excluded-nonterminals-source is a list of nonterminals not used in the grammar. Productions,
|
||||
; including productions expanded from generic productions, that have one of these nonterminals
|
||||
; on the lhs are ignored.
|
||||
@ -979,7 +1153,8 @@
|
||||
(general-productions (make-hash-table :test #'equal))
|
||||
(production-number 0)
|
||||
(max-production-length 1)
|
||||
(excluded-nonterminals-hash (make-hash-table :test *grammar-symbol-=*)))
|
||||
(excluded-nonterminals-hash (make-hash-table :test *grammar-symbol-=*))
|
||||
(lookahead-constraints nil))
|
||||
|
||||
;Set up excluded-nonterminals-hash. The values of the hash table are either :seen or :unseen
|
||||
;depending on whether a production with the particular nonterminal has been seen yet.
|
||||
@ -989,12 +1164,12 @@
|
||||
|
||||
;Create the starting production: *start-nonterminal* ==> start-symbol
|
||||
(setf (gethash *start-nonterminal* rules)
|
||||
(list (make-production *start-nonterminal* (list start-symbol) nil 1 0)))
|
||||
(list (make-production *start-nonterminal* (list start-symbol) nil nil 1 0)))
|
||||
|
||||
;Create the rest of the productions.
|
||||
(flet
|
||||
((create-production (lhs rhs name)
|
||||
(let ((production (make-production lhs rhs name (length rhs) (incf production-number))))
|
||||
((create-production (lhs rhs constraints name)
|
||||
(let ((production (make-production lhs rhs constraints name (length rhs) (incf production-number))))
|
||||
(push production (gethash lhs rules))
|
||||
(dolist (rhs-terminal (production-terminals production))
|
||||
(setf (gethash rhs-terminal terminals-hash) t))
|
||||
@ -1008,6 +1183,7 @@
|
||||
(let* ((production-lhs (first production-source))
|
||||
(production-rhs (second production-source))
|
||||
(production-name (third production-source))
|
||||
(production-constraints (cdddr production-source))
|
||||
(lhs-arguments (general-grammar-symbol-arguments production-lhs)))
|
||||
(setq max-production-length (max max-production-length (length production-rhs)))
|
||||
(when (gethash production-name general-productions)
|
||||
@ -1024,16 +1200,17 @@
|
||||
(mapcar #'(lambda (general-grammar-symbol)
|
||||
(instantiate-general-grammar-symbol bound-argument-alist general-grammar-symbol))
|
||||
production-rhs)
|
||||
production-constraints
|
||||
production-name)
|
||||
productions))))
|
||||
lhs-arguments)
|
||||
(when productions
|
||||
(setf (gethash production-name general-productions)
|
||||
(make-generic-production production-lhs production-rhs production-name (nreverse productions)))))
|
||||
(make-generic-production production-lhs production-rhs production-constraints production-name (nreverse productions)))))
|
||||
|
||||
(unless (nonterminal-excluded production-lhs)
|
||||
(setf (gethash production-name general-productions)
|
||||
(create-production production-lhs production-rhs production-name)))))))
|
||||
(create-production production-lhs production-rhs production-constraints production-name)))))))
|
||||
|
||||
|
||||
;Change all values of the rules hash table to contain rule structures
|
||||
@ -1042,9 +1219,16 @@
|
||||
(maphash
|
||||
#'(lambda (rule-lhs rule-productions)
|
||||
(dolist (rule-production rule-productions)
|
||||
(dolist (rhs-nonterminal (production-nonterminals rule-production))
|
||||
(unless (gethash rhs-nonterminal rules)
|
||||
(error "Nonterminal ~S used but not defined" rhs-nonterminal))))
|
||||
(dolist (grammar-symbol (production-rhs rule-production))
|
||||
(when (nonterminal? grammar-symbol)
|
||||
(unless (gethash grammar-symbol rules)
|
||||
(error "Nonterminal ~S used but not defined" grammar-symbol))))
|
||||
(dolist (constraint (production-constraints rule-production))
|
||||
(push constraint lookahead-constraints)
|
||||
(dolist (lookahead-terminal (lookahead-constraint-forbidden-terminals constraint))
|
||||
(unless (gethash lookahead-terminal terminals-hash)
|
||||
(warn "Lookahead terminal ~S not used in main grammar" lookahead-terminal)
|
||||
(setf (gethash lookahead-terminal terminals-hash) t)))))
|
||||
(setf (gethash rule-lhs rules)
|
||||
(make-rule (nreverse rule-productions))))
|
||||
rules)
|
||||
@ -1086,27 +1270,33 @@
|
||||
:n-productions production-number
|
||||
:items-hash (make-hash-table :test #'equal))))
|
||||
|
||||
;Compute the values of derives-epsilon and initial-terminals in each rule.
|
||||
;Compute the terminalsets in the lookahead-constraints.
|
||||
(dolist (lookahead-constraint lookahead-constraints)
|
||||
(let ((terminalset *full-terminalset*))
|
||||
(dolist (forbidden-terminal (lookahead-constraint-forbidden-terminals lookahead-constraint))
|
||||
(terminalset-difference-f terminalset (make-terminalset grammar forbidden-terminal)))
|
||||
(setf (lookahead-constraint-terminalset lookahead-constraint) terminalset)))
|
||||
|
||||
;Compute the values of passthrough-terminals and initial-terminals in each rule.
|
||||
(do ((changed t))
|
||||
((not changed))
|
||||
(setq changed nil)
|
||||
(dolist (nonterminal (grammar-nonterminals-list grammar))
|
||||
(let ((rule (grammar-rule grammar nonterminal))
|
||||
(new-initial-terminals *empty-terminalset*)
|
||||
(new-derives-epsilon nil))
|
||||
(new-passthrough-terminals *empty-terminalset*))
|
||||
(dolist (production (rule-productions rule))
|
||||
(multiple-value-bind (production-initial-terminals production-derives-epsilon)
|
||||
(string-initial-terminals grammar (production-rhs production))
|
||||
(setq new-initial-terminals (terminalset-union new-initial-terminals production-initial-terminals))
|
||||
(when production-derives-epsilon
|
||||
(setq new-derives-epsilon t))))
|
||||
(assert-true (or new-derives-epsilon (not (rule-derives-epsilon rule))))
|
||||
(assert-true (terminalset-<= (rule-initial-terminals rule) new-initial-terminals))
|
||||
(multiple-value-bind (production-initial-terminals production-passthrough-terminals)
|
||||
(string-initial-terminals grammar (production-rhs production) (production-constraints production) 0)
|
||||
(terminalset-union-f new-initial-terminals production-initial-terminals)
|
||||
(terminalset-union-f new-passthrough-terminals production-passthrough-terminals)))
|
||||
(unless (terminalset-= new-initial-terminals (rule-initial-terminals rule))
|
||||
(assert-true (terminalset-<= (rule-initial-terminals rule) new-initial-terminals))
|
||||
(setf (rule-initial-terminals rule) new-initial-terminals)
|
||||
(setq changed t))
|
||||
(unless (eq new-derives-epsilon (rule-derives-epsilon rule))
|
||||
(setf (rule-derives-epsilon rule) t)
|
||||
(unless (terminalset-= new-passthrough-terminals (rule-passthrough-terminals rule))
|
||||
(assert-true (terminalset-<= (rule-passthrough-terminals rule) new-passthrough-terminals))
|
||||
(setf (rule-passthrough-terminals rule) new-passthrough-terminals)
|
||||
(setq changed t)))))
|
||||
|
||||
;Compute the parameter-trees entries.
|
||||
@ -1154,7 +1344,7 @@
|
||||
(loop
|
||||
(let ((production (pprint-pop)))
|
||||
(format stream "~@<~:[<epsilon> ~;~:*~{~W ~:_~}~] ~_~vT~vA [~W]~:>"
|
||||
(production-rhs production) *name-print-column*
|
||||
(general-production-rhs-components production) *name-print-column*
|
||||
production-number-width (print-production-number (production-number production))
|
||||
(production-name production)))
|
||||
(pprint-exit-if-list-exhausted)
|
||||
@ -1162,7 +1352,7 @@
|
||||
(pprint-indent :block 2 stream)
|
||||
(when details
|
||||
(format stream "~:@_Initial terminals: ~@_~@<~:[~;<epsilon> ~:_~]~{~W ~:_~}~:>"
|
||||
(rule-derives-epsilon rule)
|
||||
(not (terminalset-empty? (rule-passthrough-terminals rule)))
|
||||
(terminalset-list grammar (rule-initial-terminals rule)))))
|
||||
(pprint-newline :mandatory stream))
|
||||
(pprint-exit-if-list-exhausted)
|
||||
|
@ -378,6 +378,7 @@
|
||||
((:intersection-10 1) (:script "document.write(U_cap)")) ;#x2229
|
||||
((:union-10 1) (:script "document.write(U_cup)")) ;#x222A
|
||||
((:member-10 2) (:script "document.write(U_isin)")) ;#x2208
|
||||
((:not-member-10 2) (:script "document.write(U_notin)")) ;#x2209
|
||||
((:derives-10 2) (:script "document.write(U_rArr)")) ;#x21D2
|
||||
((:left-triangle-bracket-10 1) (:script "document.write(U_lang)")) ;#x2329
|
||||
((:right-triangle-bracket-10 1) (:script "document.write(U_rang)")) ;#x232A
|
||||
@ -443,6 +444,8 @@
|
||||
;Specials
|
||||
(:invisible del)
|
||||
((:but-not 6) (b "except"))
|
||||
((:begin-negative-lookahead 13) "[lookahead" :not-member-10 "{")
|
||||
((:end-negative-lookahead 2) "}]")
|
||||
(:subscript sub)
|
||||
(:superscript sup)
|
||||
(:plain-subscript :subscript)
|
||||
|
@ -388,10 +388,19 @@
|
||||
(defun grammar-singletons (grammar-source)
|
||||
(assert-type grammar-source (list (tuple t (list t) identifier)))
|
||||
(let ((singletons 0))
|
||||
(dolist (production-source grammar-source)
|
||||
(dolist (grammar-symbol (second production-source))
|
||||
(when (characterp grammar-symbol)
|
||||
(setq singletons (charset-add-char singletons grammar-symbol)))))
|
||||
(labels
|
||||
((scan-for-singletons (list)
|
||||
(dolist (element list)
|
||||
(cond
|
||||
((characterp element)
|
||||
(setq singletons (charset-add-char singletons element)))
|
||||
((consp element)
|
||||
(case (first element)
|
||||
(:- (scan-for-singletons (rest element)))
|
||||
(:-- (scan-for-singletons (cddr element)))))))))
|
||||
|
||||
(dolist (production-source grammar-source)
|
||||
(scan-for-singletons (second production-source))))
|
||||
singletons))
|
||||
|
||||
|
||||
@ -551,12 +560,64 @@
|
||||
;;; ------------------------------------------------------------------------------------------------------
|
||||
|
||||
|
||||
; Return a freshly consed list of partitions for the given charclass.
|
||||
(defun charclass-partitions (lexer charclass)
|
||||
(do ((partitions nil)
|
||||
(charset (charclass-charset charclass)))
|
||||
((charset-empty? charset) partitions)
|
||||
(let* ((partition-name (if (charset-infinite? charset)
|
||||
*default-partition-name*
|
||||
(gethash (charset-highest-char charset) (lexer-char-tokens lexer))))
|
||||
(partition-charset (if (characterp partition-name)
|
||||
(char-charset partition-name)
|
||||
(partition-charset (gethash partition-name (lexer-partitions lexer))))))
|
||||
(push partition-name partitions)
|
||||
(setq charset (charset-difference charset partition-charset)))))
|
||||
|
||||
|
||||
; Return an updated grammar-source whose character class nonterminals replaced with sets of
|
||||
; terminals inside :- and :-- constraints.
|
||||
(defun update-constraint-nonterminals (lexer grammar-source)
|
||||
(mapcar
|
||||
#'(lambda (production-source)
|
||||
(let ((rhs (second production-source)))
|
||||
(if (some #'(lambda (rhs-component)
|
||||
(and (consp rhs-component)
|
||||
(member (first rhs-component) '(:- :--))))
|
||||
rhs)
|
||||
(list*
|
||||
(first production-source)
|
||||
(mapcar
|
||||
#'(lambda (component)
|
||||
(when (consp component)
|
||||
(let ((tag (first component)))
|
||||
(when (eq tag ':-)
|
||||
(setq component (list* ':-- (rest component) (rest component)))
|
||||
(setq tag ':--))
|
||||
(when (eq tag ':--)
|
||||
(setq component
|
||||
(list* tag
|
||||
(second component)
|
||||
(mapcan #'(lambda (grammar-symbol)
|
||||
(if (nonterminal? grammar-symbol)
|
||||
(charclass-partitions lexer (assert-non-null (lexer-charclass lexer grammar-symbol)))
|
||||
(list grammar-symbol)))
|
||||
(cddr component)))))))
|
||||
component)
|
||||
rhs)
|
||||
(cddr production-source))
|
||||
production-source)))
|
||||
grammar-source))
|
||||
|
||||
|
||||
; Return two values:
|
||||
; extra grammar productions that define the character class nonterminals out of characters and tokens;
|
||||
; extra commands that:
|
||||
; An updated grammar-source that includes:
|
||||
; grammar productions that define the character class nonterminals out of characters and tokens;
|
||||
; character class nonterminals replaced with sets of terminals inside :- and :-- constraints.
|
||||
; Extra commands that:
|
||||
; define the partitions used in this lexer;
|
||||
; define the actions of these productions.
|
||||
(defun lexer-grammar-and-commands (lexer)
|
||||
(defun lexer-grammar-and-commands (lexer grammar-source)
|
||||
(labels
|
||||
((component-partitions (charset partitions)
|
||||
(if (charset-empty? charset)
|
||||
@ -616,7 +677,7 @@
|
||||
(partition-lexer-actions (gethash partition-name (lexer-partitions lexer)))))
|
||||
(lexer-partition-names lexer))))
|
||||
(values
|
||||
(nreverse productions)
|
||||
(nreconc productions (update-constraint-nonterminals lexer grammar-source))
|
||||
(nconc partition-commands (nreverse commands)))))))
|
||||
|
||||
|
||||
@ -632,9 +693,9 @@
|
||||
; define the actions of these productions.
|
||||
(defun make-lexer-and-grammar (kind charclasses-source lexer-actions-source parametrization start-symbol grammar-source &optional excluded-nonterminals-source)
|
||||
(let ((lexer (make-lexer parametrization charclasses-source lexer-actions-source grammar-source)))
|
||||
(multiple-value-bind (extra-grammar-source extra-commands) (lexer-grammar-and-commands lexer)
|
||||
(multiple-value-bind (lexer-grammar-source extra-commands) (lexer-grammar-and-commands lexer grammar-source)
|
||||
(let ((grammar (make-and-compile-grammar kind parametrization start-symbol
|
||||
(append extra-grammar-source grammar-source) excluded-nonterminals-source)))
|
||||
lexer-grammar-source excluded-nonterminals-source)))
|
||||
(setf (lexer-grammar lexer) grammar)
|
||||
(values lexer extra-commands)))))
|
||||
|
||||
|
@ -35,38 +35,63 @@
|
||||
;Create a laitem for this item and add the association item->laitem to the laitems-hash
|
||||
;hash table if it's not there already. Regardless of whether a new laitem was created,
|
||||
;update the laitem's lookaheads to also include the given lookaheads.
|
||||
;If prev is non-null, update (laitem-propagates prev) to include the laitem if it's not
|
||||
;already included there.
|
||||
;forbidden is a terminalset of terminals that must not occur immediately after the dot in this
|
||||
;laitem. The forbidden set is inherited from constraints in parent laitems in the same state.
|
||||
;If prev is non-null, update (laitem-propagates prev) to include the laitem and the given
|
||||
;passthrough terminalset if it's not already included there.
|
||||
;If a new laitem was created and its first symbol after the dot exists and is a
|
||||
;nonterminal A, recursively close items A->.rhs corresponding to all rhs's in the
|
||||
;grammar's rule for A.
|
||||
((close-item (item lookaheads prev)
|
||||
(let ((laitem (gethash item laitems-hash)))
|
||||
((close-item (item forbidden lookaheads prev passthroughs)
|
||||
(let ((production (item-production item))
|
||||
(laitem (gethash item laitems-hash)))
|
||||
(unless (terminalset-empty? forbidden)
|
||||
(multiple-value-bind (dot-lookaheads dot-passthroughs)
|
||||
(string-initial-terminals grammar (item-unseen item) (production-constraints production) (item-dot item))
|
||||
(let ((dot-initial (terminalset-union dot-lookaheads dot-passthroughs)))
|
||||
;Check whether any terminal can start this item. If not, skip this item altogether.
|
||||
(when (terminalset-empty? (terminalset-difference dot-initial forbidden))
|
||||
;Mark skipped items in the laitems-hash table.
|
||||
(when (and laitem (not (eq laitem 'forbidden)))
|
||||
(error "Two laitems in the same state differing only in forbidden initial terminal constraints: ~S" laitem))
|
||||
(setf (gethash item laitems-hash) 'forbidden)
|
||||
(return-from close-item))
|
||||
;Convert forbidden into a canonical format by removing terminals that cannot begin this item's expansion anyway.
|
||||
(terminalset-intersection-f forbidden dot-initial))))
|
||||
(if laitem
|
||||
(setf (laitem-lookaheads laitem)
|
||||
(terminalset-union (laitem-lookaheads laitem) lookaheads))
|
||||
(progn
|
||||
(unless (terminalset-= forbidden (laitem-forbidden laitem))
|
||||
(error "Two laitems in the same state differing only in forbidden initial terminal constraints: ~S" laitem))
|
||||
(terminalset-union-f (laitem-lookaheads laitem) lookaheads))
|
||||
(let ((item-next-symbol (item-next-symbol item)))
|
||||
(setq laitem (allocate-laitem grammar item lookaheads))
|
||||
(setq laitem (allocate-laitem grammar item forbidden lookaheads))
|
||||
(push laitem laitems)
|
||||
(setf (gethash item laitems-hash) laitem)
|
||||
(when (nonterminal? item-next-symbol)
|
||||
(multiple-value-bind (next-lookaheads epsilon-lookahead)
|
||||
(string-initial-terminals grammar (rest (item-unseen item)))
|
||||
(let ((next-prev (and epsilon-lookahead laitem)))
|
||||
(dolist (production (rule-productions (grammar-rule grammar item-next-symbol)))
|
||||
(close-item (make-item grammar production 0) next-lookaheads next-prev)))))))
|
||||
(let* ((dot (item-dot item))
|
||||
(next-forbidden (terminalset-union forbidden
|
||||
(terminalset-complement (general-production-lookahead-constraint production dot)))))
|
||||
(multiple-value-bind (next-lookaheads next-passthroughs)
|
||||
(string-initial-terminals grammar (rest (item-unseen item)) (production-constraints production) (1+ dot))
|
||||
(let ((next-prev (and (not (terminalset-empty? next-passthroughs)) laitem)))
|
||||
(dolist (production (rule-productions (grammar-rule grammar item-next-symbol)))
|
||||
(close-item (make-item grammar production 0) next-forbidden next-lookaheads next-prev next-passthroughs))))))))
|
||||
(when prev
|
||||
(pushnew laitem (laitem-propagates prev))))))
|
||||
(laitem-add-propagation prev laitem passthroughs)))))
|
||||
|
||||
(dolist (acons kernel-item-alist)
|
||||
(close-item (car acons) initial-lookaheads (and update-propagates (cdr acons))))
|
||||
(close-item (car acons)
|
||||
*empty-terminalset*
|
||||
initial-lookaheads
|
||||
(and update-propagates (cdr acons))
|
||||
*full-terminalset*))
|
||||
(allocate-state number kernel (nreverse laitems)))))
|
||||
|
||||
|
||||
; f is a function that takes two arguments:
|
||||
; a grammar symbol, and
|
||||
; a list of kernel items in order of increasing item number.
|
||||
; a list of pairs (item . prev), where item is a kernel item and prev is a laitem;
|
||||
; f is a function that takes three arguments:
|
||||
; a grammar symbol;
|
||||
; a list of kernel items in order of increasing item number;
|
||||
; a list of pairs (item . prev), where item is a kernel item and prev is a laitem.
|
||||
; For each possible symbol X that can be shifted while in the given state S, call
|
||||
; f giving it S and the list of items that constitute the kernel of that shift's destination
|
||||
; state. The prev's are the sources of the corresponding shifted items.
|
||||
@ -130,9 +155,10 @@
|
||||
(defun propagate-internal-lookaheads (state)
|
||||
(dolist (src-laitem (state-laitems state))
|
||||
(let ((src-lookaheads (laitem-lookaheads src-laitem)))
|
||||
(dolist (dst-laitem (laitem-propagates src-laitem))
|
||||
(setf (laitem-lookaheads dst-laitem)
|
||||
(terminalset-union (laitem-lookaheads dst-laitem) src-lookaheads))))))
|
||||
(dolist (propagation (laitem-propagates src-laitem))
|
||||
(let ((dst-laitem (car propagation))
|
||||
(mask (cdr propagation)))
|
||||
(terminalset-union-f (laitem-lookaheads dst-laitem) (terminalset-intersection src-lookaheads mask)))))))
|
||||
|
||||
|
||||
; Propagate all lookaheads in kernel-item-alist, which must target destination-state.
|
||||
@ -141,8 +167,7 @@
|
||||
(dolist (acons kernel-item-alist)
|
||||
(let ((dest-laitem (state-laitem destination-state (car acons)))
|
||||
(src-laitem (cdr acons)))
|
||||
(setf (laitem-lookaheads dest-laitem)
|
||||
(terminalset-union (laitem-lookaheads dest-laitem) (laitem-lookaheads src-laitem)))))
|
||||
(terminalset-union-f (laitem-lookaheads dest-laitem) (laitem-lookaheads src-laitem))))
|
||||
(setf (gethash destination-state dirty-states) t))
|
||||
|
||||
|
||||
@ -249,7 +274,7 @@
|
||||
(let ((destination-state (gethash kernel lalr-states-hash)))
|
||||
(if destination-state
|
||||
(dolist (acons kernel-item-alist)
|
||||
(pushnew (state-laitem destination-state (car acons)) (laitem-propagates (cdr acons))))
|
||||
(laitem-add-propagation (cdr acons) (state-laitem destination-state (car acons)) *full-terminalset*))
|
||||
(progn
|
||||
(setq destination-state (make-state grammar kernel kernel-item-alist t next-state-number *empty-terminalset*))
|
||||
(setf (gethash kernel lalr-states-hash) destination-state)
|
||||
@ -278,12 +303,14 @@
|
||||
(dolist (dirty-laitem (hash-table-keys dirty-laitems))
|
||||
(remhash dirty-laitem dirty-laitems)
|
||||
(let ((src-lookaheads (laitem-lookaheads dirty-laitem)))
|
||||
(dolist (dst-laitem (laitem-propagates dirty-laitem))
|
||||
(let* ((old-dst-lookaheads (laitem-lookaheads dst-laitem))
|
||||
(new-dst-lookaheads (terminalset-union old-dst-lookaheads src-lookaheads)))
|
||||
(unless (terminalset-= old-dst-lookaheads new-dst-lookaheads)
|
||||
(setf (laitem-lookaheads dst-laitem) new-dst-lookaheads)
|
||||
(setf (gethash dst-laitem dirty-laitems) t)))))))
|
||||
(dolist (propagation (laitem-propagates dirty-laitem))
|
||||
(let ((dst-laitem (car propagation))
|
||||
(mask (cdr propagation)))
|
||||
(let* ((old-dst-lookaheads (laitem-lookaheads dst-laitem))
|
||||
(new-dst-lookaheads (terminalset-union old-dst-lookaheads (terminalset-intersection src-lookaheads mask))))
|
||||
(unless (terminalset-= old-dst-lookaheads new-dst-lookaheads)
|
||||
(setf (laitem-lookaheads dst-laitem) new-dst-lookaheads)
|
||||
(setf (gethash dst-laitem dirty-laitems) t))))))))
|
||||
|
||||
;Erase the propagates chains in all laitems.
|
||||
(dolist (state (grammar-states grammar))
|
||||
@ -306,16 +333,21 @@
|
||||
(dolist (laitem (state-laitems state))
|
||||
(let ((item (laitem-item laitem)))
|
||||
(unless (item-next-symbol item)
|
||||
(if (grammar-symbol-= (item-lhs item) *start-nonterminal*)
|
||||
(when (terminal-in-terminalset grammar *end-marker* (laitem-lookaheads laitem))
|
||||
(push (cons *end-marker* (make-accept-transition))
|
||||
(state-transitions state)))
|
||||
(map-terminalset-reverse
|
||||
#'(lambda (lookahead)
|
||||
(push (cons lookahead (make-reduce-transition (item-production item)))
|
||||
(state-transitions state)))
|
||||
grammar
|
||||
(laitem-lookaheads laitem))))))
|
||||
(let ((lookaheads (terminalset-difference
|
||||
(terminalset-intersection
|
||||
(laitem-lookaheads laitem)
|
||||
(general-production-lookahead-constraint (item-production item) (item-dot item)))
|
||||
(laitem-forbidden laitem))))
|
||||
(if (grammar-symbol-= (item-lhs item) *start-nonterminal*)
|
||||
(when (terminal-in-terminalset grammar *end-marker* lookaheads)
|
||||
(push (cons *end-marker* (make-accept-transition))
|
||||
(state-transitions state)))
|
||||
(map-terminalset-reverse
|
||||
#'(lambda (lookahead)
|
||||
(push (cons lookahead (make-reduce-transition (item-production item)))
|
||||
(state-transitions state)))
|
||||
grammar
|
||||
lookaheads))))))
|
||||
(setf (state-gotos state)
|
||||
(sort (state-gotos state) #'< :key #'(lambda (goto-cons) (state-number (cdr goto-cons)))))
|
||||
(setf (state-transitions state)
|
||||
|
@ -131,6 +131,7 @@
|
||||
((:intersection-10 1) (field (* fldinst "SYMBOL 199 \\f \"Symbol\" \\s 10") (fldrslt :symbol :10-pt)))
|
||||
((:union-10 1) (field (* fldinst "SYMBOL 200 \\f \"Symbol\" \\s 10") (fldrslt :symbol :10-pt)))
|
||||
((:member-10 2) (field (* fldinst "SYMBOL 206 \\f \"Symbol\" \\s 10") (fldrslt :symbol :10-pt)))
|
||||
((:not-member-10 2) (field (* fldinst "SYMBOL 207 \\f \"Symbol\" \\s 10") (fldrslt :symbol :10-pt)))
|
||||
((:derives-10 2) (field (* fldinst "SYMBOL 222 \\f \"Symbol\" \\s 10") (fldrslt :symbol :10-pt)))
|
||||
((:left-triangle-bracket-10 1) (field (* fldinst "SYMBOL 225 \\f \"Symbol\" \\s 10") (fldrslt :symbol :10-pt)))
|
||||
((:right-triangle-bracket-10 1) (field (* fldinst "SYMBOL 241 \\f \"Symbol\" \\s 10") (fldrslt :symbol :10-pt)))
|
||||
@ -315,6 +316,8 @@
|
||||
(:text :english)
|
||||
(:invisible v)
|
||||
((:but-not 6) (b "except"))
|
||||
((:begin-negative-lookahead 13) "[lookahead" :not-member-10 "{")
|
||||
((:end-negative-lookahead 2) "}]")
|
||||
(:subscript sub)
|
||||
(:superscript super)
|
||||
(:plain-subscript b 0 i 0 :subscript)
|
||||
|
Loading…
x
Reference in New Issue
Block a user