;;; The contents of this file are subject to the Netscape Public License
;;; Version 1.0 (the "NPL"); you may not use this file except in
;;; compliance with the NPL.  You may obtain a copy of the NPL at
;;; http://www.mozilla.org/NPL/
;;;
;;; Software distributed under the NPL is distributed on an "AS IS" basis,
;;; WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
;;; for the specific language governing rights and limitations under the
;;; NPL.
;;;
;;; The Initial Developer of this code under the NPL is Netscape
;;; Communications Corporation.  Portions created by Netscape are
;;; Copyright (C) 1998 Netscape Communications Corporation.  All Rights
;;; Reserved.

;;;
;;; Finite-state machine generator
;;;
;;; Waldemar Horwat (waldemar@netscape.com)
;;;


;;; ------------------------------------------------------------------------------------------------------
;;; METATRANSITION

(defstruct (metatransition (:constructor make-metatransition (next-metastate pre-productions post-productions)))
  (next-metastate nil :read-only t)     ;Next metastate to enter or nil if this is an accept transition
  (pre-productions nil :read-only t)    ;List of productions reduced by this transition (in order from first to last) before the shift
  (post-productions nil :read-only t))  ;List of productions reduced by this transition (in order from first to last) after the shift

  
;;; ------------------------------------------------------------------------------------------------------
;;; METASTATE

;;; A metastate is a list of states that represents a possible stack that the
;;; LALR(1) parser may encounter.
(defstruct (metastate (:constructor make-metastate (stack number transitions)))
  (stack nil :type list :read-only t)                 ;List of states that comprises a possible stack
  (number nil :type integer :read-only t)             ;Serial number of this metastate
  (transitions nil :type simple-vector :read-only t)) ;Array, indexed by terminal numbers, of either nil or metatransition structures

(declaim (inline metastate-transition))
(defun metastate-transition (metastate terminal-number)
  (svref (metastate-transitions metastate) terminal-number))


(defun print-metastate (metastate metagrammar &optional (stream t))
  (let ((grammar (metagrammar-grammar metagrammar)))
    (pprint-logical-block (stream nil)
      (format stream "M~D:~2I ~@_~<~@{S~D ~:_~}~:>~:@_"
              (metastate-number metastate)
              (nreverse (mapcar #'state-number (metastate-stack metastate))))
      (let ((transitions (metastate-transitions metastate)))
        (dotimes (terminal-number (length transitions))
          (let ((transition (svref transitions terminal-number))
                (terminal (svref (grammar-terminals grammar) terminal-number)))
            (when transition
              (let ((next-metastate (metatransition-next-metastate transition)))
                (pprint-logical-block (stream nil)
                  (format stream "~W ==> ~@_~:I~:[accept~;M~:*~D~] ~_"
                          terminal
                          (and next-metastate (metastate-number next-metastate)))
                  (pprint-fill stream (mapcar #'production-name (metatransition-pre-productions transition)))
                  (format stream " ~@_")
                  (pprint-fill stream (mapcar #'production-name (metatransition-post-productions transition))))
                (pprint-newline :mandatory stream)))))))))


(defmethod print-object ((metastate metastate) stream)
  (print-unreadable-object (metastate stream)
    (format stream "metastate S~D" (metastate-number metastate))))


;;; ------------------------------------------------------------------------------------------------------
;;; METAGRAMMAR

(defstruct (metagrammar (:constructor allocate-metagrammar))
  (grammar nil :type grammar :read-only t)          ;The grammar to which this metagrammar corresponds
  (metastates nil :type list :read-only t)          ;List of metastates ordered by metastate numbers
  (start nil :type metastate :read-only t))         ;The start metastate


(defun make-metagrammar (grammar)
  (let* ((terminals (grammar-terminals grammar))
         (n-terminals (length terminals))
         (metastates-hash (make-hash-table :test #'equal))    ;Hash table of (list of state) -> metastate
         (metastates nil)
         (metastate-number -1))
    (labels
      (;Return the stack after applying the given reduction production.
       (apply-reduction-production (stack production)
         (let* ((stack (nthcdr (production-rhs-length production) stack))
                (state (first stack))
                (dst-state (assert-non-null
                            (cdr (assoc (production-lhs production) (state-gotos state) :test *grammar-symbol-=*))))
                (dst-stack (cons dst-state stack)))
           (if (member dst-state stack :test #'eq)
             (error "This grammar cannot be represented by a FSM.  Stack: ~S" dst-stack)
             dst-stack)))
       
       (get-metatransition (stack terminal productions)
         (let* ((state (first stack))
                (transition (cdr (assoc terminal (state-transitions state) :test *grammar-symbol-=*))))
           (when transition
             (case (transition-kind transition)
               (:shift
                (multiple-value-bind (metastate forwarding-productions) (get-metastate (transition-state transition) stack t)
                  (make-metatransition metastate (nreverse productions) forwarding-productions)))
               (:reduce
                (let ((production (transition-production transition)))
                  (get-metatransition (apply-reduction-production stack production) terminal (cons production productions))))
               (:accept (make-metatransition nil (nreverse productions) nil))
               (t (error "Bad transition: ~S" transition))))))
       
       ;Return the metastate corresponding to the state stack (stack-top . stack-rest).  Construct a new
       ;metastate if necessary.
       ;If simplify is true and stack-top is a state for which every outgoing transition is the same
       ;reduction, return two values:
       ;  the metastate reached by following that reduction (doing it recursively if needed)
       ;  a list of reduction productions followed this way.
       (get-metastate (stack-top stack-rest simplify)
         (let* ((stack (cons stack-top stack-rest))
                (existing-metastate (gethash stack metastates-hash)))
           (cond
            (existing-metastate (values existing-metastate nil))
            ((member stack-top stack-rest :test #'eq)
             (error "This grammar cannot be represented by a FSM.  Stack: ~S" stack))
            (t (let ((forwarding-production (and simplify (forwarding-state-production stack-top))))
                 (if forwarding-production
                   (let ((stack (apply-reduction-production stack forwarding-production)))
                     (multiple-value-bind (metastate forwarding-productions) (get-metastate (car stack) (cdr stack) simplify)
                       (values metastate (cons forwarding-production forwarding-productions))))
                   (let* ((transitions (make-array n-terminals :initial-element nil))
                          (metastate (make-metastate stack (incf metastate-number) transitions)))
                     (setf (gethash stack metastates-hash) metastate)
                     (push metastate metastates)
                     (dotimes (n n-terminals)
                       (setf (svref transitions n)
                             (get-metatransition stack (svref terminals n) nil)))
                     (values metastate nil)))))))))
      
      (let ((start-metastate (get-metastate (grammar-start-state grammar) nil nil)))
        (allocate-metagrammar :grammar grammar
                              :metastates (nreverse metastates)
                              :start start-metastate)))))


; Print the metagrammar nicely.
(defun print-metagrammar (metagrammar &optional (stream t) &key (grammar t) (details t))
  (pprint-logical-block (stream nil)
    (when grammar
      (print-grammar (metagrammar-grammar metagrammar) stream :details details))
    
    ;Print the metastates.
    (format stream "Start metastate: ~@_M~D~:@_~:@_" (metastate-number (metagrammar-start metagrammar)))
    (pprint-logical-block (stream (metagrammar-metastates metagrammar))
      (pprint-exit-if-list-exhausted)
      (format stream "Metastates:~2I~:@_")
      (loop
        (print-metastate (pprint-pop) metagrammar stream)
        (pprint-exit-if-list-exhausted)
        (pprint-newline :mandatory stream))))
  (pprint-newline :mandatory stream))


(defmethod print-object ((metagrammar metagrammar) stream)
  (print-unreadable-object (metagrammar stream :identity t)
    (write-string "metagrammar" stream)))


; Find the longest possible prefix of the input list of tokens that is accepted by the
; grammar.  Parse that prefix and return two values:
;   the list of action results;
;   the tail of the input list of tokens remaining to be parsed.
; Signal an error if no prefix of the input list is accepted by the grammar.
;
; token-terminal is a function that returns a terminal symbol when given an input token.
; If trace is:
;   nil,     don't print trace information
;   :code,   print trace information, including action code
;   other    print trace information
(defun action-metaparse (metagrammar token-terminal input &key trace)
  (if trace
    (trace-action-metaparse metagrammar token-terminal input trace)
    (let ((grammar (metagrammar-grammar metagrammar)))
      (labels
        ((transition-value-stack (value-stack productions)
           (dolist (production productions)
             (setq value-stack (funcall (production-evaluator production) value-stack)))
           value-stack)
         
         (cut (input good-metastate good-input good-value-stack)
           (unless good-metastate
             (error "Parse error on ~S ..." (ldiff input (nthcdr 10 input))))
           (let ((last-metatransition (metastate-transition good-metastate *end-marker-terminal-number*)))
             (assert-true (null (metatransition-next-metastate last-metatransition)))
             (assert-true (null (metatransition-post-productions last-metatransition)))
             (values
              (reverse (transition-value-stack good-value-stack (metatransition-pre-productions last-metatransition)))
              good-input))))
        
        (do ((metastate (metagrammar-start metagrammar))
             (input input (cdr input))
             (value-stack nil)
             (last-good-metastate nil)
             last-good-input
             last-good-value-stack)
            (nil)
          (when (metastate-transition metastate *end-marker-terminal-number*)
            (setq last-good-metastate metastate)
            (setq last-good-input input)
            (setq last-good-value-stack value-stack))
          (when (endp input)
            (return (cut input last-good-metastate last-good-input last-good-value-stack)))
          (let* ((token (first input))
                 (terminal (funcall token-terminal token))
                 (terminal-number (terminal-number grammar terminal))
                 (metatransition (metastate-transition metastate terminal-number)))
            (unless metatransition
              (return (cut input last-good-metastate last-good-input last-good-value-stack)))
            (setq value-stack (transition-value-stack value-stack (metatransition-pre-productions metatransition)))
            (dolist (action-function-binding (gethash terminal (grammar-terminal-actions grammar)))
              (push (funcall (cdr action-function-binding) token) value-stack))
            (setq value-stack (transition-value-stack value-stack (metatransition-post-productions metatransition)))
            (setq metastate (metatransition-next-metastate metatransition))))))))


; Same as action-parse, but with tracing information
; If trace is:
;   :code,   print trace information, including action code
;   other    print trace information
(defun trace-action-metaparse (metagrammar token-terminal input trace)
  (let
    ((grammar (metagrammar-grammar metagrammar)))
    (labels
      ((print-stacks (value-stack type-stack)
         (write-string "    " *trace-output*)
         (if value-stack
           (print-values (reverse value-stack) (reverse type-stack) *trace-output*)
           (write-string "empty" *trace-output*))
         (pprint-newline :mandatory *trace-output*))
       
       (transition-value-stack (value-stack type-stack productions)
         (dolist (production productions)
           (write-string "  reduce " *trace-output*)
           (if (eq trace :code)
             (write production :stream *trace-output* :pretty t)
             (print-production production *trace-output*))
           (pprint-newline :mandatory *trace-output*)
           (setq value-stack (funcall (production-evaluator production) value-stack))
           (setq type-stack (nthcdr (production-n-action-args production) type-stack))
           (dolist (action-signature (grammar-symbol-signature grammar (production-lhs production)))
             (push (cdr action-signature) type-stack))
           (print-stacks value-stack type-stack))
         (values value-stack type-stack))
       
       (cut (input good-metastate good-input good-value-stack good-type-stack)
         (unless good-metastate
           (error "Parse error on ~S ..." (ldiff input (nthcdr 10 input))))
         (let ((last-metatransition (metastate-transition good-metastate *end-marker-terminal-number*)))
           (assert-true (null (metatransition-next-metastate last-metatransition)))
           (assert-true (null (metatransition-post-productions last-metatransition)))
           (format *trace-output* "cut to M~D~:@_" (metastate-number good-metastate))
           (print-stacks good-value-stack good-type-stack)
           (pprint-newline :mandatory *trace-output*)
           (values
            (reverse (transition-value-stack good-value-stack good-type-stack (metatransition-pre-productions last-metatransition)))
            good-input))))
      
      (do ((metastate (metagrammar-start metagrammar))
           (input input (cdr input))
           (value-stack nil)
           (type-stack nil)
           (last-good-metastate nil)
           last-good-input
           last-good-value-stack
           last-good-type-stack)
          (nil)
        (format *trace-output* "M~D" (metastate-number metastate))
        (when (metastate-transition metastate *end-marker-terminal-number*)
          (write-string " (good)" *trace-output*)
          (setq last-good-metastate metastate)
          (setq last-good-input input)
          (setq last-good-value-stack value-stack)
          (setq last-good-type-stack type-stack))
        (write-string ": " *trace-output*)
        (when (endp input)
          (return (cut input last-good-metastate last-good-input last-good-value-stack last-good-type-stack)))
        (let* ((token (first input))
               (terminal (funcall token-terminal token))
               (terminal-number (terminal-number grammar terminal))
               (metatransition (metastate-transition metastate terminal-number)))
          (unless metatransition
            (format *trace-output* "shift ~W: " terminal)
            (return (cut input last-good-metastate last-good-input last-good-value-stack last-good-type-stack)))
          (format *trace-output* "transition to M~D~:@_" (metastate-number (metatransition-next-metastate metatransition)))
          (multiple-value-setq (value-stack type-stack)
            (transition-value-stack value-stack type-stack (metatransition-pre-productions metatransition)))
          (dolist (action-function-binding (gethash terminal (grammar-terminal-actions grammar)))
            (push (funcall (cdr action-function-binding) token) value-stack))
          (dolist (action-signature (grammar-symbol-signature grammar terminal))
            (push (cdr action-signature) type-stack))
          (format *trace-output* "shift ~W~:@_" terminal)
          (print-stacks value-stack type-stack)
          (multiple-value-setq (value-stack type-stack)
            (transition-value-stack value-stack type-stack (metatransition-post-productions metatransition)))
          (setq metastate (metatransition-next-metastate metatransition)))))))


; Compute all representative strings of terminals such that, for each such string S:
;   S is rejected by the grammar's language;
;   all prefixes of S are also rejected by the grammar's language;
;   for any S and all strings of terminals T, the concatenated string ST is also
;     rejected by the grammar's language;
;   no string S1 is a prefix of (or equal to) another string S2.
; Often there are infinitely many such strings S, so only output one for each illegal
; metaparser transition.
; Return a list of S's, where each S is itself a list of terminals.
(defun compute-illegal-strings (metagrammar)
  (let* ((grammar (metagrammar-grammar metagrammar))
         (terminals (grammar-terminals grammar))
         (n-terminals (length terminals))
         (metastates (metagrammar-metastates metagrammar))
         (n-metastates (length metastates))
         (visited-metastates (make-array n-metastates :element-type 'bit :initial-element 0))
         (illegal-strings nil))
    (labels
      ((visit (metastate reversed-string)
         (let ((metastate-number (metastate-number metastate)))
           (when (= (sbit visited-metastates metastate-number) 0)
             (setf (sbit visited-metastates metastate-number) 1)
             (let ((metatransitions (metastate-transitions metastate)))
               ;If there is a transition for the end marker from this state, then string
               ;is accepted by the language, so cut off the search.
               (unless (svref metatransitions *end-marker-terminal-number*)
                 (dotimes (terminal-number n-terminals)
                   (unless (= terminal-number *end-marker-terminal-number*)
                     (let ((metatransition (svref metatransitions terminal-number))
                           (reversed-string (cons (svref terminals terminal-number) reversed-string)))
                       (if metatransition
                         (visit (metatransition-next-metastate metatransition) reversed-string)
                         (push (reverse reversed-string) illegal-strings)))))))))))
      
      (visit (metagrammar-start metagrammar) nil)
      (nreverse illegal-strings))))


; Compute and print illegal strings of terminals.  See compute-illegal-strings.
(defun print-illegal-strings (metagrammar &optional (stream t))
  (pprint-logical-block (stream (compute-illegal-strings metagrammar))
    (format stream "Illegal strings:~2I")
    (loop
      (pprint-exit-if-list-exhausted)
      (pprint-newline :mandatory stream)
      (pprint-fill stream (pprint-pop))))
  (pprint-newline :mandatory stream))