Wrapped number and string tokens in wrapper types to simplify documentation. Improved the descriptions of the Unicode character categories.

This commit is contained in:
waldemar%netscape.com 2002-10-29 01:12:27 +00:00
parent 16ae98e269
commit 006c546e93
2 changed files with 65 additions and 54 deletions

View File

@ -23,10 +23,6 @@
(cond (cond
((eq token-value :end-of-input) ((eq token-value :end-of-input)
(values *end-marker* nil)) (values *end-marker* nil))
((stringp token-value)
(values (if line-break (terminal-lf-terminal '$string) '$string) token-value))
((or (float64? token-value) (float32? token-value))
(values (if line-break (terminal-lf-terminal '$number) '$number) token-value))
((eq token-value :negated-min-long) ((eq token-value :negated-min-long)
(values (if line-break (terminal-lf-terminal '$negated-min-long) '$negated-min-long) nil)) (values (if line-break (terminal-lf-terminal '$negated-min-long) '$negated-min-long) nil))
(t (t
@ -35,7 +31,8 @@
(ecase (first token-value) (ecase (first token-value)
(l:identifier (values '$identifier data)) (l:identifier (values '$identifier data))
((l:keyword l:punctuator) (values (intern (string-upcase data)) nil)) ((l:keyword l:punctuator) (values (intern (string-upcase data)) nil))
((l:long l:u-long) (values '$number (translate-number token-value))) (l:number-token (values '$number (translate-number data)))
(l:string-token (values '$string data))
(l:regular-expression (values '$regular-expression data))) (l:regular-expression (values '$regular-expression data)))
(when line-break (when line-break
(setq token (terminal-lf-terminal token))) (setq token (terminal-lf-terminal token)))
@ -96,10 +93,8 @@
(setq lexer-state '$div)) (setq lexer-state '$div))
(setq token-value (get-next-token-value lexer-state)) (setq token-value (get-next-token-value lexer-state))
(setq line-break t)) (setq line-break t))
(setq prev-number-token (or (float64? token-value) (setq prev-number-token (or (eq token-value :negated-min-long)
(float32? token-value) (and (consp token-value) (eq (car token-value) 'l:number-token))))
(eq token-value :negated-min-long)
(and (consp token-value) (member (car token-value) '(l:long l:u-long)))))
(multiple-value-setq (token token-arg) (js-lexer-results-to-token token-value line-break))))) (multiple-value-setq (token token-arg) (js-lexer-results-to-token token-value line-break)))))
(setq transition (state-transition state token)) (setq transition (state-transition state token))
(unless transition (unless transition

View File

@ -14,15 +14,26 @@
:$next-input-element :$next-input-element
((:unicode-character (% every (:text "Any Unicode character")) () t) ((:unicode-character (% every (:text "Any Unicode character")) () t)
(:unicode-initial-alphabetic (:unicode-initial-alphabetic
(% initial-alpha (:text "Any Unicode initial alphabetic character (includes ASCII " (% initial-alpha (:text "Any character in category" :space
(:character-literal #\A) :nbhy (:character-literal #\Z) " and " (:external-name "Lu") " (uppercase letter)," :space
(:character-literal #\a) :nbhy (:character-literal #\z) ")")) (:external-name "Ll") " (lowercase letter)," :space
(:external-name "Lt") " (titlecase letter)," :space
(:external-name "Lm") " (modifier letter)," :space
(:external-name "Lo") " (other letter), or" :space
(:external-name "Nl") " (letter number)" :space "in the Unicode Character Database"))
() t) () t)
(:unicode-alphanumeric (:unicode-alphanumeric
(% alphanumeric (:text "Any Unicode alphabetic or decimal digit character (includes ASCII " (% alphanumeric (:text "Any character in category" :space
(:character-literal #\0) :nbhy (:character-literal #\9) ", " (:external-name "Lu") " (uppercase letter)," :space
(:character-literal #\A) :nbhy (:character-literal #\Z) ", and " (:external-name "Ll") " (lowercase letter)," :space
(:character-literal #\a) :nbhy (:character-literal #\z) ")")) (:external-name "Lt") " (titlecase letter)," :space
(:external-name "Lm") " (modifier letter)," :space
(:external-name "Lo") " (other letter)," :space
(:external-name "Nd") " (decimal number)," :space
(:external-name "Nl") " (letter number)," :space
(:external-name "Mn") " (non-spacing mark)," :space
(:external-name "Mc") " (combining spacing mark), or" :space
(:external-name "Pc") " (connector punctuation)" :space "in the Unicode Character Database"))
() t) () t)
(:white-space-character (++ (#?0009 #?000B #?000C #\space #?00A0) (:white-space-character (++ (#?0009 #?000B #?000C #\space #?00A0)
(#?2000 #?2001 #?2002 #?2003 #?2004 #?2005 #?2006 #?2007) (#?2000 #?2001 #?2002 #?2003 #?2004 #?2005 #?2006 #?2007)
@ -82,10 +93,12 @@
(deftuple keyword (name string)) (deftuple keyword (name string))
(deftuple punctuator (name string)) (deftuple punctuator (name string))
(deftuple identifier (name string)) (deftuple identifier (name string))
(deftuple number-token (value general-number))
(deftag negated-min-long) (deftag negated-min-long)
(deftuple string-token (value string))
(deftuple regular-expression (body string) (flags string)) (deftuple regular-expression (body string) (flags string))
(deftype token (union keyword punctuator identifier general-number (tag negated-min-long) string regular-expression)) (deftype token (union keyword punctuator identifier number-token (tag negated-min-long) string-token regular-expression))
(deftype input-element (union (tag line-break end-of-input) token)) (deftype input-element (union (tag line-break end-of-input) token))
@ -186,6 +199,21 @@
(%heading 1 "Keywords and Identifiers") (%heading 1 "Keywords and Identifiers")
; Rule :identifier-or-keyword -- classifies an :identifier-name as either a
; keyword token or an identifier token.
; It declares one action, lex, whose result type is input-element.
(rule :identifier-or-keyword
((lex input-element))
(production :identifier-or-keyword (:identifier-name) identifier-or-keyword-identifier-name
(lex (begin
; id is the string produced by the lex-name action of :identifier-name.
(const id string (lex-name :identifier-name))
; Two conditions must both hold for the name to become a keyword:
;   1. id is a member of the literal word set listed below, and
;   2. the contains-escapes action of :identifier-name is false.
; A name whose contains-escapes is true is always returned as an identifier,
; even when its string matches one of the listed words.
(if (and (set-in id (list-set "abstract" "as" "break" "case" "catch" "class" "const" "continue" "debugger" "default" "delete" "do" "else" "enum"
"exclude" "export" "extends" "false" "final" "finally" "for" "function" "get" "goto" "if" "implements" "import" "in"
"include" "instanceof" "interface" "is" "named" "namespace" "native" "new" "null" "package" "private" "protected" "public" "return"
"set" "static" "super" "switch" "synchronized" "this" "throw" "throws" "transient" "true" "try" "typeof" "use"
"var" "volatile" "while" "with"))
(not (contains-escapes :identifier-name)))
; Matched the word set with no escapes: wrap the string in a keyword tuple.
(return (new keyword id))
; Otherwise: wrap the string in an identifier tuple.
(return (new identifier id)))))))
(%print-actions)
(rule :identifier-name (rule :identifier-name
((lex-name string) (contains-escapes boolean)) ((lex-name string) (contains-escapes boolean))
(production :identifier-name (:initial-identifier-character-or-escape) identifier-name-initial (production :identifier-name (:initial-identifier-character-or-escape) identifier-name-initial
@ -213,9 +241,11 @@
(lex-char ($default-action :initial-identifier-character)) (lex-char ($default-action :initial-identifier-character))
(contains-escapes false)) (contains-escapes false))
(production :initial-identifier-character-or-escape (#\\ :hex-escape) initial-identifier-character-or-escape-escape (production :initial-identifier-character-or-escape (#\\ :hex-escape) initial-identifier-character-or-escape-escape
(lex-char (begin (if (is-initial-identifier-character (lex-char :hex-escape)) (lex-char (begin
(return (lex-char :hex-escape)) (const ch character (lex-char :hex-escape))
(throw syntax-error)))) (if (is-initial-identifier-character ch)
(return ch)
(throw syntax-error))))
(contains-escapes true))) (contains-escapes true)))
(%charclass :initial-identifier-character) (%charclass :initial-identifier-character)
@ -226,40 +256,26 @@
(lex-char ($default-action :continuing-identifier-character)) (lex-char ($default-action :continuing-identifier-character))
(contains-escapes false)) (contains-escapes false))
(production :continuing-identifier-character-or-escape (#\\ :hex-escape) continuing-identifier-character-or-escape-escape (production :continuing-identifier-character-or-escape (#\\ :hex-escape) continuing-identifier-character-or-escape-escape
(lex-char (begin (if (is-continuing-identifier-character (lex-char :hex-escape)) (lex-char (begin
(return (lex-char :hex-escape)) (const ch character (lex-char :hex-escape))
(throw syntax-error)))) (if (is-continuing-identifier-character ch)
(return ch)
(throw syntax-error))))
(contains-escapes true))) (contains-escapes true)))
(%charclass :continuing-identifier-character) (%charclass :continuing-identifier-character)
(%print-actions) (%print-actions)
(define reserved-words (vector string) (define (is-initial-identifier-character (ch character :unused)) boolean
(vector "abstract" "as" "break" "case" "catch" "class" "const" "continue" "debugger" "default" "delete" "do" "else" "enum" (bottom (:keyword return) " " (:tag true) " if the nonterminal " (:grammar-symbol :initial-identifier-character) " can expand into " (:local ch)
"export" "extends" "false" "final" "finally" "for" "function" "goto" "if" "implements" "import" "in" " and " (:tag false) " otherwise."))
"instanceof" "interface" "is" "namespace" "native" "new" "null" "package" "private" "protected" "public" "return" "static" "super" (defprimitive is-initial-identifier-character (lambda (ch) (initial-identifier-character? ch)))
"switch" "synchronized" "this" "throw" "throws" "transient" "true" "try" "typeof" "use" "var" "volatile" "while" "with"))
(define non-reserved-words (vector string)
(vector "exclude" "get" "include" "named" "set"))
(define keywords (vector string)
(append reserved-words non-reserved-words))
(define (member (id string) (list (vector string))) boolean (define (is-continuing-identifier-character (ch character :unused)) boolean
(rwhen (empty list) (bottom (:keyword return) " " (:tag true) " if the nonterminal " (:grammar-symbol :continuing-identifier-character) " can expand into " (:local ch)
(return false)) " and " (:tag false) " otherwise."))
(rwhen (= id (nth list 0) string) (defprimitive is-continuing-identifier-character (lambda (ch) (continuing-identifier-character? ch)))
(return true))
(return (member id (subseq list 1))))
(rule :identifier-or-keyword
((lex input-element))
(production :identifier-or-keyword (:identifier-name) identifier-or-keyword-identifier-name
(lex (begin
(const id string (lex-name :identifier-name))
(if (and (member id keywords) (not (contains-escapes :identifier-name)))
(return (new keyword id))
(return (new identifier id)))))))
(%print-actions)
(%heading 1 "Punctuators") (%heading 1 "Punctuators")
@ -326,23 +342,23 @@
(rule :numeric-literal ((lex token)) (rule :numeric-literal ((lex token))
(production :numeric-literal (:decimal-literal) numeric-literal-decimal (production :numeric-literal (:decimal-literal) numeric-literal-decimal
(lex (real-to-float64 (lex-number :decimal-literal)))) (lex (new number-token (real-to-float64 (lex-number :decimal-literal)))))
(production :numeric-literal (:hex-integer-literal) numeric-literal-hex (production :numeric-literal (:hex-integer-literal) numeric-literal-hex
(lex (real-to-float64 (lex-number :hex-integer-literal)))) (lex (new number-token (real-to-float64 (lex-number :hex-integer-literal)))))
(production :numeric-literal (:decimal-literal :letter-f) numeric-literal-single (production :numeric-literal (:decimal-literal :letter-f) numeric-literal-single
(lex (real-to-float32 (lex-number :decimal-literal)))) (lex (new number-token (real-to-float32 (lex-number :decimal-literal)))))
(production :numeric-literal (:integer-literal :letter-l) numeric-literal-long (production :numeric-literal (:integer-literal :letter-l) numeric-literal-long
(lex (begin (lex (begin
(const i integer (lex-number :integer-literal)) (const i integer (lex-number :integer-literal))
(cond (cond
((<= i (- (expt 2 63) 1)) (return (new long i))) ((<= i (- (expt 2 63) 1)) (return (new number-token (new long i))))
((= i (expt 2 63)) (return negated-min-long)) ((= i (expt 2 63)) (return negated-min-long))
(nil (throw range-error)))))) (nil (throw range-error))))))
(production :numeric-literal (:integer-literal :letter-u :letter-l) numeric-literal-unsigned-long (production :numeric-literal (:integer-literal :letter-u :letter-l) numeric-literal-unsigned-long
(lex (begin (lex (begin
(const i integer (lex-number :integer-literal)) (const i integer (lex-number :integer-literal))
(if (<= i (- (expt 2 64) 1)) (if (<= i (- (expt 2 64) 1))
(return (new u-long i)) (return (new number-token (new u-long i)))
(throw range-error)))))) (throw range-error))))))
(rule :integer-literal ((lex-number integer)) (rule :integer-literal ((lex-number integer))
@ -427,9 +443,9 @@
(grammar-argument :theta single double) (grammar-argument :theta single double)
(rule :string-literal ((lex token)) (rule :string-literal ((lex token))
(production :string-literal (#\' (:string-chars single) #\') string-literal-single (production :string-literal (#\' (:string-chars single) #\') string-literal-single
(lex (lex-string :string-chars))) (lex (new string-token (lex-string :string-chars))))
(production :string-literal (#\" (:string-chars double) #\") string-literal-double (production :string-literal (#\" (:string-chars double) #\") string-literal-double
(lex (lex-string :string-chars)))) (lex (new string-token (lex-string :string-chars)))))
(%print-actions) (%print-actions)
(rule (:string-chars :theta) ((lex-string string)) (rule (:string-chars :theta) ((lex-string string))