Bug 579230 - Rewrite narcissus expr parser, refactor into builder pattern. r=gal

Shu-yu Guo 2010-08-01 12:20:18 -07:00
parent 1c4522feb1
commit 58c2ef3eb9
4 changed files with 1910 additions and 667 deletions

View File

@@ -72,7 +72,7 @@ var tokens = [
// Nonterminal tree node type codes.
"SCRIPT", "BLOCK", "LABEL", "FOR_IN", "CALL", "NEW_WITH_ARGS", "INDEX",
"ARRAY_INIT", "OBJECT_INIT", "PROPERTY_INIT", "GETTER", "SETTER",
"GROUP", "LIST", "LET_STM", "LET_EXP", "LET_DEF",
"GROUP", "LIST", "LET_BLOCK", "ARRAY_COMP", "GENERATOR", "COMP_TAIL",
// Terminals.
"IDENTIFIER", "NUMBER", "STRING", "REGEXP",
@@ -81,7 +81,7 @@ var tokens = [
"break",
"case", "catch", "const", "continue",
"debugger", "default", "delete", "do",
"else", "enum",
"else",
"false", "finally", "for", "function",
"if", "in", "instanceof",
"let",

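The new nonterminal codes replace the old LET_STM/LET_EXP/LET_DEF trio with a single LET_BLOCK and add nodes for comprehensions and generator expressions. For orientation, a rough sketch of the SpiderMonkey-extension source forms these codes presumably correspond to; the parser that actually builds them is in the suppressed diff at the bottom, so the exact node shapes are assumed rather than taken from this changeset:

let (x = 1, y = 2) { f(x + y); }     // LET_BLOCK: a block-scoped let statement/expression
[x * x for (x in o) if (x > 2)]      // ARRAY_COMP, with the for/if part presumably a COMP_TAIL
(x * x for (x in o))                 // GENERATOR: a generator expression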
View File

@@ -71,10 +71,27 @@ var global = {
x2.scope = x.scope;
ExecutionContext.current = x2;
try {
execute(parse(s), x2);
execute(parse(new VanillaBuilder, s), x2);
} catch (e if e == THROW) {
x.result = x2.result;
throw e;
} catch (e if e instanceof SyntaxError) {
x.result = e;
throw THROW;
} catch (e if e instanceof InternalError) {
/*
* If we get too much recursion during parsing we need to re-throw
* it as a narcissus THROW.
*
* See bug 152646.
*/
var re = /InternalError: (script stack space quota is exhausted|too much recursion)/;
if (re.test(e.toString())) {
x.result = e;
throw THROW;
} else {
throw e;
}
} finally {
ExecutionContext.current = x;
}
@@ -106,8 +123,9 @@ var global = {
var t = new Tokenizer("anonymous(" + p + ") {" + b + "}");
// NB: Use the STATEMENT_FORM constant since we don't want to push this
// function onto the null compilation context.
var f = FunctionDefinition(t, null, false, STATEMENT_FORM);
// function onto the fake compilation context.
var x = { builder: new VanillaBuilder };
var f = FunctionDefinition(t, x, false, STATEMENT_FORM);
var s = {object: global, parent: null};
return newFunction(f,{scope:s});
},
@@ -482,7 +500,7 @@ function execute(n, x) {
case ASSIGN:
r = execute(n[0], x);
t = n[0].assignOp;
t = n.assignOp;
if (t)
u = getValue(r);
v = getValue(execute(n[1], x));
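The operator of a compound assignment is now read off the ASSIGN node itself instead of its left-hand child. A tiny hypothetical helper showing the node shape the interpreter relies on after this change (the layout is inferred from this hunk alone; isCompoundAssign is not a function in the tree):

// e.g. x += 1 is assumed to build an ASSIGN node with assignOp == PLUS and
// children n[0] (the target) and n[1] (the right-hand side).
function isCompoundAssign(n) {
    return n.type == ASSIGN && !!n.assignOp;   // previously the parser set n[0].assignOp
}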
@@ -1007,7 +1025,7 @@ function evaluate(s, f, l) {
var x2 = new ExecutionContext(GLOBAL_CODE);
ExecutionContext.current = x2;
try {
execute(parse(s, f, l), x2);
execute(parse(new VanillaBuilder, s, f, l), x2);
} catch (e if e == THROW) {
if (x) {
x.result = x2.result;

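Every entry point now hands parse a builder as its first argument, and the fake compilation context used for Function carries one as x.builder. A minimal driver sketch using only the calling convention visible in this diff; VanillaBuilder's methods live in the suppressed parser file and are not shown here, and the source string and filename below are made up:

var builder = new VanillaBuilder;
var ast = parse(builder, "1 + 2 * 3;", "inline.js", 1);  // SCRIPT node built through the builder
var x2 = new ExecutionContext(GLOBAL_CODE);
ExecutionContext.current = x2;
execute(ast, x2);                                        // same pattern as evaluate() above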
View File

@@ -1,4 +1,4 @@
/* vim: set sw=4 ts=8 et tw=78: */
/* vim: set sw=4 ts=4 et tw=78: */
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
@@ -57,7 +57,9 @@ for (var op in opTypeNames) {
}
}
// file ptr, path to file, line number -> Tokenizer
/*
* Tokenizer :: (file ptr, path, line number) -> Tokenizer
*/
function Tokenizer(s, f, l) {
this.cursor = 0;
this.source = String(s);
@@ -65,22 +67,23 @@ function Tokenizer(s, f, l) {
this.tokenIndex = 0;
this.lookahead = 0;
this.scanNewlines = false;
this.scanOperand = true;
this.filename = f || "";
this.lineno = l || 1;
}
Tokenizer.prototype = {
get done() {
return this.peek() == END;
// We need to set scanOperand to true here because the first thing
// might be a regexp.
return this.peek(true) == END;
},
get token() {
return this.tokens[this.tokenIndex];
},
match: function (tt) {
return this.get() == tt || this.unget();
match: function (tt, scanOperand) {
return this.get(scanOperand) == tt || this.unget();
},
mustMatch: function (tt) {
@@ -89,7 +92,7 @@ Tokenizer.prototype = {
return this.token;
},
peek: function () {
peek: function (scanOperand) {
var tt, next;
if (this.lookahead) {
next = this.tokens[(this.tokenIndex + this.lookahead) & 3];
@@ -97,15 +100,15 @@ Tokenizer.prototype = {
? NEWLINE
: next.type;
} else {
tt = this.get();
tt = this.get(scanOperand);
this.unget();
}
return tt;
},
peekOnSameLine: function () {
peekOnSameLine: function (scanOperand) {
this.scanNewlines = true;
var tt = this.peek();
var tt = this.peek(scanOperand);
this.scanNewlines = false;
return tt;
},
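scanOperand is no longer tokenizer state; each get/peek/match call now says whether an operand may start at this point, which is what tells the lexer whether a '/' opens a regexp literal or is the division operator. A minimal sketch of the distinction, using only the Tokenizer API shown in this file (the real call sites are in the suppressed parser diff):

var t = new Tokenizer("/ab+c/", "sketch.js", 1);
t.get(true);     // operand expected here, so the whole regexp literal is lexed (REGEXP)
var u = new Tokenizer("/ab+c/", "sketch.js", 1);
u.get(false);    // operator expected, so only the '/' is consumed (DIV)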
@@ -334,13 +337,6 @@ Tokenizer.prototype = {
op += '=';
} else {
token.type = tokenIds[opTypeNames[op]];
if (this.scanOperand) {
switch (token.type) {
case PLUS: token.type = UNARY_PLUS; break;
case MINUS: token.type = UNARY_MINUS; break;
}
}
token.assignOp = null;
}
@@ -364,10 +360,13 @@ Tokenizer.prototype = {
token.value = id;
},
// void -> token type
// It consumes input *only* if there is no lookahead.
// Dispatch to the appropriate lexing function depending on the input.
get: function () {
/*
* Tokenizer.get :: void -> token type
*
* Consumes input *only* if there is no lookahead.
* Dispatch to the appropriate lexing function depending on the input.
*/
get: function (scanOperand) {
var token;
while (this.lookahead) {
--this.lookahead;
@@ -395,7 +394,7 @@ Tokenizer.prototype = {
if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
ch === '$' || ch === '_') {
this.lexIdent(ch);
} else if (this.scanOperand && ch === '/') {
} else if (scanOperand && ch === '/') {
this.lexRegExp(ch);
} else if (ch in opTokens) {
this.lexOp(ch);
@@ -419,8 +418,11 @@ Tokenizer.prototype = {
return token.type;
},
// void -> undefined
// match depends on unget returning undefined.
/*
* Tokenizer.unget :: void -> undefined
*
* Match depends on unget returning undefined.
*/
unget: function () {
if (++this.lookahead == 4) throw "PANIC: too much lookahead!";
this.tokenIndex = (this.tokenIndex - 1) & 3;
@@ -431,6 +433,25 @@ Tokenizer.prototype = {
e.source = this.source;
e.cursor = this.cursor;
return e;
},
save: function () {
return {
cursor: this.cursor,
tokenIndex: this.tokenIndex,
tokens: this.tokens.slice(),
lookahead: this.lookahead,
scanNewlines: this.scanNewlines,
lineno: this.lineno
};
},
rewind: function(point) {
this.cursor = point.cursor;
this.tokenIndex = point.tokenIndex;
this.tokens = point.tokens.slice();
this.lookahead = point.lookahead;
this.scanNewlines = point.scanNewlines;
this.lineno = point.lineno;
}
};
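save() and rewind() let the parser speculate: save() snapshots the cursor, the token ring and the lookahead count, and rewind() puts them back. A minimal usage sketch; tryLex and its surrounding logic are assumed, since the real call sites are in the suppressed parser diff:

// Attempt to lex one token of the wanted type; on failure, restore the
// tokenizer exactly as it was so another production can try instead.
function tryLex(t, wantedType) {
    var point = t.save();
    if (t.get(true) == wantedType)
        return t.token;          // keep the consumed token
    t.rewind(point);             // undo cursor, token ring, lookahead and lineno
    return null;
}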

File diff suppressed because it is too large