Change TokenStream to return operator tokens as is, without grouping them under Token.UNARYOP, Token.PRIMARY, etc.; the grouping is now done only in the parser. This way, exceptional cases where the parser has to change the grouping of tokens (such as the reinterpretation of Token.IN) are easy to deal with. Another advantage is a simpler decompiler, since it no longer needs nested switches to unwrap tokens.

This commit is contained in:
igor%mir2.org 2003-08-13 14:21:39 +00:00
parent 246f85ca9c
commit a5f9d59614
4 changed files with 246 additions and 273 deletions

View File

@ -54,14 +54,13 @@ package org.mozilla.javascript;
* this wouldn't have saved any space in the resulting source
* representation, and would have meant that I'd have to duplicate
* parser logic in the decompiler to disambiguate situations where
* newlines are important.) NativeFunction.decompile expands the
* newlines are important.) The function decompile expands the
* tokens back into their string representations, using simple
* lookahead to correct spacing and indentation.
* Token types with associated ops (ASSIGN, SHOP, PRIMARY, etc.) are
* saved as two-token pairs. Number tokens are stored inline, as a
* NUMBER token, a character representing the type, and either 1 or 4
* characters representing the bit-encoding of the number. String
*
* Assignments are saved as two-token pairs (Token.ASSIGN, op). Number tokens
* are stored inline, as a NUMBER token, a character representing the type, and
* either 1 or 4 characters representing the bit-encoding of the number. String
* types NAME, STRING and OBJECT are currently stored as a token type,
* followed by a character giving the length of the string (assumed to
* be less than 2^16), followed by the characters of the string
@ -70,13 +69,6 @@ package org.mozilla.javascript;
* save a lot of space... but would require some method of deriving
* the final constant pool entry from information available at parse
* time.
* Nested functions need a similar mechanism... fortunately the nested
* functions for a given function are generated in source order.
* Nested functions are encoded as FUNCTION followed by a function
* number (encoded as a character), which is enough information to
* find the proper generated NativeFunction instance.
*/
public class Decompiler
{
@ -127,14 +119,12 @@ public class Decompiler
append((char)Token.EOL);
}
void addOp(int token, int op)
void addAssign(int op)
{
if (!(0 <= token && token <= Token.LAST_TOKEN))
throw new IllegalArgumentException();
if (!(0 <= op && op <= Token.LAST_TOKEN))
throw new IllegalArgumentException();
append((char)token);
append((char)Token.ASSIGN);
append((char)op);
}
@ -190,7 +180,7 @@ public class Decompiler
}
else {
// we can ignore negative values, bc they're already prefixed
// by UNARYOP SUB
// by NEG
if (lbits < 0) Context.codeBug();
// will it fit in a char?
@ -292,7 +282,7 @@ public class Decompiler
int indent, int indentGap, int caseGap)
{
String source = (String)sourceObj;
int length = source.length();
if (length == 0) { return ""; }
@ -349,37 +339,20 @@ public class Decompiler
i = printSourceNumber(source, i + 1, result);
continue;
case Token.PRIMARY:
++i;
switch(source.charAt(i)) {
case Token.TRUE:
result.append("true");
break;
case Token.TRUE:
result.append("true");
break;
case Token.FALSE:
result.append("false");
break;
case Token.FALSE:
result.append("false");
break;
case Token.NULL:
result.append("null");
break;
case Token.NULL:
result.append("null");
break;
case Token.THIS:
result.append("this");
break;
case Token.TYPEOF:
result.append("typeof");
break;
case Token.VOID:
result.append("void");
break;
case Token.UNDEFINED:
result.append("undefined");
break;
}
case Token.THIS:
result.append("this");
break;
case Token.FUNCTION:
@ -588,7 +561,7 @@ public class Decompiler
case Token.ASSIGN:
++i;
switch(source.charAt(i)) {
switch (source.charAt(i)) {
case Token.NOP:
result.append(" = ");
break;
@ -680,96 +653,76 @@ public class Decompiler
result.append(" & ");
break;
case Token.EQOP:
++i;
switch(source.charAt(i)) {
case Token.SHEQ:
result.append(" === ");
break;
case Token.SHNE:
result.append(" !== ");
break;
case Token.EQ:
result.append(" == ");
break;
case Token.NE:
result.append(" != ");
break;
}
case Token.SHEQ:
result.append(" === ");
break;
case Token.RELOP:
++i;
switch(source.charAt(i)) {
case Token.LE:
result.append(" <= ");
break;
case Token.LT:
result.append(" < ");
break;
case Token.GE:
result.append(" >= ");
break;
case Token.GT:
result.append(" > ");
break;
case Token.INSTANCEOF:
result.append(" instanceof ");
break;
}
case Token.SHNE:
result.append(" !== ");
break;
case Token.SHOP:
++i;
switch(source.charAt(i)) {
case Token.LSH:
result.append(" << ");
break;
case Token.RSH:
result.append(" >> ");
break;
case Token.URSH:
result.append(" >>> ");
break;
}
case Token.EQ:
result.append(" == ");
break;
case Token.UNARYOP:
++i;
switch(source.charAt(i)) {
case Token.TYPEOF:
result.append("typeof ");
break;
case Token.NE:
result.append(" != ");
break;
case Token.VOID:
result.append("void ");
break;
case Token.LE:
result.append(" <= ");
break;
case Token.NOT:
result.append('!');
break;
case Token.LT:
result.append(" < ");
break;
case Token.BITNOT:
result.append('~');
break;
case Token.GE:
result.append(" >= ");
break;
case Token.POS:
result.append('+');
break;
case Token.GT:
result.append(" > ");
break;
case Token.NEG:
result.append('-');
break;
}
case Token.INSTANCEOF:
result.append(" instanceof ");
break;
case Token.LSH:
result.append(" << ");
break;
case Token.RSH:
result.append(" >> ");
break;
case Token.URSH:
result.append(" >>> ");
break;
case Token.TYPEOF:
result.append("typeof ");
break;
case Token.VOID:
result.append("void ");
break;
case Token.NOT:
result.append('!');
break;
case Token.BITNOT:
result.append('~');
break;
case Token.POS:
result.append('+');
break;
case Token.NEG:
result.append('-');
break;
case Token.INC:

View File

@ -231,7 +231,7 @@ class Parser {
if (memberExprNode != null) {
// transform 'function' <memberExpr> to <memberExpr> = function
// even in the decompilated source
decompiler.addOp(Token.ASSIGN, Token.NOP);
decompiler.addAssign(Token.NOP);
}
FunctionNode fnNode = nf.createFunction(name);
@ -573,9 +573,7 @@ class Parser {
}
}
tt = ts.peekToken();
if (tt == Token.RELOP && ts.getOp() == Token.IN) {
ts.matchToken(Token.RELOP);
if (ts.matchToken(Token.IN)) {
decompiler.addToken(Token.IN);
// 'cond' is the object over which we're iterating
cond = expr(ts, false);
@ -862,7 +860,7 @@ class Parser {
if (ts.getOp() != Token.NOP)
reportError(ts, "msg.bad.var.init");
decompiler.addOp(Token.ASSIGN, Token.NOP);
decompiler.addAssign(Token.NOP);
init = assignExpr(ts, inForInit);
nf.addChildToBack(name, init);
@ -893,7 +891,7 @@ class Parser {
if (ts.matchToken(Token.ASSIGN)) {
// omitted: "invalid assignment left-hand side" check.
int op = ts.getOp();
decompiler.addOp(Token.ASSIGN, op);
decompiler.addAssign(op);
pn = nf.createAssignment(op, pn, assignExpr(ts, inForInit));
}
@ -981,25 +979,35 @@ class Parser {
throws IOException, ParserException
{
Object pn = relExpr(ts, inForInit);
while (ts.matchToken(Token.EQOP)) {
int op = ts.getOp();
int decompilerOp = op;
if (languageVersion == Context.VERSION_1_2) {
if (op == Token.SHEQ) {
/*
* Emulate the C engine; if we're under version
* 1.2, then the == operator behaves like the ===
* operator (and the source is generated by
* decompiling a === opcode), so print the ===
* operator as ==.
*/
decompilerOp = Token.EQ;
} else if (op == Token.SHNE) {
decompilerOp = Token.NE;
for (;;) {
int tt = ts.peekToken();
switch (tt) {
case Token.EQ:
case Token.NE:
case Token.SHEQ:
case Token.SHNE:
ts.getToken();
int decompilerTT = tt;
if (languageVersion == Context.VERSION_1_2) {
if (tt == Token.SHEQ) {
/*
* Emulate the C engine; if we're under version
* 1.2, then the == operator behaves like the ===
* operator (and the source is generated by
* decompiling a === opcode), so print the ===
* operator as ==.
*/
decompilerTT = Token.EQ;
} else if (tt == Token.SHNE) {
decompilerTT = Token.NE;
}
}
decompiler.addToken(decompilerTT);
pn = nf.createBinary(Token.EQOP, tt, pn,
relExpr(ts, inForInit));
continue;
}
decompiler.addOp(Token.EQOP, decompilerOp);
pn = nf.createBinary(Token.EQOP, op, pn, relExpr(ts, inForInit));
break;
}
return pn;
}
@ -1008,14 +1016,24 @@ class Parser {
throws IOException, ParserException
{
Object pn = shiftExpr(ts);
while (ts.matchToken(Token.RELOP)) {
int op = ts.getOp();
if (inForInit && op == Token.IN) {
ts.ungetToken(Token.RELOP);
break;
for (;;) {
int tt = ts.peekToken();
switch (tt) {
case Token.IN:
if (inForInit)
break;
// fall through
case Token.INSTANCEOF:
case Token.LE:
case Token.LT:
case Token.GE:
case Token.GT:
ts.getToken();
decompiler.addToken(tt);
pn = nf.createBinary(Token.RELOP, tt, pn, shiftExpr(ts));
continue;
}
decompiler.addOp(Token.RELOP, op);
pn = nf.createBinary(Token.RELOP, op, pn, shiftExpr(ts));
break;
}
return pn;
}
@ -1024,10 +1042,18 @@ class Parser {
throws IOException, ParserException
{
Object pn = addExpr(ts);
while (ts.matchToken(Token.SHOP)) {
int op = ts.getOp();
decompiler.addOp(Token.SHOP, op);
pn = nf.createBinary(op, pn, addExpr(ts));
for (;;) {
int tt = ts.peekToken();
switch (tt) {
case Token.LSH:
case Token.URSH:
case Token.RSH:
ts.getToken();
decompiler.addToken(tt);
pn = nf.createBinary(tt, pn, addExpr(ts));
continue;
}
break;
}
return pn;
}
@ -1035,15 +1061,18 @@ class Parser {
private Object addExpr(TokenStream ts)
throws IOException, ParserException
{
int tt;
Object pn = mulExpr(ts);
while ((tt = ts.getToken()) == Token.ADD || tt == Token.SUB) {
decompiler.addToken(tt);
// flushNewLines
pn = nf.createBinary(tt, pn, mulExpr(ts));
for (;;) {
int tt = ts.peekToken();
if (tt == Token.ADD || tt == Token.SUB) {
ts.getToken();
decompiler.addToken(tt);
// flushNewLines
pn = nf.createBinary(tt, pn, mulExpr(ts));
continue;
}
break;
}
ts.ungetToken(tt);
return pn;
}
@ -1051,16 +1080,19 @@ class Parser {
private Object mulExpr(TokenStream ts)
throws IOException, ParserException
{
int tt;
Object pn = unaryExpr(ts);
while ((tt = ts.peekToken()) == Token.MUL ||
tt == Token.DIV ||
tt == Token.MOD) {
tt = ts.getToken();
decompiler.addToken(tt);
pn = nf.createBinary(tt, pn, unaryExpr(ts));
for (;;) {
int tt = ts.peekToken();
switch (tt) {
case Token.MUL:
case Token.DIV:
case Token.MOD:
ts.getToken();
decompiler.addToken(tt);
pn = nf.createBinary(tt, pn, unaryExpr(ts));
continue;
}
break;
}
return pn;
@ -1076,18 +1108,21 @@ class Parser {
ts.flags &= ~ts.TSF_REGEXP;
switch(tt) {
case Token.UNARYOP:
int op = ts.getOp();
decompiler.addOp(Token.UNARYOP, op);
return nf.createUnary(Token.UNARYOP, op, unaryExpr(ts));
case Token.TYPEOF:
case Token.VOID:
case Token.NOT:
case Token.BITNOT:
decompiler.addToken(tt);
return nf.createUnary(Token.UNARYOP, tt, unaryExpr(ts));
case Token.ADD:
// Convert to special POS token in decompiler and parse tree
decompiler.addOp(Token.UNARYOP, Token.POS);
decompiler.addToken(Token.POS);
return nf.createUnary(Token.UNARYOP, Token.POS, unaryExpr(ts));
case Token.SUB:
decompiler.addOp(Token.UNARYOP, Token.NEG);
// Convert to special NEG token in decompiler and parse tree
decompiler.addToken(Token.NEG);
return nf.createUnary(Token.UNARYOP, Token.NEG, unaryExpr(ts));
case Token.INC:
@ -1386,10 +1421,12 @@ class Parser {
return nf.createRegExp(index);
}
case Token.PRIMARY:
int op = ts.getOp();
decompiler.addOp(Token.PRIMARY, op);
return nf.createLeaf(Token.PRIMARY, op);
case Token.NULL:
case Token.THIS:
case Token.FALSE:
case Token.TRUE:
decompiler.addToken(tt);
return nf.createLeaf(Token.PRIMARY, tt);
case Token.RESERVED:
reportError(ts, "msg.reserved.id");

View File

@ -157,32 +157,27 @@ public class Token
COLON = 78,
OR = 79, // logical or (||)
AND = 80, // logical and (&&)
EQOP = 81, // equality ops (== !=)
RELOP = 82, // relational ops (< <= > >=)
SHOP = 83, // shift ops (<< >> >>>)
UNARYOP = 84, // unary prefix operator
INC = 85, // increment/decrement (++ --)
DEC = 86,
DOT = 87, // member operator (.)
PRIMARY = 88, // true, false, null, this
FUNCTION = 89, // function keyword
EXPORT = 90, // export keyword
IMPORT = 91, // import keyword
IF = 92, // if keyword
ELSE = 93, // else keyword
SWITCH = 94, // switch keyword
CASE = 95, // case keyword
DEFAULT = 96, // default keyword
WHILE = 97, // while keyword
DO = 98, // do keyword
FOR = 99, // for keyword
BREAK = 100, // break keyword
CONTINUE = 101, // continue keyword
VAR = 102, // var keyword
WITH = 103, // with keyword
CATCH = 104, // catch keyword
FINALLY = 105, // finally keyword
RESERVED = 106, // reserved keywords
INC = 81, // increment/decrement (++ --)
DEC = 82,
DOT = 83, // member operator (.)
FUNCTION = 84, // function keyword
EXPORT = 85, // export keyword
IMPORT = 86, // import keyword
IF = 87, // if keyword
ELSE = 88, // else keyword
SWITCH = 89, // switch keyword
CASE = 90, // case keyword
DEFAULT = 91, // default keyword
WHILE = 92, // while keyword
DO = 93, // do keyword
FOR = 94, // for keyword
BREAK = 95, // break keyword
CONTINUE = 96, // continue keyword
VAR = 97, // var keyword
WITH = 98, // with keyword
CATCH = 99, // catch keyword
FINALLY = 100, // finally keyword
RESERVED = 101, // reserved keywords
/** Added by Mike - these are JSOPs in the jsref, but I
* don't have them yet in the java implementation...
@ -191,36 +186,42 @@ public class Token
* Most of these go in the 'op' field when returning
* more general token types, eg. 'DIV' as the op of 'ASSIGN'.
*/
NOP = 107, // NOP
PRE = 108, // for INC, DEC nodes.
POST = 109,
NOP = 102, // NOP
PRE = 103, // for INC, DEC nodes.
POST = 104,
/**
* For JSOPs associated with keywords...
* eg. op = THIS; token = PRIMARY
* eg. op = ADD; token = ASSIGN
*/
VOID = 110,
VOID = 105,
/* types used for the parse tree - these never get returned
* by the scanner.
*/
BLOCK = 111, // statement block
ARRAYLIT = 112, // array literal
OBJLIT = 113, // object literal
LABEL = 114, // label
TARGET = 115,
LOOP = 116,
ENUMDONE = 117,
EXPRSTMT = 118,
PARENT = 119,
CONVERT = 120,
JSR = 121,
NEWLOCAL = 122,
USELOCAL = 123,
SCRIPT = 124, // top-level node for entire script
LAST_TOKEN = 124;
EQOP = 106, // equality ops (== !=)
RELOP = 107, // relational ops (< <= > >= in instanceof)
UNARYOP = 108, // unary prefix operator
PRIMARY = 109, // true, false, null, this
BLOCK = 110, // statement block
ARRAYLIT = 111, // array literal
OBJLIT = 112, // object literal
LABEL = 113, // label
TARGET = 114,
LOOP = 115,
ENUMDONE = 116,
EXPRSTMT = 117,
PARENT = 118,
CONVERT = 119,
JSR = 120,
NEWLOCAL = 121,
USELOCAL = 122,
SCRIPT = 123, // top-level node for entire script
LAST_TOKEN = 123;
public static String name(int token)
{
@ -310,7 +311,6 @@ public class Token
case AND: return "and";
case EQOP: return "eqop";
case RELOP: return "relop";
case SHOP: return "shop";
case UNARYOP: return "unaryop";
case INC: return "inc";
case DEC: return "dec";

View File

@ -83,12 +83,7 @@ public class TokenStream {
String name = Token.name(token);
switch (token) {
case Token.UNARYOP:
case Token.ASSIGN:
case Token.PRIMARY:
case Token.EQOP:
case Token.SHOP:
case Token.RELOP:
return name + " " + Token.name(this.op);
case Token.STRING:
@ -117,20 +112,20 @@ public class TokenStream {
Id_do = Token.DO,
Id_else = Token.ELSE,
Id_export = Token.EXPORT,
Id_false = Token.PRIMARY | (Token.FALSE << 8),
Id_false = Token.FALSE,
Id_for = Token.FOR,
Id_function = Token.FUNCTION,
Id_if = Token.IF,
Id_in = Token.RELOP | (Token.IN << 8),
Id_in = Token.IN,
Id_new = Token.NEW,
Id_null = Token.PRIMARY | (Token.NULL << 8),
Id_null = Token.NULL,
Id_return = Token.RETURN,
Id_switch = Token.SWITCH,
Id_this = Token.PRIMARY | (Token.THIS << 8),
Id_true = Token.PRIMARY | (Token.TRUE << 8),
Id_typeof = Token.UNARYOP | (Token.TYPEOF << 8),
Id_this = Token.THIS,
Id_true = Token.TRUE,
Id_typeof = Token.TYPEOF,
Id_var = Token.VAR,
Id_void = Token.UNARYOP | (Token.VOID << 8),
Id_void = Token.VOID,
Id_while = Token.WHILE,
Id_with = Token.WITH,
@ -152,7 +147,7 @@ public class TokenStream {
Id_goto = Token.RESERVED,
Id_implements = Token.RESERVED,
Id_import = Token.IMPORT,
Id_instanceof = Token.RELOP | (Token.INSTANCEOF << 8),
Id_instanceof = Token.INSTANCEOF,
Id_int = Token.RESERVED,
Id_interface = Token.RESERVED,
Id_long = Token.RESERVED,
@ -271,7 +266,6 @@ public class TokenStream {
// #/generated#
// #/string_id_map#
if (id == 0) { return Token.EOF; }
this.op = id >> 8;
return id & 0xff;
}
@ -766,10 +760,9 @@ public class TokenStream {
case '=':
if (matchChar('=')) {
if (matchChar('='))
this.op = Token.SHEQ;
return Token.SHEQ;
else
this.op = Token.EQ;
return Token.EQOP;
return Token.EQ;
} else {
this.op = Token.NOP;
return Token.ASSIGN;
@ -778,13 +771,11 @@ public class TokenStream {
case '!':
if (matchChar('=')) {
if (matchChar('='))
this.op = Token.SHNE;
return Token.SHNE;
else
this.op = Token.NE;
return Token.EQOP;
return Token.NE;
} else {
this.op = Token.NOT;
return Token.UNARYOP;
return Token.NOT;
}
case '<':
@ -804,16 +795,13 @@ public class TokenStream {
this.op = Token.LSH;
return Token.ASSIGN;
} else {
this.op = Token.LSH;
return Token.SHOP;
return Token.LSH;
}
} else {
if (matchChar('=')) {
this.op = Token.LE;
return Token.RELOP;
return Token.LE;
} else {
this.op = Token.LT;
return Token.RELOP;
return Token.LT;
}
}
@ -824,25 +812,21 @@ public class TokenStream {
this.op = Token.URSH;
return Token.ASSIGN;
} else {
this.op = Token.URSH;
return Token.SHOP;
return Token.URSH;
}
} else {
if (matchChar('=')) {
this.op = Token.RSH;
return Token.ASSIGN;
} else {
this.op = Token.RSH;
return Token.SHOP;
return Token.RSH;
}
}
} else {
if (matchChar('=')) {
this.op = Token.GE;
return Token.RELOP;
return Token.GE;
} else {
this.op = Token.GT;
return Token.RELOP;
return Token.GT;
}
}
@ -931,16 +915,15 @@ public class TokenStream {
}
case '%':
this.op = Token.MOD;
if (matchChar('=')) {
this.op = Token.MOD;
return Token.ASSIGN;
} else {
return Token.MOD;
}
case '~':
this.op = Token.BITNOT;
return Token.UNARYOP;
return Token.BITNOT;
case '+':
if (matchChar('=')) {