Addressing 256575: refactoring of parser/scanner interaction to address bugs in eol/semicolon handling

This commit is contained in:
igor%mir2.org 2004-08-23 19:06:45 +00:00
parent 1ac8922512
commit 69135dd38a
8 changed files with 689 additions and 665 deletions

View File

@ -144,15 +144,6 @@ public class Decompiler
append((char)Token.EOL);
}
/**
 * Appends the two-character encoding of a compound assignment:
 * Token.ASSIGNOP followed by the operator token, each cast to char.
 *
 * @param op operator token; must lie in [0, Token.LAST_TOKEN]
 * @throws IllegalArgumentException if op is outside that range
 */
void addAssignOp(int op)
{
    if (op < 0 || op > Token.LAST_TOKEN) {
        throw new IllegalArgumentException();
    }
    append((char)Token.ASSIGNOP);
    append((char)op);
}
void addName(String str)
{
addToken(Token.NAME);
@ -598,53 +589,48 @@ public class Decompiler
result.append(" = ");
break;
case Token.ASSIGNOP:
++i;
switch (source.charAt(i)) {
case Token.ADD:
result.append(" += ");
break;
case Token.ASSIGN_ADD:
result.append(" += ");
break;
case Token.SUB:
result.append(" -= ");
break;
case Token.ASSIGN_SUB:
result.append(" -= ");
break;
case Token.MUL:
result.append(" *= ");
break;
case Token.ASSIGN_MUL:
result.append(" *= ");
break;
case Token.DIV:
result.append(" /= ");
break;
case Token.ASSIGN_DIV:
result.append(" /= ");
break;
case Token.MOD:
result.append(" %= ");
break;
case Token.ASSIGN_MOD:
result.append(" %= ");
break;
case Token.BITOR:
result.append(" |= ");
break;
case Token.ASSIGN_BITOR:
result.append(" |= ");
break;
case Token.BITXOR:
result.append(" ^= ");
break;
case Token.ASSIGN_BITXOR:
result.append(" ^= ");
break;
case Token.BITAND:
result.append(" &= ");
break;
case Token.ASSIGN_BITAND:
result.append(" &= ");
break;
case Token.LSH:
result.append(" <<= ");
break;
case Token.ASSIGN_LSH:
result.append(" <<= ");
break;
case Token.RSH:
result.append(" >>= ");
break;
case Token.ASSIGN_RSH:
result.append(" >>= ");
break;
case Token.URSH:
result.append(" >>>= ");
break;
}
case Token.ASSIGN_URSH:
result.append(" >>>= ");
break;
case Token.HOOK:

View File

@ -561,16 +561,8 @@ final class IRFactory
String name;
int type = lhs.getType();
Node lvalue = lhs;
switch (type) {
case Token.NAME:
case Token.GETPROP:
case Token.GETELEM:
case Token.GET_REF:
break;
case Token.VAR:
Node lvalue;
if (type == Token.VAR) {
/*
* check that there was only one variable given.
* we can't do this in the parser, because then the
@ -582,11 +574,12 @@ final class IRFactory
parser.reportError("msg.mult.index");
}
lvalue = Node.newString(Token.NAME, lastChild.getString());
break;
default:
parser.reportError("msg.bad.for.in.lhs");
return obj;
} else {
lvalue = makeReference(lhs);
if (lvalue == null) {
parser.reportError("msg.bad.for.in.lhs");
return obj;
}
}
Node localBlock = new Node(Token.LOCAL_BLOCK);
@ -601,7 +594,7 @@ final class IRFactory
id.putProp(Node.LOCAL_BLOCK_PROP, localBlock);
Node newBody = new Node(Token.BLOCK);
Node assign = createAssignment(lvalue, id);
Node assign = simpleAssignment(lvalue, id);
newBody.addChildToBack(new Node(Token.EXPR_VOID, assign));
newBody.addChildToBack(body);
@ -1225,14 +1218,8 @@ final class IRFactory
return new Node(nodeType, left, right);
}
Node createAssignment(Node left, Node right)
private Node simpleAssignment(Node left, Node right)
{
left = makeReference(left);
if (left == null) {
parser.reportError("msg.bad.assign.left");
return null;
}
int nodeType = left.getType();
switch (nodeType) {
case Token.NAME:
@ -1260,12 +1247,30 @@ final class IRFactory
throw Kit.codeBug();
}
Node createAssignmentOp(int assignOp, Node left, Node right)
Node createAssignment(int assignType, Node left, Node right)
{
left = makeReference(left);
if (left == null) {
parser.reportError("msg.bad.assign.left");
return null;
return right;
}
int assignOp;
switch (assignType) {
case Token.ASSIGN:
return simpleAssignment(left, right);
case Token.ASSIGN_BITOR: assignOp = Token.BITOR; break;
case Token.ASSIGN_BITXOR: assignOp = Token.BITXOR; break;
case Token.ASSIGN_BITAND: assignOp = Token.BITAND; break;
case Token.ASSIGN_LSH: assignOp = Token.LSH; break;
case Token.ASSIGN_RSH: assignOp = Token.RSH; break;
case Token.ASSIGN_URSH: assignOp = Token.URSH; break;
case Token.ASSIGN_ADD: assignOp = Token.ADD; break;
case Token.ASSIGN_SUB: assignOp = Token.SUB; break;
case Token.ASSIGN_MUL: assignOp = Token.MUL; break;
case Token.ASSIGN_DIV: assignOp = Token.DIV; break;
case Token.ASSIGN_MOD: assignOp = Token.MOD; break;
default: throw Kit.codeBug();
}
int nodeType = left.getType();

File diff suppressed because it is too large Load Diff

View File

@ -197,6 +197,17 @@ public class ScriptRuntime {
return factory;
}
/**
 * Tells whether c is one of the four line terminator characters:
 * '\n', '\r', U+2028 or U+2029.
 * Public so that NativeRegExp can access it.
 */
public static boolean isJSLineTerminator(int c)
{
    // Quick reject: none of the four eol characters has any of the
    // 0xDFD0 bits set, so a non-zero masked value can never match.
    boolean candidate = (c & 0xDFD0) == 0;
    if (candidate) {
        switch (c) {
          case '\n':
          case '\r':
          case 0x2028:
          case 0x2029:
            return true;
        }
    }
    return false;
}
public static Boolean wrapBoolean(boolean b)
{
return b ? Boolean.TRUE : Boolean.FALSE;

View File

@ -150,10 +150,11 @@ public class Token
ESCXMLTEXT = 73,
TOATTRNAME = 74,
DESCENDANTS = 75,
XML_REF = 76,
XML_REF = 76;
LAST_BYTECODE_TOKEN = 76,
// End of interpreter bytecodes
public final static int
LAST_BYTECODE_TOKEN = XML_REF,
TRY = 77,
SEMI = 78, // semicolon
@ -164,64 +165,80 @@ public class Token
LP = 83, // left and right parentheses
RP = 84,
COMMA = 85, // comma operator
ASSIGN = 86, // simple assignment (=)
ASSIGNOP = 87, // assignment with operation (+= -= etc.)
HOOK = 88, // conditional (?:)
COLON = 89,
OR = 90, // logical or (||)
AND = 91, // logical and (&&)
INC = 92, // increment/decrement (++ --)
DEC = 93,
DOT = 94, // member operator (.)
FUNCTION = 95, // function keyword
EXPORT = 96, // export keyword
IMPORT = 97, // import keyword
IF = 98, // if keyword
ELSE = 99, // else keyword
SWITCH = 100, // switch keyword
CASE = 101, // case keyword
DEFAULT = 102, // default keyword
WHILE = 103, // while keyword
DO = 104, // do keyword
FOR = 105, // for keyword
BREAK = 106, // break keyword
CONTINUE = 107, // continue keyword
VAR = 108, // var keyword
WITH = 109, // with keyword
CATCH = 110, // catch keyword
FINALLY = 111, // finally keyword
VOID = 112, // void keyword
RESERVED = 113, // reserved keywords
EMPTY = 114,
// Assignment tokens: plain '=' followed by one token per compound
// assignment operator.
ASSIGN = 86, // simple assignment (=)
ASSIGN_BITOR = 87, // |=
ASSIGN_BITXOR = 88, // ^=
ASSIGN_BITAND = 89, // &=
ASSIGN_LSH = 90, // <<=
// NOTE(review): the values below are unique but not in declaration order
// (ASSIGN_RSH = 94 is listed between 90 and 91) -- confirm this is intended.
ASSIGN_RSH = 94, // >>=
ASSIGN_URSH = 91, // >>>=
ASSIGN_ADD = 92, // +=
ASSIGN_SUB = 93, // -=
ASSIGN_MUL = 95, // *=
ASSIGN_DIV = 96, // /=
ASSIGN_MOD = 97; // %=
public final static int
FIRST_ASSIGN = ASSIGN,
LAST_ASSIGN = ASSIGN_MOD,
HOOK = 98, // conditional (?:)
COLON = 99,
OR = 100, // logical or (||)
AND = 101, // logical and (&&)
INC = 102, // increment/decrement (++ --)
DEC = 103,
DOT = 104, // member operator (.)
FUNCTION = 105, // function keyword
EXPORT = 106, // export keyword
IMPORT = 107, // import keyword
IF = 108, // if keyword
ELSE = 109, // else keyword
SWITCH = 110, // switch keyword
CASE = 111, // case keyword
DEFAULT = 112, // default keyword
WHILE = 113, // while keyword
DO = 114, // do keyword
FOR = 115, // for keyword
BREAK = 116, // break keyword
CONTINUE = 117, // continue keyword
VAR = 118, // var keyword
WITH = 119, // with keyword
CATCH = 120, // catch keyword
FINALLY = 121, // finally keyword
VOID = 122, // void keyword
RESERVED = 123, // reserved keywords
EMPTY = 124,
/* types used for the parse tree - these never get returned
* by the scanner.
*/
BLOCK = 115, // statement block
LABEL = 116, // label
TARGET = 117,
LOOP = 118,
EXPR_VOID = 119, // expression statement in functions
EXPR_RESULT = 120, // expression statement in scripts
JSR = 121,
SCRIPT = 122, // top-level node for entire script
TYPEOFNAME = 123, // for typeof(simple-name)
USE_STACK = 124,
SETPROP_OP = 125, // x.y op= something
SETELEM_OP = 126, // x[y] op= something
LOCAL_BLOCK = 127,
SET_REF_OP = 128, // *reference op= something
BLOCK = 125, // statement block
LABEL = 126, // label
TARGET = 127,
LOOP = 128,
EXPR_VOID = 129, // expression statement in functions
EXPR_RESULT = 130, // expression statement in scripts
JSR = 131,
SCRIPT = 132, // top-level node for entire script
TYPEOFNAME = 133, // for typeof(simple-name)
USE_STACK = 134,
SETPROP_OP = 135, // x.y op= something
SETELEM_OP = 136, // x[y] op= something
LOCAL_BLOCK = 137,
SET_REF_OP = 138, // *reference op= something
// For XML support:
DOTDOT = 129, // member operator (..)
XML = 130, // XML type
DOTQUERY = 131, // .() -- e.g., x.emps.emp.(name == "terry")
XMLATTR = 132, // @
XMLEND = 133,
DOTDOT = 139, // member operator (..)
XML = 140, // XML type
DOTQUERY = 141, // .() -- e.g., x.emps.emp.(name == "terry")
XMLATTR = 142, // @
XMLEND = 143,
LAST_TOKEN = 133;
LAST_TOKEN = 143;
public static String name(int token)
{
@ -322,7 +339,17 @@ public class Token
case RP: return "RP";
case COMMA: return "COMMA";
case ASSIGN: return "ASSIGN";
case ASSIGNOP: return "ASSIGNOP";
case ASSIGN_BITOR: return "ASSIGN_BITOR";
case ASSIGN_BITXOR: return "ASSIGN_BITXOR";
case ASSIGN_BITAND: return "ASSIGN_BITAND";
case ASSIGN_LSH: return "ASSIGN_LSH";
case ASSIGN_RSH: return "ASSIGN_RSH";
case ASSIGN_URSH: return "ASSIGN_URSH";
case ASSIGN_ADD: return "ASSIGN_ADD";
case ASSIGN_SUB: return "ASSIGN_SUB";
case ASSIGN_MUL: return "ASSIGN_MUL";
case ASSIGN_DIV: return "ASSIGN_DIV";
case ASSIGN_MOD: return "ASSIGN_MOD";
case HOOK: return "HOOK";
case COLON: return "COLON";
case OR: return "OR";

View File

@ -54,8 +54,7 @@ import java.io.*;
* @author Brendan Eich
*/
// The class is public so NativeRegExp can access TokenStream.JSLineTerminator
public class TokenStream
class TokenStream
{
/*
* For chars - because we need something out-of-range
@ -65,11 +64,13 @@ public class TokenStream
private final static int
EOF_CHAR = -1;
final static int
TSF_REGEXP = 1 << 0; // looking for a regular expression
TokenStream(Parser parser, Reader sourceReader, String sourceString,
int lineno)
{
this.parser = parser;
this.pushbackToken = Token.EOF;
this.lineno = lineno;
if (sourceReader != null) {
if (sourceString != null) Kit.codeBug();
@ -94,9 +95,6 @@ public class TokenStream
String name = Token.name(token);
switch (token) {
case Token.ASSIGNOP:
return name + " " + Token.name(this.op);
case Token.STRING:
case Token.REGEXP:
case Token.NAME:
@ -286,83 +284,17 @@ public class TokenStream
return id & 0xff;
}
// Reports an error through the parser, forwarding the message together with
// the values returned by getLineno(), getLine() and getOffset().
final void reportCurrentLineError(String message)
{
parser.reportError(message, getLineno(), getLine(), getOffset());
}
// Reports a warning through the parser, forwarding the message together with
// the values returned by getLineno(), getLine() and getOffset().
final void reportCurrentLineWarning(String message)
{
parser.reportWarning(message, getLineno(), getLine(), getOffset());
}
// Plain accessors: each one returns the field of the same (or similar) name
// without modifying any state.

final int getLineno()
{
    return lineno;
}

final int getOp()
{
    return op;
}

final String getString()
{
    return string;
}

final double getNumber()
{
    return number;
}

final int getTokenno()
{
    return tokenno;
}

// Returns the hitEOF flag.
final boolean eof()
{
    return hitEOF;
}
/* Reads the next token and compares it with toMatch.
 * On a match the token stays consumed and true is returned; otherwise
 * the token is stored as the push-back token, the token counter is
 * decremented again, and false is returned.
 */
final boolean matchToken(int toMatch) throws IOException
{
    final int next = getToken();
    if (next != toMatch) {
        // no match: undo the read
        tokenno--;
        this.pushbackToken = next;
        return false;
    }
    return true;
}
/* Stores tt as the push-back token and decrements the token counter.
 * Calls Kit.codeBug() when a push-back token other than Token.EOF is
 * already stored, unless tt is Token.ERROR.
 */
final void ungetToken(int tt)
{
    // Cannot unread more than one token
    if (tt != Token.ERROR && this.pushbackToken != Token.EOF) {
        Kit.codeBug();
    }
    tokenno--;
    this.pushbackToken = tt;
}
/* Returns the next token without consuming it: the token is read with
 * getToken(), stored as the push-back token, and the token counter is
 * decremented again.
 */
final int peekToken() throws IOException
{
    final int next = getToken();
    tokenno--;
    this.pushbackToken = next;
    return next;
}
/* Like a peek, but the token is read while significantEol is set, and
 * the flag is cleared again afterwards. The token is stored as the
 * push-back token and the token counter is decremented again.
 */
final int peekTokenSameLine() throws IOException
{
    significantEol = true;   // SCAN_NEWLINES from jsscan.h
    final int next = getToken();
    significantEol = false;  // HIDE_NEWLINES from jsscan.h
    tokenno--;
    this.pushbackToken = next;
    return next;
}
final int getToken() throws IOException
final int getToken(int flags) throws IOException
{
int c;
tokenno++;
// Check for pushed-back token
if (this.pushbackToken != Token.EOF) {
int result = this.pushbackToken;
this.pushbackToken = Token.EOF;
if (result != Token.EOL || significantEol) {
return result;
}
}
retry:
for (;;) {
@ -373,9 +305,7 @@ public class TokenStream
return Token.EOF;
} else if (c == '\n') {
dirtyLine = false;
if (significantEol) {
return Token.EOL;
}
return Token.EOL;
} else if (!isJSSpace(c)) {
if (c != '-') {
dirtyLine = true;
@ -473,8 +403,7 @@ public class TokenStream
// If implementation permits to use future reserved
// keywords in violation with the EcmaScript,
// treat it as name but issue warning
reportCurrentLineWarning(Context.getMessage1(
"msg.reserved.keyword", str));
parser.addWarning("msg.reserved.keyword", str);
}
}
}
@ -514,8 +443,8 @@ public class TokenStream
* permissive, so we warn about it.
*/
if (base == 8 && c >= '8') {
reportCurrentLineWarning(Context.getMessage1(
"msg.bad.octal.literal", c == '8' ? "8" : "9"));
parser.addWarning("msg.bad.octal.literal",
c == '8' ? "8" : "9");
base = 10;
}
addToString(c);
@ -557,11 +486,10 @@ public class TokenStream
if (base == 10 && !isInteger) {
try {
// Use Java conversion to number from string...
dval = (Double.valueOf(numString)).doubleValue();
dval = Double.valueOf(numString).doubleValue();
}
catch (NumberFormatException ex) {
reportCurrentLineError(Context.getMessage1(
"msg.caught.nfe", ex.getMessage()));
parser.addError("msg.caught.nfe");
return Token.ERROR;
}
} else {
@ -708,16 +636,14 @@ public class TokenStream
if (matchChar('|')) {
return Token.OR;
} else if (matchChar('=')) {
this.op = Token.BITOR;
return Token.ASSIGNOP;
return Token.ASSIGN_BITOR;
} else {
return Token.BITOR;
}
case '^':
if (matchChar('=')) {
this.op = Token.BITXOR;
return Token.ASSIGNOP;
return Token.ASSIGN_BITXOR;
} else {
return Token.BITXOR;
}
@ -726,8 +652,7 @@ public class TokenStream
if (matchChar('&')) {
return Token.AND;
} else if (matchChar('=')) {
this.op = Token.BITAND;
return Token.ASSIGNOP;
return Token.ASSIGN_BITAND;
} else {
return Token.BITAND;
}
@ -766,8 +691,7 @@ public class TokenStream
}
if (matchChar('<')) {
if (matchChar('=')) {
this.op = Token.LSH;
return Token.ASSIGNOP;
return Token.ASSIGN_LSH;
} else {
return Token.LSH;
}
@ -783,15 +707,13 @@ public class TokenStream
if (matchChar('>')) {
if (matchChar('>')) {
if (matchChar('=')) {
this.op = Token.URSH;
return Token.ASSIGNOP;
return Token.ASSIGN_URSH;
} else {
return Token.URSH;
}
} else {
if (matchChar('=')) {
this.op = Token.RSH;
return Token.ASSIGNOP;
return Token.ASSIGN_RSH;
} else {
return Token.RSH;
}
@ -806,8 +728,7 @@ public class TokenStream
case '*':
if (matchChar('=')) {
this.op = Token.MUL;
return Token.ASSIGNOP;
return Token.ASSIGN_MUL;
} else {
return Token.MUL;
}
@ -838,7 +759,7 @@ public class TokenStream
}
// is it a regexp?
if (allowRegExp) {
if ((flags & TSF_REGEXP) != 0) {
stringBufferTop = 0;
while ((c = getChar()) != '/') {
if (c == '\n' || c == EOF_CHAR) {
@ -879,16 +800,14 @@ public class TokenStream
if (matchChar('=')) {
this.op = Token.DIV;
return Token.ASSIGNOP;
return Token.ASSIGN_DIV;
} else {
return Token.DIV;
}
case '%':
if (matchChar('=')) {
this.op = Token.MOD;
return Token.ASSIGNOP;
return Token.ASSIGN_MOD;
} else {
return Token.MOD;
}
@ -898,8 +817,7 @@ public class TokenStream
case '+':
if (matchChar('=')) {
this.op = Token.ADD;
return Token.ASSIGNOP;
return Token.ASSIGN_ADD;
} else if (matchChar('+')) {
return Token.INC;
} else {
@ -908,8 +826,7 @@ public class TokenStream
case '-':
if (matchChar('=')) {
this.op = Token.SUB;
c = Token.ASSIGNOP;
c = Token.ASSIGN_SUB;
} else if (matchChar('-')) {
if (!dirtyLine) {
// treat HTML end-comment after possible whitespace
@ -976,12 +893,6 @@ public class TokenStream
}
}
/**
 * Tells whether c is one of the four line terminator characters:
 * '\n', '\r', U+2028 or U+2029.
 * Public so that NativeRegExp can access it.
 */
public static boolean isJSLineTerminator(int c)
{
    switch (c) {
      case '\n':
      case '\r':
      case 0x2028:
      case 0x2029:
        return true;
      default:
        return false;
    }
}
private static boolean isJSFormatChar(int c)
{
return c > 127 && Character.getType((char)c) == Character.FORMAT;
@ -1352,7 +1263,7 @@ public class TokenStream
if (isJSFormatChar(c)) {
continue;
}
if ((c & EOL_HINT_MASK) == 0 && isJSLineTerminator(c)) {
if (ScriptRuntime.isJSLineTerminator(c)) {
lineEndChar = c;
c = '\n';
}
@ -1386,7 +1297,7 @@ public class TokenStream
} else {
for (; lineEnd != sourceEnd; ++lineEnd) {
int c = sourceString.charAt(lineEnd);
if ((c & EOL_HINT_MASK) == 0 && isJSLineTerminator(c)) {
if (ScriptRuntime.isJSLineTerminator(c)) {
break;
}
}
@ -1413,7 +1324,7 @@ public class TokenStream
i = lineStart + lineLength;
}
int c = sourceBuffer[i];
if ((c & EOL_HINT_MASK) == 0 && isJSLineTerminator(c)) {
if (ScriptRuntime.isJSLineTerminator(c)) {
break;
}
}
@ -1447,21 +1358,13 @@ public class TokenStream
return true;
}
// tokenize newlines
private boolean significantEol;
// stuff other than whitespace since start of line
private boolean dirtyLine;
boolean allowRegExp;
String regExpFlags;
private String line;
private boolean fromEval;
private int pushbackToken;
private int tokenno;
private int op;
// Set this to an initial non-null value so that the Parser has
// something to retrieve even if an error has occurred and no
@ -1480,10 +1383,6 @@ public class TokenStream
private boolean hitEOF = false;
// Optimization for faster check for eol character: isJSLineTerminator(c)
// returns true only when (c & EOL_HINT_MASK) == 0
private static final int EOL_HINT_MASK = 0xdfd0;
private int lineStart = 0;
private int lineno;
private int lineEndChar = -1;

View File

@ -389,7 +389,7 @@ if (regexp.anchorCh >= 0) {
private static boolean isLineTerm(char c)
{
return TokenStream.isJSLineTerminator(c);
return ScriptRuntime.isJSLineTerminator(c);
}
private static boolean isREWhiteSpace(int c)

View File

@ -385,6 +385,9 @@ msg.try.no.catchfinally =\
msg.syntax =\
syntax error
msg.unexpected.eof =\
Unexpected end of file
msg.XML.bad.form =\
illegally formed XML syntax
@ -517,7 +520,7 @@ msg.missing.exponent =\
missing exponent
msg.caught.nfe =\
number format error: {0}
number format error
msg.unterminated.string.lit =\
unterminated string literal