gecko-dev/js/js2/parser.cpp
2000-04-07 00:24:42 +00:00

1391 lines
38 KiB
C++

// -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
//
// The contents of this file are subject to the Netscape Public
// License Version 1.1 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of
// the License at http://www.mozilla.org/NPL/
//
// Software distributed under the License is distributed on an "AS
// IS" basis, WITHOUT WARRANTY OF ANY KIND, either express oqr
// implied. See the License for the specific language governing
// rights and limitations under the License.
//
// The Original Code is the JavaScript 2 Prototype.
//
// The Initial Developer of the Original Code is Netscape
// Communications Corporation. Portions created by Netscape are
// Copyright (C) 1998 Netscape Communications Corporation. All
// Rights Reserved.
#include "numerics.h"
#include "parser.h"
#include "world.h"
namespace JS = JavaScript;
//
// Reader
//
// Create a Reader reading characters from the source string.
// sourceLocation describes the origin of the source and may be used for error messages.
// initialLineNum is the line number of the first line of the source string.
JS::Reader::Reader(const String &source, const String &sourceLocation, uint32 initialLineNum):
source(source), sourceLocation(sourceLocation), initialLineNum(initialLineNum)
{
const char16 *b = Reader::source.data();
begin = b;
p = b;
end = b + Reader::source.size();
#ifdef DEBUG
recordString = 0;
#endif
beginLine();
}
// Mark the beginning of a line. Call this after reading every line break to fill
// out the line start table.
void JS::Reader::beginLine()
{
ASSERT(p <= end && (!linePositions.size() || p > linePositions.back()));
linePositions.push_back(p);
}
// Return the number of the line containing the given character position.
// The line starts should have been recorded by calling beginLine.
uint32 JS::Reader::posToLineNum(uint32 pos) const
{
ASSERT(pos <= getPos());
std::vector<const char16 *>::const_iterator i = std::upper_bound(linePositions.begin(), linePositions.end(), begin + pos);
ASSERT(i != linePositions.begin());
return static_cast<uint32>(i-1 - linePositions.begin()) + initialLineNum;
}
// Return the character position as well as pointers to the beginning and end (not including
// the line terminator) of the nth line. If lineNum is out of range, return 0 and two nulls.
// The line starts should have been recorded by calling beginLine(). If the nth line is the
// last one recorded, then getLine manually finds the line ending by searching for a line
// break; otherwise, getLine assumes that the line ends one character before the beginning
// of the next line.
uint32 JS::Reader::getLine(uint32 lineNum, const char16 *&lineBegin, const char16 *&lineEnd) const
{
lineBegin = 0;
lineEnd = 0;
if (lineNum < initialLineNum)
return 0;
lineNum -= initialLineNum;
if (lineNum >= linePositions.size())
return 0;
lineBegin = linePositions[lineNum];
const char16 *e;
++lineNum;
if (lineNum < linePositions.size())
e = linePositions[lineNum] - 1;
else {
e = lineBegin;
const char16 *end = Reader::end;
while (e != end && !isLineBreak(*e))
++e;
}
lineEnd = e;
return static_cast<uint32>(lineBegin - begin);
}
// Begin accumulating characters into the recordString, whose initial value is
// ignored and cleared. Each character passed to recordChar() is added to the end
// of the recordString. Recording ends when endRecord() or beginLine() is called.
// Recording is significantly optimized when the characters passed to readChar()
// are the same characters as read by get(). In this case the record String does
// not get allocated until endRecord() is called or a discrepancy appears between
// get() and recordChar().
void JS::Reader::beginRecording(String &recordString)
{
Reader::recordString = &recordString;
recordBase = p;
recordPos = p;
}
// Append ch to the recordString.
void JS::Reader::recordChar(char16 ch)
{
ASSERT(recordString);
if (recordPos) {
if (recordPos != end && *recordPos == ch) {
recordPos++;
return;
} else {
recordString->assign(recordBase, recordPos);
recordPos = 0;
}
}
*recordString += ch;
}
// Finish recording characters into the recordString that was last passed to beginRecording().
// Return that recordString.
JS::String &JS::Reader::endRecording()
{
String *rs = recordString;
ASSERT(rs);
if (recordPos)
rs->assign(recordBase, recordPos);
recordString = 0;
return *rs;
}
// Report an error at the given character position in the source code.
void JS::Reader::error(Exception::Kind kind, const String &message, uint32 pos)
{
uint32 lineNum = posToLineNum(pos);
const char16 *lineBegin;
const char16 *lineEnd;
uint32 linePos = getLine(lineNum, lineBegin, lineEnd);
ASSERT(lineBegin && lineEnd && linePos <= pos);
throw Exception(kind, message, sourceLocation, lineNum, pos - linePos, pos, lineBegin, lineEnd);
}
//
// Lexer
//
const char *const JS::Token::kindNames[] = {
// Special
"end of input", // Token::end
"number", // Token::number
"string", // Token::string
"unit", // Token::unit
"regular expression",// Token::regExp
// Punctuators
"(", // Token::openParenthesis
")", // Token::closeParenthesis
"[", // Token::openBracket
"]", // Token::closeBracket
"{", // Token::openBrace
"}", // Token::closeBrace
",", // Token::comma
";", // Token::semicolon
".", // Token::dot
"..", // Token::doubleDot
"...", // Token::tripleDot
"->", // Token::arrow
":", // Token::colon
"::", // Token::doubleColon
"#", // Token::pound
"@", // Token::at
"++", // Token::increment
"--", // Token::decrement
"~", // Token::complement
"!", // Token::logicalNot
"*", // Token::times
"/", // Token::divide
"%", // Token::modulo
"+", // Token::plus
"-", // Token::minus
"<<", // Token::leftShift
">>", // Token::rightShift
">>>", // Token::logicalRightShift
"&&", // Token::logicalAnd
"^^", // Token::logicalXor
"||", // Token::logicalOr
"&", // Token::bitwiseAnd
"^", // Token::bitwiseXor
"|", // Token::bitwiseOr
"=", // Token::assignment
"*=", // Token::timesEquals
"/=", // Token::divideEquals
"%=", // Token::moduloEquals
"+=", // Token::plusEquals
"-=", // Token::minusEquals
"<<=", // Token::leftShiftEquals
">>=", // Token::rightShiftEquals
">>>=", // Token::logicalRightShiftEquals
"&&=", // Token::logicalAndEquals
"^^=", // Token::logicalXorEquals
"||=", // Token::logicalOrEquals
"&=", // Token::bitwiseAndEquals
"^=", // Token::bitwiseXorEquals
"|=", // Token::bitwiseOrEquals
"==", // Token::equal
"!=", // Token::notEqual
"<", // Token::lessThan
"<=", // Token::lessThanOrEqual
">", // Token::greaterThan
">=", // Token::greaterThanOrEqual
"===", // Token::identical
"!==", // Token::notIdentical
"?", // Token::question
// Reserved words
"abstract", // Token::Abstract
"break", // Token::Break
"case", // Token::Case
"catch", // Token::Catch
"class", // Token::Class
"const", // Token::Const
"continue", // Token::Continue
"debugger", // Token::Debugger
"default", // Token::Default
"delete", // Token::Delete
"do", // Token::Do
"else", // Token::Else
"enum", // Token::Enum
"eval", // Token::Eval
"export", // Token::Export
"extends", // Token::Extends
"false", // Token::False
"final", // Token::Final
"finally", // Token::Finally
"for", // Token::For
"function", // Token::Function
"goto", // Token::Goto
"if", // Token::If
"implements", // Token::Implements
"import", // Token::Import
"in", // Token::In
"instanceof", // Token::Instanceof
"native", // Token::Native
"new", // Token::New
"null", // Token::Null
"package", // Token::Package
"private", // Token::Private
"protected", // Token::Protected
"public", // Token::Public
"return", // Token::Return
"static", // Token::Static
"super", // Token::Super
"switch", // Token::Switch
"synchronized", // Token::Synchronized
"this", // Token::This
"throw", // Token::Throw
"throws", // Token::Throws
"transient", // Token::Transient
"true", // Token::True
"try", // Token::Try
"typeof", // Token::Typeof
"var", // Token::Var
"volatile", // Token::Volatile
"while", // Token::While
"with", // Token::With
// Non-reserved words
"box", // Token::Box
"constructor", // Token::Constructor
"field", // Token::Field
"get", // Token::Get
"language", // Token::Language
"local", // Token::Local
"method", // Token::Method
"override", // Token::Override
"set", // Token::Set
"version", // Token::Version
"identifier" // Token::identifier
};
// Initialize the keywords in the given world.
void JS::Token::initKeywords(World &world)
{
const char *const*keywordName = kindNames + KeywordsBegin;
for (Kind kind = KeywordsBegin; kind != KeywordsEnd; kind = Kind(kind+1))
world.identifiers[widenCString(*keywordName++)].tokenKind = kind;
}
// Print a description of the token to f.
void JS::Token::print(Formatter &f, bool debug) const
{
switch (getKind()) {
case end:
f << "[end]";
break;
case number:
if (debug)
f << "[number " << getValue() << ']';
f << getChars();
break;
case unit:
if (debug)
f << "[unit]";
case string:
f << '"' << getChars() << '"';
break;
case regExp:
f << '/' << getIdentifier() << '/' << getChars();
break;
case identifier:
if (debug)
f << "[identifier]";
f << getIdentifier();
break;
default:
f << getKind();
}
}
// Create a new Lexer for lexing the provided source code. The Lexer will intern identifiers, keywords, and regular
// expressions in the designated world.
JS::Lexer::Lexer(World &world, const String &source, const String &sourceLocation, uint32 initialLineNum):
world(world), reader(source, sourceLocation, initialLineNum)
{
nextToken = tokens;
nTokensFwd = 0;
#ifdef DEBUG
nTokensBack = 0;
#endif
lexingUnit = false;
}
// Get and return the next token. The token remains valid until the next call to this Lexer.
// If the Reader reached the end of file, return a Token whose Kind is end.
// The caller may alter the value of this Token (in particular, take control over the
// auto_ptr's data), but if it does so, the caller is not allowed to unget this Token.
//
// If preferRegExp is true, a / will be preferentially interpreted as starting a regular
// expression; otherwise, a / will be preferentially interpreted as division or /=.
const JS::Token &JS::Lexer::get(bool preferRegExp)
{
const Token &t = peek(preferRegExp);
if (++nextToken == tokens + tokenBufferSize)
nextToken = tokens;
--nTokensFwd;
DEBUG_ONLY(++nTokensBack);
return t;
}
// Peek at the next token using the given preferRegExp setting. If that token's kind matches
// the given kind, consume that token and return it. Otherwise, do not consume that token and
// return nil.
const JS::Token *JS::Lexer::eat(bool preferRegExp, Token::Kind kind)
{
const Token &t = peek(preferRegExp);
if (t.kind != kind)
return 0;
if (++nextToken == tokens + tokenBufferSize)
nextToken = tokens;
--nTokensFwd;
DEBUG_ONLY(++nTokensBack);
return &t;
}
// Return the next token without consuming it.
//
// If preferRegExp is true, a / will be preferentially interpreted as starting a regular
// expression; otherwise, a / will be preferentially interpreted as division or /=.
// A subsequent call to peek or get will return the same token; that call must be presented
// with the same value for preferRegExp.
const JS::Token &JS::Lexer::peek(bool preferRegExp)
{
// Use an already looked-up token if there is one.
if (nTokensFwd) {
ASSERT(savedPreferRegExp[nextToken - tokens] == preferRegExp);
} else {
lexToken(preferRegExp);
nTokensFwd = 1;
#ifdef DEBUG
savedPreferRegExp[nextToken - tokens] = preferRegExp;
if (nTokensBack == tokenLookahead) {
nTokensBack = tokenLookahead-1;
if (tokenGuard)
(nextToken >= tokens+tokenLookahead ? nextToken-tokenLookahead : nextToken+tokenBufferSize-tokenLookahead)->valid = false;
}
#endif
}
return *nextToken;
}
#ifdef DEBUG
// Change the setting of preferRegExp for an already peeked token. The token must not be one
// for which that setting mattered.
void JS::Lexer::redesignate(bool preferRegExp)
{
ASSERT(nTokensFwd && !(nextToken->hasKind(Token::regExp) || nextToken->hasKind(Token::divide) ||
nextToken->hasKind(Token::divideEquals)));
savedPreferRegExp[nextToken - tokens] = preferRegExp;
}
#endif
// Unread the last token. This call may be called to unread at most tokenBufferSize tokens
// at a time (where a peek also counts as temporarily reading and unreading one token).
// When a token that has been unread is peeked or read again, the same value must be passed
// in preferRegExp as for the first time that token was read or peeked.
void JS::Lexer::unget()
{
ASSERT(nTokensBack--);
nTokensFwd++;
if (nextToken == tokens)
nextToken = tokens + tokenBufferSize;
--nextToken;
}
// Report a syntax error at the backUp-th last character read by the Reader.
// In other words, if backUp is 0, the error is at the next character to be read by the Reader;
// if backUp is 1, the error is at the last character read by the Reader, and so forth.
void JS::Lexer::syntaxError(const char *message, uint backUp)
{
reader.unget(backUp);
reader.error(Exception::syntaxError, widenCString(message), reader.getPos());
}
// Get the next character from the reader, skipping any Unicode format-control (Cf) characters.
inline char16 JS::Lexer::getChar()
{
char16 ch = reader.get();
if (char16Value(ch) >= firstFormatChar)
ch = internalGetChar(ch);
return ch;
}
// Helper for getChar()
char16 JS::Lexer::internalGetChar(char16 ch)
{
while (isFormat(ch))
ch = reader.get();
return ch;
}
// Peek the next character from the reader, skipping any Unicode format-control (Cf) characters,
// which are read and discarded.
inline char16 JS::Lexer::peekChar()
{
char16 ch = reader.peek();
if (char16Value(ch) >= firstFormatChar)
ch = internalPeekChar(ch);
return ch;
}
// Helper for peekChar()
char16 JS::Lexer::internalPeekChar(char16 ch)
{
while (isFormat(ch)) {
reader.get();
ch = reader.peek();
}
return ch;
}
// Peek the next character from the reader, skipping any Unicode format-control (Cf) characters,
// which are read and discarded. If the peeked character matches ch, read that character and return true;
// otherwise return false. ch must not be null.
bool JS::Lexer::testChar(char16 ch)
{
ASSERT(ch); // If ch were null, it could match the eof null.
char16 ch2 = peekChar();
if (ch == ch2) {
reader.get();
return true;
}
return false;
}
// A backslash has been read. Read the rest of the escape code.
// Return the interpreted escaped character. Throw an exception if the escape is not valid.
// If unicodeOnly is true, allow only \uxxxx escapes.
char16 JS::Lexer::lexEscape(bool unicodeOnly)
{
char16 ch = getChar();
int nDigits;
if (!unicodeOnly || ch == 'u')
switch (ch) {
case '0':
// Make sure that the next character isn't a digit.
ch = peekChar();
if (!isASCIIDecimalDigit(ch))
return 0x00;
getChar(); // Point to the next character in the error message
case 'b':
return 0x08;
case 'f':
return 0x0C;
case 'n':
return 0x0A;
case 'r':
return 0x0D;
case 't':
return 0x09;
case 'v':
return 0x0B;
case 'x':
nDigits = 2;
goto lexHex;
case 'u':
nDigits = 4;
lexHex:
{
uint32 n = 0;
while (nDigits--) {
ch = getChar();
uint digit;
if (!isASCIIHexDigit(ch, digit))
goto error;
n = (n << 4) | digit;
}
return static_cast<char16>(n);
}
default:
if (!reader.getEof(ch)) {
CharInfo chi(ch);
if (!isAlphanumeric(chi) && !isLineBreak(chi))
return ch;
}
}
error:
syntaxError("Bad escape code");
return 0;
}
// Read an identifier into s. The initial value of s is ignored and cleared.
// Return true if an escape code has been encountered.
// If allowLeadingDigit is true, allow the first character of s to be a digit, just like any
// continuing identifier character.
bool JS::Lexer::lexIdentifier(String &s, bool allowLeadingDigit)
{
reader.beginRecording(s);
bool hasEscape = false;
while (true) {
char16 ch = getChar();
char16 ch2 = ch;
if (ch == '\\') {
ch2 = lexEscape(true);
hasEscape = true;
}
CharInfo chi2(ch2);
if (!(allowLeadingDigit ? isIdContinuing(chi2) : isIdLeading(chi2))) {
if (ch == '\\')
syntaxError("Identifier escape expands into non-identifier character");
else
reader.unget();
break;
}
reader.recordChar(ch2);
allowLeadingDigit = true;
}
reader.endRecording();
return hasEscape;
}
// Read a numeric literal into nextToken->chars and nextToken->value.
// Return true if the numeric literal is followed by a unit, but don't read the unit yet.
bool JS::Lexer::lexNumeral()
{
int hasDecimalPoint = 0;
String &s = nextToken->chars;
uint digit;
reader.beginRecording(s);
char16 ch = getChar();
if (ch == '0') {
reader.recordChar('0');
ch = getChar();
if ((ch&~0x20) == 'X') {
uint32 pos = reader.getPos();
char16 ch2 = getChar();
if (isASCIIHexDigit(ch2, digit)) {
reader.recordChar(ch);
do {
reader.recordChar(ch2);
ch2 = getChar();
} while (isASCIIHexDigit(ch2, digit));
ch = ch2;
} else
reader.setPos(pos);
goto done;
} else if (isASCIIDecimalDigit(ch)) {
syntaxError("Numeric constant syntax error");
}
}
while (isASCIIDecimalDigit(ch) || ch == '.' && !hasDecimalPoint++) {
reader.recordChar(ch);
ch = getChar();
}
if ((ch&~0x20) == 'E') {
uint32 pos = reader.getPos();
char16 ch2 = getChar();
char16 sign = 0;
if (ch2 == '+' || ch2 == '-') {
sign = ch2;
ch2 = getChar();
}
if (isASCIIDecimalDigit(ch2)) {
reader.recordChar(ch);
if (sign)
reader.recordChar(sign);
do {
reader.recordChar(ch2);
ch2 = getChar();
} while (isASCIIDecimalDigit(ch2));
ch = ch2;
} else
reader.setPos(pos);
}
done:
// At this point the reader is just past the character ch, which is the first non-formatting character
// that is not part of the number.
reader.endRecording();
const char16 *sBegin = s.data();
const char16 *sEnd = sBegin + s.size();
const char16 *numEnd;
nextToken->value = stringToDouble(sBegin, sEnd, numEnd);
ASSERT(numEnd == sEnd);
reader.unget();
ASSERT(ch == reader.peek());
return isIdContinuing(ch) || ch == '\\';
}
// Read a string literal into s. The initial value of s is ignored and cleared.
// The opening quote has already been read into separator.
void JS::Lexer::lexString(String &s, char16 separator)
{
char16 ch;
reader.beginRecording(s);
while ((ch = reader.get()) != separator) {
CharInfo chi(ch);
if (!isFormat(chi)) {
if (ch == '\\')
ch = lexEscape(false);
else if (reader.getEof(ch) || isLineBreak(chi))
syntaxError("Unterminated string literal");
reader.recordChar(ch);
}
}
reader.endRecording();
}
// Read a regular expression literal. Store the regular expression in nextToken->id
// and the flags in nextToken->chars.
// The opening slash has already been read.
void JS::Lexer::lexRegExp()
{
String s;
char16 prevCh = 0;
reader.beginRecording(s);
while (true) {
char16 ch = getChar();
CharInfo chi(ch);
if (reader.getEof(ch) || isLineBreak(chi))
syntaxError("Unterminated regular expression literal");
if (prevCh == '\\') {
reader.recordChar(ch);
prevCh = 0; // Ignore slashes and backslashes immediately after a backslash
} else if (ch != '/') {
reader.recordChar(ch);
prevCh = ch;
} else
break;
}
reader.endRecording();
nextToken->id = &world.identifiers[s];
lexIdentifier(nextToken->chars, true);
}
// Read a token from the Reader and store it at *nextToken.
// If the Reader reached the end of file, store a Token whose Kind is end.
void JS::Lexer::lexToken(bool preferRegExp)
{
Token &t = *nextToken;
t.lineBreak = false;
t.id = 0;
//clear(t.chars); // Don't really need to waste time clearing this string here
Token::Kind kind;
if (lexingUnit) {
lexIdentifier(t.chars, false);
ASSERT(t.chars.size());
kind = Token::unit; // unit
lexingUnit = false;
} else {
next:
char16 ch = reader.get();
if (reader.getEof(ch)) {
endOfInput:
kind = Token::end;
} else {
char16 ch2;
CharInfo chi(ch);
switch (cGroup(chi)) {
case CharInfo::FormatGroup:
case CharInfo::WhiteGroup:
goto next;
case CharInfo::IdGroup:
t.pos = reader.getPos() - 1;
readIdentifier:
{
reader.unget();
String s;
bool hasEscape = lexIdentifier(s, false);
t.id = &world.identifiers[s];
kind = hasEscape ? Token::identifier : t.id->tokenKind;
}
break;
case CharInfo::NonIdGroup:
case CharInfo::IdContinueGroup:
t.pos = reader.getPos() - 1;
switch (ch) {
case '(':
kind = Token::openParenthesis; // (
break;
case ')':
kind = Token::closeParenthesis; // )
break;
case '[':
kind = Token::openBracket; // [
break;
case ']':
kind = Token::closeBracket; // ]
break;
case '{':
kind = Token::openBrace; // {
break;
case '}':
kind = Token::closeBrace; // }
break;
case ',':
kind = Token::comma; // ,
break;
case ';':
kind = Token::semicolon; // ;
break;
case '.':
kind = Token::dot; // .
ch2 = getChar();
if (isASCIIDecimalDigit(ch2)) {
reader.setPos(t.pos);
goto number; // decimal point
} else if (ch2 == '.') {
kind = Token::doubleDot; // ..
if (testChar('.'))
kind = Token::tripleDot; // ...
} else
reader.unget();
break;
case ':':
kind = Token::colon; // :
if (testChar(':'))
kind = Token::doubleColon; // ::
break;
case '#':
kind = Token::pound; // #
break;
case '@':
kind = Token::at; // @
break;
case '?':
kind = Token::question; // ?
break;
case '~':
kind = Token::complement; // ~
break;
case '!':
kind = Token::logicalNot; // !
if (testChar('=')) {
kind = Token::notEqual; // !=
if (testChar('='))
kind = Token::notIdentical; // !==
}
break;
case '*':
kind = Token::times; // * *=
tryAssignment:
if (testChar('='))
kind = Token::Kind(kind + Token::timesEquals - Token::times);
break;
case '/':
kind = Token::divide; // /
ch = getChar();
if (ch == '/') { // // comment
do {
ch = reader.get();
if (reader.getEof(ch))
goto endOfInput;
} while (!isLineBreak(ch));
goto endOfLine;
} else if (ch == '*') { // /* comment */
ch = 0;
do {
ch2 = ch;
ch = getChar();
if (isLineBreak(ch)) {
reader.beginLine();
t.lineBreak = true;
} else if (reader.getEof(ch))
syntaxError("Unterminated /* comment");
} while (ch != '/' || ch2 != '*');
goto next;
} else {
reader.unget();
if (preferRegExp) { // Regular expression
kind = Token::regExp;
lexRegExp();
} else
goto tryAssignment; // /=
}
break;
case '%':
kind = Token::modulo; // %
goto tryAssignment; // %=
case '+':
kind = Token::plus; // +
if (testChar('+'))
kind = Token::increment; // ++
else
goto tryAssignment; // +=
break;
case '-':
kind = Token::minus; // -
ch = getChar();
if (ch == '-')
kind = Token::decrement; // --
else if (ch == '>')
kind = Token::arrow; // ->
else {
reader.unget();
goto tryAssignment; // -=
}
break;
case '&':
kind = Token::bitwiseAnd; // & && &= &&=
logical:
if (testChar(ch))
kind = Token::Kind(kind - Token::bitwiseAnd + Token::logicalAnd);
goto tryAssignment;
case '^':
kind = Token::bitwiseXor; // ^ ^^ ^= ^^=
goto logical;
case '|':
kind = Token::bitwiseOr; // | || |= ||=
goto logical;
case '=':
kind = Token::assignment; // =
if (testChar('=')) {
kind = Token::equal; // ==
if (testChar('='))
kind = Token::identical; // ===
}
break;
case '<':
kind = Token::lessThan; // <
if (testChar('<')) {
kind = Token::leftShift; // <<
goto tryAssignment; // <<=
}
comparison:
if (testChar('=')) // <= >=
kind = Token::Kind(kind + Token::lessThanOrEqual - Token::lessThan);
break;
case '>':
kind = Token::greaterThan; // >
if (testChar('>')) {
kind = Token::rightShift; // >>
if (testChar('>'))
kind = Token::logicalRightShift; // >>>
goto tryAssignment; // >>= >>>=
}
goto comparison;
case '\\':
goto readIdentifier; // An identifier that starts with an escape
case '\'':
case '"':
kind = Token::string; // 'string' "string"
lexString(t.chars, ch);
break;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
reader.unget(); // Number
number:
kind = Token::number;
lexingUnit = lexNumeral();
break;
default:
syntaxError("Bad character");
}
break;
case CharInfo::LineBreakGroup:
endOfLine:
reader.beginLine();
t.lineBreak = true;
goto next;
}
}
}
t.kind = kind;
#ifdef DEBUG
t.valid = true;
#endif
}
//
// Parser
//
// Create a new Parser for parsing the provided source code, interning identifiers, keywords, and regular
// expressions in the designated world, and allocating the parse tree in the designated arena.
JS::Parser::Parser(World &world, Arena &arena, const String &source, const String &sourceLocation, uint32 initialLineNum):
lexer(world, source, sourceLocation, initialLineNum), arena(arena)
{
}
// Report a syntax error at the backUp-th last token read by the Lexer.
// In other words, if backUp is 0, the error is at the next token to be read by the Lexer (which
// must have been peeked already); if backUp is 1, the error is at the last token read by the Lexer,
// and so forth.
void JS::Parser::syntaxError(const char *message, uint backUp)
{
syntaxError(widenCString(message), backUp);
}
// Same as above, but the error message is already a String.
void JS::Parser::syntaxError(const String &message, uint backUp)
{
while (backUp--)
lexer.unget();
getReader().error(Exception::syntaxError, message, lexer.getPos());
}
// Get the next token using the given preferRegExp setting. If that token's kind matches
// the given kind, consume that token and return it. Otherwise throw a syntax error.
const JS::Token &JS::Parser::require(bool preferRegExp, Token::Kind kind)
{
const Token &t = lexer.get(preferRegExp);
if (!t.hasKind(kind)) {
String message;
bool special = Token::isSpecialKind(kind);
if (special)
message += '\'';
message += Token::kindName(kind);
if (special)
message += '\'';
message += " expected";
syntaxError(message);
}
return t;
}
// Copy the Token's chars into the current arena and return the resulting copy.
inline JS::String &JS::Parser::copyTokenChars(const Token &t)
{
return newArenaString(arena, t.getChars());
}
// An identifier or parenthesized expression has just been parsed into e.
// If it is followed by one or more ::'s followed by identifiers, construct the appropriate
// qualifiedIdentifier parse node and return it and set foundQualifiers to true. If no ::
// is found, return e and set foundQualifiers to false.
JS::ExprNode *JS::Parser::parseIdentifierQualifiers(ExprNode *e, bool &foundQualifiers)
{
foundQualifiers = false;
while (true) {
const Token *tDoubleColon = lexer.eat(false, Token::doubleColon);
if (!tDoubleColon)
return e;
const Token &tId = lexer.get(true);
if (!Token::isIdentifierKind(tId.getKind()))
syntaxError("Identifier expected");
e = new(arena) OpIdentifierExprNode(tDoubleColon->getPos(), ExprNode::qualifiedIdentifier, tId.getIdentifier(), e);
foundQualifiers = true;
}
}
// An opening parenthesis has just been parsed into tParen. Finish parsing a ParenthesizedExpression.
// If it is followed by one or more ::'s followed by identifiers, construct the appropriate
// qualifiedIdentifier parse node and return it and set foundQualifiers to true. If no ::
// is found, return the ParenthesizedExpression and set foundQualifiers to false.
JS::ExprNode *JS::Parser::parseParenthesesAndIdentifierQualifiers(const Token &tParen, bool &foundQualifiers)
{
uint32 pos = tParen.getPos();
ExprNode *e = new(arena) UnaryExprNode(pos, ExprNode::parentheses, parseExpression(false));
require(false, Token::closeParenthesis);
return parseIdentifierQualifiers(e, foundQualifiers);
}
// Parse and return a qualifiedIdentifier. The first token has already been parsed and is in t.
// If the first token was peeked, it should be have been done with preferRegExp set to true.
JS::IdentifierExprNode *JS::Parser::parseQualifiedIdentifier(const Token &t)
{
bool foundQualifiers;
if (Token::isIdentifierKind(t.getKind())) {
IdentifierExprNode *id = new(arena) IdentifierExprNode(t.getPos(), ExprNode::identifier, t.getIdentifier());
return static_cast<IdentifierExprNode *>(parseIdentifierQualifiers(id, foundQualifiers));
}
if (t.hasKind(Token::openParenthesis)) {
ExprNode *e = parseParenthesesAndIdentifierQualifiers(t, foundQualifiers);
if (!foundQualifiers)
syntaxError(":: expected", 0);
return static_cast<IdentifierExprNode *>(e);
}
syntaxError("Identifier or '(' expected");
return 0; // Unreachable code here just to shut up compiler warnings
}
// Parse and return an arrayLiteral. The opening bracket has already been read into initialToken.
JS::PairListExprNode *JS::Parser::parseArrayLiteral(const Token &initialToken)
{
uint32 initialPos = initialToken.getPos();
NodeQueue<ExprPairList> elements;
while (true) {
ExprNode *element = 0;
const Token &t = lexer.peek(true);
if (t.hasKind(Token::comma) || t.hasKind(Token::closeBracket))
lexer.redesignate(false);
else {
lexer.get(true);
element = parseAssignmentExpression(false);
}
elements += new(arena) ExprPairList(0, element);
const Token &tSeparator = lexer.get(false);
if (tSeparator.hasKind(Token::closeBracket))
break;
if (!tSeparator.hasKind(Token::comma))
syntaxError("',' expected");
}
return new(arena) PairListExprNode(initialPos, ExprNode::arrayLiteral, elements.first);
}
// Parse and return an objectLiteral. The opening brace has already been read into initialToken.
JS::PairListExprNode *JS::Parser::parseObjectLiteral(const Token &initialToken)
{
uint32 initialPos = initialToken.getPos();
NodeQueue<ExprPairList> elements;
if (!lexer.eat(true, Token::closeBrace))
while (true) {
const Token &t = lexer.get(true);
ExprNode *field;
if (Token::isIdentifierKind(t.getKind()) || t.hasKind(Token::openParenthesis))
field = parseQualifiedIdentifier(t);
else if (t.hasKind(Token::string))
field = new(arena) StringExprNode(t.getPos(), ExprNode::string, copyTokenChars(t));
else if (t.hasKind(Token::number))
field = new(arena) NumberExprNode(t.getPos(), t.getValue());
else {
syntaxError("Field name expected");
field = 0; // Unreachable code here just to shut up compiler warnings
}
require(false, Token::colon);
elements += new(arena) ExprPairList(field, parseAssignmentExpression(false));
const Token &tSeparator = lexer.get(false);
if (tSeparator.hasKind(Token::closeBrace))
break;
if (!tSeparator.hasKind(Token::comma))
syntaxError("',' expected");
}
return new(arena) PairListExprNode(initialPos, ExprNode::objectLiteral, elements.first);
}
// Parse and return a PrimaryExpression.
// If the first token was peeked, it should be have been done with preferRegExp set to true.
JS::ExprNode *JS::Parser::parsePrimaryExpression()
{
ExprNode *e;
ExprNode::Kind eKind;
const Token &t = lexer.get(true);
switch (t.getKind()) {
case Token::Null:
eKind = ExprNode::Null;
makeExprNode:
e = new(arena) ExprNode(t.getPos(), eKind);
break;
case Token::True:
eKind = ExprNode::True;
goto makeExprNode;
case Token::False:
eKind = ExprNode::False;
goto makeExprNode;
case Token::This:
eKind = ExprNode::This;
goto makeExprNode;
case Token::Super:
eKind = ExprNode::Super;
goto makeExprNode;
case Token::number:
{
const Token &tUnit = lexer.peek(false);
if (!tUnit.getLineBreak() && (tUnit.hasKind(Token::unit) || tUnit.hasKind(Token::string))) {
lexer.get(false);
e = new(arena) NumUnitExprNode(t.getPos(), ExprNode::numUnit, copyTokenChars(t), t.getValue(), copyTokenChars(tUnit));
} else
e = new(arena) NumberExprNode(t.getPos(), t.getValue());
}
break;
case Token::string:
e = new(arena) StringExprNode(t.getPos(), ExprNode::string, copyTokenChars(t));
break;
case Token::regExp:
e = new(arena) RegExpExprNode(t.getPos(), ExprNode::regExp, t.getIdentifier(), copyTokenChars(t));
break;
case CASE_TOKEN_NONRESERVED:
e = parseQualifiedIdentifier(t);
break;
case Token::openParenthesis:
{
bool foundQualifiers;
e = parseParenthesesAndIdentifierQualifiers(t, foundQualifiers);
if (!foundQualifiers) {
const Token &tUnit = lexer.peek(false);
if (!tUnit.getLineBreak() && tUnit.hasKind(Token::string)) {
lexer.get(false);
e = new(arena) ExprUnitExprNode(t.getPos(), ExprNode::exprUnit, e, copyTokenChars(tUnit));
}
}
}
break;
case Token::openBracket:
e = parseArrayLiteral(t);
break;
case Token::openBrace:
e = parseObjectLiteral(t);
break;
case Token::Function:
syntaxError("***** functions not implemented yet *****");
e = 0; // Unreachable code here just to shut up compiler warnings
break;
default:
syntaxError("Expression expected");
e = 0; // Unreachable code here just to shut up compiler warnings
}
return e;
}
// Parse a . or @ followed by a QualifiedIdentifier or ParenthesizedExpression and return
// the resulting BinaryExprNode. Use kind if a QualifiedIdentifier was found or parenKind
// if a ParenthesizedExpression was found.
// tOperator is the . or @ token. target is the first operand.
JS::BinaryExprNode *JS::Parser::parseMember(ExprNode *target, const Token &tOperator, ExprNode::Kind kind, ExprNode::Kind parenKind)
{
uint32 pos = tOperator.getPos();
ExprNode *member;
const Token &t2 = lexer.get(true);
if (t2.hasKind(Token::openParenthesis)) {
bool foundQualifiers;
member = parseParenthesesAndIdentifierQualifiers(t2, foundQualifiers);
if (!foundQualifiers)
kind = parenKind;
} else
member = parseQualifiedIdentifier(t2);
return new(arena) BinaryExprNode(pos, kind, target, member);
}
// Parse an ArgumentsList followed by a closing parenthesis or bracket and return
// the resulting InvokeExprNode. The target function, indexed object, or created class
// is supplied. The opening parenthesis or bracket has already been read.
// pos is the position to use for the InvokeExprNode.
JS::InvokeExprNode *JS::Parser::parseInvoke(ExprNode *target, uint32 pos, Token::Kind closingTokenKind, ExprNode::Kind invokeKind)
{
NodeQueue<ExprPairList> arguments;
bool hasNamedArgument = false;
if (!lexer.eat(true, closingTokenKind))
while (true) {
ExprNode *field = 0;
ExprNode *value = parseAssignmentExpression(false);
if (lexer.eat(false, Token::colon)) {
field = value;
if (!ExprNode::isFieldKind(field->getKind()))
syntaxError("Argument name must be an identifier, string, or number");
hasNamedArgument = true;
value = parseAssignmentExpression(false);
} else if (hasNamedArgument)
syntaxError("Unnamed argument cannot follow named argument", 0);
arguments += new(arena) ExprPairList(field, value);
const Token &tSeparator = lexer.get(false);
if (tSeparator.hasKind(closingTokenKind))
break;
if (!tSeparator.hasKind(Token::comma))
syntaxError("',' expected");
}
return new(arena) InvokeExprNode(pos, invokeKind, target, arguments.first);
}
// Parse and return a PostfixExpression.
// If the first token was peeked, it should be have been done with preferRegExp set to true.
// If newExpression is true, this expression is immediately preceded by 'new', so don't allow
// call, postincrement, or postdecrement operators on it.
JS::ExprNode *JS::Parser::parsePostfixExpression(bool newExpression)
{
ExprNode *e;
const Token *tNew = lexer.eat(true, Token::New);
if (tNew) {
checkStackSize();
uint32 posNew = tNew->getPos();
e = parsePostfixExpression(true);
if (lexer.eat(false, Token::openParenthesis))
e = parseInvoke(e, posNew, Token::closeParenthesis, ExprNode::New);
else
e = new(arena) InvokeExprNode(posNew, ExprNode::New, e, 0);
} else
e = parsePrimaryExpression();
while (true) {
ExprNode::Kind eKind;
const Token &t = lexer.get(false);
switch (t.getKind()) {
case Token::openParenthesis:
if (newExpression)
goto other;
e = parseInvoke(e, t.getPos(), Token::closeParenthesis, ExprNode::call);
break;
case Token::openBracket:
e = parseInvoke(e, t.getPos(), Token::closeBracket, ExprNode::index);
break;
case Token::dot:
e = parseMember(e, t, ExprNode::dot, ExprNode::dotParen);
break;
case Token::at:
e = parseMember(e, t, ExprNode::at, ExprNode::at);
break;
case Token::increment:
eKind = ExprNode::postIncrement;
incDec:
if (newExpression)
goto other;
e = new(arena) UnaryExprNode(t.getPos(), eKind, e);
break;
case Token::decrement:
eKind = ExprNode::postDecrement;
goto incDec;
default:
other:
lexer.unget();
return e;
}
}
}
// Parse and return a NonAssignmentExpression.
// If the first token was peeked, it should be have been done with preferRegExp set to true.
JS::ExprNode *JS::Parser::parseNonAssignmentExpression(bool noIn)
{
checkStackSize();
syntaxError("***** parseNonAssignmentExpression not implemented yet *****");
return 0;
}
// Parse and return an AssignmentExpression.
// If the first token was peeked, it should be have been done with preferRegExp set to true.
JS::ExprNode *JS::Parser::parseAssignmentExpression(bool noIn)
{
checkStackSize();
syntaxError("***** parseAssignmentExpression not implemented yet *****");
return 0;
}
// Parse and return an Expression.
// If the first token was peeked, it should be have been done with preferRegExp set to true.
JS::ExprNode *JS::Parser::parseExpression(bool noIn)
{
checkStackSize();
syntaxError("***** parseExpression not implemented yet *****");
return 0;
}