2000-01-10 21:22:43 +00:00
|
|
|
// -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
|
|
|
//
|
|
|
|
// The contents of this file are subject to the Netscape Public
|
|
|
|
// License Version 1.1 (the "License"); you may not use this file
|
|
|
|
// except in compliance with the License. You may obtain a copy of
|
|
|
|
// the License at http://www.mozilla.org/NPL/
|
|
|
|
//
|
|
|
|
// Software distributed under the License is distributed on an "AS
|
|
|
|
// IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
|
|
|
|
// implied. See the License for the specific language governing
|
|
|
|
// rights and limitations under the License.
|
|
|
|
//
|
|
|
|
// The Original Code is the JavaScript 2 Prototype.
|
|
|
|
//
|
|
|
|
// The Initial Developer of the Original Code is Netscape
|
|
|
|
// Communications Corporation. Portions created by Netscape are
|
|
|
|
// Copyright (C) 1998 Netscape Communications Corporation. All
|
|
|
|
// Rights Reserved.
|
|
|
|
|
|
|
|
#ifndef parser_h
|
|
|
|
#define parser_h
|
|
|
|
|
|
|
|
#include "utilities.h"
|
|
|
|
|
|
|
|
namespace JavaScript {
|
|
|
|
|
2000-01-25 22:57:32 +00:00
|
|
|
// Forward declarations.  This header only refers to these types through
// pointers and references (Token::identifier, Lexer::world, initKeywords),
// so their full definitions are not needed here.
class World;
class StringAtom;
|
|
|
|
|
2000-01-10 21:22:43 +00:00
|
|
|
//
|
|
|
|
// Reader
|
|
|
|
//
|
|
|
|
|
|
|
|
// A Reader reads Unicode characters from some source -- either a file or a string.
|
2000-01-25 22:57:32 +00:00
|
|
|
// get() returns all of the characters followed by a char16eof.
|
2000-01-10 21:22:43 +00:00
|
|
|
class Reader {
|
|
|
|
const char16 *begin; // Beginning of current buffer
|
|
|
|
const char16 *p; // Position in current buffer
|
|
|
|
const char16 *end; // End of current buffer
|
2000-01-25 22:57:32 +00:00
|
|
|
const char16 *lineStart; // Pointer to start of current line
|
|
|
|
uint32 nGetsPastEnd; // Number of times char16eof has been returned
|
2000-01-10 21:22:43 +00:00
|
|
|
|
2000-01-25 22:57:32 +00:00
|
|
|
String *recordString; // String, if any, into which recordChar() records characters
|
|
|
|
const char16 *recordBase; // Position of last beginRecording() call
|
|
|
|
const char16 *recordPos; // Position of last recordChar() call; nil if a discrepancy occurred
|
|
|
|
|
2000-01-10 21:22:43 +00:00
|
|
|
protected:
|
|
|
|
Reader(): nGetsPastEnd(0) {}
|
|
|
|
public:
|
|
|
|
Reader(const char16 *begin, const char16 *end);
|
|
|
|
private:
|
|
|
|
Reader(const Reader&); // No copy constructor
|
|
|
|
void operator=(const Reader&); // No assignment operator
|
|
|
|
public:
|
|
|
|
|
2000-01-25 22:57:32 +00:00
|
|
|
char16orEOF get();
|
|
|
|
char16orEOF peek();
|
|
|
|
void unget(uint32 n = 1);
|
2000-01-10 21:22:43 +00:00
|
|
|
|
2000-01-25 22:57:32 +00:00
|
|
|
void beginLine();
|
|
|
|
uint32 charPos() const;
|
|
|
|
void backUpTo(uint32 pos);
|
|
|
|
|
|
|
|
String extract(uint32 begin, uint32 end) const;
|
|
|
|
void beginRecording(String &recordString);
|
|
|
|
void recordChar(char16 ch);
|
|
|
|
String &endRecording();
|
|
|
|
|
|
|
|
virtual String sourceFile() const = 0; // A description of the source code that caused the error
|
|
|
|
|
2000-01-10 21:22:43 +00:00
|
|
|
protected:
|
|
|
|
void setBuffer(const char16 *begin, const char16 *p, const char16 *end);
|
2000-01-25 22:57:32 +00:00
|
|
|
virtual char16orEOF underflow();
|
|
|
|
char16orEOF peekUnderflow();
|
2000-01-10 21:22:43 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
|
2000-01-25 22:57:32 +00:00
|
|
|
// Get and return the next character or char16eof if at end of input.
|
|
|
|
inline char16orEOF Reader::get()
|
2000-01-10 21:22:43 +00:00
|
|
|
{
|
|
|
|
if (p != end)
|
|
|
|
return *p++;
|
|
|
|
return underflow();
|
|
|
|
}
|
|
|
|
|
2000-01-25 22:57:32 +00:00
|
|
|
// Return the next character without consuming it. Return char16eof if at end of input.
|
|
|
|
inline char16orEOF Reader::peek()
|
2000-01-10 21:22:43 +00:00
|
|
|
{
|
|
|
|
if (p != end)
|
|
|
|
return *p;
|
|
|
|
return peekUnderflow();
|
|
|
|
}
|
|
|
|
|
2000-01-25 22:57:32 +00:00
|
|
|
|
|
|
|
// Set the beginning of the current line. unget cannot be subsequently called past this point.
|
|
|
|
inline void Reader::beginLine()
|
|
|
|
{
|
|
|
|
lineStart = p;
|
|
|
|
#ifdef DEBUG
|
|
|
|
recordString = 0;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
// Return the character offset relative to the current line. This cannot be called
|
|
|
|
// if the current position is past the end of the input.
|
|
|
|
inline uint32 Reader::charPos() const
|
2000-01-10 21:22:43 +00:00
|
|
|
{
|
2000-01-25 22:57:32 +00:00
|
|
|
ASSERT(!nGetsPastEnd);
|
|
|
|
return static_cast<uint32>(p - lineStart);
|
2000-01-10 21:22:43 +00:00
|
|
|
}
|
|
|
|
|
2000-01-25 22:57:32 +00:00
|
|
|
|
|
|
|
// Back up to the given character offset relative to the current line.
|
|
|
|
inline void Reader::backUpTo(uint32 pos)
|
2000-01-10 21:22:43 +00:00
|
|
|
{
|
2000-01-25 22:57:32 +00:00
|
|
|
ASSERT(pos <= charPos());
|
|
|
|
p = lineStart + pos;
|
|
|
|
nGetsPastEnd = 0;
|
2000-01-10 21:22:43 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
inline void Reader::setBuffer(const char16 *begin, const char16 *p, const char16 *end)
|
|
|
|
{
|
|
|
|
ASSERT(begin <= p && p <= end);
|
|
|
|
Reader::begin = begin;
|
|
|
|
Reader::p = p;
|
|
|
|
Reader::end = end;
|
2000-01-25 22:57:32 +00:00
|
|
|
lineStart = begin;
|
|
|
|
#ifdef DEBUG
|
|
|
|
recordString = 0;
|
|
|
|
#endif
|
2000-01-10 21:22:43 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// A Reader that reads from a String.
|
|
|
|
class StringReader: public Reader {
|
|
|
|
const String str;
|
2000-01-25 22:57:32 +00:00
|
|
|
const String source;
|
|
|
|
|
2000-01-10 21:22:43 +00:00
|
|
|
public:
|
2000-01-25 22:57:32 +00:00
|
|
|
StringReader(const String &s, const String &source);
|
|
|
|
String sourceFile() const;
|
2000-01-10 21:22:43 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
//
|
|
|
|
// Lexer
|
|
|
|
//
|
|
|
|
|
|
|
|
class Token {
|
|
|
|
public:
|
|
|
|
enum Kind {
|
|
|
|
End, // End of token stream
|
|
|
|
|
|
|
|
Id, // Non-keyword identifier (may be same as a keyword if it contains an escape code)
|
|
|
|
Num, // Numeral
|
|
|
|
Str, // String or unit after numeral
|
|
|
|
RegExp, // Regular expression
|
|
|
|
|
|
|
|
// Punctuators
|
|
|
|
OpenParenthesis, // (
|
|
|
|
CloseParenthesis, // )
|
|
|
|
OpenBracket, // [
|
|
|
|
CloseBracket, // ]
|
|
|
|
OpenBrace, // {
|
|
|
|
CloseBrace, // }
|
|
|
|
|
|
|
|
Comma, // ,
|
|
|
|
Semicolon, // ;
|
|
|
|
Dot, // .
|
|
|
|
DoubleDot, // ..
|
|
|
|
TripleDot, // ...
|
|
|
|
Arrow, // ->
|
|
|
|
Colon, // :
|
|
|
|
DoubleColon, // ::
|
|
|
|
Pound, // #
|
|
|
|
At, // @
|
|
|
|
|
|
|
|
Increment, // ++
|
|
|
|
Decrement, // --
|
|
|
|
|
|
|
|
Complement, // ~
|
|
|
|
Not, // !
|
|
|
|
|
|
|
|
Times, // *
|
|
|
|
Divide, // /
|
|
|
|
Modulo, // %
|
|
|
|
Plus, // +
|
|
|
|
Minus, // -
|
|
|
|
LeftShift, // <<
|
|
|
|
RightShift, // >>
|
|
|
|
LogicalRightShift, // >>>
|
|
|
|
LogicalAnd, // &&
|
|
|
|
LogicalXor, // ^^
|
|
|
|
LogicalOr, // ||
|
2000-01-25 22:57:32 +00:00
|
|
|
And, // & // These must be at constant offsets from LogicalAnd ... LogicalOr
|
2000-01-10 21:22:43 +00:00
|
|
|
Xor, // ^
|
|
|
|
Or, // |
|
|
|
|
|
|
|
|
Assignment, // =
|
2000-01-25 22:57:32 +00:00
|
|
|
TimesEquals, // *= // These must be at constant offsets from Times ... Or
|
2000-01-10 21:22:43 +00:00
|
|
|
DivideEquals, // /=
|
|
|
|
ModuloEquals, // %=
|
|
|
|
PlusEquals, // +=
|
|
|
|
MinusEquals, // -=
|
|
|
|
LeftShiftEquals, // <<=
|
|
|
|
RightShiftEquals, // >>=
|
|
|
|
LogicalRightShiftEquals, // >>>=
|
|
|
|
LogicalAndEquals, // &&=
|
|
|
|
LogicalXorEquals, // ^^=
|
|
|
|
LogicalOrEquals, // ||=
|
|
|
|
AndEquals, // &=
|
|
|
|
XorEquals, // ^=
|
|
|
|
OrEquals, // |=
|
|
|
|
|
|
|
|
Equal, // ==
|
|
|
|
NotEqual, // !=
|
|
|
|
LessThan, // <
|
|
|
|
LessThanOrEqual, // <=
|
2000-01-25 22:57:32 +00:00
|
|
|
GreaterThan, // > // >, >= must be at constant offsets from <, <=
|
2000-01-10 21:22:43 +00:00
|
|
|
GreaterThanOrEqual, // >=
|
|
|
|
Identical, // ===
|
|
|
|
NotIdentical, // !==
|
|
|
|
|
|
|
|
Question, // ?
|
|
|
|
|
|
|
|
// Reserved words
|
|
|
|
Abstract, // abstract
|
|
|
|
Break, // break
|
|
|
|
Case, // case
|
|
|
|
Catch, // catch
|
|
|
|
Class, // class
|
|
|
|
Const, // const
|
|
|
|
Continue, // continue
|
|
|
|
Debugger, // debugger
|
|
|
|
Default, // default
|
|
|
|
Delete, // delete
|
|
|
|
Do, // do
|
|
|
|
Else, // else
|
|
|
|
Enum, // enum
|
|
|
|
Eval, // eval
|
|
|
|
Export, // export
|
|
|
|
Extends, // extends
|
|
|
|
False, // false
|
|
|
|
Final, // final
|
|
|
|
Finally, // finally
|
|
|
|
For, // for
|
|
|
|
Function, // function
|
|
|
|
Goto, // goto
|
|
|
|
If, // if
|
|
|
|
Implements, // implements
|
|
|
|
Import, // import
|
|
|
|
In, // in
|
|
|
|
Instanceof, // instanceof
|
|
|
|
Native, // native
|
|
|
|
New, // new
|
|
|
|
Null, // null
|
|
|
|
Package, // package
|
|
|
|
Private, // private
|
|
|
|
Protected, // protected
|
|
|
|
Public, // public
|
|
|
|
Return, // return
|
|
|
|
Static, // static
|
|
|
|
Super, // super
|
|
|
|
Switch, // switch
|
|
|
|
Synchronized, // synchronized
|
|
|
|
This, // this
|
|
|
|
Throw, // throw
|
|
|
|
Throws, // throws
|
|
|
|
Transient, // transient
|
|
|
|
True, // true
|
|
|
|
Try, // try
|
|
|
|
Typeof, // typeof
|
|
|
|
Var, // var
|
|
|
|
Volatile, // volatile
|
|
|
|
While, // while
|
|
|
|
With, // with
|
|
|
|
|
|
|
|
// Non-reserved words
|
|
|
|
Box, // box
|
|
|
|
Constructor, // constructor
|
|
|
|
Field, // field
|
|
|
|
Get, // get
|
|
|
|
Language, // language
|
|
|
|
Local, // local
|
|
|
|
Method, // method
|
|
|
|
Override, // override
|
|
|
|
Set, // set
|
|
|
|
Version // version
|
|
|
|
};
|
|
|
|
|
|
|
|
Kind kind; // The token's kind
|
|
|
|
bool lineBreak; // True if line break precedes this token
|
|
|
|
uint32 lineNum; // One-based source line number
|
|
|
|
uint32 charPos; // Zero-based character offset of this token in source line
|
|
|
|
StringAtom *identifier; // The token's characters (identifiers, keywords, and regular expressions only)
|
|
|
|
auto_ptr<String> chars; // The token's characters (strings, numbers, and regular expression flags only)
|
|
|
|
float64 value; // The token's value (numbers only)
|
2000-01-25 22:57:32 +00:00
|
|
|
|
|
|
|
void setChars(const String &s);
|
2000-01-10 21:22:43 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
|
2000-01-25 22:57:32 +00:00
|
|
|
// Defined elsewhere.  Presumably registers the language's keywords in the
// given World -- confirm against the definition.
void initKeywords(World &world);
|
|
|
|
|
|
|
|
|
2000-01-10 21:22:43 +00:00
|
|
|
class Lexer {
|
|
|
|
static const int tokenBufferSize = 3; // Token lookahead buffer size
|
|
|
|
public:
|
|
|
|
Reader &reader;
|
2000-01-25 22:57:32 +00:00
|
|
|
World &world;
|
2000-01-10 21:22:43 +00:00
|
|
|
private:
|
|
|
|
Token tokens[tokenBufferSize]; // Circular buffer of recently read or lookahead tokens
|
|
|
|
Token *nextToken; // Address of next Token in the circular buffer to be returned by get()
|
|
|
|
int nTokensFwd; // Net number of Tokens on which unget() has been called; these Tokens are ahead of nextToken
|
|
|
|
#ifdef DEBUG
|
|
|
|
int nTokensBack; // Number of Tokens on which unget() can be called; these Tokens are beind nextToken
|
|
|
|
bool savedPreferRegExp[tokenBufferSize]; // Circular buffer of saved values of preferRegExp to get() calls
|
|
|
|
#endif
|
2000-01-25 22:57:32 +00:00
|
|
|
uint32 lineNum; // Current line number
|
|
|
|
bool lexingUnit; // True if lexing a unit identifier immediately following a number
|
2000-01-10 21:22:43 +00:00
|
|
|
|
|
|
|
public:
|
2000-01-25 22:57:32 +00:00
|
|
|
Lexer(Reader &reader, World &world);
|
2000-01-10 21:22:43 +00:00
|
|
|
|
|
|
|
Token &get(bool preferRegExp);
|
|
|
|
const Token &peek(bool preferRegExp);
|
|
|
|
void unget();
|
|
|
|
|
|
|
|
private:
|
2000-01-25 22:57:32 +00:00
|
|
|
void syntaxError(const char *message, uint backUp = 1);
|
|
|
|
char16orEOF getChar();
|
|
|
|
char16orEOF internalGetChar(char16orEOF ch);
|
|
|
|
char16orEOF peekChar();
|
|
|
|
char16orEOF internalPeekChar(char16orEOF ch);
|
|
|
|
bool testChar(char16 ch);
|
|
|
|
|
|
|
|
char16 lexEscape(bool unicodeOnly);
|
|
|
|
bool lexIdentifier(String &s, bool allowLeadingDigit);
|
|
|
|
bool lexNumeral();
|
|
|
|
String lexString(char16 separator);
|
|
|
|
void lexRegExp();
|
2000-01-10 21:22:43 +00:00
|
|
|
void lexToken(bool preferRegExp);
|
2000-01-25 22:57:32 +00:00
|
|
|
public:
|
2000-01-10 21:22:43 +00:00
|
|
|
};
|
|
|
|
}
|
|
|
|
#endif
|