gecko-dev/js/js2/parser.h
waldemar%netscape.com aef6520d4d Updated for VC6
2000-02-03 08:25:01 +00:00

356 lines
9.8 KiB
C++

// -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
//
// The contents of this file are subject to the Netscape Public
// License Version 1.1 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of
// the License at http://www.mozilla.org/NPL/
//
// Software distributed under the License is distributed on an "AS
// IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
// implied. See the License for the specific language governing
// rights and limitations under the License.
//
// The Original Code is the JavaScript 2 Prototype.
//
// The Initial Developer of the Original Code is Netscape
// Communications Corporation. Portions created by Netscape are
// Copyright (C) 1998 Netscape Communications Corporation. All
// Rights Reserved.
#ifndef parser_h
#define parser_h
#include "utilities.h"
namespace JavaScript {
// Forward declarations.  This header only refers to these classes through
// pointers (StringAtom *) and references (World &), so their full definitions
// are not needed here.
class StringAtom;
class World;
//
// Reader
//
// A Reader reads Unicode characters from some source -- either a file or a string.
// get() returns all of the characters followed by a char16eof.
class Reader {
const char16 *begin; // Beginning of current buffer
const char16 *p; // Position in current buffer
const char16 *end; // End of current buffer
const char16 *lineStart; // Pointer to start of current line
uint32 nGetsPastEnd; // Number of times char16eof has been returned
String *recordString; // String, if any, into which recordChar() records characters
const char16 *recordBase; // Position of last beginRecording() call
const char16 *recordPos; // Position of last recordChar() call; nil if a discrepancy occurred
protected:
Reader(): nGetsPastEnd(0) {}
public:
Reader(const char16 *begin, const char16 *end);
private:
Reader(const Reader&); // No copy constructor
void operator=(const Reader&); // No assignment operator
public:
char16orEOF get();
char16orEOF peek();
void unget(uint32 n = 1);
void beginLine();
uint32 charPos() const;
void backUpTo(uint32 pos);
String extract(uint32 begin, uint32 end) const;
void beginRecording(String &recordString);
void recordChar(char16 ch);
String &endRecording();
virtual String sourceFile() const = 0; // A description of the source code that caused the error
protected:
void setBuffer(const char16 *begin, const char16 *p, const char16 *end);
virtual char16orEOF underflow();
char16orEOF peekUnderflow();
};
// Consume the next character and return it.  When the current buffer is exhausted
// the request is handed to underflow(); at end of input the result is char16eof.
inline char16orEOF Reader::get()
{
    if (p == end)
        return underflow();
    const char16 ch = *p;
    ++p;
    return ch;
}
// Look at the next character without consuming it.  When the current buffer is
// exhausted the request is handed to peekUnderflow(); at end of input the result
// is char16eof.
inline char16orEOF Reader::peek()
{
    if (p == end)
        return peekUnderflow();
    return *p;
}
// Mark the current position as the start of the current line.  After this call
// unget must not be used to move back past this point.
inline void Reader::beginLine()
{
#ifdef DEBUG
    // Debug builds also clear the recording target here.
    recordString = 0;
#endif
    lineStart = p;
}
// Return how many characters the current position lies beyond the start of the
// current line.  Must not be called once the position has moved past the end of
// the input (i.e. after char16eof has been returned).
inline uint32 Reader::charPos() const
{
    ASSERT(!nGetsPastEnd);
    const uint32 offset = static_cast<uint32>(p - lineStart);
    return offset;
}
// Move the read position back to the character offset pos, measured from the
// start of the current line.  pos must not lie beyond the current position.
// The count of char16eof results already returned is reset as well.
// NOTE(review): the precondition check goes through charPos(), which itself asserts
// that no char16eof has been returned yet -- confirm whether backing up after
// reaching the end of input is meant to be allowed in debug builds.
inline void Reader::backUpTo(uint32 pos)
{
    ASSERT(pos <= charPos());
    nGetsPastEnd = 0;
    p = lineStart + pos;
}
inline void Reader::setBuffer(const char16 *begin, const char16 *p, const char16 *end)
{
ASSERT(begin <= p && p <= end);
Reader::begin = begin;
Reader::p = p;
Reader::end = end;
lineStart = begin;
#ifdef DEBUG
recordString = 0;
#endif
}
// A concrete Reader whose characters come from a String.
class StringReader: public Reader {
  private:
    const String str;       // Presumably the reader's own copy of the text being read -- see the constructor
    const String source;    // Presumably the description reported by sourceFile() -- see its definition

  public:
    StringReader(const String &s, const String &source);
    virtual String sourceFile() const;  // Implements Reader::sourceFile()
};
//
// Lexer
//
// A Token is one lexical unit: its kind, where it appeared in the source
// (line number, character offset, whether a line break preceded it) and, depending
// on the kind, its identifier atom, its characters, or its numeric value.
//
// The order of the Kind enumerators is significant: it must match the kindNames
// table, and several groups of enumerators must stay at constant offsets from each
// other (see the notes inside the enum).  Add or move enumerators with care.
class Token {
// Printable names of the kinds, indexed by Kind; defined outside this header.
static const char *const kindNames[];
public:
enum Kind { // Keep synchronized with kindNames table
// Special
End, // End of token stream
Id, // Non-keyword identifier (may be same as a keyword if it contains an escape code)
Num, // Numeral
Str, // String
Unit, // Unit after numeral
RegExp, // Regular expression
// Punctuators
OpenParenthesis, // (
CloseParenthesis, // )
OpenBracket, // [
CloseBracket, // ]
OpenBrace, // {
CloseBrace, // }
Comma, // ,
Semicolon, // ;
Dot, // .
DoubleDot, // ..
TripleDot, // ...
Arrow, // ->
Colon, // :
DoubleColon, // ::
Pound, // #
At, // @
Increment, // ++
Decrement, // --
Complement, // ~
Not, // !
Times, // *
Divide, // /
Modulo, // %
Plus, // +
Minus, // -
LeftShift, // <<
RightShift, // >>
LogicalRightShift, // >>>
LogicalAnd, // &&
LogicalXor, // ^^
LogicalOr, // ||
And, // & // These must be at constant offsets from LogicalAnd ... LogicalOr
Xor, // ^
Or, // |
Assignment, // =
TimesEquals, // *= // These must be at constant offsets from Times ... Or
DivideEquals, // /=
ModuloEquals, // %=
PlusEquals, // +=
MinusEquals, // -=
LeftShiftEquals, // <<=
RightShiftEquals, // >>=
LogicalRightShiftEquals, // >>>=
LogicalAndEquals, // &&=
LogicalXorEquals, // ^^=
LogicalOrEquals, // ||=
AndEquals, // &=
XorEquals, // ^=
OrEquals, // |=
Equal, // ==
NotEqual, // !=
LessThan, // <
LessThanOrEqual, // <=
GreaterThan, // > // >, >= must be at constant offsets from <, <=
GreaterThanOrEqual, // >=
Identical, // ===
NotIdentical, // !==
Question, // ?
// Reserved words
Abstract, // abstract
Break, // break
Case, // case
Catch, // catch
Class, // class
Const, // const
Continue, // continue
Debugger, // debugger
Default, // default
Delete, // delete
Do, // do
Else, // else
Enum, // enum
Eval, // eval
Export, // export
Extends, // extends
False, // false
Final, // final
Finally, // finally
For, // for
Function, // function
Goto, // goto
If, // if
Implements, // implements
Import, // import
In, // in
Instanceof, // instanceof
Native, // native
New, // new
Null, // null
Package, // package
Private, // private
Protected, // protected
Public, // public
Return, // return
Static, // static
Super, // super
Switch, // switch
Synchronized, // synchronized
This, // this
Throw, // throw
Throws, // throws
Transient, // transient
True, // true
Try, // try
Typeof, // typeof
Var, // var
Volatile, // volatile
While, // while
With, // with
// Non-reserved words
Box, // box
Constructor, // constructor
Field, // field
Get, // get
Language, // language
Local, // local
Method, // method
Override, // override
Set, // set
Version, // version
// Range markers.  KeywordsEnd is one past the last word token (Version);
// KeywordsBegin and KindsEnd are aliases and add no new values.
KeywordsEnd, // End of range of special identifier tokens
KeywordsBegin = Abstract, // Beginning of range of special identifier tokens
KindsEnd = KeywordsEnd // End of token kinds
};
Kind kind; // The token's kind
bool lineBreak; // True if line break precedes this token
uint32 lineNum; // One-based source line number
uint32 charPos; // Zero-based character offset of this token in source line
StringAtom *identifier; // The token's characters; non-null for identifiers, keywords, and regular expressions only
String chars; // The token's characters; valid for strings, units, numbers, and regular expression flags only
float64 value; // The token's value (numbers only)
static void initKeywords(World &world);
// Append the name of kind k (looked up in kindNames) to s and return s.
// k must be a valid kind, i.e. less than KindsEnd.
friend String &operator+=(String &s, Kind k) {ASSERT(uint(k) < KindsEnd); return s += kindNames[k];}
// Append token t to s by calling t.print(s), and return s.
friend String &operator+=(String &s, const Token &t) {t.print(s); return s;}
void print(String &dst, bool debug = false) const;
};
// A Lexer turns the characters supplied by a Reader into Tokens.  It keeps a small
// circular buffer of tokens so that callers can look ahead with peek() and push
// tokens back with unget().
class Lexer {
    enum {tokenBufferSize = 3};                 // Token lookahead buffer size

  public:
    Reader &reader;                             // Source of the characters being lexed
    World &world;

  private:
    Token tokens[tokenBufferSize];              // Circular buffer of recently read or lookahead tokens
    Token *nextToken;                           // Address of next Token in the circular buffer to be returned by get()
    int nTokensFwd;                             // Net number of Tokens on which unget() has been called; these Tokens are ahead of nextToken
#ifdef DEBUG
    int nTokensBack;                            // Number of Tokens on which unget() can be called; these Tokens are behind nextToken
    bool savedPreferRegExp[tokenBufferSize];    // Circular buffer of saved values of preferRegExp to get() calls
#endif
    uint32 lineNum;                             // Current line number
    bool lexingUnit;                            // True if lexing a unit identifier immediately following a number

  public:
    Lexer(Reader &reader, World &world);

    // Token-level interface; all of these are defined out of line.
    Token &get(bool preferRegExp);
    const Token &peek(bool preferRegExp);
    void unget();

  private:
    // Error reporting.
    void syntaxError(const char *message, uint backUp = 1);

    // Character-level helpers.
    char16orEOF getChar();
    char16orEOF internalGetChar(char16orEOF ch);
    char16orEOF peekChar();
    char16orEOF internalPeekChar(char16orEOF ch);
    bool testChar(char16 ch);

    // Scanners for the individual lexical elements.
    char16 lexEscape(bool unicodeOnly);
    bool lexIdentifier(String &s, bool allowLeadingDigit);
    bool lexNumeral();
    void lexString(String &s, char16 separator);
    void lexRegExp();
    void lexToken(bool preferRegExp);
};
}
#endif