mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-06 00:55:37 +00:00
1127 lines
29 KiB
C++
1127 lines
29 KiB
C++
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
|
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
|
|
/* tokenization of CSS style sheets */
|
|
|
|
#include <math.h> // must be first due to symbol conflicts
|
|
|
|
#include "nsCSSScanner.h"
|
|
#include "nsStyleUtil.h"
|
|
#include "mozilla/css/ErrorReporter.h"
|
|
#include "mozilla/Likely.h"
|
|
#include "mozilla/Util.h"
|
|
|
|
using mozilla::ArrayLength;
|
|
|
|
static const uint8_t IS_HEX_DIGIT = 0x01;
|
|
static const uint8_t START_IDENT = 0x02;
|
|
static const uint8_t IS_IDENT = 0x04;
|
|
static const uint8_t IS_WHITESPACE = 0x08;
|
|
static const uint8_t IS_URL_CHAR = 0x10;
|
|
|
|
#define W IS_WHITESPACE
|
|
#define I IS_IDENT
|
|
#define U IS_URL_CHAR
|
|
#define S START_IDENT
|
|
#define UI IS_IDENT |IS_URL_CHAR
|
|
#define USI IS_IDENT|START_IDENT |IS_URL_CHAR
|
|
#define UXI IS_IDENT |IS_HEX_DIGIT|IS_URL_CHAR
|
|
#define UXSI IS_IDENT|START_IDENT|IS_HEX_DIGIT|IS_URL_CHAR
|
|
|
|
static const uint8_t gLexTable[] = {
|
|
// TAB LF FF CR
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, W, W, 0, W, W, 0, 0,
|
|
//
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
// SPC ! " # $ % & ' ( ) * + , - . /
|
|
W, U, 0, U, U, U, U, 0, 0, 0, U, U, U, UI, U, U,
|
|
// 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
|
|
UXI,UXI,UXI,UXI,UXI,UXI,UXI,UXI,UXI,UXI,U, U, U, U, U, U,
|
|
// @ A B C D E F G H I J K L M N O
|
|
U,UXSI,UXSI,UXSI,UXSI,UXSI,UXSI,USI,USI,USI,USI,USI,USI,USI,USI,USI,
|
|
// P Q R S T U V W X Y Z [ \ ] ^ _
|
|
USI,USI,USI,USI,USI,USI,USI,USI,USI,USI,USI,U, S, U, U, USI,
|
|
// ` a b c d e f g h i j k l m n o
|
|
U,UXSI,UXSI,UXSI,UXSI,UXSI,UXSI,USI,USI,USI,USI,USI,USI,USI,USI,USI,
|
|
// p q r s t u v w x y z { | } ~
|
|
USI,USI,USI,USI,USI,USI,USI,USI,USI,USI,USI,U, U, U, U, 0,
|
|
// U+008*
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
// U+009*
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
// U+00A*
|
|
USI,USI,USI,USI,USI,USI,USI,USI,USI,USI,USI,USI,USI,USI,USI,USI,
|
|
// U+00B*
|
|
USI,USI,USI,USI,USI,USI,USI,USI,USI,USI,USI,USI,USI,USI,USI,USI,
|
|
// U+00C*
|
|
USI,USI,USI,USI,USI,USI,USI,USI,USI,USI,USI,USI,USI,USI,USI,USI,
|
|
// U+00D*
|
|
USI,USI,USI,USI,USI,USI,USI,USI,USI,USI,USI,USI,USI,USI,USI,USI,
|
|
// U+00E*
|
|
USI,USI,USI,USI,USI,USI,USI,USI,USI,USI,USI,USI,USI,USI,USI,USI,
|
|
// U+00F*
|
|
USI,USI,USI,USI,USI,USI,USI,USI,USI,USI,USI,USI,USI,USI,USI,USI
|
|
};
|
|
|
|
MOZ_STATIC_ASSERT(NS_ARRAY_LENGTH(gLexTable) == 256,
|
|
"gLexTable expected to cover all 2^8 possible PRUint8s");
|
|
|
|
#undef W
|
|
#undef S
|
|
#undef I
|
|
#undef U
|
|
#undef UI
|
|
#undef USI
|
|
#undef UXI
|
|
#undef UXSI
|
|
|
|
static inline bool
|
|
IsIdentStart(int32_t aChar)
|
|
{
|
|
return aChar >= 0 &&
|
|
(aChar >= 256 || (gLexTable[aChar] & START_IDENT) != 0);
|
|
}
|
|
|
|
static inline bool
|
|
StartsIdent(int32_t aFirstChar, int32_t aSecondChar)
|
|
{
|
|
return IsIdentStart(aFirstChar) ||
|
|
(aFirstChar == '-' && IsIdentStart(aSecondChar));
|
|
}
|
|
|
|
static inline bool
|
|
IsWhitespace(int32_t ch) {
|
|
return uint32_t(ch) < 256 && (gLexTable[ch] & IS_WHITESPACE) != 0;
|
|
}
|
|
|
|
static inline bool
|
|
IsDigit(int32_t ch) {
|
|
return (ch >= '0') && (ch <= '9');
|
|
}
|
|
|
|
static inline bool
|
|
IsHexDigit(int32_t ch) {
|
|
return uint32_t(ch) < 256 && (gLexTable[ch] & IS_HEX_DIGIT) != 0;
|
|
}
|
|
|
|
static inline bool
|
|
IsIdent(int32_t ch) {
|
|
return ch >= 0 && (ch >= 256 || (gLexTable[ch] & IS_IDENT) != 0);
|
|
}
|
|
|
|
static inline bool
|
|
IsURLChar(int32_t ch) {
|
|
return ch >= 0 && (ch >= 256 || (gLexTable[ch] & IS_URL_CHAR) != 0);
|
|
}
|
|
|
|
static inline uint32_t
|
|
DecimalDigitValue(int32_t ch)
|
|
{
|
|
return ch - '0';
|
|
}
|
|
|
|
static inline uint32_t
|
|
HexDigitValue(int32_t ch)
|
|
{
|
|
if (IsDigit(ch)) {
|
|
return DecimalDigitValue(ch);
|
|
} else {
|
|
// Note: c&7 just keeps the low three bits which causes
|
|
// upper and lower case alphabetics to both yield their
|
|
// "relative to 10" value for computing the hex value.
|
|
return (ch & 0x7) + 9;
|
|
}
|
|
}
|
|
|
|
nsCSSToken::nsCSSToken()
|
|
{
|
|
mType = eCSSToken_Symbol;
|
|
}
|
|
|
|
void
|
|
nsCSSToken::AppendToString(nsString& aBuffer) const
|
|
{
|
|
switch (mType) {
|
|
case eCSSToken_Ident:
|
|
nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
|
|
break;
|
|
|
|
case eCSSToken_AtKeyword:
|
|
aBuffer.Append('@');
|
|
nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
|
|
break;
|
|
|
|
case eCSSToken_ID:
|
|
case eCSSToken_Ref:
|
|
aBuffer.Append('#');
|
|
nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
|
|
break;
|
|
|
|
case eCSSToken_Function:
|
|
nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
|
|
aBuffer.Append('(');
|
|
break;
|
|
|
|
case eCSSToken_URL:
|
|
case eCSSToken_Bad_URL:
|
|
aBuffer.AppendLiteral("url(");
|
|
if (mSymbol != PRUnichar(0)) {
|
|
nsStyleUtil::AppendEscapedCSSString(mIdent, aBuffer, mSymbol);
|
|
} else {
|
|
aBuffer.Append(mIdent);
|
|
}
|
|
if (mType == eCSSToken_URL) {
|
|
aBuffer.Append(PRUnichar(')'));
|
|
}
|
|
break;
|
|
|
|
case eCSSToken_Number:
|
|
if (mIntegerValid) {
|
|
aBuffer.AppendInt(mInteger, 10);
|
|
} else {
|
|
aBuffer.AppendFloat(mNumber);
|
|
}
|
|
break;
|
|
|
|
case eCSSToken_Percentage:
|
|
NS_ASSERTION(!mIntegerValid, "How did a percentage token get this set?");
|
|
aBuffer.AppendFloat(mNumber * 100.0f);
|
|
aBuffer.Append(PRUnichar('%'));
|
|
break;
|
|
|
|
case eCSSToken_Dimension:
|
|
if (mIntegerValid) {
|
|
aBuffer.AppendInt(mInteger, 10);
|
|
} else {
|
|
aBuffer.AppendFloat(mNumber);
|
|
}
|
|
nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
|
|
break;
|
|
|
|
case eCSSToken_Bad_String:
|
|
nsStyleUtil::AppendEscapedCSSString(mIdent, aBuffer, mSymbol);
|
|
// remove the trailing quote character
|
|
aBuffer.Truncate(aBuffer.Length() - 1);
|
|
break;
|
|
|
|
case eCSSToken_String:
|
|
nsStyleUtil::AppendEscapedCSSString(mIdent, aBuffer, mSymbol);
|
|
break;
|
|
|
|
case eCSSToken_Symbol:
|
|
aBuffer.Append(mSymbol);
|
|
break;
|
|
|
|
case eCSSToken_WhiteSpace:
|
|
aBuffer.Append(' ');
|
|
break;
|
|
|
|
case eCSSToken_HTMLComment:
|
|
case eCSSToken_URange:
|
|
aBuffer.Append(mIdent);
|
|
break;
|
|
|
|
case eCSSToken_Includes:
|
|
aBuffer.AppendLiteral("~=");
|
|
break;
|
|
case eCSSToken_Dashmatch:
|
|
aBuffer.AppendLiteral("|=");
|
|
break;
|
|
case eCSSToken_Beginsmatch:
|
|
aBuffer.AppendLiteral("^=");
|
|
break;
|
|
case eCSSToken_Endsmatch:
|
|
aBuffer.AppendLiteral("$=");
|
|
break;
|
|
case eCSSToken_Containsmatch:
|
|
aBuffer.AppendLiteral("*=");
|
|
break;
|
|
|
|
default:
|
|
NS_ERROR("invalid token type");
|
|
break;
|
|
}
|
|
}
|
|
|
|
nsCSSScanner::nsCSSScanner(const nsAString& aBuffer, uint32_t aLineNumber)
|
|
: mReadPointer(aBuffer.BeginReading())
|
|
, mOffset(0)
|
|
, mCount(aBuffer.Length())
|
|
, mPushback(mLocalPushback)
|
|
, mPushbackCount(0)
|
|
, mPushbackSize(ArrayLength(mLocalPushback))
|
|
, mLineNumber(aLineNumber)
|
|
, mLineOffset(0)
|
|
, mRecordStartOffset(0)
|
|
, mReporter(nullptr)
|
|
, mSVGMode(false)
|
|
, mRecording(false)
|
|
{
|
|
MOZ_COUNT_CTOR(nsCSSScanner);
|
|
}
|
|
|
|
nsCSSScanner::~nsCSSScanner()
|
|
{
|
|
MOZ_COUNT_DTOR(nsCSSScanner);
|
|
if (mLocalPushback != mPushback) {
|
|
delete [] mPushback;
|
|
}
|
|
}
|
|
|
|
// Returns -1 on error or eof
|
|
int32_t
|
|
nsCSSScanner::Read()
|
|
{
|
|
int32_t rv;
|
|
if (0 < mPushbackCount) {
|
|
rv = int32_t(mPushback[--mPushbackCount]);
|
|
} else {
|
|
if (mOffset == mCount) {
|
|
return -1;
|
|
}
|
|
rv = int32_t(mReadPointer[mOffset++]);
|
|
// There are four types of newlines in CSS: "\r", "\n", "\r\n", and "\f".
|
|
// To simplify dealing with newlines, they are all normalized to "\n" here
|
|
if (rv == '\r') {
|
|
if (mOffset < mCount && mReadPointer[mOffset] == '\n') {
|
|
mOffset++;
|
|
}
|
|
rv = '\n';
|
|
} else if (rv == '\f') {
|
|
rv = '\n';
|
|
}
|
|
if (rv == '\n') {
|
|
// 0 is a magical line number meaning that we don't know (i.e., script)
|
|
if (mLineNumber != 0)
|
|
++mLineNumber;
|
|
mLineOffset = 0;
|
|
}
|
|
}
|
|
return rv;
|
|
}
|
|
|
|
int32_t
|
|
nsCSSScanner::Peek()
|
|
{
|
|
if (0 == mPushbackCount) {
|
|
int32_t ch = Read();
|
|
if (ch < 0) {
|
|
return -1;
|
|
}
|
|
mPushback[0] = PRUnichar(ch);
|
|
mPushbackCount++;
|
|
}
|
|
return int32_t(mPushback[mPushbackCount - 1]);
|
|
}
|
|
|
|
void
|
|
nsCSSScanner::Pushback(PRUnichar aChar)
|
|
{
|
|
if (mPushbackCount == mPushbackSize) { // grow buffer
|
|
PRUnichar* newPushback = new PRUnichar[mPushbackSize + 4];
|
|
if (nullptr == newPushback) {
|
|
return;
|
|
}
|
|
mPushbackSize += 4;
|
|
memcpy(newPushback, mPushback, sizeof(PRUnichar) * mPushbackCount);
|
|
if (mPushback != mLocalPushback) {
|
|
delete [] mPushback;
|
|
}
|
|
mPushback = newPushback;
|
|
}
|
|
mPushback[mPushbackCount++] = aChar;
|
|
}
|
|
|
|
void
|
|
nsCSSScanner::StartRecording()
|
|
{
|
|
NS_ASSERTION(!mRecording, "already started recording");
|
|
mRecording = true;
|
|
mRecordStartOffset = mOffset - mPushbackCount;
|
|
}
|
|
|
|
void
|
|
nsCSSScanner::StopRecording()
|
|
{
|
|
NS_ASSERTION(mRecording, "haven't started recording");
|
|
mRecording = false;
|
|
}
|
|
|
|
void
|
|
nsCSSScanner::StopRecording(nsString& aBuffer)
|
|
{
|
|
NS_ASSERTION(mRecording, "haven't started recording");
|
|
mRecording = false;
|
|
aBuffer.Append(mReadPointer + mRecordStartOffset,
|
|
mOffset - mPushbackCount - mRecordStartOffset);
|
|
}
|
|
|
|
bool
|
|
nsCSSScanner::LookAhead(PRUnichar aChar)
|
|
{
|
|
int32_t ch = Read();
|
|
if (ch < 0) {
|
|
return false;
|
|
}
|
|
if (ch == aChar) {
|
|
return true;
|
|
}
|
|
Pushback(ch);
|
|
return false;
|
|
}
|
|
|
|
bool
|
|
nsCSSScanner::LookAheadOrEOF(PRUnichar aChar)
|
|
{
|
|
int32_t ch = Read();
|
|
if (ch < 0) {
|
|
return true;
|
|
}
|
|
if (ch == aChar) {
|
|
return true;
|
|
}
|
|
Pushback(ch);
|
|
return false;
|
|
}
|
|
|
|
void
|
|
nsCSSScanner::EatWhiteSpace()
|
|
{
|
|
for (;;) {
|
|
int32_t ch = Read();
|
|
if (ch < 0) {
|
|
break;
|
|
}
|
|
if ((ch != ' ') && (ch != '\n') && (ch != '\t')) {
|
|
Pushback(ch);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
bool
|
|
nsCSSScanner::Next(nsCSSToken& aToken)
|
|
{
|
|
for (;;) { // Infinite loop so we can restart after comments.
|
|
int32_t ch = Read();
|
|
if (ch < 0) {
|
|
return false;
|
|
}
|
|
|
|
// UNICODE-RANGE
|
|
if ((ch == 'u' || ch == 'U') && Peek() == '+')
|
|
return ParseURange(ch, aToken);
|
|
|
|
// IDENT
|
|
if (StartsIdent(ch, Peek()))
|
|
return ParseIdent(ch, aToken);
|
|
|
|
// AT_KEYWORD
|
|
if (ch == '@') {
|
|
return ParseAtKeyword(aToken);
|
|
}
|
|
|
|
// NUMBER or DIM
|
|
if ((ch == '.') || (ch == '+') || (ch == '-')) {
|
|
int32_t nextChar = Peek();
|
|
if (IsDigit(nextChar)) {
|
|
return ParseNumber(ch, aToken);
|
|
}
|
|
else if (('.' == nextChar) && ('.' != ch)) {
|
|
nextChar = Read();
|
|
int32_t followingChar = Peek();
|
|
Pushback(nextChar);
|
|
if (IsDigit(followingChar))
|
|
return ParseNumber(ch, aToken);
|
|
}
|
|
}
|
|
if (IsDigit(ch)) {
|
|
return ParseNumber(ch, aToken);
|
|
}
|
|
|
|
// ID
|
|
if (ch == '#') {
|
|
return ParseRef(ch, aToken);
|
|
}
|
|
|
|
// STRING
|
|
if ((ch == '"') || (ch == '\'')) {
|
|
return ParseString(ch, aToken);
|
|
}
|
|
|
|
// WS
|
|
if (IsWhitespace(ch)) {
|
|
aToken.mType = eCSSToken_WhiteSpace;
|
|
aToken.mIdent.Assign(PRUnichar(ch));
|
|
EatWhiteSpace();
|
|
return true;
|
|
}
|
|
if (ch == '/' && !IsSVGMode()) {
|
|
int32_t nextChar = Peek();
|
|
if (nextChar == '*') {
|
|
Read();
|
|
// FIXME: Editor wants comments to be preserved (bug 60290).
|
|
if (!SkipCComment()) {
|
|
return false;
|
|
}
|
|
continue; // start again at the beginning
|
|
}
|
|
}
|
|
if (ch == '<') { // consume HTML comment tags
|
|
if (LookAhead('!')) {
|
|
if (LookAhead('-')) {
|
|
if (LookAhead('-')) {
|
|
aToken.mType = eCSSToken_HTMLComment;
|
|
aToken.mIdent.AssignLiteral("<!--");
|
|
return true;
|
|
}
|
|
Pushback('-');
|
|
}
|
|
Pushback('!');
|
|
}
|
|
}
|
|
if (ch == '-') { // check for HTML comment end
|
|
if (LookAhead('-')) {
|
|
if (LookAhead('>')) {
|
|
aToken.mType = eCSSToken_HTMLComment;
|
|
aToken.mIdent.AssignLiteral("-->");
|
|
return true;
|
|
}
|
|
Pushback('-');
|
|
}
|
|
}
|
|
|
|
// INCLUDES ("~=") and DASHMATCH ("|=")
|
|
if (( ch == '|' ) || ( ch == '~' ) || ( ch == '^' ) ||
|
|
( ch == '$' ) || ( ch == '*' )) {
|
|
int32_t nextChar = Read();
|
|
if ( nextChar == '=' ) {
|
|
if (ch == '~') {
|
|
aToken.mType = eCSSToken_Includes;
|
|
}
|
|
else if (ch == '|') {
|
|
aToken.mType = eCSSToken_Dashmatch;
|
|
}
|
|
else if (ch == '^') {
|
|
aToken.mType = eCSSToken_Beginsmatch;
|
|
}
|
|
else if (ch == '$') {
|
|
aToken.mType = eCSSToken_Endsmatch;
|
|
}
|
|
else if (ch == '*') {
|
|
aToken.mType = eCSSToken_Containsmatch;
|
|
}
|
|
return true;
|
|
} else if (nextChar >= 0) {
|
|
Pushback(nextChar);
|
|
}
|
|
}
|
|
aToken.mType = eCSSToken_Symbol;
|
|
aToken.mSymbol = ch;
|
|
return true;
|
|
}
|
|
}
|
|
|
|
bool
|
|
nsCSSScanner::NextURL(nsCSSToken& aToken)
|
|
{
|
|
EatWhiteSpace();
|
|
|
|
int32_t ch = Read();
|
|
if (ch < 0) {
|
|
return false;
|
|
}
|
|
|
|
// STRING
|
|
if ((ch == '"') || (ch == '\'')) {
|
|
#ifdef DEBUG
|
|
bool ok =
|
|
#endif
|
|
ParseString(ch, aToken);
|
|
NS_ABORT_IF_FALSE(ok, "ParseString should never fail, "
|
|
"since there's always something read");
|
|
|
|
NS_ABORT_IF_FALSE(aToken.mType == eCSSToken_String ||
|
|
aToken.mType == eCSSToken_Bad_String,
|
|
"unexpected token type");
|
|
if (MOZ_LIKELY(aToken.mType == eCSSToken_String)) {
|
|
EatWhiteSpace();
|
|
if (LookAheadOrEOF(')')) {
|
|
aToken.mType = eCSSToken_URL;
|
|
} else {
|
|
aToken.mType = eCSSToken_Bad_URL;
|
|
}
|
|
} else {
|
|
aToken.mType = eCSSToken_Bad_URL;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// Process a url lexical token. A CSS1 url token can contain
|
|
// characters beyond identifier characters (e.g. '/', ':', etc.)
|
|
// Because of this the normal rules for tokenizing the input don't
|
|
// apply very well. To simplify the parser and relax some of the
|
|
// requirements on the scanner we parse url's here. If we find a
|
|
// malformed URL then we emit a token of type "Bad_URL" so that
|
|
// the CSS1 parser can ignore the invalid input. The parser must
|
|
// treat a Bad_URL token like a Function token, and process
|
|
// tokens until a matching parenthesis.
|
|
|
|
aToken.mType = eCSSToken_Bad_URL;
|
|
aToken.mSymbol = PRUnichar(0);
|
|
nsString& ident = aToken.mIdent;
|
|
ident.SetLength(0);
|
|
|
|
// start of a non-quoted url (which may be empty)
|
|
bool ok = true;
|
|
for (;;) {
|
|
if (IsURLChar(ch)) {
|
|
// A regular url character.
|
|
ident.Append(PRUnichar(ch));
|
|
} else if (ch == ')') {
|
|
// All done
|
|
break;
|
|
} else if (IsWhitespace(ch)) {
|
|
// Whitespace is allowed at the end of the URL
|
|
EatWhiteSpace();
|
|
// Consume the close paren if we have it; if not we're an invalid URL.
|
|
ok = LookAheadOrEOF(')');
|
|
break;
|
|
} else if (ch == '\\') {
|
|
if (!ParseAndAppendEscape(ident, false)) {
|
|
ok = false;
|
|
Pushback(ch);
|
|
break;
|
|
}
|
|
} else {
|
|
// This is an invalid URL spec
|
|
ok = false;
|
|
Pushback(ch); // push it back so the parser can match tokens and
|
|
// then closing parenthesis
|
|
break;
|
|
}
|
|
|
|
ch = Read();
|
|
if (ch < 0) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
// If the result of the above scanning is ok then change the token
|
|
// type to a useful one.
|
|
if (ok) {
|
|
aToken.mType = eCSSToken_URL;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
|
|
/**
|
|
* Returns whether an escape was succesfully parsed; if it was not,
|
|
* the backslash needs to be its own symbol token.
|
|
*/
|
|
bool
|
|
nsCSSScanner::ParseAndAppendEscape(nsString& aOutput, bool aInString)
|
|
{
|
|
int32_t ch = Read();
|
|
if (ch < 0) {
|
|
return false;
|
|
}
|
|
if (IsHexDigit(ch)) {
|
|
int32_t rv = 0;
|
|
int i;
|
|
Pushback(ch);
|
|
for (i = 0; i < 6; i++) { // up to six digits
|
|
ch = Read();
|
|
if (ch < 0) {
|
|
// Whoops: error or premature eof
|
|
break;
|
|
}
|
|
if (!IsHexDigit(ch) && !IsWhitespace(ch)) {
|
|
Pushback(ch);
|
|
break;
|
|
} else if (IsHexDigit(ch)) {
|
|
rv = rv * 16 + HexDigitValue(ch);
|
|
} else {
|
|
NS_ASSERTION(IsWhitespace(ch), "bad control flow");
|
|
// single space ends escape
|
|
break;
|
|
}
|
|
}
|
|
if (6 == i) { // look for trailing whitespace and eat it
|
|
ch = Peek();
|
|
if (IsWhitespace(ch)) {
|
|
(void) Read();
|
|
}
|
|
}
|
|
NS_ASSERTION(rv >= 0, "How did rv become negative?");
|
|
// "[at most six hexadecimal digits following a backslash] stand
|
|
// for the ISO 10646 character with that number, which must not be
|
|
// zero. (It is undefined in CSS 2.1 what happens if a style sheet
|
|
// does contain a character with Unicode codepoint zero.)"
|
|
// -- CSS2.1 section 4.1.3
|
|
//
|
|
// Silently deleting \0 opens a content-filtration loophole (see
|
|
// bug 228856), so what we do instead is pretend the "cancels the
|
|
// meaning of special characters" rule applied.
|
|
if (rv > 0) {
|
|
AppendUCS4ToUTF16(ENSURE_VALID_CHAR(rv), aOutput);
|
|
} else {
|
|
while (i--)
|
|
aOutput.Append('0');
|
|
if (IsWhitespace(ch))
|
|
Pushback(ch);
|
|
}
|
|
return true;
|
|
}
|
|
// "Any character except a hexadecimal digit can be escaped to
|
|
// remove its special meaning by putting a backslash in front"
|
|
// -- CSS1 spec section 7.1
|
|
if (ch == '\n') {
|
|
if (!aInString) {
|
|
// Outside of strings (which includes url() that contains a
|
|
// string), escaped newlines aren't special, and just tokenize as
|
|
// eCSSToken_Symbol (DELIM).
|
|
Pushback(ch);
|
|
return false;
|
|
}
|
|
// In strings (and in url() containing a string), escaped newlines
|
|
// are just dropped to allow splitting over multiple lines.
|
|
} else {
|
|
aOutput.Append(ch);
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* Gather up the characters in an identifier. The identfier was
|
|
* started by "aChar" which will be appended to aIdent. The result
|
|
* will be aIdent with all of the identifier characters appended
|
|
* until the first non-identifier character is seen. The termination
|
|
* character is unread for the future re-reading.
|
|
*
|
|
* Returns failure when the character sequence does not form an ident at
|
|
* all, in which case the caller is responsible for pushing back or
|
|
* otherwise handling aChar. (This occurs only when aChar is '\'.)
|
|
*/
|
|
bool
|
|
nsCSSScanner::GatherIdent(int32_t aChar, nsString& aIdent)
|
|
{
|
|
if (aChar == '\\') {
|
|
if (!ParseAndAppendEscape(aIdent, false)) {
|
|
return false;
|
|
}
|
|
} else {
|
|
MOZ_ASSERT(aChar > 0);
|
|
aIdent.Append(aChar);
|
|
}
|
|
for (;;) {
|
|
// If nothing in pushback, first try to get as much as possible in one go
|
|
if (!mPushbackCount && mOffset < mCount) {
|
|
// See how much we can consume and append in one go
|
|
uint32_t n = mOffset;
|
|
// Count number of Ident characters that can be processed
|
|
while (n < mCount && IsIdent(mReadPointer[n])) {
|
|
++n;
|
|
}
|
|
// Add to the token what we have so far
|
|
if (n > mOffset) {
|
|
aIdent.Append(&mReadPointer[mOffset], n - mOffset);
|
|
mOffset = n;
|
|
}
|
|
}
|
|
|
|
aChar = Read();
|
|
if (aChar < 0) break;
|
|
if (aChar == '\\') {
|
|
if (!ParseAndAppendEscape(aIdent, false)) {
|
|
Pushback(aChar);
|
|
break;
|
|
}
|
|
} else if (IsIdent(aChar)) {
|
|
aIdent.Append(PRUnichar(aChar));
|
|
} else {
|
|
Pushback(aChar);
|
|
break;
|
|
}
|
|
}
|
|
MOZ_ASSERT(aIdent.Length() > 0);
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
nsCSSScanner::ParseRef(int32_t aChar, nsCSSToken& aToken)
|
|
{
|
|
// Fall back for when we don't have name characters following:
|
|
aToken.mType = eCSSToken_Symbol;
|
|
aToken.mSymbol = aChar;
|
|
|
|
int32_t ch = Read();
|
|
if (ch < 0) {
|
|
return true;
|
|
}
|
|
if (IsIdent(ch) || ch == '\\') {
|
|
// First char after the '#' is a valid ident char (or an escape),
|
|
// so it makes sense to keep going
|
|
nsCSSTokenType type =
|
|
StartsIdent(ch, Peek()) ? eCSSToken_ID : eCSSToken_Ref;
|
|
aToken.mIdent.SetLength(0);
|
|
if (GatherIdent(ch, aToken.mIdent)) {
|
|
aToken.mType = type;
|
|
return true;
|
|
}
|
|
}
|
|
|
|
// No ident chars after the '#'. Just unread |ch| and get out of here.
|
|
Pushback(ch);
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
nsCSSScanner::ParseIdent(int32_t aChar, nsCSSToken& aToken)
|
|
{
|
|
nsString& ident = aToken.mIdent;
|
|
ident.SetLength(0);
|
|
if (!GatherIdent(aChar, ident)) {
|
|
aToken.mType = eCSSToken_Symbol;
|
|
aToken.mSymbol = aChar;
|
|
return true;
|
|
}
|
|
|
|
nsCSSTokenType tokenType = eCSSToken_Ident;
|
|
// look for functions (ie: "ident(")
|
|
if (Peek() == PRUnichar('(')) {
|
|
Read();
|
|
tokenType = eCSSToken_Function;
|
|
|
|
if (ident.LowerCaseEqualsLiteral("url")) {
|
|
NextURL(aToken); // ignore return value, since *we* read something
|
|
return true;
|
|
}
|
|
}
|
|
|
|
aToken.mType = tokenType;
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
nsCSSScanner::ParseAtKeyword(nsCSSToken& aToken)
|
|
{
|
|
int32_t ch = Read();
|
|
if (StartsIdent(ch, Peek())) {
|
|
aToken.mIdent.SetLength(0);
|
|
aToken.mType = eCSSToken_AtKeyword;
|
|
if (GatherIdent(ch, aToken.mIdent)) {
|
|
return true;
|
|
}
|
|
}
|
|
if (ch >= 0) {
|
|
Pushback(ch);
|
|
}
|
|
aToken.mType = eCSSToken_Symbol;
|
|
aToken.mSymbol = PRUnichar('@');
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
nsCSSScanner::ParseNumber(int32_t c, nsCSSToken& aToken)
|
|
{
|
|
NS_PRECONDITION(c == '.' || c == '+' || c == '-' || IsDigit(c),
|
|
"Why did we get called?");
|
|
aToken.mHasSign = (c == '+' || c == '-');
|
|
|
|
// Our sign.
|
|
int32_t sign = c == '-' ? -1 : 1;
|
|
// Absolute value of the integer part of the mantissa. This is a double so
|
|
// we don't run into overflow issues for consumers that only care about our
|
|
// floating-point value while still being able to express the full int32_t
|
|
// range for consumers who want integers.
|
|
double intPart = 0;
|
|
// Fractional part of the mantissa. This is a double so that when we convert
|
|
// to float at the end we'll end up rounding to nearest float instead of
|
|
// truncating down (as we would if fracPart were a float and we just
|
|
// effectively lost the last several digits).
|
|
double fracPart = 0;
|
|
// Absolute value of the power of 10 that we should multiply by (only
|
|
// relevant for numbers in scientific notation). Has to be a signed integer,
|
|
// because multiplication of signed by unsigned converts the unsigned to
|
|
// signed, so if we plan to actually multiply by expSign...
|
|
int32_t exponent = 0;
|
|
// Sign of the exponent.
|
|
int32_t expSign = 1;
|
|
|
|
if (aToken.mHasSign) {
|
|
NS_ASSERTION(c != '.', "How did that happen?");
|
|
c = Read();
|
|
}
|
|
|
|
bool gotDot = (c == '.');
|
|
|
|
if (!gotDot) {
|
|
// Parse the integer part of the mantisssa
|
|
NS_ASSERTION(IsDigit(c), "Why did we get called?");
|
|
do {
|
|
intPart = 10*intPart + DecimalDigitValue(c);
|
|
c = Read();
|
|
// The IsDigit check will do the right thing even if Read() returns < 0
|
|
} while (IsDigit(c));
|
|
|
|
gotDot = (c == '.') && IsDigit(Peek());
|
|
}
|
|
|
|
if (gotDot) {
|
|
// Parse the fractional part of the mantissa.
|
|
c = Read();
|
|
NS_ASSERTION(IsDigit(c), "How did we get here?");
|
|
// Power of ten by which we need to divide our next digit
|
|
float divisor = 10;
|
|
do {
|
|
fracPart += DecimalDigitValue(c) / divisor;
|
|
divisor *= 10;
|
|
c = Read();
|
|
// The IsDigit check will do the right thing even if Read() returns < 0
|
|
} while (IsDigit(c));
|
|
}
|
|
|
|
bool gotE = false;
|
|
if (IsSVGMode() && (c == 'e' || c == 'E')) {
|
|
int32_t nextChar = Peek();
|
|
int32_t expSignChar = 0;
|
|
if (nextChar == '-' || nextChar == '+') {
|
|
expSignChar = Read();
|
|
nextChar = Peek();
|
|
}
|
|
if (IsDigit(nextChar)) {
|
|
gotE = true;
|
|
if (expSignChar == '-') {
|
|
expSign = -1;
|
|
}
|
|
|
|
c = Read();
|
|
NS_ASSERTION(IsDigit(c), "Peek() must have lied");
|
|
do {
|
|
exponent = 10*exponent + DecimalDigitValue(c);
|
|
c = Read();
|
|
// The IsDigit check will do the right thing even if Read() returns < 0
|
|
} while (IsDigit(c));
|
|
} else {
|
|
if (expSignChar) {
|
|
Pushback(expSignChar);
|
|
}
|
|
}
|
|
}
|
|
|
|
nsCSSTokenType type = eCSSToken_Number;
|
|
|
|
// Set mIntegerValid for all cases (except %, below) because we need
|
|
// it for the "2n" in :nth-child(2n).
|
|
aToken.mIntegerValid = false;
|
|
|
|
// Time to reassemble our number.
|
|
float value = float(sign * (intPart + fracPart));
|
|
if (gotE) {
|
|
// pow(), not powf(), because at least wince doesn't have the latter.
|
|
// And explicitly cast everything to doubles to avoid issues with
|
|
// overloaded pow() on Windows.
|
|
value *= pow(10.0, double(expSign * exponent));
|
|
} else if (!gotDot) {
|
|
// Clamp values outside of integer range.
|
|
if (sign > 0) {
|
|
aToken.mInteger = int32_t(NS_MIN(intPart, double(INT32_MAX)));
|
|
} else {
|
|
aToken.mInteger = int32_t(NS_MAX(-intPart, double(INT32_MIN)));
|
|
}
|
|
aToken.mIntegerValid = true;
|
|
}
|
|
|
|
nsString& ident = aToken.mIdent;
|
|
ident.Truncate();
|
|
|
|
// Look at character that terminated the number
|
|
if (c >= 0) {
|
|
if (StartsIdent(c, Peek())) {
|
|
if (GatherIdent(c, ident)) {
|
|
type = eCSSToken_Dimension;
|
|
}
|
|
} else if ('%' == c) {
|
|
type = eCSSToken_Percentage;
|
|
value = value / 100.0f;
|
|
aToken.mIntegerValid = false;
|
|
} else {
|
|
// Put back character that stopped numeric scan
|
|
Pushback(c);
|
|
}
|
|
}
|
|
aToken.mNumber = value;
|
|
aToken.mType = type;
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
nsCSSScanner::SkipCComment()
|
|
{
|
|
for (;;) {
|
|
int32_t ch = Read();
|
|
if (ch < 0) break;
|
|
if (ch == '*') {
|
|
if (LookAhead('/')) {
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
|
|
mReporter->ReportUnexpectedEOF("PECommentEOF");
|
|
return false;
|
|
}
|
|
|
|
bool
|
|
nsCSSScanner::ParseString(int32_t aStop, nsCSSToken& aToken)
|
|
{
|
|
aToken.mIdent.SetLength(0);
|
|
aToken.mType = eCSSToken_String;
|
|
aToken.mSymbol = PRUnichar(aStop); // remember how it's quoted
|
|
for (;;) {
|
|
// If nothing in pushback, first try to get as much as possible in one go
|
|
if (!mPushbackCount && mOffset < mCount) {
|
|
// See how much we can consume and append in one go
|
|
uint32_t n = mOffset;
|
|
// Count number of characters that can be processed
|
|
for (;n < mCount; ++n) {
|
|
PRUnichar nextChar = mReadPointer[n];
|
|
if ((nextChar == aStop) || (nextChar == '\\') ||
|
|
(nextChar == '\n') || (nextChar == '\r') || (nextChar == '\f')) {
|
|
break;
|
|
}
|
|
}
|
|
// Add to the token what we have so far
|
|
if (n > mOffset) {
|
|
aToken.mIdent.Append(&mReadPointer[mOffset], n - mOffset);
|
|
mOffset = n;
|
|
}
|
|
}
|
|
int32_t ch = Read();
|
|
if (ch < 0 || ch == aStop) {
|
|
break;
|
|
}
|
|
if (ch == '\n') {
|
|
aToken.mType = eCSSToken_Bad_String;
|
|
mReporter->ReportUnexpected("SEUnterminatedString", aToken);
|
|
break;
|
|
}
|
|
if (ch == '\\') {
|
|
if (!ParseAndAppendEscape(aToken.mIdent, true)) {
|
|
aToken.mType = eCSSToken_Bad_String;
|
|
Pushback(ch);
|
|
// For strings, the only case where ParseAndAppendEscape will
|
|
// return false is when there's a backslash to start an escape
|
|
// immediately followed by end-of-stream. In that case, the
|
|
// correct tokenization is badstring *followed* by a DELIM for
|
|
// the backslash, but as far as the author is concerned, it
|
|
// works pretty much the same as an unterminated string, so we
|
|
// use the same error message.
|
|
mReporter->ReportUnexpected("SEUnterminatedString", aToken);
|
|
break;
|
|
}
|
|
} else {
|
|
aToken.mIdent.Append(ch);
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// UNICODE-RANGE tokens match the regular expression
|
|
//
|
|
// u\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?
|
|
//
|
|
// However, some such tokens are "invalid". There are three valid forms:
|
|
//
|
|
// u+[0-9a-f]{x} 1 <= x <= 6
|
|
// u+[0-9a-f]{x}\?{y} 1 <= x+y <= 6
|
|
// u+[0-9a-f]{x}-[0-9a-f]{y} 1 <= x <= 6, 1 <= y <= 6
|
|
//
|
|
// All unicode-range tokens have their text recorded in mIdent; valid ones
|
|
// are also decoded into mInteger and mInteger2, and mIntegerValid is set.
|
|
|
|
bool
|
|
nsCSSScanner::ParseURange(int32_t aChar, nsCSSToken& aResult)
|
|
{
|
|
int32_t intro2 = Read();
|
|
int32_t ch = Peek();
|
|
|
|
// We should only ever be called if these things are true.
|
|
NS_ASSERTION(aChar == 'u' || aChar == 'U',
|
|
"unicode-range called with improper introducer (U)");
|
|
NS_ASSERTION(intro2 == '+',
|
|
"unicode-range called with improper introducer (+)");
|
|
|
|
// If the character immediately after the '+' is not a hex digit or
|
|
// '?', this is not really a unicode-range token; push everything
|
|
// back and scan the U as an ident.
|
|
if (!IsHexDigit(ch) && ch != '?') {
|
|
Pushback(intro2);
|
|
Pushback(aChar);
|
|
return ParseIdent(aChar, aResult);
|
|
}
|
|
|
|
aResult.mIdent.Truncate();
|
|
aResult.mIdent.Append(aChar);
|
|
aResult.mIdent.Append(intro2);
|
|
|
|
bool valid = true;
|
|
bool haveQues = false;
|
|
uint32_t low = 0;
|
|
uint32_t high = 0;
|
|
int i = 0;
|
|
|
|
for (;;) {
|
|
ch = Read();
|
|
i++;
|
|
if (i == 7 || !(IsHexDigit(ch) || ch == '?')) {
|
|
break;
|
|
}
|
|
|
|
aResult.mIdent.Append(ch);
|
|
if (IsHexDigit(ch)) {
|
|
if (haveQues) {
|
|
valid = false; // all question marks should be at the end
|
|
}
|
|
low = low*16 + HexDigitValue(ch);
|
|
high = high*16 + HexDigitValue(ch);
|
|
} else {
|
|
haveQues = true;
|
|
low = low*16 + 0x0;
|
|
high = high*16 + 0xF;
|
|
}
|
|
}
|
|
|
|
if (ch == '-' && IsHexDigit(Peek())) {
|
|
if (haveQues) {
|
|
valid = false;
|
|
}
|
|
|
|
aResult.mIdent.Append(ch);
|
|
high = 0;
|
|
i = 0;
|
|
for (;;) {
|
|
ch = Read();
|
|
i++;
|
|
if (i == 7 || !IsHexDigit(ch)) {
|
|
break;
|
|
}
|
|
aResult.mIdent.Append(ch);
|
|
high = high*16 + HexDigitValue(ch);
|
|
}
|
|
}
|
|
Pushback(ch);
|
|
|
|
aResult.mInteger = low;
|
|
aResult.mInteger2 = high;
|
|
aResult.mIntegerValid = valid;
|
|
aResult.mType = eCSSToken_URange;
|
|
return true;
|
|
}
|