mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-12-13 18:27:35 +00:00
63c6b08058
The main change is that once we discover we have a bad-url-token we consume everything up to, but not including, the next ')' character. While we do this we can cross line boundaries and don't bother about matching braces or quotes. We just keep going until we find the ')' or hit EOF.
1429 lines
38 KiB
C++
1429 lines
38 KiB
C++
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
|
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
|
|
/* tokenization of CSS style sheets */
|
|
|
|
#include "nsCSSScanner.h"
|
|
#include "nsStyleUtil.h"
|
|
#include "nsISupportsImpl.h"
|
|
#include "mozilla/ArrayUtils.h"
|
|
#include "mozilla/css/ErrorReporter.h"
|
|
#include "mozilla/Likely.h"
|
|
#include <algorithm>
|
|
|
|
/* Character class tables and related helper functions. */
|
|
|
|
// Bit flags naming the lexical classes an ASCII character can belong to.
// Every gLexTable entry is an OR-combination of the single-bit flags below.
static const uint8_t IS_HEX_DIGIT = 1 << 0;
static const uint8_t IS_IDSTART   = 1 << 1;
static const uint8_t IS_IDCHAR    = 1 << 2;
static const uint8_t IS_URL_CHAR  = 1 << 3;
static const uint8_t IS_HSPACE    = 1 << 4;
static const uint8_t IS_VSPACE    = 1 << 5;
static const uint8_t IS_STRING    = 1 << 6;
// Composite class: horizontal or vertical whitespace.
static const uint8_t IS_SPACE     = IS_HSPACE | IS_VSPACE;
|
|
|
|
// Single-letter shorthands for the character class flags. They exist only
// to keep the gLexTable initializer compact and are expected to be
// #undef'ed once the table has been defined.
#define H     IS_HSPACE
#define V     IS_VSPACE
#define I     IS_IDCHAR
#define J     IS_IDSTART
#define U     IS_URL_CHAR
#define S     IS_STRING
#define X     IS_HEX_DIGIT

// Combinations of the flags above. Each expansion is parenthesized so it
// behaves as a single value wherever it is used (e.g. `SH & mask`).
#define SH    (S|H)
#define SU    (S|U)
#define SUI   (S|U|I)
#define SUIJ  (S|U|I|J)
#define SUIX  (S|U|I|X)
#define SUIJX (S|U|I|J|X)
|
|
|
|
static const uint8_t gLexTable[] = {
|
|
// 00 01 02 03 04 05 06 07
|
|
0, S, S, S, S, S, S, S,
|
|
// 08 TAB LF 0B FF CR 0E 0F
|
|
S, SH, V, S, V, V, S, S,
|
|
// 10 11 12 13 14 15 16 17
|
|
S, S, S, S, S, S, S, S,
|
|
// 18 19 1A 1B 1C 1D 1E 1F
|
|
S, S, S, S, S, S, S, S,
|
|
//SPC ! " # $ % & '
|
|
SH, SU, 0, SU, SU, SU, SU, 0,
|
|
// ( ) * + , - . /
|
|
S, S, SU, SU, SU, SUI, SU, SU,
|
|
// 0 1 2 3 4 5 6 7
|
|
SUIX, SUIX, SUIX, SUIX, SUIX, SUIX, SUIX, SUIX,
|
|
// 8 9 : ; < = > ?
|
|
SUIX, SUIX, SU, SU, SU, SU, SU, SU,
|
|
// @ A B C D E F G
|
|
SU,SUIJX,SUIJX,SUIJX,SUIJX,SUIJX,SUIJX, SUIJ,
|
|
// H I J K L M N O
|
|
SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ,
|
|
// P Q R S T U V W
|
|
SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ,
|
|
// X Y Z [ \ ] ^ _
|
|
SUIJ, SUIJ, SUIJ, SU, J, SU, SU, SUIJ,
|
|
// ` a b c d e f g
|
|
SU,SUIJX,SUIJX,SUIJX,SUIJX,SUIJX,SUIJX, SUIJ,
|
|
// h i j k l m n o
|
|
SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ,
|
|
// p q r s t u v w
|
|
SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ,
|
|
// x y z { | } ~ 7F
|
|
SUIJ, SUIJ, SUIJ, SU, SU, SU, SU, S,
|
|
};
|
|
|
|
static_assert(MOZ_ARRAY_LENGTH(gLexTable) == 128,
|
|
"gLexTable expected to cover all 128 ASCII characters");
|
|
|
|
// The table shorthands are only meaningful while gLexTable is being
// defined; remove every one of them (including H and V) so that these very
// short names cannot collide with identifiers later in the translation unit.
#undef H
#undef V
#undef I
#undef J
#undef U
#undef S
#undef X
#undef SH
#undef SU
#undef SUI
#undef SUIJ
#undef SUIX
#undef SUIJX
|
|
|
|
/**
|
|
* True if 'ch' is in character class 'cls', which should be one of
|
|
* the constants above or some combination of them. All characters
|
|
* above U+007F are considered to be in 'cls'. EOF is never in 'cls'.
|
|
*/
|
|
static inline bool
|
|
IsOpenCharClass(int32_t ch, uint8_t cls) {
|
|
return ch >= 0 && (ch >= 128 || (gLexTable[ch] & cls) != 0);
|
|
}
|
|
|
|
/**
|
|
* True if 'ch' is in character class 'cls', which should be one of
|
|
* the constants above or some combination of them. No characters
|
|
* above U+007F are considered to be in 'cls'. EOF is never in 'cls'.
|
|
*/
|
|
static inline bool
|
|
IsClosedCharClass(int32_t ch, uint8_t cls) {
|
|
return uint32_t(ch) < 128 && (gLexTable[ch] & cls) != 0;
|
|
}
|
|
|
|
/**
|
|
* True if 'ch' is CSS whitespace, i.e. any of the ASCII characters
|
|
* TAB, LF, FF, CR, or SPC.
|
|
*/
|
|
static inline bool
|
|
IsWhitespace(int32_t ch) {
|
|
return IsClosedCharClass(ch, IS_SPACE);
|
|
}
|
|
|
|
/**
|
|
* True if 'ch' is horizontal whitespace, i.e. TAB or SPC.
|
|
*/
|
|
static inline bool
|
|
IsHorzSpace(int32_t ch) {
|
|
return IsClosedCharClass(ch, IS_HSPACE);
|
|
}
|
|
|
|
/**
|
|
* True if 'ch' is vertical whitespace, i.e. LF, FF, or CR. Vertical
|
|
* whitespace requires special handling when consumed, see AdvanceLine.
|
|
*/
|
|
static inline bool
|
|
IsVertSpace(int32_t ch) {
|
|
return IsClosedCharClass(ch, IS_VSPACE);
|
|
}
|
|
|
|
/**
|
|
* True if 'ch' is a character that can appear in the middle of an identifier.
|
|
* This includes U+0000 since it is handled as U+FFFD, but for purposes of
|
|
* GatherText it should not be included in IsOpenCharClass.
|
|
*/
|
|
static inline bool
|
|
IsIdentChar(int32_t ch) {
|
|
return IsOpenCharClass(ch, IS_IDCHAR) || ch == 0;
|
|
}
|
|
|
|
/**
|
|
* True if 'ch' is a character that by itself begins an identifier.
|
|
* This includes U+0000 since it is handled as U+FFFD, but for purposes of
|
|
* GatherText it should not be included in IsOpenCharClass.
|
|
* (This is a subset of IsIdentChar.)
|
|
*/
|
|
static inline bool
|
|
IsIdentStart(int32_t ch) {
|
|
return IsOpenCharClass(ch, IS_IDSTART) || ch == 0;
|
|
}
|
|
|
|
/**
|
|
* True if the two-character sequence aFirstChar+aSecondChar begins an
|
|
* identifier.
|
|
*/
|
|
static inline bool
|
|
StartsIdent(int32_t aFirstChar, int32_t aSecondChar)
|
|
{
|
|
return IsIdentStart(aFirstChar) ||
|
|
(aFirstChar == '-' && (aSecondChar == '-' || IsIdentStart(aSecondChar)));
|
|
}
|
|
|
|
/**
 * True if 'ch' is an ASCII decimal digit ('0' through '9').
 */
static inline bool
IsDigit(int32_t ch)
{
  // Unsigned subtraction maps '0'..'9' onto 0..9 and everything else
  // (including negative EOF values) onto something larger than 9.
  return uint32_t(ch) - uint32_t('0') <= 9u;
}
|
|
|
|
/**
|
|
* True if 'ch' is a hexadecimal digit.
|
|
*/
|
|
static inline bool
|
|
IsHexDigit(int32_t ch) {
|
|
return IsClosedCharClass(ch, IS_HEX_DIGIT);
|
|
}
|
|
|
|
/**
 * Return the numeric value of 'ch'. The caller must already know that
 * 'ch' is a decimal digit; no validation is performed here.
 */
static inline uint32_t
DecimalDigitValue(int32_t ch)
{
  const int32_t offsetFromZero = ch - '0';
  return offsetFromZero;
}
|
|
|
|
/**
|
|
* Assuming that 'ch' is a hexadecimal digit, return its numeric value.
|
|
*/
|
|
static inline uint32_t
|
|
HexDigitValue(int32_t ch)
|
|
{
|
|
if (IsDigit(ch)) {
|
|
return DecimalDigitValue(ch);
|
|
} else {
|
|
// Note: c&7 just keeps the low three bits which causes
|
|
// upper and lower case alphabetics to both yield their
|
|
// "relative to 10" value for computing the hex value.
|
|
return (ch & 0x7) + 9;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* If 'ch' can be the first character of a two-character match operator
|
|
* token, return the token type code for that token, otherwise return
|
|
* eCSSToken_Symbol to indicate that it can't.
|
|
*/
|
|
static inline nsCSSTokenType
|
|
MatchOperatorType(int32_t ch)
|
|
{
|
|
switch (ch) {
|
|
case '~': return eCSSToken_Includes;
|
|
case '|': return eCSSToken_Dashmatch;
|
|
case '^': return eCSSToken_Beginsmatch;
|
|
case '$': return eCSSToken_Endsmatch;
|
|
case '*': return eCSSToken_Containsmatch;
|
|
default: return eCSSToken_Symbol;
|
|
}
|
|
}
|
|
|
|
/* Out-of-line nsCSSToken methods. */
|
|
|
|
/**
|
|
* Append the textual representation of |this| to |aBuffer|.
|
|
*/
|
|
void
|
|
nsCSSToken::AppendToString(nsString& aBuffer) const
|
|
{
|
|
switch (mType) {
|
|
case eCSSToken_Ident:
|
|
nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
|
|
break;
|
|
|
|
case eCSSToken_AtKeyword:
|
|
aBuffer.Append('@');
|
|
nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
|
|
break;
|
|
|
|
case eCSSToken_ID:
|
|
case eCSSToken_Hash:
|
|
aBuffer.Append('#');
|
|
nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
|
|
break;
|
|
|
|
case eCSSToken_Function:
|
|
nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
|
|
aBuffer.Append('(');
|
|
break;
|
|
|
|
case eCSSToken_URL:
|
|
case eCSSToken_Bad_URL:
|
|
aBuffer.AppendLiteral("url(");
|
|
if (mSymbol != char16_t(0)) {
|
|
if (mType == eCSSToken_URL) {
|
|
nsStyleUtil::AppendEscapedCSSString(mIdent, aBuffer, mSymbol);
|
|
} else {
|
|
// Only things up to mInteger were part of the string.
|
|
nsStyleUtil::AppendEscapedCSSString(StringHead(mIdent, mInteger),
|
|
aBuffer, mSymbol);
|
|
MOZ_ASSERT(mInteger2 == 0 || mInteger2 == 1);
|
|
if (mInteger2 == 1) {
|
|
// This was a Bad_String; strip off the closing quote.
|
|
aBuffer.Truncate(aBuffer.Length() - 1);
|
|
}
|
|
|
|
// Now append the remaining garbage.
|
|
aBuffer.Append(Substring(mIdent, mInteger));
|
|
}
|
|
} else {
|
|
aBuffer.Append(mIdent);
|
|
}
|
|
aBuffer.Append(char16_t(')'));
|
|
break;
|
|
|
|
case eCSSToken_Number:
|
|
if (mIntegerValid) {
|
|
aBuffer.AppendInt(mInteger, 10);
|
|
} else {
|
|
aBuffer.AppendFloat(mNumber);
|
|
}
|
|
break;
|
|
|
|
case eCSSToken_Percentage:
|
|
aBuffer.AppendFloat(mNumber * 100.0f);
|
|
aBuffer.Append(char16_t('%'));
|
|
break;
|
|
|
|
case eCSSToken_Dimension:
|
|
if (mIntegerValid) {
|
|
aBuffer.AppendInt(mInteger, 10);
|
|
} else {
|
|
aBuffer.AppendFloat(mNumber);
|
|
}
|
|
nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
|
|
break;
|
|
|
|
case eCSSToken_Bad_String:
|
|
nsStyleUtil::AppendEscapedCSSString(mIdent, aBuffer, mSymbol);
|
|
// remove the trailing quote character
|
|
aBuffer.Truncate(aBuffer.Length() - 1);
|
|
break;
|
|
|
|
case eCSSToken_String:
|
|
nsStyleUtil::AppendEscapedCSSString(mIdent, aBuffer, mSymbol);
|
|
break;
|
|
|
|
case eCSSToken_Symbol:
|
|
aBuffer.Append(mSymbol);
|
|
break;
|
|
|
|
case eCSSToken_Whitespace:
|
|
aBuffer.Append(' ');
|
|
break;
|
|
|
|
case eCSSToken_HTMLComment:
|
|
case eCSSToken_URange:
|
|
aBuffer.Append(mIdent);
|
|
break;
|
|
|
|
case eCSSToken_Includes:
|
|
aBuffer.AppendLiteral("~=");
|
|
break;
|
|
case eCSSToken_Dashmatch:
|
|
aBuffer.AppendLiteral("|=");
|
|
break;
|
|
case eCSSToken_Beginsmatch:
|
|
aBuffer.AppendLiteral("^=");
|
|
break;
|
|
case eCSSToken_Endsmatch:
|
|
aBuffer.AppendLiteral("$=");
|
|
break;
|
|
case eCSSToken_Containsmatch:
|
|
aBuffer.AppendLiteral("*=");
|
|
break;
|
|
|
|
default:
|
|
NS_ERROR("invalid token type");
|
|
break;
|
|
}
|
|
}
|
|
|
|
/* nsCSSScanner methods. */
|
|
|
|
/**
 * Set up a scanner that reads from |aBuffer|, with line counting starting
 * at |aLineNumber|. The scanner keeps a pointer into |aBuffer| (obtained
 * via BeginReading) rather than a copy. All offsets start at zero, no
 * error reporter is attached, and every mode/status flag starts cleared.
 */
nsCSSScanner::nsCSSScanner(const nsAString& aBuffer, uint32_t aLineNumber)
  : mBuffer(aBuffer.BeginReading()),
    mOffset(0),
    mCount(aBuffer.Length()),
    mLineNumber(aLineNumber),
    mLineOffset(0),
    mTokenLineNumber(aLineNumber),
    mTokenLineOffset(0),
    mTokenOffset(0),
    mRecordStartOffset(0),
    mEOFCharacters(eEOFCharacters_None),
    mReporter(nullptr),
    mSVGMode(false),
    mRecording(false),
    mSeenBadToken(false),
    mSeenVariableReference(false)
{
  MOZ_COUNT_CTOR(nsCSSScanner);
}
|
|
|
|
/**
 * Destructor. Does nothing beyond the MOZ_COUNT_DTOR bookkeeping that
 * pairs with MOZ_COUNT_CTOR in the constructor.
 */
nsCSSScanner::~nsCSSScanner()
{
  MOZ_COUNT_DTOR(nsCSSScanner);
}
|
|
|
|
void
|
|
nsCSSScanner::StartRecording()
|
|
{
|
|
MOZ_ASSERT(!mRecording, "already started recording");
|
|
mRecording = true;
|
|
mRecordStartOffset = mOffset;
|
|
}
|
|
|
|
void
|
|
nsCSSScanner::StopRecording()
|
|
{
|
|
MOZ_ASSERT(mRecording, "haven't started recording");
|
|
mRecording = false;
|
|
}
|
|
|
|
void
|
|
nsCSSScanner::StopRecording(nsString& aBuffer)
|
|
{
|
|
MOZ_ASSERT(mRecording, "haven't started recording");
|
|
mRecording = false;
|
|
aBuffer.Append(mBuffer + mRecordStartOffset,
|
|
mOffset - mRecordStartOffset);
|
|
}
|
|
|
|
uint32_t
|
|
nsCSSScanner::RecordingLength() const
|
|
{
|
|
MOZ_ASSERT(mRecording, "haven't started recording");
|
|
return mOffset - mRecordStartOffset;
|
|
}
|
|
|
|
#ifdef DEBUG
/**
 * Debug-only query: true while a recording started by StartRecording()
 * has not yet been stopped.
 */
bool
nsCSSScanner::IsRecording() const
{
  const bool recordingInProgress = mRecording;
  return recordingInProgress;
}
#endif
|
|
|
|
/**
 * Return a dependent substring covering the line the current token is on:
 * it starts at the recorded token line offset and runs up to (but not
 * including) the first vertical-space character at or after the token
 * offset, or to the end of the buffer if there is none.
 */
nsDependentSubstring
nsCSSScanner::GetCurrentLine() const
{
  uint32_t lineEnd = mTokenOffset;
  for (; lineEnd < mCount; ++lineEnd) {
    if (IsVertSpace(mBuffer[lineEnd])) {
      break;
    }
  }
  return nsDependentSubstring(mBuffer + mTokenLineOffset,
                              mBuffer + lineEnd);
}
|
|
|
|
/**
|
|
* Return the raw UTF-16 code unit at position |mOffset + n| within
|
|
* the read buffer. If that is beyond the end of the buffer, returns
|
|
* -1 to indicate end of input.
|
|
*/
|
|
inline int32_t
|
|
nsCSSScanner::Peek(uint32_t n)
|
|
{
|
|
if (mOffset + n >= mCount) {
|
|
return -1;
|
|
}
|
|
return mBuffer[mOffset + n];
|
|
}
|
|
|
|
/**
|
|
* Advance |mOffset| over |n| code units. Advance(0) is a no-op.
|
|
* If |n| is greater than the distance to end of input, will silently
|
|
* stop at the end. May not be used to advance over a line boundary;
|
|
* AdvanceLine() must be used instead.
|
|
*/
|
|
inline void
|
|
nsCSSScanner::Advance(uint32_t n)
|
|
{
|
|
#ifdef DEBUG
|
|
while (mOffset < mCount && n > 0) {
|
|
MOZ_ASSERT(!IsVertSpace(mBuffer[mOffset]),
|
|
"may not Advance() over a line boundary");
|
|
mOffset++;
|
|
n--;
|
|
}
|
|
#else
|
|
if (mOffset + n >= mCount || mOffset + n < mOffset)
|
|
mOffset = mCount;
|
|
else
|
|
mOffset += n;
|
|
#endif
|
|
}
|
|
|
|
/**
|
|
* Advance |mOffset| over a line boundary.
|
|
*/
|
|
void
|
|
nsCSSScanner::AdvanceLine()
|
|
{
|
|
MOZ_ASSERT(IsVertSpace(mBuffer[mOffset]),
|
|
"may not AdvanceLine() over a horizontal character");
|
|
// Advance over \r\n as a unit.
|
|
if (mBuffer[mOffset] == '\r' && mOffset + 1 < mCount &&
|
|
mBuffer[mOffset+1] == '\n')
|
|
mOffset += 2;
|
|
else
|
|
mOffset += 1;
|
|
// 0 is a magical line number meaning that we don't know (i.e., script)
|
|
if (mLineNumber != 0)
|
|
mLineNumber++;
|
|
mLineOffset = mOffset;
|
|
}
|
|
|
|
/**
|
|
* Back up |mOffset| over |n| code units. Backup(0) is a no-op.
|
|
* If |n| is greater than the distance to beginning of input, will
|
|
* silently stop at the beginning. May not be used to back up over a
|
|
* line boundary.
|
|
*/
|
|
void
|
|
nsCSSScanner::Backup(uint32_t n)
|
|
{
|
|
#ifdef DEBUG
|
|
while (mOffset > 0 && n > 0) {
|
|
MOZ_ASSERT(!IsVertSpace(mBuffer[mOffset-1]),
|
|
"may not Backup() over a line boundary");
|
|
mOffset--;
|
|
n--;
|
|
}
|
|
#else
|
|
if (mOffset < n)
|
|
mOffset = 0;
|
|
else
|
|
mOffset -= n;
|
|
#endif
|
|
}
|
|
|
|
void
|
|
nsCSSScanner::SavePosition(nsCSSScannerPosition& aState)
|
|
{
|
|
aState.mOffset = mOffset;
|
|
aState.mLineNumber = mLineNumber;
|
|
aState.mLineOffset = mLineOffset;
|
|
aState.mTokenLineNumber = mTokenLineNumber;
|
|
aState.mTokenLineOffset = mTokenLineOffset;
|
|
aState.mTokenOffset = mTokenOffset;
|
|
aState.mInitialized = true;
|
|
}
|
|
|
|
void
|
|
nsCSSScanner::RestoreSavedPosition(const nsCSSScannerPosition& aState)
|
|
{
|
|
MOZ_ASSERT(aState.mInitialized, "have not saved state");
|
|
if (aState.mInitialized) {
|
|
mOffset = aState.mOffset;
|
|
mLineNumber = aState.mLineNumber;
|
|
mLineOffset = aState.mLineOffset;
|
|
mTokenLineNumber = aState.mTokenLineNumber;
|
|
mTokenLineOffset = aState.mTokenLineOffset;
|
|
mTokenOffset = aState.mTokenOffset;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Skip over a sequence of whitespace characters (vertical or
|
|
* horizontal) starting at the current read position.
|
|
*/
|
|
void
|
|
nsCSSScanner::SkipWhitespace()
|
|
{
|
|
for (;;) {
|
|
int32_t ch = Peek();
|
|
if (!IsWhitespace(ch)) { // EOF counts as non-whitespace
|
|
break;
|
|
}
|
|
if (IsVertSpace(ch)) {
|
|
AdvanceLine();
|
|
} else {
|
|
Advance();
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Skip over one CSS comment starting at the current read position.
|
|
*/
|
|
void
|
|
nsCSSScanner::SkipComment()
|
|
{
|
|
MOZ_ASSERT(Peek() == '/' && Peek(1) == '*', "should not have been called");
|
|
Advance(2);
|
|
for (;;) {
|
|
int32_t ch = Peek();
|
|
if (ch < 0) {
|
|
if (mReporter)
|
|
mReporter->ReportUnexpectedEOF("PECommentEOF");
|
|
SetEOFCharacters(eEOFCharacters_Asterisk | eEOFCharacters_Slash);
|
|
return;
|
|
}
|
|
if (ch == '*') {
|
|
Advance();
|
|
ch = Peek();
|
|
if (ch < 0) {
|
|
if (mReporter)
|
|
mReporter->ReportUnexpectedEOF("PECommentEOF");
|
|
SetEOFCharacters(eEOFCharacters_Slash);
|
|
return;
|
|
}
|
|
if (ch == '/') {
|
|
Advance();
|
|
return;
|
|
}
|
|
} else if (IsVertSpace(ch)) {
|
|
AdvanceLine();
|
|
} else {
|
|
Advance();
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* If there is a valid escape sequence starting at the current read
|
|
* position, consume it, decode it, append the result to |aOutput|,
|
|
* and return true. Otherwise, consume nothing, leave |aOutput|
|
|
* unmodified, and return false. If |aInString| is true, accept the
|
|
* additional form of escape sequence allowed within string-like tokens.
|
|
*/
|
|
bool
|
|
nsCSSScanner::GatherEscape(nsString& aOutput, bool aInString)
|
|
{
|
|
MOZ_ASSERT(Peek() == '\\', "should not have been called");
|
|
int32_t ch = Peek(1);
|
|
if (ch < 0) {
|
|
// If we are in a string (or a url() containing a string), we want to drop
|
|
// the backslash on the floor. Otherwise, we want to treat it as a U+FFFD
|
|
// character.
|
|
Advance();
|
|
if (aInString) {
|
|
SetEOFCharacters(eEOFCharacters_DropBackslash);
|
|
} else {
|
|
aOutput.Append(UCS2_REPLACEMENT_CHAR);
|
|
SetEOFCharacters(eEOFCharacters_ReplacementChar);
|
|
}
|
|
return true;
|
|
}
|
|
if (IsVertSpace(ch)) {
|
|
if (aInString) {
|
|
// In strings (and in url() containing a string), escaped
|
|
// newlines are completely removed, to allow splitting over
|
|
// multiple lines.
|
|
Advance();
|
|
AdvanceLine();
|
|
return true;
|
|
}
|
|
// Outside of strings, backslash followed by a newline is not an escape.
|
|
return false;
|
|
}
|
|
|
|
if (!IsHexDigit(ch)) {
|
|
// "Any character (except a hexadecimal digit, linefeed, carriage
|
|
// return, or form feed) can be escaped with a backslash to remove
|
|
// its special meaning." -- CSS2.1 section 4.1.3
|
|
Advance(2);
|
|
if (ch == 0) {
|
|
aOutput.Append(UCS2_REPLACEMENT_CHAR);
|
|
} else {
|
|
aOutput.Append(ch);
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// "[at most six hexadecimal digits following a backslash] stand
|
|
// for the ISO 10646 character with that number, which must not be
|
|
// zero. (It is undefined in CSS 2.1 what happens if a style sheet
|
|
// does contain a character with Unicode codepoint zero.)"
|
|
// -- CSS2.1 section 4.1.3
|
|
|
|
// At this point we know we have \ followed by at least one
|
|
// hexadecimal digit, therefore the escape sequence is valid and we
|
|
// can go ahead and consume the backslash.
|
|
Advance();
|
|
uint32_t val = 0;
|
|
int i = 0;
|
|
do {
|
|
val = val * 16 + HexDigitValue(ch);
|
|
i++;
|
|
Advance();
|
|
ch = Peek();
|
|
} while (i < 6 && IsHexDigit(ch));
|
|
|
|
// "Interpret the hex digits as a hexadecimal number. If this number is zero,
|
|
// or is greater than the maximum allowed codepoint, return U+FFFD
|
|
// REPLACEMENT CHARACTER" -- CSS Syntax Level 3
|
|
if (MOZ_UNLIKELY(val == 0)) {
|
|
aOutput.Append(UCS2_REPLACEMENT_CHAR);
|
|
} else {
|
|
AppendUCS4ToUTF16(ENSURE_VALID_CHAR(val), aOutput);
|
|
}
|
|
|
|
// Consume exactly one whitespace character after a
|
|
// hexadecimal escape sequence.
|
|
if (IsVertSpace(ch)) {
|
|
AdvanceLine();
|
|
} else if (IsHorzSpace(ch)) {
|
|
Advance();
|
|
}
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* Consume a run of "text" beginning with the current read position,
|
|
* consisting of characters in the class |aClass| (which must be a
|
|
* suitable argument to IsOpenCharClass) plus escape sequences.
|
|
* Append the text to |aText|, after decoding escape sequences.
|
|
*
|
|
* Returns true if at least one character was appended to |aText|,
|
|
* false otherwise.
|
|
*/
|
|
bool
|
|
nsCSSScanner::GatherText(uint8_t aClass, nsString& aText)
|
|
{
|
|
// This is all of the character classes currently used with
|
|
// GatherText. If you have a need to use this function with a
|
|
// different class, go ahead and add it.
|
|
MOZ_ASSERT(aClass == IS_STRING ||
|
|
aClass == IS_IDCHAR ||
|
|
aClass == IS_URL_CHAR,
|
|
"possibly-inappropriate character class");
|
|
|
|
uint32_t start = mOffset;
|
|
bool inString = aClass == IS_STRING;
|
|
|
|
for (;;) {
|
|
// Consume runs of unescaped characters in one go.
|
|
uint32_t n = mOffset;
|
|
while (n < mCount && IsOpenCharClass(mBuffer[n], aClass)) {
|
|
n++;
|
|
}
|
|
if (n > mOffset) {
|
|
aText.Append(&mBuffer[mOffset], n - mOffset);
|
|
mOffset = n;
|
|
}
|
|
if (n == mCount) {
|
|
break;
|
|
}
|
|
|
|
int32_t ch = Peek();
|
|
MOZ_ASSERT(!IsOpenCharClass(ch, aClass),
|
|
"should not have exited the inner loop");
|
|
if (ch == 0) {
|
|
Advance();
|
|
aText.Append(UCS2_REPLACEMENT_CHAR);
|
|
continue;
|
|
}
|
|
|
|
if (ch != '\\') {
|
|
break;
|
|
}
|
|
if (!GatherEscape(aText, inString)) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
return mOffset > start;
|
|
}
|
|
|
|
/**
|
|
* Scan an Ident token. This also handles Function and URL tokens,
|
|
* both of which begin indistinguishably from an identifier. It can
|
|
* produce a Symbol token when an apparent identifier actually led
|
|
* into an invalid escape sequence.
|
|
*/
|
|
bool
|
|
nsCSSScanner::ScanIdent(nsCSSToken& aToken)
|
|
{
|
|
if (MOZ_UNLIKELY(!GatherText(IS_IDCHAR, aToken.mIdent))) {
|
|
MOZ_ASSERT(Peek() == '\\',
|
|
"unexpected IsIdentStart character that did not begin an ident");
|
|
aToken.mSymbol = Peek();
|
|
Advance();
|
|
return true;
|
|
}
|
|
|
|
if (MOZ_LIKELY(Peek() != '(')) {
|
|
aToken.mType = eCSSToken_Ident;
|
|
return true;
|
|
}
|
|
|
|
Advance();
|
|
aToken.mType = eCSSToken_Function;
|
|
if (aToken.mIdent.LowerCaseEqualsLiteral("url")) {
|
|
NextURL(aToken);
|
|
} else if (aToken.mIdent.LowerCaseEqualsLiteral("var")) {
|
|
mSeenVariableReference = true;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* Scan an AtKeyword token. Also handles production of Symbol when
|
|
* an '@' is not followed by an identifier.
|
|
*/
|
|
bool
|
|
nsCSSScanner::ScanAtKeyword(nsCSSToken& aToken)
|
|
{
|
|
MOZ_ASSERT(Peek() == '@', "should not have been called");
|
|
|
|
// Fall back for when '@' isn't followed by an identifier.
|
|
aToken.mSymbol = '@';
|
|
Advance();
|
|
|
|
int32_t ch = Peek();
|
|
if (StartsIdent(ch, Peek(1))) {
|
|
if (GatherText(IS_IDCHAR, aToken.mIdent)) {
|
|
aToken.mType = eCSSToken_AtKeyword;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* Scan a Hash token. Handles the distinction between eCSSToken_ID
|
|
* and eCSSToken_Hash, and handles production of Symbol when a '#'
|
|
* is not followed by identifier characters.
|
|
*/
|
|
bool
|
|
nsCSSScanner::ScanHash(nsCSSToken& aToken)
|
|
{
|
|
MOZ_ASSERT(Peek() == '#', "should not have been called");
|
|
|
|
// Fall back for when '#' isn't followed by identifier characters.
|
|
aToken.mSymbol = '#';
|
|
Advance();
|
|
|
|
int32_t ch = Peek();
|
|
if (IsIdentChar(ch) || ch == '\\') {
|
|
nsCSSTokenType type =
|
|
StartsIdent(ch, Peek(1)) ? eCSSToken_ID : eCSSToken_Hash;
|
|
aToken.mIdent.SetLength(0);
|
|
if (GatherText(IS_IDCHAR, aToken.mIdent)) {
|
|
aToken.mType = type;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* Scan a Number, Percentage, or Dimension token (all of which begin
|
|
* like a Number). Can produce a Symbol when a '.' is not followed by
|
|
* digits, or when '+' or '-' are not followed by either a digit or a
|
|
* '.' and then a digit. Can also produce a HTMLComment when it
|
|
* encounters '-->'.
|
|
*/
|
|
bool
|
|
nsCSSScanner::ScanNumber(nsCSSToken& aToken)
|
|
{
|
|
int32_t c = Peek();
|
|
#ifdef DEBUG
|
|
{
|
|
int32_t c2 = Peek(1);
|
|
int32_t c3 = Peek(2);
|
|
MOZ_ASSERT(IsDigit(c) ||
|
|
(IsDigit(c2) && (c == '.' || c == '+' || c == '-')) ||
|
|
(IsDigit(c3) && (c == '+' || c == '-') && c2 == '.'),
|
|
"should not have been called");
|
|
}
|
|
#endif
|
|
|
|
// Sign of the mantissa (-1 or 1).
|
|
int32_t sign = c == '-' ? -1 : 1;
|
|
// Absolute value of the integer part of the mantissa. This is a double so
|
|
// we don't run into overflow issues for consumers that only care about our
|
|
// floating-point value while still being able to express the full int32_t
|
|
// range for consumers who want integers.
|
|
double intPart = 0;
|
|
// Fractional part of the mantissa. This is a double so that when we convert
|
|
// to float at the end we'll end up rounding to nearest float instead of
|
|
// truncating down (as we would if fracPart were a float and we just
|
|
// effectively lost the last several digits).
|
|
double fracPart = 0;
|
|
// Absolute value of the power of 10 that we should multiply by (only
|
|
// relevant for numbers in scientific notation). Has to be a signed integer,
|
|
// because multiplication of signed by unsigned converts the unsigned to
|
|
// signed, so if we plan to actually multiply by expSign...
|
|
int32_t exponent = 0;
|
|
// Sign of the exponent.
|
|
int32_t expSign = 1;
|
|
|
|
aToken.mHasSign = (c == '+' || c == '-');
|
|
if (aToken.mHasSign) {
|
|
Advance();
|
|
c = Peek();
|
|
}
|
|
|
|
bool gotDot = (c == '.');
|
|
|
|
if (!gotDot) {
|
|
// Scan the integer part of the mantissa.
|
|
MOZ_ASSERT(IsDigit(c), "should have been excluded by logic above");
|
|
do {
|
|
intPart = 10*intPart + DecimalDigitValue(c);
|
|
Advance();
|
|
c = Peek();
|
|
} while (IsDigit(c));
|
|
|
|
gotDot = (c == '.') && IsDigit(Peek(1));
|
|
}
|
|
|
|
if (gotDot) {
|
|
// Scan the fractional part of the mantissa.
|
|
Advance();
|
|
c = Peek();
|
|
MOZ_ASSERT(IsDigit(c), "should have been excluded by logic above");
|
|
// Power of ten by which we need to divide our next digit
|
|
double divisor = 10;
|
|
do {
|
|
fracPart += DecimalDigitValue(c) / divisor;
|
|
divisor *= 10;
|
|
Advance();
|
|
c = Peek();
|
|
} while (IsDigit(c));
|
|
}
|
|
|
|
bool gotE = false;
|
|
if (c == 'e' || c == 'E') {
|
|
int32_t expSignChar = Peek(1);
|
|
int32_t nextChar = Peek(2);
|
|
if (IsDigit(expSignChar) ||
|
|
((expSignChar == '-' || expSignChar == '+') && IsDigit(nextChar))) {
|
|
gotE = true;
|
|
if (expSignChar == '-') {
|
|
expSign = -1;
|
|
}
|
|
Advance(); // consumes the E
|
|
if (expSignChar == '-' || expSignChar == '+') {
|
|
Advance();
|
|
c = nextChar;
|
|
} else {
|
|
c = expSignChar;
|
|
}
|
|
MOZ_ASSERT(IsDigit(c), "should have been excluded by logic above");
|
|
do {
|
|
exponent = 10*exponent + DecimalDigitValue(c);
|
|
Advance();
|
|
c = Peek();
|
|
} while (IsDigit(c));
|
|
}
|
|
}
|
|
|
|
nsCSSTokenType type = eCSSToken_Number;
|
|
|
|
// Set mIntegerValid for all cases (except %, below) because we need
|
|
// it for the "2n" in :nth-child(2n).
|
|
aToken.mIntegerValid = false;
|
|
|
|
// Time to reassemble our number.
|
|
// Do all the math in double precision so it's truncated only once.
|
|
double value = sign * (intPart + fracPart);
|
|
if (gotE) {
|
|
// Explicitly cast expSign*exponent to double to avoid issues with
|
|
// overloaded pow() on Windows.
|
|
value *= pow(10.0, double(expSign * exponent));
|
|
} else if (!gotDot) {
|
|
// Clamp values outside of integer range.
|
|
if (sign > 0) {
|
|
aToken.mInteger = int32_t(std::min(intPart, double(INT32_MAX)));
|
|
} else {
|
|
aToken.mInteger = int32_t(std::max(-intPart, double(INT32_MIN)));
|
|
}
|
|
aToken.mIntegerValid = true;
|
|
}
|
|
|
|
nsString& ident = aToken.mIdent;
|
|
|
|
// Check for Dimension and Percentage tokens.
|
|
if (c >= 0) {
|
|
if (StartsIdent(c, Peek(1))) {
|
|
if (GatherText(IS_IDCHAR, ident)) {
|
|
type = eCSSToken_Dimension;
|
|
}
|
|
} else if (c == '%') {
|
|
Advance();
|
|
type = eCSSToken_Percentage;
|
|
value = value / 100.0f;
|
|
aToken.mIntegerValid = false;
|
|
}
|
|
}
|
|
aToken.mNumber = value;
|
|
aToken.mType = type;
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* Scan a string constant ('foo' or "foo"). Will always produce
|
|
* either a String or a Bad_String token; the latter occurs when the
|
|
* close quote is missing. Always returns true (for convenience in Next()).
|
|
*/
|
|
bool
|
|
nsCSSScanner::ScanString(nsCSSToken& aToken)
|
|
{
|
|
int32_t aStop = Peek();
|
|
MOZ_ASSERT(aStop == '"' || aStop == '\'', "should not have been called");
|
|
aToken.mType = eCSSToken_String;
|
|
aToken.mSymbol = char16_t(aStop); // Remember how it's quoted.
|
|
Advance();
|
|
|
|
for (;;) {
|
|
GatherText(IS_STRING, aToken.mIdent);
|
|
|
|
int32_t ch = Peek();
|
|
if (ch == -1) {
|
|
AddEOFCharacters(aStop == '"' ? eEOFCharacters_DoubleQuote :
|
|
eEOFCharacters_SingleQuote);
|
|
break; // EOF ends a string token with no error.
|
|
}
|
|
if (ch == aStop) {
|
|
Advance();
|
|
break;
|
|
}
|
|
// Both " and ' are excluded from IS_STRING.
|
|
if (ch == '"' || ch == '\'') {
|
|
aToken.mIdent.Append(ch);
|
|
Advance();
|
|
continue;
|
|
}
|
|
|
|
mSeenBadToken = true;
|
|
aToken.mType = eCSSToken_Bad_String;
|
|
if (mReporter)
|
|
mReporter->ReportUnexpected("SEUnterminatedString", aToken);
|
|
break;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* Scan a unicode-range token. These match the regular expression
|
|
*
|
|
* u\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?
|
|
*
|
|
* However, some such tokens are "invalid". There are three valid forms:
|
|
*
|
|
* u+[0-9a-f]{x} 1 <= x <= 6
|
|
* u+[0-9a-f]{x}\?{y} 1 <= x+y <= 6
|
|
* u+[0-9a-f]{x}-[0-9a-f]{y} 1 <= x <= 6, 1 <= y <= 6
|
|
*
|
|
* All unicode-range tokens have their text recorded in mIdent; valid ones
|
|
* are also decoded into mInteger and mInteger2, and mIntegerValid is set.
|
|
* Note that this does not validate the numeric range, only the syntactic
|
|
* form.
|
|
*/
|
|
bool
|
|
nsCSSScanner::ScanURange(nsCSSToken& aResult)
|
|
{
|
|
int32_t intro1 = Peek();
|
|
int32_t intro2 = Peek(1);
|
|
int32_t ch = Peek(2);
|
|
|
|
MOZ_ASSERT((intro1 == 'u' || intro1 == 'U') &&
|
|
intro2 == '+' &&
|
|
(IsHexDigit(ch) || ch == '?'),
|
|
"should not have been called");
|
|
|
|
aResult.mIdent.Append(intro1);
|
|
aResult.mIdent.Append(intro2);
|
|
Advance(2);
|
|
|
|
bool valid = true;
|
|
bool haveQues = false;
|
|
uint32_t low = 0;
|
|
uint32_t high = 0;
|
|
int i = 0;
|
|
|
|
do {
|
|
aResult.mIdent.Append(ch);
|
|
if (IsHexDigit(ch)) {
|
|
if (haveQues) {
|
|
valid = false; // All question marks should be at the end.
|
|
}
|
|
low = low*16 + HexDigitValue(ch);
|
|
high = high*16 + HexDigitValue(ch);
|
|
} else {
|
|
haveQues = true;
|
|
low = low*16 + 0x0;
|
|
high = high*16 + 0xF;
|
|
}
|
|
|
|
i++;
|
|
Advance();
|
|
ch = Peek();
|
|
} while (i < 6 && (IsHexDigit(ch) || ch == '?'));
|
|
|
|
if (ch == '-' && IsHexDigit(Peek(1))) {
|
|
if (haveQues) {
|
|
valid = false;
|
|
}
|
|
|
|
aResult.mIdent.Append(ch);
|
|
Advance();
|
|
ch = Peek();
|
|
high = 0;
|
|
i = 0;
|
|
do {
|
|
aResult.mIdent.Append(ch);
|
|
high = high*16 + HexDigitValue(ch);
|
|
|
|
i++;
|
|
Advance();
|
|
ch = Peek();
|
|
} while (i < 6 && IsHexDigit(ch));
|
|
}
|
|
|
|
aResult.mInteger = low;
|
|
aResult.mInteger2 = high;
|
|
aResult.mIntegerValid = valid;
|
|
aResult.mType = eCSSToken_URange;
|
|
return true;
|
|
}
|
|
|
|
#ifdef DEBUG
/**
 * Debug-only check that |c| is one of the combinations of EOFCharacters
 * bits the scanner is expected to produce; asserts otherwise.
 */
/* static */ void
nsCSSScanner::AssertEOFCharactersValid(uint32_t c)
{
  // Every combination of implied-EOF bits that is considered valid.
  static const uint32_t kValidCombinations[] = {
    eEOFCharacters_None,
    eEOFCharacters_ReplacementChar,
    eEOFCharacters_Slash,
    eEOFCharacters_Asterisk | eEOFCharacters_Slash,
    eEOFCharacters_DoubleQuote,
    eEOFCharacters_SingleQuote,
    eEOFCharacters_DropBackslash | eEOFCharacters_DoubleQuote,
    eEOFCharacters_DropBackslash | eEOFCharacters_SingleQuote,
    eEOFCharacters_CloseParen,
    eEOFCharacters_ReplacementChar | eEOFCharacters_CloseParen,
    eEOFCharacters_DoubleQuote | eEOFCharacters_CloseParen,
    eEOFCharacters_SingleQuote | eEOFCharacters_CloseParen,
    eEOFCharacters_DropBackslash | eEOFCharacters_DoubleQuote |
      eEOFCharacters_CloseParen,
    eEOFCharacters_DropBackslash | eEOFCharacters_SingleQuote |
      eEOFCharacters_CloseParen,
  };

  bool isValid = false;
  for (size_t i = 0; i < MOZ_ARRAY_LENGTH(kValidCombinations); i++) {
    if (c == kValidCombinations[i]) {
      isValid = true;
      break;
    }
  }
  MOZ_ASSERT(isValid, "invalid EOFCharacters value");
}
#endif
|
|
|
|
void
|
|
nsCSSScanner::SetEOFCharacters(uint32_t aEOFCharacters)
|
|
{
|
|
mEOFCharacters = EOFCharacters(aEOFCharacters);
|
|
}
|
|
|
|
void
|
|
nsCSSScanner::AddEOFCharacters(uint32_t aEOFCharacters)
|
|
{
|
|
mEOFCharacters = EOFCharacters(mEOFCharacters | aEOFCharacters);
|
|
}
|
|
|
|
// Characters that AppendImpliedEOFCharacters may append.  Entry N is
// selected by bit N of the EOFCharacters value once its lowest bit has been
// shifted away; the trailing 0 terminates the table.
static const char16_t kImpliedEOFCharacters[] = {
  UCS2_REPLACEMENT_CHAR,
  '*',
  '/',
  '"',
  '\'',
  ')',
  0
};
|
|
|
|
/* static */ void
|
|
nsCSSScanner::AppendImpliedEOFCharacters(EOFCharacters aEOFCharacters,
|
|
nsAString& aResult)
|
|
{
|
|
// First, ignore eEOFCharacters_DropBackslash.
|
|
uint32_t c = aEOFCharacters >> 1;
|
|
|
|
// All of the remaining EOFCharacters bits represent appended characters,
|
|
// and the bits are in the order that they need appending.
|
|
for (const char16_t* p = kImpliedEOFCharacters; *p && c; p++, c >>= 1) {
|
|
if (c & 1) {
|
|
aResult.Append(*p);
|
|
}
|
|
}
|
|
|
|
MOZ_ASSERT(c == 0, "too many bits in mEOFCharacters");
|
|
}
|
|
|
|
/**
|
|
* Consume the part of an URL token after the initial 'url('. Caller
|
|
* is assumed to have consumed 'url(' already. Will always produce
|
|
* either an URL or a Bad_URL token.
|
|
*
|
|
* Exposed for use by nsCSSParser::ParseMozDocumentRule, which applies
|
|
* the special lexical rules for URL tokens in a nonstandard context.
|
|
*/
|
|
void
|
|
nsCSSScanner::NextURL(nsCSSToken& aToken)
|
|
{
|
|
SkipWhitespace();
|
|
|
|
// aToken.mIdent may be "url" at this point; clear that out
|
|
aToken.mIdent.Truncate();
|
|
|
|
int32_t ch = Peek();
|
|
// Do we have a string?
|
|
if (ch == '"' || ch == '\'') {
|
|
ScanString(aToken);
|
|
if (MOZ_UNLIKELY(aToken.mType == eCSSToken_Bad_String)) {
|
|
aToken.mType = eCSSToken_Bad_URL;
|
|
// Flag us as having been a Bad_String.
|
|
aToken.mInteger2 = 1;
|
|
ConsumeBadURLRemnants(aToken);
|
|
return;
|
|
}
|
|
MOZ_ASSERT(aToken.mType == eCSSToken_String, "unexpected token type");
|
|
|
|
} else {
|
|
// Otherwise, this is the start of a non-quoted url (which may be empty).
|
|
aToken.mSymbol = char16_t(0);
|
|
GatherText(IS_URL_CHAR, aToken.mIdent);
|
|
}
|
|
|
|
// Consume trailing whitespace and then look for a close parenthesis.
|
|
SkipWhitespace();
|
|
ch = Peek();
|
|
// ch can be less than zero indicating EOF
|
|
if (MOZ_LIKELY(ch < 0 || ch == ')')) {
|
|
Advance();
|
|
aToken.mType = eCSSToken_URL;
|
|
if (ch < 0) {
|
|
AddEOFCharacters(eEOFCharacters_CloseParen);
|
|
}
|
|
} else {
|
|
mSeenBadToken = true;
|
|
aToken.mType = eCSSToken_Bad_URL;
|
|
if (aToken.mSymbol != 0) {
|
|
// Flag us as having been a String, not a Bad_String.
|
|
aToken.mInteger2 = 0;
|
|
}
|
|
ConsumeBadURLRemnants(aToken);
|
|
}
|
|
}
|
|
|
|
void
|
|
nsCSSScanner::ConsumeBadURLRemnants(nsCSSToken& aToken)
|
|
{
|
|
aToken.mInteger = aToken.mIdent.Length();
|
|
int32_t ch = Peek();
|
|
do {
|
|
if (ch < 0) {
|
|
AddEOFCharacters(eEOFCharacters_CloseParen);
|
|
break;
|
|
}
|
|
|
|
if (ch == '\\' && GatherEscape(aToken.mIdent, false)) {
|
|
// Nothing else needs to be done here for the moment; we've consumed the
|
|
// backslash and following escape.
|
|
} else {
|
|
// We always want to consume this character.
|
|
if (IsVertSpace(ch)) {
|
|
AdvanceLine();
|
|
} else {
|
|
Advance();
|
|
}
|
|
if (ch == 0) {
|
|
aToken.mIdent.Append(UCS2_REPLACEMENT_CHAR);
|
|
} else {
|
|
aToken.mIdent.Append(ch);
|
|
}
|
|
}
|
|
|
|
ch = Peek();
|
|
} while (ch != ')');
|
|
}
|
|
|
|
/**
|
|
* Primary scanner entry point. Consume one token and fill in
|
|
* |aToken| accordingly. Will skip over any number of comments first,
|
|
* and will also skip over rather than return whitespace and comment
|
|
* tokens, depending on the value of |aSkip|.
|
|
*
|
|
* Returns true if it successfully consumed a token, false if EOF has
|
|
* been reached. Will always advance the current read position by at
|
|
* least one character unless called when already at EOF.
|
|
*/
|
|
bool
|
|
nsCSSScanner::Next(nsCSSToken& aToken, nsCSSScannerExclude aSkip)
|
|
{
|
|
int32_t ch;
|
|
|
|
// do this here so we don't have to do it in dozens of other places
|
|
aToken.mIdent.Truncate();
|
|
aToken.mType = eCSSToken_Symbol;
|
|
|
|
for (;;) {
|
|
// Consume any number of comments, and possibly also whitespace tokens,
|
|
// in between other tokens.
|
|
mTokenOffset = mOffset;
|
|
mTokenLineOffset = mLineOffset;
|
|
mTokenLineNumber = mLineNumber;
|
|
|
|
ch = Peek();
|
|
if (IsWhitespace(ch)) {
|
|
SkipWhitespace();
|
|
if (aSkip != eCSSScannerExclude_WhitespaceAndComments) {
|
|
aToken.mType = eCSSToken_Whitespace;
|
|
return true;
|
|
}
|
|
continue; // start again at the beginning
|
|
}
|
|
if (ch == '/' && !IsSVGMode() && Peek(1) == '*') {
|
|
SkipComment();
|
|
if (aSkip == eCSSScannerExclude_None) {
|
|
aToken.mType = eCSSToken_Comment;
|
|
return true;
|
|
}
|
|
continue; // start again at the beginning
|
|
}
|
|
break;
|
|
}
|
|
|
|
// EOF
|
|
if (ch < 0) {
|
|
return false;
|
|
}
|
|
|
|
// 'u' could be UNICODE-RANGE or an identifier-family token
|
|
if (ch == 'u' || ch == 'U') {
|
|
int32_t c2 = Peek(1);
|
|
int32_t c3 = Peek(2);
|
|
if (c2 == '+' && (IsHexDigit(c3) || c3 == '?')) {
|
|
return ScanURange(aToken);
|
|
}
|
|
return ScanIdent(aToken);
|
|
}
|
|
|
|
// identifier family
|
|
if (IsIdentStart(ch)) {
|
|
return ScanIdent(aToken);
|
|
}
|
|
|
|
// number family
|
|
if (IsDigit(ch)) {
|
|
return ScanNumber(aToken);
|
|
}
|
|
|
|
if (ch == '.' && IsDigit(Peek(1))) {
|
|
return ScanNumber(aToken);
|
|
}
|
|
|
|
if (ch == '+') {
|
|
int32_t c2 = Peek(1);
|
|
if (IsDigit(c2) || (c2 == '.' && IsDigit(Peek(2)))) {
|
|
return ScanNumber(aToken);
|
|
}
|
|
}
|
|
|
|
// '-' can start an identifier-family token, a number-family token,
|
|
// or an HTML-comment
|
|
if (ch == '-') {
|
|
int32_t c2 = Peek(1);
|
|
int32_t c3 = Peek(2);
|
|
if (IsIdentStart(c2) || (c2 == '-' && c3 != '>')) {
|
|
return ScanIdent(aToken);
|
|
}
|
|
if (IsDigit(c2) || (c2 == '.' && IsDigit(c3))) {
|
|
return ScanNumber(aToken);
|
|
}
|
|
if (c2 == '-' && c3 == '>') {
|
|
Advance(3);
|
|
aToken.mType = eCSSToken_HTMLComment;
|
|
aToken.mIdent.AssignLiteral("-->");
|
|
return true;
|
|
}
|
|
}
|
|
|
|
// the other HTML-comment token
|
|
if (ch == '<' && Peek(1) == '!' && Peek(2) == '-' && Peek(3) == '-') {
|
|
Advance(4);
|
|
aToken.mType = eCSSToken_HTMLComment;
|
|
aToken.mIdent.AssignLiteral("<!--");
|
|
return true;
|
|
}
|
|
|
|
// AT_KEYWORD
|
|
if (ch == '@') {
|
|
return ScanAtKeyword(aToken);
|
|
}
|
|
|
|
// HASH
|
|
if (ch == '#') {
|
|
return ScanHash(aToken);
|
|
}
|
|
|
|
// STRING
|
|
if (ch == '"' || ch == '\'') {
|
|
return ScanString(aToken);
|
|
}
|
|
|
|
// Match operators: ~= |= ^= $= *=
|
|
nsCSSTokenType opType = MatchOperatorType(ch);
|
|
if (opType != eCSSToken_Symbol && Peek(1) == '=') {
|
|
aToken.mType = opType;
|
|
Advance(2);
|
|
return true;
|
|
}
|
|
|
|
// Otherwise, a symbol (DELIM).
|
|
aToken.mSymbol = ch;
|
|
Advance();
|
|
return true;
|
|
}
|
|
|
|
/* nsCSSGridTemplateAreaScanner methods. */
|
|
|
|
/**
 * Set up a scanner over |aBuffer|, positioned at offset 0.
 *
 * NOTE(review): only the result of aBuffer.BeginReading() is stored, so
 * presumably |aBuffer| must outlive the scanner -- confirm against callers.
 */
nsCSSGridTemplateAreaScanner::nsCSSGridTemplateAreaScanner(
    const nsAString& aBuffer)
  : mBuffer(aBuffer.BeginReading()),
    mOffset(0),
    mCount(aBuffer.Length())
{
}
|
|
|
|
bool
|
|
nsCSSGridTemplateAreaScanner::Next(nsCSSGridTemplateAreaToken& aTokenResult)
|
|
{
|
|
int32_t ch;
|
|
// Skip whitespace
|
|
do {
|
|
if (mOffset >= mCount) {
|
|
return false;
|
|
}
|
|
ch = mBuffer[mOffset];
|
|
mOffset++;
|
|
} while (IsWhitespace(ch));
|
|
|
|
if (IsOpenCharClass(ch, IS_IDCHAR)) {
|
|
// Named cell token
|
|
uint32_t start = mOffset - 1; // offset of |ch|
|
|
while (mOffset < mCount && IsOpenCharClass(mBuffer[mOffset], IS_IDCHAR)) {
|
|
mOffset++;
|
|
}
|
|
aTokenResult.mName.Assign(&mBuffer[start], mOffset - start);
|
|
aTokenResult.isTrash = false;
|
|
} else if (ch == '.') {
|
|
// Null cell token
|
|
// Skip any other '.'
|
|
while (mOffset < mCount && mBuffer[mOffset] == '.') {
|
|
mOffset++;
|
|
}
|
|
aTokenResult.mName.Truncate();
|
|
aTokenResult.isTrash = false;
|
|
} else {
|
|
// Trash token
|
|
aTokenResult.isTrash = true;
|
|
}
|
|
return true;
|
|
}
|