mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-12-14 02:31:59 +00:00
7b2e6d4996
Differential Revision: https://phabricator.services.mozilla.com/D131422
806 lines
21 KiB
C++
806 lines
21 KiB
C++
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
|
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
|
|
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
#include "Tokenizer.h"
|
|
|
|
#include "nsUnicharUtils.h"
|
|
#include <algorithm>
|
|
|
|
namespace mozilla {
|
|
|
|
template <>
|
|
char const TokenizerBase<char>::sWhitespaces[] = {' ', '\t', 0};
|
|
template <>
|
|
char16_t const TokenizerBase<char16_t>::sWhitespaces[3] = {' ', '\t', 0};
|
|
|
|
// Returns true when |needle| occurs in the zero-terminated array |list|.
// The scan stops at the terminating zero, so a zero |needle| never matches.
template <typename TChar>
static bool contains(TChar const* const list, TChar const needle) {
  TChar const* it = list;
  while (*it && *it != needle) {
    ++it;
  }
  // Either we stopped on a match (non-zero) or on the terminator (zero).
  return *it != 0;
}
|
|
|
|
template <typename TChar>
|
|
TTokenizer<TChar>::TTokenizer(const typename base::TAString& aSource,
|
|
const TChar* aWhitespaces,
|
|
const TChar* aAdditionalWordChars)
|
|
: TokenizerBase<TChar>(aWhitespaces, aAdditionalWordChars) {
|
|
base::mInputFinished = true;
|
|
aSource.BeginReading(base::mCursor);
|
|
mRecord = mRollback = base::mCursor;
|
|
aSource.EndReading(base::mEnd);
|
|
}
|
|
|
|
template <typename TChar>
|
|
TTokenizer<TChar>::TTokenizer(const TChar* aSource, const TChar* aWhitespaces,
|
|
const TChar* aAdditionalWordChars)
|
|
: TTokenizer(typename base::TDependentString(aSource), aWhitespaces,
|
|
aAdditionalWordChars) {}
|
|
|
|
template <typename TChar>
|
|
bool TTokenizer<TChar>::Next(typename base::Token& aToken) {
|
|
if (!base::HasInput()) {
|
|
base::mHasFailed = true;
|
|
return false;
|
|
}
|
|
|
|
mRollback = base::mCursor;
|
|
base::mCursor = base::Parse(aToken);
|
|
|
|
base::AssignFragment(aToken, mRollback, base::mCursor);
|
|
|
|
base::mPastEof = aToken.Type() == base::TOKEN_EOF;
|
|
base::mHasFailed = false;
|
|
return true;
|
|
}
|
|
|
|
template <typename TChar>
|
|
bool TTokenizer<TChar>::Check(const typename base::TokenType aTokenType,
|
|
typename base::Token& aResult) {
|
|
if (!base::HasInput()) {
|
|
base::mHasFailed = true;
|
|
return false;
|
|
}
|
|
|
|
typename base::TAString::const_char_iterator next = base::Parse(aResult);
|
|
if (aTokenType != aResult.Type()) {
|
|
base::mHasFailed = true;
|
|
return false;
|
|
}
|
|
|
|
mRollback = base::mCursor;
|
|
base::mCursor = next;
|
|
|
|
base::AssignFragment(aResult, mRollback, base::mCursor);
|
|
|
|
base::mPastEof = aResult.Type() == base::TOKEN_EOF;
|
|
base::mHasFailed = false;
|
|
return true;
|
|
}
|
|
|
|
template <typename TChar>
|
|
bool TTokenizer<TChar>::Check(const typename base::Token& aToken) {
|
|
#ifdef DEBUG
|
|
base::Validate(aToken);
|
|
#endif
|
|
|
|
if (!base::HasInput()) {
|
|
base::mHasFailed = true;
|
|
return false;
|
|
}
|
|
|
|
typename base::Token parsed;
|
|
typename base::TAString::const_char_iterator next = base::Parse(parsed);
|
|
if (!aToken.Equals(parsed)) {
|
|
base::mHasFailed = true;
|
|
return false;
|
|
}
|
|
|
|
mRollback = base::mCursor;
|
|
base::mCursor = next;
|
|
base::mPastEof = parsed.Type() == base::TOKEN_EOF;
|
|
base::mHasFailed = false;
|
|
return true;
|
|
}
|
|
|
|
template <typename TChar>
|
|
void TTokenizer<TChar>::SkipWhites(WhiteSkipping aIncludeNewLines) {
|
|
if (!CheckWhite() &&
|
|
(aIncludeNewLines == DONT_INCLUDE_NEW_LINE || !CheckEOL())) {
|
|
return;
|
|
}
|
|
|
|
typename base::TAString::const_char_iterator rollback = mRollback;
|
|
while (CheckWhite() || (aIncludeNewLines == INCLUDE_NEW_LINE && CheckEOL())) {
|
|
}
|
|
|
|
base::mHasFailed = false;
|
|
mRollback = rollback;
|
|
}
|
|
|
|
template <typename TChar>
|
|
void TTokenizer<TChar>::SkipUntil(typename base::Token const& aToken) {
|
|
typename base::TAString::const_char_iterator rollback = base::mCursor;
|
|
const typename base::Token eof = base::Token::EndOfFile();
|
|
|
|
typename base::Token t;
|
|
while (Next(t)) {
|
|
if (aToken.Equals(t) || eof.Equals(t)) {
|
|
Rollback();
|
|
break;
|
|
}
|
|
}
|
|
|
|
mRollback = rollback;
|
|
}
|
|
|
|
template <typename TChar>
|
|
bool TTokenizer<TChar>::CheckChar(bool (*aClassifier)(const TChar aChar)) {
|
|
if (!aClassifier) {
|
|
MOZ_ASSERT(false);
|
|
return false;
|
|
}
|
|
|
|
if (!base::HasInput() || base::mCursor == base::mEnd) {
|
|
base::mHasFailed = true;
|
|
return false;
|
|
}
|
|
|
|
if (!aClassifier(*base::mCursor)) {
|
|
base::mHasFailed = true;
|
|
return false;
|
|
}
|
|
|
|
mRollback = base::mCursor;
|
|
++base::mCursor;
|
|
base::mHasFailed = false;
|
|
return true;
|
|
}
|
|
|
|
template <typename TChar>
|
|
bool TTokenizer<TChar>::CheckPhrase(const typename base::TAString& aPhrase) {
|
|
if (!base::HasInput()) {
|
|
return false;
|
|
}
|
|
|
|
typedef typename base::TAString::const_char_iterator Cursor;
|
|
|
|
TTokenizer<TChar> pattern(aPhrase);
|
|
MOZ_ASSERT(!pattern.CheckEOF(),
|
|
"This will return true but won't shift the Tokenizer's cursor");
|
|
|
|
return [&](Cursor cursor, Cursor rollback) mutable {
|
|
while (true) {
|
|
if (pattern.CheckEOF()) {
|
|
base::mHasFailed = false;
|
|
mRollback = cursor;
|
|
return true;
|
|
}
|
|
|
|
typename base::Token t1, t2;
|
|
Unused << Next(t1);
|
|
Unused << pattern.Next(t2);
|
|
if (t1.Type() == t2.Type() && t1.Fragment().Equals(t2.Fragment())) {
|
|
continue;
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
base::mHasFailed = true;
|
|
base::mPastEof = false;
|
|
base::mCursor = cursor;
|
|
mRollback = rollback;
|
|
return false;
|
|
}(base::mCursor, mRollback);
|
|
}
|
|
|
|
template <typename TChar>
|
|
bool TTokenizer<TChar>::ReadChar(TChar* aValue) {
|
|
MOZ_RELEASE_ASSERT(aValue);
|
|
|
|
typename base::Token t;
|
|
if (!Check(base::TOKEN_CHAR, t)) {
|
|
return false;
|
|
}
|
|
|
|
*aValue = t.AsChar();
|
|
return true;
|
|
}
|
|
|
|
template <typename TChar>
|
|
bool TTokenizer<TChar>::ReadChar(bool (*aClassifier)(const TChar aChar),
|
|
TChar* aValue) {
|
|
MOZ_RELEASE_ASSERT(aValue);
|
|
|
|
if (!CheckChar(aClassifier)) {
|
|
return false;
|
|
}
|
|
|
|
*aValue = *mRollback;
|
|
return true;
|
|
}
|
|
|
|
template <typename TChar>
|
|
bool TTokenizer<TChar>::ReadWord(typename base::TAString& aValue) {
|
|
typename base::Token t;
|
|
if (!Check(base::TOKEN_WORD, t)) {
|
|
return false;
|
|
}
|
|
|
|
aValue.Assign(t.AsString());
|
|
return true;
|
|
}
|
|
|
|
template <typename TChar>
|
|
bool TTokenizer<TChar>::ReadWord(typename base::TDependentSubstring& aValue) {
|
|
typename base::Token t;
|
|
if (!Check(base::TOKEN_WORD, t)) {
|
|
return false;
|
|
}
|
|
|
|
aValue.Rebind(t.AsString().BeginReading(), t.AsString().Length());
|
|
return true;
|
|
}
|
|
|
|
template <typename TChar>
|
|
bool TTokenizer<TChar>::ReadUntil(typename base::Token const& aToken,
|
|
typename base::TAString& aResult,
|
|
ClaimInclusion aInclude) {
|
|
typename base::TDependentSubstring substring;
|
|
bool rv = ReadUntil(aToken, substring, aInclude);
|
|
aResult.Assign(substring);
|
|
return rv;
|
|
}
|
|
|
|
template <typename TChar>
|
|
bool TTokenizer<TChar>::ReadUntil(typename base::Token const& aToken,
|
|
typename base::TDependentSubstring& aResult,
|
|
ClaimInclusion aInclude) {
|
|
typename base::TAString::const_char_iterator record = mRecord;
|
|
Record();
|
|
typename base::TAString::const_char_iterator rollback = mRollback =
|
|
base::mCursor;
|
|
|
|
bool found = false;
|
|
typename base::Token t;
|
|
while (Next(t)) {
|
|
if (aToken.Equals(t)) {
|
|
found = true;
|
|
break;
|
|
}
|
|
if (t.Equals(base::Token::EndOfFile())) {
|
|
// We don't want to eat it.
|
|
Rollback();
|
|
break;
|
|
}
|
|
}
|
|
|
|
Claim(aResult, aInclude);
|
|
mRollback = rollback;
|
|
mRecord = record;
|
|
return found;
|
|
}
|
|
|
|
template <typename TChar>
|
|
void TTokenizer<TChar>::Rollback() {
|
|
MOZ_ASSERT(base::mCursor > mRollback || base::mPastEof, "TODO!!!");
|
|
|
|
base::mPastEof = false;
|
|
base::mHasFailed = false;
|
|
base::mCursor = mRollback;
|
|
}
|
|
|
|
template <typename TChar>
|
|
void TTokenizer<TChar>::Record(ClaimInclusion aInclude) {
|
|
mRecord = aInclude == INCLUDE_LAST ? mRollback : base::mCursor;
|
|
}
|
|
|
|
template <typename TChar>
|
|
void TTokenizer<TChar>::Claim(typename base::TAString& aResult,
|
|
ClaimInclusion aInclusion) {
|
|
typename base::TAString::const_char_iterator close =
|
|
aInclusion == EXCLUDE_LAST ? mRollback : base::mCursor;
|
|
aResult.Assign(Substring(mRecord, close));
|
|
}
|
|
|
|
template <typename TChar>
|
|
void TTokenizer<TChar>::Claim(typename base::TDependentSubstring& aResult,
|
|
ClaimInclusion aInclusion) {
|
|
typename base::TAString::const_char_iterator close =
|
|
aInclusion == EXCLUDE_LAST ? mRollback : base::mCursor;
|
|
|
|
MOZ_RELEASE_ASSERT(close >= mRecord, "Overflow!");
|
|
aResult.Rebind(mRecord, close - mRecord);
|
|
}
|
|
|
|
// TokenizerBase
|
|
|
|
template <typename TChar>
|
|
TokenizerBase<TChar>::TokenizerBase(const TChar* aWhitespaces,
|
|
const TChar* aAdditionalWordChars)
|
|
: mPastEof(false),
|
|
mHasFailed(false),
|
|
mInputFinished(true),
|
|
mMode(Mode::FULL),
|
|
mMinRawDelivery(1024),
|
|
mWhitespaces(aWhitespaces ? aWhitespaces : sWhitespaces),
|
|
mAdditionalWordChars(aAdditionalWordChars),
|
|
mCursor(nullptr),
|
|
mEnd(nullptr),
|
|
mNextCustomTokenID(TOKEN_CUSTOM0) {}
|
|
|
|
template <typename TChar>
|
|
auto TokenizerBase<TChar>::AddCustomToken(const TAString& aValue,
|
|
ECaseSensitivity aCaseInsensitivity,
|
|
bool aEnabled) -> Token {
|
|
MOZ_ASSERT(!aValue.IsEmpty());
|
|
|
|
UniquePtr<Token>& t = *mCustomTokens.AppendElement();
|
|
t = MakeUnique<Token>();
|
|
|
|
t->mType = static_cast<TokenType>(++mNextCustomTokenID);
|
|
t->mCustomCaseInsensitivity = aCaseInsensitivity;
|
|
t->mCustomEnabled = aEnabled;
|
|
t->mCustom.Assign(aValue);
|
|
return *t;
|
|
}
|
|
|
|
template <typename TChar>
|
|
void TokenizerBase<TChar>::RemoveCustomToken(Token& aToken) {
|
|
if (aToken.mType == TOKEN_UNKNOWN) {
|
|
// Already removed
|
|
return;
|
|
}
|
|
|
|
for (UniquePtr<Token> const& custom : mCustomTokens) {
|
|
if (custom->mType == aToken.mType) {
|
|
mCustomTokens.RemoveElement(custom);
|
|
aToken.mType = TOKEN_UNKNOWN;
|
|
return;
|
|
}
|
|
}
|
|
|
|
MOZ_ASSERT(false, "Token to remove not found");
|
|
}
|
|
|
|
template <typename TChar>
|
|
void TokenizerBase<TChar>::EnableCustomToken(Token const& aToken,
|
|
bool aEnabled) {
|
|
if (aToken.mType == TOKEN_UNKNOWN) {
|
|
// Already removed
|
|
return;
|
|
}
|
|
|
|
for (UniquePtr<Token> const& custom : mCustomTokens) {
|
|
if (custom->Type() == aToken.Type()) {
|
|
// This effectively destroys the token instance.
|
|
custom->mCustomEnabled = aEnabled;
|
|
return;
|
|
}
|
|
}
|
|
|
|
MOZ_ASSERT(false, "Token to change not found");
|
|
}
|
|
|
|
template <typename TChar>
|
|
void TokenizerBase<TChar>::SetTokenizingMode(Mode aMode) {
|
|
mMode = aMode;
|
|
}
|
|
|
|
template <typename TChar>
|
|
bool TokenizerBase<TChar>::HasFailed() const {
|
|
return mHasFailed;
|
|
}
|
|
|
|
template <typename TChar>
|
|
bool TokenizerBase<TChar>::HasInput() const {
|
|
return !mPastEof;
|
|
}
|
|
|
|
template <typename TChar>
|
|
auto TokenizerBase<TChar>::Parse(Token& aToken) const ->
|
|
typename TAString::const_char_iterator {
|
|
if (mCursor == mEnd) {
|
|
if (!mInputFinished) {
|
|
return mCursor;
|
|
}
|
|
|
|
aToken = Token::EndOfFile();
|
|
return mEnd;
|
|
}
|
|
|
|
MOZ_RELEASE_ASSERT(mEnd >= mCursor, "Overflow!");
|
|
typename TAString::size_type available = mEnd - mCursor;
|
|
|
|
uint32_t longestCustom = 0;
|
|
for (UniquePtr<Token> const& custom : mCustomTokens) {
|
|
if (IsCustom(mCursor, *custom, &longestCustom)) {
|
|
aToken = *custom;
|
|
return mCursor + custom->mCustom.Length();
|
|
}
|
|
}
|
|
|
|
if (!mInputFinished && available < longestCustom) {
|
|
// Not enough data to deterministically decide.
|
|
return mCursor;
|
|
}
|
|
|
|
typename TAString::const_char_iterator next = mCursor;
|
|
|
|
if (mMode == Mode::CUSTOM_ONLY) {
|
|
// We have to do a brute-force search for all of the enabled custom
|
|
// tokens.
|
|
while (next < mEnd) {
|
|
++next;
|
|
for (UniquePtr<Token> const& custom : mCustomTokens) {
|
|
if (IsCustom(next, *custom)) {
|
|
aToken = Token::Raw();
|
|
return next;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (mInputFinished) {
|
|
// End of the data reached.
|
|
aToken = Token::Raw();
|
|
return next;
|
|
}
|
|
|
|
if (longestCustom < available && available > mMinRawDelivery) {
|
|
// We can return some data w/o waiting for either a custom token
|
|
// or call to FinishData() when we leave the tail where all the
|
|
// custom tokens potentially fit, so we can't lose only partially
|
|
// delivered tokens. This preserves reasonable granularity.
|
|
aToken = Token::Raw();
|
|
return mEnd - longestCustom + 1;
|
|
}
|
|
|
|
// Not enough data to deterministically decide.
|
|
return mCursor;
|
|
}
|
|
|
|
enum State {
|
|
PARSE_INTEGER,
|
|
PARSE_WORD,
|
|
PARSE_CRLF,
|
|
PARSE_LF,
|
|
PARSE_WS,
|
|
PARSE_CHAR,
|
|
} state;
|
|
|
|
if (IsWordFirst(*next)) {
|
|
state = PARSE_WORD;
|
|
} else if (IsNumber(*next)) {
|
|
state = PARSE_INTEGER;
|
|
} else if (contains(mWhitespaces, *next)) { // not UTF-8 friendly?
|
|
state = PARSE_WS;
|
|
} else if (*next == '\r') {
|
|
state = PARSE_CRLF;
|
|
} else if (*next == '\n') {
|
|
state = PARSE_LF;
|
|
} else {
|
|
state = PARSE_CHAR;
|
|
}
|
|
|
|
mozilla::CheckedUint64 resultingNumber = 0;
|
|
|
|
while (next < mEnd) {
|
|
switch (state) {
|
|
case PARSE_INTEGER:
|
|
// Keep it simple for now
|
|
resultingNumber *= 10;
|
|
resultingNumber += static_cast<uint64_t>(*next - '0');
|
|
|
|
++next;
|
|
if (IsPending(next)) {
|
|
break;
|
|
}
|
|
if (IsEnd(next) || !IsNumber(*next)) {
|
|
if (!resultingNumber.isValid()) {
|
|
aToken = Token::Error();
|
|
} else {
|
|
aToken = Token::Number(resultingNumber.value());
|
|
}
|
|
return next;
|
|
}
|
|
break;
|
|
|
|
case PARSE_WORD:
|
|
++next;
|
|
if (IsPending(next)) {
|
|
break;
|
|
}
|
|
if (IsEnd(next) || !IsWord(*next)) {
|
|
aToken = Token::Word(Substring(mCursor, next));
|
|
return next;
|
|
}
|
|
break;
|
|
|
|
case PARSE_CRLF:
|
|
++next;
|
|
if (IsPending(next)) {
|
|
break;
|
|
}
|
|
if (!IsEnd(next) && *next == '\n') { // LF is optional
|
|
++next;
|
|
}
|
|
aToken = Token::NewLine();
|
|
return next;
|
|
|
|
case PARSE_LF:
|
|
++next;
|
|
aToken = Token::NewLine();
|
|
return next;
|
|
|
|
case PARSE_WS:
|
|
++next;
|
|
aToken = Token::Whitespace();
|
|
return next;
|
|
|
|
case PARSE_CHAR:
|
|
++next;
|
|
aToken = Token::Char(*mCursor);
|
|
return next;
|
|
} // switch (state)
|
|
} // while (next < end)
|
|
|
|
MOZ_ASSERT(!mInputFinished);
|
|
return mCursor;
|
|
}
|
|
|
|
template <typename TChar>
|
|
bool TokenizerBase<TChar>::IsEnd(
|
|
const typename TAString::const_char_iterator& caret) const {
|
|
return caret == mEnd;
|
|
}
|
|
|
|
template <typename TChar>
|
|
bool TokenizerBase<TChar>::IsPending(
|
|
const typename TAString::const_char_iterator& caret) const {
|
|
return IsEnd(caret) && !mInputFinished;
|
|
}
|
|
|
|
template <typename TChar>
|
|
bool TokenizerBase<TChar>::IsWordFirst(const TChar aInput) const {
|
|
// TODO: make this fully work with unicode
|
|
return (ToLowerCase(static_cast<uint32_t>(aInput)) !=
|
|
ToUpperCase(static_cast<uint32_t>(aInput))) ||
|
|
'_' == aInput ||
|
|
(mAdditionalWordChars ? contains(mAdditionalWordChars, aInput)
|
|
: false);
|
|
}
|
|
|
|
template <typename TChar>
|
|
bool TokenizerBase<TChar>::IsWord(const TChar aInput) const {
|
|
return IsWordFirst(aInput) || IsNumber(aInput);
|
|
}
|
|
|
|
template <typename TChar>
|
|
bool TokenizerBase<TChar>::IsNumber(const TChar aInput) const {
|
|
// TODO: are there unicode numbers?
|
|
return aInput >= '0' && aInput <= '9';
|
|
}
|
|
|
|
template <typename TChar>
|
|
bool TokenizerBase<TChar>::IsCustom(
|
|
const typename TAString::const_char_iterator& caret,
|
|
const Token& aCustomToken, uint32_t* aLongest) const {
|
|
MOZ_ASSERT(aCustomToken.mType > TOKEN_CUSTOM0);
|
|
if (!aCustomToken.mCustomEnabled) {
|
|
return false;
|
|
}
|
|
|
|
if (aLongest) {
|
|
*aLongest = std::max<uint32_t>(*aLongest, aCustomToken.mCustom.Length());
|
|
}
|
|
|
|
// This is not very likely to happen according to how we call this method
|
|
// and since it's on a hot path, it's just a diagnostic assert,
|
|
// not a release assert.
|
|
MOZ_DIAGNOSTIC_ASSERT(mEnd >= caret, "Overflow?");
|
|
uint32_t inputLength = mEnd - caret;
|
|
if (aCustomToken.mCustom.Length() > inputLength) {
|
|
return false;
|
|
}
|
|
|
|
TDependentSubstring inputFragment(caret, aCustomToken.mCustom.Length());
|
|
if (aCustomToken.mCustomCaseInsensitivity == CASE_INSENSITIVE) {
|
|
if constexpr (std::is_same_v<TChar, char>) {
|
|
return inputFragment.Equals(aCustomToken.mCustom,
|
|
nsCaseInsensitiveUTF8StringComparator);
|
|
} else {
|
|
return inputFragment.Equals(aCustomToken.mCustom,
|
|
nsCaseInsensitiveStringComparator);
|
|
}
|
|
}
|
|
return inputFragment.Equals(aCustomToken.mCustom);
|
|
}
|
|
|
|
template <typename TChar>
|
|
void TokenizerBase<TChar>::AssignFragment(
|
|
Token& aToken, typename TAString::const_char_iterator begin,
|
|
typename TAString::const_char_iterator end) {
|
|
aToken.AssignFragment(begin, end);
|
|
}
|
|
|
|
#ifdef DEBUG
|
|
|
|
template <typename TChar>
|
|
void TokenizerBase<TChar>::Validate(Token const& aToken) {
|
|
if (aToken.Type() == TOKEN_WORD) {
|
|
typename TAString::const_char_iterator c = aToken.AsString().BeginReading();
|
|
typename TAString::const_char_iterator e = aToken.AsString().EndReading();
|
|
|
|
if (c < e) {
|
|
MOZ_ASSERT(IsWordFirst(*c));
|
|
while (++c < e) {
|
|
MOZ_ASSERT(IsWord(*c));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
#endif
|
|
|
|
// TokenizerBase::Token
|
|
|
|
template <typename TChar>
|
|
TokenizerBase<TChar>::Token::Token()
|
|
: mType(TOKEN_UNKNOWN),
|
|
mChar(0),
|
|
mInteger(0),
|
|
mCustomCaseInsensitivity(CASE_SENSITIVE),
|
|
mCustomEnabled(false) {}
|
|
|
|
template <typename TChar>
|
|
TokenizerBase<TChar>::Token::Token(const Token& aOther)
|
|
: mType(aOther.mType),
|
|
mCustom(aOther.mCustom),
|
|
mChar(aOther.mChar),
|
|
mInteger(aOther.mInteger),
|
|
mCustomCaseInsensitivity(aOther.mCustomCaseInsensitivity),
|
|
mCustomEnabled(aOther.mCustomEnabled) {
|
|
if (mType == TOKEN_WORD || mType > TOKEN_CUSTOM0) {
|
|
mWord.Rebind(aOther.mWord.BeginReading(), aOther.mWord.Length());
|
|
}
|
|
}
|
|
|
|
template <typename TChar>
|
|
auto TokenizerBase<TChar>::Token::operator=(const Token& aOther) -> Token& {
|
|
mType = aOther.mType;
|
|
mCustom = aOther.mCustom;
|
|
mChar = aOther.mChar;
|
|
mWord.Rebind(aOther.mWord.BeginReading(), aOther.mWord.Length());
|
|
mInteger = aOther.mInteger;
|
|
mCustomCaseInsensitivity = aOther.mCustomCaseInsensitivity;
|
|
mCustomEnabled = aOther.mCustomEnabled;
|
|
return *this;
|
|
}
|
|
|
|
template <typename TChar>
|
|
void TokenizerBase<TChar>::Token::AssignFragment(
|
|
typename TAString::const_char_iterator begin,
|
|
typename TAString::const_char_iterator end) {
|
|
MOZ_RELEASE_ASSERT(end >= begin, "Overflow!");
|
|
mFragment.Rebind(begin, end - begin);
|
|
}
|
|
|
|
// static
|
|
template <typename TChar>
|
|
auto TokenizerBase<TChar>::Token::Raw() -> Token {
|
|
Token t;
|
|
t.mType = TOKEN_RAW;
|
|
return t;
|
|
}
|
|
|
|
// static
|
|
template <typename TChar>
|
|
auto TokenizerBase<TChar>::Token::Word(TAString const& aValue) -> Token {
|
|
Token t;
|
|
t.mType = TOKEN_WORD;
|
|
t.mWord.Rebind(aValue.BeginReading(), aValue.Length());
|
|
return t;
|
|
}
|
|
|
|
// static
|
|
template <typename TChar>
|
|
auto TokenizerBase<TChar>::Token::Char(TChar const aValue) -> Token {
|
|
Token t;
|
|
t.mType = TOKEN_CHAR;
|
|
t.mChar = aValue;
|
|
return t;
|
|
}
|
|
|
|
// static
|
|
template <typename TChar>
|
|
auto TokenizerBase<TChar>::Token::Number(uint64_t const aValue) -> Token {
|
|
Token t;
|
|
t.mType = TOKEN_INTEGER;
|
|
t.mInteger = aValue;
|
|
return t;
|
|
}
|
|
|
|
// static
|
|
template <typename TChar>
|
|
auto TokenizerBase<TChar>::Token::Whitespace() -> Token {
|
|
Token t;
|
|
t.mType = TOKEN_WS;
|
|
t.mChar = '\0';
|
|
return t;
|
|
}
|
|
|
|
// static
|
|
template <typename TChar>
|
|
auto TokenizerBase<TChar>::Token::NewLine() -> Token {
|
|
Token t;
|
|
t.mType = TOKEN_EOL;
|
|
return t;
|
|
}
|
|
|
|
// static
|
|
template <typename TChar>
|
|
auto TokenizerBase<TChar>::Token::EndOfFile() -> Token {
|
|
Token t;
|
|
t.mType = TOKEN_EOF;
|
|
return t;
|
|
}
|
|
|
|
// static
|
|
template <typename TChar>
|
|
auto TokenizerBase<TChar>::Token::Error() -> Token {
|
|
Token t;
|
|
t.mType = TOKEN_ERROR;
|
|
return t;
|
|
}
|
|
|
|
template <typename TChar>
|
|
bool TokenizerBase<TChar>::Token::Equals(const Token& aOther) const {
|
|
if (mType != aOther.mType) {
|
|
return false;
|
|
}
|
|
|
|
switch (mType) {
|
|
case TOKEN_INTEGER:
|
|
return AsInteger() == aOther.AsInteger();
|
|
case TOKEN_WORD:
|
|
return AsString() == aOther.AsString();
|
|
case TOKEN_CHAR:
|
|
return AsChar() == aOther.AsChar();
|
|
default:
|
|
return true;
|
|
}
|
|
}
|
|
|
|
template <typename TChar>
|
|
TChar TokenizerBase<TChar>::Token::AsChar() const {
|
|
MOZ_ASSERT(mType == TOKEN_CHAR || mType == TOKEN_WS);
|
|
return mChar;
|
|
}
|
|
|
|
template <typename TChar>
|
|
auto TokenizerBase<TChar>::Token::AsString() const -> TDependentSubstring {
|
|
MOZ_ASSERT(mType == TOKEN_WORD);
|
|
return mWord;
|
|
}
|
|
|
|
template <typename TChar>
|
|
uint64_t TokenizerBase<TChar>::Token::AsInteger() const {
|
|
MOZ_ASSERT(mType == TOKEN_INTEGER);
|
|
return mInteger;
|
|
}
|
|
|
|
template class TokenizerBase<char>;
|
|
template class TokenizerBase<char16_t>;
|
|
|
|
template class TTokenizer<char>;
|
|
template class TTokenizer<char16_t>;
|
|
|
|
} // namespace mozilla
|