Bug 1334246. r=mrbkap.

nsScannerString changes by erahm.

MozReview-Commit-ID: 6lh8SYjpGlj
This commit is contained in:
Henri Sivonen 2017-02-16 10:05:27 +02:00
parent 26a8c610e5
commit fd7f0ae458
4 changed files with 24 additions and 496 deletions

View File

@ -335,414 +335,6 @@ nsParser::GetContentSink()
return mSink;
}
/**
* Determine what DTD mode (and thus what layout nsCompatibility mode)
* to use for this document based on the first chunk of data received
* from the network (each parsercontext can have its own mode). (No,
* this is not an optimal solution -- we really don't need to know until
* after we've received the DOCTYPE, and this could easily be part of
* the regular parsing process if the parser were designed in a way that
* made such modifications easy.)
*/
// Consume one SGML "PS" production (minus entity-reference handling)
// beginning at aIndex in aBuffer: any run of blanks, tabs, newlines and
// carriage returns, interleaved with "--" ... "--" comments.
// Returns the index of the first character that is not part of the
// production. An opening "--" that has no closing "--" is not skipped.
static int32_t
ParsePS(const nsString& aBuffer, int32_t aIndex)
{
  while (true) {
    const char16_t current = aBuffer.CharAt(aIndex);
    switch (current) {
      case char16_t(' '):
      case char16_t('\t'):
      case char16_t('\n'):
      case char16_t('\r'):
        // Plain whitespace: step over it.
        ++aIndex;
        continue;

      case char16_t('-'): {
        // Only a double dash opens a comment.
        if (aBuffer.CharAt(aIndex + 1) != char16_t('-')) {
          return aIndex;
        }
        const int32_t closer = aBuffer.Find("--", false, aIndex + 2, -1);
        if (closer == kNotFound) {
          // Unterminated comment: stop at its opening dash.
          return aIndex;
        }
        // Resume right after the closing "--".
        aIndex = closer + 2;
        continue;
      }

      default:
        return aIndex;
    }
  }
}
// Bit flags that ParseDocTypeDecl ORs into its aResultFlags out-parameter.
// A "DOCTYPE" keyword was found directly after a "<!" in the buffer.
#define PARSE_DTD_HAVE_DOCTYPE (1<<0)
// A quoted public identifier followed the PUBLIC keyword and was extracted.
#define PARSE_DTD_HAVE_PUBLIC_ID (1<<1)
// A system identifier was started (note: the flag is set before the closing
// quote has been located, so it may be set even when parsing then fails).
#define PARSE_DTD_HAVE_SYSTEM_ID (1<<2)
// A '[' (start of an internal subset) followed the identifiers.
#define PARSE_DTD_HAVE_INTERNAL_SUBSET (1<<3)
// Look for an HTML DOCTYPE declaration in aBuffer and pull out its public
// and system identifiers.
//
// aBuffer      - text to scan.
// aResultFlags - out: reset to 0 on entry, then ORed with PARSE_DTD_HAVE_*
//                bits as the pieces of the declaration are recognized.
//                Bits set before a failure is detected stay set.
// aPublicID    - out: public identifier with whitespace compressed; only
//                assigned when the PUBLIC branch parses successfully.
// aSystemID    - out: system identifier text between its quotes; only
//                assigned when a closing quote is found.
//
// Returns true on success (this includes the case where no DOCTYPE is
// present at all), false when a DOCTYPE was found but is malformed.
static bool
ParseDocTypeDecl(const nsString &aBuffer,
int32_t *aResultFlags,
nsString &aPublicID,
nsString &aSystemID)
{
bool haveDoctype = false;
*aResultFlags = 0;
// Skip through any comments and processing instructions
// The PI-skipping is a bit of a hack.
int32_t theIndex = 0;
do {
theIndex = aBuffer.FindChar('<', theIndex);
if (theIndex == kNotFound) break;
char16_t nextChar = aBuffer.CharAt(theIndex+1);
if (nextChar == char16_t('!')) {
// "<!" starts either the DOCTYPE or a comment declaration.
int32_t tmpIndex = theIndex + 2;
// NOTE(review): the trailing count of 0 presumably limits the match to
// exactly tmpIndex -- confirm against nsString::Find.
if (kNotFound !=
(theIndex=aBuffer.Find("DOCTYPE", true, tmpIndex, 0))) {
haveDoctype = true;
theIndex += 7; // skip "DOCTYPE"
break;
}
// Not a DOCTYPE: skip whitespace/comments, then jump to the next '>'.
theIndex = ParsePS(aBuffer, tmpIndex);
theIndex = aBuffer.FindChar('>', theIndex);
} else if (nextChar == char16_t('?')) {
// Processing instruction: jump to the next '>'.
theIndex = aBuffer.FindChar('>', theIndex);
} else {
// Any other tag ends the search for a DOCTYPE.
break;
}
} while (theIndex != kNotFound);
if (!haveDoctype)
return true;
*aResultFlags |= PARSE_DTD_HAVE_DOCTYPE;
// The name after DOCTYPE must be "HTML" (case-insensitive search).
theIndex = ParsePS(aBuffer, theIndex);
theIndex = aBuffer.Find("HTML", true, theIndex, 0);
if (kNotFound == theIndex)
return false;
theIndex = ParsePS(aBuffer, theIndex+4);
int32_t tmpIndex = aBuffer.Find("PUBLIC", true, theIndex, 0);
if (kNotFound != tmpIndex) {
theIndex = ParsePS(aBuffer, tmpIndex+6);
// We get here only if we've read <!DOCTYPE HTML PUBLIC
// (not case sensitive) possibly with comments within.
// Now find the beginning and end of the public identifier
// and the system identifier (if present).
char16_t lit = aBuffer.CharAt(theIndex);
if ((lit != char16_t('\"')) && (lit != char16_t('\'')))
return false;
// Start is the first character, excluding the quote, and End is
// the final quote, so there are (end-start) characters.
int32_t PublicIDStart = theIndex + 1;
int32_t PublicIDEnd = aBuffer.FindChar(lit, PublicIDStart);
if (kNotFound == PublicIDEnd)
return false;
theIndex = ParsePS(aBuffer, PublicIDEnd + 1);
char16_t next = aBuffer.CharAt(theIndex);
if (next == char16_t('>')) {
// There was a public identifier, but no system
// identifier,
// so do nothing.
// This is needed to avoid the else at the end, and it's
// also the most common case.
} else if ((next == char16_t('\"')) ||
(next == char16_t('\''))) {
// We found a system identifier.
*aResultFlags |= PARSE_DTD_HAVE_SYSTEM_ID;
int32_t SystemIDStart = theIndex + 1;
// The identifier ends at the next occurrence of its opening quote.
int32_t SystemIDEnd = aBuffer.FindChar(next, SystemIDStart);
if (kNotFound == SystemIDEnd)
return false;
aSystemID =
Substring(aBuffer, SystemIDStart, SystemIDEnd - SystemIDStart);
} else if (next == char16_t('[')) {
// We found an internal subset.
*aResultFlags |= PARSE_DTD_HAVE_INTERNAL_SUBSET;
} else {
// Something's wrong.
return false;
}
// Since a public ID is a minimum literal, we must trim
// and collapse whitespace
aPublicID = Substring(aBuffer, PublicIDStart, PublicIDEnd - PublicIDStart);
aPublicID.CompressWhitespace(true, true);
*aResultFlags |= PARSE_DTD_HAVE_PUBLIC_ID;
} else {
// No PUBLIC keyword: the declaration may still carry SYSTEM "...".
tmpIndex=aBuffer.Find("SYSTEM", true, theIndex, 0);
if (kNotFound != tmpIndex) {
// DOCTYPES with system ID but no Public ID
*aResultFlags |= PARSE_DTD_HAVE_SYSTEM_ID;
theIndex = ParsePS(aBuffer, tmpIndex+6);
char16_t next = aBuffer.CharAt(theIndex);
if (next != char16_t('\"') && next != char16_t('\''))
return false;
int32_t SystemIDStart = theIndex + 1;
int32_t SystemIDEnd = aBuffer.FindChar(next, SystemIDStart);
if (kNotFound == SystemIDEnd)
return false;
aSystemID =
Substring(aBuffer, SystemIDStart, SystemIDEnd - SystemIDStart);
theIndex = ParsePS(aBuffer, SystemIDEnd + 1);
}
// Whatever came before, the declaration must now either open an
// internal subset ('[') or close ('>').
char16_t nextChar = aBuffer.CharAt(theIndex);
if (nextChar == char16_t('['))
*aResultFlags |= PARSE_DTD_HAVE_INTERNAL_SUBSET;
else if (nextChar != char16_t('>'))
return false;
}
return true;
}
// One entry of the public-identifier lookup table: a doctype public ID and
// the rendering mode it selects, depending on whether the doctype also
// carried a system identifier.
struct PubIDInfo
{
// Rendering mode an entry can select.
enum eMode {
eQuirks, /* always quirks mode, unless there's an internal subset */
eAlmostStandards,/* eCompatibility_AlmostStandards */
eFullStandards /* eCompatibility_FullStandards */
/*
* public IDs that should trigger strict mode are not listed
* since we want all future public IDs to trigger strict mode as
* well
*/
};
// Public identifier text used as the lookup key.
const char* name;
// Mode to use when the doctype has no system identifier.
eMode mode_if_no_sysid;
// Mode to use when the doctype has a system identifier.
eMode mode_if_sysid;
};
// Element count of a statically sized array: its total byte size divided by
// the size of its first element. Only meaningful for a real array; a pointer
// argument would yield sizeof(pointer)/sizeof(element) instead.
#define ELEMENTS_OF(array_) (sizeof(array_)/sizeof(array_[0]))
// These must be in nsCRT::strcmp order so binary-search can be used.
// This is verified, |#ifdef DEBUG|, below.
// Even though public identifiers should be case sensitive, we will do
// all comparisons after converting to lower case in order to do
// case-insensitive comparison since there are a number of existing web
// sites that use the incorrect case. Therefore all of the public
// identifiers below are in lower case (with the correct case following,
// in comments). The case is verified, |#ifdef DEBUG|, below.
static const PubIDInfo kPublicIDs[] = {
{"+//silmaril//dtd html pro v0r11 19970101//en" /* "+//Silmaril//dtd html Pro v0r11 19970101//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//advasoft ltd//dtd html 3.0 aswedit + extensions//en" /* "-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//as//dtd html 3.0 aswedit + extensions//en" /* "-//AS//DTD HTML 3.0 asWedit + extensions//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//ietf//dtd html 2.0 level 1//en" /* "-//IETF//DTD HTML 2.0 Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//ietf//dtd html 2.0 level 2//en" /* "-//IETF//DTD HTML 2.0 Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//ietf//dtd html 2.0 strict level 1//en" /* "-//IETF//DTD HTML 2.0 Strict Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//ietf//dtd html 2.0 strict level 2//en" /* "-//IETF//DTD HTML 2.0 Strict Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//ietf//dtd html 2.0 strict//en" /* "-//IETF//DTD HTML 2.0 Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//ietf//dtd html 2.0//en" /* "-//IETF//DTD HTML 2.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//ietf//dtd html 2.1e//en" /* "-//IETF//DTD HTML 2.1E//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//ietf//dtd html 3.0//en" /* "-//IETF//DTD HTML 3.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//ietf//dtd html 3.0//en//" /* "-//IETF//DTD HTML 3.0//EN//" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//ietf//dtd html 3.2 final//en" /* "-//IETF//DTD HTML 3.2 Final//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//ietf//dtd html 3.2//en" /* "-//IETF//DTD HTML 3.2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//ietf//dtd html 3//en" /* "-//IETF//DTD HTML 3//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//ietf//dtd html level 0//en" /* "-//IETF//DTD HTML Level 0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//ietf//dtd html level 0//en//2.0" /* "-//IETF//DTD HTML Level 0//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//ietf//dtd html level 1//en" /* "-//IETF//DTD HTML Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//ietf//dtd html level 1//en//2.0" /* "-//IETF//DTD HTML Level 1//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//ietf//dtd html level 2//en" /* "-//IETF//DTD HTML Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//ietf//dtd html level 2//en//2.0" /* "-//IETF//DTD HTML Level 2//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//ietf//dtd html level 3//en" /* "-//IETF//DTD HTML Level 3//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//ietf//dtd html level 3//en//3.0" /* "-//IETF//DTD HTML Level 3//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//ietf//dtd html strict level 0//en" /* "-//IETF//DTD HTML Strict Level 0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//ietf//dtd html strict level 0//en//2.0" /* "-//IETF//DTD HTML Strict Level 0//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//ietf//dtd html strict level 1//en" /* "-//IETF//DTD HTML Strict Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//ietf//dtd html strict level 1//en//2.0" /* "-//IETF//DTD HTML Strict Level 1//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//ietf//dtd html strict level 2//en" /* "-//IETF//DTD HTML Strict Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//ietf//dtd html strict level 2//en//2.0" /* "-//IETF//DTD HTML Strict Level 2//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//ietf//dtd html strict level 3//en" /* "-//IETF//DTD HTML Strict Level 3//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//ietf//dtd html strict level 3//en//3.0" /* "-//IETF//DTD HTML Strict Level 3//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//ietf//dtd html strict//en" /* "-//IETF//DTD HTML Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//ietf//dtd html strict//en//2.0" /* "-//IETF//DTD HTML Strict//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//ietf//dtd html strict//en//3.0" /* "-//IETF//DTD HTML Strict//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//ietf//dtd html//en" /* "-//IETF//DTD HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//ietf//dtd html//en//2.0" /* "-//IETF//DTD HTML//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//ietf//dtd html//en//3.0" /* "-//IETF//DTD HTML//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//metrius//dtd metrius presentational//en" /* "-//Metrius//DTD Metrius Presentational//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//microsoft//dtd internet explorer 2.0 html strict//en" /* "-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//microsoft//dtd internet explorer 2.0 html//en" /* "-//Microsoft//DTD Internet Explorer 2.0 HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//microsoft//dtd internet explorer 2.0 tables//en" /* "-//Microsoft//DTD Internet Explorer 2.0 Tables//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//microsoft//dtd internet explorer 3.0 html strict//en" /* "-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//microsoft//dtd internet explorer 3.0 html//en" /* "-//Microsoft//DTD Internet Explorer 3.0 HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//microsoft//dtd internet explorer 3.0 tables//en" /* "-//Microsoft//DTD Internet Explorer 3.0 Tables//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//netscape comm. corp.//dtd html//en" /* "-//Netscape Comm. Corp.//DTD HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//netscape comm. corp.//dtd strict html//en" /* "-//Netscape Comm. Corp.//DTD Strict HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//o'reilly and associates//dtd html 2.0//en" /* "-//O'Reilly and Associates//DTD HTML 2.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//o'reilly and associates//dtd html extended 1.0//en" /* "-//O'Reilly and Associates//DTD HTML Extended 1.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//o'reilly and associates//dtd html extended relaxed 1.0//en" /* "-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//en" /* "-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//en" /* "-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//spyglass//dtd html 2.0 extended//en" /* "-//Spyglass//DTD HTML 2.0 Extended//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//sq//dtd html 2.0 hotmetal + extensions//en" /* "-//SQ//DTD HTML 2.0 HoTMetaL + extensions//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//sun microsystems corp.//dtd hotjava html//en" /* "-//Sun Microsystems Corp.//DTD HotJava HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//sun microsystems corp.//dtd hotjava strict html//en" /* "-//Sun Microsystems Corp.//DTD HotJava Strict HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//w3c//dtd html 3 1995-03-24//en" /* "-//W3C//DTD HTML 3 1995-03-24//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//w3c//dtd html 3.2 draft//en" /* "-//W3C//DTD HTML 3.2 Draft//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//w3c//dtd html 3.2 final//en" /* "-//W3C//DTD HTML 3.2 Final//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//w3c//dtd html 3.2//en" /* "-//W3C//DTD HTML 3.2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//w3c//dtd html 3.2s draft//en" /* "-//W3C//DTD HTML 3.2S Draft//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//w3c//dtd html 4.0 frameset//en" /* "-//W3C//DTD HTML 4.0 Frameset//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//w3c//dtd html 4.0 transitional//en" /* "-//W3C//DTD HTML 4.0 Transitional//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//w3c//dtd html 4.01 frameset//en" /* "-//W3C//DTD HTML 4.01 Frameset//EN" */, PubIDInfo::eQuirks, PubIDInfo::eAlmostStandards},
{"-//w3c//dtd html 4.01 transitional//en" /* "-//W3C//DTD HTML 4.01 Transitional//EN" */, PubIDInfo::eQuirks, PubIDInfo::eAlmostStandards},
{"-//w3c//dtd html experimental 19960712//en" /* "-//W3C//DTD HTML Experimental 19960712//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//w3c//dtd html experimental 970421//en" /* "-//W3C//DTD HTML Experimental 970421//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//w3c//dtd w3 html//en" /* "-//W3C//DTD W3 HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//w3c//dtd xhtml 1.0 frameset//en" /* "-//W3C//DTD XHTML 1.0 Frameset//EN" */, PubIDInfo::eAlmostStandards, PubIDInfo::eAlmostStandards},
{"-//w3c//dtd xhtml 1.0 transitional//en" /* "-//W3C//DTD XHTML 1.0 Transitional//EN" */, PubIDInfo::eAlmostStandards, PubIDInfo::eAlmostStandards},
{"-//w3o//dtd w3 html 3.0//en" /* "-//W3O//DTD W3 HTML 3.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//w3o//dtd w3 html 3.0//en//" /* "-//W3O//DTD W3 HTML 3.0//EN//" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//w3o//dtd w3 html strict 3.0//en//" /* "-//W3O//DTD W3 HTML Strict 3.0//EN//" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//webtechs//dtd mozilla html 2.0//en" /* "-//WebTechs//DTD Mozilla HTML 2.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-//webtechs//dtd mozilla html//en" /* "-//WebTechs//DTD Mozilla HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"-/w3c/dtd html 4.0 transitional/en" /* "-/W3C/DTD HTML 4.0 Transitional/EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
{"html" /* "HTML" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
};
#ifdef DEBUG
static void
VerifyPublicIDs()
{
static bool gVerified = false;
if (!gVerified) {
gVerified = true;
uint32_t i;
for (i = 0; i < ELEMENTS_OF(kPublicIDs) - 1; ++i) {
if (nsCRT::strcmp(kPublicIDs[i].name, kPublicIDs[i+1].name) >= 0) {
NS_NOTREACHED("doctypes out of order");
printf("Doctypes %s and %s out of order.\n",
kPublicIDs[i].name, kPublicIDs[i+1].name);
}
}
for (i = 0; i < ELEMENTS_OF(kPublicIDs); ++i) {
nsAutoCString lcPubID(kPublicIDs[i].name);
ToLowerCase(lcPubID);
if (nsCRT::strcmp(kPublicIDs[i].name, lcPubID.get()) != 0) {
NS_NOTREACHED("doctype not lower case");
printf("Doctype %s not lower case.\n", kPublicIDs[i].name);
}
}
}
}
#endif
namespace {

// Three-way comparison functor handed to the binary search over kPublicIDs.
// It keeps a reference to the key string (the caller must keep that string
// alive) and compares it against the name of the table entry it is given.
struct PublicIdComparator
{
  const nsAutoCString& mPublicId;

  explicit PublicIdComparator(const nsAutoCString& aPublicId)
    : mPublicId(aPublicId)
  {
  }

  // Result of nsCRT::strcmp(key, entry name).
  int operator()(const PubIDInfo& aInfo) const
  {
    const int ordering = nsCRT::strcmp(mPublicId.get(), aInfo.name);
    return ordering;
  }
};

} // namespace
static void
DetermineHTMLParseMode(const nsString& aBuffer,
nsDTDMode& aParseMode,
eParserDocType& aDocType)
{
#ifdef DEBUG
VerifyPublicIDs();
#endif
int32_t resultFlags;
nsAutoString publicIDUCS2, sysIDUCS2;
if (ParseDocTypeDecl(aBuffer, &resultFlags, publicIDUCS2, sysIDUCS2)) {
if (!(resultFlags & PARSE_DTD_HAVE_DOCTYPE)) {
// no DOCTYPE
aParseMode = eDTDMode_quirks;
aDocType = eHTML_Quirks;
} else if ((resultFlags & PARSE_DTD_HAVE_INTERNAL_SUBSET) ||
!(resultFlags & PARSE_DTD_HAVE_PUBLIC_ID)) {
// A doctype with an internal subset is always full_standards.
// A doctype without a public ID is always full_standards.
aDocType = eHTML_Strict;
aParseMode = eDTDMode_full_standards;
// Special hack for IBM's custom DOCTYPE.
if (!(resultFlags & PARSE_DTD_HAVE_INTERNAL_SUBSET) &&
sysIDUCS2.EqualsLiteral(
"http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")) {
aParseMode = eDTDMode_quirks;
aDocType = eHTML_Quirks;
}
} else {
// We have to check our list of public IDs to see what to do.
// Yes, we want UCS2 to ASCII lossy conversion.
nsAutoCString publicID;
publicID.AssignWithConversion(publicIDUCS2);
// See comment above definition of kPublicIDs about case
// sensitivity.
ToLowerCase(publicID);
// Binary search to see if we can find the correct public ID.
size_t index;
bool found = BinarySearchIf(kPublicIDs, 0, ArrayLength(kPublicIDs),
PublicIdComparator(publicID), &index);
if (!found) {
// The DOCTYPE is not in our list, so it must be full_standards.
aParseMode = eDTDMode_full_standards;
aDocType = eHTML_Strict;
return;
}
switch ((resultFlags & PARSE_DTD_HAVE_SYSTEM_ID)
? kPublicIDs[index].mode_if_sysid
: kPublicIDs[index].mode_if_no_sysid)
{
case PubIDInfo::eQuirks:
aParseMode = eDTDMode_quirks;
aDocType = eHTML_Quirks;
break;
case PubIDInfo::eAlmostStandards:
aParseMode = eDTDMode_almost_standards;
aDocType = eHTML_Strict;
break;
case PubIDInfo::eFullStandards:
aParseMode = eDTDMode_full_standards;
aDocType = eHTML_Strict;
break;
default:
NS_NOTREACHED("no other cases!");
}
}
} else {
// badly formed DOCTYPE -> quirks
aParseMode = eDTDMode_quirks;
aDocType = eHTML_Quirks;
}
}
static void
DetermineParseMode(const nsString& aBuffer, nsDTDMode& aParseMode,
eParserDocType& aDocType, const nsACString& aMimeType)
{
if (aMimeType.EqualsLiteral(TEXT_HTML)) {
DetermineHTMLParseMode(aBuffer, aParseMode, aDocType);
} else if (nsContentUtils::IsPlainTextType(aMimeType)) {
aDocType = ePlainText;
aParseMode = eDTDMode_quirks;
} else { // Some form of XML
aDocType = eXML;
aParseMode = eDTDMode_full_standards;
}
}
static nsIDTD*
FindSuitableDTD(CParserContext& aParserContext)
{
@ -849,15 +441,14 @@ nsParser::WillBuildModel(nsString& aFilename)
if (eDTDMode_unknown == mParserContext->mDTDMode ||
eDTDMode_autodetect == mParserContext->mDTDMode) {
char16_t buf[1025];
nsFixedString theBuffer(buf, 1024, 0);
// Grab 1024 characters, starting at the first non-whitespace
// character, to look for the doctype in.
mParserContext->mScanner->Peek(theBuffer, 1024, mParserContext->mScanner->FirstNonWhitespacePosition());
DetermineParseMode(theBuffer, mParserContext->mDTDMode,
mParserContext->mDocType, mParserContext->mMimeType);
}
if (mIsAboutBlank) {
mParserContext->mDTDMode = eDTDMode_quirks;
mParserContext->mDocType = eHTML_Quirks;
} else {
mParserContext->mDTDMode = eDTDMode_full_standards;
mParserContext->mDocType = eXML;
}
} // else XML fragment with nested parser context
NS_ASSERTION(!mDTD || !mParserContext->mPrevContext,
"Clobbering DTD for non-root parser context!");

View File

@ -329,70 +329,6 @@ nsresult nsScanner::GetChar(char16_t& aChar) {
return NS_OK;
}
/**
 * Read a character from the scanner's internal input buffer without
 * moving the current position.
 *
 * @param aChar   receives the character; set to 0 when nothing is read
 * @param aOffset how many characters past the current position to look
 * @return NS_OK on success, NS_ERROR_HTMLPARSER_EOF when there is no
 *         buffer, the buffer is exhausted, or a non-zero aOffset is not
 *         smaller than the number of characters remaining
 */
nsresult nsScanner::Peek(char16_t& aChar, uint32_t aOffset) {
  aChar = 0;

  if (!mSlidingBuffer || mCurrentPosition == mEndPosition) {
    return NS_ERROR_HTMLPARSER_EOF;
  }

  // Offset zero: the character is right at the current position.
  if (aOffset == 0) {
    aChar = *mCurrentPosition;
    return NS_OK;
  }

  if (aOffset >= mCountRemaining) {
    return NS_ERROR_HTMLPARSER_EOF;
  }

  // Look ahead on a copy so the scanner's own position is untouched.
  nsScannerIterator cursor = mCurrentPosition;
  cursor.advance(aOffset);
  aChar = *cursor;
  return NS_OK;
}
/**
 * Copy a run of characters out of the scanner's buffer into aStr without
 * moving the current position.
 *
 * @param aStr      receives the copied characters
 * @param aNumChars number of characters wanted; the copy runs to the end
 *                  of the buffer when fewer remain
 * @param aOffset   how many characters past the current position to start
 * @return NS_OK on success, NS_ERROR_HTMLPARSER_EOF when there is no
 *         buffer, the buffer is exhausted, or aOffset is not smaller than
 *         the number of characters remaining, NS_ERROR_OUT_OF_MEMORY when
 *         the copy fails
 */
nsresult nsScanner::Peek(nsAString& aStr, int32_t aNumChars, int32_t aOffset)
{
  if (!mSlidingBuffer || mCurrentPosition == mEndPosition) {
    return NS_ERROR_HTMLPARSER_EOF;
  }

  if (static_cast<int32_t>(mCountRemaining) <= aOffset) {
    return NS_ERROR_HTMLPARSER_EOF;
  }

  // Start of the range: current position, moved forward by a positive offset.
  nsScannerIterator rangeBegin;
  rangeBegin = mCurrentPosition;
  if (aOffset > 0) {
    rangeBegin.advance(aOffset);
  }

  // End of the range: aNumChars further on, clamped to the buffer end.
  nsScannerIterator rangeEnd;
  if (mCountRemaining < uint32_t(aNumChars + aOffset)) {
    rangeEnd = mEndPosition;
  } else {
    rangeEnd = rangeBegin;
    rangeEnd.advance(aNumChars);
  }

  return CopyUnicodeTo(rangeBegin, rangeEnd, aStr) ? NS_OK
                                                   : NS_ERROR_OUT_OF_MEMORY;
}
void nsScanner::BindSubstring(nsScannerSubstring& aSubstring, const nsScannerIterator& aStart, const nsScannerIterator& aEnd)
{
aSubstring.Rebind(*mSlidingBuffer, aStart, aEnd);

View File

@ -60,18 +60,6 @@ class nsScanner {
*/
nsresult GetChar(char16_t& ch);
/**
* Look at a character in the scanner's internal input buffer
* without advancing the current position.
*
* @update gess 3/25/98
* @param ch is the char to accept new value (set to 0 when nothing is read)
* @param aOffset is how many characters past the current position to look
* @return error code reflecting read status
*/
nsresult Peek(char16_t& ch, uint32_t aOffset=0);
// Copies up to aNumChars characters, starting aOffset characters past the
// current position, into aStr; the current position is not advanced.
nsresult Peek(nsAString& aStr, int32_t aNumChars, int32_t aOffset = 0);
/**
* Records current offset position in input stream. This allows us
* to back up to this point if the need should arise, such as when

View File

@ -6,6 +6,7 @@
#include <stdlib.h>
#include "nsScannerString.h"
#include "mozilla/CheckedInt.h"
/**
@ -472,7 +473,13 @@ CopyUnicodeTo( const nsScannerIterator& aSrcStart,
nsAString& aDest )
{
nsAString::iterator writer;
if (!aDest.SetLength(Distance(aSrcStart, aSrcEnd), mozilla::fallible)) {
mozilla::CheckedInt<nsAString::size_type> distance(Distance(aSrcStart, aSrcEnd));
if (!distance.isValid()) {
return false; // overflow detected
}
if (!aDest.SetLength(distance.value(), mozilla::fallible)) {
aDest.Truncate();
return false; // out of memory
}
@ -505,8 +512,14 @@ AppendUnicodeTo( const nsScannerIterator& aSrcStart,
nsAString& aDest )
{
nsAString::iterator writer;
uint32_t oldLength = aDest.Length();
if (!aDest.SetLength(oldLength + Distance(aSrcStart, aSrcEnd), mozilla::fallible))
const nsAString::size_type oldLength = aDest.Length();
CheckedInt<nsAString::size_type> newLen(Distance(aSrcStart, aSrcEnd));
newLen += oldLength;
if (!newLen.isValid()) {
return false; // overflow detected
}
if (!aDest.SetLength(newLen.value(), mozilla::fallible))
return false; // out of memory
aDest.BeginWriting(writer).advance(oldLength);
nsScannerIterator fromBegin(aSrcStart);