diff --git a/parser/htmlparser/nsParser.cpp b/parser/htmlparser/nsParser.cpp
index 5eaf3b0a3107..4f2e41fbcdae 100644
--- a/parser/htmlparser/nsParser.cpp
+++ b/parser/htmlparser/nsParser.cpp
@@ -335,414 +335,6 @@ nsParser::GetContentSink()
return mSink;
}
-/**
- * Determine what DTD mode (and thus what layout nsCompatibility mode)
- * to use for this document based on the first chunk of data received
- * from the network (each parsercontext can have its own mode). (No,
- * this is not an optimal solution -- we really don't need to know until
- * after we've received the DOCTYPE, and this could easily be part of
- * the regular parsing process if the parser were designed in a way that
- * made such modifications easy.)
- */
-
-// Parse the PS production in the SGML spec (excluding the part dealing
-// with entity references) starting at theIndex into theBuffer, and
-// return the first index after the end of the production.
-static int32_t
-ParsePS(const nsString& aBuffer, int32_t aIndex)
-{
- for (;;) {
- char16_t ch = aBuffer.CharAt(aIndex);
- if ((ch == char16_t(' ')) || (ch == char16_t('\t')) ||
- (ch == char16_t('\n')) || (ch == char16_t('\r'))) {
- ++aIndex;
- } else if (ch == char16_t('-')) {
- int32_t tmpIndex;
- if (aBuffer.CharAt(aIndex+1) == char16_t('-') &&
- kNotFound != (tmpIndex=aBuffer.Find("--",false,aIndex+2,-1))) {
- aIndex = tmpIndex + 2;
- } else {
- return aIndex;
- }
- } else {
- return aIndex;
- }
- }
-}
-
-#define PARSE_DTD_HAVE_DOCTYPE (1<<0)
-#define PARSE_DTD_HAVE_PUBLIC_ID (1<<1)
-#define PARSE_DTD_HAVE_SYSTEM_ID (1<<2)
-#define PARSE_DTD_HAVE_INTERNAL_SUBSET (1<<3)
-
-// return true on success (includes not present), false on failure
-static bool
-ParseDocTypeDecl(const nsString &aBuffer,
- int32_t *aResultFlags,
- nsString &aPublicID,
- nsString &aSystemID)
-{
- bool haveDoctype = false;
- *aResultFlags = 0;
-
- // Skip through any comments and processing instructions
- // The PI-skipping is a bit of a hack.
- int32_t theIndex = 0;
- do {
- theIndex = aBuffer.FindChar('<', theIndex);
- if (theIndex == kNotFound) break;
- char16_t nextChar = aBuffer.CharAt(theIndex+1);
- if (nextChar == char16_t('!')) {
- int32_t tmpIndex = theIndex + 2;
- if (kNotFound !=
- (theIndex=aBuffer.Find("DOCTYPE", true, tmpIndex, 0))) {
- haveDoctype = true;
- theIndex += 7; // skip "DOCTYPE"
- break;
- }
- theIndex = ParsePS(aBuffer, tmpIndex);
- theIndex = aBuffer.FindChar('>', theIndex);
- } else if (nextChar == char16_t('?')) {
- theIndex = aBuffer.FindChar('>', theIndex);
- } else {
- break;
- }
- } while (theIndex != kNotFound);
-
- if (!haveDoctype)
- return true;
- *aResultFlags |= PARSE_DTD_HAVE_DOCTYPE;
-
- theIndex = ParsePS(aBuffer, theIndex);
- theIndex = aBuffer.Find("HTML", true, theIndex, 0);
- if (kNotFound == theIndex)
- return false;
- theIndex = ParsePS(aBuffer, theIndex+4);
- int32_t tmpIndex = aBuffer.Find("PUBLIC", true, theIndex, 0);
-
- if (kNotFound != tmpIndex) {
- theIndex = ParsePS(aBuffer, tmpIndex+6);
-
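- // We get here only if we've read <!DOCTYPE HTML PUBLIC
- // (not case sensitive) possibly with comments within.
-
- // Now find the beginning and end of the public identifier
- // and the system identifier (if present).
-
- char16_t lit = aBuffer.CharAt(theIndex);
- if ((lit != char16_t('\"')) && (lit != char16_t('\'')))
- return false;
-
- // Start is the first character, excluding the quote, and End is
- // the final quote, so there are (end-start) characters.
-
- int32_t PublicIDStart = theIndex + 1;
- int32_t PublicIDEnd = aBuffer.FindChar(lit, PublicIDStart);
- if (kNotFound == PublicIDEnd)
- return false;
- theIndex = ParsePS(aBuffer, PublicIDEnd + 1);
- char16_t next = aBuffer.CharAt(theIndex);
- if (next == char16_t('>')) {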
- // There was a public identifier, but no system
- // identifier,
- // so do nothing.
- // This is needed to avoid the else at the end, and it's
- // also the most common case.
- } else if ((next == char16_t('\"')) ||
- (next == char16_t('\''))) {
- // We found a system identifier.
- *aResultFlags |= PARSE_DTD_HAVE_SYSTEM_ID;
- int32_t SystemIDStart = theIndex + 1;
- int32_t SystemIDEnd = aBuffer.FindChar(next, SystemIDStart);
- if (kNotFound == SystemIDEnd)
- return false;
- aSystemID =
- Substring(aBuffer, SystemIDStart, SystemIDEnd - SystemIDStart);
- } else if (next == char16_t('[')) {
- // We found an internal subset.
- *aResultFlags |= PARSE_DTD_HAVE_INTERNAL_SUBSET;
- } else {
- // Something's wrong.
- return false;
- }
-
- // Since a public ID is a minimum literal, we must trim
- // and collapse whitespace
- aPublicID = Substring(aBuffer, PublicIDStart, PublicIDEnd - PublicIDStart);
- aPublicID.CompressWhitespace(true, true);
- *aResultFlags |= PARSE_DTD_HAVE_PUBLIC_ID;
- } else {
- tmpIndex=aBuffer.Find("SYSTEM", true, theIndex, 0);
- if (kNotFound != tmpIndex) {
- // DOCTYPES with system ID but no Public ID
- *aResultFlags |= PARSE_DTD_HAVE_SYSTEM_ID;
-
- theIndex = ParsePS(aBuffer, tmpIndex+6);
- char16_t next = aBuffer.CharAt(theIndex);
- if (next != char16_t('\"') && next != char16_t('\''))
- return false;
-
- int32_t SystemIDStart = theIndex + 1;
- int32_t SystemIDEnd = aBuffer.FindChar(next, SystemIDStart);
-
- if (kNotFound == SystemIDEnd)
- return false;
- aSystemID =
- Substring(aBuffer, SystemIDStart, SystemIDEnd - SystemIDStart);
- theIndex = ParsePS(aBuffer, SystemIDEnd + 1);
- }
-
- char16_t nextChar = aBuffer.CharAt(theIndex);
- if (nextChar == char16_t('['))
- *aResultFlags |= PARSE_DTD_HAVE_INTERNAL_SUBSET;
- else if (nextChar != char16_t('>'))
- return false;
- }
- return true;
-}
-
-struct PubIDInfo
-{
- enum eMode {
- eQuirks, /* always quirks mode, unless there's an internal subset */
- eAlmostStandards,/* eCompatibility_AlmostStandards */
- eFullStandards /* eCompatibility_FullStandards */
- /*
- * public IDs that should trigger strict mode are not listed
- * since we want all future public IDs to trigger strict mode as
- * well
- */
- };
-
- const char* name;
- eMode mode_if_no_sysid;
- eMode mode_if_sysid;
-};
-
-#define ELEMENTS_OF(array_) (sizeof(array_)/sizeof(array_[0]))
-
-// These must be in nsCRT::strcmp order so binary-search can be used.
-// This is verified, |#ifdef DEBUG|, below.
-
-// Even though public identifiers should be case sensitive, we will do
-// all comparisons after converting to lower case in order to do
-// case-insensitive comparison since there are a number of existing web
-// sites that use the incorrect case. Therefore all of the public
-// identifiers below are in lower case (with the correct case following,
-// in comments). The case is verified, |#ifdef DEBUG|, below.
-static const PubIDInfo kPublicIDs[] = {
- {"+//silmaril//dtd html pro v0r11 19970101//en" /* "+//Silmaril//dtd html Pro v0r11 19970101//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//advasoft ltd//dtd html 3.0 aswedit + extensions//en" /* "-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//as//dtd html 3.0 aswedit + extensions//en" /* "-//AS//DTD HTML 3.0 asWedit + extensions//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//ietf//dtd html 2.0 level 1//en" /* "-//IETF//DTD HTML 2.0 Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//ietf//dtd html 2.0 level 2//en" /* "-//IETF//DTD HTML 2.0 Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//ietf//dtd html 2.0 strict level 1//en" /* "-//IETF//DTD HTML 2.0 Strict Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//ietf//dtd html 2.0 strict level 2//en" /* "-//IETF//DTD HTML 2.0 Strict Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//ietf//dtd html 2.0 strict//en" /* "-//IETF//DTD HTML 2.0 Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//ietf//dtd html 2.0//en" /* "-//IETF//DTD HTML 2.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//ietf//dtd html 2.1e//en" /* "-//IETF//DTD HTML 2.1E//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//ietf//dtd html 3.0//en" /* "-//IETF//DTD HTML 3.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//ietf//dtd html 3.0//en//" /* "-//IETF//DTD HTML 3.0//EN//" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//ietf//dtd html 3.2 final//en" /* "-//IETF//DTD HTML 3.2 Final//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//ietf//dtd html 3.2//en" /* "-//IETF//DTD HTML 3.2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//ietf//dtd html 3//en" /* "-//IETF//DTD HTML 3//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//ietf//dtd html level 0//en" /* "-//IETF//DTD HTML Level 0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//ietf//dtd html level 0//en//2.0" /* "-//IETF//DTD HTML Level 0//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//ietf//dtd html level 1//en" /* "-//IETF//DTD HTML Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//ietf//dtd html level 1//en//2.0" /* "-//IETF//DTD HTML Level 1//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//ietf//dtd html level 2//en" /* "-//IETF//DTD HTML Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//ietf//dtd html level 2//en//2.0" /* "-//IETF//DTD HTML Level 2//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//ietf//dtd html level 3//en" /* "-//IETF//DTD HTML Level 3//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//ietf//dtd html level 3//en//3.0" /* "-//IETF//DTD HTML Level 3//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//ietf//dtd html strict level 0//en" /* "-//IETF//DTD HTML Strict Level 0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//ietf//dtd html strict level 0//en//2.0" /* "-//IETF//DTD HTML Strict Level 0//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//ietf//dtd html strict level 1//en" /* "-//IETF//DTD HTML Strict Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//ietf//dtd html strict level 1//en//2.0" /* "-//IETF//DTD HTML Strict Level 1//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//ietf//dtd html strict level 2//en" /* "-//IETF//DTD HTML Strict Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//ietf//dtd html strict level 2//en//2.0" /* "-//IETF//DTD HTML Strict Level 2//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//ietf//dtd html strict level 3//en" /* "-//IETF//DTD HTML Strict Level 3//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//ietf//dtd html strict level 3//en//3.0" /* "-//IETF//DTD HTML Strict Level 3//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//ietf//dtd html strict//en" /* "-//IETF//DTD HTML Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//ietf//dtd html strict//en//2.0" /* "-//IETF//DTD HTML Strict//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//ietf//dtd html strict//en//3.0" /* "-//IETF//DTD HTML Strict//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//ietf//dtd html//en" /* "-//IETF//DTD HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//ietf//dtd html//en//2.0" /* "-//IETF//DTD HTML//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//ietf//dtd html//en//3.0" /* "-//IETF//DTD HTML//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//metrius//dtd metrius presentational//en" /* "-//Metrius//DTD Metrius Presentational//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//microsoft//dtd internet explorer 2.0 html strict//en" /* "-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//microsoft//dtd internet explorer 2.0 html//en" /* "-//Microsoft//DTD Internet Explorer 2.0 HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//microsoft//dtd internet explorer 2.0 tables//en" /* "-//Microsoft//DTD Internet Explorer 2.0 Tables//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//microsoft//dtd internet explorer 3.0 html strict//en" /* "-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//microsoft//dtd internet explorer 3.0 html//en" /* "-//Microsoft//DTD Internet Explorer 3.0 HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//microsoft//dtd internet explorer 3.0 tables//en" /* "-//Microsoft//DTD Internet Explorer 3.0 Tables//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//netscape comm. corp.//dtd html//en" /* "-//Netscape Comm. Corp.//DTD HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//netscape comm. corp.//dtd strict html//en" /* "-//Netscape Comm. Corp.//DTD Strict HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//o'reilly and associates//dtd html 2.0//en" /* "-//O'Reilly and Associates//DTD HTML 2.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//o'reilly and associates//dtd html extended 1.0//en" /* "-//O'Reilly and Associates//DTD HTML Extended 1.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//o'reilly and associates//dtd html extended relaxed 1.0//en" /* "-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//en" /* "-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//en" /* "-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//spyglass//dtd html 2.0 extended//en" /* "-//Spyglass//DTD HTML 2.0 Extended//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//sq//dtd html 2.0 hotmetal + extensions//en" /* "-//SQ//DTD HTML 2.0 HoTMetaL + extensions//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//sun microsystems corp.//dtd hotjava html//en" /* "-//Sun Microsystems Corp.//DTD HotJava HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//sun microsystems corp.//dtd hotjava strict html//en" /* "-//Sun Microsystems Corp.//DTD HotJava Strict HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//w3c//dtd html 3 1995-03-24//en" /* "-//W3C//DTD HTML 3 1995-03-24//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//w3c//dtd html 3.2 draft//en" /* "-//W3C//DTD HTML 3.2 Draft//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//w3c//dtd html 3.2 final//en" /* "-//W3C//DTD HTML 3.2 Final//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//w3c//dtd html 3.2//en" /* "-//W3C//DTD HTML 3.2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//w3c//dtd html 3.2s draft//en" /* "-//W3C//DTD HTML 3.2S Draft//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//w3c//dtd html 4.0 frameset//en" /* "-//W3C//DTD HTML 4.0 Frameset//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//w3c//dtd html 4.0 transitional//en" /* "-//W3C//DTD HTML 4.0 Transitional//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//w3c//dtd html 4.01 frameset//en" /* "-//W3C//DTD HTML 4.01 Frameset//EN" */, PubIDInfo::eQuirks, PubIDInfo::eAlmostStandards},
- {"-//w3c//dtd html 4.01 transitional//en" /* "-//W3C//DTD HTML 4.01 Transitional//EN" */, PubIDInfo::eQuirks, PubIDInfo::eAlmostStandards},
- {"-//w3c//dtd html experimental 19960712//en" /* "-//W3C//DTD HTML Experimental 19960712//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//w3c//dtd html experimental 970421//en" /* "-//W3C//DTD HTML Experimental 970421//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//w3c//dtd w3 html//en" /* "-//W3C//DTD W3 HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//w3c//dtd xhtml 1.0 frameset//en" /* "-//W3C//DTD XHTML 1.0 Frameset//EN" */, PubIDInfo::eAlmostStandards, PubIDInfo::eAlmostStandards},
- {"-//w3c//dtd xhtml 1.0 transitional//en" /* "-//W3C//DTD XHTML 1.0 Transitional//EN" */, PubIDInfo::eAlmostStandards, PubIDInfo::eAlmostStandards},
- {"-//w3o//dtd w3 html 3.0//en" /* "-//W3O//DTD W3 HTML 3.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//w3o//dtd w3 html 3.0//en//" /* "-//W3O//DTD W3 HTML 3.0//EN//" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//w3o//dtd w3 html strict 3.0//en//" /* "-//W3O//DTD W3 HTML Strict 3.0//EN//" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//webtechs//dtd mozilla html 2.0//en" /* "-//WebTechs//DTD Mozilla HTML 2.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-//webtechs//dtd mozilla html//en" /* "-//WebTechs//DTD Mozilla HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"-/w3c/dtd html 4.0 transitional/en" /* "-/W3C/DTD HTML 4.0 Transitional/EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
- {"html" /* "HTML" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-};
-
-#ifdef DEBUG
-static void
-VerifyPublicIDs()
-{
- static bool gVerified = false;
- if (!gVerified) {
- gVerified = true;
- uint32_t i;
- for (i = 0; i < ELEMENTS_OF(kPublicIDs) - 1; ++i) {
- if (nsCRT::strcmp(kPublicIDs[i].name, kPublicIDs[i+1].name) >= 0) {
- NS_NOTREACHED("doctypes out of order");
- printf("Doctypes %s and %s out of order.\n",
- kPublicIDs[i].name, kPublicIDs[i+1].name);
- }
- }
- for (i = 0; i < ELEMENTS_OF(kPublicIDs); ++i) {
- nsAutoCString lcPubID(kPublicIDs[i].name);
- ToLowerCase(lcPubID);
- if (nsCRT::strcmp(kPublicIDs[i].name, lcPubID.get()) != 0) {
- NS_NOTREACHED("doctype not lower case");
- printf("Doctype %s not lower case.\n", kPublicIDs[i].name);
- }
- }
- }
-}
-#endif
-
-namespace {
-
-struct PublicIdComparator
-{
- const nsAutoCString& mPublicId;
- explicit PublicIdComparator(const nsAutoCString& aPublicId)
- : mPublicId(aPublicId) {}
- int operator()(const PubIDInfo& aInfo) const {
- return nsCRT::strcmp(mPublicId.get(), aInfo.name);
- }
-};
-
-} // namespace
-
-static void
-DetermineHTMLParseMode(const nsString& aBuffer,
- nsDTDMode& aParseMode,
- eParserDocType& aDocType)
-{
-#ifdef DEBUG
- VerifyPublicIDs();
-#endif
- int32_t resultFlags;
- nsAutoString publicIDUCS2, sysIDUCS2;
- if (ParseDocTypeDecl(aBuffer, &resultFlags, publicIDUCS2, sysIDUCS2)) {
- if (!(resultFlags & PARSE_DTD_HAVE_DOCTYPE)) {
- // no DOCTYPE
- aParseMode = eDTDMode_quirks;
- aDocType = eHTML_Quirks;
- } else if ((resultFlags & PARSE_DTD_HAVE_INTERNAL_SUBSET) ||
- !(resultFlags & PARSE_DTD_HAVE_PUBLIC_ID)) {
- // A doctype with an internal subset is always full_standards.
- // A doctype without a public ID is always full_standards.
- aDocType = eHTML_Strict;
- aParseMode = eDTDMode_full_standards;
-
- // Special hack for IBM's custom DOCTYPE.
- if (!(resultFlags & PARSE_DTD_HAVE_INTERNAL_SUBSET) &&
- sysIDUCS2.EqualsLiteral(
- "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")) {
- aParseMode = eDTDMode_quirks;
- aDocType = eHTML_Quirks;
- }
-
- } else {
- // We have to check our list of public IDs to see what to do.
- // Yes, we want UCS2 to ASCII lossy conversion.
- nsAutoCString publicID;
- publicID.AssignWithConversion(publicIDUCS2);
-
- // See comment above definition of kPublicIDs about case
- // sensitivity.
- ToLowerCase(publicID);
-
- // Binary search to see if we can find the correct public ID.
- size_t index;
- bool found = BinarySearchIf(kPublicIDs, 0, ArrayLength(kPublicIDs),
- PublicIdComparator(publicID), &index);
- if (!found) {
- // The DOCTYPE is not in our list, so it must be full_standards.
- aParseMode = eDTDMode_full_standards;
- aDocType = eHTML_Strict;
- return;
- }
-
- switch ((resultFlags & PARSE_DTD_HAVE_SYSTEM_ID)
- ? kPublicIDs[index].mode_if_sysid
- : kPublicIDs[index].mode_if_no_sysid)
- {
- case PubIDInfo::eQuirks:
- aParseMode = eDTDMode_quirks;
- aDocType = eHTML_Quirks;
- break;
- case PubIDInfo::eAlmostStandards:
- aParseMode = eDTDMode_almost_standards;
- aDocType = eHTML_Strict;
- break;
- case PubIDInfo::eFullStandards:
- aParseMode = eDTDMode_full_standards;
- aDocType = eHTML_Strict;
- break;
- default:
- NS_NOTREACHED("no other cases!");
- }
- }
- } else {
- // badly formed DOCTYPE -> quirks
- aParseMode = eDTDMode_quirks;
- aDocType = eHTML_Quirks;
- }
-}
-
-static void
-DetermineParseMode(const nsString& aBuffer, nsDTDMode& aParseMode,
- eParserDocType& aDocType, const nsACString& aMimeType)
-{
- if (aMimeType.EqualsLiteral(TEXT_HTML)) {
- DetermineHTMLParseMode(aBuffer, aParseMode, aDocType);
- } else if (nsContentUtils::IsPlainTextType(aMimeType)) {
- aDocType = ePlainText;
- aParseMode = eDTDMode_quirks;
- } else { // Some form of XML
- aDocType = eXML;
- aParseMode = eDTDMode_full_standards;
- }
-}
-
static nsIDTD*
FindSuitableDTD(CParserContext& aParserContext)
{
@@ -849,15 +441,14 @@ nsParser::WillBuildModel(nsString& aFilename)
if (eDTDMode_unknown == mParserContext->mDTDMode ||
eDTDMode_autodetect == mParserContext->mDTDMode) {
- char16_t buf[1025];
- nsFixedString theBuffer(buf, 1024, 0);
-
- // Grab 1024 characters, starting at the first non-whitespace
- // character, to look for the doctype in.
- mParserContext->mScanner->Peek(theBuffer, 1024, mParserContext->mScanner->FirstNonWhitespacePosition());
- DetermineParseMode(theBuffer, mParserContext->mDTDMode,
- mParserContext->mDocType, mParserContext->mMimeType);
- }
+ if (mIsAboutBlank) {
+ mParserContext->mDTDMode = eDTDMode_quirks;
+ mParserContext->mDocType = eHTML_Quirks;
+ } else {
+ mParserContext->mDTDMode = eDTDMode_full_standards;
+ mParserContext->mDocType = eXML;
+ }
+ } // else XML fragment with nested parser context
NS_ASSERTION(!mDTD || !mParserContext->mPrevContext,
"Clobbering DTD for non-root parser context!");
diff --git a/parser/htmlparser/nsScanner.cpp b/parser/htmlparser/nsScanner.cpp
index d1ccd8f2fc1a..cf087460c905 100644
--- a/parser/htmlparser/nsScanner.cpp
+++ b/parser/htmlparser/nsScanner.cpp
@@ -329,70 +329,6 @@ nsresult nsScanner::GetChar(char16_t& aChar) {
return NS_OK;
}
-
-/**
- * peek ahead to consume next char from scanner's internal
- * input buffer
- *
- * @update gess 3/25/98
- * @param
- * @return
- */
-nsresult nsScanner::Peek(char16_t& aChar, uint32_t aOffset) {
- aChar = 0;
-
- if (!mSlidingBuffer || mCurrentPosition == mEndPosition) {
- return NS_ERROR_HTMLPARSER_EOF;
- }
-
- if (aOffset > 0) {
- if (mCountRemaining <= aOffset)
- return NS_ERROR_HTMLPARSER_EOF;
-
- nsScannerIterator pos = mCurrentPosition;
- pos.advance(aOffset);
- aChar=*pos;
- }
- else {
- aChar=*mCurrentPosition;
- }
-
- return NS_OK;
-}
-
-nsresult nsScanner::Peek(nsAString& aStr, int32_t aNumChars, int32_t aOffset)
-{
- if (!mSlidingBuffer || mCurrentPosition == mEndPosition) {
- return NS_ERROR_HTMLPARSER_EOF;
- }
-
- nsScannerIterator start, end;
-
- start = mCurrentPosition;
-
- if ((int32_t)mCountRemaining <= aOffset) {
- return NS_ERROR_HTMLPARSER_EOF;
- }
-
- if (aOffset > 0) {
- start.advance(aOffset);
- }
-
- if (mCountRemaining < uint32_t(aNumChars + aOffset)) {
- end = mEndPosition;
- }
- else {
- end = start;
- end.advance(aNumChars);
- }
-
- if (!CopyUnicodeTo(start, end, aStr)) {
- return NS_ERROR_OUT_OF_MEMORY;
- }
-
- return NS_OK;
-}
-
void nsScanner::BindSubstring(nsScannerSubstring& aSubstring, const nsScannerIterator& aStart, const nsScannerIterator& aEnd)
{
aSubstring.Rebind(*mSlidingBuffer, aStart, aEnd);
diff --git a/parser/htmlparser/nsScanner.h b/parser/htmlparser/nsScanner.h
index cbf92291ee83..47d3de45dc43 100644
--- a/parser/htmlparser/nsScanner.h
+++ b/parser/htmlparser/nsScanner.h
@@ -60,18 +60,6 @@ class nsScanner {
*/
nsresult GetChar(char16_t& ch);
- /**
- * peek ahead to consume next char from scanner's internal
- * input buffer
- *
- * @update gess 3/25/98
- * @param ch is the char to accept new value
- * @return error code reflecting read status
- */
- nsresult Peek(char16_t& ch, uint32_t aOffset=0);
-
- nsresult Peek(nsAString& aStr, int32_t aNumChars, int32_t aOffset = 0);
-
/**
* Records current offset position in input stream. This allows us
* to back up to this point if the need should arise, such as when
diff --git a/parser/htmlparser/nsScannerString.cpp b/parser/htmlparser/nsScannerString.cpp
index 380a72e8f373..99ae311dc98e 100644
--- a/parser/htmlparser/nsScannerString.cpp
+++ b/parser/htmlparser/nsScannerString.cpp
@@ -6,6 +6,7 @@
#include <stdlib.h>
#include "nsScannerString.h"
+#include "mozilla/CheckedInt.h"
/**
@@ -472,7 +473,13 @@ CopyUnicodeTo( const nsScannerIterator& aSrcStart,
nsAString& aDest )
{
nsAString::iterator writer;
- if (!aDest.SetLength(Distance(aSrcStart, aSrcEnd), mozilla::fallible)) {
+
+ mozilla::CheckedInt<nsAString::size_type> distance(Distance(aSrcStart, aSrcEnd));
+ if (!distance.isValid()) {
+ return false; // overflow detected
+ }
+
+ if (!aDest.SetLength(distance.value(), mozilla::fallible)) {
aDest.Truncate();
return false; // out of memory
}
@@ -505,8 +512,14 @@ AppendUnicodeTo( const nsScannerIterator& aSrcStart,
nsAString& aDest )
{
nsAString::iterator writer;
- uint32_t oldLength = aDest.Length();
- if (!aDest.SetLength(oldLength + Distance(aSrcStart, aSrcEnd), mozilla::fallible))
+ const nsAString::size_type oldLength = aDest.Length();
+ CheckedInt<nsAString::size_type> newLen(Distance(aSrcStart, aSrcEnd));
+ newLen += oldLength;
+ if (!newLen.isValid()) {
+ return false; // overflow detected
+ }
+
+ if (!aDest.SetLength(newLen.value(), mozilla::fallible))
return false; // out of memory
aDest.BeginWriting(writer).advance(oldLength);
nsScannerIterator fromBegin(aSrcStart);