diff --git a/parser/htmlparser/nsParser.cpp b/parser/htmlparser/nsParser.cpp index 5eaf3b0a3107..4f2e41fbcdae 100644 --- a/parser/htmlparser/nsParser.cpp +++ b/parser/htmlparser/nsParser.cpp @@ -335,414 +335,6 @@ nsParser::GetContentSink() return mSink; } -/** - * Determine what DTD mode (and thus what layout nsCompatibility mode) - * to use for this document based on the first chunk of data received - * from the network (each parsercontext can have its own mode). (No, - * this is not an optimal solution -- we really don't need to know until - * after we've received the DOCTYPE, and this could easily be part of - * the regular parsing process if the parser were designed in a way that - * made such modifications easy.) - */ - -// Parse the PS production in the SGML spec (excluding the part dealing -// with entity references) starting at theIndex into theBuffer, and -// return the first index after the end of the production. -static int32_t -ParsePS(const nsString& aBuffer, int32_t aIndex) -{ - for (;;) { - char16_t ch = aBuffer.CharAt(aIndex); - if ((ch == char16_t(' ')) || (ch == char16_t('\t')) || - (ch == char16_t('\n')) || (ch == char16_t('\r'))) { - ++aIndex; - } else if (ch == char16_t('-')) { - int32_t tmpIndex; - if (aBuffer.CharAt(aIndex+1) == char16_t('-') && - kNotFound != (tmpIndex=aBuffer.Find("--",false,aIndex+2,-1))) { - aIndex = tmpIndex + 2; - } else { - return aIndex; - } - } else { - return aIndex; - } - } -} - -#define PARSE_DTD_HAVE_DOCTYPE (1<<0) -#define PARSE_DTD_HAVE_PUBLIC_ID (1<<1) -#define PARSE_DTD_HAVE_SYSTEM_ID (1<<2) -#define PARSE_DTD_HAVE_INTERNAL_SUBSET (1<<3) - -// return true on success (includes not present), false on failure -static bool -ParseDocTypeDecl(const nsString &aBuffer, - int32_t *aResultFlags, - nsString &aPublicID, - nsString &aSystemID) -{ - bool haveDoctype = false; - *aResultFlags = 0; - - // Skip through any comments and processing instructions - // The PI-skipping is a bit of a hack. 
- int32_t theIndex = 0; - do { - theIndex = aBuffer.FindChar('<', theIndex); - if (theIndex == kNotFound) break; - char16_t nextChar = aBuffer.CharAt(theIndex+1); - if (nextChar == char16_t('!')) { - int32_t tmpIndex = theIndex + 2; - if (kNotFound != - (theIndex=aBuffer.Find("DOCTYPE", true, tmpIndex, 0))) { - haveDoctype = true; - theIndex += 7; // skip "DOCTYPE" - break; - } - theIndex = ParsePS(aBuffer, tmpIndex); - theIndex = aBuffer.FindChar('>', theIndex); - } else if (nextChar == char16_t('?')) { - theIndex = aBuffer.FindChar('>', theIndex); - } else { - break; - } - } while (theIndex != kNotFound); - - if (!haveDoctype) - return true; - *aResultFlags |= PARSE_DTD_HAVE_DOCTYPE; - - theIndex = ParsePS(aBuffer, theIndex); - theIndex = aBuffer.Find("HTML", true, theIndex, 0); - if (kNotFound == theIndex) - return false; - theIndex = ParsePS(aBuffer, theIndex+4); - int32_t tmpIndex = aBuffer.Find("PUBLIC", true, theIndex, 0); - - if (kNotFound != tmpIndex) { - theIndex = ParsePS(aBuffer, tmpIndex+6); - - // We get here only if we've read <!DOCTYPE HTML PUBLIC - // (not case sensitive) possibly with comments within. - - // Now find the beginning and end of the public identifier - // and the system identifier (if present). - - char16_t lit = aBuffer.CharAt(theIndex); - if ((lit != char16_t('\"')) && (lit != char16_t('\''))) - return false; - - // Start is the first character, excluding the quote, and End is - // the final quote, so there are (end-start) characters. - - int32_t PublicIDStart = theIndex + 1; - int32_t PublicIDEnd = aBuffer.FindChar(lit, PublicIDStart); - if (kNotFound == PublicIDEnd) - return false; - theIndex = ParsePS(aBuffer, PublicIDEnd + 1); - char16_t next = aBuffer.CharAt(theIndex); - if (next == char16_t('>')) { - // There was a public identifier, but no system - // identifier, - // so do nothing. - // This is needed to avoid the else at the end, and it's - // also the most common case. - } else if ((next == char16_t('\"')) || - (next == char16_t('\''))) { - // We found a system identifier. - *aResultFlags |= PARSE_DTD_HAVE_SYSTEM_ID; - int32_t SystemIDStart = theIndex + 1; - int32_t SystemIDEnd = aBuffer.FindChar(next, SystemIDStart); - if (kNotFound == SystemIDEnd) - return false; - aSystemID = - Substring(aBuffer, SystemIDStart, SystemIDEnd - SystemIDStart); - } else if (next == char16_t('[')) { - // We found an internal subset. - *aResultFlags |= PARSE_DTD_HAVE_INTERNAL_SUBSET; - } else { - // Something's wrong. 
- return false; - } - - // Since a public ID is a minimum literal, we must trim - // and collapse whitespace - aPublicID = Substring(aBuffer, PublicIDStart, PublicIDEnd - PublicIDStart); - aPublicID.CompressWhitespace(true, true); - *aResultFlags |= PARSE_DTD_HAVE_PUBLIC_ID; - } else { - tmpIndex=aBuffer.Find("SYSTEM", true, theIndex, 0); - if (kNotFound != tmpIndex) { - // DOCTYPES with system ID but no Public ID - *aResultFlags |= PARSE_DTD_HAVE_SYSTEM_ID; - - theIndex = ParsePS(aBuffer, tmpIndex+6); - char16_t next = aBuffer.CharAt(theIndex); - if (next != char16_t('\"') && next != char16_t('\'')) - return false; - - int32_t SystemIDStart = theIndex + 1; - int32_t SystemIDEnd = aBuffer.FindChar(next, SystemIDStart); - - if (kNotFound == SystemIDEnd) - return false; - aSystemID = - Substring(aBuffer, SystemIDStart, SystemIDEnd - SystemIDStart); - theIndex = ParsePS(aBuffer, SystemIDEnd + 1); - } - - char16_t nextChar = aBuffer.CharAt(theIndex); - if (nextChar == char16_t('[')) - *aResultFlags |= PARSE_DTD_HAVE_INTERNAL_SUBSET; - else if (nextChar != char16_t('>')) - return false; - } - return true; -} - -struct PubIDInfo -{ - enum eMode { - eQuirks, /* always quirks mode, unless there's an internal subset */ - eAlmostStandards,/* eCompatibility_AlmostStandards */ - eFullStandards /* eCompatibility_FullStandards */ - /* - * public IDs that should trigger strict mode are not listed - * since we want all future public IDs to trigger strict mode as - * well - */ - }; - - const char* name; - eMode mode_if_no_sysid; - eMode mode_if_sysid; -}; - -#define ELEMENTS_OF(array_) (sizeof(array_)/sizeof(array_[0])) - -// These must be in nsCRT::strcmp order so binary-search can be used. -// This is verified, |#ifdef DEBUG|, below. 
- -// Even though public identifiers should be case sensitive, we will do -// all comparisons after converting to lower case in order to do -// case-insensitive comparison since there are a number of existing web -// sites that use the incorrect case. Therefore all of the public -// identifiers below are in lower case (with the correct case following, -// in comments). The case is verified, |#ifdef DEBUG|, below. -static const PubIDInfo kPublicIDs[] = { - {"+//silmaril//dtd html pro v0r11 19970101//en" /* "+//Silmaril//dtd html Pro v0r11 19970101//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//advasoft ltd//dtd html 3.0 aswedit + extensions//en" /* "-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//as//dtd html 3.0 aswedit + extensions//en" /* "-//AS//DTD HTML 3.0 asWedit + extensions//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//ietf//dtd html 2.0 level 1//en" /* "-//IETF//DTD HTML 2.0 Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//ietf//dtd html 2.0 level 2//en" /* "-//IETF//DTD HTML 2.0 Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//ietf//dtd html 2.0 strict level 1//en" /* "-//IETF//DTD HTML 2.0 Strict Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//ietf//dtd html 2.0 strict level 2//en" /* "-//IETF//DTD HTML 2.0 Strict Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//ietf//dtd html 2.0 strict//en" /* "-//IETF//DTD HTML 2.0 Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//ietf//dtd html 2.0//en" /* "-//IETF//DTD HTML 2.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//ietf//dtd html 2.1e//en" /* "-//IETF//DTD HTML 2.1E//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//ietf//dtd html 3.0//en" /* "-//IETF//DTD HTML 3.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//ietf//dtd html 3.0//en//" /* "-//IETF//DTD HTML 3.0//EN//" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - 
{"-//ietf//dtd html 3.2 final//en" /* "-//IETF//DTD HTML 3.2 Final//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//ietf//dtd html 3.2//en" /* "-//IETF//DTD HTML 3.2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//ietf//dtd html 3//en" /* "-//IETF//DTD HTML 3//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//ietf//dtd html level 0//en" /* "-//IETF//DTD HTML Level 0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//ietf//dtd html level 0//en//2.0" /* "-//IETF//DTD HTML Level 0//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//ietf//dtd html level 1//en" /* "-//IETF//DTD HTML Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//ietf//dtd html level 1//en//2.0" /* "-//IETF//DTD HTML Level 1//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//ietf//dtd html level 2//en" /* "-//IETF//DTD HTML Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//ietf//dtd html level 2//en//2.0" /* "-//IETF//DTD HTML Level 2//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//ietf//dtd html level 3//en" /* "-//IETF//DTD HTML Level 3//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//ietf//dtd html level 3//en//3.0" /* "-//IETF//DTD HTML Level 3//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//ietf//dtd html strict level 0//en" /* "-//IETF//DTD HTML Strict Level 0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//ietf//dtd html strict level 0//en//2.0" /* "-//IETF//DTD HTML Strict Level 0//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//ietf//dtd html strict level 1//en" /* "-//IETF//DTD HTML Strict Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//ietf//dtd html strict level 1//en//2.0" /* "-//IETF//DTD HTML Strict Level 1//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//ietf//dtd html strict level 2//en" /* "-//IETF//DTD HTML Strict Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//ietf//dtd html strict level 2//en//2.0" 
/* "-//IETF//DTD HTML Strict Level 2//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//ietf//dtd html strict level 3//en" /* "-//IETF//DTD HTML Strict Level 3//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//ietf//dtd html strict level 3//en//3.0" /* "-//IETF//DTD HTML Strict Level 3//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//ietf//dtd html strict//en" /* "-//IETF//DTD HTML Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//ietf//dtd html strict//en//2.0" /* "-//IETF//DTD HTML Strict//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//ietf//dtd html strict//en//3.0" /* "-//IETF//DTD HTML Strict//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//ietf//dtd html//en" /* "-//IETF//DTD HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//ietf//dtd html//en//2.0" /* "-//IETF//DTD HTML//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//ietf//dtd html//en//3.0" /* "-//IETF//DTD HTML//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//metrius//dtd metrius presentational//en" /* "-//Metrius//DTD Metrius Presentational//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//microsoft//dtd internet explorer 2.0 html strict//en" /* "-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//microsoft//dtd internet explorer 2.0 html//en" /* "-//Microsoft//DTD Internet Explorer 2.0 HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//microsoft//dtd internet explorer 2.0 tables//en" /* "-//Microsoft//DTD Internet Explorer 2.0 Tables//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//microsoft//dtd internet explorer 3.0 html strict//en" /* "-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//microsoft//dtd internet explorer 3.0 html//en" /* "-//Microsoft//DTD Internet Explorer 3.0 HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//microsoft//dtd 
internet explorer 3.0 tables//en" /* "-//Microsoft//DTD Internet Explorer 3.0 Tables//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//netscape comm. corp.//dtd html//en" /* "-//Netscape Comm. Corp.//DTD HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//netscape comm. corp.//dtd strict html//en" /* "-//Netscape Comm. Corp.//DTD Strict HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//o'reilly and associates//dtd html 2.0//en" /* "-//O'Reilly and Associates//DTD HTML 2.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//o'reilly and associates//dtd html extended 1.0//en" /* "-//O'Reilly and Associates//DTD HTML Extended 1.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//o'reilly and associates//dtd html extended relaxed 1.0//en" /* "-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//en" /* "-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//en" /* "-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//spyglass//dtd html 2.0 extended//en" /* "-//Spyglass//DTD HTML 2.0 Extended//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//sq//dtd html 2.0 hotmetal + extensions//en" /* "-//SQ//DTD HTML 2.0 HoTMetaL + extensions//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//sun microsystems corp.//dtd hotjava html//en" /* "-//Sun Microsystems Corp.//DTD HotJava HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//sun microsystems corp.//dtd hotjava strict html//en" /* "-//Sun Microsystems Corp.//DTD HotJava Strict HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//w3c//dtd html 3 1995-03-24//en" /* "-//W3C//DTD HTML 3 1995-03-24//EN" */, 
PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//w3c//dtd html 3.2 draft//en" /* "-//W3C//DTD HTML 3.2 Draft//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//w3c//dtd html 3.2 final//en" /* "-//W3C//DTD HTML 3.2 Final//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//w3c//dtd html 3.2//en" /* "-//W3C//DTD HTML 3.2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//w3c//dtd html 3.2s draft//en" /* "-//W3C//DTD HTML 3.2S Draft//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//w3c//dtd html 4.0 frameset//en" /* "-//W3C//DTD HTML 4.0 Frameset//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//w3c//dtd html 4.0 transitional//en" /* "-//W3C//DTD HTML 4.0 Transitional//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//w3c//dtd html 4.01 frameset//en" /* "-//W3C//DTD HTML 4.01 Frameset//EN" */, PubIDInfo::eQuirks, PubIDInfo::eAlmostStandards}, - {"-//w3c//dtd html 4.01 transitional//en" /* "-//W3C//DTD HTML 4.01 Transitional//EN" */, PubIDInfo::eQuirks, PubIDInfo::eAlmostStandards}, - {"-//w3c//dtd html experimental 19960712//en" /* "-//W3C//DTD HTML Experimental 19960712//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//w3c//dtd html experimental 970421//en" /* "-//W3C//DTD HTML Experimental 970421//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//w3c//dtd w3 html//en" /* "-//W3C//DTD W3 HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//w3c//dtd xhtml 1.0 frameset//en" /* "-//W3C//DTD XHTML 1.0 Frameset//EN" */, PubIDInfo::eAlmostStandards, PubIDInfo::eAlmostStandards}, - {"-//w3c//dtd xhtml 1.0 transitional//en" /* "-//W3C//DTD XHTML 1.0 Transitional//EN" */, PubIDInfo::eAlmostStandards, PubIDInfo::eAlmostStandards}, - {"-//w3o//dtd w3 html 3.0//en" /* "-//W3O//DTD W3 HTML 3.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//w3o//dtd w3 html 3.0//en//" /* "-//W3O//DTD W3 HTML 3.0//EN//" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//w3o//dtd w3 html strict 3.0//en//" /* "-//W3O//DTD W3 HTML 
Strict 3.0//EN//" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//webtechs//dtd mozilla html 2.0//en" /* "-//WebTechs//DTD Mozilla HTML 2.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//webtechs//dtd mozilla html//en" /* "-//WebTechs//DTD Mozilla HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-/w3c/dtd html 4.0 transitional/en" /* "-/W3C/DTD HTML 4.0 Transitional/EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"html" /* "HTML" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, -}; - -#ifdef DEBUG -static void -VerifyPublicIDs() -{ - static bool gVerified = false; - if (!gVerified) { - gVerified = true; - uint32_t i; - for (i = 0; i < ELEMENTS_OF(kPublicIDs) - 1; ++i) { - if (nsCRT::strcmp(kPublicIDs[i].name, kPublicIDs[i+1].name) >= 0) { - NS_NOTREACHED("doctypes out of order"); - printf("Doctypes %s and %s out of order.\n", - kPublicIDs[i].name, kPublicIDs[i+1].name); - } - } - for (i = 0; i < ELEMENTS_OF(kPublicIDs); ++i) { - nsAutoCString lcPubID(kPublicIDs[i].name); - ToLowerCase(lcPubID); - if (nsCRT::strcmp(kPublicIDs[i].name, lcPubID.get()) != 0) { - NS_NOTREACHED("doctype not lower case"); - printf("Doctype %s not lower case.\n", kPublicIDs[i].name); - } - } - } -} -#endif - -namespace { - -struct PublicIdComparator -{ - const nsAutoCString& mPublicId; - explicit PublicIdComparator(const nsAutoCString& aPublicId) - : mPublicId(aPublicId) {} - int operator()(const PubIDInfo& aInfo) const { - return nsCRT::strcmp(mPublicId.get(), aInfo.name); - } -}; - -} // namespace - -static void -DetermineHTMLParseMode(const nsString& aBuffer, - nsDTDMode& aParseMode, - eParserDocType& aDocType) -{ -#ifdef DEBUG - VerifyPublicIDs(); -#endif - int32_t resultFlags; - nsAutoString publicIDUCS2, sysIDUCS2; - if (ParseDocTypeDecl(aBuffer, &resultFlags, publicIDUCS2, sysIDUCS2)) { - if (!(resultFlags & PARSE_DTD_HAVE_DOCTYPE)) { - // no DOCTYPE - aParseMode = eDTDMode_quirks; - aDocType = eHTML_Quirks; - } else if ((resultFlags & 
PARSE_DTD_HAVE_INTERNAL_SUBSET) || - !(resultFlags & PARSE_DTD_HAVE_PUBLIC_ID)) { - // A doctype with an internal subset is always full_standards. - // A doctype without a public ID is always full_standards. - aDocType = eHTML_Strict; - aParseMode = eDTDMode_full_standards; - - // Special hack for IBM's custom DOCTYPE. - if (!(resultFlags & PARSE_DTD_HAVE_INTERNAL_SUBSET) && - sysIDUCS2.EqualsLiteral( - "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")) { - aParseMode = eDTDMode_quirks; - aDocType = eHTML_Quirks; - } - - } else { - // We have to check our list of public IDs to see what to do. - // Yes, we want UCS2 to ASCII lossy conversion. - nsAutoCString publicID; - publicID.AssignWithConversion(publicIDUCS2); - - // See comment above definition of kPublicIDs about case - // sensitivity. - ToLowerCase(publicID); - - // Binary search to see if we can find the correct public ID. - size_t index; - bool found = BinarySearchIf(kPublicIDs, 0, ArrayLength(kPublicIDs), - PublicIdComparator(publicID), &index); - if (!found) { - // The DOCTYPE is not in our list, so it must be full_standards. - aParseMode = eDTDMode_full_standards; - aDocType = eHTML_Strict; - return; - } - - switch ((resultFlags & PARSE_DTD_HAVE_SYSTEM_ID) - ? 
kPublicIDs[index].mode_if_sysid - : kPublicIDs[index].mode_if_no_sysid) - { - case PubIDInfo::eQuirks: - aParseMode = eDTDMode_quirks; - aDocType = eHTML_Quirks; - break; - case PubIDInfo::eAlmostStandards: - aParseMode = eDTDMode_almost_standards; - aDocType = eHTML_Strict; - break; - case PubIDInfo::eFullStandards: - aParseMode = eDTDMode_full_standards; - aDocType = eHTML_Strict; - break; - default: - NS_NOTREACHED("no other cases!"); - } - } - } else { - // badly formed DOCTYPE -> quirks - aParseMode = eDTDMode_quirks; - aDocType = eHTML_Quirks; - } -} - -static void -DetermineParseMode(const nsString& aBuffer, nsDTDMode& aParseMode, - eParserDocType& aDocType, const nsACString& aMimeType) -{ - if (aMimeType.EqualsLiteral(TEXT_HTML)) { - DetermineHTMLParseMode(aBuffer, aParseMode, aDocType); - } else if (nsContentUtils::IsPlainTextType(aMimeType)) { - aDocType = ePlainText; - aParseMode = eDTDMode_quirks; - } else { // Some form of XML - aDocType = eXML; - aParseMode = eDTDMode_full_standards; - } -} - static nsIDTD* FindSuitableDTD(CParserContext& aParserContext) { @@ -849,15 +441,14 @@ nsParser::WillBuildModel(nsString& aFilename) if (eDTDMode_unknown == mParserContext->mDTDMode || eDTDMode_autodetect == mParserContext->mDTDMode) { - char16_t buf[1025]; - nsFixedString theBuffer(buf, 1024, 0); - - // Grab 1024 characters, starting at the first non-whitespace - // character, to look for the doctype in. 
- mParserContext->mScanner->Peek(theBuffer, 1024, mParserContext->mScanner->FirstNonWhitespacePosition()); - DetermineParseMode(theBuffer, mParserContext->mDTDMode, - mParserContext->mDocType, mParserContext->mMimeType); - } + if (mIsAboutBlank) { + mParserContext->mDTDMode = eDTDMode_quirks; + mParserContext->mDocType = eHTML_Quirks; + } else { + mParserContext->mDTDMode = eDTDMode_full_standards; + mParserContext->mDocType = eXML; + } + } // else XML fragment with nested parser context NS_ASSERTION(!mDTD || !mParserContext->mPrevContext, "Clobbering DTD for non-root parser context!"); diff --git a/parser/htmlparser/nsScanner.cpp b/parser/htmlparser/nsScanner.cpp index d1ccd8f2fc1a..cf087460c905 100644 --- a/parser/htmlparser/nsScanner.cpp +++ b/parser/htmlparser/nsScanner.cpp @@ -329,70 +329,6 @@ nsresult nsScanner::GetChar(char16_t& aChar) { return NS_OK; } - -/** - * peek ahead to consume next char from scanner's internal - * input buffer - * - * @update gess 3/25/98 - * @param - * @return - */ -nsresult nsScanner::Peek(char16_t& aChar, uint32_t aOffset) { - aChar = 0; - - if (!mSlidingBuffer || mCurrentPosition == mEndPosition) { - return NS_ERROR_HTMLPARSER_EOF; - } - - if (aOffset > 0) { - if (mCountRemaining <= aOffset) - return NS_ERROR_HTMLPARSER_EOF; - - nsScannerIterator pos = mCurrentPosition; - pos.advance(aOffset); - aChar=*pos; - } - else { - aChar=*mCurrentPosition; - } - - return NS_OK; -} - -nsresult nsScanner::Peek(nsAString& aStr, int32_t aNumChars, int32_t aOffset) -{ - if (!mSlidingBuffer || mCurrentPosition == mEndPosition) { - return NS_ERROR_HTMLPARSER_EOF; - } - - nsScannerIterator start, end; - - start = mCurrentPosition; - - if ((int32_t)mCountRemaining <= aOffset) { - return NS_ERROR_HTMLPARSER_EOF; - } - - if (aOffset > 0) { - start.advance(aOffset); - } - - if (mCountRemaining < uint32_t(aNumChars + aOffset)) { - end = mEndPosition; - } - else { - end = start; - end.advance(aNumChars); - } - - if (!CopyUnicodeTo(start, end, aStr)) { 
- return NS_ERROR_OUT_OF_MEMORY; - } - - return NS_OK; -} - void nsScanner::BindSubstring(nsScannerSubstring& aSubstring, const nsScannerIterator& aStart, const nsScannerIterator& aEnd) { aSubstring.Rebind(*mSlidingBuffer, aStart, aEnd); diff --git a/parser/htmlparser/nsScanner.h b/parser/htmlparser/nsScanner.h index cbf92291ee83..47d3de45dc43 100644 --- a/parser/htmlparser/nsScanner.h +++ b/parser/htmlparser/nsScanner.h @@ -60,18 +60,6 @@ class nsScanner { */ nsresult GetChar(char16_t& ch); - /** - * peek ahead to consume next char from scanner's internal - * input buffer - * - * @update gess 3/25/98 - * @param ch is the char to accept new value - * @return error code reflecting read status - */ - nsresult Peek(char16_t& ch, uint32_t aOffset=0); - - nsresult Peek(nsAString& aStr, int32_t aNumChars, int32_t aOffset = 0); - /** * Records current offset position in input stream. This allows us * to back up to this point if the need should arise, such as when diff --git a/parser/htmlparser/nsScannerString.cpp b/parser/htmlparser/nsScannerString.cpp index 380a72e8f373..99ae311dc98e 100644 --- a/parser/htmlparser/nsScannerString.cpp +++ b/parser/htmlparser/nsScannerString.cpp @@ -6,6 +6,7 @@ #include <stdlib.h> #include "nsScannerString.h" +#include "mozilla/CheckedInt.h" /** @@ -472,7 +473,13 @@ CopyUnicodeTo( const nsScannerIterator& aSrcStart, nsAString& aDest ) { nsAString::iterator writer; - if (!aDest.SetLength(Distance(aSrcStart, aSrcEnd), mozilla::fallible)) { + + mozilla::CheckedInt<nsAString::size_type> distance(Distance(aSrcStart, aSrcEnd)); + if (!distance.isValid()) { + return false; // overflow detected + } + + if (!aDest.SetLength(distance.value(), mozilla::fallible)) { aDest.Truncate(); return false; // out of memory } @@ -505,8 +512,14 @@ AppendUnicodeTo( const nsScannerIterator& aSrcStart, nsAString& aDest ) { nsAString::iterator writer; - uint32_t oldLength = aDest.Length(); - if (!aDest.SetLength(oldLength + Distance(aSrcStart, aSrcEnd), mozilla::fallible)) + const 
nsAString::size_type oldLength = aDest.Length(); + CheckedInt<nsAString::size_type> newLen(Distance(aSrcStart, aSrcEnd)); + newLen += oldLength; + if (!newLen.isValid()) { + return false; // overflow detected + } + + if (!aDest.SetLength(newLen.value(), mozilla::fallible)) return false; // out of memory aDest.BeginWriting(writer).advance(oldLength); nsScannerIterator fromBegin(aSrcStart);