mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-10-08 10:44:56 +00:00
Bug 1334246. r=mrbkap.
nsScannerString changes by erahm. MozReview-Commit-ID: 6lh8SYjpGlj
This commit is contained in:
parent
26a8c610e5
commit
fd7f0ae458
@ -335,414 +335,6 @@ nsParser::GetContentSink()
|
||||
return mSink;
|
||||
}
|
||||
|
||||
/**
|
||||
* Determine what DTD mode (and thus what layout nsCompatibility mode)
|
||||
* to use for this document based on the first chunk of data received
|
||||
* from the network (each parsercontext can have its own mode). (No,
|
||||
* this is not an optimal solution -- we really don't need to know until
|
||||
* after we've received the DOCTYPE, and this could easily be part of
|
||||
* the regular parsing process if the parser were designed in a way that
|
||||
* made such modifications easy.)
|
||||
*/
|
||||
|
||||
// Parse the PS production in the SGML spec (excluding the part dealing
|
||||
// with entity references) starting at theIndex into theBuffer, and
|
||||
// return the first index after the end of the production.
|
||||
static int32_t
|
||||
ParsePS(const nsString& aBuffer, int32_t aIndex)
|
||||
{
|
||||
for (;;) {
|
||||
char16_t ch = aBuffer.CharAt(aIndex);
|
||||
if ((ch == char16_t(' ')) || (ch == char16_t('\t')) ||
|
||||
(ch == char16_t('\n')) || (ch == char16_t('\r'))) {
|
||||
++aIndex;
|
||||
} else if (ch == char16_t('-')) {
|
||||
int32_t tmpIndex;
|
||||
if (aBuffer.CharAt(aIndex+1) == char16_t('-') &&
|
||||
kNotFound != (tmpIndex=aBuffer.Find("--",false,aIndex+2,-1))) {
|
||||
aIndex = tmpIndex + 2;
|
||||
} else {
|
||||
return aIndex;
|
||||
}
|
||||
} else {
|
||||
return aIndex;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#define PARSE_DTD_HAVE_DOCTYPE (1<<0)
|
||||
#define PARSE_DTD_HAVE_PUBLIC_ID (1<<1)
|
||||
#define PARSE_DTD_HAVE_SYSTEM_ID (1<<2)
|
||||
#define PARSE_DTD_HAVE_INTERNAL_SUBSET (1<<3)
|
||||
|
||||
// return true on success (includes not present), false on failure
|
||||
static bool
|
||||
ParseDocTypeDecl(const nsString &aBuffer,
|
||||
int32_t *aResultFlags,
|
||||
nsString &aPublicID,
|
||||
nsString &aSystemID)
|
||||
{
|
||||
bool haveDoctype = false;
|
||||
*aResultFlags = 0;
|
||||
|
||||
// Skip through any comments and processing instructions
|
||||
// The PI-skipping is a bit of a hack.
|
||||
int32_t theIndex = 0;
|
||||
do {
|
||||
theIndex = aBuffer.FindChar('<', theIndex);
|
||||
if (theIndex == kNotFound) break;
|
||||
char16_t nextChar = aBuffer.CharAt(theIndex+1);
|
||||
if (nextChar == char16_t('!')) {
|
||||
int32_t tmpIndex = theIndex + 2;
|
||||
if (kNotFound !=
|
||||
(theIndex=aBuffer.Find("DOCTYPE", true, tmpIndex, 0))) {
|
||||
haveDoctype = true;
|
||||
theIndex += 7; // skip "DOCTYPE"
|
||||
break;
|
||||
}
|
||||
theIndex = ParsePS(aBuffer, tmpIndex);
|
||||
theIndex = aBuffer.FindChar('>', theIndex);
|
||||
} else if (nextChar == char16_t('?')) {
|
||||
theIndex = aBuffer.FindChar('>', theIndex);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
} while (theIndex != kNotFound);
|
||||
|
||||
if (!haveDoctype)
|
||||
return true;
|
||||
*aResultFlags |= PARSE_DTD_HAVE_DOCTYPE;
|
||||
|
||||
theIndex = ParsePS(aBuffer, theIndex);
|
||||
theIndex = aBuffer.Find("HTML", true, theIndex, 0);
|
||||
if (kNotFound == theIndex)
|
||||
return false;
|
||||
theIndex = ParsePS(aBuffer, theIndex+4);
|
||||
int32_t tmpIndex = aBuffer.Find("PUBLIC", true, theIndex, 0);
|
||||
|
||||
if (kNotFound != tmpIndex) {
|
||||
theIndex = ParsePS(aBuffer, tmpIndex+6);
|
||||
|
||||
// We get here only if we've read <!DOCTYPE HTML PUBLIC
|
||||
// (not case sensitive) possibly with comments within.
|
||||
|
||||
// Now find the beginning and end of the public identifier
|
||||
// and the system identifier (if present).
|
||||
|
||||
char16_t lit = aBuffer.CharAt(theIndex);
|
||||
if ((lit != char16_t('\"')) && (lit != char16_t('\'')))
|
||||
return false;
|
||||
|
||||
// Start is the first character, excluding the quote, and End is
|
||||
// the final quote, so there are (end-start) characters.
|
||||
|
||||
int32_t PublicIDStart = theIndex + 1;
|
||||
int32_t PublicIDEnd = aBuffer.FindChar(lit, PublicIDStart);
|
||||
if (kNotFound == PublicIDEnd)
|
||||
return false;
|
||||
theIndex = ParsePS(aBuffer, PublicIDEnd + 1);
|
||||
char16_t next = aBuffer.CharAt(theIndex);
|
||||
if (next == char16_t('>')) {
|
||||
// There was a public identifier, but no system
|
||||
// identifier,
|
||||
// so do nothing.
|
||||
// This is needed to avoid the else at the end, and it's
|
||||
// also the most common case.
|
||||
} else if ((next == char16_t('\"')) ||
|
||||
(next == char16_t('\''))) {
|
||||
// We found a system identifier.
|
||||
*aResultFlags |= PARSE_DTD_HAVE_SYSTEM_ID;
|
||||
int32_t SystemIDStart = theIndex + 1;
|
||||
int32_t SystemIDEnd = aBuffer.FindChar(next, SystemIDStart);
|
||||
if (kNotFound == SystemIDEnd)
|
||||
return false;
|
||||
aSystemID =
|
||||
Substring(aBuffer, SystemIDStart, SystemIDEnd - SystemIDStart);
|
||||
} else if (next == char16_t('[')) {
|
||||
// We found an internal subset.
|
||||
*aResultFlags |= PARSE_DTD_HAVE_INTERNAL_SUBSET;
|
||||
} else {
|
||||
// Something's wrong.
|
||||
return false;
|
||||
}
|
||||
|
||||
// Since a public ID is a minimum literal, we must trim
|
||||
// and collapse whitespace
|
||||
aPublicID = Substring(aBuffer, PublicIDStart, PublicIDEnd - PublicIDStart);
|
||||
aPublicID.CompressWhitespace(true, true);
|
||||
*aResultFlags |= PARSE_DTD_HAVE_PUBLIC_ID;
|
||||
} else {
|
||||
tmpIndex=aBuffer.Find("SYSTEM", true, theIndex, 0);
|
||||
if (kNotFound != tmpIndex) {
|
||||
// DOCTYPES with system ID but no Public ID
|
||||
*aResultFlags |= PARSE_DTD_HAVE_SYSTEM_ID;
|
||||
|
||||
theIndex = ParsePS(aBuffer, tmpIndex+6);
|
||||
char16_t next = aBuffer.CharAt(theIndex);
|
||||
if (next != char16_t('\"') && next != char16_t('\''))
|
||||
return false;
|
||||
|
||||
int32_t SystemIDStart = theIndex + 1;
|
||||
int32_t SystemIDEnd = aBuffer.FindChar(next, SystemIDStart);
|
||||
|
||||
if (kNotFound == SystemIDEnd)
|
||||
return false;
|
||||
aSystemID =
|
||||
Substring(aBuffer, SystemIDStart, SystemIDEnd - SystemIDStart);
|
||||
theIndex = ParsePS(aBuffer, SystemIDEnd + 1);
|
||||
}
|
||||
|
||||
char16_t nextChar = aBuffer.CharAt(theIndex);
|
||||
if (nextChar == char16_t('['))
|
||||
*aResultFlags |= PARSE_DTD_HAVE_INTERNAL_SUBSET;
|
||||
else if (nextChar != char16_t('>'))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
struct PubIDInfo
|
||||
{
|
||||
enum eMode {
|
||||
eQuirks, /* always quirks mode, unless there's an internal subset */
|
||||
eAlmostStandards,/* eCompatibility_AlmostStandards */
|
||||
eFullStandards /* eCompatibility_FullStandards */
|
||||
/*
|
||||
* public IDs that should trigger strict mode are not listed
|
||||
* since we want all future public IDs to trigger strict mode as
|
||||
* well
|
||||
*/
|
||||
};
|
||||
|
||||
const char* name;
|
||||
eMode mode_if_no_sysid;
|
||||
eMode mode_if_sysid;
|
||||
};
|
||||
|
||||
#define ELEMENTS_OF(array_) (sizeof(array_)/sizeof(array_[0]))
|
||||
|
||||
// These must be in nsCRT::strcmp order so binary-search can be used.
|
||||
// This is verified, |#ifdef DEBUG|, below.
|
||||
|
||||
// Even though public identifiers should be case sensitive, we will do
|
||||
// all comparisons after converting to lower case in order to do
|
||||
// case-insensitive comparison since there are a number of existing web
|
||||
// sites that use the incorrect case. Therefore all of the public
|
||||
// identifiers below are in lower case (with the correct case following,
|
||||
// in comments). The case is verified, |#ifdef DEBUG|, below.
|
||||
static const PubIDInfo kPublicIDs[] = {
|
||||
{"+//silmaril//dtd html pro v0r11 19970101//en" /* "+//Silmaril//dtd html Pro v0r11 19970101//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//advasoft ltd//dtd html 3.0 aswedit + extensions//en" /* "-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//as//dtd html 3.0 aswedit + extensions//en" /* "-//AS//DTD HTML 3.0 asWedit + extensions//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//ietf//dtd html 2.0 level 1//en" /* "-//IETF//DTD HTML 2.0 Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//ietf//dtd html 2.0 level 2//en" /* "-//IETF//DTD HTML 2.0 Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//ietf//dtd html 2.0 strict level 1//en" /* "-//IETF//DTD HTML 2.0 Strict Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//ietf//dtd html 2.0 strict level 2//en" /* "-//IETF//DTD HTML 2.0 Strict Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//ietf//dtd html 2.0 strict//en" /* "-//IETF//DTD HTML 2.0 Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//ietf//dtd html 2.0//en" /* "-//IETF//DTD HTML 2.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//ietf//dtd html 2.1e//en" /* "-//IETF//DTD HTML 2.1E//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//ietf//dtd html 3.0//en" /* "-//IETF//DTD HTML 3.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//ietf//dtd html 3.0//en//" /* "-//IETF//DTD HTML 3.0//EN//" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//ietf//dtd html 3.2 final//en" /* "-//IETF//DTD HTML 3.2 Final//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//ietf//dtd html 3.2//en" /* "-//IETF//DTD HTML 3.2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//ietf//dtd html 3//en" /* "-//IETF//DTD HTML 3//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//ietf//dtd html level 0//en" /* "-//IETF//DTD HTML Level 0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//ietf//dtd html level 0//en//2.0" /* "-//IETF//DTD HTML Level 0//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//ietf//dtd html level 1//en" /* "-//IETF//DTD HTML Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//ietf//dtd html level 1//en//2.0" /* "-//IETF//DTD HTML Level 1//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//ietf//dtd html level 2//en" /* "-//IETF//DTD HTML Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//ietf//dtd html level 2//en//2.0" /* "-//IETF//DTD HTML Level 2//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//ietf//dtd html level 3//en" /* "-//IETF//DTD HTML Level 3//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//ietf//dtd html level 3//en//3.0" /* "-//IETF//DTD HTML Level 3//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//ietf//dtd html strict level 0//en" /* "-//IETF//DTD HTML Strict Level 0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//ietf//dtd html strict level 0//en//2.0" /* "-//IETF//DTD HTML Strict Level 0//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//ietf//dtd html strict level 1//en" /* "-//IETF//DTD HTML Strict Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//ietf//dtd html strict level 1//en//2.0" /* "-//IETF//DTD HTML Strict Level 1//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//ietf//dtd html strict level 2//en" /* "-//IETF//DTD HTML Strict Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//ietf//dtd html strict level 2//en//2.0" /* "-//IETF//DTD HTML Strict Level 2//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//ietf//dtd html strict level 3//en" /* "-//IETF//DTD HTML Strict Level 3//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//ietf//dtd html strict level 3//en//3.0" /* "-//IETF//DTD HTML Strict Level 3//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//ietf//dtd html strict//en" /* "-//IETF//DTD HTML Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//ietf//dtd html strict//en//2.0" /* "-//IETF//DTD HTML Strict//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//ietf//dtd html strict//en//3.0" /* "-//IETF//DTD HTML Strict//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//ietf//dtd html//en" /* "-//IETF//DTD HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//ietf//dtd html//en//2.0" /* "-//IETF//DTD HTML//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//ietf//dtd html//en//3.0" /* "-//IETF//DTD HTML//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//metrius//dtd metrius presentational//en" /* "-//Metrius//DTD Metrius Presentational//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//microsoft//dtd internet explorer 2.0 html strict//en" /* "-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//microsoft//dtd internet explorer 2.0 html//en" /* "-//Microsoft//DTD Internet Explorer 2.0 HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//microsoft//dtd internet explorer 2.0 tables//en" /* "-//Microsoft//DTD Internet Explorer 2.0 Tables//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//microsoft//dtd internet explorer 3.0 html strict//en" /* "-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//microsoft//dtd internet explorer 3.0 html//en" /* "-//Microsoft//DTD Internet Explorer 3.0 HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//microsoft//dtd internet explorer 3.0 tables//en" /* "-//Microsoft//DTD Internet Explorer 3.0 Tables//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//netscape comm. corp.//dtd html//en" /* "-//Netscape Comm. Corp.//DTD HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//netscape comm. corp.//dtd strict html//en" /* "-//Netscape Comm. Corp.//DTD Strict HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//o'reilly and associates//dtd html 2.0//en" /* "-//O'Reilly and Associates//DTD HTML 2.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//o'reilly and associates//dtd html extended 1.0//en" /* "-//O'Reilly and Associates//DTD HTML Extended 1.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//o'reilly and associates//dtd html extended relaxed 1.0//en" /* "-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//en" /* "-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//en" /* "-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//spyglass//dtd html 2.0 extended//en" /* "-//Spyglass//DTD HTML 2.0 Extended//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//sq//dtd html 2.0 hotmetal + extensions//en" /* "-//SQ//DTD HTML 2.0 HoTMetaL + extensions//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//sun microsystems corp.//dtd hotjava html//en" /* "-//Sun Microsystems Corp.//DTD HotJava HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//sun microsystems corp.//dtd hotjava strict html//en" /* "-//Sun Microsystems Corp.//DTD HotJava Strict HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//w3c//dtd html 3 1995-03-24//en" /* "-//W3C//DTD HTML 3 1995-03-24//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//w3c//dtd html 3.2 draft//en" /* "-//W3C//DTD HTML 3.2 Draft//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//w3c//dtd html 3.2 final//en" /* "-//W3C//DTD HTML 3.2 Final//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//w3c//dtd html 3.2//en" /* "-//W3C//DTD HTML 3.2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//w3c//dtd html 3.2s draft//en" /* "-//W3C//DTD HTML 3.2S Draft//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//w3c//dtd html 4.0 frameset//en" /* "-//W3C//DTD HTML 4.0 Frameset//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//w3c//dtd html 4.0 transitional//en" /* "-//W3C//DTD HTML 4.0 Transitional//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//w3c//dtd html 4.01 frameset//en" /* "-//W3C//DTD HTML 4.01 Frameset//EN" */, PubIDInfo::eQuirks, PubIDInfo::eAlmostStandards},
|
||||
{"-//w3c//dtd html 4.01 transitional//en" /* "-//W3C//DTD HTML 4.01 Transitional//EN" */, PubIDInfo::eQuirks, PubIDInfo::eAlmostStandards},
|
||||
{"-//w3c//dtd html experimental 19960712//en" /* "-//W3C//DTD HTML Experimental 19960712//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//w3c//dtd html experimental 970421//en" /* "-//W3C//DTD HTML Experimental 970421//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//w3c//dtd w3 html//en" /* "-//W3C//DTD W3 HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//w3c//dtd xhtml 1.0 frameset//en" /* "-//W3C//DTD XHTML 1.0 Frameset//EN" */, PubIDInfo::eAlmostStandards, PubIDInfo::eAlmostStandards},
|
||||
{"-//w3c//dtd xhtml 1.0 transitional//en" /* "-//W3C//DTD XHTML 1.0 Transitional//EN" */, PubIDInfo::eAlmostStandards, PubIDInfo::eAlmostStandards},
|
||||
{"-//w3o//dtd w3 html 3.0//en" /* "-//W3O//DTD W3 HTML 3.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//w3o//dtd w3 html 3.0//en//" /* "-//W3O//DTD W3 HTML 3.0//EN//" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//w3o//dtd w3 html strict 3.0//en//" /* "-//W3O//DTD W3 HTML Strict 3.0//EN//" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//webtechs//dtd mozilla html 2.0//en" /* "-//WebTechs//DTD Mozilla HTML 2.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-//webtechs//dtd mozilla html//en" /* "-//WebTechs//DTD Mozilla HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"-/w3c/dtd html 4.0 transitional/en" /* "-/W3C/DTD HTML 4.0 Transitional/EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
{"html" /* "HTML" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
|
||||
};
|
||||
|
||||
#ifdef DEBUG
|
||||
static void
|
||||
VerifyPublicIDs()
|
||||
{
|
||||
static bool gVerified = false;
|
||||
if (!gVerified) {
|
||||
gVerified = true;
|
||||
uint32_t i;
|
||||
for (i = 0; i < ELEMENTS_OF(kPublicIDs) - 1; ++i) {
|
||||
if (nsCRT::strcmp(kPublicIDs[i].name, kPublicIDs[i+1].name) >= 0) {
|
||||
NS_NOTREACHED("doctypes out of order");
|
||||
printf("Doctypes %s and %s out of order.\n",
|
||||
kPublicIDs[i].name, kPublicIDs[i+1].name);
|
||||
}
|
||||
}
|
||||
for (i = 0; i < ELEMENTS_OF(kPublicIDs); ++i) {
|
||||
nsAutoCString lcPubID(kPublicIDs[i].name);
|
||||
ToLowerCase(lcPubID);
|
||||
if (nsCRT::strcmp(kPublicIDs[i].name, lcPubID.get()) != 0) {
|
||||
NS_NOTREACHED("doctype not lower case");
|
||||
printf("Doctype %s not lower case.\n", kPublicIDs[i].name);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
namespace {
|
||||
|
||||
struct PublicIdComparator
|
||||
{
|
||||
const nsAutoCString& mPublicId;
|
||||
explicit PublicIdComparator(const nsAutoCString& aPublicId)
|
||||
: mPublicId(aPublicId) {}
|
||||
int operator()(const PubIDInfo& aInfo) const {
|
||||
return nsCRT::strcmp(mPublicId.get(), aInfo.name);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
static void
|
||||
DetermineHTMLParseMode(const nsString& aBuffer,
|
||||
nsDTDMode& aParseMode,
|
||||
eParserDocType& aDocType)
|
||||
{
|
||||
#ifdef DEBUG
|
||||
VerifyPublicIDs();
|
||||
#endif
|
||||
int32_t resultFlags;
|
||||
nsAutoString publicIDUCS2, sysIDUCS2;
|
||||
if (ParseDocTypeDecl(aBuffer, &resultFlags, publicIDUCS2, sysIDUCS2)) {
|
||||
if (!(resultFlags & PARSE_DTD_HAVE_DOCTYPE)) {
|
||||
// no DOCTYPE
|
||||
aParseMode = eDTDMode_quirks;
|
||||
aDocType = eHTML_Quirks;
|
||||
} else if ((resultFlags & PARSE_DTD_HAVE_INTERNAL_SUBSET) ||
|
||||
!(resultFlags & PARSE_DTD_HAVE_PUBLIC_ID)) {
|
||||
// A doctype with an internal subset is always full_standards.
|
||||
// A doctype without a public ID is always full_standards.
|
||||
aDocType = eHTML_Strict;
|
||||
aParseMode = eDTDMode_full_standards;
|
||||
|
||||
// Special hack for IBM's custom DOCTYPE.
|
||||
if (!(resultFlags & PARSE_DTD_HAVE_INTERNAL_SUBSET) &&
|
||||
sysIDUCS2.EqualsLiteral(
|
||||
"http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")) {
|
||||
aParseMode = eDTDMode_quirks;
|
||||
aDocType = eHTML_Quirks;
|
||||
}
|
||||
|
||||
} else {
|
||||
// We have to check our list of public IDs to see what to do.
|
||||
// Yes, we want UCS2 to ASCII lossy conversion.
|
||||
nsAutoCString publicID;
|
||||
publicID.AssignWithConversion(publicIDUCS2);
|
||||
|
||||
// See comment above definition of kPublicIDs about case
|
||||
// sensitivity.
|
||||
ToLowerCase(publicID);
|
||||
|
||||
// Binary search to see if we can find the correct public ID.
|
||||
size_t index;
|
||||
bool found = BinarySearchIf(kPublicIDs, 0, ArrayLength(kPublicIDs),
|
||||
PublicIdComparator(publicID), &index);
|
||||
if (!found) {
|
||||
// The DOCTYPE is not in our list, so it must be full_standards.
|
||||
aParseMode = eDTDMode_full_standards;
|
||||
aDocType = eHTML_Strict;
|
||||
return;
|
||||
}
|
||||
|
||||
switch ((resultFlags & PARSE_DTD_HAVE_SYSTEM_ID)
|
||||
? kPublicIDs[index].mode_if_sysid
|
||||
: kPublicIDs[index].mode_if_no_sysid)
|
||||
{
|
||||
case PubIDInfo::eQuirks:
|
||||
aParseMode = eDTDMode_quirks;
|
||||
aDocType = eHTML_Quirks;
|
||||
break;
|
||||
case PubIDInfo::eAlmostStandards:
|
||||
aParseMode = eDTDMode_almost_standards;
|
||||
aDocType = eHTML_Strict;
|
||||
break;
|
||||
case PubIDInfo::eFullStandards:
|
||||
aParseMode = eDTDMode_full_standards;
|
||||
aDocType = eHTML_Strict;
|
||||
break;
|
||||
default:
|
||||
NS_NOTREACHED("no other cases!");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// badly formed DOCTYPE -> quirks
|
||||
aParseMode = eDTDMode_quirks;
|
||||
aDocType = eHTML_Quirks;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
DetermineParseMode(const nsString& aBuffer, nsDTDMode& aParseMode,
|
||||
eParserDocType& aDocType, const nsACString& aMimeType)
|
||||
{
|
||||
if (aMimeType.EqualsLiteral(TEXT_HTML)) {
|
||||
DetermineHTMLParseMode(aBuffer, aParseMode, aDocType);
|
||||
} else if (nsContentUtils::IsPlainTextType(aMimeType)) {
|
||||
aDocType = ePlainText;
|
||||
aParseMode = eDTDMode_quirks;
|
||||
} else { // Some form of XML
|
||||
aDocType = eXML;
|
||||
aParseMode = eDTDMode_full_standards;
|
||||
}
|
||||
}
|
||||
|
||||
static nsIDTD*
|
||||
FindSuitableDTD(CParserContext& aParserContext)
|
||||
{
|
||||
@ -849,15 +441,14 @@ nsParser::WillBuildModel(nsString& aFilename)
|
||||
|
||||
if (eDTDMode_unknown == mParserContext->mDTDMode ||
|
||||
eDTDMode_autodetect == mParserContext->mDTDMode) {
|
||||
char16_t buf[1025];
|
||||
nsFixedString theBuffer(buf, 1024, 0);
|
||||
|
||||
// Grab 1024 characters, starting at the first non-whitespace
|
||||
// character, to look for the doctype in.
|
||||
mParserContext->mScanner->Peek(theBuffer, 1024, mParserContext->mScanner->FirstNonWhitespacePosition());
|
||||
DetermineParseMode(theBuffer, mParserContext->mDTDMode,
|
||||
mParserContext->mDocType, mParserContext->mMimeType);
|
||||
if (mIsAboutBlank) {
|
||||
mParserContext->mDTDMode = eDTDMode_quirks;
|
||||
mParserContext->mDocType = eHTML_Quirks;
|
||||
} else {
|
||||
mParserContext->mDTDMode = eDTDMode_full_standards;
|
||||
mParserContext->mDocType = eXML;
|
||||
}
|
||||
} // else XML fragment with nested parser context
|
||||
|
||||
NS_ASSERTION(!mDTD || !mParserContext->mPrevContext,
|
||||
"Clobbering DTD for non-root parser context!");
|
||||
|
@ -329,70 +329,6 @@ nsresult nsScanner::GetChar(char16_t& aChar) {
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* peek ahead to consume next char from scanner's internal
|
||||
* input buffer
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
nsresult nsScanner::Peek(char16_t& aChar, uint32_t aOffset) {
|
||||
aChar = 0;
|
||||
|
||||
if (!mSlidingBuffer || mCurrentPosition == mEndPosition) {
|
||||
return NS_ERROR_HTMLPARSER_EOF;
|
||||
}
|
||||
|
||||
if (aOffset > 0) {
|
||||
if (mCountRemaining <= aOffset)
|
||||
return NS_ERROR_HTMLPARSER_EOF;
|
||||
|
||||
nsScannerIterator pos = mCurrentPosition;
|
||||
pos.advance(aOffset);
|
||||
aChar=*pos;
|
||||
}
|
||||
else {
|
||||
aChar=*mCurrentPosition;
|
||||
}
|
||||
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
nsresult nsScanner::Peek(nsAString& aStr, int32_t aNumChars, int32_t aOffset)
|
||||
{
|
||||
if (!mSlidingBuffer || mCurrentPosition == mEndPosition) {
|
||||
return NS_ERROR_HTMLPARSER_EOF;
|
||||
}
|
||||
|
||||
nsScannerIterator start, end;
|
||||
|
||||
start = mCurrentPosition;
|
||||
|
||||
if ((int32_t)mCountRemaining <= aOffset) {
|
||||
return NS_ERROR_HTMLPARSER_EOF;
|
||||
}
|
||||
|
||||
if (aOffset > 0) {
|
||||
start.advance(aOffset);
|
||||
}
|
||||
|
||||
if (mCountRemaining < uint32_t(aNumChars + aOffset)) {
|
||||
end = mEndPosition;
|
||||
}
|
||||
else {
|
||||
end = start;
|
||||
end.advance(aNumChars);
|
||||
}
|
||||
|
||||
if (!CopyUnicodeTo(start, end, aStr)) {
|
||||
return NS_ERROR_OUT_OF_MEMORY;
|
||||
}
|
||||
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
void nsScanner::BindSubstring(nsScannerSubstring& aSubstring, const nsScannerIterator& aStart, const nsScannerIterator& aEnd)
|
||||
{
|
||||
aSubstring.Rebind(*mSlidingBuffer, aStart, aEnd);
|
||||
|
@ -60,18 +60,6 @@ class nsScanner {
|
||||
*/
|
||||
nsresult GetChar(char16_t& ch);
|
||||
|
||||
/**
|
||||
* peek ahead to consume next char from scanner's internal
|
||||
* input buffer
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param ch is the char to accept new value
|
||||
* @return error code reflecting read status
|
||||
*/
|
||||
nsresult Peek(char16_t& ch, uint32_t aOffset=0);
|
||||
|
||||
nsresult Peek(nsAString& aStr, int32_t aNumChars, int32_t aOffset = 0);
|
||||
|
||||
/**
|
||||
* Records current offset position in input stream. This allows us
|
||||
* to back up to this point if the need should arise, such as when
|
||||
|
@ -6,6 +6,7 @@
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "nsScannerString.h"
|
||||
#include "mozilla/CheckedInt.h"
|
||||
|
||||
|
||||
/**
|
||||
@ -472,7 +473,13 @@ CopyUnicodeTo( const nsScannerIterator& aSrcStart,
|
||||
nsAString& aDest )
|
||||
{
|
||||
nsAString::iterator writer;
|
||||
if (!aDest.SetLength(Distance(aSrcStart, aSrcEnd), mozilla::fallible)) {
|
||||
|
||||
mozilla::CheckedInt<nsAString::size_type> distance(Distance(aSrcStart, aSrcEnd));
|
||||
if (!distance.isValid()) {
|
||||
return false; // overflow detected
|
||||
}
|
||||
|
||||
if (!aDest.SetLength(distance.value(), mozilla::fallible)) {
|
||||
aDest.Truncate();
|
||||
return false; // out of memory
|
||||
}
|
||||
@ -505,8 +512,14 @@ AppendUnicodeTo( const nsScannerIterator& aSrcStart,
|
||||
nsAString& aDest )
|
||||
{
|
||||
nsAString::iterator writer;
|
||||
uint32_t oldLength = aDest.Length();
|
||||
if (!aDest.SetLength(oldLength + Distance(aSrcStart, aSrcEnd), mozilla::fallible))
|
||||
const nsAString::size_type oldLength = aDest.Length();
|
||||
CheckedInt<nsAString::size_type> newLen(Distance(aSrcStart, aSrcEnd));
|
||||
newLen += oldLength;
|
||||
if (!newLen.isValid()) {
|
||||
return false; // overflow detected
|
||||
}
|
||||
|
||||
if (!aDest.SetLength(newLen.value(), mozilla::fallible))
|
||||
return false; // out of memory
|
||||
aDest.BeginWriting(writer).advance(oldLength);
|
||||
nsScannerIterator fromBegin(aSrcStart);
|
||||
|
Loading…
Reference in New Issue
Block a user