mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-10-10 11:55:49 +00:00
Bug 582788 - Treat non-ASCII-superset encodings the same way in late meta handling as in meta prescan. r=bzbarsky, a=blocking2.0-betaN.
--HG-- extra : rebase_source : 53407afad2a7304d77c1faa3e43301db4fa84ff2
This commit is contained in:
parent
15b293df08
commit
ac49929194
@ -76,6 +76,9 @@ nsHtml5MetaScanner::sniff(nsHtml5ByteReadable* bytes, nsIUnicodeDecoder** decode
|
||||
PRBool
|
||||
nsHtml5MetaScanner::tryCharset(nsString* charset)
|
||||
{
|
||||
// This code needs to stay in sync with
|
||||
// nsHtml5StreamParser::internalEncodingDeclaration. Unfortunately, the
|
||||
// trickery with member fields here leads to some copy-paste reuse. :-(
|
||||
nsresult res = NS_OK;
|
||||
nsCOMPtr<nsICharsetConverterManager> convManager = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &res);
|
||||
if (NS_FAILED(res)) {
|
||||
@ -85,12 +88,9 @@ nsHtml5MetaScanner::tryCharset(nsString* charset)
|
||||
nsCAutoString encoding;
|
||||
CopyUTF16toUTF8(*charset, encoding);
|
||||
// XXX The spec only names UTF-16 here; the UTF-16BE/LE variants are also replaced with UTF-8.
|
||||
if (encoding.LowerCaseEqualsASCII("utf-16") ||
|
||||
encoding.LowerCaseEqualsASCII("utf-16be") ||
|
||||
encoding.LowerCaseEqualsASCII("utf-16le") ||
|
||||
encoding.LowerCaseEqualsASCII("utf-32") ||
|
||||
encoding.LowerCaseEqualsASCII("utf-32be") ||
|
||||
encoding.LowerCaseEqualsASCII("utf-32le")) {
|
||||
if (encoding.LowerCaseEqualsLiteral("utf-16") ||
|
||||
encoding.LowerCaseEqualsLiteral("utf-16be") ||
|
||||
encoding.LowerCaseEqualsLiteral("utf-16le")) {
|
||||
mCharset.Assign("UTF-8");
|
||||
res = convManager->GetUnicodeDecoderRaw(mCharset.get(), getter_AddRefs(mUnicodeDecoder));
|
||||
if (NS_FAILED(res)) {
|
||||
@ -109,17 +109,17 @@ nsHtml5MetaScanner::tryCharset(nsString* charset)
|
||||
if (NS_FAILED(res)) {
|
||||
return PR_FALSE;
|
||||
}
|
||||
if (preferred.LowerCaseEqualsASCII("utf-16") ||
|
||||
preferred.LowerCaseEqualsASCII("utf-16be") ||
|
||||
preferred.LowerCaseEqualsASCII("utf-16le") ||
|
||||
preferred.LowerCaseEqualsASCII("utf-32") ||
|
||||
preferred.LowerCaseEqualsASCII("utf-32be") ||
|
||||
preferred.LowerCaseEqualsASCII("utf-32le") ||
|
||||
preferred.LowerCaseEqualsASCII("utf-7") ||
|
||||
preferred.LowerCaseEqualsASCII("jis_x0212-1990") ||
|
||||
preferred.LowerCaseEqualsASCII("x-jis0208") ||
|
||||
preferred.LowerCaseEqualsASCII("x-imap4-modified-utf7") ||
|
||||
preferred.LowerCaseEqualsASCII("x-user-defined")) {
|
||||
if (preferred.LowerCaseEqualsLiteral("utf-16") ||
|
||||
preferred.LowerCaseEqualsLiteral("utf-16be") ||
|
||||
preferred.LowerCaseEqualsLiteral("utf-16le") ||
|
||||
preferred.LowerCaseEqualsLiteral("utf-32") ||
|
||||
preferred.LowerCaseEqualsLiteral("utf-32be") ||
|
||||
preferred.LowerCaseEqualsLiteral("utf-32le") ||
|
||||
preferred.LowerCaseEqualsLiteral("utf-7") ||
|
||||
preferred.LowerCaseEqualsLiteral("jis_x0212-1990") ||
|
||||
preferred.LowerCaseEqualsLiteral("x-jis0208") ||
|
||||
preferred.LowerCaseEqualsLiteral("x-imap4-modified-utf7") ||
|
||||
preferred.LowerCaseEqualsLiteral("x-user-defined")) {
|
||||
return PR_FALSE;
|
||||
}
|
||||
res = convManager->GetUnicodeDecoderRaw(preferred.get(), getter_AddRefs(mUnicodeDecoder));
|
||||
|
@ -749,6 +749,9 @@ nsHtml5StreamParser::OnDataAvailable(nsIRequest* aRequest,
|
||||
void
|
||||
nsHtml5StreamParser::internalEncodingDeclaration(nsString* aEncoding)
|
||||
{
|
||||
// This code needs to stay in sync with
|
||||
// nsHtml5MetaScanner::tryCharset. Unfortunately, the
|
||||
// trickery with member fields there leads to some copy-paste reuse. :-(
|
||||
NS_ASSERTION(IsParserThread(), "Wrong thread!");
|
||||
if (mCharsetSource >= kCharsetFromMetaTag) { // this threshold corresponds to "confident" in the HTML5 spec
|
||||
return;
|
||||
@ -758,14 +761,21 @@ nsHtml5StreamParser::internalEncodingDeclaration(nsString* aEncoding)
|
||||
return; // not reparsing even if we wanted to
|
||||
}
|
||||
|
||||
nsCAutoString newEncoding;
|
||||
CopyUTF16toUTF8(*aEncoding, newEncoding);
|
||||
// XXX The spec only names UTF-16 here; the UTF-16BE/LE variants are also replaced with UTF-8.
|
||||
if (newEncoding.LowerCaseEqualsLiteral("utf-16") ||
|
||||
newEncoding.LowerCaseEqualsLiteral("utf-16be") ||
|
||||
newEncoding.LowerCaseEqualsLiteral("utf-16le")) {
|
||||
newEncoding.Assign("UTF-8");
|
||||
}
|
||||
|
||||
nsresult rv = NS_OK;
|
||||
nsCOMPtr<nsICharsetAlias> calias(do_GetService(kCharsetAliasCID, &rv));
|
||||
if (NS_FAILED(rv)) {
|
||||
NS_NOTREACHED("Charset alias service not available.");
|
||||
return;
|
||||
}
|
||||
nsCAutoString newEncoding;
|
||||
CopyUTF16toUTF8(*aEncoding, newEncoding);
|
||||
PRBool eq;
|
||||
rv = calias->Equals(newEncoding, mCharset, &eq);
|
||||
if (NS_FAILED(rv)) {
|
||||
@ -787,6 +797,21 @@ nsHtml5StreamParser::internalEncodingDeclaration(nsString* aEncoding)
|
||||
return;
|
||||
}
|
||||
|
||||
if (preferred.LowerCaseEqualsLiteral("utf-16") ||
|
||||
preferred.LowerCaseEqualsLiteral("utf-16be") ||
|
||||
preferred.LowerCaseEqualsLiteral("utf-16le") ||
|
||||
preferred.LowerCaseEqualsLiteral("utf-32") ||
|
||||
preferred.LowerCaseEqualsLiteral("utf-32be") ||
|
||||
preferred.LowerCaseEqualsLiteral("utf-32le") ||
|
||||
preferred.LowerCaseEqualsLiteral("utf-7") ||
|
||||
preferred.LowerCaseEqualsLiteral("jis_x0212-1990") ||
|
||||
preferred.LowerCaseEqualsLiteral("x-jis0208") ||
|
||||
preferred.LowerCaseEqualsLiteral("x-imap4-modified-utf7") ||
|
||||
preferred.LowerCaseEqualsLiteral("x-user-defined")) {
|
||||
// Not a rough ASCII superset
|
||||
return;
|
||||
}
|
||||
|
||||
mTreeBuilder->NeedsCharsetSwitchTo(preferred);
|
||||
FlushTreeOpsAndDisarmTimer();
|
||||
Interrupt();
|
||||
|
11
parser/htmlparser/tests/reftest/bug582788-1-ref.html
Normal file
11
parser/htmlparser/tests/reftest/bug582788-1-ref.html
Normal file
@ -0,0 +1,11 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="content-type" content="text/html; charset=utf-8">
|
||||
<title>Not ISO-10646</title>
|
||||
</head>
|
||||
<body>
|
||||
<p>Not ISO-10646</p>
|
||||
</body>
|
||||
</html>
|
||||
|
11
parser/htmlparser/tests/reftest/bug582788-1.html
Normal file
11
parser/htmlparser/tests/reftest/bug582788-1.html
Normal file
@ -0,0 +1,11 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="content-type" content="text/html; charset=iso-10646">
|
||||
<title>Not ISO-10646</title>
|
||||
</head>
|
||||
<body>
|
||||
<p>Not ISO-10646</p>
|
||||
</body>
|
||||
</html>
|
||||
|
@ -1,2 +1,2 @@
|
||||
== bug566280-1.html bug566280-1-ref.html
|
||||
|
||||
== bug582788-1.html bug582788-1-ref.html
|
||||
|
Loading…
Reference in New Issue
Block a user