Bug 582788 - Treat non-ASCII-superset encodings the same way in late meta handling as in meta prescan. r=bzbarsky, a=blocking2.0-betaN.

--HG--
extra : rebase_source : 53407afad2a7304d77c1faa3e43301db4fa84ff2
This commit is contained in:
Henri Sivonen 2010-07-30 13:03:54 +03:00
parent 15b293df08
commit ac49929194
5 changed files with 67 additions and 20 deletions

View File

@ -76,6 +76,9 @@ nsHtml5MetaScanner::sniff(nsHtml5ByteReadable* bytes, nsIUnicodeDecoder** decode
PRBool
nsHtml5MetaScanner::tryCharset(nsString* charset)
{
// This code needs to stay in sync with
// nsHtml5StreamParser::internalEncodingDeclaration. Unfortunately, the
// trickery with member fields here leads to some copy-paste reuse. :-(
nsresult res = NS_OK;
nsCOMPtr<nsICharsetConverterManager> convManager = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &res);
if (NS_FAILED(res)) {
@ -85,12 +88,9 @@ nsHtml5MetaScanner::tryCharset(nsString* charset)
nsCAutoString encoding;
CopyUTF16toUTF8(*charset, encoding);
// XXX spec says only UTF-16; the check below also accepts the BE/LE variants
if (encoding.LowerCaseEqualsASCII("utf-16") ||
encoding.LowerCaseEqualsASCII("utf-16be") ||
encoding.LowerCaseEqualsASCII("utf-16le") ||
encoding.LowerCaseEqualsASCII("utf-32") ||
encoding.LowerCaseEqualsASCII("utf-32be") ||
encoding.LowerCaseEqualsASCII("utf-32le")) {
if (encoding.LowerCaseEqualsLiteral("utf-16") ||
encoding.LowerCaseEqualsLiteral("utf-16be") ||
encoding.LowerCaseEqualsLiteral("utf-16le")) {
mCharset.Assign("UTF-8");
res = convManager->GetUnicodeDecoderRaw(mCharset.get(), getter_AddRefs(mUnicodeDecoder));
if (NS_FAILED(res)) {
@ -109,17 +109,17 @@ nsHtml5MetaScanner::tryCharset(nsString* charset)
if (NS_FAILED(res)) {
return PR_FALSE;
}
if (preferred.LowerCaseEqualsASCII("utf-16") ||
preferred.LowerCaseEqualsASCII("utf-16be") ||
preferred.LowerCaseEqualsASCII("utf-16le") ||
preferred.LowerCaseEqualsASCII("utf-32") ||
preferred.LowerCaseEqualsASCII("utf-32be") ||
preferred.LowerCaseEqualsASCII("utf-32le") ||
preferred.LowerCaseEqualsASCII("utf-7") ||
preferred.LowerCaseEqualsASCII("jis_x0212-1990") ||
preferred.LowerCaseEqualsASCII("x-jis0208") ||
preferred.LowerCaseEqualsASCII("x-imap4-modified-utf7") ||
preferred.LowerCaseEqualsASCII("x-user-defined")) {
if (preferred.LowerCaseEqualsLiteral("utf-16") ||
preferred.LowerCaseEqualsLiteral("utf-16be") ||
preferred.LowerCaseEqualsLiteral("utf-16le") ||
preferred.LowerCaseEqualsLiteral("utf-32") ||
preferred.LowerCaseEqualsLiteral("utf-32be") ||
preferred.LowerCaseEqualsLiteral("utf-32le") ||
preferred.LowerCaseEqualsLiteral("utf-7") ||
preferred.LowerCaseEqualsLiteral("jis_x0212-1990") ||
preferred.LowerCaseEqualsLiteral("x-jis0208") ||
preferred.LowerCaseEqualsLiteral("x-imap4-modified-utf7") ||
preferred.LowerCaseEqualsLiteral("x-user-defined")) {
return PR_FALSE;
}
res = convManager->GetUnicodeDecoderRaw(preferred.get(), getter_AddRefs(mUnicodeDecoder));

View File

@ -749,6 +749,9 @@ nsHtml5StreamParser::OnDataAvailable(nsIRequest* aRequest,
void
nsHtml5StreamParser::internalEncodingDeclaration(nsString* aEncoding)
{
// This code needs to stay in sync with
// nsHtml5MetaScanner::tryCharset. Unfortunately, the
// trickery with member fields there leads to some copy-paste reuse. :-(
NS_ASSERTION(IsParserThread(), "Wrong thread!");
if (mCharsetSource >= kCharsetFromMetaTag) { // this threshold corresponds to "confident" in the HTML5 spec
return;
@ -758,14 +761,21 @@ nsHtml5StreamParser::internalEncodingDeclaration(nsString* aEncoding)
return; // not reparsing even if we wanted to
}
nsCAutoString newEncoding;
CopyUTF16toUTF8(*aEncoding, newEncoding);
// XXX spec says only UTF-16; the check below also accepts UTF-16BE and
// UTF-16LE and replaces all three with UTF-8
if (newEncoding.LowerCaseEqualsLiteral("utf-16") ||
newEncoding.LowerCaseEqualsLiteral("utf-16be") ||
newEncoding.LowerCaseEqualsLiteral("utf-16le")) {
newEncoding.Assign("UTF-8");
}
nsresult rv = NS_OK;
nsCOMPtr<nsICharsetAlias> calias(do_GetService(kCharsetAliasCID, &rv));
if (NS_FAILED(rv)) {
NS_NOTREACHED("Charset alias service not available.");
return;
}
nsCAutoString newEncoding;
CopyUTF16toUTF8(*aEncoding, newEncoding);
PRBool eq;
rv = calias->Equals(newEncoding, mCharset, &eq);
if (NS_FAILED(rv)) {
@ -787,6 +797,21 @@ nsHtml5StreamParser::internalEncodingDeclaration(nsString* aEncoding)
return;
}
if (preferred.LowerCaseEqualsLiteral("utf-16") ||
preferred.LowerCaseEqualsLiteral("utf-16be") ||
preferred.LowerCaseEqualsLiteral("utf-16le") ||
preferred.LowerCaseEqualsLiteral("utf-32") ||
preferred.LowerCaseEqualsLiteral("utf-32be") ||
preferred.LowerCaseEqualsLiteral("utf-32le") ||
preferred.LowerCaseEqualsLiteral("utf-7") ||
preferred.LowerCaseEqualsLiteral("jis_x0212-1990") ||
preferred.LowerCaseEqualsLiteral("x-jis0208") ||
preferred.LowerCaseEqualsLiteral("x-imap4-modified-utf7") ||
preferred.LowerCaseEqualsLiteral("x-user-defined")) {
// Not a rough ASCII superset
return;
}
mTreeBuilder->NeedsCharsetSwitchTo(preferred);
FlushTreeOpsAndDisarmTimer();
Interrupt();

View File

@ -0,0 +1,11 @@
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<!-- The meta element above declares UTF-8. Presumably this is the reference
     page (bug582788-1-ref.html) of the reftest pair for bug 582788; the file
     name is not shown next to this content, so confirm before relying on it.
     This comment is kept after the meta element so the position of the
     encoding declaration in the byte stream is unchanged. -->
<title>Not ISO-10646</title>
</head>
<body>
<p>Not ISO-10646</p>
</body>
</html>

View File

@ -0,0 +1,11 @@
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=iso-10646">
<!-- The meta element above declares charset=iso-10646, while the title and
     paragraph read "Not ISO-10646". Presumably this is the test page
     (bug582788-1.html) of the reftest pair for bug 582788, expected to render
     the same as its reference; the file name is not shown next to this
     content, so confirm. This comment is kept after the meta element so the
     position of the encoding declaration in the byte stream is unchanged. -->
<title>Not ISO-10646</title>
</head>
<body>
<p>Not ISO-10646</p>
</body>
</html>

View File

@ -1,2 +1,2 @@
== bug566280-1.html bug566280-1-ref.html
== bug582788-1.html bug582788-1-ref.html