diff --git a/content/html/document/src/nsHTMLDocument.cpp b/content/html/document/src/nsHTMLDocument.cpp
index a929aa51307d..769262fb95e6 100644
--- a/content/html/document/src/nsHTMLDocument.cpp
+++ b/content/html/document/src/nsHTMLDocument.cpp
@@ -313,11 +313,7 @@ nsHTMLDocument::CreateShell(nsPresContext* aContext,
aInstancePtrResult);
}
-// The following Try*Charset will return false only if the charset source
-// should be considered (ie. aCharsetSource < thisCharsetSource) but we failed
-// to get the charset from this source.
-
-bool
+void
nsHTMLDocument::TryHintCharset(nsIMarkupDocumentViewer* aMarkupDV,
int32_t& aCharsetSource, nsACString& aCharset)
{
@@ -331,17 +327,17 @@ nsHTMLDocument::TryHintCharset(nsIMarkupDocumentViewer* aMarkupDV,
aMarkupDV->SetHintCharacterSetSource((int32_t)(kCharsetUninitialized));
if(requestCharsetSource <= aCharsetSource)
- return true;
+ return;
- if(NS_SUCCEEDED(rv)) {
+ if(NS_SUCCEEDED(rv) && IsAsciiCompatible(requestCharset)) {
aCharsetSource = requestCharsetSource;
aCharset = requestCharset;
- return true;
+ return;
}
}
}
- return false;
+ return;
}
@@ -361,6 +357,8 @@ nsHTMLDocument::TryUserForcedCharset(nsIMarkupDocumentViewer* aMarkupDV,
rv = aMarkupDV->GetForceCharacterSet(forceCharsetFromDocShell);
}
+ // Not making the IsAsciiCompatible() check here to allow the user to
+ // force UTF-16 from the menu.
if(NS_SUCCEEDED(rv) && !forceCharsetFromDocShell.IsEmpty()) {
aCharset = forceCharsetFromDocShell;
//TODO: we should define appropriate constant for force charset
@@ -392,7 +390,12 @@ nsHTMLDocument::TryCacheCharset(nsICachingChannel* aCachingChannel,
nsCString cachedCharset;
rv = aCachingChannel->GetCacheTokenCachedCharset(cachedCharset);
- if (NS_SUCCEEDED(rv) && !cachedCharset.IsEmpty())
+ // Check IsAsciiCompatible() even in the cache case, because the value
+ // might be stale and in the case of a stale charset that is not a rough
+ // ASCII superset, the parser has no way to recover.
+ if (NS_SUCCEEDED(rv) &&
+ !cachedCharset.IsEmpty() &&
+ IsAsciiCompatible(cachedCharset))
{
aCharset = cachedCharset;
aCharsetSource = kCharsetFromCache;
@@ -417,69 +420,87 @@ CheckSameOrigin(nsINode* aNode1, nsINode* aNode2)
}
+// Returns false when aPreferredName matches, via LowerCaseEqualsLiteral(),
+// one of "utf-16", "utf-16be", "utf-16le", "utf-7" or
+// "x-imap4-modified-utf7" -- the encodings treated as not being a rough
+// ASCII superset. Returns true for every other name.
bool
+nsHTMLDocument::IsAsciiCompatible(const nsACString& aPreferredName)
+{
+  // UTF-16 in any of its spellings.
+  if (aPreferredName.LowerCaseEqualsLiteral("utf-16") ||
+      aPreferredName.LowerCaseEqualsLiteral("utf-16be") ||
+      aPreferredName.LowerCaseEqualsLiteral("utf-16le")) {
+    return false;
+  }
+  // The two UTF-7 variants.
+  if (aPreferredName.LowerCaseEqualsLiteral("utf-7") ||
+      aPreferredName.LowerCaseEqualsLiteral("x-imap4-modified-utf7")) {
+    return false;
+  }
+  return true;
+}
+
+// Inherits the charset of the parent docshell when the parent's charset
+// source permits it.
+//
+// Does nothing when aDocShell is null or the docshell reports no parent
+// charset. A parent source at or above kCharsetFromParentForced is taken as
+// kCharsetFromParentForced without further checks. kCharsetFromHintPrevDoc,
+// and any other source at or above kCharsetFromCache, is only accepted when
+// aParentDocument is non-null, is same-origin with this document and the
+// parent charset passes IsAsciiCompatible(). aCharset and aCharsetSource are
+// overwritten only if the resulting source is not lower than the incoming
+// aCharsetSource.
+void
nsHTMLDocument::TryParentCharset(nsIDocShell* aDocShell,
nsIDocument* aParentDocument,
int32_t& aCharsetSource,
nsACString& aCharset)
{
- if (aDocShell) {
- int32_t source;
- nsCOMPtr<nsIAtom> csAtom;
- int32_t parentSource;
- aDocShell->GetParentCharsetSource(&parentSource);
- if (kCharsetFromParentForced <= parentSource)
- source = kCharsetFromParentForced;
- else if (kCharsetFromHintPrevDoc == parentSource) {
- // Make sure that's OK
- if (!aParentDocument || !CheckSameOrigin(this, aParentDocument)) {
- return false;
- }
-
- // if parent is posted doc, set this prevent autodections
- // I'm not sure this makes much sense... but whatever.
- source = kCharsetFromHintPrevDoc;
- }
- else if (kCharsetFromCache <= parentSource) {
- // Make sure that's OK
- if (!aParentDocument || !CheckSameOrigin(this, aParentDocument)) {
- return false;
- }
-
- source = kCharsetFromParentFrame;
- }
- else
- return false;
-
- if (source < aCharsetSource)
- return true;
-
- aDocShell->GetParentCharset(getter_AddRefs(csAtom));
- if (csAtom) {
- csAtom->ToUTF8String(aCharset);
- aCharsetSource = source;
- return true;
- }
+ if (!aDocShell) {
+ return;
}
- return false;
+ int32_t source;
+ nsCOMPtr<nsIAtom> csAtom;
+ int32_t parentSource;
+ nsAutoCString parentCharset;
+ aDocShell->GetParentCharset(getter_AddRefs(csAtom));
+ if (!csAtom) {
+ return;
+ }
+ aDocShell->GetParentCharsetSource(&parentSource);
+ csAtom->ToUTF8String(parentCharset);
+ if (kCharsetFromParentForced <= parentSource) {
+ source = kCharsetFromParentForced;
+ } else if (kCharsetFromHintPrevDoc == parentSource) {
+ // Only inherit from a same-origin parent whose charset is ASCII-compatible
+ if (!aParentDocument ||
+ !CheckSameOrigin(this, aParentDocument) ||
+ !IsAsciiCompatible(parentCharset)) {
+ return;
+ }
+
+ // If the parent is a posted doc, set this to prevent autodetection.
+ // I'm not sure this makes much sense... but whatever.
+ source = kCharsetFromHintPrevDoc;
+ } else if (kCharsetFromCache <= parentSource) {
+ // Only inherit from a same-origin parent whose charset is ASCII-compatible
+ if (!aParentDocument ||
+ !CheckSameOrigin(this, aParentDocument) ||
+ !IsAsciiCompatible(parentCharset)) {
+ return;
+ }
+
+ source = kCharsetFromParentFrame;
+ } else {
+ return;
+ }
+
+ // Do not override a charset that came from a stronger source.
+ if (source < aCharsetSource) {
+ return;
+ }
+
+ aCharset.Assign(parentCharset);
+ aCharsetSource = source;
}
-bool
+// Unless aCharsetSource is already at or above kCharsetFromWeakDocTypeDefault,
+// sets aCharset from the localized intl.charset.default pref (or to
+// ISO-8859-1 when the pref is empty or rejected by IsAsciiCompatible()) and
+// sets aCharsetSource to kCharsetFromWeakDocTypeDefault.
+void
nsHTMLDocument::UseWeakDocTypeDefault(int32_t& aCharsetSource,
nsACString& aCharset)
{
if (kCharsetFromWeakDocTypeDefault <= aCharsetSource)
- return true;
- // fallback value in case docshell return error
- aCharset.AssignLiteral("ISO-8859-1");
+ return;
const nsAdoptingCString& defCharset =
Preferences::GetLocalizedCString("intl.charset.default");
- if (!defCharset.IsEmpty()) {
+ // Ignore an intl.charset.default value that IsAsciiCompatible() rejects, so
+ // the pref cannot select an encoding that is not a rough ASCII superset.
+ if (!defCharset.IsEmpty() && IsAsciiCompatible(defCharset)) {
aCharset = defCharset;
- aCharsetSource = kCharsetFromWeakDocTypeDefault;
+ } else {
+ aCharset.AssignLiteral("ISO-8859-1");
}
- return true;
+ aCharsetSource = kCharsetFromWeakDocTypeDefault;
+ return;
}
bool
@@ -494,6 +515,8 @@ nsHTMLDocument::TryDefaultCharset( nsIMarkupDocumentViewer* aMarkupDV,
if (aMarkupDV) {
nsresult rv =
aMarkupDV->GetDefaultCharacterSet(defaultCharsetFromDocShell);
+ // Not making the IsAsciiCompatible() check here to allow the user to
+ // force UTF-16 from the menu.
if(NS_SUCCEEDED(rv)) {
aCharset = defaultCharsetFromDocShell;
diff --git a/content/html/document/src/nsHTMLDocument.h b/content/html/document/src/nsHTMLDocument.h
index a2e58e7c8e07..17d97ffcae3d 100644
--- a/content/html/document/src/nsHTMLDocument.h
+++ b/content/html/document/src/nsHTMLDocument.h
@@ -218,7 +218,9 @@ protected:
static uint32_t gWyciwygSessionCnt;
- static bool TryHintCharset(nsIMarkupDocumentViewer* aMarkupDV,
+ static bool IsAsciiCompatible(const nsACString& aPreferredName);
+
+ static void TryHintCharset(nsIMarkupDocumentViewer* aMarkupDV,
int32_t& aCharsetSource,
nsACString& aCharset);
static bool TryUserForcedCharset(nsIMarkupDocumentViewer* aMarkupDV,
@@ -229,10 +231,10 @@ protected:
int32_t& aCharsetSource,
nsACString& aCharset);
// aParentDocument could be null.
- bool TryParentCharset(nsIDocShell* aDocShell,
+ void TryParentCharset(nsIDocShell* aDocShell,
nsIDocument* aParentDocument,
int32_t& charsetSource, nsACString& aCharset);
- static bool UseWeakDocTypeDefault(int32_t& aCharsetSource,
+ static void UseWeakDocTypeDefault(int32_t& aCharsetSource,
nsACString& aCharset);
static bool TryDefaultCharset(nsIMarkupDocumentViewer* aMarkupDV,
int32_t& aCharsetSource,
diff --git a/parser/html/nsHtml5MetaScannerCppSupplement.h b/parser/html/nsHtml5MetaScannerCppSupplement.h
index c6b6376432e4..402e0216ccf5 100644
--- a/parser/html/nsHtml5MetaScannerCppSupplement.h
+++ b/parser/html/nsHtml5MetaScannerCppSupplement.h
@@ -56,10 +56,7 @@ nsHtml5MetaScanner::tryCharset(nsString* charset)
preferred.LowerCaseEqualsLiteral("utf-16be") ||
preferred.LowerCaseEqualsLiteral("utf-16le") ||
preferred.LowerCaseEqualsLiteral("utf-7") ||
- preferred.LowerCaseEqualsLiteral("jis_x0212-1990") ||
- preferred.LowerCaseEqualsLiteral("x-jis0208") ||
- preferred.LowerCaseEqualsLiteral("x-imap4-modified-utf7") ||
- preferred.LowerCaseEqualsLiteral("x-user-defined")) {
+ preferred.LowerCaseEqualsLiteral("x-imap4-modified-utf7")) {
return false;
}
res = convManager->GetUnicodeDecoderRaw(preferred.get(), getter_AddRefs(mUnicodeDecoder));
diff --git a/parser/html/nsHtml5StreamParser.cpp b/parser/html/nsHtml5StreamParser.cpp
index c8671bcdde58..1ac7f733bcf2 100644
--- a/parser/html/nsHtml5StreamParser.cpp
+++ b/parser/html/nsHtml5StreamParser.cpp
@@ -1213,10 +1213,7 @@ nsHtml5StreamParser::PreferredForInternalEncodingDecl(nsACString& aEncoding)
preferred.LowerCaseEqualsLiteral("utf-16be") ||
preferred.LowerCaseEqualsLiteral("utf-16le") ||
preferred.LowerCaseEqualsLiteral("utf-7") ||
- preferred.LowerCaseEqualsLiteral("jis_x0212-1990") ||
- preferred.LowerCaseEqualsLiteral("x-jis0208") ||
- preferred.LowerCaseEqualsLiteral("x-imap4-modified-utf7") ||
- preferred.LowerCaseEqualsLiteral("x-user-defined")) {
+ preferred.LowerCaseEqualsLiteral("x-imap4-modified-utf7")) {
// Not a rough ASCII superset
mTreeBuilder->MaybeComplainAboutCharset("EncMetaNonRoughSuperset",
true,
diff --git a/parser/htmlparser/tests/reftest/bug599320-1-ref.html b/parser/htmlparser/tests/reftest/bug599320-1-ref.html
new file mode 100644
index 000000000000..bb48fe5d256b
--- /dev/null
+++ b/parser/htmlparser/tests/reftest/bug599320-1-ref.html
@@ -0,0 +1,17 @@
+
+
+
+
+
+UTF-16 doc
+
+
+UTF-16 doc
+
+Euro sign: €
+iframe:
+
+
+
+
+
diff --git a/parser/htmlparser/tests/reftest/bug599320-1.html b/parser/htmlparser/tests/reftest/bug599320-1.html
new file mode 100644
index 000000000000..590e9126c3fa
Binary files /dev/null and b/parser/htmlparser/tests/reftest/bug599320-1.html differ
diff --git a/parser/htmlparser/tests/reftest/frame599320-1-ref.html b/parser/htmlparser/tests/reftest/frame599320-1-ref.html
new file mode 100644
index 000000000000..735c368f8b53
--- /dev/null
+++ b/parser/htmlparser/tests/reftest/frame599320-1-ref.html
@@ -0,0 +1,15 @@
+
+
+
+
+
+Non-UTF-16 doc
+
+
+Non-UTF-16 doc
+
+Euro sign: €
+
+
+
+
diff --git a/parser/htmlparser/tests/reftest/frame599320-1.html b/parser/htmlparser/tests/reftest/frame599320-1.html
new file mode 100644
index 000000000000..2b5b3383690e
--- /dev/null
+++ b/parser/htmlparser/tests/reftest/frame599320-1.html
@@ -0,0 +1,1092 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Non-UTF-16 doc
+
+
+Non-UTF-16 doc
+
+Euro sign: €
+
+
+
+
diff --git a/parser/htmlparser/tests/reftest/reftest.list b/parser/htmlparser/tests/reftest/reftest.list
index cd378313cbf1..d4015f890bd8 100644
--- a/parser/htmlparser/tests/reftest/reftest.list
+++ b/parser/htmlparser/tests/reftest/reftest.list
@@ -5,6 +5,7 @@
== bug582788-1.html bug582788-1-ref.html
== bug582940-1.html bug582940-1-ref.html
== bug592656-1.html bug592656-1-ref.html
+== bug599320-1.html bug599320-1-ref.html
== bug608373-1.html bug608373-1-ref.html
fails-if(/^Windows\x20NT\x206\.1/.test(http.oscpu)&&!layersGPUAccelerated&&!azureSkia) == view-source:bug482921-1.html bug482921-1-ref.html # bug 703201
== view-source:bug482921-2.xhtml bug482921-2-ref.html