diff --git a/content/html/document/src/nsHTMLDocument.cpp b/content/html/document/src/nsHTMLDocument.cpp index a929aa51307d..769262fb95e6 100644 --- a/content/html/document/src/nsHTMLDocument.cpp +++ b/content/html/document/src/nsHTMLDocument.cpp @@ -313,11 +313,7 @@ nsHTMLDocument::CreateShell(nsPresContext* aContext, aInstancePtrResult); } -// The following Try*Charset will return false only if the charset source -// should be considered (ie. aCharsetSource < thisCharsetSource) but we failed -// to get the charset from this source. - -bool +void nsHTMLDocument::TryHintCharset(nsIMarkupDocumentViewer* aMarkupDV, int32_t& aCharsetSource, nsACString& aCharset) { @@ -331,17 +327,17 @@ nsHTMLDocument::TryHintCharset(nsIMarkupDocumentViewer* aMarkupDV, aMarkupDV->SetHintCharacterSetSource((int32_t)(kCharsetUninitialized)); if(requestCharsetSource <= aCharsetSource) - return true; + return; - if(NS_SUCCEEDED(rv)) { + if(NS_SUCCEEDED(rv) && IsAsciiCompatible(requestCharset)) { aCharsetSource = requestCharsetSource; aCharset = requestCharset; - return true; + return; } } } - return false; + return; } @@ -361,6 +357,8 @@ nsHTMLDocument::TryUserForcedCharset(nsIMarkupDocumentViewer* aMarkupDV, rv = aMarkupDV->GetForceCharacterSet(forceCharsetFromDocShell); } + // Not making the IsAsciiCompatible() check here to allow the user to + // force UTF-16 from the menu. if(NS_SUCCEEDED(rv) && !forceCharsetFromDocShell.IsEmpty()) { aCharset = forceCharsetFromDocShell; //TODO: we should define appropriate constant for force charset @@ -392,7 +390,12 @@ nsHTMLDocument::TryCacheCharset(nsICachingChannel* aCachingChannel, nsCString cachedCharset; rv = aCachingChannel->GetCacheTokenCachedCharset(cachedCharset); - if (NS_SUCCEEDED(rv) && !cachedCharset.IsEmpty()) + // Check IsAsciiCompatible() even in the cache case, because the value + // might be stale and in the case of a stale charset that is not a rough + // ASCII superset, the parser has no way to recover. + if (NS_SUCCEEDED(rv) && + !cachedCharset.IsEmpty() && + IsAsciiCompatible(cachedCharset)) { aCharset = cachedCharset; aCharsetSource = kCharsetFromCache; @@ -417,69 +420,87 @@ CheckSameOrigin(nsINode* aNode1, nsINode* aNode2) } bool +nsHTMLDocument::IsAsciiCompatible(const nsACString& aPreferredName) +{ + return !(aPreferredName.LowerCaseEqualsLiteral("utf-16") || + aPreferredName.LowerCaseEqualsLiteral("utf-16be") || + aPreferredName.LowerCaseEqualsLiteral("utf-16le") || + aPreferredName.LowerCaseEqualsLiteral("utf-7") || + aPreferredName.LowerCaseEqualsLiteral("x-imap4-modified-utf7")); +} + +void nsHTMLDocument::TryParentCharset(nsIDocShell* aDocShell, nsIDocument* aParentDocument, int32_t& aCharsetSource, nsACString& aCharset) { - if (aDocShell) { - int32_t source; - nsCOMPtr csAtom; - int32_t parentSource; - aDocShell->GetParentCharsetSource(&parentSource); - if (kCharsetFromParentForced <= parentSource) - source = kCharsetFromParentForced; - else if (kCharsetFromHintPrevDoc == parentSource) { - // Make sure that's OK - if (!aParentDocument || !CheckSameOrigin(this, aParentDocument)) { - return false; - } - - // if parent is posted doc, set this prevent autodections - // I'm not sure this makes much sense... but whatever. - source = kCharsetFromHintPrevDoc; - } - else if (kCharsetFromCache <= parentSource) { - // Make sure that's OK - if (!aParentDocument || !CheckSameOrigin(this, aParentDocument)) { - return false; - } - - source = kCharsetFromParentFrame; - } - else - return false; - - if (source < aCharsetSource) - return true; - - aDocShell->GetParentCharset(getter_AddRefs(csAtom)); - if (csAtom) { - csAtom->ToUTF8String(aCharset); - aCharsetSource = source; - return true; - } + if (!aDocShell) { + return; } - return false; + int32_t source; + nsCOMPtr csAtom; + int32_t parentSource; + nsAutoCString parentCharset; + aDocShell->GetParentCharset(getter_AddRefs(csAtom)); + if (!csAtom) { + return; + } + aDocShell->GetParentCharsetSource(&parentSource); + csAtom->ToUTF8String(parentCharset); + if (kCharsetFromParentForced <= parentSource) { + source = kCharsetFromParentForced; + } else if (kCharsetFromHintPrevDoc == parentSource) { + // Make sure that's OK + if (!aParentDocument || + !CheckSameOrigin(this, aParentDocument) || + !IsAsciiCompatible(parentCharset)) { + return; + } + + // if parent is posted doc, set this prevent autodetections + // I'm not sure this makes much sense... but whatever. + source = kCharsetFromHintPrevDoc; + } else if (kCharsetFromCache <= parentSource) { + // Make sure that's OK + if (!aParentDocument || + !CheckSameOrigin(this, aParentDocument) || + !IsAsciiCompatible(parentCharset)) { + return; + } + + source = kCharsetFromParentFrame; + } else { + return; + } + + if (source < aCharsetSource) { + return; + } + + aCharset.Assign(parentCharset); + aCharsetSource = source; } -bool +void nsHTMLDocument::UseWeakDocTypeDefault(int32_t& aCharsetSource, nsACString& aCharset) { if (kCharsetFromWeakDocTypeDefault <= aCharsetSource) - return true; - // fallback value in case docshell return error - aCharset.AssignLiteral("ISO-8859-1"); + return; const nsAdoptingCString& defCharset = Preferences::GetLocalizedCString("intl.charset.default"); - if (!defCharset.IsEmpty()) { + // Don't let the user break things by setting intl.charset.default to + // not a rough ASCII superset + if (!defCharset.IsEmpty() && IsAsciiCompatible(defCharset)) { aCharset = defCharset; - aCharsetSource = kCharsetFromWeakDocTypeDefault; + } else { + aCharset.AssignLiteral("ISO-8859-1"); } - return true; + aCharsetSource = kCharsetFromWeakDocTypeDefault; + return; } bool @@ -494,6 +515,8 @@ nsHTMLDocument::TryDefaultCharset( nsIMarkupDocumentViewer* aMarkupDV, if (aMarkupDV) { nsresult rv = aMarkupDV->GetDefaultCharacterSet(defaultCharsetFromDocShell); + // Not making the IsAsciiCompatible() check here to allow the user to + // force UTF-16 from the menu. if(NS_SUCCEEDED(rv)) { aCharset = defaultCharsetFromDocShell; diff --git a/content/html/document/src/nsHTMLDocument.h b/content/html/document/src/nsHTMLDocument.h index a2e58e7c8e07..17d97ffcae3d 100644 --- a/content/html/document/src/nsHTMLDocument.h +++ b/content/html/document/src/nsHTMLDocument.h @@ -218,7 +218,9 @@ protected: static uint32_t gWyciwygSessionCnt; - static bool TryHintCharset(nsIMarkupDocumentViewer* aMarkupDV, + static bool IsAsciiCompatible(const nsACString& aPreferredName); + + static void TryHintCharset(nsIMarkupDocumentViewer* aMarkupDV, int32_t& aCharsetSource, nsACString& aCharset); static bool TryUserForcedCharset(nsIMarkupDocumentViewer* aMarkupDV, @@ -229,10 +231,10 @@ protected: int32_t& aCharsetSource, nsACString& aCharset); // aParentDocument could be null. - bool TryParentCharset(nsIDocShell* aDocShell, + void TryParentCharset(nsIDocShell* aDocShell, nsIDocument* aParentDocument, int32_t& charsetSource, nsACString& aCharset); - static bool UseWeakDocTypeDefault(int32_t& aCharsetSource, + static void UseWeakDocTypeDefault(int32_t& aCharsetSource, nsACString& aCharset); static bool TryDefaultCharset(nsIMarkupDocumentViewer* aMarkupDV, int32_t& aCharsetSource, diff --git a/parser/html/nsHtml5MetaScannerCppSupplement.h b/parser/html/nsHtml5MetaScannerCppSupplement.h index c6b6376432e4..402e0216ccf5 100644 --- a/parser/html/nsHtml5MetaScannerCppSupplement.h +++ b/parser/html/nsHtml5MetaScannerCppSupplement.h @@ -56,10 +56,7 @@ nsHtml5MetaScanner::tryCharset(nsString* charset) preferred.LowerCaseEqualsLiteral("utf-16be") || preferred.LowerCaseEqualsLiteral("utf-16le") || preferred.LowerCaseEqualsLiteral("utf-7") || - preferred.LowerCaseEqualsLiteral("jis_x0212-1990") || - preferred.LowerCaseEqualsLiteral("x-jis0208") || - preferred.LowerCaseEqualsLiteral("x-imap4-modified-utf7") || - preferred.LowerCaseEqualsLiteral("x-user-defined")) { + preferred.LowerCaseEqualsLiteral("x-imap4-modified-utf7")) { return false; } res = convManager->GetUnicodeDecoderRaw(preferred.get(), getter_AddRefs(mUnicodeDecoder)); diff --git a/parser/html/nsHtml5StreamParser.cpp b/parser/html/nsHtml5StreamParser.cpp index c8671bcdde58..1ac7f733bcf2 100644 --- a/parser/html/nsHtml5StreamParser.cpp +++ b/parser/html/nsHtml5StreamParser.cpp @@ -1213,10 +1213,7 @@ nsHtml5StreamParser::PreferredForInternalEncodingDecl(nsACString& aEncoding) preferred.LowerCaseEqualsLiteral("utf-16be") || preferred.LowerCaseEqualsLiteral("utf-16le") || preferred.LowerCaseEqualsLiteral("utf-7") || - preferred.LowerCaseEqualsLiteral("jis_x0212-1990") || - preferred.LowerCaseEqualsLiteral("x-jis0208") || - preferred.LowerCaseEqualsLiteral("x-imap4-modified-utf7") || - preferred.LowerCaseEqualsLiteral("x-user-defined")) { + preferred.LowerCaseEqualsLiteral("x-imap4-modified-utf7")) { // Not a rough ASCII superset mTreeBuilder->MaybeComplainAboutCharset("EncMetaNonRoughSuperset", true, diff --git a/parser/htmlparser/tests/reftest/bug599320-1-ref.html b/parser/htmlparser/tests/reftest/bug599320-1-ref.html new file mode 100644 index 000000000000..bb48fe5d256b --- /dev/null +++ b/parser/htmlparser/tests/reftest/bug599320-1-ref.html @@ -0,0 +1,17 @@ + + + + + +UTF-16 doc + + +

UTF-16 doc

+ +

Euro sign: €

+

iframe:

+ + + + + diff --git a/parser/htmlparser/tests/reftest/bug599320-1.html b/parser/htmlparser/tests/reftest/bug599320-1.html new file mode 100644 index 000000000000..590e9126c3fa Binary files /dev/null and b/parser/htmlparser/tests/reftest/bug599320-1.html differ diff --git a/parser/htmlparser/tests/reftest/frame599320-1-ref.html b/parser/htmlparser/tests/reftest/frame599320-1-ref.html new file mode 100644 index 000000000000..735c368f8b53 --- /dev/null +++ b/parser/htmlparser/tests/reftest/frame599320-1-ref.html @@ -0,0 +1,15 @@ + + + + + +Non-UTF-16 doc + + +

Non-UTF-16 doc

+ +

Euro sign: €

+ + + + diff --git a/parser/htmlparser/tests/reftest/frame599320-1.html b/parser/htmlparser/tests/reftest/frame599320-1.html new file mode 100644 index 000000000000..2b5b3383690e --- /dev/null +++ b/parser/htmlparser/tests/reftest/frame599320-1.html @@ -0,0 +1,1092 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Non-UTF-16 doc + + +

Non-UTF-16 doc

+ +

Euro sign: €

+ + + + diff --git a/parser/htmlparser/tests/reftest/reftest.list b/parser/htmlparser/tests/reftest/reftest.list index cd378313cbf1..d4015f890bd8 100644 --- a/parser/htmlparser/tests/reftest/reftest.list +++ b/parser/htmlparser/tests/reftest/reftest.list @@ -5,6 +5,7 @@ == bug582788-1.html bug582788-1-ref.html == bug582940-1.html bug582940-1-ref.html == bug592656-1.html bug592656-1-ref.html +== bug599320-1.html bug599320-1-ref.html == bug608373-1.html bug608373-1-ref.html fails-if(/^Windows\x20NT\x206\.1/.test(http.oscpu)&&!layersGPUAccelerated&&!azureSkia) == view-source:bug482921-1.html bug482921-1-ref.html # bug 703201 == view-source:bug482921-2.xhtml bug482921-2-ref.html