Bug 1716290 - Remove protections against the document changing as part of kCharsetFromFinalUserForcedAutoDetection reload. r=emk,emilio

NOTE! In cases where there is no HTTP-layer encoding declaration, and CSS
parsing inherits the encoding from the HTML document, for preloads, this
changes the inherited encoding from windows-1252 to UTF-8 in order to
make the speculative encoding correct in the common `<meta charset=utf-8>`
case.

Differential Revision: https://phabricator.services.mozilla.com/D123593
This commit is contained in:
Henri Sivonen 2021-08-26 18:02:15 +00:00
parent e94e52b857
commit 58e0b2946c
21 changed files with 274 additions and 104 deletions

View File

@ -67,6 +67,10 @@ support-files =
file_bug1648464-1-child.html
file_bug1688368-1.sjs
file_bug1691153.html
file_bug1716290-1.sjs
file_bug1716290-2.sjs
file_bug1716290-3.sjs
file_bug1716290-4.sjs
[browser_TopLevelNavigationDelegate.js]
support-files =
@ -216,3 +220,7 @@ https_first_disabled = true
[browser_bug1705872.js]
[browser_isInitialDocument.js]
https_first_disabled = true
[browser_bug1716290-1.js]
[browser_bug1716290-2.js]
[browser_bug1716290-3.js]
[browser_bug1716290-4.js]

View File

@ -0,0 +1,24 @@
/**
 * Test entry point. Builds the URL of file_bug1716290-1.sjs on the
 * mochi.test server and hands it to runCharsetTest together with the
 * afterOpen and afterChangeCharset callbacks.
 */
function test() {
  const testPageUrl =
    "http://mochi.test:8888/browser/docshell/test/browser/" +
    "file_bug1716290-1.sjs";
  runCharsetTest(testPageUrl, afterOpen, afterChangeCharset);
}
/**
 * Checks that the content document reports Shift_JIS as its character set.
 * Presumably invoked by runCharsetTest after the initial load (verify
 * against the test head file).
 */
function afterOpen() {
  const reportedCharset = content.document.characterSet;
  is(reportedCharset, "Shift_JIS", "Doc should report Shift_JIS initially");
}
/**
 * Checks that the content document now reports windows-1252, i.e. that the
 * detector result took precedence over the charset given in the HTTP header.
 */
function afterChangeCharset() {
  const reportedCharset = content.document.characterSet;
  const message =
    "Doc should report windows-1252 subsequently (detector should override header)";
  is(reportedCharset, "windows-1252", message);
}

View File

@ -0,0 +1,24 @@
/**
 * Test entry point. Builds the URL of file_bug1716290-2.sjs on the
 * mochi.test server and hands it to runCharsetTest together with the
 * afterOpen and afterChangeCharset callbacks.
 */
function test() {
  const testPageUrl =
    "http://mochi.test:8888/browser/docshell/test/browser/" +
    "file_bug1716290-2.sjs";
  runCharsetTest(testPageUrl, afterOpen, afterChangeCharset);
}
/**
 * Checks that the content document reports Shift_JIS as its character set.
 * Presumably invoked by runCharsetTest after the initial load (verify
 * against the test head file).
 */
function afterOpen() {
  const reportedCharset = content.document.characterSet;
  is(reportedCharset, "Shift_JIS", "Doc should report Shift_JIS initially");
}
/**
 * Checks that the content document now reports windows-1252, i.e. that the
 * detector result took precedence over a meta declaration that resolves to
 * the replacement encoding.
 */
function afterChangeCharset() {
  const reportedCharset = content.document.characterSet;
  const message =
    "Doc should report windows-1252 subsequently (detector should override meta resolving to the replacement encoding)";
  is(reportedCharset, "windows-1252", message);
}

View File

@ -0,0 +1,24 @@
/**
 * Test entry point. Builds the URL of file_bug1716290-3.sjs on the
 * mochi.test server and hands it to runCharsetTest together with the
 * afterOpen and afterChangeCharset callbacks.
 */
function test() {
  const testPageUrl =
    "http://mochi.test:8888/browser/docshell/test/browser/" +
    "file_bug1716290-3.sjs";
  runCharsetTest(testPageUrl, afterOpen, afterChangeCharset);
}
/**
 * Checks that the content document reports Shift_JIS as its character set.
 * Presumably invoked by runCharsetTest after the initial load (verify
 * against the test head file).
 */
function afterOpen() {
  const reportedCharset = content.document.characterSet;
  is(reportedCharset, "Shift_JIS", "Doc should report Shift_JIS initially");
}
/**
 * Checks that the content document now reports the replacement encoding,
 * i.e. that a non-ASCII-compatible charset in the HTTP header took
 * precedence over the detector.
 */
function afterChangeCharset() {
  const reportedCharset = content.document.characterSet;
  const message =
    "Doc should report replacement subsequently (non-ASCII-compatible HTTP header should override detector)";
  is(reportedCharset, "replacement", message);
}

View File

@ -0,0 +1,24 @@
/**
 * Test entry point. Builds the URL of file_bug1716290-4.sjs on the
 * mochi.test server and hands it to runCharsetTest together with the
 * afterOpen and afterChangeCharset callbacks.
 */
function test() {
  const testPageUrl =
    "http://mochi.test:8888/browser/docshell/test/browser/" +
    "file_bug1716290-4.sjs";
  runCharsetTest(testPageUrl, afterOpen, afterChangeCharset);
}
/**
 * Checks that the content document reports Shift_JIS as its character set.
 * Presumably invoked by runCharsetTest after the initial load (verify
 * against the test head file).
 */
function afterOpen() {
  const reportedCharset = content.document.characterSet;
  is(reportedCharset, "Shift_JIS", "Doc should report Shift_JIS initially");
}
/**
 * Checks that the content document now reports UTF-16BE, i.e. that a byte
 * order mark took precedence over the detector.
 */
function afterChangeCharset() {
  const reportedCharset = content.document.characterSet;
  const message =
    "Doc should report UTF-16BE subsequently (BOM should override detector)";
  is(reportedCharset, "UTF-16BE", message);
}

View File

@ -0,0 +1,18 @@
/**
 * Serves a response that depends on server-side state shared across requests.
 *
 * While the "reloaded" state is not set, the response is labeled Shift_JIS
 * in the Content-Type header and the state advances one step per request:
 * unset -> "loaded" -> "reloaded" (assuming the state starts out unset).
 * Once "reloaded" is set, the response is instead labeled windows-1254 and
 * carries only "\u00E4".
 *
 * @param request  the incoming request (unused)
 * @param response the response object to set the header on and write to
 */
function handleRequest(request, response) {
  const servingFinalReload = getState("reloaded") == "reloaded";
  if (servingFinalReload) {
    response.setHeader("Content-Type", "text/html; charset=windows-1254", false);
    response.write("\u00E4");
    return;
  }

  response.setHeader("Content-Type", "text/html; charset=Shift_JIS", false);

  // Advance the state machine: the first request marks "loaded", the next
  // one marks "reloaded".
  const nextState = getState("loaded") == "loaded" ? "reloaded" : "loaded";
  setState(nextState, nextState);

  // kilobyte to force late-detection reload
  response.write("a".repeat(1024));
  response.write("<body>");
  response.write("\u00E4");
}

View File

@ -0,0 +1,19 @@
/**
 * Serves a response that depends on server-side state shared across requests.
 * The Content-Type header never carries a charset parameter; the encoding
 * label comes from a meta element in the body instead.
 *
 * While the "reloaded" state is not set, the body starts with
 * <meta charset=Shift_JIS> and the state advances one step per request:
 * unset -> "loaded" -> "reloaded" (assuming the state starts out unset).
 * Once "reloaded" is set, the body is <meta charset=iso-2022-kr> followed
 * by "\u00E4".
 *
 * @param request  the incoming request (unused)
 * @param response the response object to set the header on and write to
 */
function handleRequest(request, response) {
  const servingFinalReload = getState("reloaded") == "reloaded";

  // Both variants use the same charset-less Content-Type.
  response.setHeader("Content-Type", "text/html", false);

  if (servingFinalReload) {
    response.write("<meta charset=iso-2022-kr>\u00E4");
    return;
  }

  // Advance the state machine: the first request marks "loaded", the next
  // one marks "reloaded".
  const nextState = getState("loaded") == "loaded" ? "reloaded" : "loaded";
  setState(nextState, nextState);

  response.write("<meta charset=Shift_JIS>");
  // kilobyte to force late-detection reload
  response.write("a".repeat(1024));
  response.write("<body>");
  response.write("\u00E4");
}

View File

@ -0,0 +1,18 @@
/**
 * Serves a response that depends on server-side state shared across requests.
 *
 * While the "reloaded" state is not set, the response is labeled Shift_JIS
 * in the Content-Type header and the state advances one step per request:
 * unset -> "loaded" -> "reloaded" (assuming the state starts out unset).
 * Once "reloaded" is set, the response is instead labeled iso-2022-kr and
 * carries only "\u00E4".
 *
 * @param request  the incoming request (unused)
 * @param response the response object to set the header on and write to
 */
function handleRequest(request, response) {
  const servingFinalReload = getState("reloaded") == "reloaded";
  if (servingFinalReload) {
    response.setHeader("Content-Type", "text/html; charset=iso-2022-kr", false);
    response.write("\u00E4");
    return;
  }

  response.setHeader("Content-Type", "text/html; charset=Shift_JIS", false);

  // Advance the state machine: the first request marks "loaded", the next
  // one marks "reloaded".
  const nextState = getState("loaded") == "loaded" ? "reloaded" : "loaded";
  setState(nextState, nextState);

  // kilobyte to force late-detection reload
  response.write("a".repeat(1024));
  response.write("<body>");
  response.write("\u00E4");
}

View File

@ -0,0 +1,18 @@
/**
 * Serves a response that depends on server-side state shared across requests.
 *
 * While the "reloaded" state is not set, the response is labeled Shift_JIS
 * in the Content-Type header and the state advances one step per request:
 * unset -> "loaded" -> "reloaded" (assuming the state starts out unset).
 * Once "reloaded" is set, the Content-Type carries no charset and the body
 * is "\u00FE\u00FF\u00E4" (the accompanying test expects this to be treated
 * as a UTF-16BE byte order mark).
 *
 * @param request  the incoming request (unused)
 * @param response the response object to set the header on and write to
 */
function handleRequest(request, response) {
  const servingFinalReload = getState("reloaded") == "reloaded";
  if (servingFinalReload) {
    response.setHeader("Content-Type", "text/html", false);
    response.write("\u00FE\u00FF\u00E4");
    return;
  }

  response.setHeader("Content-Type", "text/html; charset=Shift_JIS", false);

  // Advance the state machine: the first request marks "loaded", the next
  // one marks "reloaded".
  const nextState = getState("loaded") == "loaded" ? "reloaded" : "loaded";
  setState(nextState, nextState);

  // kilobyte to force late-detection reload
  response.write("a".repeat(1024));
  response.write("<body>");
  response.write("\u00E4");
}

View File

@ -84,6 +84,7 @@
#include "nsFocusManager.h"
#include "nsIFrame.h"
#include "nsIContent.h"
#include "mozilla/ScopeExit.h"
#include "mozilla/StyleSheet.h"
#include "mozilla/StyleSheetInlines.h"
#include "mozilla/Unused.h"
@ -192,7 +193,10 @@ void nsHTMLDocument::TryReloadCharset(nsIContentViewer* aCv,
if (kCharsetUninitialized != reloadEncodingSource) {
aCv->ForgetReloadEncoding();
if (reloadEncodingSource <= aCharsetSource) return;
if (reloadEncodingSource <= aCharsetSource ||
!IsAsciiCompatible(aEncoding)) {
return;
}
if (reloadEncoding && IsAsciiCompatible(reloadEncoding)) {
aCharsetSource = reloadEncodingSource;
@ -205,8 +209,15 @@ void nsHTMLDocument::TryReloadCharset(nsIContentViewer* aCv,
void nsHTMLDocument::TryUserForcedCharset(nsIContentViewer* aCv,
nsIDocShell* aDocShell,
int32_t& aCharsetSource,
NotNull<const Encoding*>& aEncoding) {
if (aCharsetSource >= kCharsetFromXmlDeclarationUtf16) {
NotNull<const Encoding*>& aEncoding,
bool& aForceAutoDetection) {
auto resetForce = MakeScopeExit([&] {
if (aDocShell) {
nsDocShell::Cast(aDocShell)->ResetForcedAutodetection();
}
});
if (aCharsetSource >= kCharsetFromOtherComponent) {
return;
}
@ -217,19 +228,18 @@ void nsHTMLDocument::TryUserForcedCharset(nsIContentViewer* aCv,
if (aDocShell && nsDocShell::Cast(aDocShell)->GetForcedAutodetection()) {
// This is the Character Encoding menu code path in Firefox
aEncoding = WINDOWS_1252_ENCODING;
aCharsetSource = kCharsetFromPendingUserForcedAutoDetection;
nsDocShell::Cast(aDocShell)->ResetForcedAutodetection();
aForceAutoDetection = true;
}
}
void nsHTMLDocument::TryParentCharset(nsIDocShell* aDocShell,
int32_t& aCharsetSource,
NotNull<const Encoding*>& aEncoding) {
NotNull<const Encoding*>& aEncoding,
bool& aForceAutoDetection) {
if (!aDocShell) {
return;
}
if (aCharsetSource >= kCharsetFromXmlDeclarationUtf16) {
if (aCharsetSource >= kCharsetFromOtherComponent) {
return;
}
@ -241,8 +251,7 @@ void nsHTMLDocument::TryParentCharset(nsIDocShell* aDocShell,
if (!parentCharset) {
return;
}
if (kCharsetFromPendingUserForcedAutoDetection == parentSource ||
kCharsetFromInitialUserForcedAutoDetection == parentSource ||
if (kCharsetFromInitialUserForcedAutoDetection == parentSource ||
kCharsetFromFinalUserForcedAutoDetection == parentSource) {
if (WillIgnoreCharsetOverride() ||
!IsAsciiCompatible(aEncoding) || // if channel said UTF-16
@ -250,7 +259,8 @@ void nsHTMLDocument::TryParentCharset(nsIDocShell* aDocShell,
return;
}
aEncoding = WrapNotNull(parentCharset);
aCharsetSource = kCharsetFromPendingUserForcedAutoDetection;
aCharsetSource = kCharsetFromParentFrame;
aForceAutoDetection = true;
return;
}
@ -421,7 +431,8 @@ nsresult nsHTMLDocument::StartDocumentLoad(const char* aCommand,
#endif
// These are the charset source and charset for our document
int32_t charsetSource;
bool forceAutoDetection = false;
int32_t charsetSource = kCharsetUninitialized;
auto encoding = UTF_8_ENCODING;
// For error reporting and referrer policy setting
@ -430,21 +441,15 @@ nsresult nsHTMLDocument::StartDocumentLoad(const char* aCommand,
executor = static_cast<nsHtml5TreeOpExecutor*>(mParser->GetContentSink());
}
bool channelHadCharset = false;
if (forceUtf8) {
charsetSource = kCharsetFromUtf8OnlyMime;
} else if (!IsHTMLDocument() || !docShell) { // no docshell for text/html XHR
charsetSource =
IsHTMLDocument() ? kCharsetFromFallback : kCharsetFromDocTypeDefault;
TryChannelCharset(aChannel, charsetSource, encoding, executor);
channelHadCharset = (charsetSource == kCharsetFromChannel);
} else {
NS_ASSERTION(docShell, "Unexpected null value");
charsetSource = kCharsetUninitialized;
// Used for .in and .lk TLDs. .jp is handled in the parser.
encoding = WINDOWS_1252_ENCODING;
// The following will try to get the character encoding from various
// sources. Each Try* function will return early if the source is already
// at least as large as any of the sources it might look at. Some of
@ -460,12 +465,12 @@ nsresult nsHTMLDocument::StartDocumentLoad(const char* aCommand,
// interpretation as ASCII and the user can be lured to using the
// charset menu.
TryChannelCharset(aChannel, charsetSource, encoding, executor);
channelHadCharset = (charsetSource == kCharsetFromChannel);
TryUserForcedCharset(cv, docShell, charsetSource, encoding);
TryUserForcedCharset(cv, docShell, charsetSource, encoding,
forceAutoDetection);
TryReloadCharset(cv, charsetSource, encoding); // For encoding reload
TryParentCharset(docShell, charsetSource, encoding);
TryParentCharset(docShell, charsetSource, encoding, forceAutoDetection);
}
SetDocumentCharacterSetSource(charsetSource);
@ -479,7 +484,7 @@ nsresult nsHTMLDocument::StartDocumentLoad(const char* aCommand,
#ifdef DEBUG_charset
printf(" charset = %s source %d\n", charset.get(), charsetSource);
#endif
mParser->SetDocumentCharset(encoding, charsetSource, channelHadCharset);
mParser->SetDocumentCharset(encoding, charsetSource, forceAutoDetection);
mParser->SetCommand(aCommand);
if (!IsHTMLDocument()) {

View File

@ -175,9 +175,11 @@ class nsHTMLDocument : public mozilla::dom::Document {
NotNull<const Encoding*>& aEncoding);
void TryUserForcedCharset(nsIContentViewer* aCv, nsIDocShell* aDocShell,
int32_t& aCharsetSource,
NotNull<const Encoding*>& aEncoding);
NotNull<const Encoding*>& aEncoding,
bool& aForceAutoDetection);
void TryParentCharset(nsIDocShell* aDocShell, int32_t& charsetSource,
NotNull<const Encoding*>& aEncoding);
NotNull<const Encoding*>& aEncoding,
bool& aForceAutoDetection);
// Load flags of the document's channel
uint32_t mLoadFlags;

View File

@ -95,11 +95,11 @@ nsHtml5Parser::SetCommand(eParserCommands aParserCommand) {
void nsHtml5Parser::SetDocumentCharset(NotNull<const Encoding*> aEncoding,
int32_t aCharsetSource,
bool aChannelHadCharset) {
bool aForceAutoDetection) {
MOZ_ASSERT(!mExecutor->HasStarted(), "Document charset set too late.");
MOZ_ASSERT(GetStreamParser(), "Setting charset on a script-only parser.");
GetStreamParser()->SetDocumentCharset(aEncoding, aCharsetSource,
aChannelHadCharset);
aForceAutoDetection);
mExecutor->SetDocumentCharsetAndSource(aEncoding, aCharsetSource);
}

View File

@ -67,7 +67,7 @@ class nsHtml5Parser final : public nsIParser, public nsSupportsWeakReference {
*/
virtual void SetDocumentCharset(NotNull<const Encoding*> aEncoding,
int32_t aSource,
bool aChannelHadCharset) override;
bool aForceAutoDetection) override;
/**
* Get the channel associated with this parser

View File

@ -203,6 +203,7 @@ nsHtml5StreamParser::nsHtml5StreamParser(nsHtml5TreeOpExecutor* aExecutor,
mFeedChardet(true),
mGuessEncoding(true),
mReparseForbidden(false),
mForceAutoDetection(false),
mChannelHadCharset(false),
mLastBuffer(nullptr), // Will be filled when starting
mExecutor(aExecutor),
@ -310,8 +311,6 @@ void nsHtml5StreamParser::GuessEncoding(bool aEof, bool aInitial) {
} else {
mGuessEncoding = false;
}
bool forced = (mCharsetSource == kCharsetFromPendingUserForcedAutoDetection ||
mCharsetSource == kCharsetFromInitialUserForcedAutoDetection);
MOZ_ASSERT(
mCharsetSource != kCharsetFromFinalUserForcedAutoDetection &&
mCharsetSource != kCharsetFromFinalAutoDetectionWouldHaveBeenUTF8 &&
@ -324,14 +323,15 @@ void nsHtml5StreamParser::GuessEncoding(bool aEof, bool aInitial) {
mCharsetSource != kCharsetFromFinalAutoDetectionFile);
auto ifHadBeenForced = mDetector->Guess(EmptyCString(), true);
auto encoding =
forced ? ifHadBeenForced
: mDetector->Guess(mTLD, mDecodingLocalFileWithoutTokenizing);
mForceAutoDetection
? ifHadBeenForced
: mDetector->Guess(mTLD, mDecodingLocalFileWithoutTokenizing);
int32_t source =
aInitial
? (forced
? (mForceAutoDetection
? kCharsetFromInitialUserForcedAutoDetection
: kCharsetFromInitialAutoDetectionWouldNotHaveBeenUTF8Generic)
: (forced
: (mForceAutoDetection
? kCharsetFromFinalUserForcedAutoDetection
: (mDecodingLocalFileWithoutTokenizing
? kCharsetFromFinalAutoDetectionFile
@ -377,7 +377,8 @@ void nsHtml5StreamParser::GuessEncoding(bool aEof, bool aInitial) {
mCharsetSource = MaybeRollBackSource(source);
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
} else {
MOZ_ASSERT(mCharsetSource < kCharsetFromXmlDeclarationUtf16 || forced);
MOZ_ASSERT(mCharsetSource < kCharsetFromXmlDeclarationUtf16 ||
mForceAutoDetection);
// We've already committed to a decoder. Request a reload from the
// docshell.
mTreeBuilder->NeedsCharsetSwitchTo(encoding, source, 0);
@ -452,8 +453,7 @@ nsHtml5StreamParser::SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
mUnicodeDecoder = UTF_8_ENCODING->NewDecoderWithBOMRemoval();
} else {
if (mCharsetSource >= kCharsetFromFinalAutoDetectionWouldHaveBeenUTF8) {
if (!(mCharsetSource == kCharsetFromPendingUserForcedAutoDetection ||
mCharsetSource == kCharsetFromInitialUserForcedAutoDetection)) {
if (!mForceAutoDetection) {
DontGuessEncoding();
}
mDecodingLocalFileWithoutTokenizing = false;
@ -477,6 +477,7 @@ void nsHtml5StreamParser::SetupDecodingFromBom(
mUnicodeDecoder = mEncoding->NewDecoderWithoutBOMHandling();
mCharsetSource = kCharsetFromByteOrderMark;
DontGuessEncoding();
mForceAutoDetection = false;
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
mSniffingBuffer = nullptr;
mMetaScanner = nullptr;
@ -699,11 +700,7 @@ nsresult nsHtml5StreamParser::FinalizeSniffing(Span<const uint8_t> aFromSegment,
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment);
}
bool forced = (mCharsetSource == kCharsetFromPendingUserForcedAutoDetection ||
mCharsetSource == kCharsetFromInitialUserForcedAutoDetection ||
mCharsetSource == kCharsetFromFinalUserForcedAutoDetection);
if (!mChannelHadCharset &&
(forced || mCharsetSource < kCharsetFromMetaPrescan) &&
if ((mForceAutoDetection || mCharsetSource < kCharsetFromMetaPrescan) &&
(mMode == NORMAL || mMode == VIEW_SOURCE_HTML || mMode == LOAD_AS_DATA)) {
// Look for XML declaration in text/html.
@ -724,16 +721,12 @@ nsresult nsHtml5StreamParser::FinalizeSniffing(Span<const uint8_t> aFromSegment,
bufLen = aCountToSniffingLimit;
}
const Encoding* encoding = xmldecl_parse(buf, bufLen);
if (encoding) {
if (forced &&
if (encoding && !mChannelHadCharset) {
if (mForceAutoDetection &&
(encoding->IsAsciiCompatible() || encoding == ISO_2022_JP_ENCODING)) {
// Honor override
if (mCharsetSource == kCharsetFromFinalUserForcedAutoDetection) {
DontGuessEncoding();
} else {
FinalizeSniffingWithDetector(aFromSegment, aCountToSniffingLimit,
false);
}
FinalizeSniffingWithDetector(aFromSegment, aCountToSniffingLimit,
false);
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
aFromSegment);
}
@ -748,13 +741,10 @@ nsresult nsHtml5StreamParser::FinalizeSniffing(Span<const uint8_t> aFromSegment,
SniffBOMlessUTF16BasicLatin(buf, bufLen);
}
}
if (forced && mCharsetSource != kCharsetFromIrreversibleAutoDetection) {
if (mForceAutoDetection &&
mCharsetSource != kCharsetFromIrreversibleAutoDetection) {
// neither meta nor XML declaration found, honor override
if (mCharsetSource == kCharsetFromFinalUserForcedAutoDetection) {
DontGuessEncoding();
} else {
FinalizeSniffingWithDetector(aFromSegment, aCountToSniffingLimit, false);
}
FinalizeSniffingWithDetector(aFromSegment, aCountToSniffingLimit, false);
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment);
}
@ -804,7 +794,7 @@ nsresult nsHtml5StreamParser::SniffStreamBytes(
break;
case 0x00:
if (mCharsetSource < kCharsetFromXmlDeclarationUtf16 &&
!mChannelHadCharset) {
mCharsetSource != kCharsetFromChannel) {
mBomState = SEEN_UTF_16_BE_XML_FIRST;
} else {
mBomState = BOM_SNIFFING_OVER;
@ -812,7 +802,7 @@ nsresult nsHtml5StreamParser::SniffStreamBytes(
break;
case 0x3C:
if (mCharsetSource < kCharsetFromXmlDeclarationUtf16 &&
!mChannelHadCharset) {
mCharsetSource != kCharsetFromChannel) {
mBomState = SEEN_UTF_16_LE_XML_FIRST;
} else {
mBomState = BOM_SNIFFING_OVER;
@ -936,7 +926,8 @@ nsresult nsHtml5StreamParser::SniffStreamBytes(
MOZ_ASSERT(mCharsetSource != kCharsetFromOtherComponent,
"kCharsetFromOtherComponent is for XSLT.");
if (mBomState == BOM_SNIFFING_OVER && mCharsetSource == kCharsetFromChannel) {
if (mBomState == BOM_SNIFFING_OVER && mCharsetSource >= kCharsetFromChannel &&
!mForceAutoDetection) {
// There was no BOM and the charset came from channel. mEncoding
// still contains the charset from the channel as set by an
// earlier call to SetDocumentCharset(), since we didn't find a BOM and
@ -946,7 +937,12 @@ nsresult nsHtml5StreamParser::SniffStreamBytes(
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment);
}
if (!mChannelHadCharset && !mMetaScanner &&
MOZ_ASSERT(!(mBomState == BOM_SNIFFING_OVER && mChannelHadCharset &&
!mForceAutoDetection),
"How come we're running post-BOM sniffing with channel charset unless "
"we're also processing forced detection?");
if (!mMetaScanner &&
(mMode == NORMAL || mMode == VIEW_SOURCE_HTML || mMode == LOAD_AS_DATA)) {
mMetaScanner = MakeUnique<nsHtml5MetaScanner>(mTreeBuilder.get());
}
@ -954,12 +950,7 @@ nsresult nsHtml5StreamParser::SniffStreamBytes(
if (mSniffingLength + aFromSegment.Length() >= SNIFFING_BUFFER_SIZE) {
// this is the last buffer
uint32_t countToSniffingLimit = SNIFFING_BUFFER_SIZE - mSniffingLength;
bool forced =
(mCharsetSource == kCharsetFromPendingUserForcedAutoDetection ||
mCharsetSource == kCharsetFromInitialUserForcedAutoDetection ||
mCharsetSource == kCharsetFromFinalUserForcedAutoDetection);
if (!mChannelHadCharset && (mMode == NORMAL || mMode == VIEW_SOURCE_HTML ||
mMode == LOAD_AS_DATA)) {
if (mMode == NORMAL || mMode == VIEW_SOURCE_HTML || mMode == LOAD_AS_DATA) {
nsHtml5ByteReadable readable(
aFromSegment.Elements(),
aFromSegment.Elements() + countToSniffingLimit);
@ -972,17 +963,15 @@ nsresult nsHtml5StreamParser::SniffStreamBytes(
return rv;
}
if (encoding) {
// Ignore encoding from meta if channel had charset and we're here in
// order to make forced autodetection work.
if (encoding && !mChannelHadCharset) {
// meta scan successful; honor overrides unless meta is XSS-dangerous
if (forced && (encoding->IsAsciiCompatible() ||
encoding == ISO_2022_JP_ENCODING)) {
if (mForceAutoDetection && (encoding->IsAsciiCompatible() ||
encoding == ISO_2022_JP_ENCODING)) {
// Honor override
if (mCharsetSource == kCharsetFromFinalUserForcedAutoDetection) {
DontGuessEncoding();
} else {
FinalizeSniffingWithDetector(aFromSegment, countToSniffingLimit,
false);
}
FinalizeSniffingWithDetector(aFromSegment, countToSniffingLimit,
false);
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
aFromSegment);
}
@ -998,8 +987,7 @@ nsresult nsHtml5StreamParser::SniffStreamBytes(
}
// not the last buffer
if (!mChannelHadCharset &&
(mMode == NORMAL || mMode == VIEW_SOURCE_HTML || mMode == LOAD_AS_DATA)) {
if (mMode == NORMAL || mMode == VIEW_SOURCE_HTML || mMode == LOAD_AS_DATA) {
nsHtml5ByteReadable readable(
aFromSegment.Elements(),
aFromSegment.Elements() + aFromSegment.Length());
@ -1010,16 +998,11 @@ nsresult nsHtml5StreamParser::SniffStreamBytes(
MarkAsBroken(rv);
return rv;
}
if (encoding) {
// Ignore encoding from meta if channel had charset and we're here in
// order to make forced autodetection work.
if (encoding && !mChannelHadCharset) {
// meta scan successful; honor overrides unless meta is XSS-dangerous
if ((mCharsetSource == kCharsetFromFinalUserForcedAutoDetection) &&
(encoding->IsAsciiCompatible() || encoding == ISO_2022_JP_ENCODING)) {
// Honor override
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
aFromSegment);
}
if ((mCharsetSource == kCharsetFromPendingUserForcedAutoDetection ||
mCharsetSource == kCharsetFromInitialUserForcedAutoDetection) &&
if (mForceAutoDetection &&
(encoding->IsAsciiCompatible() || encoding == ISO_2022_JP_ENCODING)) {
FinalizeSniffingWithDetector(aFromSegment, aFromSegment.Length(),
false);
@ -1370,12 +1353,9 @@ nsresult nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest) {
mInitialEncodingWasFromParentFrame = true;
}
if (!(mCharsetSource == kCharsetFromPendingUserForcedAutoDetection ||
mCharsetSource == kCharsetFromInitialUserForcedAutoDetection ||
mCharsetSource == kCharsetFromFinalUserForcedAutoDetection)) {
if (mCharsetSource >= kCharsetFromFinalAutoDetectionWouldHaveBeenUTF8) {
DontGuessEncoding();
}
if (!mForceAutoDetection &&
mCharsetSource >= kCharsetFromFinalAutoDetectionWouldHaveBeenUTF8) {
DontGuessEncoding();
}
if (mCharsetSource < kCharsetFromUtf8OnlyMime) {

View File

@ -229,15 +229,16 @@ class nsHtml5StreamParser final : public nsISupports {
* @param aCharsetSource the source of the charset
*/
inline void SetDocumentCharset(NotNull<const Encoding*> aEncoding,
int32_t aSource, bool aChannelHadCharset) {
int32_t aSource, bool aForceAutoDetection) {
MOZ_ASSERT(mStreamState == STREAM_NOT_STARTED,
"SetDocumentCharset called too late.");
NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
MOZ_ASSERT(!(aSource == kCharsetFromChannel && !aChannelHadCharset),
"If charset is from channel, channel must have had charset.");
MOZ_ASSERT(NS_IsMainThread(), "Wrong thread!");
MOZ_ASSERT(!(aForceAutoDetection && aSource >= kCharsetFromOtherComponent),
"Can't force with high-ranking source.");
mEncoding = aEncoding;
mCharsetSource = aSource;
mChannelHadCharset = aChannelHadCharset;
mForceAutoDetection = aForceAutoDetection;
mChannelHadCharset = (aSource == kCharsetFromChannel);
}
nsresult GetChannel(nsIChannel** aChannel);
@ -532,7 +533,12 @@ class nsHtml5StreamParser final : public nsISupports {
bool mReparseForbidden;
/**
* Whether the channel had charset.
* Whether the Repair Text Encoding menu item was invoked
*/
bool mForceAutoDetection;
/**
* Whether there was a valid charset parameter on the HTTP layer.
*/
bool mChannelHadCharset;

View File

@ -101,12 +101,13 @@ class nsIParser : public nsParserBase {
* @update ftang 4/23/99
* @param aCharset- the charset of a document
* @param aCharsetSource- the source of the charset
* @param aChannelHadCharset- whether the channel had charset
* @param aForceAutoDetection- whether Repair Text Encoding menu item was
* invoked
* @return nada
*/
virtual void SetDocumentCharset(NotNull<const Encoding*> aCharset,
int32_t aSource,
bool aChannelHadCharset = false) = 0;
bool aForceAutoDetection = false) = 0;
/**
* Get the channel associated with this parser

View File

@ -256,7 +256,7 @@ nsParser::SetCommand(eParserCommands aParserCommand) {
*/
void nsParser::SetDocumentCharset(NotNull<const Encoding*> aCharset,
int32_t aCharsetSource,
bool aChannelHadCharset) {
bool aForceAutoDetection) {
mCharset = aCharset;
mCharsetSource = aCharsetSource;
if (mParserContext && mParserContext->mScanner) {

View File

@ -129,7 +129,7 @@ class nsParser final : public nsIParser,
*/
virtual void SetDocumentCharset(NotNull<const Encoding*> aCharset,
int32_t aSource,
bool aChannelHadCharset) override;
bool aForceAutoDetection) override;
NotNull<const Encoding*> GetDocumentCharset(int32_t& aSource) {
aSource = mCharsetSource;

View File

@ -31,8 +31,6 @@ enum {
kCharsetFromMetaTag, // this one and greater: HTML5 Confident
kCharsetFromChannel,
kCharsetFromOtherComponent,
kCharsetFromPendingUserForcedAutoDetection, // Marker value to be upgraded
// later
kCharsetFromInitialUserForcedAutoDetection,
kCharsetFromFinalUserForcedAutoDetection,
kCharsetFromXmlDeclarationUtf16, // This one is overridden by

View File

@ -56,7 +56,7 @@ class PrototypeDocumentParser final : public nsIParser,
virtual void SetDocumentCharset(NotNull<const Encoding*> aEncoding,
int32_t aSource,
bool aChannelHadCharset) override {}
bool aForceAutoDetection) override {}
NS_IMETHOD GetChannel(nsIChannel** aChannel) override {
return NS_ERROR_NOT_IMPLEMENTED;

View File

@ -1,4 +1,5 @@
<!DOCTYPE html>
<meta charset=utf-8>
<title>Makes sure that Link headers preload resources</title>
<!--
This and the line below ensure that the trailing crossorigin in the link