mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-24 21:31:04 +00:00
Bug 1716290 - Remove protections against the document changing as part of kCharsetFromFinalUserForcedAutoDetection reload. r=emk,emilio
NOTE! In cases where there is no HTTP-layer encoding declaration, and CSS parsing inherits the encoding from the HTML document, for preloads, this changes the inherited encoding from windows-1252 to UTF-8 in order to make the speculative encoding correct in the common `<meta charset=utf-8>` case. Differential Revision: https://phabricator.services.mozilla.com/D123593
This commit is contained in:
parent
e94e52b857
commit
58e0b2946c
@ -67,6 +67,10 @@ support-files =
|
||||
file_bug1648464-1-child.html
|
||||
file_bug1688368-1.sjs
|
||||
file_bug1691153.html
|
||||
file_bug1716290-1.sjs
|
||||
file_bug1716290-2.sjs
|
||||
file_bug1716290-3.sjs
|
||||
file_bug1716290-4.sjs
|
||||
|
||||
[browser_TopLevelNavigationDelegate.js]
|
||||
support-files =
|
||||
@ -216,3 +220,7 @@ https_first_disabled = true
|
||||
[browser_bug1705872.js]
|
||||
[browser_isInitialDocument.js]
|
||||
https_first_disabled = true
|
||||
[browser_bug1716290-1.js]
|
||||
[browser_bug1716290-2.js]
|
||||
[browser_bug1716290-3.js]
|
||||
[browser_bug1716290-4.js]
|
||||
|
24
docshell/test/browser/browser_bug1716290-1.js
Normal file
24
docshell/test/browser/browser_bug1716290-1.js
Normal file
@ -0,0 +1,24 @@
|
||||
/**
 * Mochitest entry point for bug 1716290, case 1.
 *
 * Builds the URL of the file_bug1716290-1.sjs server script and passes it,
 * together with the two check callbacks, to runCharsetTest (a harness helper
 * defined outside this file).
 */
function test() {
  const baseUrl = "http://mochi.test:8888/browser/docshell/test/browser/";
  const pageUrl = baseUrl + "file_bug1716290-1.sjs";
  runCharsetTest(pageUrl, afterOpen, afterChangeCharset);
}
|
||||
|
||||
/**
 * First check callback handed to runCharsetTest (presumably run once the
 * page has been opened -- confirm against the harness).
 *
 * Asserts that the content document reports Shift_JIS as its character set.
 */
function afterOpen() {
  const reportedCharset = content.document.characterSet;
  is(reportedCharset, "Shift_JIS", "Doc should report Shift_JIS initially");
}
|
||||
|
||||
/**
 * Second check callback handed to runCharsetTest (presumably run after the
 * harness has triggered the charset change -- confirm against the harness).
 *
 * Asserts that the content document now reports windows-1252.
 */
function afterChangeCharset() {
  const reportedCharset = content.document.characterSet;
  const message =
    "Doc should report windows-1252 subsequently (detector should override header)";
  is(reportedCharset, "windows-1252", message);
}
|
24
docshell/test/browser/browser_bug1716290-2.js
Normal file
24
docshell/test/browser/browser_bug1716290-2.js
Normal file
@ -0,0 +1,24 @@
|
||||
/**
 * Mochitest entry point for bug 1716290, case 2.
 *
 * Builds the URL of the file_bug1716290-2.sjs server script and passes it,
 * together with the two check callbacks, to runCharsetTest (a harness helper
 * defined outside this file).
 */
function test() {
  const baseUrl = "http://mochi.test:8888/browser/docshell/test/browser/";
  const pageUrl = baseUrl + "file_bug1716290-2.sjs";
  runCharsetTest(pageUrl, afterOpen, afterChangeCharset);
}
|
||||
|
||||
/**
 * First check callback handed to runCharsetTest (presumably run once the
 * page has been opened -- confirm against the harness).
 *
 * Asserts that the content document reports Shift_JIS as its character set.
 */
function afterOpen() {
  const reportedCharset = content.document.characterSet;
  is(reportedCharset, "Shift_JIS", "Doc should report Shift_JIS initially");
}
|
||||
|
||||
/**
 * Second check callback handed to runCharsetTest (presumably run after the
 * harness has triggered the charset change -- confirm against the harness).
 *
 * Asserts that the content document now reports windows-1252.
 */
function afterChangeCharset() {
  const reportedCharset = content.document.characterSet;
  const message =
    "Doc should report windows-1252 subsequently (detector should override meta resolving to the replacement encoding)";
  is(reportedCharset, "windows-1252", message);
}
|
24
docshell/test/browser/browser_bug1716290-3.js
Normal file
24
docshell/test/browser/browser_bug1716290-3.js
Normal file
@ -0,0 +1,24 @@
|
||||
/**
 * Mochitest entry point for bug 1716290, case 3.
 *
 * Builds the URL of the file_bug1716290-3.sjs server script and passes it,
 * together with the two check callbacks, to runCharsetTest (a harness helper
 * defined outside this file).
 */
function test() {
  const baseUrl = "http://mochi.test:8888/browser/docshell/test/browser/";
  const pageUrl = baseUrl + "file_bug1716290-3.sjs";
  runCharsetTest(pageUrl, afterOpen, afterChangeCharset);
}
|
||||
|
||||
/**
 * First check callback handed to runCharsetTest (presumably run once the
 * page has been opened -- confirm against the harness).
 *
 * Asserts that the content document reports Shift_JIS as its character set.
 */
function afterOpen() {
  const reportedCharset = content.document.characterSet;
  is(reportedCharset, "Shift_JIS", "Doc should report Shift_JIS initially");
}
|
||||
|
||||
/**
 * Second check callback handed to runCharsetTest (presumably run after the
 * harness has triggered the charset change -- confirm against the harness).
 *
 * Asserts that the content document now reports the "replacement" encoding.
 */
function afterChangeCharset() {
  const reportedCharset = content.document.characterSet;
  const message =
    "Doc should report replacement subsequently (non-ASCII-compatible HTTP header should override detector)";
  is(reportedCharset, "replacement", message);
}
|
24
docshell/test/browser/browser_bug1716290-4.js
Normal file
24
docshell/test/browser/browser_bug1716290-4.js
Normal file
@ -0,0 +1,24 @@
|
||||
/**
 * Mochitest entry point for bug 1716290, case 4.
 *
 * Builds the URL of the file_bug1716290-4.sjs server script and passes it,
 * together with the two check callbacks, to runCharsetTest (a harness helper
 * defined outside this file).
 */
function test() {
  const baseUrl = "http://mochi.test:8888/browser/docshell/test/browser/";
  const pageUrl = baseUrl + "file_bug1716290-4.sjs";
  runCharsetTest(pageUrl, afterOpen, afterChangeCharset);
}
|
||||
|
||||
/**
 * First check callback handed to runCharsetTest (presumably run once the
 * page has been opened -- confirm against the harness).
 *
 * Asserts that the content document reports Shift_JIS as its character set.
 */
function afterOpen() {
  const reportedCharset = content.document.characterSet;
  is(reportedCharset, "Shift_JIS", "Doc should report Shift_JIS initially");
}
|
||||
|
||||
/**
 * Second check callback handed to runCharsetTest (presumably run after the
 * harness has triggered the charset change -- confirm against the harness).
 *
 * Asserts that the content document now reports UTF-16BE.
 */
function afterChangeCharset() {
  const reportedCharset = content.document.characterSet;
  const message =
    "Doc should report UTF-16BE subsequently (BOM should override detector)";
  is(reportedCharset, "UTF-16BE", message);
}
|
18
docshell/test/browser/file_bug1716290-1.sjs
Normal file
18
docshell/test/browser/file_bug1716290-1.sjs
Normal file
@ -0,0 +1,18 @@
|
||||
/**
 * Server-side handler whose response depends on how often it has been hit.
 *
 * State is tracked through the server-provided getState/setState helpers
 * (not defined in this file; assumed to persist between requests):
 *  - while "reloaded" is not set, the response is declared as Shift_JIS and
 *    carries a kilobyte of ASCII before the non-ASCII byte; each such request
 *    advances the marker ("loaded" first, then "reloaded");
 *  - once "reloaded" is set, the response is declared as windows-1254 and
 *    contains only the non-ASCII byte.
 *
 * @param request  the incoming request object (unused here)
 * @param response the response object that headers and body are written to
 */
function handleRequest(request, response) {
  if (getState("reloaded") == "reloaded") {
    response.setHeader("Content-Type", "text/html; charset=windows-1254", false);
    response.write("\u00E4");
    return;
  }

  response.setHeader("Content-Type", "text/html; charset=Shift_JIS", false);

  // Advance the marker: first visit records "loaded", the next "reloaded".
  const nextMarker = getState("loaded") == "loaded" ? "reloaded" : "loaded";
  setState(nextMarker, nextMarker);

  // kilobyte to force late-detection reload
  response.write("a".repeat(1024));
  response.write("<body>");
  response.write("\u00E4");
}
|
19
docshell/test/browser/file_bug1716290-2.sjs
Normal file
19
docshell/test/browser/file_bug1716290-2.sjs
Normal file
@ -0,0 +1,19 @@
|
||||
/**
 * Server-side handler whose response depends on how often it has been hit.
 *
 * State is tracked through the server-provided getState/setState helpers
 * (not defined in this file; assumed to persist between requests):
 *  - while "reloaded" is not set, the response has no HTTP charset, starts
 *    with a Shift_JIS meta declaration and carries a kilobyte of ASCII before
 *    the non-ASCII byte; each such request advances the marker ("loaded"
 *    first, then "reloaded");
 *  - once "reloaded" is set, the response has no HTTP charset and consists of
 *    an iso-2022-kr meta declaration followed by the non-ASCII byte.
 *
 * @param request  the incoming request object (unused here)
 * @param response the response object that headers and body are written to
 */
function handleRequest(request, response) {
  if (getState("reloaded") == "reloaded") {
    response.setHeader("Content-Type", "text/html", false);
    response.write("<meta charset=iso-2022-kr>\u00E4");
    return;
  }

  response.setHeader("Content-Type", "text/html", false);

  // Advance the marker: first visit records "loaded", the next "reloaded".
  const nextMarker = getState("loaded") == "loaded" ? "reloaded" : "loaded";
  setState(nextMarker, nextMarker);

  response.write("<meta charset=Shift_JIS>");
  // kilobyte to force late-detection reload
  response.write("a".repeat(1024));
  response.write("<body>");
  response.write("\u00E4");
}
|
18
docshell/test/browser/file_bug1716290-3.sjs
Normal file
18
docshell/test/browser/file_bug1716290-3.sjs
Normal file
@ -0,0 +1,18 @@
|
||||
/**
 * Server-side handler whose response depends on how often it has been hit.
 *
 * State is tracked through the server-provided getState/setState helpers
 * (not defined in this file; assumed to persist between requests):
 *  - while "reloaded" is not set, the response is declared as Shift_JIS and
 *    carries a kilobyte of ASCII before the non-ASCII byte; each such request
 *    advances the marker ("loaded" first, then "reloaded");
 *  - once "reloaded" is set, the response is declared as iso-2022-kr and
 *    contains only the non-ASCII byte.
 *
 * @param request  the incoming request object (unused here)
 * @param response the response object that headers and body are written to
 */
function handleRequest(request, response) {
  if (getState("reloaded") == "reloaded") {
    response.setHeader("Content-Type", "text/html; charset=iso-2022-kr", false);
    response.write("\u00E4");
    return;
  }

  response.setHeader("Content-Type", "text/html; charset=Shift_JIS", false);

  // Advance the marker: first visit records "loaded", the next "reloaded".
  const nextMarker = getState("loaded") == "loaded" ? "reloaded" : "loaded";
  setState(nextMarker, nextMarker);

  // kilobyte to force late-detection reload
  response.write("a".repeat(1024));
  response.write("<body>");
  response.write("\u00E4");
}
|
18
docshell/test/browser/file_bug1716290-4.sjs
Normal file
18
docshell/test/browser/file_bug1716290-4.sjs
Normal file
@ -0,0 +1,18 @@
|
||||
/**
 * Server-side handler whose response depends on how often it has been hit.
 *
 * State is tracked through the server-provided getState/setState helpers
 * (not defined in this file; assumed to persist between requests):
 *  - while "reloaded" is not set, the response is declared as Shift_JIS and
 *    carries a kilobyte of ASCII before the non-ASCII byte; each such request
 *    advances the marker ("loaded" first, then "reloaded");
 *  - once "reloaded" is set, the response has no HTTP charset and its body
 *    begins with the characters U+00FE U+00FF (the paired test expects these
 *    to act as a UTF-16BE byte order mark) followed by U+00E4.
 *
 * @param request  the incoming request object (unused here)
 * @param response the response object that headers and body are written to
 */
function handleRequest(request, response) {
  if (getState("reloaded") == "reloaded") {
    response.setHeader("Content-Type", "text/html", false);
    response.write("\u00FE\u00FF\u00E4");
    return;
  }

  response.setHeader("Content-Type", "text/html; charset=Shift_JIS", false);

  // Advance the marker: first visit records "loaded", the next "reloaded".
  const nextMarker = getState("loaded") == "loaded" ? "reloaded" : "loaded";
  setState(nextMarker, nextMarker);

  // kilobyte to force late-detection reload
  response.write("a".repeat(1024));
  response.write("<body>");
  response.write("\u00E4");
}
|
@ -84,6 +84,7 @@
|
||||
#include "nsFocusManager.h"
|
||||
#include "nsIFrame.h"
|
||||
#include "nsIContent.h"
|
||||
#include "mozilla/ScopeExit.h"
|
||||
#include "mozilla/StyleSheet.h"
|
||||
#include "mozilla/StyleSheetInlines.h"
|
||||
#include "mozilla/Unused.h"
|
||||
@ -192,7 +193,10 @@ void nsHTMLDocument::TryReloadCharset(nsIContentViewer* aCv,
|
||||
if (kCharsetUninitialized != reloadEncodingSource) {
|
||||
aCv->ForgetReloadEncoding();
|
||||
|
||||
if (reloadEncodingSource <= aCharsetSource) return;
|
||||
if (reloadEncodingSource <= aCharsetSource ||
|
||||
!IsAsciiCompatible(aEncoding)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (reloadEncoding && IsAsciiCompatible(reloadEncoding)) {
|
||||
aCharsetSource = reloadEncodingSource;
|
||||
@ -205,8 +209,15 @@ void nsHTMLDocument::TryReloadCharset(nsIContentViewer* aCv,
|
||||
void nsHTMLDocument::TryUserForcedCharset(nsIContentViewer* aCv,
|
||||
nsIDocShell* aDocShell,
|
||||
int32_t& aCharsetSource,
|
||||
NotNull<const Encoding*>& aEncoding) {
|
||||
if (aCharsetSource >= kCharsetFromXmlDeclarationUtf16) {
|
||||
NotNull<const Encoding*>& aEncoding,
|
||||
bool& aForceAutoDetection) {
|
||||
auto resetForce = MakeScopeExit([&] {
|
||||
if (aDocShell) {
|
||||
nsDocShell::Cast(aDocShell)->ResetForcedAutodetection();
|
||||
}
|
||||
});
|
||||
|
||||
if (aCharsetSource >= kCharsetFromOtherComponent) {
|
||||
return;
|
||||
}
|
||||
|
||||
@ -217,19 +228,18 @@ void nsHTMLDocument::TryUserForcedCharset(nsIContentViewer* aCv,
|
||||
|
||||
if (aDocShell && nsDocShell::Cast(aDocShell)->GetForcedAutodetection()) {
|
||||
// This is the Character Encoding menu code path in Firefox
|
||||
aEncoding = WINDOWS_1252_ENCODING;
|
||||
aCharsetSource = kCharsetFromPendingUserForcedAutoDetection;
|
||||
nsDocShell::Cast(aDocShell)->ResetForcedAutodetection();
|
||||
aForceAutoDetection = true;
|
||||
}
|
||||
}
|
||||
|
||||
void nsHTMLDocument::TryParentCharset(nsIDocShell* aDocShell,
|
||||
int32_t& aCharsetSource,
|
||||
NotNull<const Encoding*>& aEncoding) {
|
||||
NotNull<const Encoding*>& aEncoding,
|
||||
bool& aForceAutoDetection) {
|
||||
if (!aDocShell) {
|
||||
return;
|
||||
}
|
||||
if (aCharsetSource >= kCharsetFromXmlDeclarationUtf16) {
|
||||
if (aCharsetSource >= kCharsetFromOtherComponent) {
|
||||
return;
|
||||
}
|
||||
|
||||
@ -241,8 +251,7 @@ void nsHTMLDocument::TryParentCharset(nsIDocShell* aDocShell,
|
||||
if (!parentCharset) {
|
||||
return;
|
||||
}
|
||||
if (kCharsetFromPendingUserForcedAutoDetection == parentSource ||
|
||||
kCharsetFromInitialUserForcedAutoDetection == parentSource ||
|
||||
if (kCharsetFromInitialUserForcedAutoDetection == parentSource ||
|
||||
kCharsetFromFinalUserForcedAutoDetection == parentSource) {
|
||||
if (WillIgnoreCharsetOverride() ||
|
||||
!IsAsciiCompatible(aEncoding) || // if channel said UTF-16
|
||||
@ -250,7 +259,8 @@ void nsHTMLDocument::TryParentCharset(nsIDocShell* aDocShell,
|
||||
return;
|
||||
}
|
||||
aEncoding = WrapNotNull(parentCharset);
|
||||
aCharsetSource = kCharsetFromPendingUserForcedAutoDetection;
|
||||
aCharsetSource = kCharsetFromParentFrame;
|
||||
aForceAutoDetection = true;
|
||||
return;
|
||||
}
|
||||
|
||||
@ -421,7 +431,8 @@ nsresult nsHTMLDocument::StartDocumentLoad(const char* aCommand,
|
||||
#endif
|
||||
|
||||
// These are the charset source and charset for our document
|
||||
int32_t charsetSource;
|
||||
bool forceAutoDetection = false;
|
||||
int32_t charsetSource = kCharsetUninitialized;
|
||||
auto encoding = UTF_8_ENCODING;
|
||||
|
||||
// For error reporting and referrer policy setting
|
||||
@ -430,21 +441,15 @@ nsresult nsHTMLDocument::StartDocumentLoad(const char* aCommand,
|
||||
executor = static_cast<nsHtml5TreeOpExecutor*>(mParser->GetContentSink());
|
||||
}
|
||||
|
||||
bool channelHadCharset = false;
|
||||
if (forceUtf8) {
|
||||
charsetSource = kCharsetFromUtf8OnlyMime;
|
||||
} else if (!IsHTMLDocument() || !docShell) { // no docshell for text/html XHR
|
||||
charsetSource =
|
||||
IsHTMLDocument() ? kCharsetFromFallback : kCharsetFromDocTypeDefault;
|
||||
TryChannelCharset(aChannel, charsetSource, encoding, executor);
|
||||
channelHadCharset = (charsetSource == kCharsetFromChannel);
|
||||
} else {
|
||||
NS_ASSERTION(docShell, "Unexpected null value");
|
||||
|
||||
charsetSource = kCharsetUninitialized;
|
||||
// Used for .in and .lk TLDs. .jp is handled in the parser.
|
||||
encoding = WINDOWS_1252_ENCODING;
|
||||
|
||||
// The following will try to get the character encoding from various
|
||||
// sources. Each Try* function will return early if the source is already
|
||||
// at least as large as any of the sources it might look at. Some of
|
||||
@ -460,12 +465,12 @@ nsresult nsHTMLDocument::StartDocumentLoad(const char* aCommand,
|
||||
// interpretation as ASCII and the user can be lured to using the
|
||||
// charset menu.
|
||||
TryChannelCharset(aChannel, charsetSource, encoding, executor);
|
||||
channelHadCharset = (charsetSource == kCharsetFromChannel);
|
||||
|
||||
TryUserForcedCharset(cv, docShell, charsetSource, encoding);
|
||||
TryUserForcedCharset(cv, docShell, charsetSource, encoding,
|
||||
forceAutoDetection);
|
||||
|
||||
TryReloadCharset(cv, charsetSource, encoding); // For encoding reload
|
||||
TryParentCharset(docShell, charsetSource, encoding);
|
||||
TryParentCharset(docShell, charsetSource, encoding, forceAutoDetection);
|
||||
}
|
||||
|
||||
SetDocumentCharacterSetSource(charsetSource);
|
||||
@ -479,7 +484,7 @@ nsresult nsHTMLDocument::StartDocumentLoad(const char* aCommand,
|
||||
#ifdef DEBUG_charset
|
||||
printf(" charset = %s source %d\n", charset.get(), charsetSource);
|
||||
#endif
|
||||
mParser->SetDocumentCharset(encoding, charsetSource, channelHadCharset);
|
||||
mParser->SetDocumentCharset(encoding, charsetSource, forceAutoDetection);
|
||||
mParser->SetCommand(aCommand);
|
||||
|
||||
if (!IsHTMLDocument()) {
|
||||
|
@ -175,9 +175,11 @@ class nsHTMLDocument : public mozilla::dom::Document {
|
||||
NotNull<const Encoding*>& aEncoding);
|
||||
void TryUserForcedCharset(nsIContentViewer* aCv, nsIDocShell* aDocShell,
|
||||
int32_t& aCharsetSource,
|
||||
NotNull<const Encoding*>& aEncoding);
|
||||
NotNull<const Encoding*>& aEncoding,
|
||||
bool& aForceAutoDetection);
|
||||
void TryParentCharset(nsIDocShell* aDocShell, int32_t& charsetSource,
|
||||
NotNull<const Encoding*>& aEncoding);
|
||||
NotNull<const Encoding*>& aEncoding,
|
||||
bool& aForceAutoDetection);
|
||||
|
||||
// Load flags of the document's channel
|
||||
uint32_t mLoadFlags;
|
||||
|
@ -95,11 +95,11 @@ nsHtml5Parser::SetCommand(eParserCommands aParserCommand) {
|
||||
|
||||
void nsHtml5Parser::SetDocumentCharset(NotNull<const Encoding*> aEncoding,
|
||||
int32_t aCharsetSource,
|
||||
bool aChannelHadCharset) {
|
||||
bool aForceAutoDetection) {
|
||||
MOZ_ASSERT(!mExecutor->HasStarted(), "Document charset set too late.");
|
||||
MOZ_ASSERT(GetStreamParser(), "Setting charset on a script-only parser.");
|
||||
GetStreamParser()->SetDocumentCharset(aEncoding, aCharsetSource,
|
||||
aChannelHadCharset);
|
||||
aForceAutoDetection);
|
||||
mExecutor->SetDocumentCharsetAndSource(aEncoding, aCharsetSource);
|
||||
}
|
||||
|
||||
|
@ -67,7 +67,7 @@ class nsHtml5Parser final : public nsIParser, public nsSupportsWeakReference {
|
||||
*/
|
||||
virtual void SetDocumentCharset(NotNull<const Encoding*> aEncoding,
|
||||
int32_t aSource,
|
||||
bool aChannelHadCharset) override;
|
||||
bool aForceAutoDetection) override;
|
||||
|
||||
/**
|
||||
* Get the channel associated with this parser
|
||||
|
@ -203,6 +203,7 @@ nsHtml5StreamParser::nsHtml5StreamParser(nsHtml5TreeOpExecutor* aExecutor,
|
||||
mFeedChardet(true),
|
||||
mGuessEncoding(true),
|
||||
mReparseForbidden(false),
|
||||
mForceAutoDetection(false),
|
||||
mChannelHadCharset(false),
|
||||
mLastBuffer(nullptr), // Will be filled when starting
|
||||
mExecutor(aExecutor),
|
||||
@ -310,8 +311,6 @@ void nsHtml5StreamParser::GuessEncoding(bool aEof, bool aInitial) {
|
||||
} else {
|
||||
mGuessEncoding = false;
|
||||
}
|
||||
bool forced = (mCharsetSource == kCharsetFromPendingUserForcedAutoDetection ||
|
||||
mCharsetSource == kCharsetFromInitialUserForcedAutoDetection);
|
||||
MOZ_ASSERT(
|
||||
mCharsetSource != kCharsetFromFinalUserForcedAutoDetection &&
|
||||
mCharsetSource != kCharsetFromFinalAutoDetectionWouldHaveBeenUTF8 &&
|
||||
@ -324,14 +323,15 @@ void nsHtml5StreamParser::GuessEncoding(bool aEof, bool aInitial) {
|
||||
mCharsetSource != kCharsetFromFinalAutoDetectionFile);
|
||||
auto ifHadBeenForced = mDetector->Guess(EmptyCString(), true);
|
||||
auto encoding =
|
||||
forced ? ifHadBeenForced
|
||||
: mDetector->Guess(mTLD, mDecodingLocalFileWithoutTokenizing);
|
||||
mForceAutoDetection
|
||||
? ifHadBeenForced
|
||||
: mDetector->Guess(mTLD, mDecodingLocalFileWithoutTokenizing);
|
||||
int32_t source =
|
||||
aInitial
|
||||
? (forced
|
||||
? (mForceAutoDetection
|
||||
? kCharsetFromInitialUserForcedAutoDetection
|
||||
: kCharsetFromInitialAutoDetectionWouldNotHaveBeenUTF8Generic)
|
||||
: (forced
|
||||
: (mForceAutoDetection
|
||||
? kCharsetFromFinalUserForcedAutoDetection
|
||||
: (mDecodingLocalFileWithoutTokenizing
|
||||
? kCharsetFromFinalAutoDetectionFile
|
||||
@ -377,7 +377,8 @@ void nsHtml5StreamParser::GuessEncoding(bool aEof, bool aInitial) {
|
||||
mCharsetSource = MaybeRollBackSource(source);
|
||||
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
|
||||
} else {
|
||||
MOZ_ASSERT(mCharsetSource < kCharsetFromXmlDeclarationUtf16 || forced);
|
||||
MOZ_ASSERT(mCharsetSource < kCharsetFromXmlDeclarationUtf16 ||
|
||||
mForceAutoDetection);
|
||||
// We've already committed to a decoder. Request a reload from the
|
||||
// docshell.
|
||||
mTreeBuilder->NeedsCharsetSwitchTo(encoding, source, 0);
|
||||
@ -452,8 +453,7 @@ nsHtml5StreamParser::SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
|
||||
mUnicodeDecoder = UTF_8_ENCODING->NewDecoderWithBOMRemoval();
|
||||
} else {
|
||||
if (mCharsetSource >= kCharsetFromFinalAutoDetectionWouldHaveBeenUTF8) {
|
||||
if (!(mCharsetSource == kCharsetFromPendingUserForcedAutoDetection ||
|
||||
mCharsetSource == kCharsetFromInitialUserForcedAutoDetection)) {
|
||||
if (!mForceAutoDetection) {
|
||||
DontGuessEncoding();
|
||||
}
|
||||
mDecodingLocalFileWithoutTokenizing = false;
|
||||
@ -477,6 +477,7 @@ void nsHtml5StreamParser::SetupDecodingFromBom(
|
||||
mUnicodeDecoder = mEncoding->NewDecoderWithoutBOMHandling();
|
||||
mCharsetSource = kCharsetFromByteOrderMark;
|
||||
DontGuessEncoding();
|
||||
mForceAutoDetection = false;
|
||||
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
|
||||
mSniffingBuffer = nullptr;
|
||||
mMetaScanner = nullptr;
|
||||
@ -699,11 +700,7 @@ nsresult nsHtml5StreamParser::FinalizeSniffing(Span<const uint8_t> aFromSegment,
|
||||
|
||||
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment);
|
||||
}
|
||||
bool forced = (mCharsetSource == kCharsetFromPendingUserForcedAutoDetection ||
|
||||
mCharsetSource == kCharsetFromInitialUserForcedAutoDetection ||
|
||||
mCharsetSource == kCharsetFromFinalUserForcedAutoDetection);
|
||||
if (!mChannelHadCharset &&
|
||||
(forced || mCharsetSource < kCharsetFromMetaPrescan) &&
|
||||
if ((mForceAutoDetection || mCharsetSource < kCharsetFromMetaPrescan) &&
|
||||
(mMode == NORMAL || mMode == VIEW_SOURCE_HTML || mMode == LOAD_AS_DATA)) {
|
||||
// Look for XML declaration in text/html.
|
||||
|
||||
@ -724,16 +721,12 @@ nsresult nsHtml5StreamParser::FinalizeSniffing(Span<const uint8_t> aFromSegment,
|
||||
bufLen = aCountToSniffingLimit;
|
||||
}
|
||||
const Encoding* encoding = xmldecl_parse(buf, bufLen);
|
||||
if (encoding) {
|
||||
if (forced &&
|
||||
if (encoding && !mChannelHadCharset) {
|
||||
if (mForceAutoDetection &&
|
||||
(encoding->IsAsciiCompatible() || encoding == ISO_2022_JP_ENCODING)) {
|
||||
// Honor override
|
||||
if (mCharsetSource == kCharsetFromFinalUserForcedAutoDetection) {
|
||||
DontGuessEncoding();
|
||||
} else {
|
||||
FinalizeSniffingWithDetector(aFromSegment, aCountToSniffingLimit,
|
||||
false);
|
||||
}
|
||||
FinalizeSniffingWithDetector(aFromSegment, aCountToSniffingLimit,
|
||||
false);
|
||||
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
|
||||
aFromSegment);
|
||||
}
|
||||
@ -748,13 +741,10 @@ nsresult nsHtml5StreamParser::FinalizeSniffing(Span<const uint8_t> aFromSegment,
|
||||
SniffBOMlessUTF16BasicLatin(buf, bufLen);
|
||||
}
|
||||
}
|
||||
if (forced && mCharsetSource != kCharsetFromIrreversibleAutoDetection) {
|
||||
if (mForceAutoDetection &&
|
||||
mCharsetSource != kCharsetFromIrreversibleAutoDetection) {
|
||||
// neither meta nor XML declaration found, honor override
|
||||
if (mCharsetSource == kCharsetFromFinalUserForcedAutoDetection) {
|
||||
DontGuessEncoding();
|
||||
} else {
|
||||
FinalizeSniffingWithDetector(aFromSegment, aCountToSniffingLimit, false);
|
||||
}
|
||||
FinalizeSniffingWithDetector(aFromSegment, aCountToSniffingLimit, false);
|
||||
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment);
|
||||
}
|
||||
|
||||
@ -804,7 +794,7 @@ nsresult nsHtml5StreamParser::SniffStreamBytes(
|
||||
break;
|
||||
case 0x00:
|
||||
if (mCharsetSource < kCharsetFromXmlDeclarationUtf16 &&
|
||||
!mChannelHadCharset) {
|
||||
mCharsetSource != kCharsetFromChannel) {
|
||||
mBomState = SEEN_UTF_16_BE_XML_FIRST;
|
||||
} else {
|
||||
mBomState = BOM_SNIFFING_OVER;
|
||||
@ -812,7 +802,7 @@ nsresult nsHtml5StreamParser::SniffStreamBytes(
|
||||
break;
|
||||
case 0x3C:
|
||||
if (mCharsetSource < kCharsetFromXmlDeclarationUtf16 &&
|
||||
!mChannelHadCharset) {
|
||||
mCharsetSource != kCharsetFromChannel) {
|
||||
mBomState = SEEN_UTF_16_LE_XML_FIRST;
|
||||
} else {
|
||||
mBomState = BOM_SNIFFING_OVER;
|
||||
@ -936,7 +926,8 @@ nsresult nsHtml5StreamParser::SniffStreamBytes(
|
||||
MOZ_ASSERT(mCharsetSource != kCharsetFromOtherComponent,
|
||||
"kCharsetFromOtherComponent is for XSLT.");
|
||||
|
||||
if (mBomState == BOM_SNIFFING_OVER && mCharsetSource == kCharsetFromChannel) {
|
||||
if (mBomState == BOM_SNIFFING_OVER && mCharsetSource >= kCharsetFromChannel &&
|
||||
!mForceAutoDetection) {
|
||||
// There was no BOM and the charset came from channel. mEncoding
|
||||
// still contains the charset from the channel as set by an
|
||||
// earlier call to SetDocumentCharset(), since we didn't find a BOM and
|
||||
@ -946,7 +937,12 @@ nsresult nsHtml5StreamParser::SniffStreamBytes(
|
||||
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment);
|
||||
}
|
||||
|
||||
if (!mChannelHadCharset && !mMetaScanner &&
|
||||
MOZ_ASSERT(!(mBomState == BOM_SNIFFING_OVER && mChannelHadCharset &&
|
||||
!mForceAutoDetection),
|
||||
"How come we're running post-BOM sniffing with channel charset unless "
|
||||
"we're also processing forced detection?");
|
||||
|
||||
if (!mMetaScanner &&
|
||||
(mMode == NORMAL || mMode == VIEW_SOURCE_HTML || mMode == LOAD_AS_DATA)) {
|
||||
mMetaScanner = MakeUnique<nsHtml5MetaScanner>(mTreeBuilder.get());
|
||||
}
|
||||
@ -954,12 +950,7 @@ nsresult nsHtml5StreamParser::SniffStreamBytes(
|
||||
if (mSniffingLength + aFromSegment.Length() >= SNIFFING_BUFFER_SIZE) {
|
||||
// this is the last buffer
|
||||
uint32_t countToSniffingLimit = SNIFFING_BUFFER_SIZE - mSniffingLength;
|
||||
bool forced =
|
||||
(mCharsetSource == kCharsetFromPendingUserForcedAutoDetection ||
|
||||
mCharsetSource == kCharsetFromInitialUserForcedAutoDetection ||
|
||||
mCharsetSource == kCharsetFromFinalUserForcedAutoDetection);
|
||||
if (!mChannelHadCharset && (mMode == NORMAL || mMode == VIEW_SOURCE_HTML ||
|
||||
mMode == LOAD_AS_DATA)) {
|
||||
if (mMode == NORMAL || mMode == VIEW_SOURCE_HTML || mMode == LOAD_AS_DATA) {
|
||||
nsHtml5ByteReadable readable(
|
||||
aFromSegment.Elements(),
|
||||
aFromSegment.Elements() + countToSniffingLimit);
|
||||
@ -972,17 +963,15 @@ nsresult nsHtml5StreamParser::SniffStreamBytes(
|
||||
return rv;
|
||||
}
|
||||
|
||||
if (encoding) {
|
||||
// Ignore encoding from meta if channel had charset and we're here in
|
||||
// order to make forced autodetection work.
|
||||
if (encoding && !mChannelHadCharset) {
|
||||
// meta scan successful; honor overrides unless meta is XSS-dangerous
|
||||
if (forced && (encoding->IsAsciiCompatible() ||
|
||||
encoding == ISO_2022_JP_ENCODING)) {
|
||||
if (mForceAutoDetection && (encoding->IsAsciiCompatible() ||
|
||||
encoding == ISO_2022_JP_ENCODING)) {
|
||||
// Honor override
|
||||
if (mCharsetSource == kCharsetFromFinalUserForcedAutoDetection) {
|
||||
DontGuessEncoding();
|
||||
} else {
|
||||
FinalizeSniffingWithDetector(aFromSegment, countToSniffingLimit,
|
||||
false);
|
||||
}
|
||||
FinalizeSniffingWithDetector(aFromSegment, countToSniffingLimit,
|
||||
false);
|
||||
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
|
||||
aFromSegment);
|
||||
}
|
||||
@ -998,8 +987,7 @@ nsresult nsHtml5StreamParser::SniffStreamBytes(
|
||||
}
|
||||
|
||||
// not the last buffer
|
||||
if (!mChannelHadCharset &&
|
||||
(mMode == NORMAL || mMode == VIEW_SOURCE_HTML || mMode == LOAD_AS_DATA)) {
|
||||
if (mMode == NORMAL || mMode == VIEW_SOURCE_HTML || mMode == LOAD_AS_DATA) {
|
||||
nsHtml5ByteReadable readable(
|
||||
aFromSegment.Elements(),
|
||||
aFromSegment.Elements() + aFromSegment.Length());
|
||||
@ -1010,16 +998,11 @@ nsresult nsHtml5StreamParser::SniffStreamBytes(
|
||||
MarkAsBroken(rv);
|
||||
return rv;
|
||||
}
|
||||
if (encoding) {
|
||||
// Ignore encoding from meta if channel had charset and we're here in
|
||||
// order to make forced autodetection work.
|
||||
if (encoding && !mChannelHadCharset) {
|
||||
// meta scan successful; honor overrides unless meta is XSS-dangerous
|
||||
if ((mCharsetSource == kCharsetFromFinalUserForcedAutoDetection) &&
|
||||
(encoding->IsAsciiCompatible() || encoding == ISO_2022_JP_ENCODING)) {
|
||||
// Honor override
|
||||
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
|
||||
aFromSegment);
|
||||
}
|
||||
if ((mCharsetSource == kCharsetFromPendingUserForcedAutoDetection ||
|
||||
mCharsetSource == kCharsetFromInitialUserForcedAutoDetection) &&
|
||||
if (mForceAutoDetection &&
|
||||
(encoding->IsAsciiCompatible() || encoding == ISO_2022_JP_ENCODING)) {
|
||||
FinalizeSniffingWithDetector(aFromSegment, aFromSegment.Length(),
|
||||
false);
|
||||
@ -1370,12 +1353,9 @@ nsresult nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest) {
|
||||
mInitialEncodingWasFromParentFrame = true;
|
||||
}
|
||||
|
||||
if (!(mCharsetSource == kCharsetFromPendingUserForcedAutoDetection ||
|
||||
mCharsetSource == kCharsetFromInitialUserForcedAutoDetection ||
|
||||
mCharsetSource == kCharsetFromFinalUserForcedAutoDetection)) {
|
||||
if (mCharsetSource >= kCharsetFromFinalAutoDetectionWouldHaveBeenUTF8) {
|
||||
DontGuessEncoding();
|
||||
}
|
||||
if (!mForceAutoDetection &&
|
||||
mCharsetSource >= kCharsetFromFinalAutoDetectionWouldHaveBeenUTF8) {
|
||||
DontGuessEncoding();
|
||||
}
|
||||
|
||||
if (mCharsetSource < kCharsetFromUtf8OnlyMime) {
|
||||
|
@ -229,15 +229,16 @@ class nsHtml5StreamParser final : public nsISupports {
|
||||
* @param aCharsetSource the source of the charset
|
||||
*/
|
||||
inline void SetDocumentCharset(NotNull<const Encoding*> aEncoding,
|
||||
int32_t aSource, bool aChannelHadCharset) {
|
||||
int32_t aSource, bool aForceAutoDetection) {
|
||||
MOZ_ASSERT(mStreamState == STREAM_NOT_STARTED,
|
||||
"SetDocumentCharset called too late.");
|
||||
NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
|
||||
MOZ_ASSERT(!(aSource == kCharsetFromChannel && !aChannelHadCharset),
|
||||
"If charset is from channel, channel must have had charset.");
|
||||
MOZ_ASSERT(NS_IsMainThread(), "Wrong thread!");
|
||||
MOZ_ASSERT(!(aForceAutoDetection && aSource >= kCharsetFromOtherComponent),
|
||||
"Can't force with high-ranking source.");
|
||||
mEncoding = aEncoding;
|
||||
mCharsetSource = aSource;
|
||||
mChannelHadCharset = aChannelHadCharset;
|
||||
mForceAutoDetection = aForceAutoDetection;
|
||||
mChannelHadCharset = (aSource == kCharsetFromChannel);
|
||||
}
|
||||
|
||||
nsresult GetChannel(nsIChannel** aChannel);
|
||||
@ -532,7 +533,12 @@ class nsHtml5StreamParser final : public nsISupports {
|
||||
bool mReparseForbidden;
|
||||
|
||||
/**
|
||||
* Whether the channel had charset.
|
||||
* Whether the Repair Text Encoding menu item was invoked
|
||||
*/
|
||||
bool mForceAutoDetection;
|
||||
|
||||
/**
|
||||
* Whether there was a valid charset parameter on the HTTP layer.
|
||||
*/
|
||||
bool mChannelHadCharset;
|
||||
|
||||
|
@ -101,12 +101,13 @@ class nsIParser : public nsParserBase {
|
||||
* @update ftang 4/23/99
|
||||
* @param aCharset- the charset of a document
|
||||
* @param aCharsetSource- the source of the charset
|
||||
* @param aChannelHadCharset- whether the channel had charset
|
||||
* @param aForceAutoDetection- whether Repair Text Encoding menu item was
|
||||
* invoked
|
||||
* @return nada
|
||||
*/
|
||||
virtual void SetDocumentCharset(NotNull<const Encoding*> aCharset,
|
||||
int32_t aSource,
|
||||
bool aChannelHadCharset = false) = 0;
|
||||
bool aForceAutoDetection = false) = 0;
|
||||
|
||||
/**
|
||||
* Get the channel associated with this parser
|
||||
|
@ -256,7 +256,7 @@ nsParser::SetCommand(eParserCommands aParserCommand) {
|
||||
*/
|
||||
void nsParser::SetDocumentCharset(NotNull<const Encoding*> aCharset,
|
||||
int32_t aCharsetSource,
|
||||
bool aChannelHadCharset) {
|
||||
bool aForceAutoDetection) {
|
||||
mCharset = aCharset;
|
||||
mCharsetSource = aCharsetSource;
|
||||
if (mParserContext && mParserContext->mScanner) {
|
||||
|
@ -129,7 +129,7 @@ class nsParser final : public nsIParser,
|
||||
*/
|
||||
virtual void SetDocumentCharset(NotNull<const Encoding*> aCharset,
|
||||
int32_t aSource,
|
||||
bool aChannelHadCharset) override;
|
||||
bool aForceAutoDetection) override;
|
||||
|
||||
NotNull<const Encoding*> GetDocumentCharset(int32_t& aSource) {
|
||||
aSource = mCharsetSource;
|
||||
|
@ -31,8 +31,6 @@ enum {
|
||||
kCharsetFromMetaTag, // this one and greater: HTML5 Confident
|
||||
kCharsetFromChannel,
|
||||
kCharsetFromOtherComponent,
|
||||
kCharsetFromPendingUserForcedAutoDetection, // Marker value to be upgraded
|
||||
// later
|
||||
kCharsetFromInitialUserForcedAutoDetection,
|
||||
kCharsetFromFinalUserForcedAutoDetection,
|
||||
kCharsetFromXmlDeclarationUtf16, // This one is overridden by
|
||||
|
@ -56,7 +56,7 @@ class PrototypeDocumentParser final : public nsIParser,
|
||||
|
||||
virtual void SetDocumentCharset(NotNull<const Encoding*> aEncoding,
|
||||
int32_t aSource,
|
||||
bool aChannelHadCharset) override {}
|
||||
bool aForceAutoDetection) override {}
|
||||
|
||||
NS_IMETHOD GetChannel(nsIChannel** aChannel) override {
|
||||
return NS_ERROR_NOT_IMPLEMENTED;
|
||||
|
@ -1,4 +1,5 @@
|
||||
<!DOCTYPE html>
|
||||
<meta charset=utf-8>
|
||||
<title>Makes sure that Link headers preload resources</title>
|
||||
<!--
|
||||
This and the line below ensure that the trailing crossorigin in the link
|
||||
|
Loading…
Reference in New Issue
Block a user