Bug 910192 non-UI part - Get rid of intl.charset.default as a localizable pref and deduce the fallback from the locale. r=bzbarsky.

This commit is contained in:
Henri Sivonen 2013-11-04 13:24:33 +02:00
parent eec37e8d72
commit 7af818f242
20 changed files with 315 additions and 91 deletions

View File

@ -342,29 +342,6 @@ Preferences.prototype = {
this._set("WebKitDisplayImagesKey", "permissions.default.image",
function(webkitVal) webkitVal ? 1 : 2);
// Default charset migration
this._set("WebKitDefaultTextEncodingName", "intl.charset.default",
function(webkitCharset) {
// We don't support x-mac-korean (see bug 713516), but it mostly matches
// EUC-KR.
if (webkitCharset == "x-mac-korean")
return "EUC-KR";
// getCharsetAlias throws if an invalid value is passed in.
try {
return Cc["@mozilla.org/charset-converter-manager;1"].
getService(Ci.nsICharsetConverterManager).
getCharsetAlias(webkitCharset);
}
catch(ex) {
Cu.reportError("Could not convert webkit charset '" + webkitCharset +
"' to a supported charset");
}
// Don't set the preference if we could not get the corresponding
// charset.
return undefined;
});
#ifdef XP_WIN
// Cookie-accept policy.
// For the OS X version, see WebFoundationCookieBehavior.

View File

@ -83,6 +83,7 @@
#include "nsBidiUtils.h"
#include "mozilla/dom/EncodingUtils.h"
#include "mozilla/dom/FallbackEncoding.h"
#include "nsIEditingSession.h"
#include "nsIEditor.h"
#include "nsNodeInfoManager.h"
@ -445,26 +446,13 @@ nsHTMLDocument::TryParentCharset(nsIDocShell* aDocShell,
}
void
nsHTMLDocument::TryWeakDocTypeDefault(int32_t& aCharsetSource,
nsACString& aCharset)
nsHTMLDocument::TryFallback(int32_t& aCharsetSource, nsACString& aCharset)
{
if (kCharsetFromWeakDocTypeDefault <= aCharsetSource)
if (kCharsetFromFallback <= aCharsetSource)
return;
const nsAdoptingCString& defCharset =
Preferences::GetLocalizedCString("intl.charset.default");
// Don't let the user break things by setting intl.charset.default to
// not a rough ASCII superset
nsAutoCString canonical;
if (EncodingUtils::FindEncodingForLabel(defCharset, canonical) &&
EncodingUtils::IsAsciiCompatible(canonical)) {
aCharset = canonical;
} else {
aCharset.AssignLiteral("windows-1252");
}
aCharsetSource = kCharsetFromWeakDocTypeDefault;
return;
aCharsetSource = kCharsetFromFallback;
FallbackEncoding::FromLocale(aCharset);
}
void
@ -642,7 +630,7 @@ nsHTMLDocument::StartDocumentLoad(const char* aCommand,
}
if (!IsHTML() || !docShell) { // no docshell for text/html XHR
charsetSource = IsHTML() ? kCharsetFromWeakDocTypeDefault
charsetSource = IsHTML() ? kCharsetFromFallback
: kCharsetFromDocTypeDefault;
charset.AssignLiteral("UTF-8");
TryChannelCharset(aChannel, charsetSource, charset, executor);
@ -683,7 +671,7 @@ nsHTMLDocument::StartDocumentLoad(const char* aCommand,
TryCacheCharset(cachingChan, charsetSource, charset);
}
TryWeakDocTypeDefault(charsetSource, charset);
TryFallback(charsetSource, charset);
if (wyciwygChannel) {
// We know for sure that the parser needs to be using UTF16.

View File

@ -313,8 +313,7 @@ protected:
nsACString& aCharset);
void TryParentCharset(nsIDocShell* aDocShell,
int32_t& charsetSource, nsACString& aCharset);
static void TryWeakDocTypeDefault(int32_t& aCharsetSource,
nsACString& aCharset);
static void TryFallback(int32_t& aCharsetSource, nsACString& aCharset);
// Override so we can munge the charset on our wyciwyg channel as needed.
virtual void SetDocumentCharacterSet(const nsACString& aCharSetID) MOZ_OVERRIDE;

View File

@ -1896,7 +1896,7 @@ nsDocShell::GatherCharsetMenuTelemetry()
int32_t charsetSource = doc->GetDocumentCharacterSetSource();
switch (charsetSource) {
case kCharsetFromWeakDocTypeDefault:
case kCharsetFromFallback:
case kCharsetFromDocTypeDefault:
case kCharsetFromCache:
case kCharsetFromParentFrame:

View File

@ -0,0 +1,137 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "mozilla/dom/FallbackEncoding.h"
#include "mozilla/dom/EncodingUtils.h"
#include "nsUConvPropertySearch.h"
#include "nsIChromeRegistry.h"
#include "mozilla/Preferences.h"
#include "mozilla/Services.h"
namespace mozilla {
namespace dom {
// Table mapping language codes to fallback encoding labels. It is searched by
// nsUConvPropertySearch::SearchPropertyValue() in FallbackEncoding::Get().
// The rows come from localesfallbacks.properties.h, which the build generates
// from localesfallbacks.properties with props2arrays.py.
// NOTE(review): the search helper presumably relies on the rows being sorted
// by key -- confirm before reordering entries in the .properties file.
static const char* localesFallbacks[][3] = {
#include "localesfallbacks.properties.h"
};
FallbackEncoding* FallbackEncoding::sInstance = nullptr;
// Constructs the fallback cache. Only Initialize() creates an instance, and
// it does so before assigning sInstance, so sInstance must still be null here.
FallbackEncoding::FallbackEncoding()
{
// Register the object with the MOZ_COUNT_CTOR/MOZ_COUNT_DTOR bookkeeping.
MOZ_COUNT_CTOR(FallbackEncoding);
// Guard against a second instance being created next to the singleton.
MOZ_ASSERT(!FallbackEncoding::sInstance,
"Singleton already exists.");
}
// Destroys the fallback cache; invoked through the delete in Shutdown().
FallbackEncoding::~FallbackEncoding()
{
// Pairs with the MOZ_COUNT_CTOR call in the constructor.
MOZ_COUNT_DTOR(FallbackEncoding);
}
// Copies the fallback encoding into aFallback, computing it and caching it in
// mFallback whenever the cache is empty (first use, or after Invalidate()).
//
// Order of precedence:
//   1. the cached value, if there is one;
//   2. the "intl.charset.fallback.override" pref, when it resolves to an
//      ASCII-compatible encoding other than UTF-8;
//   3. Big5 for the Traditional Chinese locales listed below;
//   4. the localesFallbacks entry for the bare language code;
//   5. windows-1252.
//
// @param aFallback the outparam for the fallback encoding
void
FallbackEncoding::Get(nsACString& aFallback)
{
  if (mFallback.IsEmpty()) {
    // Nothing cached yet: consult the override pref first. Unreasonable
    // values set via about:config are discarded so the user cannot break
    // things.
    const nsAdoptingCString& prefLabel =
      Preferences::GetCString("intl.charset.fallback.override");
    const bool usable =
      EncodingUtils::FindEncodingForLabel(prefLabel, mFallback) &&
      EncodingUtils::IsAsciiCompatible(mFallback) &&
      !mFallback.EqualsLiteral("UTF-8");
    if (!usable) {
      mFallback.Truncate();
    }
  }

  if (mFallback.IsEmpty()) {
    // No usable override either: deduce the fallback from the UI locale.
    nsAutoCString uiLocale;
    nsCOMPtr<nsIXULChromeRegistry> chromeRegistry =
      mozilla::services::GetXULChromeRegistryService();
    if (chromeRegistry) {
      chromeRegistry->GetSelectedLocale(NS_LITERAL_CSTRING("global"),
                                        uiLocale);
    }

    // Lower-case the tag in case unofficial language packs do not stick to
    // conventions.
    ToLowerCase(uiLocale); // ASCII lowercasing with CString input!

    // Traditional Chinese has to be recognized while the region/script part
    // of the tag is still present. Only zh-TW ships today; the other values
    // are defensive.
    const bool traditionalChinese =
      uiLocale.EqualsLiteral("zh-tw") ||
      uiLocale.EqualsLiteral("zh-hk") ||
      uiLocale.EqualsLiteral("zh-mo") ||
      uiLocale.EqualsLiteral("zh-hant");

    if (traditionalChinese) {
      mFallback.AssignLiteral("Big5");
    } else {
      // Keep only the language code; telemetry shows odd region/variant
      // suffixes, apparently from unofficial language packs.
      const int32_t hyphenPos = uiLocale.FindChar('-');
      if (hyphenPos >= 0) {
        uiLocale.Truncate(hyphenPos);
      }

      // Languages missing from the table are windows-1252-affiliated.
      if (NS_FAILED(nsUConvPropertySearch::SearchPropertyValue(
            localesFallbacks, ArrayLength(localesFallbacks),
            uiLocale, mFallback))) {
        mFallback.AssignLiteral("windows-1252");
      }
    }
  }

  aFallback = mFallback;
}
// Public entry point: hands the fallback encoding of the singleton cache to
// the caller. Initialize() must have been called beforehand.
//
// @param aFallback the outparam for the fallback encoding
void
FallbackEncoding::FromLocale(nsACString& aFallback)
{
  MOZ_ASSERT(FallbackEncoding::sInstance,
             "Using uninitialized fallback cache.");
  FallbackEncoding* cache = sInstance;
  cache->Get(aFallback);
}
// PrefChangedFunc
int
FallbackEncoding::PrefChanged(const char*, void*)
{
MOZ_ASSERT(FallbackEncoding::sInstance,
"Pref callback called with null fallback cache.");
FallbackEncoding::sInstance->Invalidate();
return 0;
}
void
FallbackEncoding::Initialize()
{
MOZ_ASSERT(!FallbackEncoding::sInstance,
"Initializing pre-existing fallback cache.");
FallbackEncoding::sInstance = new FallbackEncoding;
Preferences::RegisterCallback(FallbackEncoding::PrefChanged,
"intl.charset.fallback.override",
nullptr);
Preferences::RegisterCallback(FallbackEncoding::PrefChanged,
"general.useragent.locale",
nullptr);
}
void
FallbackEncoding::Shutdown()
{
MOZ_ASSERT(FallbackEncoding::sInstance,
"Releasing non-existent fallback cache.");
delete FallbackEncoding::sInstance;
FallbackEncoding::sInstance = nullptr;
}
} // namespace dom
} // namespace mozilla

View File

@ -0,0 +1,72 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef mozilla_dom_FallbackEncoding_h_
#define mozilla_dom_FallbackEncoding_h_
#include "nsString.h"
namespace mozilla {
namespace dom {
/**
 * Cache for the fallback encoding used for legacy HTML and plain text
 * content. All state lives in a single instance (sInstance) that is created
 * by Initialize() and destroyed by Shutdown(); consumers only call
 * FromLocale().
 */
class FallbackEncoding
{
public:
/**
 * Gets the locale-dependent fallback encoding for legacy HTML and plain
 * text content. Initialize() must have been called first.
 *
 * @param aFallback the outparam for the fallback encoding
 */
static void FromLocale(nsACString& aFallback);
// public API ends here! The remaining public members exist only for
// start-up and shutdown wiring.
/**
 * Allocate sInstance used by FromLocale() and register the pref callbacks
 * that invalidate the cache.
 * To be called from nsLayoutStatics only.
 */
static void Initialize();
/**
 * Delete sInstance used by FromLocale() and reset it to null.
 * To be called from nsLayoutStatics only.
 */
static void Shutdown();
private:
/**
 * The fallback cache: the one instance of this class, or null outside the
 * Initialize()/Shutdown() window.
 */
static FallbackEncoding* sInstance;
// Construction and destruction are private; only Initialize() and
// Shutdown() create and delete the instance.
FallbackEncoding();
~FallbackEncoding();
/**
 * Invalidates the cache by emptying mFallback, so that the next Get()
 * recomputes the value.
 */
void Invalidate()
{
mFallback.Truncate();
}
/**
 * Pref-change callback registered by Initialize(); invalidates the cache
 * held by sInstance. Both arguments are ignored.
 *
 * @return always 0
 */
static int PrefChanged(const char*, void*);
/**
 * Gets the fallback encoding label, computing and caching it in mFallback
 * when the cache is empty.
 *
 * @param aFallback the outparam for the fallback encoding
 */
void Get(nsACString& aFallback);
// Cached fallback encoding label; empty means "not computed yet".
nsCString mFallback;
};
} // dom
} // mozilla
#endif // mozilla_dom_FallbackEncoding_h_

View File

@ -9,11 +9,15 @@ LOCAL_INCLUDES = \
include $(topsrcdir)/config/rules.mk
EncodingUtils.$(OBJ_SUFFIX): labelsencodings.properties.h
FallbackEncoding.$(OBJ_SUFFIX): localesfallbacks.properties.h
PROPS2ARRAYS = $(topsrcdir)/intl/locale/src/props2arrays.py
labelsencodings.properties.h: $(PROPS2ARRAYS) labelsencodings.properties
$(PYTHON) $^ $@
localesfallbacks.properties.h: $(PROPS2ARRAYS) localesfallbacks.properties
$(PYTHON) $^ $@
GARBAGE += \
charsetalias.properties.h \
labelsencodings.properties.h \
localesfallbacks.properties.h \
$(NULL)

View File

@ -0,0 +1,72 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
# This file contains mappings from languages to legacy encodings for languages
# that are associated with a legacy encoding other than windows-1252 (except
# Traditional Chinese, which is handled as a special case elsewhere).
#
# The keys are language codes without regions. The values are Gecko-canonical
# encoding labels (not necessarily lower case!).
#
# Rules:
#
# * Avoid editing this file!
#
# * If you do edit this file, be sure to file a spec bug against WHATWG HTML
# to keep this file in sync with
# http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#determining-the-character-encoding
#
# * As an exception to the previous rule, gbk is used instead of GB18030
# until/unless work on http://encoding.spec.whatwg.org/ shows that the former
# can be treated as an alias of the latter and our decoder implementation
# has been audited to match the spec.
#
# * Use only the language code without a hyphen or anything that would come
# after the hyphen.
#
# * Don't put windows-1252-affiliated languages here.
#
# * Don't put Traditional Chinese here.
ar=windows-1256
# https://www.w3.org/Bugs/Public/show_bug.cgi?id=23089
ba=windows-1251
# https://www.w3.org/Bugs/Public/show_bug.cgi?id=23089
be=windows-1251
bg=windows-1251
cs=windows-1250
# https://www.w3.org/Bugs/Public/show_bug.cgi?id=23090
el=ISO-8859-7
et=windows-1257
fa=windows-1256
he=windows-1255
hr=windows-1250
hu=ISO-8859-2
ja=Shift_JIS
# https://www.w3.org/Bugs/Public/show_bug.cgi?id=23089
kk=windows-1251
ko=EUC-KR
ku=windows-1254
# https://www.w3.org/Bugs/Public/show_bug.cgi?id=23089
ky=windows-1251
lt=windows-1257
lv=windows-1257
# https://www.w3.org/Bugs/Public/show_bug.cgi?id=23089
mk=windows-1251
pl=ISO-8859-2
ru=windows-1251
# https://www.w3.org/Bugs/Public/show_bug.cgi?id=23089
sah=windows-1251
sk=windows-1250
sl=ISO-8859-2
sr=windows-1251
# https://www.w3.org/Bugs/Public/show_bug.cgi?id=23089
tg=windows-1251
th=windows-874
tr=windows-1254
# https://www.w3.org/Bugs/Public/show_bug.cgi?id=23089
tt=windows-1251
uk=windows-1251
vi=windows-1258
zh=gbk

View File

@ -10,12 +10,14 @@ MODULE = 'dom'
EXPORTS.mozilla.dom += [
'EncodingUtils.h',
'FallbackEncoding.h',
'TextDecoder.h',
'TextEncoder.h',
]
SOURCES += [
'EncodingUtils.cpp',
'FallbackEncoding.cpp',
'TextDecoder.cpp',
'TextEncoder.cpp',
]

View File

@ -45,17 +45,9 @@ function InitDetectorTests()
$("testframe").onload = DoDetectionTest;
if (gExpectedCharset == "default") {
try {
gExpectedCharset = prefService
.getComplexValue("intl.charset.default",
Ci.nsIPrefLocalizedString)
.data;
if (gExpectedCharset == "ISO-8859-1") {
gExpectedCharset = "windows-1252";
}
} catch (e) {
gExpectedCharset = "windows-1252";
}
// No point trying to be generic here, because we have plenty of other
// unit tests that fail if run using a non-windows-1252 locale.
gExpectedCharset = "windows-1252";
}
// Get the local directory. This needs to be a file: URI because chrome:

View File

@ -50,6 +50,7 @@
#include "nsCrossSiteListenerProxy.h"
#include "nsHTMLDNSPrefetch.h"
#include "nsHtml5Module.h"
#include "mozilla/dom/FallbackEncoding.h"
#include "nsFocusManager.h"
#include "nsListControlFrame.h"
#include "mozilla/dom/HTMLInputElement.h"
@ -258,6 +259,7 @@ nsLayoutStatics::Initialize()
nsContentSink::InitializeStatics();
nsHtml5Module::InitializeStatics();
mozilla::dom::FallbackEncoding::Initialize();
nsLayoutUtils::Initialize();
nsIPresShell::InitializeStatics();
nsRefreshDriver::InitializeStatics();
@ -384,6 +386,8 @@ nsLayoutStatics::Shutdown()
nsHtml5Module::ReleaseStatics();
mozilla::dom::FallbackEncoding::Shutdown();
nsRegion::ShutdownStatic();
NS_ShutdownEventTargetChainRecycler();

View File

@ -1520,7 +1520,7 @@ pref("intl.charsetmenu.mailview.cache", "");
pref("intl.charsetmenu.composer.cache", "");
pref("intl.charsetmenu.browser.cache.size", 5);
pref("intl.charset.detector", "chrome://global/locale/intl.properties");
pref("intl.charset.default", "chrome://global-platform/locale/intl.properties");
pref("intl.charset.fallback.override", "");
pref("intl.ellipsis", "chrome://global-platform/locale/intl.properties");
pref("intl.locale.matchOS", false);
// fallback charset list for Unicode conversion (converting from Unicode)

View File

@ -35,24 +35,7 @@ nsDirIndexParser::Init() {
mLineStart = 0;
mHasDescription = false;
mFormat = nullptr;
// get default charset to be used for directory listings (fallback to
// ISO-8859-1 if pref is unavailable).
NS_NAMED_LITERAL_CSTRING(kFallbackEncoding, "ISO-8859-1");
nsXPIDLString defCharset;
nsCOMPtr<nsIPrefBranch> prefs(do_GetService(NS_PREFSERVICE_CONTRACTID));
if (prefs) {
nsCOMPtr<nsIPrefLocalizedString> prefVal;
prefs->GetComplexValue("intl.charset.default",
NS_GET_IID(nsIPrefLocalizedString),
getter_AddRefs(prefVal));
if (prefVal)
prefVal->ToString(getter_Copies(defCharset));
}
if (!defCharset.IsEmpty())
LossyCopyUTF16toASCII(defCharset, mEncoding); // charset labels are always ASCII
else
mEncoding.Assign(kFallbackEncoding);
mozilla::dom::FallbackEncoding::FromLocale(mEncoding);
nsresult rv;
// XXX not threadsafe

View File

@ -302,7 +302,7 @@ nsHtml5StreamParser::SetupDecodingAndWriteSniffingBufferAndCurrentSegment(const
rv = convManager->GetUnicodeDecoder(mCharset.get(), getter_AddRefs(mUnicodeDecoder));
if (rv == NS_ERROR_UCONV_NOCONV) {
mCharset.AssignLiteral("windows-1252"); // lower case is the raw form
mCharsetSource = kCharsetFromWeakDocTypeDefault;
mCharsetSource = kCharsetFromFallback;
rv = convManager->GetUnicodeDecoderRaw(mCharset.get(), getter_AddRefs(mUnicodeDecoder));
mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
}
@ -612,10 +612,10 @@ nsHtml5StreamParser::FinalizeSniffing(const uint8_t* aFromSegment, // can be nul
if (mCharsetSource == kCharsetUninitialized) {
// Hopefully this case is never needed, but dealing with it anyway
mCharset.AssignLiteral("windows-1252");
mCharsetSource = kCharsetFromWeakDocTypeDefault;
mCharsetSource = kCharsetFromFallback;
mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
} else if (mMode == LOAD_AS_DATA &&
mCharsetSource == kCharsetFromWeakDocTypeDefault) {
mCharsetSource == kCharsetFromFallback) {
NS_ASSERTION(mReparseForbidden, "Reparse should be forbidden for XHR");
NS_ASSERTION(!mFeedChardet, "Should not feed chardet for XHR");
NS_ASSERTION(mCharset.EqualsLiteral("UTF-8"),
@ -731,7 +731,7 @@ nsHtml5StreamParser::SniffStreamBytes(const uint8_t* aFromSegment,
// nsHTMLDocument is supposed to make sure this does not happen. Let's
// deal with this anyway, since who knows how kCharsetFromOtherComponent
// is used.
mCharsetSource = kCharsetFromWeakDocTypeDefault;
mCharsetSource = kCharsetFromFallback;
}
}
@ -981,7 +981,7 @@ nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest, nsISupports* aContext)
// if we failed to get a decoder, there will be fallback, so don't propagate
// the error.
if (NS_FAILED(rv)) {
mCharsetSource = kCharsetFromWeakDocTypeDefault;
mCharsetSource = kCharsetFromFallback;
}
return NS_OK;
}

View File

@ -7,7 +7,7 @@
// note: the value order defines the priority; higher numbers take priority
#define kCharsetUninitialized 0
#define kCharsetFromWeakDocTypeDefault 1
#define kCharsetFromFallback 1
#define kCharsetFromDocTypeDefault 2 // This and up confident for XHR
#define kCharsetFromCache 3
#define kCharsetFromParentFrame 4

View File

@ -564,7 +564,8 @@ function queryCharsetFromCode(aCode) {
if (codes[aCode])
return codes[aCode];
return getLocalizedPref("intl.charset.default", DEFAULT_QUERY_CHARSET);
// Don't bother being fancy about what to return in the failure case.
return "windows-1252";
}
function fileCharsetFromCode(aCode) {
const codes = [

View File

@ -2,8 +2,6 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
# moved from navigator/locale/navigator.properties
intl.charset.default=ISO-8859-1
# LOCALIZATION NOTE (intl.ellipsis): Use the unicode ellipsis char, \u2026,
# or use "..." if \u2026 doesn't suit traditions in your locale.
intl.ellipsis=

View File

@ -2,8 +2,6 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
# moved from navigator/locale/navigator.properties
intl.charset.default=ISO-8859-1
# LOCALIZATION NOTE (intl.ellipsis): Use the unicode ellipsis char, \u2026,
# or use "..." if \u2026 doesn't suit traditions in your locale.
intl.ellipsis=

View File

@ -2,8 +2,6 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
# moved from navigator/locale/navigator.properties
intl.charset.default=ISO-8859-1
# LOCALIZATION NOTE (intl.ellipsis): Use the unicode ellipsis char, \u2026,
# or use "..." if \u2026 doesn't suit traditions in your locale.
intl.ellipsis=

View File

@ -40,7 +40,7 @@ intl.accept_languages=en-US, en
# http://mxr.mozilla.org/mozilla/source/browser/components/preferences/fonts.xul
font.language.group=x-western
# LOCALIZATION NOTE (intl.charset.detector, intl.charset.default, intl.charsetmenu.browser.static, intl.charsetmenu.mailedit):
# LOCALIZATION NOTE (intl.charset.detector, intl.charsetmenu.browser.static, intl.charsetmenu.mailedit):
# For the list of canonical charset values, refer to:
# http://mxr.mozilla.org/mozilla-central/source/intl/locale/src/charsetalias.properties
#
@ -51,7 +51,6 @@ font.language.group=x-western
# Note also that the list of charsets in 'intl.charsetmenu.browser.static'
# must always include "UTF-8".
intl.charset.detector=
intl.charset.default=ISO-8859-1
intl.charsetmenu.browser.static=ISO-8859-1, UTF-8
intl.charsetmenu.mailedit=ISO-8859-1, ISO-8859-15, ISO-8859-6, armscii-8, ISO-8859-13, ISO-8859-14, ISO-8859-2, GB2312, GB18030, Big5, KOI8-R, windows-1251, KOI8-U, ISO-8859-7, ISO-8859-8-I, windows-1255, ISO-2022-JP, EUC-KR, ISO-8859-10, ISO-8859-3, TIS-620, ISO-8859-9, UTF-8, VISCII