Bug 1824744 - Part 4: Update language tag mappings. r=platform-i18n-reviewers,gregtatum

- Update the download URL because "core.zip" is no longer present.
- Add code to remove redundant language mappings.

Depends on D176265

Differential Revision: https://phabricator.services.mozilla.com/D176266
This commit is contained in:
André Bargull 2023-05-25 20:41:01 +00:00
parent 806d734076
commit 3df7435860
3 changed files with 12464 additions and 77 deletions

View File

@ -1,6 +1,6 @@
// Generated by make_intl_data.py. DO NOT EDIT.
// Version: CLDR-42
// URL: https://unicode.org/Public/cldr/42/core.zip
// Version: CLDR-43
// URL: https://unicode.org/Public/cldr/43/cldr-common-43.0.zip
#include "mozilla/Assertions.h"
#include "mozilla/Span.h"
@ -99,8 +99,8 @@ static bool IsCanonicallyCasedTransformType(mozilla::Span<const char> type) {
#endif
// Mappings from language subtags to preferred values.
// Derived from CLDR Supplemental Data, version 42.
// https://unicode.org/Public/cldr/42/core.zip
// Derived from CLDR Supplemental Data, version 43.
// https://unicode.org/Public/cldr/43/cldr-common-43.0.zip
bool mozilla::intl::Locale::LanguageMapping(LanguageSubtag& language) {
MOZ_ASSERT(IsStructurallyValidLanguageTag(language.Span()));
MOZ_ASSERT(IsCanonicallyCasedLanguageTag(language.Span()));
@ -219,8 +219,8 @@ bool mozilla::intl::Locale::LanguageMapping(LanguageSubtag& language) {
}
// Language subtags with complex mappings.
// Derived from CLDR Supplemental Data, version 42.
// https://unicode.org/Public/cldr/42/core.zip
// Derived from CLDR Supplemental Data, version 43.
// https://unicode.org/Public/cldr/43/cldr-common-43.0.zip
bool mozilla::intl::Locale::ComplexLanguageMapping(const LanguageSubtag& language) {
MOZ_ASSERT(IsStructurallyValidLanguageTag(language.Span()));
MOZ_ASSERT(IsCanonicallyCasedLanguageTag(language.Span()));
@ -241,8 +241,8 @@ bool mozilla::intl::Locale::ComplexLanguageMapping(const LanguageSubtag& languag
}
// Mappings from script subtags to preferred values.
// Derived from CLDR Supplemental Data, version 42.
// https://unicode.org/Public/cldr/42/core.zip
// Derived from CLDR Supplemental Data, version 43.
// https://unicode.org/Public/cldr/43/cldr-common-43.0.zip
bool mozilla::intl::Locale::ScriptMapping(ScriptSubtag& script) {
MOZ_ASSERT(IsStructurallyValidScriptTag(script.Span()));
MOZ_ASSERT(IsCanonicallyCasedScriptTag(script.Span()));
@ -257,8 +257,8 @@ bool mozilla::intl::Locale::ScriptMapping(ScriptSubtag& script) {
}
// Mappings from region subtags to preferred values.
// Derived from CLDR Supplemental Data, version 42.
// https://unicode.org/Public/cldr/42/core.zip
// Derived from CLDR Supplemental Data, version 43.
// https://unicode.org/Public/cldr/43/cldr-common-43.0.zip
bool mozilla::intl::Locale::RegionMapping(RegionSubtag& region) {
MOZ_ASSERT(IsStructurallyValidRegionTag(region.Span()));
MOZ_ASSERT(IsCanonicallyCasedRegionTag(region.Span()));
@ -357,8 +357,8 @@ bool mozilla::intl::Locale::RegionMapping(RegionSubtag& region) {
}
// Region subtags with complex mappings.
// Derived from CLDR Supplemental Data, version 42.
// https://unicode.org/Public/cldr/42/core.zip
// Derived from CLDR Supplemental Data, version 43.
// https://unicode.org/Public/cldr/43/cldr-common-43.0.zip
bool mozilla::intl::Locale::ComplexRegionMapping(const RegionSubtag& region) {
MOZ_ASSERT(IsStructurallyValidRegionTag(region.Span()));
MOZ_ASSERT(IsCanonicallyCasedRegionTag(region.Span()));
@ -380,8 +380,8 @@ bool mozilla::intl::Locale::ComplexRegionMapping(const RegionSubtag& region) {
}
// Language subtags with complex mappings.
// Derived from CLDR Supplemental Data, version 42.
// https://unicode.org/Public/cldr/42/core.zip
// Derived from CLDR Supplemental Data, version 43.
// https://unicode.org/Public/cldr/43/cldr-common-43.0.zip
void mozilla::intl::Locale::PerformComplexLanguageMappings() {
MOZ_ASSERT(IsStructurallyValidLanguageTag(Language().Span()));
MOZ_ASSERT(IsCanonicallyCasedLanguageTag(Language().Span()));
@ -416,8 +416,8 @@ void mozilla::intl::Locale::PerformComplexLanguageMappings() {
}
// Region subtags with complex mappings.
// Derived from CLDR Supplemental Data, version 42.
// https://unicode.org/Public/cldr/42/core.zip
// Derived from CLDR Supplemental Data, version 43.
// https://unicode.org/Public/cldr/43/cldr-common-43.0.zip
void mozilla::intl::Locale::PerformComplexRegionMappings() {
MOZ_ASSERT(IsStructurallyValidLanguageTag(Language().Span()));
MOZ_ASSERT(IsCanonicallyCasedLanguageTag(Language().Span()));
@ -434,53 +434,82 @@ void mozilla::intl::Locale::PerformComplexRegionMappings() {
}
}
else if (Region().EqualTo("172")) {
if (Language().EqualTo("hy") ||
if (Language().EqualTo("axm") ||
Language().EqualTo("hy") ||
Language().EqualTo("hyw") ||
Language().EqualTo("rmi") ||
(Language().EqualTo("und") && Script().EqualTo("Armn"))) {
SetRegion("AM");
}
else if (Language().EqualTo("az") ||
(Language().EqualTo("azb") && Script().EqualTo("Cyrl")) ||
(Language().EqualTo("azb") && Script().EqualTo("Latn")) ||
Language().EqualTo("bdk") ||
(Language().EqualTo("jdt") && Script().EqualTo("Latn")) ||
Language().EqualTo("kjj") ||
Language().EqualTo("kry") ||
(Language().EqualTo("rut") && Script().EqualTo("Latn")) ||
Language().EqualTo("tkr") ||
Language().EqualTo("tly") ||
Language().EqualTo("ttt")) {
SetRegion("AZ");
}
else if (Language().EqualTo("be")) {
else if (Language().EqualTo("be") ||
(Language().EqualTo("rml") && Script().EqualTo("Cyrl"))) {
SetRegion("BY");
}
else if (Language().EqualTo("ab") ||
Language().EqualTo("bbl") ||
Language().EqualTo("bhn") ||
Language().EqualTo("jge") ||
Language().EqualTo("ka") ||
(Language().EqualTo("ku") && Script().EqualTo("Yezi")) ||
Language().EqualTo("oav") ||
Language().EqualTo("os") ||
Language().EqualTo("sva") ||
(Language().EqualTo("und") && Script().EqualTo("Geor")) ||
(Language().EqualTo("und") && Script().EqualTo("Yezi")) ||
Language().EqualTo("uum") ||
Language().EqualTo("xmf")) {
SetRegion("GE");
}
else if (Language().EqualTo("ky")) {
else if (Language().EqualTo("dng") ||
Language().EqualTo("ky")) {
SetRegion("KG");
}
else if (Language().EqualTo("kk") ||
else if ((Language().EqualTo("ili") && Script().EqualTo("Cyrl")) ||
Language().EqualTo("kk") ||
(Language().EqualTo("ug") && Script().EqualTo("Cyrl"))) {
SetRegion("KZ");
}
else if (Language().EqualTo("gag")) {
SetRegion("MD");
}
else if (Language().EqualTo("tg")) {
else if (Language().EqualTo("abh") ||
(Language().EqualTo("isk") && Script().EqualTo("Cyrl")) ||
Language().EqualTo("paq") ||
Language().EqualTo("sgh") ||
Language().EqualTo("tg") ||
(Language().EqualTo("wbl") && Script().EqualTo("Cyrl")) ||
Language().EqualTo("yai")) {
SetRegion("TJ");
}
else if (Language().EqualTo("tk")) {
else if (Language().EqualTo("chg") ||
Language().EqualTo("tk")) {
SetRegion("TM");
}
else if (Language().EqualTo("crh") ||
Language().EqualTo("got") ||
Language().EqualTo("jct") ||
Language().EqualTo("ji") ||
(Language().EqualTo("kdr") && Script().EqualTo("Cyrl")) ||
Language().EqualTo("rue") ||
Language().EqualTo("uk") ||
(Language().EqualTo("und") && Script().EqualTo("Goth"))) {
SetRegion("UA");
}
else if (Language().EqualTo("kaa") ||
else if (Language().EqualTo("auz") ||
Language().EqualTo("kaa") ||
Language().EqualTo("sog") ||
(Language().EqualTo("und") && Script().EqualTo("Chrs")) ||
(Language().EqualTo("und") && Script().EqualTo("Sogd")) ||
@ -494,7 +523,8 @@ void mozilla::intl::Locale::PerformComplexRegionMappings() {
}
}
else if (Region().EqualTo("200")) {
if (Language().EqualTo("sk")) {
if (Language().EqualTo("rmc") ||
Language().EqualTo("sk")) {
SetRegion("SK");
}
else {
@ -513,9 +543,16 @@ void mozilla::intl::Locale::PerformComplexRegionMappings() {
}
else if (Region().EqualTo("536") ||
Region().EqualTo("NT")) {
if (Language().EqualTo("akk") ||
if (Language().EqualTo("acm") ||
Language().EqualTo("akk") ||
Language().EqualTo("ayp") ||
Language().EqualTo("bjm") ||
Language().EqualTo("ckb") ||
Language().EqualTo("kqd") ||
(Language().EqualTo("ku") && Script().EqualTo("Arab")) ||
Language().EqualTo("mid") ||
Language().EqualTo("sdb") ||
Language().EqualTo("sdf") ||
Language().EqualTo("syr") ||
(Language().EqualTo("und") && Script().EqualTo("Syrc")) ||
(Language().EqualTo("und") && Script().EqualTo("Xsux"))) {
@ -530,7 +567,13 @@ void mozilla::intl::Locale::PerformComplexRegionMappings() {
if (Language().EqualTo("mh")) {
SetRegion("MH");
}
else if (Language().EqualTo("pau")) {
else if (Language().EqualTo("cal") ||
Language().EqualTo("tpv")) {
SetRegion("MP");
}
else if (Language().EqualTo("pau") ||
Language().EqualTo("sov") ||
Language().EqualTo("tox")) {
SetRegion("PW");
}
else {
@ -539,17 +582,28 @@ void mozilla::intl::Locale::PerformComplexRegionMappings() {
}
else if (Region().EqualTo("810") ||
Region().EqualTo("SU")) {
if (Language().EqualTo("hy") ||
if (Language().EqualTo("axm") ||
Language().EqualTo("hy") ||
Language().EqualTo("hyw") ||
Language().EqualTo("rmi") ||
(Language().EqualTo("und") && Script().EqualTo("Armn"))) {
SetRegion("AM");
}
else if (Language().EqualTo("az") ||
(Language().EqualTo("azb") && Script().EqualTo("Cyrl")) ||
(Language().EqualTo("azb") && Script().EqualTo("Latn")) ||
Language().EqualTo("bdk") ||
(Language().EqualTo("jdt") && Script().EqualTo("Latn")) ||
Language().EqualTo("kjj") ||
Language().EqualTo("kry") ||
(Language().EqualTo("rut") && Script().EqualTo("Latn")) ||
Language().EqualTo("tkr") ||
Language().EqualTo("tly") ||
Language().EqualTo("ttt")) {
SetRegion("AZ");
}
else if (Language().EqualTo("be")) {
else if (Language().EqualTo("be") ||
(Language().EqualTo("rml") && Script().EqualTo("Cyrl"))) {
SetRegion("BY");
}
else if (Language().EqualTo("et") ||
@ -557,47 +611,68 @@ void mozilla::intl::Locale::PerformComplexRegionMappings() {
SetRegion("EE");
}
else if (Language().EqualTo("ab") ||
Language().EqualTo("bbl") ||
Language().EqualTo("bhn") ||
Language().EqualTo("jge") ||
Language().EqualTo("ka") ||
(Language().EqualTo("ku") && Script().EqualTo("Yezi")) ||
Language().EqualTo("oav") ||
Language().EqualTo("os") ||
Language().EqualTo("sva") ||
(Language().EqualTo("und") && Script().EqualTo("Geor")) ||
(Language().EqualTo("und") && Script().EqualTo("Yezi")) ||
Language().EqualTo("uum") ||
Language().EqualTo("xmf")) {
SetRegion("GE");
}
else if (Language().EqualTo("ky")) {
else if (Language().EqualTo("dng") ||
Language().EqualTo("ky")) {
SetRegion("KG");
}
else if (Language().EqualTo("kk") ||
else if ((Language().EqualTo("ili") && Script().EqualTo("Cyrl")) ||
Language().EqualTo("kk") ||
(Language().EqualTo("ug") && Script().EqualTo("Cyrl"))) {
SetRegion("KZ");
}
else if (Language().EqualTo("lt") ||
else if (Language().EqualTo("kdr") ||
Language().EqualTo("lt") ||
Language().EqualTo("olt") ||
Language().EqualTo("sgs")) {
SetRegion("LT");
}
else if (Language().EqualTo("ltg") ||
else if (Language().EqualTo("liv") ||
Language().EqualTo("ltg") ||
Language().EqualTo("lv")) {
SetRegion("LV");
}
else if (Language().EqualTo("gag")) {
SetRegion("MD");
}
else if (Language().EqualTo("tg")) {
else if (Language().EqualTo("abh") ||
(Language().EqualTo("isk") && Script().EqualTo("Cyrl")) ||
Language().EqualTo("paq") ||
Language().EqualTo("sgh") ||
Language().EqualTo("tg") ||
(Language().EqualTo("wbl") && Script().EqualTo("Cyrl")) ||
Language().EqualTo("yai")) {
SetRegion("TJ");
}
else if (Language().EqualTo("tk")) {
else if (Language().EqualTo("chg") ||
Language().EqualTo("tk")) {
SetRegion("TM");
}
else if (Language().EqualTo("crh") ||
Language().EqualTo("got") ||
Language().EqualTo("jct") ||
Language().EqualTo("ji") ||
(Language().EqualTo("kdr") && Script().EqualTo("Cyrl")) ||
Language().EqualTo("rue") ||
Language().EqualTo("uk") ||
(Language().EqualTo("und") && Script().EqualTo("Goth"))) {
SetRegion("UA");
}
else if (Language().EqualTo("kaa") ||
else if (Language().EqualTo("auz") ||
Language().EqualTo("kaa") ||
Language().EqualTo("sog") ||
(Language().EqualTo("und") && Script().EqualTo("Chrs")) ||
(Language().EqualTo("und") && Script().EqualTo("Sogd")) ||
@ -614,7 +689,11 @@ void mozilla::intl::Locale::PerformComplexRegionMappings() {
if (Language().EqualTo("bs")) {
SetRegion("BA");
}
else if (Language().EqualTo("hr")) {
else if (Language().EqualTo("ckm") ||
Language().EqualTo("dlm") ||
Language().EqualTo("hr") ||
Language().EqualTo("ist") ||
Language().EqualTo("ruo")) {
SetRegion("HR");
}
else if (Language().EqualTo("mk")) {
@ -643,8 +722,8 @@ static bool IsLessThan(const T& a, const U& b) {
}
// Mappings from variant subtags to preferred values.
// Derived from CLDR Supplemental Data, version 42.
// https://unicode.org/Public/cldr/42/core.zip
// Derived from CLDR Supplemental Data, version 43.
// https://unicode.org/Public/cldr/43/cldr-common-43.0.zip
bool mozilla::intl::Locale::PerformVariantMappings() {
// The variant subtags need to be sorted for binary search.
MOZ_ASSERT(std::is_sorted(mVariants.begin(), mVariants.end(),
@ -707,8 +786,8 @@ bool mozilla::intl::Locale::PerformVariantMappings() {
}
// Canonicalize legacy locale identifiers.
// Derived from CLDR Supplemental Data, version 42.
// https://unicode.org/Public/cldr/42/core.zip
// Derived from CLDR Supplemental Data, version 43.
// https://unicode.org/Public/cldr/43/cldr-common-43.0.zip
bool mozilla::intl::Locale::UpdateLegacyMappings() {
// We're mapping legacy tags to non-legacy form here.
// Other tags remain unchanged.
@ -865,8 +944,8 @@ bool mozilla::intl::Locale::UpdateLegacyMappings() {
}
// Mappings from legacy sign languages.
// Derived from CLDR Supplemental Data, version 42.
// https://unicode.org/Public/cldr/42/core.zip
// Derived from CLDR Supplemental Data, version 43.
// https://unicode.org/Public/cldr/43/cldr-common-43.0.zip
bool mozilla::intl::Locale::SignLanguageMapping(LanguageSubtag& language,
const RegionSubtag& region) {
MOZ_ASSERT(language.EqualTo("sgn"));
@ -1057,19 +1136,19 @@ const char* mozilla::intl::Locale::ReplaceUnicodeExtensionType(
return SearchUnicodeReplacement(types, aliases, type);
}
else if (IsUnicodeKey(key, "tz")) {
static const char* types[29] = {
"aqams" , "cnckg" , "cnhrb" , "cnkhg" , "cuba" , "egypt" ,
"eire" , "est" , "gaza" , "gmt0" , "hongkong", "hst" ,
"iceland" , "iran" , "israel" , "jamaica" , "japan" , "libya" ,
"mst" , "navajo" , "poland" , "portugal", "prc" , "roc" ,
"rok" , "turkey" , "uct" , "usnavajo", "zulu" ,
static const char* types[30] = {
"aqams" , "camtr" , "cnckg" , "cnhrb" , "cnkhg" , "cuba" ,
"egypt" , "eire" , "est" , "gaza" , "gmt0" , "hongkong",
"hst" , "iceland" , "iran" , "israel" , "jamaica" , "japan" ,
"libya" , "mst" , "navajo" , "poland" , "portugal", "prc" ,
"roc" , "rok" , "turkey" , "uct" , "usnavajo", "zulu" ,
};
static const char* aliases[29] = {
"nzakl" , "cnsha" , "cnsha" , "cnurc" , "cuhav" , "egcai" ,
"iedub" , "utcw05" , "gazastrp", "gmt" , "hkhkg" , "utcw10" ,
"isrey" , "irthr" , "jeruslm" , "jmkin" , "jptyo" , "lytip" ,
"utcw07" , "usden" , "plwaw" , "ptlis" , "cnsha" , "twtpe" ,
"krsel" , "trist" , "utc" , "usden" , "utc" ,
static const char* aliases[30] = {
"nzakl" , "cator" , "cnsha" , "cnsha" , "cnurc" , "cuhav" ,
"egcai" , "iedub" , "utcw05" , "gazastrp", "gmt" , "hkhkg" ,
"utcw10" , "isrey" , "irthr" , "jeruslm" , "jmkin" , "jptyo" ,
"lytip" , "utcw07" , "usden" , "plwaw" , "ptlis" , "cnsha" ,
"twtpe" , "krsel" , "trist" , "utc" , "usden" , "utc" ,
};
return SearchUnicodeReplacement(types, aliases, type);
}

View File

@ -6,7 +6,7 @@
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
""" Usage:
make_intl_data.py langtags [cldr_core.zip]
make_intl_data.py langtags [cldr_common.zip]
make_intl_data.py tzdata
make_intl_data.py currency
make_intl_data.py units
@ -1430,6 +1430,23 @@ def readSupplementalData(core_file):
if (language, script) not in default_replacements
]
# Remove redundant mappings.
#
# For example, starting with CLDR 43, the deprecated region "SU" has the
# following non-default replacement entries for "GE":
# - ('sva', None, 'GE')
# - ('sva', 'Cyrl', 'GE')
# - ('sva', 'Latn', 'GE')
#
# The latter two entries are redundant, because they're already handled
# by the first entry.
non_default_replacements = [
(language, script, region)
for (language, script, region) in non_default_replacements
if script is None
or (language, None, region) not in non_default_replacements
]
# If there are no non-default replacements, we can handle the region as
# part of the simple region mapping.
if non_default_replacements:
@ -2052,7 +2069,7 @@ def updateCLDRLangTags(args):
print("\tCLDR version: %s" % version)
print("\tDownload url: %s" % url)
if filename is not None:
print("\tLocal CLDR core.zip file: %s" % filename)
print("\tLocal CLDR common.zip file: %s" % filename)
print("\tOutput file: %s" % out)
print("")
@ -2067,11 +2084,11 @@ def updateCLDRLangTags(args):
print("Processing CLDR data...")
if filename is not None:
print("Always make sure you have the newest CLDR core.zip!")
print("Always make sure you have the newest CLDR common.zip!")
with open(filename, "rb") as cldr_file:
readFiles(cldr_file)
else:
print("Downloading CLDR core.zip...")
print("Downloading CLDR common.zip...")
with closing(urlopen(url)) as cldr_file:
cldr_data = io.BytesIO(cldr_file.read())
readFiles(cldr_data)
@ -4045,7 +4062,7 @@ if __name__ == "__main__":
parser_cldr_tags.add_argument(
"--url",
metavar="URL",
default="https://unicode.org/Public/cldr/<VERSION>/core.zip",
default="https://unicode.org/Public/cldr/<VERSION>/cldr-common-<VERSION>.0.zip",
type=EnsureHttps,
help="Download url CLDR data (default: %(default)s)",
)
@ -4057,7 +4074,7 @@ if __name__ == "__main__":
help="Output file (default: %(default)s)",
)
parser_cldr_tags.add_argument(
"file", nargs="?", help="Local cldr-core.zip file, if omitted uses <URL>"
"file", nargs="?", help="Local cldr-common.zip file, if omitted uses <URL>"
)
parser_cldr_tags.set_defaults(func=updateCLDRLangTags)

File diff suppressed because it is too large Load Diff