diff --git a/intl/locale/Quotes.cpp b/intl/locale/Quotes.cpp new file mode 100644 index 000000000000..9379f18a14ec --- /dev/null +++ b/intl/locale/Quotes.cpp @@ -0,0 +1,78 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode:nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "Quotes.h" +#include "MozLocale.h" +#include "mozilla/ClearOnShutdown.h" +#include "mozilla/StaticPtr.h" +#include "nsDataHashtable.h" +#include "nsPrintfCString.h" + +using namespace mozilla; +using namespace mozilla::intl; + +namespace { +struct LangQuotesRec { + const char* mLangs; + Quotes mQuotes; +}; + +#include "cldr-quotes.inc" + +static StaticAutoPtr> sQuotesForLang; +} // anonymous namespace + +namespace mozilla { +namespace intl { + +const Quotes* QuotesForLang(const nsAtom* aLang) { + MOZ_ASSERT(NS_IsMainThread()); + + // On first use, initialize the hashtable from our CLDR-derived data array. + if (!sQuotesForLang) { + sQuotesForLang = new nsDataHashtable(32); + ClearOnShutdown(&sQuotesForLang); + for (const auto& i : sLangQuotes) { + const char* s = i.mLangs; + size_t len; + while ((len = strlen(s))) { + sQuotesForLang->Put(nsDependentCString(s, len), i.mQuotes); + s += len + 1; + } + } + } + + nsAtomCString langStr(aLang); + const Quotes* entry = sQuotesForLang->GetValue(langStr); + if (entry) { + // Found an exact match for the requested lang. + return entry; + } + + // Try parsing lang as a Locale (which will also canonicalize case of the + // subtags), then see if we can match it with region or script subtags, + // if present, or just the primary language tag. 
+ Locale loc(langStr); + if (loc.IsWellFormed()) { + if (!loc.GetRegion().IsEmpty() && + (entry = sQuotesForLang->GetValue(nsPrintfCString( + "%s-%s", loc.GetLanguage().get(), loc.GetRegion().get())))) { + return entry; + } + if (!loc.GetScript().IsEmpty() && + (entry = sQuotesForLang->GetValue(nsPrintfCString( + "%s-%s", loc.GetLanguage().get(), loc.GetScript().get())))) { + return entry; + } + if ((entry = sQuotesForLang->GetValue(loc.GetLanguage()))) { + return entry; + } + } + + return nullptr; +} + +} // namespace intl +} // namespace mozilla diff --git a/intl/locale/Quotes.h b/intl/locale/Quotes.h new file mode 100644 index 000000000000..7bd7d277ca27 --- /dev/null +++ b/intl/locale/Quotes.h @@ -0,0 +1,35 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode:nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_intl_Quotes_h__ +#define mozilla_intl_Quotes_h__ + +#include "nsAtom.h" + +namespace mozilla { +namespace intl { + +// Currently, all the quotation characters provided by CLDR are single BMP +// codepoints, so they fit into char16_t fields. If there are ever multi- +// character strings or non-BMP codepoints in a future version, we'll need +// to extend this to a larger/more flexible structure, but for now it's +// deliberately kept simple and lightweight. +struct Quotes { + // Entries in order [open, close, alternativeOpen, alternativeClose] + char16_t mChars[4]; +}; + +/** + * Return a pointer to the Quotes record for the given locale (lang attribute), + * or nullptr if none available. + * The returned value points to a hashtable entry, but will remain valid until + * shutdown begins, as the table is not modified after initialization. 
+ */
+const Quotes* QuotesForLang(const nsAtom* aLang);  // Asserts main thread.
+
+}  // namespace intl
+}  // namespace mozilla
+
+#endif  // mozilla_intl_Quotes_h__
diff --git a/intl/locale/cldr-quotes.inc b/intl/locale/cldr-quotes.inc
new file mode 100644
index 000000000000..bc2d9f9de285
--- /dev/null
+++ b/intl/locale/cldr-quotes.inc
@@ -0,0 +1,45 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * Derived from the Unicode Common Locale Data Repository by cldr-quotes.pl.
+ *
+ * For terms of use, see http://www.unicode.org/copyright.html.
+ */
+
+/*
+ * Created on Mon Jul 8 20:11:49 2019 from CLDR data file cldr-common-35.1.zip.
+ *
+ * * * * * This file contains MACHINE-GENERATED DATA, do not edit! * * * * *
+ *
+ * (generated by intl/locale/cldr-quotes.pl)
+ */
+
+static const LangQuotesRec sLangQuotes[] = {
+  // clang-format off
+  { "af\0ak\0as\0asa\0az\0bem\0bez\0bn\0brx\0ccp\0ceb\0cgg\0chr\0cy\0da\0dav\0dje\0dz\0ebu\0ee\0en\0fil\0fo\0ga\0gd\0gl\0gu\0guz\0ha\0id\0ig\0jmc\0jv\0kam\0kde\0kea\0khq\0ki\0kln\0km\0kn\0kok\0ksb\0ku\0lg\0ln\0lo\0lrc\0lu\0luo\0lv\0mas\0mer\0mfe\0mgo\0mi\0ml\0mn\0mr\0ms\0mt\0my\0naq\0nd\0ne\0nus\0nyn\0or\0ps\0rof\0rwk\0saq\0sbp\0sd\0seh\0ses\0si\0so\0sw\0te\0teo\0th\0to\0tr\0tt\0twq\0tzm\0uz-Cyrl\0vai\0vun\0wo\0xog\0yo\0yue-Hans\0zh\0zu\0", { { 0x201c, 0x201d, 0x2018, 0x2019 } } },
+  { "agq\0ff\0", { { 0x201e, 0x201d, 0x201a, 0x2019 } } },
+  { "am\0az-Cyrl\0fa\0fr-CH\0gsw\0jgo\0kkj\0mzn\0", { { 0xab, 0xbb, 0x2039, 0x203a } } },
+  { "ar\0ur\0", { { 0x201d, 0x201c, 0x2019, 0x2018 } } },
+  { "ast\0bm\0ca\0dyo\0el\0es\0ewo\0it\0kab\0kk\0mg\0mua\0nnh\0pt-PT\0sg\0sq\0", { { 0xab, 0xbb, 0x201c, 0x201d } } },
+  { "bas\0be\0ky\0ru\0sah\0uk\0", { { 0xab, 0xbb, 0x201e, 0x201c } } },
+  { "bg\0lt\0", { { 0x201e, 0x201c, 0x201e, 0x201c } } },
+  { "br\0", { { 0x201c, 0x201d, 0xab, 0xbb } } },
+  { "bs-Cyrl\0cs\0de\0dsb\0et\0hr\0hsb\0is\0lb\0luy\0mk\0sk\0sl\0", { { 0x201e, 0x201c, 0x201a, 0x2018 } } },
+  { "bs\0", { { 0x201e, 0x201d, 0x2018, 0x2019 } } },
+  { "dua\0ksf\0nb\0nn\0rw\0", { { 0xab, 0xbb, 0x2018, 0x2019 } } },
+  { "es-419\0eu\0tk\0", { { 0x201c, 0x201d, 0x201c, 0x201d } } },
+  { "fi\0he\0lag\0rn\0sn\0sv\0", { { 0x201d, 0x201d, 0x2019, 0x2019 } } },
+  { "fr-CA\0", { { 0xab, 0xbb, 0x201d, 0x201c } } },
+  { "fr\0hy\0tg\0yav\0", { { 0xab, 0xbb, 0xab, 0xbb } } },
+  { "hu\0", { { 0x201e, 0x201d, 0xbb, 0xab } } },
+  { "ia\0nl\0ti-ER\0xh\0", { { 0x2018, 0x2019, 0x201c, 0x201d } } },
+  { "ja\0yue\0zh-Hant\0", { { 0x300c, 0x300d, 0x300e, 0x300f } } },
+  { "ka\0", { { 0x201e, 0x201c, 0xab, 0xbb } } },
+  { "nmg\0pl\0ro\0", { { 0x201e, 0x201d, 0xab, 0xbb } } },
+  { "shi\0zgh\0", { { 0xab, 0xbb, 0x201e, 0x201d } } },
+  { "sr\0", { { 0x201e, 0x201c, 0x2018, 0x2018 } } },
+  { "uz\0", { { 0x201c, 0x201d, 0x2019, 0x2018 } } },
+  // clang-format on
+};
diff --git a/intl/locale/cldr-quotes.pl b/intl/locale/cldr-quotes.pl
new file mode 100644
index 000000000000..33164bbadeff
--- /dev/null
+++ b/intl/locale/cldr-quotes.pl
@@ -0,0 +1,104 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Tool to generate the cldr-quotes.inc file, to be #include'd in Quotes.cpp
+# to provide locale-appropriate opening and closing quote marks.
+
+# To regenerate cldr-quotes.inc for a new CLDR release, download the data file
+# "cldr-common-##.zip" from http://unicode.org/Public/cldr/latest into the
+# current directory, update the $filename variable below accordingly, run
+#
+#   perl cldr-quotes.pl > cldr-quotes.inc
+#
+# then use `hg diff` to check that the result looks sane.
+
+use warnings;
+use strict;
+
+use Encode;
+use IO::Uncompress::Unzip;
+
+my $filename = 'cldr-common-35.1.zip';
+
+my (%langQuotes, %quoteLangs);  # lang => [4 quote strings]; "0x.., .." list => [langs]
+
+my $zip = IO::Uncompress::Unzip->new($filename) ||
+  die "unzip failed: $IO::Uncompress::Unzip::UnzipError\n";
+my $status = 1;
+while ($status > 0) {  # one iteration per archive member; see nextStream() below
+  my $name = $zip->getHeaderInfo()->{Name};
+  if ($name =~ m@common/main/([A-Za-z0-9_]+)\.xml@) {
+    my $lang = $1;
+    $lang =~ s/_/-/;  # NOTE(review): replaces only the first '_'; confirm that is intended
+    while (<$zip>) {
+      $langQuotes{$lang}[0] = $1 if (m!<quotationStart>(.+)<!);
+      $langQuotes{$lang}[1] = $1 if (m!<quotationEnd>(.+)<!);
+      $langQuotes{$lang}[2] = $1 if (m!<alternateQuotationStart>(.+)<!);
+      $langQuotes{$lang}[3] = $1 if (m!<alternateQuotationEnd>(.+)<!);
+    }
+  }
+  $status = $zip->nextStream();
+}
+$zip->close;
+
+foreach my $lang (sort keys %langQuotes) {
+  # We don't actually want to emit anything for the root locale
+  next if $lang eq "root";
+
+  # Inherit any missing entries from the locale's parent
+  my $parent = $lang;
+  while ($parent =~ m/\-/) {
+    # Strip off a trailing subtag to find a parent locale code
+    $parent =~ s/\-[^-]+$//;
+    # Fill in any values available from the parent
+    for (my $i = 0; $i < 4; $i++) {
+      $langQuotes{$lang}[$i] = $langQuotes{$parent}[$i] unless $langQuotes{$lang}[$i];
+    }
+  }
+
+  # Anything still missing is copied from the root locale
+  for (my $i = 0; $i < 4; $i++) {
+    $langQuotes{$lang}[$i] = $langQuotes{"root"}[$i] unless $langQuotes{$lang}[$i];
+  }
+
+  # If the locale ends up the same as its parent, skip
+  next if ($parent ne $lang) && (exists $langQuotes{$parent}) &&
+    (join(",", @{$langQuotes{$lang}}) eq join(",", @{$langQuotes{$parent}}));
+
+  # Create a string with the C source form for the array of 4 quote characters
+  my $quoteChars = join(", ", map { sprintf("0x%x", ord Encode::decode("UTF-8", $_)) } @{$langQuotes{$lang}});
+
+  # Record this locale in the list of those which use this particular set of quotes
+  $quoteLangs{$quoteChars} = [] unless exists $quoteLangs{$quoteChars};
+  push @{$quoteLangs{$quoteChars}}, $lang;  # explicit deref: perl >= 5.24 rejects push on a bare reference
+}
+
+# Output each unique list of quotes, with the string of associated locales
+my $timestamp = gmtime();
+print <<__EOT__;
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * Derived from the Unicode Common Locale Data Repository by cldr-quotes.pl.
+ *
+ * For terms of use, see http://www.unicode.org/copyright.html.
+ */
+
+/*
+ * Created on $timestamp from CLDR data file $filename.
+ *
+ * * * * * This file contains MACHINE-GENERATED DATA, do not edit! * * * * *
+ *
+ * (generated by intl/locale/cldr-quotes.pl)
+ */
+
+__EOT__
+
+print "static const LangQuotesRec sLangQuotes[] = {\n";
+print "  // clang-format off\n";
+print sort map { sprintf("  { \"%s\\0\", { { %s } } },\n", join("\\0", sort @{$quoteLangs{$_}}), $_) } (keys %quoteLangs);
+print "  // clang-format on\n";
+print "};\n";
diff --git a/intl/locale/moz.build b/intl/locale/moz.build
index fe97934a33af..8587ed4bfe0c 100644
--- a/intl/locale/moz.build
+++ b/intl/locale/moz.build
@@ -36,6 +36,7 @@ EXPORTS.mozilla.intl += [
     'LocaleService.h',
     'MozLocale.h',
     'OSPreferences.h',
+    'Quotes.h',
 ]
 
 UNIFIED_SOURCES += [
@@ -47,6 +48,7 @@ UNIFIED_SOURCES += [
     'nsLanguageAtomService.cpp',
     'nsUConvPropertySearch.cpp',
     'OSPreferences.cpp',
+    'Quotes.cpp',
 ]
 
 EXTRA_JS_MODULES += [