Bug 1719550 - Add support for BCP 47 extensions to mozilla::intl::Collator; r=platform-i18n-reviewers,nordzilla

SpiderMonkey requires the BCP 47 locale extensions, which involves iterating
over the keywords in ICU, and mapping them to the BCP 47 version. Specifically,
this will change the "phonebook" keyword to "phonebk". This should hopefully
expose a simpler API to SpiderMonkey, the only consumer.

Differential Revision: https://phabricator.services.mozilla.com/D120903
This commit is contained in:
Greg Tatum 2021-08-10 11:46:34 +00:00
parent 0087d035b9
commit 1fb2daab2a
3 changed files with 81 additions and 0 deletions

View File

@ -190,4 +190,40 @@ TEST(IntlCollator, IgnorePunctuation)
ASSERT_EQ(collator->CompareStrings(u"aa", u".bb"), 1);
}
// Verifies that the keyword values reported for "de" are exposed in their
// BCP 47 spelling ("phonebk") rather than the legacy spelling ("phonebook").
TEST(IntlCollator, GetBcp47KeywordValuesForLocale)
{
  auto collatorResult = Collator::TryCreate("en-US");
  ASSERT_TRUE(collatorResult.isOk());
  auto collator = collatorResult.unwrap();

  auto enumerationResult = collator->GetBcp47KeywordValuesForLocale("de");
  ASSERT_TRUE(enumerationResult.isOk());
  auto keywordValues = enumerationResult.unwrap();

  // The complete list comes from ICU and can differ between ICU releases, so
  // only a handful of well-known values are checked here.
  auto standard = MakeStringSpan("standard");
  auto search = MakeStringSpan("search");
  // "phonebk" is the valid BCP 47 form; "phonebook" is not valid BCP 47.
  auto phonebk = MakeStringSpan("phonebk");
  auto phonebook = MakeStringSpan("phonebook");

  bool sawStandard = false;
  bool sawSearch = false;
  bool sawPhonebk = false;
  bool sawPhonebook = false;

  for (auto itemResult : keywordValues) {
    ASSERT_TRUE(itemResult.isOk());
    auto item = itemResult.unwrap();

    if (item == standard) {
      sawStandard = true;
    }
    if (item == search) {
      sawSearch = true;
    }
    if (item == phonebk) {
      sawPhonebk = true;
    }
    if (item == phonebook) {
      sawPhonebook = true;
    }
  }

  ASSERT_TRUE(sawStandard);
  ASSERT_TRUE(sawSearch);
  ASSERT_TRUE(sawPhonebk);
  // The legacy spelling must never be exposed.
  ASSERT_FALSE(sawPhonebook);
}
} // namespace mozilla::intl

View File

@ -250,4 +250,27 @@ ICUResult Collator::SetOptions(const Options& aOptions,
#undef FEATURE_TO_ICU
/* static */
// Asks ICU for the collation keyword values available for aLocale and wraps
// the returned UEnumeration in a Bcp47ExtEnumeration. Returns an
// InternalError when ICU reports a failure status.
Result<Collator::Bcp47ExtEnumeration, InternalError>
Collator::GetBcp47KeywordValuesForLocale(const char* aLocale) {
  UErrorCode status = U_ZERO_ERROR;

  // ICU documents the key argument as one of the keys supplied by
  // ucol_getKeywords, which is "collation".
  UEnumeration* enumeration = ucol_getKeywordValuesForLocale(
      "collation", aLocale, /* commonlyUsed */ false, &status);

  if (!U_SUCCESS(status)) {
    return Err(InternalError{});
  }

  return Bcp47ExtEnumeration(enumeration);
}
/* static */
// Converts a collation keyword value into the type string ICU reports for the
// "co" Unicode locale key (for example "phonebook" becomes "phonebk").
// A null aKeyword produces an InternalError. aLength is not read here.
SpanResult<char> Collator::KeywordValueToBcp47Extension(const char* aKeyword,
                                                        int32_t aLength) {
  if (!aKeyword) {
    return Err(InternalError{});
  }

  // NOTE(review): uloc_toUnicodeLocaleType may hand back a null pointer for a
  // value it cannot map; the result is forwarded to MakeStringSpan unchanged.
  const char* bcp47Type = uloc_toUnicodeLocaleType("co", aKeyword);
  return MakeStringSpan(bcp47Type);
}
} // namespace mozilla::intl

View File

@ -134,6 +134,28 @@ class Collator final {
ICUResult SetOptions(const Options& aOptions,
const Maybe<Options&> aPrevOptions = Nothing());
/**
* Map a collation keyword value to its BCP 47 equivalent, e.g. "phonebook"
* is mapped to "phonebk".
*
* aKeyword is the keyword value as a C string; passing a null pointer yields
* an InternalError. aLength is not read by the implementation.
* NOTE(review): aLength is presumably required by the mapper signature that
* Enumeration expects -- confirm against the Enumeration template.
*
* On success the result is a string span over the value returned by
* uloc_toUnicodeLocaleType for the "co" key.
*/
static SpanResult<char> KeywordValueToBcp47Extension(const char* aKeyword,
int32_t aLength);
/**
* Enumeration over collation keyword values in which
* KeywordValueToBcp47Extension is supplied as the mapping function, so that
* iterating yields SpanResult<char> items.
* NOTE(review): the Enumeration template is declared elsewhere; the meaning
* of its template arguments is inferred from this alias -- confirm.
*/
using Bcp47ExtEnumeration =
Enumeration<char, SpanResult<char>,
Collator::KeywordValueToBcp47Extension>;
/**
* Returns an iterator of collator locale extensions in the preferred order.
* These extensions can be used in BCP 47 locales. For instance this
* iterator could return "phonebk" and could be applied to the German locale
* "de" as "de-co-phonebk" for a phonebook-style collation.
*
* The values are obtained from ICU for the given aLocale. If ICU reports a
* failure, an InternalError is returned instead of an enumeration.
*
* The collation extensions can be found here:
* http://cldr.unicode.org/core-spec/#Key_Type_Definitions
*/
static Result<Bcp47ExtEnumeration, InternalError>
GetBcp47KeywordValuesForLocale(const char* aLocale);
private:
/**
* Toggle features, or use the default setting.