mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-23 04:41:11 +00:00
Bug 1648139 - Part 1: Add DateTimeFormat::GetTimeSeparator(). r=platform-i18n-reviewers,dminor
ICU doesn't provide a public API to retrieve the time separator, so we have to read it manually from the resource bundles. Differential Revision: https://phabricator.services.mozilla.com/D152744
This commit is contained in:
parent
263389e482
commit
af23be36d7
@ -633,4 +633,75 @@ TEST(IntlDateTimeFormat, SetStartTimeIfGregorian)
|
||||
ASSERT_TRUE(buffer.verboseMatches(Jan01_1583));
|
||||
}
|
||||
}
|
||||
|
||||
TEST(IntlDateTimeFormat, GetTimeSeparator)
{
  // One table row per (locale, numbering system) pair together with the time
  // separator DateTimeFormat::GetTimeSeparator() must report for that pair.
  struct Expectation {
    const char* locale;
    const char* numberingSystem;
    const char16_t* expected;
  };

  const Expectation expectations[] = {
      // Baseline: the root locale.
      {"root", "latn", u":"},
      {"root", "arab", u":"},
      {"root", "thai", u":"},
      {"root", "arabext", u"٫"},

      // English: identical to the root locale.
      {"en", "latn", u":"},
      {"en", "arab", u":"},
      {"en", "thai", u":"},
      {"en", "arabext", u"٫"},

      // Spanish: identical to the root locale.
      {"es", "latn", u":"},
      {"es", "arab", u":"},
      {"es", "thai", u":"},
      {"es", "arabext", u"٫"},

      // German (Austria): identical to the root locale.
      {"de-AT", "latn", u":"},
      {"de-AT", "arab", u":"},
      {"de-AT", "thai", u":"},
      {"de-AT", "arabext", u"٫"},

      // Danish: "latn" uses a different separator, and "thai" follows it.
      {"da", "latn", u"."},
      {"da", "arab", u":"},
      {"da", "thai", u"."},
      {"da", "arabext", u"٫"},

      // English (Denmark): same separators as Danish.
      {"en-DK", "latn", u"."},
      {"en-DK", "arab", u":"},
      {"en-DK", "thai", u"."},
      {"en-DK", "arabext", u"٫"},

      // Norwegian: "arab" and "arabext" are overridden.
      {"no", "latn", u":"},
      {"no", "arab", u"."},
      {"no", "thai", u":"},
      {"no", "arabext", u"."},

      // Bokmål: its parent locale is Norwegian.
      {"nb", "latn", u":"},
      {"nb", "arab", u"."},
      {"nb", "thai", u":"},
      {"nb", "arabext", u"."},

      // Farsi: "arabext" is overridden.
      {"fa", "latn", u":"},
      {"fa", "arab", u":"},
      {"fa", "thai", u":"},
      {"fa", "arabext", u":"},
  };

  for (const Expectation& entry : expectations) {
    TestBuffer<char16_t> buffer;
    auto result = DateTimeFormat::GetTimeSeparator(
        MakeStringSpan(entry.locale), MakeStringSpan(entry.numberingSystem),
        buffer);
    ASSERT_TRUE(result.isOk());
    ASSERT_TRUE(buffer.verboseMatches(entry.expected));
  }
}
|
||||
} // namespace mozilla::intl
|
||||
|
@ -2,6 +2,7 @@
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
|
||||
#include "unicode/ucal.h"
|
||||
@ -12,6 +13,7 @@
|
||||
#include "DateTimeFormatUtils.h"
|
||||
#include "ScopedICUObject.h"
|
||||
|
||||
#include "mozilla/Buffer.h"
|
||||
#include "mozilla/EnumSet.h"
|
||||
#include "mozilla/intl/Calendar.h"
|
||||
#include "mozilla/intl/DateTimeFormat.h"
|
||||
@ -804,6 +806,181 @@ DateTimeFormat::GetAllowedHourCycles(Span<const char> aLanguage,
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename CharT>
|
||||
static Result<Buffer<char>, ICUError> DuplicateChars(Span<CharT> aView) {
|
||||
auto chars = MakeUnique<char[]>(aView.Length() + 1);
|
||||
std::copy_n(aView.Elements(), aView.Length(), chars.get());
|
||||
chars[aView.Length()] = '\0';
|
||||
return Buffer{std::move(chars), aView.Length()};
|
||||
}
|
||||
|
||||
/**
 * Determine the parent locale of the locale backing |aLocaleBundle|.
 *
 * Resolution order:
 *  1. The bundle's explicit "%%Parent" string, when present.
 *  2. The bundle's actual locale with its last '_'-separated subtag removed.
 *  3. "root" for any other locale which isn't "root" itself.
 *  4. The empty string for "root", which has no parent.
 *
 * Returns the parent locale as a NUL-terminated buffer, or the ICU error
 * reported while querying the bundle.
 */
static Result<Buffer<char>, ICUError> GetParentLocale(
    const UResourceBundle* aLocaleBundle) {
  UErrorCode status = U_ZERO_ERROR;

  // An explicit "%%Parent" entry takes precedence over everything else.
  int32_t explicitLength = 0;
  const char16_t* explicitParent =
      ures_getStringByKey(aLocaleBundle, "%%Parent", &explicitLength, &status);
  if (status == U_MISSING_RESOURCE_ERROR) {
    // A missing key is not an error here; continue with the implicit parent.
    explicitParent = nullptr;
    status = U_ZERO_ERROR;
  } else if (U_FAILURE(status)) {
    return Err(ToICUError(status));
  }
  if (explicitParent != nullptr) {
    return DuplicateChars(Span{explicitParent, size_t(explicitLength)});
  }

  // Otherwise derive the parent from the bundle's actual locale.
  const char* actualLocale =
      ures_getLocaleByType(aLocaleBundle, ULOC_ACTUAL_LOCALE, &status);
  if (U_FAILURE(status)) {
    return Err(ToICUError(status));
  }

  // Drop the trailing subtag when there is one.
  const char* lastSeparator = std::strrchr(actualLocale, '_');
  if (lastSeparator != nullptr) {
    const size_t prefixLength = size_t(lastSeparator - actualLocale);
    return DuplicateChars(Span{actualLocale, prefixLength});
  }

  // Every remaining locale has "root" as its parent, except "root" itself,
  // which has none and is reported as the empty string.
  static constexpr auto root = MakeStringSpan("root");
  static constexpr auto none = MakeStringSpan("");
  const bool isRoot = std::strcmp(actualLocale, "root") == 0;
  return DuplicateChars(isRoot ? none : root);
}
|
||||
|
||||
/**
 * Look up the "timeSeparator" symbol of |aNumberingSystem|, starting the
 * search at |aLocale| and walking up the parent locale chain.
 *
 * @param aRequestedLocale  The locale the caller originally asked for. It is
 *                          the restart point whenever the lookup has to be
 *                          retried with the "latn" numbering system.
 * @param aLocale           The locale whose resource bundle is inspected in
 *                          this step. The empty string means the parent chain
 *                          was exhausted (see GetParentLocale()).
 * @param aNumberingSystem  The numbering system whose symbols are searched.
 *
 * Both locale spans and the numbering system span must be NUL-terminated,
 * with the terminator not counted in the span length, because they are handed
 * to ICU through AssertNullTerminatedString().
 *
 * @return The time separator, or an error when ICU reports a failure or when
 *         no separator can be found even for "latn".
 *
 * NOTE(review): the returned span refers to string data handed out by
 * ures_getStringByKey(), while the bundles opened here are closed on return.
 * Presumably ICU keeps the underlying resource data alive; confirm.
 */
static Result<Span<const char16_t>, ICUError> FindTimeSeparator(
    Span<const char> aRequestedLocale, Span<const char> aLocale,
    Span<const char> aNumberingSystem) {
  // Always build the "latn" span through MakeStringSpan(), so its length
  // excludes the NUL terminator like every other span passed to this function.
  static constexpr auto latn = MakeStringSpan("latn");

  // An empty locale means the parent chain was exhausted without finding the
  // numbering system. Retry using the default numbering system "latn". (We
  // don't use the default numbering system of the requested locale to match
  // ICU.)
  if (aLocale == MakeStringSpan("")) {
    // If the exhausted lookup already was for "latn" (or the requested locale
    // itself is empty), retrying would recurse forever. Report the missing
    // resource instead.
    if (aNumberingSystem == latn) {
      return Err(ToICUError(U_MISSING_RESOURCE_ERROR));
    }
    return FindTimeSeparator(aRequestedLocale, aRequestedLocale, latn);
  }

  // First open the resource bundle of the input locale.
  //
  // Note: ICU's resource API accepts both Unicode CLDR locale identifiers and
  // Unicode BCP 47 locale identifiers, so we don't have to convert the input
  // into a Unicode CLDR locale identifier.
  UErrorCode status = U_ZERO_ERROR;
  UResourceBundle* localeBundle =
      ures_open(nullptr, AssertNullTerminatedString(aLocale), &status);
  if (U_FAILURE(status)) {
    return Err(ToICUError(status));
  }
  ScopedICUObject<UResourceBundle, ures_close> closeLocaleBundle(localeBundle);

  // Single-pass block: every `break` below means "not present in this locale,
  // continue with the parent locale".
  do {
    // Search for the "NumberElements" table. Fall back to the parent locale if
    // no "NumberElements" table is present.
    UResourceBundle* numberElements =
        ures_getByKey(localeBundle, "NumberElements", nullptr, &status);
    if (status == U_MISSING_RESOURCE_ERROR) {
      break;
    }
    if (U_FAILURE(status)) {
      return Err(ToICUError(status));
    }
    ScopedICUObject<UResourceBundle, ures_close> closeNumberElements(
        numberElements);

    // Search for the table of the requested numbering system. Fall back to the
    // parent locale if no table was found.
    UResourceBundle* numberingSystem = ures_getByKey(
        numberElements, AssertNullTerminatedString(aNumberingSystem), nullptr,
        &status);
    if (status == U_MISSING_RESOURCE_ERROR) {
      break;
    }
    if (U_FAILURE(status)) {
      return Err(ToICUError(status));
    }
    ScopedICUObject<UResourceBundle, ures_close> closeNumberingSystem(
        numberingSystem);

    // Search for the "symbols" table. Fall back to the parent locale if no
    // "symbols" table is present.
    UResourceBundle* symbols =
        ures_getByKey(numberingSystem, "symbols", nullptr, &status);
    if (status == U_MISSING_RESOURCE_ERROR) {
      break;
    }
    if (U_FAILURE(status)) {
      return Err(ToICUError(status));
    }
    ScopedICUObject<UResourceBundle, ures_close> closeSymbols(symbols);

    // And finally look up the "timeSeparator" string in the "symbols" table. If
    // the string isn't present, fall back to the parent locale.
    int32_t length = 0;
    const UChar* str =
        ures_getStringByKey(symbols, "timeSeparator", &length, &status);
    if (status == U_MISSING_RESOURCE_ERROR) {
      break;
    }
    if (U_FAILURE(status)) {
      return Err(ToICUError(status));
    }

    Span<const char16_t> timeSeparator{str, size_t(length)};

    static constexpr auto defaultTimeSeparator = MakeStringSpan(u":");

    // Many numbering systems don't define their own symbols, but instead link
    // to the symbols for "latn" of the requested locale. The link is performed
    // through an alias entry like:
    // `symbols:alias{"/LOCALE/NumberElements/latn/symbols"}`
    //
    // ICU doesn't provide a public API to detect these alias entries, but
    // instead always automatically resolves the link. But that leads to
    // incorrectly using the symbols from the "root" locale instead of the
    // requested locale.
    //
    // Thankfully these alias entries are only present on the "root" locale. So
    // we are using this heuristic to detect alias entries:
    //
    // - If the resolved time separator is the default time separator ":".
    // - The current locale is "root".
    // - And the numbering system is neither "latn" nor "arab".
    // - Then search the time separator for "latn" of the requested locale.
    //
    // We have to exclude "arab", because it's also using ":" for the time
    // separator, but doesn't use an alias link to "latn".
    if (timeSeparator == defaultTimeSeparator &&
        aLocale == MakeStringSpan("root") && aNumberingSystem != latn &&
        aNumberingSystem != MakeStringSpan("arab")) {
      return FindTimeSeparator(aRequestedLocale, aRequestedLocale, latn);
    }

    return timeSeparator;
  } while (false);

  // Fall back to the parent locale.
  auto parent = GetParentLocale(localeBundle);
  if (parent.isErr()) {
    return parent.propagateErr();
  }
  return FindTimeSeparator(aRequestedLocale, parent.inspect().AsSpan(),
                           aNumberingSystem);
}
|
||||
|
||||
/* static */
|
||||
Result<Span<const char16_t>, ICUError> DateTimeFormat::GetTimeSeparator(
|
||||
Span<const char> aLocale, Span<const char> aNumberingSystem) {
|
||||
return FindTimeSeparator(aLocale, aLocale, aNumberingSystem);
|
||||
}
|
||||
|
||||
Result<DateTimeFormat::ComponentsBag, ICUError>
|
||||
DateTimeFormat::ResolveComponents() {
|
||||
// Maps an ICU pattern string to a corresponding set of date-time components
|
||||
|
@ -537,6 +537,24 @@ class DateTimeFormat final {
|
||||
udat_getAvailable>();
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the time separator for the given locale and numbering system.
|
||||
*/
|
||||
template <typename B>
|
||||
static ICUResult GetTimeSeparator(Span<const char> aLocale,
|
||||
Span<const char> aNumberingSystem,
|
||||
B& aBuffer) {
|
||||
static_assert(std::is_same_v<typename B::CharType, char16_t>);
|
||||
auto separator = GetTimeSeparator(aLocale, aNumberingSystem);
|
||||
if (separator.isErr()) {
|
||||
return separator.propagateErr();
|
||||
}
|
||||
if (!FillBuffer(separator.unwrap(), aBuffer)) {
|
||||
return Err(ICUError::OutOfMemory);
|
||||
}
|
||||
return Ok();
|
||||
}
|
||||
|
||||
private:
|
||||
explicit DateTimeFormat(UDateFormat* aDateFormat);
|
||||
|
||||
@ -583,6 +601,9 @@ class DateTimeFormat final {
|
||||
DateTimeFormat::PatternVector& aPattern, bool aHour12,
|
||||
DateTimeFormat::SkeletonVector& aSkeleton);
|
||||
|
||||
// Look up the time separator for |aLocale| and |aNumberingSystem|. Returns
// the separator as a span of UTF-16 code units, or an ICUError on failure.
// The buffer-filling GetTimeSeparator() template calls this overload.
static Result<Span<const char16_t>, ICUError> GetTimeSeparator(
|
||||
Span<const char> aLocale, Span<const char> aNumberingSystem);
|
||||
|
||||
UDateFormat* mDateFormat = nullptr;
|
||||
|
||||
SkeletonVector mOriginalSkeleton;
|
||||
|
Loading…
Reference in New Issue
Block a user