mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-23 04:41:11 +00:00
Bug 1648139 - Part 1: Add DateTimeFormat::GetTimeSeparator(). r=platform-i18n-reviewers,dminor
ICU doesn't provide a public API to retrieve the time separator, so we have to read it manually from the resource bundles. Differential Revision: https://phabricator.services.mozilla.com/D152744
This commit is contained in:
parent
263389e482
commit
af23be36d7
@ -633,4 +633,75 @@ TEST(IntlDateTimeFormat, SetStartTimeIfGregorian)
|
|||||||
ASSERT_TRUE(buffer.verboseMatches(Jan01_1583));
|
ASSERT_TRUE(buffer.verboseMatches(Jan01_1583));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Checks DateTimeFormat::GetTimeSeparator() against a table of
// (locale, numbering system) pairs and the separator expected for each pair.
TEST(IntlDateTimeFormat, GetTimeSeparator)
{
  struct TestData {
    const char* locale;
    const char* numberingSystem;
    const char16_t* expected;
  };

  const TestData testCases[] = {
      // Baseline: the root locale.
      {"root", "latn", u":"},
      {"root", "arab", u":"},
      {"root", "thai", u":"},
      {"root", "arabext", u"٫"},

      // English yields the same separators as the root locale.
      {"en", "latn", u":"},
      {"en", "arab", u":"},
      {"en", "thai", u":"},
      {"en", "arabext", u"٫"},

      // Spanish yields the same separators as the root locale.
      {"es", "latn", u":"},
      {"es", "arab", u":"},
      {"es", "thai", u":"},
      {"es", "arabext", u"٫"},

      // German (Austria) yields the same separators as the root locale.
      {"de-AT", "latn", u":"},
      {"de-AT", "arab", u":"},
      {"de-AT", "thai", u":"},
      {"de-AT", "arabext", u"٫"},

      // Danish: "latn" uses "." and "thai" is expected to follow "latn".
      {"da", "latn", u"."},
      {"da", "arab", u":"},
      {"da", "thai", u"."},
      {"da", "arabext", u"٫"},

      // English (Denmark) is expected to match Danish.
      {"en-DK", "latn", u"."},
      {"en-DK", "arab", u":"},
      {"en-DK", "thai", u"."},
      {"en-DK", "arabext", u"٫"},

      // Norwegian: "arab" and "arabext" are overridden to ".".
      {"no", "latn", u":"},
      {"no", "arab", u"."},
      {"no", "thai", u":"},
      {"no", "arabext", u"."},

      // Bokmål has Norwegian as its parent locale, so it matches "no".
      {"nb", "latn", u":"},
      {"nb", "arab", u"."},
      {"nb", "thai", u":"},
      {"nb", "arabext", u"."},

      // Farsi: "arabext" is overridden to ":".
      {"fa", "latn", u":"},
      {"fa", "arab", u":"},
      {"fa", "thai", u":"},
      {"fa", "arabext", u":"},
  };

  for (const TestData& testCase : testCases) {
    TestBuffer<char16_t> buffer;
    auto result = DateTimeFormat::GetTimeSeparator(
        MakeStringSpan(testCase.locale),
        MakeStringSpan(testCase.numberingSystem), buffer);
    ASSERT_TRUE(result.isOk());
    ASSERT_TRUE(buffer.verboseMatches(testCase.expected));
  }
}
|
||||||
} // namespace mozilla::intl
|
} // namespace mozilla::intl
|
||||||
|
@ -2,6 +2,7 @@
|
|||||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
|
||||||
#include "unicode/ucal.h"
|
#include "unicode/ucal.h"
|
||||||
@ -12,6 +13,7 @@
|
|||||||
#include "DateTimeFormatUtils.h"
|
#include "DateTimeFormatUtils.h"
|
||||||
#include "ScopedICUObject.h"
|
#include "ScopedICUObject.h"
|
||||||
|
|
||||||
|
#include "mozilla/Buffer.h"
|
||||||
#include "mozilla/EnumSet.h"
|
#include "mozilla/EnumSet.h"
|
||||||
#include "mozilla/intl/Calendar.h"
|
#include "mozilla/intl/Calendar.h"
|
||||||
#include "mozilla/intl/DateTimeFormat.h"
|
#include "mozilla/intl/DateTimeFormat.h"
|
||||||
@ -804,6 +806,181 @@ DateTimeFormat::GetAllowedHourCycles(Span<const char> aLanguage,
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename CharT>
|
||||||
|
static Result<Buffer<char>, ICUError> DuplicateChars(Span<CharT> aView) {
|
||||||
|
auto chars = MakeUnique<char[]>(aView.Length() + 1);
|
||||||
|
std::copy_n(aView.Elements(), aView.Length(), chars.get());
|
||||||
|
chars[aView.Length()] = '\0';
|
||||||
|
return Buffer{std::move(chars), aView.Length()};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Determine the parent locale of the locale backing |aLocaleBundle|.
 *
 * The result is, in order of preference:
 *  1. The value of the bundle's "%%Parent" entry, if that entry exists.
 *  2. The bundle's actual locale with everything from the last '_' onwards
 *     removed, if the locale contains a '_'.
 *  3. "root", if the actual locale isn't already "root".
 *  4. The empty string, when the actual locale is "root".
 *
 * Any ICU failure other than a missing "%%Parent" entry is returned as an
 * ICUError.
 */
static Result<Buffer<char>, ICUError> GetParentLocale(
    const UResourceBundle* aLocaleBundle) {
  UErrorCode status = U_ZERO_ERROR;

  // An explicit parent locale, stored under the "%%Parent" key, wins.
  int32_t explicitLength = 0;
  const char16_t* explicitParent = ures_getStringByKey(
      aLocaleBundle, "%%Parent", &explicitLength, &status);
  if (status != U_MISSING_RESOURCE_ERROR) {
    if (U_FAILURE(status)) {
      return Err(ToICUError(status));
    }
    if (explicitParent) {
      return DuplicateChars(Span{explicitParent, size_t(explicitLength)});
    }
  } else {
    // No explicit parent is not an error; continue with the implicit rules.
    status = U_ZERO_ERROR;
  }

  // Derive the parent from the actual locale of the resource bundle.
  const char* actualLocale =
      ures_getLocaleByType(aLocaleBundle, ULOC_ACTUAL_LOCALE, &status);
  if (U_FAILURE(status)) {
    return Err(ToICUError(status));
  }

  // Drop the last subtag when there is one.
  const char* lastSeparator = std::strrchr(actualLocale, '_');
  if (lastSeparator) {
    return DuplicateChars(
        Span{actualLocale, size_t(lastSeparator - actualLocale)});
  }

  // Every other locale has "root" as its parent. "root" itself has no parent,
  // which is reported as the empty string.
  const bool isRoot = std::strcmp(actualLocale, "root") == 0;
  return DuplicateChars(isRoot ? MakeStringSpan("") : MakeStringSpan("root"));
}
|
||||||
|
|
||||||
|
/**
 * Look up the "timeSeparator" symbol for |aNumberingSystem|, starting at the
 * resource bundle of |aLocale| and walking up the parent locale chain until
 * an entry is found.
 *
 * @param aRequestedLocale
 *   The locale the caller originally asked for. It is used to restart the
 *   search with the "latn" numbering system.
 * @param aLocale
 *   The locale whose resource bundle is searched in this step. The empty
 *   string means the parent chain has been exhausted (see GetParentLocale).
 * @param aNumberingSystem
 *   The numbering system whose symbols are searched.
 * @return
 *   The time separator string, or an ICUError for any ICU failure other than
 *   a missing resource.
 *
 * NOTE(review): the returned span refers to the string handed out by
 * ures_getStringByKey, while the bundles opened here are closed when this
 * function returns. Presumably ICU keeps that storage alive independently of
 * the bundle handles - confirm.
 */
static Result<Span<const char16_t>, ICUError> FindTimeSeparator(
    Span<const char> aRequestedLocale, Span<const char> aLocale,
    Span<const char> aNumberingSystem) {
  // An empty locale means the search went past "root" without finding the
  // numbering system. Retry using the default numbering system "latn". (We
  // don't use the default numbering system of the requested locale to match
  // ICU.)
  //
  // The literal must go through MakeStringSpan: converting a string literal
  // to a Span directly uses the array constructor, which also counts the
  // trailing NUL character. Such a span wouldn't compare equal to
  // MakeStringSpan("latn") in the alias heuristic below.
  if (aLocale == MakeStringSpan("")) {
    return FindTimeSeparator(aRequestedLocale, aRequestedLocale,
                             MakeStringSpan("latn"));
  }

  // First open the resource bundle of the input locale.
  //
  // Note: ICU's resource API accepts both Unicode CLDR locale identifiers and
  // Unicode BCP 47 locale identifiers, so we don't have to convert the input
  // into a Unicode CLDR locale identifier.
  UErrorCode status = U_ZERO_ERROR;
  UResourceBundle* localeBundle =
      ures_open(nullptr, AssertNullTerminatedString(aLocale), &status);
  if (U_FAILURE(status)) {
    return Err(ToICUError(status));
  }
  ScopedICUObject<UResourceBundle, ures_close> closeLocaleBundle(localeBundle);

  // Single-pass block: every `break` below means "not found in this locale"
  // and continues with the parent locale after the block.
  do {
    // Search for the "NumberElements" table. Fall back to the parent locale if
    // no "NumberElements" table is present.
    UResourceBundle* numberElements =
        ures_getByKey(localeBundle, "NumberElements", nullptr, &status);
    if (status == U_MISSING_RESOURCE_ERROR) {
      break;
    }
    if (U_FAILURE(status)) {
      return Err(ToICUError(status));
    }
    ScopedICUObject<UResourceBundle, ures_close> closeNumberElements(
        numberElements);

    // Search for the table of the requested numbering system. Fall back to the
    // parent locale if no table was found.
    UResourceBundle* numberingSystem = ures_getByKey(
        numberElements, AssertNullTerminatedString(aNumberingSystem), nullptr,
        &status);
    if (status == U_MISSING_RESOURCE_ERROR) {
      break;
    }
    if (U_FAILURE(status)) {
      return Err(ToICUError(status));
    }
    ScopedICUObject<UResourceBundle, ures_close> closeNumberingSystem(
        numberingSystem);

    // Search for the "symbols" table. Fall back to the parent locale if no
    // "symbols" table is present.
    UResourceBundle* symbols =
        ures_getByKey(numberingSystem, "symbols", nullptr, &status);
    if (status == U_MISSING_RESOURCE_ERROR) {
      break;
    }
    if (U_FAILURE(status)) {
      return Err(ToICUError(status));
    }
    ScopedICUObject<UResourceBundle, ures_close> closeSymbols(symbols);

    // And finally look up the "timeSeparator" string in the "symbols" table. If
    // the string isn't present, fall back to the parent locale.
    int32_t length = 0;
    const UChar* str =
        ures_getStringByKey(symbols, "timeSeparator", &length, &status);
    if (status == U_MISSING_RESOURCE_ERROR) {
      break;
    }
    if (U_FAILURE(status)) {
      return Err(ToICUError(status));
    }

    Span<const char16_t> timeSeparator{str, size_t(length)};

    static constexpr auto defaultTimeSeparator = MakeStringSpan(u":");

    // Many numbering systems don't define their own symbols, but instead link
    // to the symbols for "latn" of the requested locale. The link is performed
    // through an alias entry like:
    // `symbols:alias{"/LOCALE/NumberElements/latn/symbols"}`
    //
    // ICU doesn't provide a public API to detect these alias entries, but
    // instead always automatically resolves the link. But that leads to
    // incorrectly using the symbols from the "root" locale instead of the
    // requested locale.
    //
    // Thankfully these alias entries are only present on the "root" locale. So
    // we are using this heuristic to detect alias entries:
    //
    // - If the resolved time separator is the default time separator ":".
    // - The current locale is "root".
    // - And the numbering system is neither "latn" nor "arab".
    // - Then search the time separator for "latn" of the requested locale.
    //
    // We have to exclude "arab", because it's also using ":" for the time
    // separator, but doesn't use an alias link to "latn".
    if (timeSeparator == defaultTimeSeparator &&
        aLocale == MakeStringSpan("root") &&
        aNumberingSystem != MakeStringSpan("latn") &&
        aNumberingSystem != MakeStringSpan("arab")) {
      return FindTimeSeparator(aRequestedLocale, aRequestedLocale,
                               MakeStringSpan("latn"));
    }

    return timeSeparator;
  } while (false);

  // Fall back to the parent locale.
  auto parent = GetParentLocale(localeBundle);
  if (parent.isErr()) {
    return parent.propagateErr();
  }
  return FindTimeSeparator(aRequestedLocale, parent.inspect().AsSpan(),
                           aNumberingSystem);
}
|
||||||
|
|
||||||
|
/* static */
|
||||||
|
Result<Span<const char16_t>, ICUError> DateTimeFormat::GetTimeSeparator(
|
||||||
|
Span<const char> aLocale, Span<const char> aNumberingSystem) {
|
||||||
|
return FindTimeSeparator(aLocale, aLocale, aNumberingSystem);
|
||||||
|
}
|
||||||
|
|
||||||
Result<DateTimeFormat::ComponentsBag, ICUError>
|
Result<DateTimeFormat::ComponentsBag, ICUError>
|
||||||
DateTimeFormat::ResolveComponents() {
|
DateTimeFormat::ResolveComponents() {
|
||||||
// Maps an ICU pattern string to a corresponding set of date-time components
|
// Maps an ICU pattern string to a corresponding set of date-time components
|
||||||
|
@ -537,6 +537,24 @@ class DateTimeFormat final {
|
|||||||
udat_getAvailable>();
|
udat_getAvailable>();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return the time separator for the given locale and numbering system.
|
||||||
|
*/
|
||||||
|
template <typename B>
|
||||||
|
static ICUResult GetTimeSeparator(Span<const char> aLocale,
|
||||||
|
Span<const char> aNumberingSystem,
|
||||||
|
B& aBuffer) {
|
||||||
|
static_assert(std::is_same_v<typename B::CharType, char16_t>);
|
||||||
|
auto separator = GetTimeSeparator(aLocale, aNumberingSystem);
|
||||||
|
if (separator.isErr()) {
|
||||||
|
return separator.propagateErr();
|
||||||
|
}
|
||||||
|
if (!FillBuffer(separator.unwrap(), aBuffer)) {
|
||||||
|
return Err(ICUError::OutOfMemory);
|
||||||
|
}
|
||||||
|
return Ok();
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
explicit DateTimeFormat(UDateFormat* aDateFormat);
|
explicit DateTimeFormat(UDateFormat* aDateFormat);
|
||||||
|
|
||||||
@ -583,6 +601,9 @@ class DateTimeFormat final {
|
|||||||
DateTimeFormat::PatternVector& aPattern, bool aHour12,
|
DateTimeFormat::PatternVector& aPattern, bool aHour12,
|
||||||
DateTimeFormat::SkeletonVector& aSkeleton);
|
DateTimeFormat::SkeletonVector& aSkeleton);
|
||||||
|
|
||||||
|
// Returns the time separator for the given locale and numbering system as a
// span of char16_t, or an ICUError on failure.
static Result<Span<const char16_t>, ICUError>
GetTimeSeparator(Span<const char> aLocale, Span<const char> aNumberingSystem);
|
||||||
|
|
||||||
UDateFormat* mDateFormat = nullptr;
|
UDateFormat* mDateFormat = nullptr;
|
||||||
|
|
||||||
SkeletonVector mOriginalSkeleton;
|
SkeletonVector mOriginalSkeleton;
|
||||||
|
Loading…
Reference in New Issue
Block a user