Bug 1719696 - Cache the original skeleton, and use it in DateIntervalFormat; r=anba,platform-i18n-reviewers,dminor

Differential Revision: https://phabricator.services.mozilla.com/D125254
This commit is contained in:
Greg Tatum 2021-09-13 20:53:01 +00:00
parent 7f63288178
commit b0d6525aca
4 changed files with 176 additions and 33 deletions

View File

@ -444,4 +444,40 @@ TEST(IntlDateTimeFormat, ResolvedComponentsHour12)
ASSERT_TRUE(VerboseEquals(expected, resolved));
}
TEST(IntlDateTimeFormat, GetOriginalSkeleton)
{
  // The skeleton a DateTimeFormat was created from is not necessarily the
  // skeleton that can be recovered from its resolved pattern. This test pins
  // one such case.
  const char* localeTag = "zh-Hans-CN";

  DateTimeFormat::ComponentsBag bag{};
  bag.month = Some(DateTimeFormat::Month::Narrow);
  bag.day = Some(DateTimeFormat::Numeric::TwoDigit);

  auto generator = DateTimePatternGenerator::TryCreate(localeTag).unwrap();

  auto createResult = DateTimeFormat::TryCreateFromComponents(
      MakeStringSpan(localeTag), bag, generator.get(),
      Some(MakeStringSpan(u"GMT+3")));
  ASSERT_TRUE(createResult.isOk());
  auto formatter = createResult.unwrap();

  // The original skeleton is the one built from the components bag.
  TestBuffer<char16_t> requestedSkeleton;
  auto requestedResult = formatter->GetOriginalSkeleton(requestedSkeleton);
  ASSERT_TRUE(requestedResult.isOk());
  ASSERT_TRUE(requestedSkeleton.verboseMatches(u"MMMMMdd"));

  // The pattern that was resolved for the locale.
  TestBuffer<char16_t> resolvedPattern;
  auto resolvedPatternResult = formatter->GetPattern(resolvedPattern);
  ASSERT_TRUE(resolvedPatternResult.isOk());
  ASSERT_TRUE(resolvedPattern.verboseMatches(u"M月dd日"));

  // Going back from the resolved pattern to a skeleton yields a different
  // month width than the one originally requested.
  TestBuffer<char16_t> roundTrippedSkeleton;
  auto roundTripResult = DateTimePatternGenerator::GetSkeleton(
      Span(resolvedPattern.data(), resolvedPattern.length()),
      roundTrippedSkeleton);
  ASSERT_TRUE(roundTripResult.isOk());
  ASSERT_TRUE(roundTrippedSkeleton.verboseMatches(u"Mdd"));
}
} // namespace mozilla::intl

View File

@ -127,8 +127,10 @@ static PatternField ToPatternField(CharT aCh) {
* Replaces all hour pattern characters in |patternOrSkeleton| to use the
* matching hour representation for |hourCycle|.
*/
static void ReplaceHourSymbol(mozilla::Span<char16_t> aPatternOrSkeleton,
DateTimeFormat::HourCycle aHourCycle) {
/* static */
void DateTimeFormat::ReplaceHourSymbol(
mozilla::Span<char16_t> aPatternOrSkeleton,
DateTimeFormat::HourCycle aHourCycle) {
char16_t replacement = HourSymbol(aHourCycle);
PatternIterator<char16_t> iter(aPatternOrSkeleton);
while (auto* ptr = iter.next()) {
@ -165,18 +167,20 @@ static void ReplaceHourSymbol(mozilla::Span<char16_t> aPatternOrSkeleton,
* "h23", we'll end up with the pattern "MMMM d, y, HH:mm:ss z", so the
* combinator element " 'at' " was lost in the process.
*/
static ICUResult FindPatternWithHourCycle(
/* static */
ICUResult DateTimeFormat::FindPatternWithHourCycle(
DateTimePatternGenerator& aDateTimePatternGenerator,
DateTimeFormat::PatternVector& aPattern, bool aHour12) {
DateTimeFormat::PatternVector skeleton{};
MOZ_TRY(
mozilla::intl::DateTimePatternGenerator::GetSkeleton(aPattern, skeleton));
DateTimeFormat::PatternVector& aPattern, bool aHour12,
DateTimeFormat::SkeletonVector& aSkeleton) {
MOZ_TRY(mozilla::intl::DateTimePatternGenerator::GetSkeleton(aPattern,
aSkeleton));
// Input skeletons don't differentiate between "K" and "h" resp. "k" and "H".
ReplaceHourSymbol(skeleton, aHour12 ? DateTimeFormat::HourCycle::H12
: DateTimeFormat::HourCycle::H23);
DateTimeFormat::ReplaceHourSymbol(aSkeleton,
aHour12 ? DateTimeFormat::HourCycle::H12
: DateTimeFormat::HourCycle::H23);
MOZ_TRY(aDateTimePatternGenerator.GetBestPattern(skeleton, aPattern));
MOZ_TRY(aDateTimePatternGenerator.GetBestPattern(aSkeleton, aPattern));
return Ok();
}
@ -279,6 +283,8 @@ Result<UniquePtr<DateTimeFormat>, ICUError> DateTimeFormat::TryCreateFromStyle(
MOZ_TRY(df->GetPattern(buffer));
Maybe<DateTimeFormat::HourCycle> hcPattern = HourCycleFromPattern(pattern);
DateTimeFormat::SkeletonVector skeleton{};
if (hcPattern) {
bool wantHour12 =
aStyleBag.hour12 ? *aStyleBag.hour12 : IsHour12(*aStyleBag.hourCycle);
@ -290,18 +296,24 @@ Result<UniquePtr<DateTimeFormat>, ICUError> DateTimeFormat::TryCreateFromStyle(
}
} else {
MOZ_ASSERT(aDateTimePatternGenerator);
MOZ_TRY(FindPatternWithHourCycle(*aDateTimePatternGenerator, pattern,
wantHour12));
MOZ_TRY(DateTimeFormat::FindPatternWithHourCycle(
*aDateTimePatternGenerator, pattern, wantHour12, skeleton));
}
// Replace the hourCycle, if present, in the pattern string. But only do
// this if no hour12 option is present, because the latter takes
// precedence over hourCycle.
if (!aStyleBag.hour12) {
ReplaceHourSymbol(pattern, *aStyleBag.hourCycle);
DateTimeFormat::ReplaceHourSymbol(pattern, *aStyleBag.hourCycle);
}
return DateTimeFormat::TryCreateFromPattern(aLocale, pattern,
aTimeZoneOverride);
auto result = DateTimeFormat::TryCreateFromPattern(aLocale, pattern,
aTimeZoneOverride);
if (result.isErr()) {
return Err(result.unwrapErr());
}
auto dateTimeFormat = result.unwrap();
MOZ_TRY(dateTimeFormat->CacheSkeleton(skeleton));
return dateTimeFormat;
}
}
@ -336,7 +348,7 @@ static ICUResult PushChar(V& aVec, char16_t aCh) {
* http://unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table
*/
ICUResult ToICUSkeleton(const DateTimeFormat::ComponentsBag& aBag,
DateTimeFormat::PatternVector& aSkeleton) {
DateTimeFormat::SkeletonVector& aSkeleton) {
// Create an ICU skeleton representing the specified aBag. See
if (aBag.weekday) {
switch (*aBag.weekday) {
@ -513,7 +525,7 @@ DateTimeFormat::TryCreateFromComponents(
Span<const char> aLocale, const DateTimeFormat::ComponentsBag& aBag,
DateTimePatternGenerator* aDateTimePatternGenerator,
Maybe<Span<const char16_t>> aTimeZoneOverride) {
DateTimeFormat::PatternVector skeleton;
DateTimeFormat::SkeletonVector skeleton;
MOZ_TRY(ToICUSkeleton(aBag, skeleton));
return TryCreateFromSkeleton(aLocale, skeleton, aDateTimePatternGenerator,
aBag.hourCycle, aTimeZoneOverride);
@ -567,11 +579,17 @@ DateTimeFormat::TryCreateFromSkeleton(
aDateTimePatternGenerator->GetBestPattern(aSkeleton, pattern, options));
if (aHourCycle) {
ReplaceHourSymbol(pattern, *aHourCycle);
DateTimeFormat::ReplaceHourSymbol(pattern, *aHourCycle);
}
return DateTimeFormat::TryCreateFromPattern(aLocale, pattern,
aTimeZoneOverride);
auto result =
DateTimeFormat::TryCreateFromPattern(aLocale, pattern, aTimeZoneOverride);
if (result.isErr()) {
return Err(result.unwrapErr());
}
auto dateTimeFormat = result.unwrap();
MOZ_TRY(dateTimeFormat->CacheSkeleton(aSkeleton));
return dateTimeFormat;
}
/* static */
@ -582,7 +600,7 @@ DateTimeFormat::TryCreateFromSkeleton(
Maybe<DateTimeFormat::HourCycle> aHourCycle,
Maybe<Span<const char>> aTimeZoneOverride) {
// Convert the skeleton to UTF-16.
DateTimeFormat::PatternVector skeletonUtf16Buffer;
DateTimeFormat::SkeletonVector skeletonUtf16Buffer;
if (!FillUTF16Vector(aSkeleton, skeletonUtf16Buffer)) {
return Err(ICUError::OutOfMemory);
@ -599,9 +617,22 @@ DateTimeFormat::TryCreateFromSkeleton(
Some(Span<const char16_t>(tzUtf16Vec.begin(), tzUtf16Vec.length()));
}
return DateTimeFormat::TryCreateFromSkeleton(aLocale, skeletonUtf16Buffer,
aDateTimePatternGenerator,
aHourCycle, timeZone);
auto result = DateTimeFormat::TryCreateFromSkeleton(
aLocale, skeletonUtf16Buffer, aDateTimePatternGenerator, aHourCycle,
timeZone);
if (result.isErr()) {
return result;
}
auto dateTimeFormat = result.unwrap();
MOZ_TRY(dateTimeFormat->CacheSkeleton(skeletonUtf16Buffer));
return dateTimeFormat;
}
/**
 * Stores a copy of |aSkeleton| as the original skeleton of this
 * DateTimeFormat.
 *
 * A previously cached skeleton is replaced rather than extended. Factory
 * functions may delegate to one another, so this method can be reached more
 * than once for the same instance; appending unconditionally would then leave
 * the skeleton duplicated in mOriginalSkeleton.
 *
 * Returns ICUError::OutOfMemory if the skeleton could not be copied.
 */
ICUResult DateTimeFormat::CacheSkeleton(Span<const char16_t> aSkeleton) {
  // Drop any earlier contents so repeated caching is idempotent.
  mOriginalSkeleton.clear();

  if (!mOriginalSkeleton.append(aSkeleton.Elements(), aSkeleton.Length())) {
    return Err(ICUError::OutOfMemory);
  }
  return Ok();
}
void DateTimeFormat::SetStartTimeIfGregorian(double aTime) {

View File

@ -9,6 +9,7 @@
#include "mozilla/Assertions.h"
#include "mozilla/intl/ICU4CGlue.h"
#include "mozilla/intl/ICUError.h"
#include "mozilla/intl/DateTimePatternGenerator.h"
#include "mozilla/Maybe.h"
#include "mozilla/Result.h"
#include "mozilla/ResultVariant.h"
@ -20,7 +21,6 @@
namespace mozilla::intl {
class DateTimePatternGenerator;
class Calendar;
/**
@ -233,6 +233,7 @@ class DateTimeFormat final {
// mozilla::Vector can avoid heap allocations for small transient buffers.
using PatternVector = Vector<char16_t, 128>;
using SkeletonVector = Vector<char16_t, 16>;
/**
* Create a DateTimeFormat from styles.
@ -368,6 +369,40 @@ class DateTimeFormat final {
});
}
/**
* Copies the skeleton that was used to generate the current DateTimeFormat to
* the given buffer. If no skeleton was used, then a skeleton is generated
* from the resolved pattern. Note that going from skeleton -> resolved
* pattern -> skeleton is not a 1:1 mapping, as the resolved pattern can
* contain different symbols than the requested skeleton.
*
* Warning: This method should not be added to new code. In the near future we
* plan to remove it.
*/
template <typename B>
ICUResult GetOriginalSkeleton(B& aBuffer,
                              Maybe<HourCycle> aHourCycle = Nothing()) {
  static_assert(std::is_same_v<typename B::CharType, char16_t>);

  if (mOriginalSkeleton.length() == 0) {
    // Nothing was cached when this object was created. Derive a skeleton
    // from the resolved pattern and keep it in mOriginalSkeleton.
    PatternVector resolvedPattern{};
    VectorToBufferAdaptor patternAdaptor(resolvedPattern);
    MOZ_TRY(GetPattern(patternAdaptor));

    VectorToBufferAdaptor skeletonAdaptor(mOriginalSkeleton);
    MOZ_TRY(
        DateTimePatternGenerator::GetSkeleton(resolvedPattern, skeletonAdaptor));
  }

  // Hand the caller a copy of the stored skeleton.
  if (!FillBuffer(mOriginalSkeleton, aBuffer)) {
    return Err(ICUError::OutOfMemory);
  }

  // The hour symbols are rewritten in the caller's buffer only; the stored
  // skeleton is left as it is.
  if (aHourCycle) {
    DateTimeFormat::ReplaceHourSymbol(Span(aBuffer.data(), aBuffer.length()),
                                      *aHourCycle);
  }

  return Ok();
}
/**
* Set the start time of the Gregorian calendar. This is useful for
* ensuring the consistent use of a proleptic Gregorian calendar for ECMA-402.
@ -413,7 +448,49 @@ class DateTimeFormat final {
private:
explicit DateTimeFormat(UDateFormat* aDateFormat);
/**
 * Stores a copy of |aSkeleton| in mOriginalSkeleton. Returns
 * ICUError::OutOfMemory if the copy could not be made.
 */
ICUResult CacheSkeleton(Span<const char16_t> aSkeleton);
/**
* Replaces all hour pattern characters in |patternOrSkeleton| to use the
* matching hour representation for |hourCycle|.
*/
static void ReplaceHourSymbol(Span<char16_t> aPatternOrSkeleton,
DateTimeFormat::HourCycle aHourCycle);
/**
* Find a matching pattern using the requested hour-12 options.
*
* This function is needed to work around the following two issues.
* - https://unicode-org.atlassian.net/browse/ICU-21023
* - https://unicode-org.atlassian.net/browse/CLDR-13425
*
* We're currently using a relatively simple workaround, which doesn't give
* the most accurate results. For example:
*
* ```
* var dtf = new Intl.DateTimeFormat("en", {
* timeZone: "UTC",
* dateStyle: "long",
* timeStyle: "long",
* hourCycle: "h12",
* });
* print(dtf.format(new Date("2020-01-01T00:00Z")));
* ```
*
* Returns the pattern "MMMM d, y 'at' h:mm:ss a z", but when going through
* |DateTimePatternGenerator::GetSkeleton| and then
* |DateTimePatternGenerator::GetBestPattern| to find an equivalent pattern
* for "h23", we'll end up with the pattern "MMMM d, y, HH:mm:ss z", so the
* combinator element " 'at' " was lost in the process.
*/
static ICUResult FindPatternWithHourCycle(
DateTimePatternGenerator& aDateTimePatternGenerator,
DateTimeFormat::PatternVector& aPattern, bool aHour12,
DateTimeFormat::SkeletonVector& aSkeleton);
UDateFormat* mDateFormat = nullptr;
SkeletonVector mOriginalSkeleton;
};
} // namespace mozilla::intl

View File

@ -1413,14 +1413,6 @@ static UDateIntervalFormat* NewUDateIntervalFormat(
return nullptr;
}
FormatBuffer<char16_t, intl::INITIAL_CHAR_BUFFER_SIZE> skeleton(cx);
auto skelResult =
mozilla::intl::DateTimePatternGenerator::GetSkeleton(pattern, skeleton);
if (skelResult.isErr()) {
intl::ReportInternalError(cx, skelResult.unwrapErr());
return nullptr;
}
// Determine the hour cycle used in the resolved pattern. This is needed to
// workaround <https://unicode-org.atlassian.net/browse/ICU-21154> and
// <https://unicode-org.atlassian.net/browse/ICU-21155>.
@ -1442,6 +1434,13 @@ static UDateIntervalFormat* NewUDateIntervalFormat(
}
mozilla::Span<const char16_t> timeZoneChars = timeZone.twoByteRange();
FormatBuffer<char16_t, intl::INITIAL_CHAR_BUFFER_SIZE> skeleton(cx);
auto skelResult = mozDtf.GetOriginalSkeleton(skeleton, hcPattern);
if (skelResult.isErr()) {
intl::ReportInternalError(cx, skelResult.unwrapErr());
return nullptr;
}
UErrorCode status = U_ZERO_ERROR;
UDateIntervalFormat* dif = udtitvfmt_open(
IcuLocale(locale.get()), skeleton.data(), skeleton.length(),