Bug 1856290: Handle time zone offset skeletons in DateIntervalFormat. r=dminor

Add various workarounds for ICU-20548 to improve the output of the "timeZoneName"
option when formatting date-time ranges.

Changes to "intl/icu/source/i18n/dtitvinf.cpp":

The existing ICU code was already changing `LOW_Z` to `LOW_V` when searching
for the best-fit pattern, but it was missing support for `CAP_O`. (The other
time zone name skeleton characters aren't currently supported in ECMA-402, so
I didn't handle them here.)


Changes to "intl/icu/source/i18n/dtitvfmt.cpp":

In `DateIntervalFormat::getDateTimeSkeleton()`, handle `CAP_O` similar to the
existing code for `LOW_Z` and `LOW_V`. And also keep the original number of
skeleton characters in `normalizedTimeSkeleton`.

In `DateIntervalFormat::adjustFieldWidth()`, copy the field width information
for `LOW_V` into `LOW_Z` resp. `CAP_O` because `LOW_V` is replaced in the
resolved pattern with `LOW_Z` resp. `CAP_O`.

Differential Revision: https://phabricator.services.mozilla.com/D189743
This commit is contained in:
André Bargull 2023-11-05 12:27:06 +00:00
parent 16abe6be85
commit 39f07f7d01
8 changed files with 384 additions and 3 deletions

View File

@ -0,0 +1,163 @@
# Handle 'O' time zone skeleton in DateIntervalFormat.
# Keep time zone skeleton field widths in DateIntervalFormat.
#
# ICU bug: https://unicode-org.atlassian.net/browse/ICU-20548
diff --git a/intl/icu/source/i18n/dtitv_impl.h b/intl/icu/source/i18n/dtitv_impl.h
--- a/intl/icu/source/i18n/dtitv_impl.h
+++ b/intl/icu/source/i18n/dtitv_impl.h
@@ -84,16 +84,19 @@
#define CAP_W ((char16_t)0x0057)
#define CAP_Y ((char16_t)0x0059)
#define CAP_Z ((char16_t)0x005A)
//#define MINIMUM_SUPPORTED_CALENDAR_FIELD UCAL_MINUTE
#define MAX_E_COUNT 5
#define MAX_M_COUNT 5
+#define MAX_z_COUNT 4
+#define MAX_v_COUNT 4
+#define MAX_O_COUNT 4
//#define MAX_INTERVAL_INDEX 4
#define MAX_POSITIVE_INT 56632
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif
//eof
diff --git a/intl/icu/source/i18n/dtitvfmt.cpp b/intl/icu/source/i18n/dtitvfmt.cpp
--- a/intl/icu/source/i18n/dtitvfmt.cpp
+++ b/intl/icu/source/i18n/dtitvfmt.cpp
@@ -1061,16 +1061,17 @@ DateIntervalFormat::getDateTimeSkeleton(
// timeSkeleton follows the sequence of hm*[v|z]?
int32_t ECount = 0;
int32_t dCount = 0;
int32_t MCount = 0;
int32_t yCount = 0;
int32_t mCount = 0;
int32_t vCount = 0;
int32_t zCount = 0;
+ int32_t OCount = 0;
char16_t hourChar = u'\0';
int32_t i;
for (i = 0; i < skeleton.length(); ++i) {
char16_t ch = skeleton[i];
switch ( ch ) {
case CAP_E:
dateSkeleton.append(ch);
@@ -1123,16 +1124,20 @@ DateIntervalFormat::getDateTimeSkeleton(
case LOW_Z:
++zCount;
timeSkeleton.append(ch);
break;
case LOW_V:
++vCount;
timeSkeleton.append(ch);
break;
+ case CAP_O:
+ ++OCount;
+ timeSkeleton.append(ch);
+ break;
case LOW_A:
case CAP_V:
case CAP_Z:
case LOW_J:
case LOW_S:
case CAP_S:
case CAP_A:
case LOW_B:
@@ -1174,20 +1179,41 @@ DateIntervalFormat::getDateTimeSkeleton(
/* generate normalized form for time */
if ( hourChar != u'\0' ) {
normalizedTimeSkeleton.append(hourChar);
}
if ( mCount != 0 ) {
normalizedTimeSkeleton.append(LOW_M);
}
if ( zCount != 0 ) {
- normalizedTimeSkeleton.append(LOW_Z);
+ if ( zCount <= 3 ) {
+ normalizedTimeSkeleton.append(LOW_Z);
+ } else {
+ for ( int32_t j = 0; j < zCount && j < MAX_z_COUNT; ++j ) {
+ normalizedTimeSkeleton.append(LOW_Z);
+ }
+ }
}
if ( vCount != 0 ) {
- normalizedTimeSkeleton.append(LOW_V);
+ if ( vCount <= 3 ) {
+ normalizedTimeSkeleton.append(LOW_V);
+ } else {
+ for ( int32_t j = 0; j < vCount && j < MAX_v_COUNT; ++j ) {
+ normalizedTimeSkeleton.append(LOW_V);
+ }
+ }
+ }
+ if ( OCount != 0 ) {
+ if ( OCount <= 3 ) {
+ normalizedTimeSkeleton.append(CAP_O);
+ } else {
+ for ( int32_t j = 0; j < OCount && j < MAX_O_COUNT; ++j ) {
+ normalizedTimeSkeleton.append(CAP_O);
+ }
+ }
}
}
/**
* Generate date or time interval pattern from resource,
* and set them into the interval pattern locale to this formatter.
*
@@ -1732,18 +1758,23 @@ DateIntervalFormat::adjustFieldWidth(con
findReplaceInPattern(adjustedPtn, UnicodeString(u"a\u202F",-1), UnicodeString());
findReplaceInPattern(adjustedPtn, UnicodeString(LOW_A), UnicodeString());
// adjust interior double spaces, remove exterior whitespace
findReplaceInPattern(adjustedPtn, UnicodeString(" "), UnicodeString(" "));
adjustedPtn.trim();
}
if ( differenceInfo == 2 ) {
if (inputSkeleton.indexOf(LOW_Z) != -1) {
+ bestMatchSkeletonFieldWidth[(int)(LOW_Z - PATTERN_CHAR_BASE)] = bestMatchSkeletonFieldWidth[(int)(LOW_V - PATTERN_CHAR_BASE)];
findReplaceInPattern(adjustedPtn, UnicodeString(LOW_V), UnicodeString(LOW_Z));
}
+ if (inputSkeleton.indexOf(CAP_O) != -1) {
+ bestMatchSkeletonFieldWidth[(int)(CAP_O - PATTERN_CHAR_BASE)] = bestMatchSkeletonFieldWidth[(int)(LOW_V - PATTERN_CHAR_BASE)];
+ findReplaceInPattern(adjustedPtn, UnicodeString(LOW_V), UnicodeString(CAP_O));
+ }
if (inputSkeleton.indexOf(CAP_K) != -1) {
findReplaceInPattern(adjustedPtn, UnicodeString(LOW_H), UnicodeString(CAP_K));
}
if (inputSkeleton.indexOf(LOW_K) != -1) {
findReplaceInPattern(adjustedPtn, UnicodeString(CAP_H), UnicodeString(LOW_K));
}
if (inputSkeleton.indexOf(LOW_B) != -1) {
findReplaceInPattern(adjustedPtn, UnicodeString(LOW_A), UnicodeString(LOW_B));
diff --git a/intl/icu/source/i18n/dtitvinf.cpp b/intl/icu/source/i18n/dtitvinf.cpp
--- a/intl/icu/source/i18n/dtitvinf.cpp
+++ b/intl/icu/source/i18n/dtitvinf.cpp
@@ -582,19 +582,20 @@ DateIntervalInfo::getBestSkeleton(const
// hack for certain alternate characters
// resource bundles only have time skeletons containing 'v', 'h', and 'H'
// but not time skeletons containing 'z', 'K', or 'k'
// the skeleton may also include 'a' or 'b', which never occur in the resource bundles, so strip them out too
UBool replacedAlternateChars = false;
const UnicodeString* inputSkeleton = &skeleton;
UnicodeString copySkeleton;
- if ( skeleton.indexOf(LOW_Z) != -1 || skeleton.indexOf(LOW_K) != -1 || skeleton.indexOf(CAP_K) != -1 || skeleton.indexOf(LOW_A) != -1 || skeleton.indexOf(LOW_B) != -1 ) {
+ if ( skeleton.indexOf(LOW_Z) != -1 || skeleton.indexOf(CAP_O) != -1 || skeleton.indexOf(LOW_K) != -1 || skeleton.indexOf(CAP_K) != -1 || skeleton.indexOf(LOW_A) != -1 || skeleton.indexOf(LOW_B) != -1 ) {
copySkeleton = skeleton;
copySkeleton.findAndReplace(UnicodeString(LOW_Z), UnicodeString(LOW_V));
+ copySkeleton.findAndReplace(UnicodeString(CAP_O), UnicodeString(LOW_V));
copySkeleton.findAndReplace(UnicodeString(LOW_K), UnicodeString(CAP_H));
copySkeleton.findAndReplace(UnicodeString(CAP_K), UnicodeString(LOW_H));
copySkeleton.findAndReplace(UnicodeString(LOW_A), UnicodeString());
copySkeleton.findAndReplace(UnicodeString(LOW_B), UnicodeString());
inputSkeleton = &copySkeleton;
replacedAlternateChars = true;
}

View File

@ -89,6 +89,9 @@
#define MAX_E_COUNT 5
#define MAX_M_COUNT 5
#define MAX_z_COUNT 4
#define MAX_v_COUNT 4
#define MAX_O_COUNT 4
//#define MAX_INTERVAL_INDEX 4
#define MAX_POSITIVE_INT 56632

View File

@ -1066,6 +1066,7 @@ DateIntervalFormat::getDateTimeSkeleton(const UnicodeString& skeleton,
int32_t mCount = 0;
int32_t vCount = 0;
int32_t zCount = 0;
int32_t OCount = 0;
char16_t hourChar = u'\0';
int32_t i;
@ -1128,6 +1129,10 @@ DateIntervalFormat::getDateTimeSkeleton(const UnicodeString& skeleton,
++vCount;
timeSkeleton.append(ch);
break;
case CAP_O:
++OCount;
timeSkeleton.append(ch);
break;
case LOW_A:
case CAP_V:
case CAP_Z:
@ -1179,10 +1184,31 @@ DateIntervalFormat::getDateTimeSkeleton(const UnicodeString& skeleton,
normalizedTimeSkeleton.append(LOW_M);
}
if ( zCount != 0 ) {
normalizedTimeSkeleton.append(LOW_Z);
if ( zCount <= 3 ) {
normalizedTimeSkeleton.append(LOW_Z);
} else {
for ( int32_t j = 0; j < zCount && j < MAX_z_COUNT; ++j ) {
normalizedTimeSkeleton.append(LOW_Z);
}
}
}
if ( vCount != 0 ) {
normalizedTimeSkeleton.append(LOW_V);
if ( vCount <= 3 ) {
normalizedTimeSkeleton.append(LOW_V);
} else {
for ( int32_t j = 0; j < vCount && j < MAX_v_COUNT; ++j ) {
normalizedTimeSkeleton.append(LOW_V);
}
}
}
if ( OCount != 0 ) {
if ( OCount <= 3 ) {
normalizedTimeSkeleton.append(CAP_O);
} else {
for ( int32_t j = 0; j < OCount && j < MAX_O_COUNT; ++j ) {
normalizedTimeSkeleton.append(CAP_O);
}
}
}
}
@ -1737,8 +1763,13 @@ DateIntervalFormat::adjustFieldWidth(const UnicodeString& inputSkeleton,
}
if ( differenceInfo == 2 ) {
if (inputSkeleton.indexOf(LOW_Z) != -1) {
bestMatchSkeletonFieldWidth[(int)(LOW_Z - PATTERN_CHAR_BASE)] = bestMatchSkeletonFieldWidth[(int)(LOW_V - PATTERN_CHAR_BASE)];
findReplaceInPattern(adjustedPtn, UnicodeString(LOW_V), UnicodeString(LOW_Z));
}
if (inputSkeleton.indexOf(CAP_O) != -1) {
bestMatchSkeletonFieldWidth[(int)(CAP_O - PATTERN_CHAR_BASE)] = bestMatchSkeletonFieldWidth[(int)(LOW_V - PATTERN_CHAR_BASE)];
findReplaceInPattern(adjustedPtn, UnicodeString(LOW_V), UnicodeString(CAP_O));
}
if (inputSkeleton.indexOf(CAP_K) != -1) {
findReplaceInPattern(adjustedPtn, UnicodeString(LOW_H), UnicodeString(CAP_K));
}

View File

@ -587,9 +587,10 @@ DateIntervalInfo::getBestSkeleton(const UnicodeString& skeleton,
UBool replacedAlternateChars = false;
const UnicodeString* inputSkeleton = &skeleton;
UnicodeString copySkeleton;
if ( skeleton.indexOf(LOW_Z) != -1 || skeleton.indexOf(LOW_K) != -1 || skeleton.indexOf(CAP_K) != -1 || skeleton.indexOf(LOW_A) != -1 || skeleton.indexOf(LOW_B) != -1 ) {
if ( skeleton.indexOf(LOW_Z) != -1 || skeleton.indexOf(CAP_O) != -1 || skeleton.indexOf(LOW_K) != -1 || skeleton.indexOf(CAP_K) != -1 || skeleton.indexOf(LOW_A) != -1 || skeleton.indexOf(LOW_B) != -1 ) {
copySkeleton = skeleton;
copySkeleton.findAndReplace(UnicodeString(LOW_Z), UnicodeString(LOW_V));
copySkeleton.findAndReplace(UnicodeString(CAP_O), UnicodeString(LOW_V));
copySkeleton.findAndReplace(UnicodeString(LOW_K), UnicodeString(CAP_H));
copySkeleton.findAndReplace(UnicodeString(CAP_K), UnicodeString(LOW_H));
copySkeleton.findAndReplace(UnicodeString(LOW_A), UnicodeString());

View File

@ -62,6 +62,7 @@ for patch in \
bug-1814862-ICU-22260.diff \
double-conversion.diff \
bug-1856428-ICU-22541.diff \
bug-1856290-ICU-20548-dateinterval-timezone.diff \
; do
echo "Applying local patch $patch"
patch -d ${icu_dir}/../../ -p1 --no-backup-if-mismatch < ${icu_dir}/../icu-patches/$patch

View File

@ -0,0 +1,45 @@
// |reftest| skip-if(!this.hasOwnProperty("Intl"))
// Test DateTimeFormat.prototype.format and DateTimeFormat.prototype.formatRange
// use the same formatting for the "timeZoneName" option.
function hours(v) {
return v * 60 * 60 * 1000;
}
const locales = [
"en", "fr", "de", "es",
"ja", "th", "zh", "ar",
];
const timeZones = [
"UTC", "Etc/GMT-1", "Etc/GMT+1",
"Africa/Cairo",
"America/New_York", "America/Los_Angeles",
"Asia/Riyadh", "Asia/Bangkok", "Asia/Shanghai", "Asia/Tokyo",
"Europe/Berlin", "Europe/London", "Europe/Madrid", "Europe/Paris",
];
const timeZoneNames = [
"short", "shortOffset", "shortGeneric",
"long", "longOffset", "longGeneric",
];
function timeZonePart(parts) {
return parts.filter(({type}) => type === "timeZoneName").map(({value}) => value)[0];
}
for (let locale of locales) {
for (let timeZone of timeZones) {
for (let timeZoneName of timeZoneNames) {
let dtf = new Intl.DateTimeFormat(locale, {timeZone, timeZoneName, hour: "numeric"});
let formatTimeZone = timeZonePart(dtf.formatToParts(0));
let formatRangeTimeZone = timeZonePart(dtf.formatRangeToParts(0, hours(1)));
assertEq(formatRangeTimeZone, formatTimeZone);
}
}
}
if (typeof reportCompare === "function")
reportCompare(0, 0, "ok");

View File

@ -0,0 +1,124 @@
// |reftest| skip-if(!this.hasOwnProperty("Intl"))
// Test all "timeZoneName" options with various locales when formatting a
// date-time range.
const {
Hour, Minute, Literal, TimeZoneName,
} = DateTimeFormatParts;
function hours(v) {
return v * 60 * 60 * 1000;
}
function minutes(v) {
return v * 60 * 1000;
}
const tests = {
"en": [
{
start: 0,
end: minutes(2),
timeZone: "UTC",
options: {hour: "numeric", minute: "numeric", hour12: false},
timeZoneNames: {
short: [Hour("00"), Literal(":"), Minute("00"), Literal(" "), Hour("00"), Literal(":"), Minute("02"), Literal(" "), TimeZoneName("UTC")],
shortOffset: [Hour("00"), Literal(":"), Minute("00"), Literal(" "), Hour("00"), Literal(":"), Minute("02"), Literal(" "), TimeZoneName("GMT")],
shortGeneric: "shortOffset",
long: [Hour("00"), Literal(":"), Minute("00"), Literal(" "), Hour("00"), Literal(":"), Minute("02"), Literal(" "), TimeZoneName("Coordinated Universal Time")],
longOffset: "shortOffset",
longGeneric: "shortOffset",
},
},
{
start: 0,
end: minutes(2),
timeZone: "America/New_York",
options: {hour: "numeric", minute: "numeric", hour12: false},
timeZoneNames: {
short: [Hour("19"), Literal(":"), Minute("00"), Literal(" "), Hour("19"), Literal(":"), Minute("02"), Literal(" "), TimeZoneName("EST")],
shortOffset: [Hour("19"), Literal(":"), Minute("00"), Literal(" "), Hour("19"), Literal(":"), Minute("02"), Literal(" "), TimeZoneName("GMT-5")],
shortGeneric: [Hour("19"), Literal(":"), Minute("00"), Literal(" "), Hour("19"), Literal(":"), Minute("02"), Literal(" "), TimeZoneName("ET")],
long: [Hour("19"), Literal(":"), Minute("00"), Literal(" "), Hour("19"), Literal(":"), Minute("02"), Literal(" "), TimeZoneName("Eastern Standard Time")],
longOffset: [Hour("19"), Literal(":"), Minute("00"), Literal(" "), Hour("19"), Literal(":"), Minute("02"), Literal(" "), TimeZoneName("GMT-05:00")],
longGeneric: [Hour("19"), Literal(":"), Minute("00"), Literal(" "), Hour("19"), Literal(":"), Minute("02"), Literal(" "), TimeZoneName("Eastern Time")],
},
},
],
"fr": [
{
start: 0,
end: hours(2),
timeZone: "UTC",
options: {hour: "numeric", hour12: false},
timeZoneNames: {
short: [Hour("00"), Literal(" "), Hour("02"), Literal(" "), TimeZoneName("UTC")],
shortOffset: "short",
shortGeneric: "short",
long: [Hour("00"), Literal(" "), Hour("02"), Literal(" "), TimeZoneName("temps universel coordonné")],
longOffset: "short",
longGeneric: "short",
},
},
{
start: minutes(15),
end: hours(5) + minutes(30),
timeZone: "Europe/Paris",
options: {hour: "numeric", minute: "numeric", hour12: false},
timeZoneNames: {
short: [Hour("01"), Literal(":"), Minute("15"), Literal(" "), Hour("06"), Literal(":"), Minute("30"), Literal(" "), TimeZoneName("UTC+1")],
shortOffset: "short",
shortGeneric: [Hour("01"), Literal(":"), Minute("15"), Literal(" "), Hour("06"), Literal(":"), Minute("30"), Literal(" "), TimeZoneName("heure : France")],
long: [Hour("01"), Literal(":"), Minute("15"), Literal(" "), Hour("06"), Literal(":"), Minute("30"), Literal(" "), TimeZoneName("heure normale dEurope centrale")],
longOffset: [Hour("01"), Literal(":"), Minute("15"), Literal(" "), Hour("06"), Literal(":"), Minute("30"), Literal(" "), TimeZoneName("UTC+01:00")],
longGeneric: "long",
},
},
],
"de": [
{
start: 0,
end: hours(2),
timeZone: "UTC",
options: {hour: "numeric", hour12: false},
timeZoneNames: {
short: [Hour("00"), Literal(""), Hour("02"), Literal(" Uhr "), TimeZoneName("UTC")],
shortOffset: [Hour("00"), Literal(""), Hour("02"), Literal(" Uhr "), TimeZoneName("GMT")],
shortGeneric: "shortOffset",
long: [Hour("00"), Literal(""), Hour("02"), Literal(" Uhr "), TimeZoneName("Koordinierte Weltzeit")],
longOffset: "shortOffset",
longGeneric: "shortOffset",
},
},
{
start: hours(5) + minutes(15),
end: hours(5) + minutes(30),
timeZone: "Europe/Berlin",
options: {hour: "numeric", minute: "numeric", hour12: false},
timeZoneNames: {
short: [Hour("06"), Literal(":"), Minute("15"), Literal(""), Hour("06"), Literal(":"), Minute("30"), Literal(" Uhr "), TimeZoneName("MEZ")],
shortOffset: [Hour("06"), Literal(":"), Minute("15"), Literal(""), Hour("06"), Literal(":"), Minute("30"), Literal(" Uhr "), TimeZoneName("GMT+1")],
shortGeneric: "short",
long: [Hour("06"), Literal(":"), Minute("15"), Literal(""), Hour("06"), Literal(":"), Minute("30"), Literal(" Uhr "), TimeZoneName("Mitteleuropäische Normalzeit")],
longOffset: [Hour("06"), Literal(":"), Minute("15"), Literal(""), Hour("06"), Literal(":"), Minute("30"), Literal(" Uhr "), TimeZoneName("GMT+01:00")],
longGeneric: "long",
},
},
],
};
for (let [locale, formats] of Object.entries(tests)) {
for (let {start, end, timeZone, options, timeZoneNames} of formats) {
for (let [timeZoneName, format] of Object.entries(timeZoneNames)) {
let df = new Intl.DateTimeFormat(locale, {timeZone, timeZoneName, ...options});
if (typeof format === "string") {
format = timeZoneNames[format];
}
assertRangeParts(df, start, end, format);
}
}
}
if (typeof reportCompare === "function")
reportCompare(0, 0, "ok");

View File

@ -121,3 +121,16 @@ function assertParts(df, x, expected) {
}
}
}
function assertRangeParts(df, start, end, expected) {
var parts = df.formatRangeToParts(start, end);
assertEq(parts.map(part => part.value).join(""), df.formatRange(start, end),
"formatRangeToParts and formatRange must agree");
var len = parts.length;
assertEq(len, expected.length, "parts count mismatch");
for (var i = 0; i < len; i++) {
assertEq(parts[i].type, expected[i].type, "type mismatch at " + i);
assertEq(parts[i].value, expected[i].value, "value mismatch at " + i);
}
}