Bug 1531091: Append Unicode extensions without values and remove "true" value from keywords. r=jwalden

- Implement the spec changes from <https://github.com/tc39/ecma402/pull/302>.
- Additionally for the test262 test to pass, we also need to implement parts of
  the Unicode extension canonicalisation from bug 1522070. Namely removing the
  value "true" from keywords.
- The two helper functions were copied from the Intl.Locale patch.
- `CanonicalizeUnicodeExtension` was slightly modified to allow duplicate
  keywords, which is required by spec.

Differential Revision: https://phabricator.services.mozilla.com/D23156

--HG--
extra : moz-landing-system : lando
This commit is contained in:
André Bargull 2019-04-04 14:07:14 +00:00
parent 663f9a19bc
commit 6abb73e9ae
2 changed files with 175 additions and 5 deletions

View File

@ -648,6 +648,21 @@ function CanonicalizeLanguageTagFromObject(localeObj) {
// "u-ca-chinese-t-zh-latn" -> "t-zh-latn-u-ca-chinese"
callFunction(ArraySort, extensions);
// Canonicalize Unicode locale extension subtag if present.
for (var i = 0; i < extensions.length; i++) {
var ext = extensions[i];
assert(ext === callFunction(std_String_toLowerCase, ext),
"extension subtags must be in lower-case");
assert(ext[1] === "-",
"extension subtags start with a singleton");
if (ext[0] === "u") {
var {attributes, keywords} = UnicodeExtensionComponents(ext);
extensions[i] = CanonicalizeUnicodeExtension(attributes, keywords);
break;
}
}
canonical += "-" + callFunction(std_Array_join, extensions, "-");
}
@ -658,6 +673,163 @@ function CanonicalizeLanguageTagFromObject(localeObj) {
return canonical;
}
/**
* Intl.Locale proposal
*
* UnicodeExtensionComponents( extension )
*
* Returns the components of |extension| where |extension| is a "Unicode locale
* extension sequence" (ECMA-402, 6.2.1) without the starting separator
* character.
*/
function UnicodeExtensionComponents(extension) {
assert(typeof extension === "string", "extension is a String value");
// Step 1.
var attributes = [];
// Step 2.
var keywords = [];
// Step 3.
var isKeyword = false;
// Step 4.
var size = extension.length;
// Step 5.
// |extension| starts with "u-" instead of "-u-" in our implementation, so
// we need to initialize |k| with 2 instead of 3.
assert(callFunction(std_String_startsWith, extension, "u-"),
"extension starts with 'u-'");
var k = 2;
// Step 6.
var key, value;
while (k < size) {
// Step 6.a.
var e = callFunction(std_String_indexOf, extension, "-", k);
// Step 6.b.
var len = (e < 0 ? size : e) - k;
// Step 6.c.
var subtag = Substring(extension, k, len);
// Steps 6.d-e.
if (!isKeyword) {
// Step 6.d.
// NB: Duplicates are handled elsewhere in our implementation.
if (len !== 2)
_DefineDataProperty(attributes, attributes.length, subtag);
} else {
// Steps 6.e.i-ii.
if (len === 2) {
// Step 6.e.i.1.
// NB: Duplicates are handled elsewhere in our implementation.
_DefineDataProperty(keywords, keywords.length, {key, value});
} else {
// Step 6.e.ii.1.
if (value !== "")
value += "-";
// Step 6.e.ii.2.
value += subtag;
}
}
// Step 6.f.
if (len === 2) {
// Step 6.f.i.
isKeyword = true;
// Step 6.f.ii.
key = subtag;
// Step 6.f.iii.
value = "";
}
// Step 6.g.
k += len + 1;
}
// Step 7.
if (isKeyword) {
// Step 7.a.
// NB: Duplicates are handled elsewhere in our implementation.
_DefineDataProperty(keywords, keywords.length, {key, value});
}
// Step 8.
return {attributes, keywords};
}
/**
* CanonicalizeUnicodeExtension( attributes, keywords )
*
* Canonical form per <https://unicode.org/reports/tr35/#u_Extension>:
*
* - All attributes are sorted in alphabetical order.
* - All keywords are sorted by alphabetical order of keys.
* - All keywords are in lowercase.
* - Note: The parser already converted keywords to lowercase.
* - All keys and types use the canonical form (from the name attribute;
* see Section 3.6.4 U Extension Data Files).
* - Note: Not yet implemented (bug 1522070).
* - Type value "true" is removed.
*/
function CanonicalizeUnicodeExtension(attributes, keywords) {
assert(attributes.length > 0 || keywords.length > 0,
"unexpected empty Unicode locale extension components");
// All attributes are sorted in alphabetical order.
if (attributes.length > 1)
callFunction(ArraySort, attributes);
// All keywords are sorted by alphabetical order of keys.
if (keywords.length > 1) {
function UnicodeKeySort(left, right) {
var leftKey = left.key;
var rightKey = right.key;
assert(leftKey.length === 2, "left key is a Unicode key");
assert(rightKey.length === 2, "right key is a Unicode key");
// Compare both strings using charCodeAt(), because relational
// string comparison always calls into the VM, whereas charCodeAt
// can be inlined by Ion.
var diff = callFunction(std_String_charCodeAt, leftKey, 0) -
callFunction(std_String_charCodeAt, rightKey, 0);
if (diff === 0) {
diff = callFunction(std_String_charCodeAt, leftKey, 1) -
callFunction(std_String_charCodeAt, rightKey, 1);
}
return diff;
}
callFunction(ArraySort, keywords, UnicodeKeySort);
}
var extension = "u";
// Append all attributes.
for (var i = 0; i < attributes.length; i++) {
extension += "-" + attributes[i];
}
// Append all keywords.
for (var i = 0; i < keywords.length; i++) {
var {key, value} = keywords[i];
extension += "-" + key;
// Type value "true" is removed.
if (value !== "" && value !== "true")
extension += "-" + value;
}
return extension;
}
/**
* Canonicalizes the given structurally valid BCP 47 language tag, including
* regularized case of subtags. For example, the language tag
@ -1193,8 +1365,10 @@ function ResolveLocale(availableLocales, requestedLocales, options, relevantExte
// According to the LDML spec, if there's no type value,
// and true is an allowed value, it's used.
if (callFunction(ArrayIndexOf, keyLocaleData, "true") !== -1)
if (callFunction(ArrayIndexOf, keyLocaleData, "true") !== -1) {
value = "true";
supportedExtensionAddition = "-" + key;
}
}
}
}

View File

@ -558,10 +558,6 @@ skip script test262/intl402/Intl/getCanonicalLocales/non-iana-canon.js
# https://bugzilla.mozilla.org/show_bug.cgi?id=1508684
skip script test262/language/expressions/import.meta/syntax/invalid-assignment-target-update-expr.js
# https://github.com/tc39/test262/pull/1995 seemingly needs SpiderMonkey changes
# before we can run the updated test -- bug 1531091.
skip script test262/intl402/Collator/missing-unicode-ext-value-defaults-to-true.js
###########################################################
# Tests disabled due to issues in test262 importer script #