mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-23 12:51:06 +00:00
Backed out 9 changesets (bug 1719746, bug 1735341) as requested by dev. CLOSED TREE
Backed out changeset f1b3e7dec7e2 (bug 1735341)
Backed out changeset e7675e3524da (bug 1719746)
Backed out changeset 05109157a4b5 (bug 1719746)
Backed out changeset b98f10477f44 (bug 1719746)
Backed out changeset dce9c0d6c79c (bug 1719746)
Backed out changeset 472767f43cad (bug 1719746)
Backed out changeset 4fa55bded471 (bug 1719746)
Backed out changeset 391c305dce1f (bug 1719746)
Backed out changeset 5f20632de2be (bug 1719746)
This commit is contained in:
parent
04fb36a97c
commit
7495be3566
@ -4,7 +4,7 @@ build/clang-plugin/.*
|
||||
config/gcc-stl-wrapper.template.h
|
||||
config/msvc-stl-wrapper.template.h
|
||||
# Generated code
|
||||
intl/components/src/LocaleGenerated.cpp
|
||||
js/src/builtin/intl/LanguageTagGenerated.cpp
|
||||
js/src/builtin/intl/TimeZoneDataGenerated.h
|
||||
|
||||
# Don't want to reformat irregexp (third-party code)
|
||||
|
@ -4,85 +4,12 @@
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
#include "mozilla/intl/Locale.h"
|
||||
#include "mozilla/Span.h"
|
||||
|
||||
#include "TestBuffer.h"
|
||||
#include <stdint.h>
|
||||
#include <string_view>
|
||||
|
||||
namespace mozilla::intl {
|
||||
|
||||
TEST(IntlLocale, LocaleSettersAndGetters)
|
||||
{
|
||||
Locale locale;
|
||||
locale.setLanguage("fr");
|
||||
locale.setRegion("CA");
|
||||
locale.setScript("Latn");
|
||||
ASSERT_TRUE(locale.setUnicodeExtension("u-ca-gregory"));
|
||||
ASSERT_TRUE(locale.language().equalTo("fr"));
|
||||
ASSERT_TRUE(locale.region().equalTo("CA"));
|
||||
ASSERT_TRUE(locale.script().equalTo("Latn"));
|
||||
ASSERT_EQ(MakeStringSpan(locale.unicodeExtension()),
|
||||
MakeStringSpan("u-ca-gregory"));
|
||||
|
||||
TestBuffer<char> buffer;
|
||||
ASSERT_TRUE(locale.toString(buffer).isOk());
|
||||
ASSERT_TRUE(buffer.verboseMatches("fr-Latn-CA-u-ca-gregory"));
|
||||
|
||||
// No setters for variants or other extensions...
|
||||
Locale locale2;
|
||||
ASSERT_TRUE(LocaleParser::tryParse(
|
||||
MakeStringSpan("fr-CA-fonipa-t-es-AR-h0-hybrid"), locale2)
|
||||
.isOk());
|
||||
ASSERT_EQ(MakeStringSpan(locale2.variants()[0].get()),
|
||||
MakeStringSpan("fonipa"));
|
||||
ASSERT_EQ(MakeStringSpan(locale2.extensions()[0].get()),
|
||||
MakeStringSpan("t-es-AR-h0-hybrid"));
|
||||
locale2.clearVariants();
|
||||
ASSERT_EQ(locale2.variants().length(), 0UL);
|
||||
}
|
||||
|
||||
TEST(IntlLocale, LocaleParser)
|
||||
{
|
||||
const char* tags[] = {
|
||||
"en-US", "en-GB", "es-AR", "it", "zh-Hans-CN",
|
||||
"de-AT", "pl", "fr-FR", "de-AT", "sr-Cyrl-SR",
|
||||
"nb-NO", "fr-FR", "mk", "uk", "und-PL",
|
||||
"und-Latn-AM", "ug-Cyrl", "sr-ME", "mn-Mong", "lif-Limb",
|
||||
"gan", "zh-Hant", "yue-Hans", "unr", "unr-Deva",
|
||||
"und-Thai-CN", "ug-Cyrl", "en-Latn-DE", "pl-FR", "de-CH",
|
||||
"tuq", "sr-ME", "ng", "klx", "kk-Arab",
|
||||
"en-Cyrl", "und-Cyrl-UK", "und-Arab", "und-Arab-FO"};
|
||||
|
||||
Locale locale;
|
||||
for (const auto* tag : tags) {
|
||||
ASSERT_TRUE(LocaleParser::tryParse(MakeStringSpan(tag), locale).isOk());
|
||||
}
|
||||
}
|
||||
|
||||
TEST(IntlLocale, LikelySubtags)
|
||||
{
|
||||
Locale locale;
|
||||
ASSERT_TRUE(LocaleParser::tryParse(MakeStringSpan("zh"), locale).isOk());
|
||||
ASSERT_TRUE(locale.addLikelySubtags());
|
||||
TestBuffer<char> buffer;
|
||||
ASSERT_TRUE(locale.toString(buffer).isOk());
|
||||
ASSERT_TRUE(buffer.verboseMatches("zh-Hans-CN"));
|
||||
ASSERT_TRUE(locale.removeLikelySubtags());
|
||||
buffer.clear();
|
||||
ASSERT_TRUE(locale.toString(buffer).isOk());
|
||||
ASSERT_TRUE(buffer.verboseMatches("zh"));
|
||||
}
|
||||
|
||||
TEST(IntlLocale, Canonicalize)
|
||||
{
|
||||
Locale locale;
|
||||
ASSERT_TRUE(
|
||||
LocaleParser::tryParse(MakeStringSpan("nob-bokmal"), locale).isOk());
|
||||
ASSERT_TRUE(locale.canonicalize().isOk());
|
||||
TestBuffer<char> buffer;
|
||||
ASSERT_TRUE(locale.toString(buffer).isOk());
|
||||
ASSERT_TRUE(buffer.verboseMatches("nb"));
|
||||
}
|
||||
|
||||
// These tests are dependent on the machine that this test is being run on.
|
||||
TEST(IntlLocale, SystemDependentTests)
|
||||
{
|
||||
|
@ -35,9 +35,7 @@ UNIFIED_SOURCES += [
|
||||
"src/ICU4CGlue.cpp",
|
||||
"src/ICU4CLibrary.cpp",
|
||||
"src/ListFormat.cpp",
|
||||
"src/Locale.cpp",
|
||||
"src/LocaleCanonicalizer.cpp",
|
||||
"src/LocaleGenerated.cpp",
|
||||
"src/MeasureUnit.cpp",
|
||||
"src/NumberFormat.cpp",
|
||||
"src/NumberFormatFields.cpp",
|
||||
|
@ -327,7 +327,7 @@ class DateTimeFormat final {
|
||||
// Write the formatted date into u16Vec.
|
||||
PatternVector u16Vec;
|
||||
|
||||
auto result = FillBufferWithICUCall(
|
||||
auto result = FillVectorWithICUCall(
|
||||
u16Vec, [this, &aUnixEpoch](UChar* target, int32_t length,
|
||||
UErrorCode* status) {
|
||||
return udat_format(mDateFormat, aUnixEpoch, target, length,
|
||||
|
@ -76,6 +76,25 @@ class DateTimePatternGenerator final {
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a skeleton (a string with unordered datetime fields), get a best
|
||||
* pattern that will fit for that locale. This pattern will be filled into the
|
||||
* buffer. e.g. The skeleton "yMd" would return the pattern "M/d/y" for en-US,
|
||||
* or "dd/MM/y" for en-GB.
|
||||
*/
|
||||
template <size_t S>
|
||||
ICUResult GetBestPattern(Span<const char16_t> aSkeleton,
|
||||
Vector<char16_t, S>& aVector,
|
||||
EnumSet<PatternMatchOption> options = {}) {
|
||||
return FillVectorWithICUCall(
|
||||
aVector, [&](UChar* target, int32_t length, UErrorCode* status) {
|
||||
return udatpg_getBestPatternWithOptions(
|
||||
mGenerator.GetMut(), aSkeleton.data(),
|
||||
static_cast<int32_t>(aSkeleton.Length()),
|
||||
toUDateTimePatternMatchOptions(options), target, length, status);
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a skeleton (a string with unordered datetime fields) from a pattern.
|
||||
* For example, both "MMM-dd" and "dd/MMM" produce the skeleton "MMMdd".
|
||||
@ -92,6 +111,23 @@ class DateTimePatternGenerator final {
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a skeleton (a string with unordered datetime fields) from a pattern.
|
||||
* For example, both "MMM-dd" and "dd/MMM" produce the skeleton "MMMdd".
|
||||
*/
|
||||
template <typename V, size_t N, typename A>
|
||||
static ICUResult GetSkeleton(Span<const char16_t> aPattern,
|
||||
Vector<V, N, A>& aVector) {
|
||||
// At one time udatpg_getSkeleton required a UDateTimePatternGenerator*, but
|
||||
// now it is valid to pass in a nullptr.
|
||||
return FillVectorWithICUCall(
|
||||
aVector, [&](UChar* target, int32_t length, UErrorCode* status) {
|
||||
return udatpg_getSkeleton(nullptr, aPattern.data(),
|
||||
static_cast<int32_t>(aPattern.Length()),
|
||||
target, length, status);
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* TODO(Bug 1686965) - Temporarily get the underlying ICU object while
|
||||
* migrating to the unified API. This should be removed when completing the
|
||||
|
@ -159,11 +159,11 @@ class VectorToBufferAdaptor {
|
||||
};
|
||||
|
||||
/**
|
||||
* An overload of FillBufferWithICUCall that accepts a mozilla::Vector rather
|
||||
* than a Buffer.
|
||||
* A variant of FillBufferWithICUCall that accepts a mozilla::Vector rather than
|
||||
* a Buffer.
|
||||
*/
|
||||
template <typename ICUStringFunction, size_t InlineSize, typename CharType>
|
||||
static ICUResult FillBufferWithICUCall(Vector<CharType, InlineSize>& vector,
|
||||
static ICUResult FillVectorWithICUCall(Vector<CharType, InlineSize>& vector,
|
||||
const ICUStringFunction& strFn) {
|
||||
VectorToBufferAdaptor buffer(vector);
|
||||
return FillBufferWithICUCall(buffer, strFn);
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -2,407 +2,22 @@
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
/* Structured representation of Unicode locale IDs used with Intl functions. */
|
||||
|
||||
#ifndef intl_components_Locale_h
|
||||
#define intl_components_Locale_h
|
||||
|
||||
#include "mozilla/Assertions.h"
|
||||
#include "mozilla/intl/ICUError.h"
|
||||
#include "mozilla/intl/ICU4CGlue.h"
|
||||
#include "mozilla/Span.h"
|
||||
#include "mozilla/TextUtils.h"
|
||||
#include "mozilla/TypedEnumBits.h"
|
||||
#include "mozilla/Variant.h"
|
||||
#include "mozilla/Vector.h"
|
||||
#include "mozilla/Result.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <utility>
|
||||
#ifndef intl_components_Locale_h_
|
||||
#define intl_components_Locale_h_
|
||||
|
||||
#include "unicode/uloc.h"
|
||||
|
||||
#include "mozilla/intl/ICU4CGlue.h"
|
||||
|
||||
namespace mozilla::intl {
|
||||
|
||||
/**
|
||||
* Return true if |language| is a valid language subtag.
|
||||
*/
|
||||
template <typename CharT>
|
||||
bool IsStructurallyValidLanguageTag(mozilla::Span<const CharT> language);
|
||||
|
||||
/**
|
||||
* Return true if |script| is a valid script subtag.
|
||||
*/
|
||||
template <typename CharT>
|
||||
bool IsStructurallyValidScriptTag(mozilla::Span<const CharT> script);
|
||||
|
||||
/**
|
||||
* Return true if |region| is a valid region subtag.
|
||||
*/
|
||||
template <typename CharT>
|
||||
bool IsStructurallyValidRegionTag(mozilla::Span<const CharT> region);
|
||||
|
||||
#ifdef DEBUG
|
||||
/**
|
||||
* Return true if |variant| is a valid variant subtag.
|
||||
*/
|
||||
bool IsStructurallyValidVariantTag(mozilla::Span<const char> variant);
|
||||
|
||||
/**
|
||||
* Return true if |extension| is a valid Unicode extension subtag.
|
||||
*/
|
||||
bool IsStructurallyValidUnicodeExtensionTag(
|
||||
mozilla::Span<const char> extension);
|
||||
|
||||
/**
|
||||
* Return true if |privateUse| is a valid private-use subtag.
|
||||
*/
|
||||
bool IsStructurallyValidPrivateUseTag(mozilla::Span<const char> privateUse);
|
||||
|
||||
#endif
|
||||
|
||||
template <typename CharT>
|
||||
char AsciiToLowerCase(CharT c) {
|
||||
MOZ_ASSERT(mozilla::IsAscii(c));
|
||||
return mozilla::IsAsciiUppercaseAlpha(c) ? (c + 0x20) : c;
|
||||
}
|
||||
|
||||
template <typename CharT>
|
||||
char AsciiToUpperCase(CharT c) {
|
||||
MOZ_ASSERT(mozilla::IsAscii(c));
|
||||
return mozilla::IsAsciiLowercaseAlpha(c) ? (c - 0x20) : c;
|
||||
}
|
||||
|
||||
template <typename CharT>
|
||||
void AsciiToLowerCase(CharT* chars, size_t length, char* dest) {
|
||||
char (&fn)(CharT) = AsciiToLowerCase;
|
||||
std::transform(chars, chars + length, dest, fn);
|
||||
}
|
||||
|
||||
template <typename CharT>
|
||||
void AsciiToUpperCase(CharT* chars, size_t length, char* dest) {
|
||||
char (&fn)(CharT) = AsciiToUpperCase;
|
||||
std::transform(chars, chars + length, dest, fn);
|
||||
}
|
||||
|
||||
template <typename CharT>
|
||||
void AsciiToTitleCase(CharT* chars, size_t length, char* dest) {
|
||||
if (length > 0) {
|
||||
AsciiToUpperCase(chars, 1, dest);
|
||||
AsciiToLowerCase(chars + 1, length - 1, dest + 1);
|
||||
}
|
||||
}
|
||||
|
||||
// Constants for language subtag lengths.
|
||||
namespace LanguageTagLimits {
|
||||
|
||||
// unicode_language_subtag = alpha{2,3} | alpha{5,8} ;
|
||||
static constexpr size_t LanguageLength = 8;
|
||||
|
||||
// unicode_script_subtag = alpha{4} ;
|
||||
static constexpr size_t ScriptLength = 4;
|
||||
|
||||
// unicode_region_subtag = (alpha{2} | digit{3}) ;
|
||||
static constexpr size_t RegionLength = 3;
|
||||
static constexpr size_t AlphaRegionLength = 2;
|
||||
static constexpr size_t DigitRegionLength = 3;
|
||||
|
||||
// key = alphanum alpha ;
|
||||
static constexpr size_t UnicodeKeyLength = 2;
|
||||
|
||||
// tkey = alpha digit ;
|
||||
static constexpr size_t TransformKeyLength = 2;
|
||||
|
||||
} // namespace LanguageTagLimits
|
||||
|
||||
// Fixed size language subtag which is stored inline in Locale.
|
||||
template <size_t Length>
|
||||
class LanguageTagSubtag final {
|
||||
uint8_t length_ = 0;
|
||||
char chars_[Length] = {}; // zero initialize
|
||||
|
||||
class Locale final {
|
||||
public:
|
||||
LanguageTagSubtag() = default;
|
||||
|
||||
LanguageTagSubtag(const LanguageTagSubtag&) = delete;
|
||||
LanguageTagSubtag& operator=(const LanguageTagSubtag&) = delete;
|
||||
|
||||
size_t length() const { return length_; }
|
||||
bool missing() const { return length_ == 0; }
|
||||
bool present() const { return length_ > 0; }
|
||||
|
||||
mozilla::Span<const char> span() const { return {chars_, length_}; }
|
||||
|
||||
template <typename CharT>
|
||||
void set(mozilla::Span<const CharT> str) {
|
||||
MOZ_ASSERT(str.size() <= Length);
|
||||
std::copy_n(str.data(), str.size(), chars_);
|
||||
length_ = str.size();
|
||||
}
|
||||
|
||||
// The toXYZCase() methods are using |Length| instead of |length()|, because
|
||||
// current compilers (tested GCC and Clang) can't infer the maximum string
|
||||
// length - even when using hints like |std::min| - and instead are emitting
|
||||
// SIMD optimized code. Using a fixed sized length avoids emitting the SIMD
|
||||
// code. (Emitting SIMD code doesn't make sense here, because the SIMD code
|
||||
// only kicks in for long strings.) A fixed length will additionally ensure
|
||||
// the compiler unrolls the loop in the case conversion code.
|
||||
|
||||
void toLowerCase() { AsciiToLowerCase(chars_, Length, chars_); }
|
||||
|
||||
void toUpperCase() { AsciiToUpperCase(chars_, Length, chars_); }
|
||||
|
||||
void toTitleCase() { AsciiToTitleCase(chars_, Length, chars_); }
|
||||
|
||||
template <size_t N>
|
||||
bool equalTo(const char (&str)[N]) const {
|
||||
static_assert(N - 1 <= Length,
|
||||
"subtag literals must not exceed the maximum subtag length");
|
||||
|
||||
return length_ == N - 1 && memcmp(chars_, str, N - 1) == 0;
|
||||
}
|
||||
};
|
||||
|
||||
using LanguageSubtag = LanguageTagSubtag<LanguageTagLimits::LanguageLength>;
|
||||
using ScriptSubtag = LanguageTagSubtag<LanguageTagLimits::ScriptLength>;
|
||||
using RegionSubtag = LanguageTagSubtag<LanguageTagLimits::RegionLength>;
|
||||
|
||||
using Latin1Char = unsigned char;
|
||||
using UniqueChars = UniquePtr<char[]>;
|
||||
|
||||
/**
|
||||
* Object representing a Unicode BCP 47 locale identifier.
|
||||
*
|
||||
* All subtags are already in canonicalized case.
|
||||
*/
|
||||
class MOZ_STACK_CLASS Locale final {
|
||||
LanguageSubtag language_ = {};
|
||||
ScriptSubtag script_ = {};
|
||||
RegionSubtag region_ = {};
|
||||
|
||||
using VariantsVector = Vector<UniqueChars, 2>;
|
||||
using ExtensionsVector = Vector<UniqueChars, 2>;
|
||||
|
||||
VariantsVector variants_;
|
||||
ExtensionsVector extensions_;
|
||||
UniqueChars privateuse_ = nullptr;
|
||||
|
||||
friend class LocaleParser;
|
||||
|
||||
public:
|
||||
enum class CanonicalizationError : uint8_t {
|
||||
DuplicateVariant,
|
||||
InternalError,
|
||||
OutOfMemory,
|
||||
};
|
||||
|
||||
private:
|
||||
Result<Ok, CanonicalizationError> canonicalizeUnicodeExtension(
|
||||
UniqueChars& unicodeExtension);
|
||||
|
||||
Result<Ok, CanonicalizationError> canonicalizeTransformExtension(
|
||||
UniqueChars& transformExtension);
|
||||
|
||||
public:
|
||||
static bool languageMapping(LanguageSubtag& language);
|
||||
static bool complexLanguageMapping(const LanguageSubtag& language);
|
||||
|
||||
private:
|
||||
static bool scriptMapping(ScriptSubtag& script);
|
||||
static bool regionMapping(RegionSubtag& region);
|
||||
static bool complexRegionMapping(const RegionSubtag& region);
|
||||
|
||||
void performComplexLanguageMappings();
|
||||
void performComplexRegionMappings();
|
||||
[[nodiscard]] bool performVariantMappings();
|
||||
|
||||
[[nodiscard]] bool updateLegacyMappings();
|
||||
|
||||
static bool signLanguageMapping(LanguageSubtag& language,
|
||||
const RegionSubtag& region);
|
||||
|
||||
static const char* replaceTransformExtensionType(
|
||||
mozilla::Span<const char> key, mozilla::Span<const char> type);
|
||||
|
||||
public:
|
||||
/**
|
||||
* Given a Unicode key and type, return the null-terminated preferred
|
||||
* replacement for that type if there is one, or null if there is none, e.g.
|
||||
* in effect
|
||||
* |replaceUnicodeExtensionType("ca", "islamicc") == "islamic-civil"|
|
||||
* and
|
||||
* |replaceUnicodeExtensionType("ca", "islamic-civil") == nullptr|.
|
||||
*/
|
||||
static const char* replaceUnicodeExtensionType(
|
||||
mozilla::Span<const char> key, mozilla::Span<const char> type);
|
||||
|
||||
public:
|
||||
Locale() = default;
|
||||
Locale(const Locale&) = delete;
|
||||
Locale& operator=(const Locale&) = delete;
|
||||
|
||||
const LanguageSubtag& language() const { return language_; }
|
||||
const ScriptSubtag& script() const { return script_; }
|
||||
const RegionSubtag& region() const { return region_; }
|
||||
const auto& variants() const { return variants_; }
|
||||
const auto& extensions() const { return extensions_; }
|
||||
const char* privateuse() const { return privateuse_.get(); }
|
||||
|
||||
/**
|
||||
* Return the Unicode extension subtag or nullptr if not present.
|
||||
*/
|
||||
const char* unicodeExtension() const;
|
||||
|
||||
private:
|
||||
ptrdiff_t unicodeExtensionIndex() const;
|
||||
|
||||
public:
|
||||
/**
|
||||
* Set the language subtag. The input must be a valid language subtag.
|
||||
*/
|
||||
template <size_t N>
|
||||
void setLanguage(const char (&language)[N]) {
|
||||
mozilla::Span<const char> span(language, N - 1);
|
||||
MOZ_ASSERT(IsStructurallyValidLanguageTag(span));
|
||||
language_.set(span);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the language subtag. The input must be a valid language subtag.
|
||||
*/
|
||||
void setLanguage(const LanguageSubtag& language) {
|
||||
MOZ_ASSERT(IsStructurallyValidLanguageTag(language.span()));
|
||||
language_.set(language.span());
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the script subtag. The input must be a valid script subtag.
|
||||
*/
|
||||
template <size_t N>
|
||||
void setScript(const char (&script)[N]) {
|
||||
mozilla::Span<const char> span(script, N - 1);
|
||||
MOZ_ASSERT(IsStructurallyValidScriptTag(span));
|
||||
script_.set(span);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the script subtag. The input must be a valid script subtag or the empty
|
||||
* string.
|
||||
*/
|
||||
void setScript(const ScriptSubtag& script) {
|
||||
MOZ_ASSERT(script.missing() || IsStructurallyValidScriptTag(script.span()));
|
||||
script_.set(script.span());
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the region subtag. The input must be a valid region subtag.
|
||||
*/
|
||||
template <size_t N>
|
||||
void setRegion(const char (&region)[N]) {
|
||||
mozilla::Span<const char> span(region, N - 1);
|
||||
MOZ_ASSERT(IsStructurallyValidRegionTag(span));
|
||||
region_.set(span);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the region subtag. The input must be a valid region subtag or the empty
|
||||
* string.
|
||||
*/
|
||||
void setRegion(const RegionSubtag& region) {
|
||||
MOZ_ASSERT(region.missing() || IsStructurallyValidRegionTag(region.span()));
|
||||
region_.set(region.span());
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes all variant subtags.
|
||||
*/
|
||||
void clearVariants() { variants_.clearAndFree(); }
|
||||
|
||||
/**
|
||||
* Set the Unicode extension subtag. The input must be a valid Unicode
|
||||
* extension subtag.
|
||||
*/
|
||||
[[nodiscard]] bool setUnicodeExtension(const char* extension);
|
||||
|
||||
/**
|
||||
* Remove any Unicode extension subtag if present.
|
||||
*/
|
||||
void clearUnicodeExtension();
|
||||
|
||||
/** Canonicalize the base-name (language, script, region, variant) subtags. */
|
||||
Result<Ok, CanonicalizationError> canonicalizeBaseName();
|
||||
|
||||
/**
|
||||
* Canonicalize all extension subtags.
|
||||
*/
|
||||
Result<Ok, CanonicalizationError> canonicalizeExtensions();
|
||||
|
||||
/**
|
||||
* Canonicalizes the given structurally valid Unicode BCP 47 locale
|
||||
* identifier, including regularized case of subtags. For example, the
|
||||
* locale Zh-haNS-bu-variant2-Variant1-u-ca-chinese-t-Zh-laTN-x-PRIVATE,
|
||||
* where
|
||||
*
|
||||
* Zh ; 2*3ALPHA
|
||||
* -haNS ; ["-" script]
|
||||
* -bu ; ["-" region]
|
||||
* -variant2 ; *("-" variant)
|
||||
* -Variant1
|
||||
* -u-ca-chinese ; *("-" extension)
|
||||
* -t-Zh-laTN
|
||||
* -x-PRIVATE ; ["-" privateuse]
|
||||
*
|
||||
* becomes zh-Hans-MM-variant1-variant2-t-zh-latn-u-ca-chinese-x-private
|
||||
*
|
||||
* Spec: ECMAScript Internationalization API Specification, 6.2.3.
|
||||
*/
|
||||
Result<Ok, CanonicalizationError> canonicalize() {
|
||||
MOZ_TRY(canonicalizeBaseName());
|
||||
return canonicalizeExtensions();
|
||||
}
|
||||
|
||||
/**
|
||||
* Fill the buffer with a string representation of the locale.
|
||||
*/
|
||||
template <typename B>
|
||||
Result<Ok, ICUError> toString(B& buffer) const {
|
||||
static_assert(std::is_same_v<typename B::CharType, char>);
|
||||
|
||||
size_t capacity = toStringCapacity();
|
||||
|
||||
// Attempt to reserve needed capacity
|
||||
if (!buffer.reserve(capacity)) {
|
||||
return Err(ICUError::OutOfMemory);
|
||||
}
|
||||
|
||||
size_t offset = toStringAppend(buffer.data());
|
||||
|
||||
MOZ_ASSERT(capacity == offset);
|
||||
buffer.written(offset);
|
||||
|
||||
return Ok();
|
||||
}
|
||||
|
||||
/**
|
||||
* Add likely-subtags to the locale.
|
||||
*
|
||||
* Spec: <https://www.unicode.org/reports/tr35/#Likely_Subtags>
|
||||
*/
|
||||
[[nodiscard]] bool addLikelySubtags();
|
||||
|
||||
/**
|
||||
* Remove likely-subtags from the locale.
|
||||
*
|
||||
* Spec: <https://www.unicode.org/reports/tr35/#Likely_Subtags>
|
||||
*/
|
||||
[[nodiscard]] bool removeLikelySubtags();
|
||||
Locale() = delete;
|
||||
|
||||
/**
|
||||
* Returns the default locale as an ICU locale identifier. The returned string
|
||||
* is NOT a valid BCP 47 locale!
|
||||
* is NOT a valid BCP 47 language tag!
|
||||
*
|
||||
* Also see <https://unicode-org.github.io/icu/userguide/locale>.
|
||||
*/
|
||||
@ -420,273 +35,8 @@ class MOZ_STACK_CLASS Locale final {
|
||||
return AvailableLocalesEnumeration<uloc_countAvailable,
|
||||
uloc_getAvailable>();
|
||||
}
|
||||
|
||||
private:
|
||||
static UniqueChars DuplicateStringToUniqueChars(const char* s);
|
||||
size_t toStringCapacity() const;
|
||||
size_t toStringAppend(char* buffer) const;
|
||||
};
|
||||
|
||||
/**
|
||||
* Parser for Unicode BCP 47 locale identifiers.
|
||||
*
|
||||
* <https://unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers>
|
||||
*/
|
||||
class MOZ_STACK_CLASS LocaleParser final {
|
||||
public:
|
||||
enum class ParserError : uint8_t {
|
||||
// Input was not parseable as a locale, subtag or extension.
|
||||
NotParseable,
|
||||
// Unable to allocate memory for the parser to operate.
|
||||
OutOfMemory,
|
||||
};
|
||||
|
||||
// Exposed as |public| for |MOZ_MAKE_ENUM_CLASS_BITWISE_OPERATORS|.
|
||||
enum class TokenKind : uint8_t {
|
||||
None = 0b000,
|
||||
Alpha = 0b001,
|
||||
Digit = 0b010,
|
||||
AlphaDigit = 0b011,
|
||||
Error = 0b100
|
||||
};
|
||||
|
||||
private:
|
||||
class Token final {
|
||||
size_t index_;
|
||||
size_t length_;
|
||||
TokenKind kind_;
|
||||
|
||||
public:
|
||||
Token(TokenKind kind, size_t index, size_t length)
|
||||
: index_(index), length_(length), kind_(kind) {}
|
||||
|
||||
TokenKind kind() const { return kind_; }
|
||||
size_t index() const { return index_; }
|
||||
size_t length() const { return length_; }
|
||||
|
||||
bool isError() const { return kind_ == TokenKind::Error; }
|
||||
bool isNone() const { return kind_ == TokenKind::None; }
|
||||
bool isAlpha() const { return kind_ == TokenKind::Alpha; }
|
||||
bool isDigit() const { return kind_ == TokenKind::Digit; }
|
||||
bool isAlphaDigit() const { return kind_ == TokenKind::AlphaDigit; }
|
||||
};
|
||||
|
||||
const char* locale_;
|
||||
size_t length_;
|
||||
size_t index_ = 0;
|
||||
|
||||
explicit LocaleParser(Span<const char> locale)
|
||||
: locale_(locale.data()), length_(locale.size()) {}
|
||||
|
||||
char charAt(size_t index) const { return locale_[index]; }
|
||||
|
||||
// Copy the token characters into |subtag|.
|
||||
template <size_t N>
|
||||
void copyChars(const Token& tok, LanguageTagSubtag<N>& subtag) const {
|
||||
subtag.set(mozilla::Span(locale_ + tok.index(), tok.length()));
|
||||
}
|
||||
|
||||
// Create a string copy of |length| characters starting at |index|.
|
||||
UniqueChars chars(size_t index, size_t length) const;
|
||||
|
||||
// Create a string copy of the token characters.
|
||||
UniqueChars chars(const Token& tok) const {
|
||||
return chars(tok.index(), tok.length());
|
||||
}
|
||||
|
||||
UniqueChars extension(const Token& start, const Token& end) const {
|
||||
MOZ_ASSERT(start.index() < end.index());
|
||||
|
||||
size_t length = end.index() - 1 - start.index();
|
||||
return chars(start.index(), length);
|
||||
}
|
||||
|
||||
Token nextToken();
|
||||
|
||||
// unicode_language_subtag = alpha{2,3} | alpha{5,8} ;
|
||||
//
|
||||
// Four character language subtags are not allowed in Unicode BCP 47 locale
|
||||
// identifiers. Also see the comparison to Unicode CLDR locale identifiers in
|
||||
// <https://unicode.org/reports/tr35/#BCP_47_Conformance>.
|
||||
bool isLanguage(const Token& tok) const {
|
||||
return tok.isAlpha() && ((2 <= tok.length() && tok.length() <= 3) ||
|
||||
(5 <= tok.length() && tok.length() <= 8));
|
||||
}
|
||||
|
||||
// unicode_script_subtag = alpha{4} ;
|
||||
bool isScript(const Token& tok) const {
|
||||
return tok.isAlpha() && tok.length() == 4;
|
||||
}
|
||||
|
||||
// unicode_region_subtag = (alpha{2} | digit{3}) ;
|
||||
bool isRegion(const Token& tok) const {
|
||||
return (tok.isAlpha() && tok.length() == 2) ||
|
||||
(tok.isDigit() && tok.length() == 3);
|
||||
}
|
||||
|
||||
// unicode_variant_subtag = (alphanum{5,8} | digit alphanum{3}) ;
|
||||
bool isVariant(const Token& tok) const {
|
||||
return (5 <= tok.length() && tok.length() <= 8) ||
|
||||
(tok.length() == 4 && mozilla::IsAsciiDigit(charAt(tok.index())));
|
||||
}
|
||||
|
||||
// Returns the code unit of the first character at the given singleton token.
|
||||
// Always returns the lower case form of an alphabetical character.
|
||||
char singletonKey(const Token& tok) const {
|
||||
MOZ_ASSERT(tok.length() == 1);
|
||||
return AsciiToLowerCase(charAt(tok.index()));
|
||||
}
|
||||
|
||||
// extensions = unicode_locale_extensions |
|
||||
// transformed_extensions |
|
||||
// other_extensions ;
|
||||
//
|
||||
// unicode_locale_extensions = sep [uU] ((sep keyword)+ |
|
||||
// (sep attribute)+ (sep keyword)*) ;
|
||||
//
|
||||
// transformed_extensions = sep [tT] ((sep tlang (sep tfield)*) |
|
||||
// (sep tfield)+) ;
|
||||
//
|
||||
// other_extensions = sep [alphanum-[tTuUxX]] (sep alphanum{2,8})+ ;
|
||||
bool isExtensionStart(const Token& tok) const {
|
||||
return tok.length() == 1 && singletonKey(tok) != 'x';
|
||||
}
|
||||
|
||||
// other_extensions = sep [alphanum-[tTuUxX]] (sep alphanum{2,8})+ ;
|
||||
bool isOtherExtensionPart(const Token& tok) const {
|
||||
return 2 <= tok.length() && tok.length() <= 8;
|
||||
}
|
||||
|
||||
// unicode_locale_extensions = sep [uU] ((sep keyword)+ |
|
||||
// (sep attribute)+ (sep keyword)*) ;
|
||||
// keyword = key (sep type)? ;
|
||||
bool isUnicodeExtensionPart(const Token& tok) const {
|
||||
return isUnicodeExtensionKey(tok) || isUnicodeExtensionType(tok) ||
|
||||
isUnicodeExtensionAttribute(tok);
|
||||
}
|
||||
|
||||
// attribute = alphanum{3,8} ;
|
||||
bool isUnicodeExtensionAttribute(const Token& tok) const {
|
||||
return 3 <= tok.length() && tok.length() <= 8;
|
||||
}
|
||||
|
||||
// key = alphanum alpha ;
|
||||
bool isUnicodeExtensionKey(const Token& tok) const {
|
||||
return tok.length() == 2 && mozilla::IsAsciiAlpha(charAt(tok.index() + 1));
|
||||
}
|
||||
|
||||
// type = alphanum{3,8} (sep alphanum{3,8})* ;
|
||||
bool isUnicodeExtensionType(const Token& tok) const {
|
||||
return 3 <= tok.length() && tok.length() <= 8;
|
||||
}
|
||||
|
||||
// tkey = alpha digit ;
|
||||
bool isTransformExtensionKey(const Token& tok) const {
|
||||
return tok.length() == 2 && mozilla::IsAsciiAlpha(charAt(tok.index())) &&
|
||||
mozilla::IsAsciiDigit(charAt(tok.index() + 1));
|
||||
}
|
||||
|
||||
// tvalue = (sep alphanum{3,8})+ ;
|
||||
bool isTransformExtensionPart(const Token& tok) const {
|
||||
return 3 <= tok.length() && tok.length() <= 8;
|
||||
}
|
||||
|
||||
// pu_extensions = sep [xX] (sep alphanum{1,8})+ ;
|
||||
bool isPrivateUseStart(const Token& tok) const {
|
||||
return tok.length() == 1 && singletonKey(tok) == 'x';
|
||||
}
|
||||
|
||||
// pu_extensions = sep [xX] (sep alphanum{1,8})+ ;
|
||||
bool isPrivateUsePart(const Token& tok) const {
|
||||
return 1 <= tok.length() && tok.length() <= 8;
|
||||
}
|
||||
|
||||
// Helper function for use in |parseBaseName| and
|
||||
// |parseTlangInTransformExtension|. Do not use this directly!
|
||||
static Result<Ok, ParserError> internalParseBaseName(LocaleParser& ts,
|
||||
Locale& tag, Token& tok);
|
||||
|
||||
// Parse the `unicode_language_id` production, i.e. the
|
||||
// language/script/region/variants portion of a locale, into |tag|.
|
||||
// |tok| must be the current token.
|
||||
static Result<Ok, ParserError> parseBaseName(LocaleParser& ts, Locale& tag,
|
||||
Token& tok) {
|
||||
return internalParseBaseName(ts, tag, tok);
|
||||
}
|
||||
|
||||
// Parse the `tlang` production within a previously parsed 't' transform extension.
|
||||
// The precise requirements for "previously parsed" are:
|
||||
//
|
||||
// * the input begins from current token |tok| with a valid `tlang`
|
||||
// * the `tlang` is wholly lowercase (*not* canonical case)
|
||||
// * variant subtags in the `tlang` may contain duplicates and be
|
||||
// unordered
|
||||
//
|
||||
// Return an error on internal failure. Otherwise, return a success value. If
|
||||
// there was no `tlang`, then |tag.language().missing()|. But if there was a
|
||||
// `tlang`, then |tag| is filled with subtags exactly as they appeared in the
|
||||
// parse input.
|
||||
static Result<Ok, ParserError> parseTlangInTransformExtension(
|
||||
LocaleParser& ts, Locale& tag, Token& tok) {
|
||||
MOZ_ASSERT(ts.isLanguage(tok));
|
||||
return internalParseBaseName(ts, tag, tok);
|
||||
}
|
||||
|
||||
friend class Locale;
|
||||
|
||||
class Range final {
|
||||
size_t begin_;
|
||||
size_t length_;
|
||||
|
||||
public:
|
||||
Range(size_t begin, size_t length) : begin_(begin), length_(length) {}
|
||||
|
||||
template <typename T>
|
||||
T* begin(T* ptr) const {
|
||||
return ptr + begin_;
|
||||
}
|
||||
|
||||
size_t length() const { return length_; }
|
||||
};
|
||||
|
||||
using TFieldVector = Vector<Range, 8>;
|
||||
using AttributesVector = Vector<Range, 8>;
|
||||
using KeywordsVector = Vector<Range, 8>;
|
||||
|
||||
// Parse |extension|, which must be a validated, fully lowercase
|
||||
// `transformed_extensions` subtag, and fill |tag| and |fields| from the
|
||||
// `tlang` and `tfield` components. Data in |tag| is lowercase, consistent
|
||||
// with |extension|.
|
||||
static Result<Ok, ParserError> parseTransformExtension(
|
||||
mozilla::Span<const char> extension, Locale& tag, TFieldVector& fields);
|
||||
|
||||
// Parse |extension|, which must be a validated, fully lowercase
|
||||
// `unicode_locale_extensions` subtag, and fill |attributes| and |keywords|
|
||||
// from the `attribute` and `keyword` components.
|
||||
static Result<Ok, ParserError> parseUnicodeExtension(
|
||||
mozilla::Span<const char> extension, AttributesVector& attributes,
|
||||
KeywordsVector& keywords);
|
||||
|
||||
public:
|
||||
// Parse the input string as a locale.
|
||||
static Result<Ok, ParserError> tryParse(Span<const char> locale, Locale& tag);
|
||||
|
||||
// Parse the input string as the base-name parts (language, script, region,
|
||||
// variants) of a locale.
|
||||
static Result<Ok, ParserError> tryParseBaseName(Span<const char> locale,
|
||||
Locale& tag);
|
||||
|
||||
// Return Ok() iff |extension| can be parsed as a Unicode extension subtag.
|
||||
static Result<Ok, ParserError> canParseUnicodeExtension(
|
||||
Span<const char> extension);
|
||||
|
||||
// Return Ok() iff |unicodeType| can be parsed as a Unicode extension type.
|
||||
static Result<Ok, ParserError> canParseUnicodeExtensionType(
|
||||
Span<const char> unicodeType);
|
||||
};
|
||||
|
||||
MOZ_MAKE_ENUM_CLASS_BITWISE_OPERATORS(LocaleParser::TokenKind)
|
||||
|
||||
} // namespace mozilla::intl
|
||||
|
||||
#endif /* intl_components_Locale_h */
|
||||
#endif
|
||||
|
@ -11,7 +11,7 @@ namespace mozilla::intl {
|
||||
/* static */
|
||||
ICUResult LocaleCanonicalizer::CanonicalizeICULevel1(
|
||||
const char* aLocaleIn, LocaleCanonicalizer::Vector& aLocaleOut) {
|
||||
auto result = FillBufferWithICUCall(
|
||||
auto result = FillVectorWithICUCall(
|
||||
aLocaleOut,
|
||||
[&aLocaleIn](char* target, int32_t length, UErrorCode* status) {
|
||||
return uloc_canonicalize(aLocaleIn, target, length, status);
|
||||
|
@ -91,7 +91,7 @@ class RelativeTimeFormat final {
|
||||
if constexpr (std::is_same<typename B::CharType, char>::value) {
|
||||
mozilla::Vector<char16_t, StackU16VectorSize> u16Vec;
|
||||
|
||||
MOZ_TRY(FillBufferWithICUCall(
|
||||
MOZ_TRY(FillVectorWithICUCall(
|
||||
u16Vec, [this, aNumber, aUnit, fmt](UChar* target, int32_t length,
|
||||
UErrorCode* status) {
|
||||
return fmt(mFormatter, aNumber, ToURelativeDateTimeUnit(aUnit),
|
||||
|
@ -268,14 +268,14 @@ Result<bool, ICUError> TimeZone::SetDefaultTimeZone(
|
||||
|
||||
// Retrieve the current default time zone in case we need to restore it.
|
||||
TimeZoneIdentifierVector defaultTimeZone;
|
||||
MOZ_TRY(FillBufferWithICUCall(defaultTimeZone, ucal_getDefaultTimeZone));
|
||||
MOZ_TRY(FillVectorWithICUCall(defaultTimeZone, ucal_getDefaultTimeZone));
|
||||
|
||||
// Try to set the new time zone.
|
||||
MOZ_TRY(mozilla::intl::SetDefaultTimeZone(tzid));
|
||||
|
||||
// Check if the time zone was actually applied.
|
||||
TimeZoneIdentifierVector newTimeZone;
|
||||
MOZ_TRY(FillBufferWithICUCall(newTimeZone, ucal_getDefaultTimeZone));
|
||||
MOZ_TRY(FillVectorWithICUCall(newTimeZone, ucal_getDefaultTimeZone));
|
||||
|
||||
// Return if the new time zone was successfully applied.
|
||||
if (!IsUnknownTimeZone(newTimeZone)) {
|
||||
@ -296,7 +296,7 @@ ICUResult TimeZone::SetDefaultTimeZoneFromHostTimeZone() {
|
||||
}
|
||||
#else
|
||||
TimeZoneIdentifierVector hostTimeZone;
|
||||
MOZ_TRY(FillBufferWithICUCall(hostTimeZone, ucal_getHostTimeZone));
|
||||
MOZ_TRY(FillVectorWithICUCall(hostTimeZone, ucal_getHostTimeZone));
|
||||
|
||||
MOZ_TRY(mozilla::intl::SetDefaultTimeZone(hostTimeZone));
|
||||
#endif
|
||||
|
@ -230,7 +230,7 @@ Use ``make_intl_data.py``\ ’s ``langtags`` mode to update language tag informa
|
||||
$ export PYTHONPATH="$topsrcdir/third_party/python/PyYAML/lib3/"
|
||||
$ python3 ./make_intl_data.py langtags
|
||||
|
||||
The CLDR version used will be printed in the header of CLDR-sensitive generated files. For example, ``intl/components/src/LocaleGenerated.cpp`` currently begins with:
|
||||
The CLDR version used will be printed in the header of CLDR-sensitive generated files. For example, ``js/src/builtin/intl/LanguageTagGenerated.cpp`` currently begins with:
|
||||
|
||||
.. code:: cpp
|
||||
|
||||
|
@ -583,7 +583,7 @@ MSG_DEF(JSMSG_TRACELOGGER_ENABLE_FAIL, 1, JSEXN_ERR, "enabling tracelogger faile
|
||||
|
||||
// Intl
|
||||
MSG_DEF(JSMSG_DATE_NOT_FINITE, 2, JSEXN_RANGEERR, "date value is not finite in {0}.{1}()")
|
||||
MSG_DEF(JSMSG_DUPLICATE_VARIANT_SUBTAG, 0, JSEXN_RANGEERR, "duplicate variant subtag")
|
||||
MSG_DEF(JSMSG_DUPLICATE_VARIANT_SUBTAG, 1, JSEXN_RANGEERR, "duplicate variant subtag: {0}")
|
||||
MSG_DEF(JSMSG_INTERNAL_INTL_ERROR, 0, JSEXN_ERR, "internal error while computing Intl data")
|
||||
MSG_DEF(JSMSG_INVALID_CURRENCY_CODE, 1, JSEXN_RANGEERR, "invalid currency code in NumberFormat(): {0}")
|
||||
MSG_DEF(JSMSG_INVALID_UNIT_IDENTIFIER, 1, JSEXN_RANGEERR, "invalid unit identifier in NumberFormat(): {0}")
|
||||
|
@ -10,12 +10,10 @@
|
||||
|
||||
#include "mozilla/Assertions.h"
|
||||
#include "mozilla/intl/Collator.h"
|
||||
#include "mozilla/intl/Locale.h"
|
||||
#include "mozilla/Span.h"
|
||||
|
||||
#include "builtin/Array.h"
|
||||
#include "builtin/intl/CommonFunctions.h"
|
||||
#include "builtin/intl/FormatBuffer.h"
|
||||
#include "builtin/intl/LanguageTag.h"
|
||||
#include "builtin/intl/ScopedICUObject.h"
|
||||
#include "builtin/intl/SharedIntlData.h"
|
||||
@ -243,12 +241,9 @@ static mozilla::intl::Collator* NewIntlCollator(
|
||||
}
|
||||
if (StringEqualsLiteral(usage, "search")) {
|
||||
// ICU expects search as a Unicode locale extension on locale.
|
||||
mozilla::intl::Locale tag;
|
||||
if (mozilla::intl::LocaleParser::tryParse(
|
||||
mozilla::MakeStringSpan(locale.get()), tag)
|
||||
.isErr()) {
|
||||
JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
|
||||
JSMSG_INVALID_LANGUAGE_TAG, locale.get());
|
||||
intl::LanguageTag tag(cx);
|
||||
if (!intl::LanguageTagParser::parse(
|
||||
cx, mozilla::MakeStringSpan(locale.get()), tag)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
@ -266,13 +261,7 @@ static mozilla::intl::Collator* NewIntlCollator(
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
intl::FormatBuffer<char> buffer(cx);
|
||||
if (auto result = tag.toString(buffer); result.isErr()) {
|
||||
intl::ReportInternalError(cx, result.unwrapErr());
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
locale = buffer.extractStringZ();
|
||||
locale = tag.toStringZ(cx);
|
||||
if (!locale) {
|
||||
return nullptr;
|
||||
}
|
||||
|
@ -13,7 +13,6 @@
|
||||
#include "mozilla/intl/Calendar.h"
|
||||
#include "mozilla/intl/DateTimeFormat.h"
|
||||
#include "mozilla/intl/DateTimePatternGenerator.h"
|
||||
#include "mozilla/intl/Locale.h"
|
||||
#include "mozilla/intl/TimeZone.h"
|
||||
#include "mozilla/Range.h"
|
||||
#include "mozilla/Span.h"
|
||||
@ -521,14 +520,14 @@ static UniqueChars DateTimeFormatLocale(
|
||||
// ICU expects calendar, numberingSystem, and hourCycle as Unicode locale
|
||||
// extensions on locale.
|
||||
|
||||
mozilla::intl::Locale tag;
|
||||
intl::LanguageTag tag(cx);
|
||||
{
|
||||
RootedLinearString locale(cx, value.toString()->ensureLinear(cx));
|
||||
JSLinearString* locale = value.toString()->ensureLinear(cx);
|
||||
if (!locale) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (!intl::ParseLocale(cx, locale, tag)) {
|
||||
if (!intl::LanguageTagParser::parse(cx, locale, tag)) {
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
@ -596,12 +595,7 @@ static UniqueChars DateTimeFormatLocale(
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
FormatBuffer<char> buffer(cx);
|
||||
if (auto result = tag.toString(buffer); result.isErr()) {
|
||||
intl::ReportInternalError(cx, result.unwrapErr());
|
||||
return nullptr;
|
||||
}
|
||||
return buffer.extractStringZ();
|
||||
return tag.toStringZ(cx);
|
||||
}
|
||||
|
||||
static bool AssignTextComponent(
|
||||
|
@ -10,7 +10,6 @@
|
||||
|
||||
#include "mozilla/Assertions.h"
|
||||
#include "mozilla/intl/DateTimePatternGenerator.h"
|
||||
#include "mozilla/intl/Locale.h"
|
||||
#include "mozilla/Span.h"
|
||||
#include "mozilla/TextUtils.h"
|
||||
|
||||
@ -22,11 +21,9 @@
|
||||
#include "jspubtd.h"
|
||||
|
||||
#include "builtin/intl/CommonFunctions.h"
|
||||
#include "builtin/intl/FormatBuffer.h"
|
||||
#include "builtin/intl/LanguageTag.h"
|
||||
#include "builtin/intl/ScopedICUObject.h"
|
||||
#include "builtin/intl/SharedIntlData.h"
|
||||
#include "builtin/intl/StringAsciiChars.h"
|
||||
#include "builtin/String.h"
|
||||
#include "gc/AllocKind.h"
|
||||
#include "gc/FreeOp.h"
|
||||
@ -73,8 +70,6 @@ using namespace js;
|
||||
using js::intl::CallICU;
|
||||
using js::intl::IcuLocale;
|
||||
|
||||
using mozilla::intl::LocaleParser;
|
||||
|
||||
const JSClassOps DisplayNamesObject::classOps_ = {nullptr, /* addProperty */
|
||||
nullptr, /* delProperty */
|
||||
nullptr, /* enumerate */
|
||||
@ -337,54 +332,27 @@ static void ReportInvalidOptionError(JSContext* cx, const char* type,
|
||||
}
|
||||
}
|
||||
|
||||
// Try to parse |languageStr| as the base-name part of a locale into |tag|.
//
// Returns true on success. Returns false when initializing the ASCII character
// view fails, or — after reporting an invalid "language" option error — when
// the string is not ASCII or cannot be parsed as a base name.
static bool TryParseBaseName(JSContext* cx, HandleLinearString languageStr,
                             mozilla::intl::Locale& tag) {
  bool parsed = false;

  if (StringIsAscii(languageStr)) {
    // Keep the character view confined to this scope so that it is destroyed
    // before any error is reported below.
    intl::StringAsciiChars asciiChars(languageStr);
    if (!asciiChars.init(cx)) {
      return false;
    }

    parsed = LocaleParser::tryParseBaseName(asciiChars, tag).isOk();
  }

  if (parsed) {
    return true;
  }

  ReportInvalidOptionError(cx, "language", languageStr);
  return false;
}
|
||||
|
||||
static JSString* GetLanguageDisplayName(
|
||||
JSContext* cx, Handle<DisplayNamesObject*> displayNames, const char* locale,
|
||||
DisplayNamesStyle displayStyle, DisplayNamesLanguageDisplay languageDisplay,
|
||||
DisplayNamesFallback fallback, HandleLinearString languageStr) {
|
||||
mozilla::intl::Locale tag;
|
||||
if (!TryParseBaseName(cx, languageStr, tag)) {
|
||||
bool ok;
|
||||
intl::LanguageTag tag(cx);
|
||||
JS_TRY_VAR_OR_RETURN_NULL(
|
||||
cx, ok, intl::LanguageTagParser::tryParseBaseName(cx, languageStr, tag));
|
||||
if (!ok) {
|
||||
ReportInvalidOptionError(cx, "language", languageStr);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// ICU always canonicalizes the input locale, but since we know that ICU's
|
||||
// canonicalization is incomplete, we need to perform our own canonicalization
|
||||
// to ensure consistent result.
|
||||
if (auto result = tag.canonicalizeBaseName(); result.isErr()) {
|
||||
if (result.unwrapErr() ==
|
||||
mozilla::intl::Locale::CanonicalizationError::DuplicateVariant) {
|
||||
JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
|
||||
JSMSG_DUPLICATE_VARIANT_SUBTAG);
|
||||
} else {
|
||||
intl::ReportInternalError(cx);
|
||||
}
|
||||
|
||||
if (!tag.canonicalizeBaseName(cx)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
intl::FormatBuffer<char> buffer(cx);
|
||||
if (auto result = tag.toString(buffer); result.isErr()) {
|
||||
intl::ReportInternalError(cx, result.unwrapErr());
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
UniqueChars languageChars = buffer.extractStringZ();
|
||||
UniqueChars languageChars = tag.toStringZ(cx);
|
||||
if (!languageChars) {
|
||||
return nullptr;
|
||||
}
|
||||
@ -426,24 +394,22 @@ static JSString* GetScriptDisplayName(JSContext* cx,
|
||||
DisplayNamesStyle displayStyle,
|
||||
DisplayNamesFallback fallback,
|
||||
HandleLinearString scriptStr) {
|
||||
mozilla::intl::ScriptSubtag script;
|
||||
intl::ScriptSubtag script;
|
||||
if (!intl::ParseStandaloneScriptTag(scriptStr, script)) {
|
||||
ReportInvalidOptionError(cx, "script", scriptStr);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
mozilla::intl::Locale tag;
|
||||
intl::LanguageTag tag(cx);
|
||||
tag.setLanguage("und");
|
||||
tag.setScript(script);
|
||||
|
||||
// ICU always canonicalizes the input locale, but since we know that ICU's
|
||||
// canonicalization is incomplete, we need to perform our own canonicalization
|
||||
// to ensure consistent result.
|
||||
if (tag.canonicalizeBaseName().isErr()) {
|
||||
intl::ReportInternalError(cx);
|
||||
if (!tag.canonicalizeBaseName(cx)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
MOZ_ASSERT(tag.script().present());
|
||||
|
||||
// |uldn_scriptDisplayName| doesn't use the stand-alone form for script
|
||||
@ -453,13 +419,7 @@ static JSString* GetScriptDisplayName(JSContext* cx,
|
||||
// ICU bug: https://unicode-org.atlassian.net/browse/ICU-9301
|
||||
if (displayStyle == DisplayNamesStyle::Long) {
|
||||
// |uloc_getDisplayScript| expects a full locale identifier as its input.
|
||||
intl::FormatBuffer<char> buffer(cx);
|
||||
if (auto result = tag.toString(buffer); result.isErr()) {
|
||||
intl::ReportInternalError(cx, result.unwrapErr());
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
UniqueChars scriptChars = buffer.extractStringZ();
|
||||
UniqueChars scriptChars = tag.toStringZ(cx);
|
||||
if (!scriptChars) {
|
||||
return nullptr;
|
||||
}
|
||||
@ -492,9 +452,9 @@ static JSString* GetScriptDisplayName(JSContext* cx,
|
||||
}
|
||||
|
||||
// Note: ICU requires the script subtag to be in canonical case.
|
||||
const mozilla::intl::ScriptSubtag& canonicalScript = tag.script();
|
||||
const intl::ScriptSubtag& canonicalScript = tag.script();
|
||||
|
||||
char scriptChars[mozilla::intl::LanguageTagLimits::ScriptLength + 1] = {};
|
||||
char scriptChars[intl::LanguageTagLimits::ScriptLength + 1] = {};
|
||||
std::copy_n(canonicalScript.span().data(), canonicalScript.length(),
|
||||
scriptChars);
|
||||
|
||||
@ -535,30 +495,28 @@ static JSString* GetRegionDisplayName(JSContext* cx,
|
||||
DisplayNamesStyle displayStyle,
|
||||
DisplayNamesFallback fallback,
|
||||
HandleLinearString regionStr) {
|
||||
mozilla::intl::RegionSubtag region;
|
||||
intl::RegionSubtag region;
|
||||
if (!intl::ParseStandaloneRegionTag(regionStr, region)) {
|
||||
ReportInvalidOptionError(cx, "region", regionStr);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
mozilla::intl::Locale tag;
|
||||
intl::LanguageTag tag(cx);
|
||||
tag.setLanguage("und");
|
||||
tag.setRegion(region);
|
||||
|
||||
// ICU always canonicalizes the input locale, but since we know that ICU's
|
||||
// canonicalization is incomplete, we need to perform our own canonicalization
|
||||
// to ensure consistent result.
|
||||
if (tag.canonicalizeBaseName().isErr()) {
|
||||
intl::ReportInternalError(cx);
|
||||
if (!tag.canonicalizeBaseName(cx)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
MOZ_ASSERT(tag.region().present());
|
||||
|
||||
// Note: ICU requires the region subtag to be in canonical case.
|
||||
const mozilla::intl::RegionSubtag& canonicalRegion = tag.region();
|
||||
const intl::RegionSubtag& canonicalRegion = tag.region();
|
||||
|
||||
char regionChars[mozilla::intl::LanguageTagLimits::RegionLength + 1] = {};
|
||||
char regionChars[intl::LanguageTagLimits::RegionLength + 1] = {};
|
||||
std::copy_n(canonicalRegion.span().data(), canonicalRegion.length(),
|
||||
regionChars);
|
||||
|
||||
@ -655,26 +613,21 @@ static JSString* GetCalendarDisplayName(
|
||||
DisplayNamesStyle displayStyle, DisplayNamesFallback fallback,
|
||||
HandleLinearString calendarStr) {
|
||||
// Report an error if the input can't be parsed as a Unicode type nonterminal.
|
||||
if (calendarStr->empty() || !StringIsAscii(calendarStr)) {
|
||||
if (calendarStr->empty() ||
|
||||
!intl::LanguageTagParser::canParseUnicodeExtensionType(calendarStr)) {
|
||||
ReportInvalidOptionError(cx, "calendar", calendarStr);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
MOZ_ASSERT(StringIsAscii(calendarStr), "Unicode extension types are ASCII");
|
||||
|
||||
UniqueChars calendar = EncodeAscii(cx, calendarStr);
|
||||
if (!calendar) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (LocaleParser::canParseUnicodeExtensionType(
|
||||
mozilla::Span(calendar.get(), calendarStr->length()))
|
||||
.isErr()) {
|
||||
ReportInvalidOptionError(cx, "calendar", calendarStr);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Convert into canonical case before searching for replacements.
|
||||
mozilla::intl::AsciiToLowerCase(calendar.get(), calendarStr->length(),
|
||||
calendar.get());
|
||||
intl::AsciiToLowerCase(calendar.get(), calendarStr->length(), calendar.get());
|
||||
|
||||
auto key = mozilla::MakeStringSpan("ca");
|
||||
auto type = mozilla::Span(calendar.get(), calendarStr->length());
|
||||
@ -682,7 +635,7 @@ static JSString* GetCalendarDisplayName(
|
||||
// Search if there's a replacement for the Unicode calendar keyword.
|
||||
const char* canonicalCalendar = calendar.get();
|
||||
if (const char* replacement =
|
||||
mozilla::intl::Locale::replaceUnicodeExtensionType(key, type)) {
|
||||
intl::LanguageTag::replaceUnicodeExtensionType(key, type)) {
|
||||
canonicalCalendar = replacement;
|
||||
}
|
||||
|
||||
@ -779,10 +732,9 @@ static ListObject* GetDateTimeDisplayNames(
|
||||
return names;
|
||||
}
|
||||
|
||||
mozilla::intl::Locale tag;
|
||||
if (LocaleParser::tryParse(mozilla::MakeStringSpan(locale), tag).isErr()) {
|
||||
JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
|
||||
JSMSG_INVALID_LANGUAGE_TAG, locale);
|
||||
intl::LanguageTag tag(cx);
|
||||
if (!intl::LanguageTagParser::parse(cx, mozilla::MakeStringSpan(locale),
|
||||
tag)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
@ -795,12 +747,7 @@ static ListObject* GetDateTimeDisplayNames(
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
intl::FormatBuffer<char> buffer(cx);
|
||||
if (auto result = tag.toString(buffer); result.isErr()) {
|
||||
intl::ReportInternalError(cx, result.unwrapErr());
|
||||
return nullptr;
|
||||
}
|
||||
UniqueChars localeWithCalendar = buffer.extractStringZ();
|
||||
UniqueChars localeWithCalendar = tag.toStringZ(cx);
|
||||
if (!localeWithCalendar) {
|
||||
return nullptr;
|
||||
}
|
||||
|
@ -8,6 +8,7 @@
|
||||
#define builtin_intl_FormatBuffer_h
|
||||
|
||||
#include "mozilla/Assertions.h"
|
||||
#include "mozilla/Range.h"
|
||||
#include "mozilla/Span.h"
|
||||
|
||||
#include <stddef.h>
|
||||
@ -15,7 +16,6 @@
|
||||
|
||||
#include "gc/Allocator.h"
|
||||
#include "js/AllocPolicy.h"
|
||||
#include "js/CharacterEncoding.h"
|
||||
#include "js/TypeDecls.h"
|
||||
#include "js/UniquePtr.h"
|
||||
#include "js/Vector.h"
|
||||
@ -89,7 +89,8 @@ class FormatBuffer {
|
||||
std::is_same_v<CharT, char>) {
|
||||
// Handle the UTF-8 encoding case.
|
||||
return NewStringCopyUTF8N<CanGC>(
|
||||
cx, JS::UTF8Chars(buffer_.begin(), buffer_.length()));
|
||||
cx, mozilla::Range(reinterpret_cast<unsigned char>(buffer_.begin()),
|
||||
buffer_.length()));
|
||||
} else {
|
||||
// Handle the UTF-16 encoding case.
|
||||
static_assert(std::is_same_v<CharT, char16_t>);
|
||||
|
@ -32,7 +32,6 @@
|
||||
#include "builtin/intl/PluralRules.h"
|
||||
#include "builtin/intl/RelativeTimeFormat.h"
|
||||
#include "builtin/intl/SharedIntlData.h"
|
||||
#include "builtin/intl/StringAsciiChars.h"
|
||||
#include "ds/Sort.h"
|
||||
#include "js/CharacterEncoding.h"
|
||||
#include "js/Class.h"
|
||||
@ -261,54 +260,29 @@ bool js::intl_BestAvailableLocale(JSContext* cx, unsigned argc, Value* vp) {
|
||||
|
||||
#ifdef DEBUG
|
||||
{
|
||||
MOZ_ASSERT(StringIsAscii(locale), "language tags are ASCII-only");
|
||||
|
||||
// |locale| is a structurally valid language tag.
|
||||
mozilla::intl::Locale tag;
|
||||
|
||||
using ParserError = mozilla::intl::LocaleParser::ParserError;
|
||||
mozilla::Result<mozilla::Ok, ParserError> parse_result = Ok();
|
||||
{
|
||||
intl::StringAsciiChars chars(locale);
|
||||
if (!chars.init(cx)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
parse_result = mozilla::intl::LocaleParser::tryParse(chars, tag);
|
||||
}
|
||||
|
||||
if (parse_result.isErr()) {
|
||||
MOZ_ASSERT(parse_result.unwrapErr() == ParserError::OutOfMemory,
|
||||
"locale is a structurally valid language tag");
|
||||
|
||||
intl::ReportInternalError(cx);
|
||||
return false;
|
||||
}
|
||||
intl::LanguageTag tag(cx);
|
||||
bool ok;
|
||||
JS_TRY_VAR_OR_RETURN_FALSE(
|
||||
cx, ok, intl::LanguageTagParser::tryParse(cx, locale, tag));
|
||||
MOZ_ASSERT(ok, "locale is a structurally valid language tag");
|
||||
|
||||
MOZ_ASSERT(!tag.unicodeExtension(),
|
||||
"locale must contain no Unicode extensions");
|
||||
|
||||
if (auto result = tag.canonicalize(); result.isErr()) {
|
||||
MOZ_ASSERT(
|
||||
result.unwrapErr() !=
|
||||
mozilla::intl::Locale::CanonicalizationError::DuplicateVariant);
|
||||
intl::ReportInternalError(cx);
|
||||
if (!tag.canonicalize(cx)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
intl::FormatBuffer<char, intl::INITIAL_CHAR_BUFFER_SIZE> buffer(cx);
|
||||
if (auto result = tag.toString(buffer); result.isErr()) {
|
||||
intl::ReportInternalError(cx, result.unwrapErr());
|
||||
return false;
|
||||
}
|
||||
|
||||
JSLinearString* tagStr = buffer.toString(cx);
|
||||
JSString* tagStr = tag.toString(cx);
|
||||
if (!tagStr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
MOZ_ASSERT(EqualStrings(locale, tagStr),
|
||||
"locale is a canonicalized language tag");
|
||||
bool canonical;
|
||||
if (!EqualStrings(cx, locale, tagStr, &canonical)) {
|
||||
return false;
|
||||
}
|
||||
MOZ_ASSERT(canonical, "locale is a canonicalized language tag");
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -344,47 +318,39 @@ bool js::intl_supportedLocaleOrFallback(JSContext* cx, unsigned argc,
|
||||
return false;
|
||||
}
|
||||
|
||||
mozilla::intl::Locale tag;
|
||||
bool canParseLocale = false;
|
||||
if (StringIsAscii(locale)) {
|
||||
intl::StringAsciiChars chars(locale);
|
||||
if (!chars.init(cx)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Tell the analysis the |tag.canonicalize()| method can't GC.
|
||||
JS::AutoSuppressGCAnalysis nogc;
|
||||
|
||||
canParseLocale = mozilla::intl::LocaleParser::tryParse(chars, tag).isOk() &&
|
||||
tag.canonicalize().isOk();
|
||||
}
|
||||
intl::LanguageTag tag(cx);
|
||||
bool ok;
|
||||
JS_TRY_VAR_OR_RETURN_FALSE(
|
||||
cx, ok, intl::LanguageTagParser::tryParse(cx, locale, tag));
|
||||
|
||||
RootedLinearString candidate(cx);
|
||||
if (!canParseLocale) {
|
||||
if (!ok) {
|
||||
candidate = NewStringCopyZ<CanGC>(cx, intl::LastDitchLocale());
|
||||
if (!candidate) {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
if (!tag.canonicalize(cx)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// The default locale must be in [[AvailableLocales]], and that list must
|
||||
// not contain any locales with Unicode extension sequences, so remove any
|
||||
// present in the candidate.
|
||||
tag.clearUnicodeExtension();
|
||||
|
||||
intl::FormatBuffer<char, intl::INITIAL_CHAR_BUFFER_SIZE> buffer(cx);
|
||||
if (auto result = tag.toString(buffer); result.isErr()) {
|
||||
intl::ReportInternalError(cx, result.unwrapErr());
|
||||
JSString* canonical = tag.toString(cx);
|
||||
if (!canonical) {
|
||||
return false;
|
||||
}
|
||||
|
||||
candidate = buffer.toString(cx);
|
||||
candidate = canonical->ensureLinear(cx);
|
||||
if (!candidate) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Certain old-style language tags lack a script code, but in current
|
||||
// usage they *would* include a script code. Map these over to modern
|
||||
// forms.
|
||||
// Certain old-style language tags lack a script code, but in current usage
|
||||
// they *would* include a script code. Map these over to modern forms.
|
||||
for (const auto& mapping : js::intl::oldStyleLanguageTagMappings) {
|
||||
const char* oldStyle = mapping.oldStyle;
|
||||
const char* modernStyle = mapping.modernStyle;
|
||||
@ -404,8 +370,8 @@ bool js::intl_supportedLocaleOrFallback(JSContext* cx, unsigned argc,
|
||||
// - [[AvailableLocales]] is a List [...]. The list must include the value
|
||||
// returned by the DefaultLocale abstract operation (6.2.4), [...].
|
||||
//
|
||||
// That implies we must ignore any candidate which isn't supported by all
|
||||
// Intl service constructors.
|
||||
// That implies we must ignore any candidate which isn't supported by all Intl
|
||||
// service constructors.
|
||||
|
||||
RootedLinearString supportedCollator(cx);
|
||||
JS_TRY_VAR_OR_RETURN_FALSE(
|
||||
@ -421,8 +387,8 @@ bool js::intl_supportedLocaleOrFallback(JSContext* cx, unsigned argc,
|
||||
|
||||
#ifdef DEBUG
|
||||
// Note: We don't test the supported locales of the remaining Intl service
|
||||
// constructors, because the set of supported locales is exactly equal to
|
||||
// the set of supported locales of Intl.DateTimeFormat.
|
||||
// constructors, because the set of supported locales is exactly equal to the
|
||||
// set of supported locales of Intl.DateTimeFormat.
|
||||
for (auto kind :
|
||||
{SupportedLocaleKind::DisplayNames, SupportedLocaleKind::ListFormat,
|
||||
SupportedLocaleKind::NumberFormat, SupportedLocaleKind::PluralRules,
|
||||
@ -592,10 +558,9 @@ static ArrayObject* AvailableCalendars(JSContext* cx) {
|
||||
Rooted<StringList> list(cx, StringList(cx));
|
||||
|
||||
{
|
||||
// Hazard analysis complains that the mozilla::Result destructor calls a
|
||||
// GC function, which is unsound when returning an unrooted value. Work
|
||||
// around this issue by restricting the lifetime of |keywords| to a
|
||||
// separate block.
|
||||
// Hazard analysis complains that the mozilla::Result destructor calls a GC
|
||||
// function, which is unsound when returning an unrooted value. Work around
|
||||
// this issue by restricting the lifetime of |keywords| to a separate block.
|
||||
auto keywords = mozilla::intl::Calendar::GetBcp47KeywordValuesForLocale("");
|
||||
if (keywords.isErr()) {
|
||||
intl::ReportInternalError(cx, keywords.unwrapErr());
|
||||
@ -634,10 +599,9 @@ static ArrayObject* AvailableCollations(JSContext* cx) {
|
||||
Rooted<StringList> list(cx, StringList(cx));
|
||||
|
||||
{
|
||||
// Hazard analysis complains that the mozilla::Result destructor calls a
|
||||
// GC function, which is unsound when returning an unrooted value. Work
|
||||
// around this issue by restricting the lifetime of |keywords| to a
|
||||
// separate block.
|
||||
// Hazard analysis complains that the mozilla::Result destructor calls a GC
|
||||
// function, which is unsound when returning an unrooted value. Work around
|
||||
// this issue by restricting the lifetime of |keywords| to a separate block.
|
||||
auto keywords = mozilla::intl::Collator::GetBcp47KeywordValues();
|
||||
if (keywords.isErr()) {
|
||||
intl::ReportInternalError(cx, keywords.unwrapErr());
|
||||
@ -653,15 +617,13 @@ static ArrayObject* AvailableCollations(JSContext* cx) {
|
||||
|
||||
// |ucol_getKeywordValues| returns the possible collations for all installed
|
||||
// locales. The root locale is excluded in the list of installed locales, so
|
||||
// we have to explicitly request the available collations of the root
|
||||
// locale.
|
||||
// we have to explicitly request the available collations of the root locale.
|
||||
//
|
||||
// https://unicode-org.atlassian.net/browse/ICU-21641
|
||||
{
|
||||
// Hazard analysis complains that the mozilla::Result destructor calls a
|
||||
// GC function, which is unsound when returning an unrooted value. Work
|
||||
// around this issue by restricting the lifetime of |keywords| to a
|
||||
// separate block.
|
||||
// Hazard analysis complains that the mozilla::Result destructor calls a GC
|
||||
// function, which is unsound when returning an unrooted value. Work around
|
||||
// this issue by restricting the lifetime of |keywords| to a separate block.
|
||||
auto keywords = mozilla::intl::Collator::GetBcp47KeywordValuesForLocale("");
|
||||
if (keywords.isErr()) {
|
||||
intl::ReportInternalError(cx, keywords.unwrapErr());
|
||||
@ -715,10 +677,9 @@ static ArrayObject* AvailableCurrencies(JSContext* cx) {
|
||||
Rooted<StringList> list(cx, StringList(cx));
|
||||
|
||||
{
|
||||
// Hazard analysis complains that the mozilla::Result destructor calls a
|
||||
// GC function, which is unsound when returning an unrooted value. Work
|
||||
// around this issue by restricting the lifetime of |currencies| to a
|
||||
// separate block.
|
||||
// Hazard analysis complains that the mozilla::Result destructor calls a GC
|
||||
// function, which is unsound when returning an unrooted value. Work around
|
||||
// this issue by restricting the lifetime of |currencies| to a separate block.
|
||||
auto currencies = mozilla::intl::Currency::GetISOCurrencies();
|
||||
if (currencies.isErr()) {
|
||||
intl::ReportInternalError(cx, currencies.unwrapErr());
|
||||
@ -762,8 +723,7 @@ static ArrayObject* AvailableNumberingSystems(JSContext* cx) {
|
||||
* AvailableTimeZones ( )
|
||||
*/
|
||||
static ArrayObject* AvailableTimeZones(JSContext* cx) {
|
||||
// Unsorted list of canonical time zone names, possibly containing
|
||||
// duplicates.
|
||||
// Unsorted list of canonical time zone names, possibly containing duplicates.
|
||||
Rooted<StringList> timeZones(cx, StringList(cx));
|
||||
|
||||
intl::SharedIntlData& sharedIntlData = cx->runtime()->sharedIntlData.ref();
|
||||
@ -780,8 +740,8 @@ static ArrayObject* AvailableTimeZones(JSContext* cx) {
|
||||
|
||||
// Canonicalize the time zone before adding it to the result array.
|
||||
|
||||
// Some time zone names are canonicalized differently by ICU -- handle
|
||||
// those first.
|
||||
// Some time zone names are canonicalized differently by ICU -- handle those
|
||||
// first.
|
||||
ianaTimeZone.set(nullptr);
|
||||
if (!sharedIntlData.tryCanonicalizeTimeZoneConsistentWithIANA(
|
||||
cx, validatedTimeZone, &ianaTimeZone)) {
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -9,8 +9,17 @@
|
||||
#ifndef builtin_intl_LanguageTag_h
|
||||
#define builtin_intl_LanguageTag_h
|
||||
|
||||
#include "mozilla/intl/Locale.h"
|
||||
#include "mozilla/Assertions.h"
|
||||
#include "mozilla/Span.h"
|
||||
#include "mozilla/TextUtils.h"
|
||||
#include "mozilla/TypedEnumBits.h"
|
||||
#include "mozilla/Variant.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <utility>
|
||||
|
||||
#include "js/AllocPolicy.h"
|
||||
#include "js/GCAPI.h"
|
||||
@ -29,32 +38,702 @@ namespace js {
|
||||
namespace intl {
|
||||
|
||||
/**
|
||||
* Parse a string Unicode BCP 47 locale identifier. If successful, store in
|
||||
* |result| and return true. Otherwise return false.
|
||||
* Return true if |language| is a valid language subtag.
|
||||
*/
|
||||
[[nodiscard]] bool ParseLocale(JSContext* cx, JS::Handle<JSLinearString*> str,
|
||||
mozilla::intl::Locale& result);
|
||||
template <typename CharT>
|
||||
bool IsStructurallyValidLanguageTag(mozilla::Span<const CharT> language);
|
||||
|
||||
/**
|
||||
* Return true if |script| is a valid script subtag.
|
||||
*/
|
||||
template <typename CharT>
|
||||
bool IsStructurallyValidScriptTag(mozilla::Span<const CharT> script);
|
||||
|
||||
/**
|
||||
* Return true if |region| is a valid region subtag.
|
||||
*/
|
||||
template <typename CharT>
|
||||
bool IsStructurallyValidRegionTag(mozilla::Span<const CharT> region);
|
||||
|
||||
#ifdef DEBUG
|
||||
/**
|
||||
* Return true if |variant| is a valid variant subtag.
|
||||
*/
|
||||
bool IsStructurallyValidVariantTag(mozilla::Span<const char> variant);
|
||||
|
||||
/**
|
||||
* Return true if |extension| is a valid Unicode extension subtag.
|
||||
*/
|
||||
bool IsStructurallyValidUnicodeExtensionTag(
|
||||
mozilla::Span<const char> extension);
|
||||
|
||||
/**
|
||||
* Return true if |privateUse| is a valid private-use subtag.
|
||||
*/
|
||||
bool IsStructurallyValidPrivateUseTag(mozilla::Span<const char> privateUse);
|
||||
|
||||
#endif
|
||||
|
||||
template <typename CharT>
|
||||
char AsciiToLowerCase(CharT c) {
|
||||
MOZ_ASSERT(mozilla::IsAscii(c));
|
||||
return mozilla::IsAsciiUppercaseAlpha(c) ? (c + 0x20) : c;
|
||||
}
|
||||
|
||||
template <typename CharT>
|
||||
char AsciiToUpperCase(CharT c) {
|
||||
MOZ_ASSERT(mozilla::IsAscii(c));
|
||||
return mozilla::IsAsciiLowercaseAlpha(c) ? (c - 0x20) : c;
|
||||
}
|
||||
|
||||
template <typename CharT>
|
||||
void AsciiToLowerCase(CharT* chars, size_t length, char* dest) {
|
||||
// Tell the analysis the |std::transform| function can't GC.
|
||||
JS::AutoSuppressGCAnalysis nogc;
|
||||
|
||||
char (&fn)(CharT) = AsciiToLowerCase;
|
||||
std::transform(chars, chars + length, dest, fn);
|
||||
}
|
||||
|
||||
template <typename CharT>
|
||||
void AsciiToUpperCase(CharT* chars, size_t length, char* dest) {
|
||||
// Tell the analysis the |std::transform| function can't GC.
|
||||
JS::AutoSuppressGCAnalysis nogc;
|
||||
|
||||
char (&fn)(CharT) = AsciiToUpperCase;
|
||||
std::transform(chars, chars + length, dest, fn);
|
||||
}
|
||||
|
||||
// Title-case |length| ASCII code units: the first one is upper-cased, all
// remaining ones are lower-cased. Nothing is written for an empty input.
template <typename CharT>
void AsciiToTitleCase(CharT* chars, size_t length, char* dest) {
  if (length == 0) {
    return;
  }

  AsciiToUpperCase(chars, 1, dest);
  AsciiToLowerCase(chars + 1, length - 1, dest + 1);
}
|
||||
|
||||
// Maximum (or exact) lengths of the individual language tag subtags. Each
// constant is annotated with the grammar production it is derived from.
namespace LanguageTagLimits {

// Language: at most eight letters.
//   unicode_language_subtag = alpha{2,3} | alpha{5,8} ;
static constexpr size_t LanguageLength = 8;

// Script: exactly four letters.
//   unicode_script_subtag = alpha{4} ;
static constexpr size_t ScriptLength = 4;

// Region: two letters or three digits; |RegionLength| is the larger of both.
//   unicode_region_subtag = (alpha{2} | digit{3}) ;
static constexpr size_t AlphaRegionLength = 2;
static constexpr size_t DigitRegionLength = 3;
static constexpr size_t RegionLength = 3;

// Unicode extension key.
//   key = alphanum alpha ;
static constexpr size_t UnicodeKeyLength = 2;

// Transform extension key.
//   tkey = alpha digit ;
static constexpr size_t TransformKeyLength = 2;

}  // namespace LanguageTagLimits
|
||||
|
||||
// Fixed size language subtag which is stored inline in LanguageTag.
|
||||
template <size_t Length>
|
||||
class LanguageTagSubtag final {
|
||||
uint8_t length_ = 0;
|
||||
char chars_[Length] = {}; // zero initialize
|
||||
|
||||
public:
|
||||
LanguageTagSubtag() = default;
|
||||
|
||||
LanguageTagSubtag(const LanguageTagSubtag&) = delete;
|
||||
LanguageTagSubtag& operator=(const LanguageTagSubtag&) = delete;
|
||||
|
||||
size_t length() const { return length_; }
|
||||
bool missing() const { return length_ == 0; }
|
||||
bool present() const { return length_ > 0; }
|
||||
|
||||
mozilla::Span<const char> span() const { return {chars_, length_}; }
|
||||
|
||||
template <typename CharT>
|
||||
void set(mozilla::Span<const CharT> str) {
|
||||
MOZ_ASSERT(str.size() <= Length);
|
||||
std::copy_n(str.data(), str.size(), chars_);
|
||||
length_ = str.size();
|
||||
}
|
||||
|
||||
// The toXYZCase() methods are using |Length| instead of |length()|, because
|
||||
// current compilers (tested GCC and Clang) can't infer the maximum string
|
||||
// length - even when using hints like |std::min| - and instead are emitting
|
||||
// SIMD optimized code. Using a fixed sized length avoids emitting the SIMD
|
||||
// code. (Emitting SIMD code doesn't make sense here, because the SIMD code
|
||||
// only kicks in for long strings.) A fixed length will additionally ensure
|
||||
// the compiler unrolls the loop in the case conversion code.
|
||||
|
||||
void toLowerCase() { AsciiToLowerCase(chars_, Length, chars_); }
|
||||
|
||||
void toUpperCase() { AsciiToUpperCase(chars_, Length, chars_); }
|
||||
|
||||
void toTitleCase() { AsciiToTitleCase(chars_, Length, chars_); }
|
||||
|
||||
template <size_t N>
|
||||
bool equalTo(const char (&str)[N]) const {
|
||||
static_assert(N - 1 <= Length,
|
||||
"subtag literals must not exceed the maximum subtag length");
|
||||
|
||||
return length_ == N - 1 && memcmp(chars_, str, N - 1) == 0;
|
||||
}
|
||||
};
|
||||
|
||||
using LanguageSubtag = LanguageTagSubtag<LanguageTagLimits::LanguageLength>;
|
||||
using ScriptSubtag = LanguageTagSubtag<LanguageTagLimits::ScriptLength>;
|
||||
using RegionSubtag = LanguageTagSubtag<LanguageTagLimits::RegionLength>;
|
||||
|
||||
/**
|
||||
* Object representing a language tag.
|
||||
*
|
||||
* All subtags are already in canonicalized case.
|
||||
*/
|
||||
class MOZ_STACK_CLASS LanguageTag final {
|
||||
LanguageSubtag language_ = {};
|
||||
ScriptSubtag script_ = {};
|
||||
RegionSubtag region_ = {};
|
||||
|
||||
using VariantsVector = Vector<JS::UniqueChars, 2>;
|
||||
using ExtensionsVector = Vector<JS::UniqueChars, 2>;
|
||||
|
||||
VariantsVector variants_;
|
||||
ExtensionsVector extensions_;
|
||||
JS::UniqueChars privateuse_ = nullptr;
|
||||
|
||||
friend class LanguageTagParser;
|
||||
|
||||
bool canonicalizeUnicodeExtension(JSContext* cx,
|
||||
JS::UniqueChars& unicodeExtension);
|
||||
|
||||
bool canonicalizeTransformExtension(JSContext* cx,
|
||||
JS::UniqueChars& transformExtension);
|
||||
|
||||
public:
|
||||
static bool languageMapping(LanguageSubtag& language);
|
||||
static bool complexLanguageMapping(const LanguageSubtag& language);
|
||||
|
||||
private:
|
||||
static bool scriptMapping(ScriptSubtag& script);
|
||||
static bool regionMapping(RegionSubtag& region);
|
||||
static bool complexRegionMapping(const RegionSubtag& region);
|
||||
|
||||
void performComplexLanguageMappings();
|
||||
void performComplexRegionMappings();
|
||||
[[nodiscard]] bool performVariantMappings(JSContext* cx);
|
||||
|
||||
[[nodiscard]] bool updateLegacyMappings(JSContext* cx);
|
||||
|
||||
static bool signLanguageMapping(LanguageSubtag& language,
|
||||
const RegionSubtag& region);
|
||||
|
||||
static const char* replaceTransformExtensionType(
|
||||
mozilla::Span<const char> key, mozilla::Span<const char> type);
|
||||
|
||||
public:
|
||||
/**
|
||||
* Given a Unicode key and type, return the null-terminated preferred
|
||||
* replacement for that type if there is one, or null if there is none, e.g.
|
||||
* in effect
|
||||
* |replaceUnicodeExtensionType("ca", "islamicc") == "islamic-civil"|
|
||||
* and
|
||||
* |replaceUnicodeExtensionType("ca", "islamic-civil") == nullptr|.
|
||||
*/
|
||||
static const char* replaceUnicodeExtensionType(
|
||||
mozilla::Span<const char> key, mozilla::Span<const char> type);
|
||||
|
||||
public:
|
||||
explicit LanguageTag(JSContext* cx) : variants_(cx), extensions_(cx) {}
|
||||
|
||||
LanguageTag(const LanguageTag&) = delete;
|
||||
LanguageTag& operator=(const LanguageTag&) = delete;
|
||||
|
||||
const LanguageSubtag& language() const { return language_; }
|
||||
const ScriptSubtag& script() const { return script_; }
|
||||
const RegionSubtag& region() const { return region_; }
|
||||
const auto& variants() const { return variants_; }
|
||||
const auto& extensions() const { return extensions_; }
|
||||
const char* privateuse() const { return privateuse_.get(); }
|
||||
|
||||
/**
|
||||
* Return the Unicode extension subtag or nullptr if not present.
|
||||
*/
|
||||
const char* unicodeExtension() const;
|
||||
|
||||
private:
|
||||
ptrdiff_t unicodeExtensionIndex() const;
|
||||
|
||||
public:
|
||||
/**
|
||||
* Set the language subtag. The input must be a valid language subtag.
|
||||
*/
|
||||
template <size_t N>
|
||||
void setLanguage(const char (&language)[N]) {
|
||||
mozilla::Span<const char> span(language, N - 1);
|
||||
MOZ_ASSERT(IsStructurallyValidLanguageTag(span));
|
||||
language_.set(span);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the language subtag. The input must be a valid language subtag.
|
||||
*/
|
||||
void setLanguage(const LanguageSubtag& language) {
|
||||
MOZ_ASSERT(IsStructurallyValidLanguageTag(language.span()));
|
||||
language_.set(language.span());
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the script subtag. The input must be a valid script subtag.
|
||||
*/
|
||||
template <size_t N>
|
||||
void setScript(const char (&script)[N]) {
|
||||
mozilla::Span<const char> span(script, N - 1);
|
||||
MOZ_ASSERT(IsStructurallyValidScriptTag(span));
|
||||
script_.set(span);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the script subtag. The input must be a valid script subtag or the empty
|
||||
* string.
|
||||
*/
|
||||
void setScript(const ScriptSubtag& script) {
|
||||
MOZ_ASSERT(script.missing() || IsStructurallyValidScriptTag(script.span()));
|
||||
script_.set(script.span());
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the region subtag. The input must be a valid region subtag.
|
||||
*/
|
||||
template <size_t N>
|
||||
void setRegion(const char (®ion)[N]) {
|
||||
mozilla::Span<const char> span(region, N - 1);
|
||||
MOZ_ASSERT(IsStructurallyValidRegionTag(span));
|
||||
region_.set(span);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the region subtag. The input must be a valid region subtag or the empty
|
||||
* empty string.
|
||||
*/
|
||||
void setRegion(const RegionSubtag& region) {
|
||||
MOZ_ASSERT(region.missing() || IsStructurallyValidRegionTag(region.span()));
|
||||
region_.set(region.span());
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes all variant subtags.
|
||||
*/
|
||||
void clearVariants() { variants_.clearAndFree(); }
|
||||
|
||||
/**
|
||||
* Set the Unicode extension subtag. The input must be a valid Unicode
|
||||
* extension subtag.
|
||||
*/
|
||||
bool setUnicodeExtension(JS::UniqueChars extension);
|
||||
|
||||
/**
|
||||
* Remove any Unicode extension subtag if present.
|
||||
*/
|
||||
void clearUnicodeExtension();
|
||||
|
||||
/**
|
||||
* Set the private-use subtag. The input must be a valid private-use subtag
|
||||
* or nullptr.
|
||||
*/
|
||||
void setPrivateuse(JS::UniqueChars privateuse) {
|
||||
MOZ_ASSERT(!privateuse ||
|
||||
IsStructurallyValidPrivateUseTag(
|
||||
{privateuse.get(), strlen(privateuse.get())}));
|
||||
privateuse_ = std::move(privateuse);
|
||||
}
|
||||
|
||||
/** Canonicalize the base-name (language, script, region, variant) subtags. */
|
||||
bool canonicalizeBaseName(JSContext* cx);
|
||||
|
||||
/**
|
||||
* Canonicalize all extension subtags.
|
||||
*/
|
||||
bool canonicalizeExtensions(JSContext* cx);
|
||||
|
||||
/**
|
||||
* Canonicalizes the given structurally valid Unicode BCP 47 locale
|
||||
* identifier, including regularized case of subtags. For example, the
|
||||
* language tag Zh-haNS-bu-variant2-Variant1-u-ca-chinese-t-Zh-laTN-x-PRIVATE,
|
||||
* where
|
||||
*
|
||||
* Zh ; 2*3ALPHA
|
||||
* -haNS ; ["-" script]
|
||||
* -bu ; ["-" region]
|
||||
* -variant2 ; *("-" variant)
|
||||
* -Variant1
|
||||
* -u-ca-chinese ; *("-" extension)
|
||||
* -t-Zh-laTN
|
||||
* -x-PRIVATE ; ["-" privateuse]
|
||||
*
|
||||
* becomes zh-Hans-MM-variant1-variant2-t-zh-latn-u-ca-chinese-x-private
|
||||
*
|
||||
* Spec: ECMAScript Internationalization API Specification, 6.2.3.
|
||||
*/
|
||||
bool canonicalize(JSContext* cx) {
  // Base name first; the extensions are only processed if that succeeded.
  if (!canonicalizeBaseName(cx)) {
    return false;
  }
  return canonicalizeExtensions(cx);
}
|
||||
|
||||
/**
|
||||
* Return the string representation of this language tag.
|
||||
*/
|
||||
JSString* toString(JSContext* cx) const;
|
||||
|
||||
/**
|
||||
* Return the string representation of this language tag as a null-terminated
|
||||
* C-string.
|
||||
*/
|
||||
JS::UniqueChars toStringZ(JSContext* cx) const;
|
||||
|
||||
/**
|
||||
* Add likely-subtags to the language tag.
|
||||
*
|
||||
* Spec: <https://www.unicode.org/reports/tr35/#Likely_Subtags>
|
||||
*/
|
||||
bool addLikelySubtags(JSContext* cx);
|
||||
|
||||
/**
|
||||
* Remove likely-subtags from the language tag.
|
||||
*
|
||||
* Spec: <https://www.unicode.org/reports/tr35/#Likely_Subtags>
|
||||
*/
|
||||
bool removeLikelySubtags(JSContext* cx);
|
||||
};
|
||||
|
||||
/**
|
||||
* Parser for Unicode BCP 47 locale identifiers.
|
||||
*
|
||||
* <https://unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers>
|
||||
*/
|
||||
class MOZ_STACK_CLASS LanguageTagParser final {
|
||||
public:
|
||||
// Exposed as |public| for |MOZ_MAKE_ENUM_CLASS_BITWISE_OPERATORS|.
|
||||
// Character classes of a token, encoded as bit flags: |AlphaDigit| is the
// union of the |Alpha| and |Digit| bits, |Error| uses a separate bit.
enum class TokenKind : uint8_t {
  None = 0,
  Alpha = 1 << 0,
  Digit = 1 << 1,
  AlphaDigit = Alpha | Digit,
  Error = 1 << 2
};
|
||||
|
||||
private:
|
||||
class Token final {
|
||||
size_t index_;
|
||||
size_t length_;
|
||||
TokenKind kind_;
|
||||
|
||||
public:
|
||||
Token(TokenKind kind, size_t index, size_t length)
|
||||
: index_(index), length_(length), kind_(kind) {}
|
||||
|
||||
TokenKind kind() const { return kind_; }
|
||||
size_t index() const { return index_; }
|
||||
size_t length() const { return length_; }
|
||||
|
||||
bool isError() const { return kind_ == TokenKind::Error; }
|
||||
bool isNone() const { return kind_ == TokenKind::None; }
|
||||
bool isAlpha() const { return kind_ == TokenKind::Alpha; }
|
||||
bool isDigit() const { return kind_ == TokenKind::Digit; }
|
||||
bool isAlphaDigit() const { return kind_ == TokenKind::AlphaDigit; }
|
||||
};
|
||||
|
||||
using LocaleChars = mozilla::Variant<const JS::Latin1Char*, const char16_t*>;
|
||||
|
||||
const LocaleChars& locale_;
|
||||
size_t length_;
|
||||
size_t index_ = 0;
|
||||
|
||||
LanguageTagParser(const LocaleChars& locale, size_t length)
|
||||
: locale_(locale), length_(length) {}
|
||||
|
||||
char16_t charAtUnchecked(size_t index) const {
|
||||
if (locale_.is<const JS::Latin1Char*>()) {
|
||||
return locale_.as<const JS::Latin1Char*>()[index];
|
||||
}
|
||||
return locale_.as<const char16_t*>()[index];
|
||||
}
|
||||
|
||||
char charAt(size_t index) const {
|
||||
char16_t c = charAtUnchecked(index);
|
||||
MOZ_ASSERT(mozilla::IsAscii(c));
|
||||
return c;
|
||||
}
|
||||
|
||||
// Copy the token characters into |subtag|.
|
||||
template <size_t N>
|
||||
void copyChars(const Token& tok, LanguageTagSubtag<N>& subtag) const {
|
||||
size_t index = tok.index();
|
||||
size_t length = tok.length();
|
||||
if (locale_.is<const JS::Latin1Char*>()) {
|
||||
using T = const JS::Latin1Char;
|
||||
subtag.set(mozilla::Span(locale_.as<T*>() + index, length));
|
||||
} else {
|
||||
using T = const char16_t;
|
||||
subtag.set(mozilla::Span(locale_.as<T*>() + index, length));
|
||||
}
|
||||
}
|
||||
|
||||
// Create a string copy of |length| characters starting at |index|.
|
||||
JS::UniqueChars chars(JSContext* cx, size_t index, size_t length) const;
|
||||
|
||||
// Create a string copy of the token characters.
|
||||
JS::UniqueChars chars(JSContext* cx, const Token& tok) const {
|
||||
return chars(cx, tok.index(), tok.length());
|
||||
}
|
||||
|
||||
// Create a string copy of the characters from the beginning of |start| up to
// one character before the beginning of |end| (presumably the separator in
// front of |end| - confirm against the tokenizer). |start| must begin before
// |end|.
JS::UniqueChars extension(JSContext* cx, const Token& start,
                          const Token& end) const {
  const size_t first = start.index();
  MOZ_ASSERT(first < end.index());

  return chars(cx, first, end.index() - first - 1);
}
|
||||
|
||||
Token nextToken();
|
||||
|
||||
// unicode_language_subtag = alpha{2,3} | alpha{5,8} ;
|
||||
//
|
||||
// Four character language subtags are not allowed in Unicode BCP 47 locale
|
||||
// identifiers. Also see the comparison to Unicode CLDR locale identifiers in
|
||||
// <https://unicode.org/reports/tr35/#BCP_47_Conformance>.
|
||||
bool isLanguage(const Token& tok) const {
|
||||
return tok.isAlpha() && ((2 <= tok.length() && tok.length() <= 3) ||
|
||||
(5 <= tok.length() && tok.length() <= 8));
|
||||
}
|
||||
|
||||
// unicode_script_subtag = alpha{4} ;
|
||||
bool isScript(const Token& tok) const {
|
||||
return tok.isAlpha() && tok.length() == 4;
|
||||
}
|
||||
|
||||
// unicode_region_subtag = (alpha{2} | digit{3}) ;
|
||||
bool isRegion(const Token& tok) const {
|
||||
return (tok.isAlpha() && tok.length() == 2) ||
|
||||
(tok.isDigit() && tok.length() == 3);
|
||||
}
|
||||
|
||||
// unicode_variant_subtag = (alphanum{5,8} | digit alphanum{3}) ;
|
||||
bool isVariant(const Token& tok) const {
|
||||
return (5 <= tok.length() && tok.length() <= 8) ||
|
||||
(tok.length() == 4 && mozilla::IsAsciiDigit(charAt(tok.index())));
|
||||
}
|
||||
|
||||
// Returns the code unit of the first character at the given singleton token.
|
||||
// Always returns the lower case form of an alphabetical character.
|
||||
char singletonKey(const Token& tok) const {
|
||||
MOZ_ASSERT(tok.length() == 1);
|
||||
return AsciiToLowerCase(charAt(tok.index()));
|
||||
}
|
||||
|
||||
// extensions = unicode_locale_extensions |
|
||||
// transformed_extensions |
|
||||
// other_extensions ;
|
||||
//
|
||||
// unicode_locale_extensions = sep [uU] ((sep keyword)+ |
|
||||
// (sep attribute)+ (sep keyword)*) ;
|
||||
//
|
||||
// transformed_extensions = sep [tT] ((sep tlang (sep tfield)*) |
|
||||
// (sep tfield)+) ;
|
||||
//
|
||||
// other_extensions = sep [alphanum-[tTuUxX]] (sep alphanum{2,8})+ ;
|
||||
bool isExtensionStart(const Token& tok) const {
|
||||
return tok.length() == 1 && singletonKey(tok) != 'x';
|
||||
}
|
||||
|
||||
// other_extensions = sep [alphanum-[tTuUxX]] (sep alphanum{2,8})+ ;
|
||||
bool isOtherExtensionPart(const Token& tok) const {
|
||||
return 2 <= tok.length() && tok.length() <= 8;
|
||||
}
|
||||
|
||||
// unicode_locale_extensions = sep [uU] ((sep keyword)+ |
|
||||
// (sep attribute)+ (sep keyword)*) ;
|
||||
// keyword = key (sep type)? ;
|
||||
bool isUnicodeExtensionPart(const Token& tok) const {
|
||||
return isUnicodeExtensionKey(tok) || isUnicodeExtensionType(tok) ||
|
||||
isUnicodeExtensionAttribute(tok);
|
||||
}
|
||||
|
||||
// attribute = alphanum{3,8} ;
|
||||
bool isUnicodeExtensionAttribute(const Token& tok) const {
|
||||
return 3 <= tok.length() && tok.length() <= 8;
|
||||
}
|
||||
|
||||
// key = alphanum alpha ;
|
||||
bool isUnicodeExtensionKey(const Token& tok) const {
|
||||
return tok.length() == 2 && mozilla::IsAsciiAlpha(charAt(tok.index() + 1));
|
||||
}
|
||||
|
||||
// type = alphanum{3,8} (sep alphanum{3,8})* ;
|
||||
bool isUnicodeExtensionType(const Token& tok) const {
|
||||
return 3 <= tok.length() && tok.length() <= 8;
|
||||
}
|
||||
|
||||
// tkey = alpha digit ;
|
||||
bool isTransformExtensionKey(const Token& tok) const {
|
||||
return tok.length() == 2 && mozilla::IsAsciiAlpha(charAt(tok.index())) &&
|
||||
mozilla::IsAsciiDigit(charAt(tok.index() + 1));
|
||||
}
|
||||
|
||||
// tvalue = (sep alphanum{3,8})+ ;
|
||||
bool isTransformExtensionPart(const Token& tok) const {
|
||||
return 3 <= tok.length() && tok.length() <= 8;
|
||||
}
|
||||
|
||||
// pu_extensions = sep [xX] (sep alphanum{1,8})+ ;
|
||||
bool isPrivateUseStart(const Token& tok) const {
|
||||
return tok.length() == 1 && singletonKey(tok) == 'x';
|
||||
}
|
||||
|
||||
// pu_extensions = sep [xX] (sep alphanum{1,8})+ ;
|
||||
bool isPrivateUsePart(const Token& tok) const {
|
||||
return 1 <= tok.length() && tok.length() <= 8;
|
||||
}
|
||||
|
||||
// Helper function for use in |parseBaseName| and
|
||||
// |parseTlangInTransformExtension|. Do not use this directly!
|
||||
static JS::Result<bool> internalParseBaseName(JSContext* cx,
|
||||
LanguageTagParser& ts,
|
||||
LanguageTag& tag, Token& tok);
|
||||
|
||||
// Parse the `unicode_language_id` production, i.e. the
|
||||
// language/script/region/variants portion of a language tag, into |tag|.
|
||||
// |tok| must be the current token.
|
||||
static JS::Result<bool> parseBaseName(JSContext* cx, LanguageTagParser& ts,
|
||||
LanguageTag& tag, Token& tok) {
|
||||
return internalParseBaseName(cx, ts, tag, tok);
|
||||
}
|
||||
|
||||
// Parse the `tlang` production within a parsed 't' transform extension.
|
||||
// The precise requirements for "previously parsed" are:
|
||||
//
|
||||
// * the input begins from current token |tok| with a valid `tlang`
|
||||
// * the `tlang` is wholly lowercase (*not* canonical case)
|
||||
// * variant subtags in the `tlang` may contain duplicates and be
|
||||
// unordered
|
||||
//
|
||||
// Return an error on internal failure. Otherwise, return a success value. If
|
||||
// there was no `tlang`, then |tag.language().missing()|. But if there was a
|
||||
// `tlang`, then |tag| is filled with subtags exactly as they appeared in the
|
||||
// parse input.
|
||||
static JS::Result<JS::Ok> parseTlangInTransformExtension(
    JSContext* cx, LanguageTagParser& ts, LanguageTag& tag, Token& tok) {
  MOZ_ASSERT(ts.isLanguage(tok));

  // The input starts with a language subtag, so a successful parse is
  // expected to report |true|; the flag is dropped from the result.
  auto dropParsedFlag = [](bool parsed) {
    MOZ_ASSERT(parsed);
    return JS::Ok();
  };
  return internalParseBaseName(cx, ts, tag, tok).map(dropParsedFlag);
}
|
||||
|
||||
friend class LanguageTag;
|
||||
|
||||
// A (start offset, length) pair describing a sub-range of some character
// buffer. The buffer itself is not stored; |begin()| applies the offset to a
// pointer supplied by the caller.
class Range final {
  size_t begin_;
  size_t length_;

 public:
  Range(size_t begin, size_t length) : begin_{begin}, length_{length} {}

  // Return |ptr| advanced by the start offset of this range.
  template <typename T>
  T* begin(T* ptr) const {
    T* const first = ptr + begin_;
    return first;
  }

  // Number of elements in this range.
  size_t length() const { return length_; }
};
|
||||
|
||||
using TFieldVector = js::Vector<Range, 8>;
|
||||
using AttributesVector = js::Vector<Range, 8>;
|
||||
using KeywordsVector = js::Vector<Range, 8>;
|
||||
|
||||
// Parse |extension|, which must be a validated, fully lowercase
|
||||
// `transformed_extensions` subtag, and fill |tag| and |fields| from the
|
||||
// `tlang` and `tfield` components. Data in |tag| is lowercase, consistent
|
||||
// with |extension|.
|
||||
static JS::Result<bool> parseTransformExtension(
|
||||
JSContext* cx, mozilla::Span<const char> extension, LanguageTag& tag,
|
||||
TFieldVector& fields);
|
||||
|
||||
// Parse |extension|, which must be a validated, fully lowercase
|
||||
// `unicode_locale_extensions` subtag, and fill |attributes| and |keywords|
|
||||
// from the `attribute` and `keyword` components.
|
||||
static JS::Result<bool> parseUnicodeExtension(
|
||||
JSContext* cx, mozilla::Span<const char> extension,
|
||||
AttributesVector& attributes, KeywordsVector& keywords);
|
||||
|
||||
static JS::Result<bool> tryParse(JSContext* cx, LocaleChars& localeChars,
|
||||
size_t localeLength, LanguageTag& tag);
|
||||
|
||||
public:
|
||||
// Parse the input string as a language tag. Reports an error to the context
|
||||
// if the input can't be parsed completely.
|
||||
static bool parse(JSContext* cx, JSLinearString* locale, LanguageTag& tag);
|
||||
|
||||
// Parse the input string as a language tag. Reports an error to the context
|
||||
// if the input can't be parsed completely.
|
||||
static bool parse(JSContext* cx, mozilla::Span<const char> locale,
|
||||
LanguageTag& tag);
|
||||
|
||||
// Parse the input string as a language tag. Returns Ok(true) if the input
|
||||
// could be completely parsed, Ok(false) if the input couldn't be parsed,
|
||||
// or Err() in case of internal error.
|
||||
static JS::Result<bool> tryParse(JSContext* cx, JSLinearString* locale,
|
||||
LanguageTag& tag);
|
||||
|
||||
// Parse the input string as a language tag. Returns Ok(true) if the input
|
||||
// could be completely parsed, Ok(false) if the input couldn't be parsed,
|
||||
// or Err() in case of internal error.
|
||||
static JS::Result<bool> tryParse(JSContext* cx,
|
||||
mozilla::Span<const char> locale,
|
||||
LanguageTag& tag);
|
||||
|
||||
// Parse the input string as the base-name parts (language, script, region,
|
||||
// variants) of a language tag. Ignores any trailing characters.
|
||||
static bool parseBaseName(JSContext* cx, mozilla::Span<const char> locale,
|
||||
LanguageTag& tag);
|
||||
|
||||
// Parse the input string as the base-name parts (language, script, region,
|
||||
// variants) of a language tag. Returns Ok(true) if the input could be
|
||||
// completely parsed, Ok(false) if the input couldn't be parsed, or Err() in
|
||||
// case of internal error.
|
||||
static JS::Result<bool> tryParseBaseName(JSContext* cx,
|
||||
JSLinearString* locale,
|
||||
LanguageTag& tag);
|
||||
|
||||
// Return true iff |extension| can be parsed as a Unicode extension subtag.
|
||||
static bool canParseUnicodeExtension(mozilla::Span<const char> extension);
|
||||
|
||||
// Return true iff |unicodeType| can be parsed as a Unicode extension type.
|
||||
static bool canParseUnicodeExtensionType(JSLinearString* unicodeType);
|
||||
};
|
||||
|
||||
MOZ_MAKE_ENUM_CLASS_BITWISE_OPERATORS(LanguageTagParser::TokenKind)
|
||||
|
||||
/**
|
||||
* Parse a string as a standalone |language| tag. If |str| is a standalone
|
||||
* language tag, store it in |result| and return true. Otherwise return false.
|
||||
*/
|
||||
[[nodiscard]] bool ParseStandaloneLanguageTag(
|
||||
JS::Handle<JSLinearString*> str, mozilla::intl::LanguageSubtag& result);
|
||||
[[nodiscard]] bool ParseStandaloneLanguageTag(JS::Handle<JSLinearString*> str,
|
||||
LanguageSubtag& result);
|
||||
|
||||
/**
|
||||
* Parse a string as a standalone |script| tag. If |str| is a standalone script
|
||||
* tag, store it in |result| and return true. Otherwise return false.
|
||||
*/
|
||||
[[nodiscard]] bool ParseStandaloneScriptTag(
|
||||
JS::Handle<JSLinearString*> str, mozilla::intl::ScriptSubtag& result);
|
||||
[[nodiscard]] bool ParseStandaloneScriptTag(JS::Handle<JSLinearString*> str,
|
||||
ScriptSubtag& result);
|
||||
|
||||
/**
|
||||
* Parse a string as a standalone |region| tag. If |str| is a standalone region
|
||||
* tag, store it in |result| and return true. Otherwise return false.
|
||||
*/
|
||||
[[nodiscard]] bool ParseStandaloneRegionTag(
|
||||
JS::Handle<JSLinearString*> str, mozilla::intl::RegionSubtag& result);
|
||||
[[nodiscard]] bool ParseStandaloneRegionTag(JS::Handle<JSLinearString*> str,
|
||||
RegionSubtag& result);
|
||||
|
||||
/**
|
||||
* Parse a string as an ISO-639 language code. Return |nullptr| in the result if
|
||||
@ -65,15 +744,13 @@ JS::Result<JSString*> ParseStandaloneISO639LanguageTag(
|
||||
JSContext* cx, JS::Handle<JSLinearString*> str);
|
||||
|
||||
class UnicodeExtensionKeyword final {
|
||||
char key_[mozilla::intl::LanguageTagLimits::UnicodeKeyLength];
|
||||
char key_[LanguageTagLimits::UnicodeKeyLength];
|
||||
JSLinearString* type_;
|
||||
|
||||
public:
|
||||
using UnicodeKey =
|
||||
const char (&)[mozilla::intl::LanguageTagLimits::UnicodeKeyLength + 1];
|
||||
using UnicodeKey = const char (&)[LanguageTagLimits::UnicodeKeyLength + 1];
|
||||
using UnicodeKeySpan =
|
||||
mozilla::Span<const char,
|
||||
mozilla::intl::LanguageTagLimits::UnicodeKeyLength>;
|
||||
mozilla::Span<const char, LanguageTagLimits::UnicodeKeyLength>;
|
||||
|
||||
UnicodeExtensionKeyword(UnicodeKey key, JSLinearString* type)
|
||||
: key_{key[0], key[1]}, type_(type) {}
|
||||
@ -85,7 +762,7 @@ class UnicodeExtensionKeyword final {
|
||||
};
|
||||
|
||||
[[nodiscard]] extern bool ApplyUnicodeExtensionToTag(
|
||||
JSContext* cx, mozilla::intl::Locale& tag,
|
||||
JSContext* cx, LanguageTag& tag,
|
||||
JS::HandleVector<UnicodeExtensionKeyword> keywords);
|
||||
|
||||
} // namespace intl
|
||||
|
@ -13,36 +13,39 @@
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
|
||||
#include "mozilla/intl/Locale.h"
|
||||
#include "builtin/intl/LanguageTag.h"
|
||||
#include "util/Text.h"
|
||||
#include "vm/JSContext.h"
|
||||
|
||||
using namespace mozilla::intl::LanguageTagLimits;
|
||||
using namespace js::intl::LanguageTagLimits;
|
||||
|
||||
template <size_t Length, size_t TagLength, size_t SubtagLength>
|
||||
static inline bool HasReplacement(
|
||||
const char (&subtags)[Length][TagLength],
|
||||
const mozilla::intl::LanguageTagSubtag<SubtagLength>& subtag) {
|
||||
const js::intl::LanguageTagSubtag<SubtagLength>& subtag) {
|
||||
MOZ_ASSERT(subtag.length() == TagLength - 1,
|
||||
"subtag must have the same length as the list of subtags");
|
||||
|
||||
const char* ptr = subtag.span().data();
|
||||
return std::binary_search(std::begin(subtags), std::end(subtags), ptr,
|
||||
[](const char* a, const char* b) {
|
||||
return memcmp(a, b, TagLength - 1) < 0;
|
||||
});
|
||||
return memcmp(a, b, TagLength - 1) < 0;
|
||||
});
|
||||
}
|
||||
|
||||
template <size_t Length, size_t TagLength, size_t SubtagLength>
|
||||
static inline const char* SearchReplacement(
|
||||
const char (&subtags)[Length][TagLength], const char* (&aliases)[Length],
|
||||
const mozilla::intl::LanguageTagSubtag<SubtagLength>& subtag) {
|
||||
const char (&subtags)[Length][TagLength],
|
||||
const char* (&aliases)[Length],
|
||||
const js::intl::LanguageTagSubtag<SubtagLength>& subtag) {
|
||||
MOZ_ASSERT(subtag.length() == TagLength - 1,
|
||||
"subtag must have the same length as the list of subtags");
|
||||
|
||||
const char* ptr = subtag.span().data();
|
||||
auto p = std::lower_bound(std::begin(subtags), std::end(subtags), ptr,
|
||||
[](const char* a, const char* b) {
|
||||
return memcmp(a, b, TagLength - 1) < 0;
|
||||
});
|
||||
return memcmp(a, b, TagLength - 1) < 0;
|
||||
});
|
||||
if (p != std::end(subtags) && memcmp(*p, ptr, TagLength - 1) == 0) {
|
||||
return aliases[std::distance(std::begin(subtags), p)];
|
||||
}
|
||||
@ -59,23 +62,32 @@ static bool IsAsciiLowercaseAlphanumericOrDash(char c) {
|
||||
}
|
||||
|
||||
static bool IsCanonicallyCasedLanguageTag(mozilla::Span<const char> span) {
|
||||
return std::all_of(span.begin(), span.end(),
|
||||
mozilla::IsAsciiLowercaseAlpha<char>);
|
||||
// Tell the analysis the |std::all_of| function can't GC.
|
||||
JS::AutoSuppressGCAnalysis nogc;
|
||||
|
||||
return std::all_of(span.begin(), span.end(), mozilla::IsAsciiLowercaseAlpha<char>);
|
||||
}
|
||||
|
||||
static bool IsCanonicallyCasedScriptTag(mozilla::Span<const char> span) {
|
||||
// Tell the analysis the |std::all_of| function can't GC.
|
||||
JS::AutoSuppressGCAnalysis nogc;
|
||||
|
||||
return mozilla::IsAsciiUppercaseAlpha(span[0]) &&
|
||||
std::all_of(span.begin() + 1, span.end(),
|
||||
mozilla::IsAsciiLowercaseAlpha<char>);
|
||||
std::all_of(span.begin() + 1, span.end(), mozilla::IsAsciiLowercaseAlpha<char>);
|
||||
}
|
||||
|
||||
static bool IsCanonicallyCasedRegionTag(mozilla::Span<const char> span) {
|
||||
return std::all_of(span.begin(), span.end(),
|
||||
mozilla::IsAsciiUppercaseAlpha<char>) ||
|
||||
// Tell the analysis the |std::all_of| function can't GC.
|
||||
JS::AutoSuppressGCAnalysis nogc;
|
||||
|
||||
return std::all_of(span.begin(), span.end(), mozilla::IsAsciiUppercaseAlpha<char>) ||
|
||||
std::all_of(span.begin(), span.end(), mozilla::IsAsciiDigit<char>);
|
||||
}
|
||||
|
||||
static bool IsCanonicallyCasedVariantTag(mozilla::Span<const char> span) {
|
||||
// Tell the analysis the |std::all_of| function can't GC.
|
||||
JS::AutoSuppressGCAnalysis nogc;
|
||||
|
||||
return std::all_of(span.begin(), span.end(), IsAsciiLowercaseAlphanumeric);
|
||||
}
|
||||
|
||||
@ -84,8 +96,7 @@ static bool IsCanonicallyCasedUnicodeKey(mozilla::Span<const char> key) {
|
||||
}
|
||||
|
||||
static bool IsCanonicallyCasedUnicodeType(mozilla::Span<const char> type) {
|
||||
return std::all_of(type.begin(), type.end(),
|
||||
IsAsciiLowercaseAlphanumericOrDash);
|
||||
return std::all_of(type.begin(), type.end(), IsAsciiLowercaseAlphanumericOrDash);
|
||||
}
|
||||
|
||||
static bool IsCanonicallyCasedTransformKey(mozilla::Span<const char> key) {
|
||||
@ -93,15 +104,14 @@ static bool IsCanonicallyCasedTransformKey(mozilla::Span<const char> key) {
|
||||
}
|
||||
|
||||
static bool IsCanonicallyCasedTransformType(mozilla::Span<const char> type) {
|
||||
return std::all_of(type.begin(), type.end(),
|
||||
IsAsciiLowercaseAlphanumericOrDash);
|
||||
return std::all_of(type.begin(), type.end(), IsAsciiLowercaseAlphanumericOrDash);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Mappings from language subtags to preferred values.
|
||||
// Derived from CLDR Supplemental Data, version 39.
|
||||
// https://unicode.org/Public/cldr/39/core.zip
|
||||
bool mozilla::intl::Locale::languageMapping(LanguageSubtag& language) {
|
||||
bool js::intl::LanguageTag::languageMapping(LanguageSubtag& language) {
|
||||
MOZ_ASSERT(IsStructurallyValidLanguageTag(language.span()));
|
||||
MOZ_ASSERT(IsCanonicallyCasedLanguageTag(language.span()));
|
||||
|
||||
@ -221,7 +231,7 @@ bool mozilla::intl::Locale::languageMapping(LanguageSubtag& language) {
|
||||
// Language subtags with complex mappings.
|
||||
// Derived from CLDR Supplemental Data, version 39.
|
||||
// https://unicode.org/Public/cldr/39/core.zip
|
||||
bool mozilla::intl::Locale::complexLanguageMapping(const LanguageSubtag& language) {
|
||||
bool js::intl::LanguageTag::complexLanguageMapping(const LanguageSubtag& language) {
|
||||
MOZ_ASSERT(IsStructurallyValidLanguageTag(language.span()));
|
||||
MOZ_ASSERT(IsCanonicallyCasedLanguageTag(language.span()));
|
||||
|
||||
@ -243,7 +253,7 @@ bool mozilla::intl::Locale::complexLanguageMapping(const LanguageSubtag& languag
|
||||
// Mappings from script subtags to preferred values.
|
||||
// Derived from CLDR Supplemental Data, version 39.
|
||||
// https://unicode.org/Public/cldr/39/core.zip
|
||||
bool mozilla::intl::Locale::scriptMapping(ScriptSubtag& script) {
|
||||
bool js::intl::LanguageTag::scriptMapping(ScriptSubtag& script) {
|
||||
MOZ_ASSERT(IsStructurallyValidScriptTag(script.span()));
|
||||
MOZ_ASSERT(IsCanonicallyCasedScriptTag(script.span()));
|
||||
|
||||
@ -259,7 +269,7 @@ bool mozilla::intl::Locale::scriptMapping(ScriptSubtag& script) {
|
||||
// Mappings from region subtags to preferred values.
|
||||
// Derived from CLDR Supplemental Data, version 39.
|
||||
// https://unicode.org/Public/cldr/39/core.zip
|
||||
bool mozilla::intl::Locale::regionMapping(RegionSubtag& region) {
|
||||
bool js::intl::LanguageTag::regionMapping(RegionSubtag& region) {
|
||||
MOZ_ASSERT(IsStructurallyValidRegionTag(region.span()));
|
||||
MOZ_ASSERT(IsCanonicallyCasedRegionTag(region.span()));
|
||||
|
||||
@ -359,7 +369,7 @@ bool mozilla::intl::Locale::regionMapping(RegionSubtag& region) {
|
||||
// Region subtags with complex mappings.
|
||||
// Derived from CLDR Supplemental Data, version 39.
|
||||
// https://unicode.org/Public/cldr/39/core.zip
|
||||
bool mozilla::intl::Locale::complexRegionMapping(const RegionSubtag& region) {
|
||||
bool js::intl::LanguageTag::complexRegionMapping(const RegionSubtag& region) {
|
||||
MOZ_ASSERT(IsStructurallyValidRegionTag(region.span()));
|
||||
MOZ_ASSERT(IsCanonicallyCasedRegionTag(region.span()));
|
||||
|
||||
@ -382,7 +392,7 @@ bool mozilla::intl::Locale::complexRegionMapping(const RegionSubtag& region) {
|
||||
// Language subtags with complex mappings.
|
||||
// Derived from CLDR Supplemental Data, version 39.
|
||||
// https://unicode.org/Public/cldr/39/core.zip
|
||||
void mozilla::intl::Locale::performComplexLanguageMappings() {
|
||||
void js::intl::LanguageTag::performComplexLanguageMappings() {
|
||||
MOZ_ASSERT(IsStructurallyValidLanguageTag(language().span()));
|
||||
MOZ_ASSERT(IsCanonicallyCasedLanguageTag(language().span()));
|
||||
|
||||
@ -418,7 +428,7 @@ void mozilla::intl::Locale::performComplexLanguageMappings() {
|
||||
// Region subtags with complex mappings.
|
||||
// Derived from CLDR Supplemental Data, version 39.
|
||||
// https://unicode.org/Public/cldr/39/core.zip
|
||||
void mozilla::intl::Locale::performComplexRegionMappings() {
|
||||
void js::intl::LanguageTag::performComplexRegionMappings() {
|
||||
MOZ_ASSERT(IsStructurallyValidLanguageTag(language().span()));
|
||||
MOZ_ASSERT(IsCanonicallyCasedLanguageTag(language().span()));
|
||||
MOZ_ASSERT(IsStructurallyValidRegionTag(region().span()));
|
||||
@ -624,7 +634,7 @@ static const char* ToCharPointer(const char* str) {
|
||||
return str;
|
||||
}
|
||||
|
||||
static const char* ToCharPointer(const mozilla::intl::UniqueChars& str) {
|
||||
static const char* ToCharPointer(const js::UniqueChars& str) {
|
||||
return str.get();
|
||||
}
|
||||
|
||||
@ -636,7 +646,7 @@ static bool IsLessThan(const T& a, const U& b) {
|
||||
// Mappings from variant subtags to preferred values.
|
||||
// Derived from CLDR Supplemental Data, version 39.
|
||||
// https://unicode.org/Public/cldr/39/core.zip
|
||||
bool mozilla::intl::Locale::performVariantMappings() {
|
||||
bool js::intl::LanguageTag::performVariantMappings(JSContext* cx) {
|
||||
// The variant subtags need to be sorted for binary search.
|
||||
MOZ_ASSERT(std::is_sorted(variants_.begin(), variants_.end(),
|
||||
IsLessThan<decltype(variants_)::ElementType>));
|
||||
@ -646,9 +656,9 @@ bool mozilla::intl::Locale::performVariantMappings() {
|
||||
};
|
||||
|
||||
auto insertVariantSortedIfNotPresent = [&](const char* variant) {
|
||||
auto* p = std::lower_bound(
|
||||
variants_.begin(), variants_.end(), variant,
|
||||
IsLessThan<decltype(variants_)::ElementType, decltype(variant)>);
|
||||
auto* p = std::lower_bound(variants_.begin(), variants_.end(), variant,
|
||||
IsLessThan<decltype(variants_)::ElementType,
|
||||
decltype(variant)>);
|
||||
|
||||
// Don't insert the replacement when already present.
|
||||
if (p != variants_.end() && strcmp(p->get(), variant) == 0) {
|
||||
@ -656,11 +666,14 @@ bool mozilla::intl::Locale::performVariantMappings() {
|
||||
}
|
||||
|
||||
// Insert the preferred variant in sort order.
|
||||
auto preferred = DuplicateStringToUniqueChars(variant);
|
||||
auto preferred = DuplicateString(cx, variant);
|
||||
if (!preferred) {
|
||||
return false;
|
||||
}
|
||||
return !!variants_.insert(p, std::move(preferred));
|
||||
};
|
||||
|
||||
for (size_t i = 0; i < variants_.length();) {
|
||||
for (size_t i = 0; i < variants_.length(); ) {
|
||||
const char* variant = variants_[i].get();
|
||||
MOZ_ASSERT(IsCanonicallyCasedVariantTag(mozilla::MakeStringSpan(variant)));
|
||||
|
||||
@ -700,7 +713,7 @@ bool mozilla::intl::Locale::performVariantMappings() {
|
||||
// Canonicalize legacy locale identifiers.
|
||||
// Derived from CLDR Supplemental Data, version 39.
|
||||
// https://unicode.org/Public/cldr/39/core.zip
|
||||
bool mozilla::intl::Locale::updateLegacyMappings() {
|
||||
bool js::intl::LanguageTag::updateLegacyMappings(JSContext* cx) {
|
||||
// We're mapping legacy tags to non-legacy form here.
|
||||
// Other tags remain unchanged.
|
||||
//
|
||||
@ -715,10 +728,8 @@ bool mozilla::intl::Locale::updateLegacyMappings() {
|
||||
}
|
||||
|
||||
for ([[maybe_unused]] const auto& variant : variants()) {
|
||||
MOZ_ASSERT(
|
||||
IsStructurallyValidVariantTag(mozilla::MakeStringSpan(variant.get())));
|
||||
MOZ_ASSERT(
|
||||
IsCanonicallyCasedVariantTag(mozilla::MakeStringSpan(variant.get())));
|
||||
MOZ_ASSERT(IsStructurallyValidVariantTag(mozilla::MakeStringSpan(variant.get())));
|
||||
MOZ_ASSERT(IsCanonicallyCasedVariantTag(mozilla::MakeStringSpan(variant.get())));
|
||||
}
|
||||
|
||||
// The variant subtags need to be sorted for binary search.
|
||||
@ -747,7 +758,10 @@ bool mozilla::intl::Locale::updateLegacyMappings() {
|
||||
}
|
||||
|
||||
// Insert the preferred variant in sort order.
|
||||
auto preferred = DuplicateStringToUniqueChars(variant);
|
||||
auto preferred = DuplicateString(cx, variant);
|
||||
if (!preferred) {
|
||||
return false;
|
||||
}
|
||||
return !!variants_.insert(p, std::move(preferred));
|
||||
};
|
||||
|
||||
@ -858,7 +872,7 @@ bool mozilla::intl::Locale::updateLegacyMappings() {
|
||||
// Mappings from legacy sign languages.
|
||||
// Derived from CLDR Supplemental Data, version 39.
|
||||
// https://unicode.org/Public/cldr/39/core.zip
|
||||
bool mozilla::intl::Locale::signLanguageMapping(LanguageSubtag& language,
|
||||
bool js::intl::LanguageTag::signLanguageMapping(LanguageSubtag& language,
|
||||
const RegionSubtag& region) {
|
||||
MOZ_ASSERT(language.equalTo("sgn"));
|
||||
MOZ_ASSERT(IsStructurallyValidRegionTag(region.span()));
|
||||
@ -904,14 +918,16 @@ bool mozilla::intl::Locale::signLanguageMapping(LanguageSubtag& language,
|
||||
}
|
||||
|
||||
template <size_t Length>
|
||||
static inline bool IsUnicodeKey(mozilla::Span<const char> key, const char (&str)[Length]) {
|
||||
static inline bool IsUnicodeKey(
|
||||
mozilla::Span<const char> key, const char (&str)[Length]) {
|
||||
static_assert(Length == UnicodeKeyLength + 1,
|
||||
"Unicode extension key is two characters long");
|
||||
return memcmp(key.data(), str, Length - 1) == 0;
|
||||
}
|
||||
|
||||
template <size_t Length>
|
||||
static inline bool IsUnicodeType(mozilla::Span<const char> type, const char (&str)[Length]) {
|
||||
static inline bool IsUnicodeType(
|
||||
mozilla::Span<const char> type, const char (&str)[Length]) {
|
||||
static_assert(Length > UnicodeKeyLength + 1,
|
||||
"Unicode extension type contains more than two characters");
|
||||
return type.size() == (Length - 1) &&
|
||||
@ -944,8 +960,8 @@ static inline const char* SearchUnicodeReplacement(
|
||||
|
||||
auto p = std::lower_bound(std::begin(types), std::end(types), type,
|
||||
[](const auto& a, const auto& b) {
|
||||
return CompareUnicodeType(a, b) < 0;
|
||||
});
|
||||
return CompareUnicodeType(a, b) < 0;
|
||||
});
|
||||
if (p != std::end(types) && CompareUnicodeType(*p, type) == 0) {
|
||||
return aliases[std::distance(std::begin(types), p)];
|
||||
}
|
||||
@ -959,7 +975,7 @@ static inline const char* SearchUnicodeReplacement(
|
||||
* Spec: https://www.unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files
|
||||
* Spec: https://www.unicode.org/reports/tr35/#t_Extension
|
||||
*/
|
||||
const char* mozilla::intl::Locale::replaceUnicodeExtensionType(
|
||||
const char* js::intl::LanguageTag::replaceUnicodeExtensionType(
|
||||
mozilla::Span<const char> key, mozilla::Span<const char> type) {
|
||||
MOZ_ASSERT(key.size() == UnicodeKeyLength);
|
||||
MOZ_ASSERT(IsCanonicallyCasedUnicodeKey(key));
|
||||
@ -1000,67 +1016,67 @@ const char* mozilla::intl::Locale::replaceUnicodeExtensionType(
|
||||
else if (IsUnicodeKey(key, "rg") ||
|
||||
IsUnicodeKey(key, "sd")) {
|
||||
static const char* types[144] = {
|
||||
"cn11" , "cn12" , "cn13" , "cn14" , "cn15" , "cn21" , "cn22" ,
|
||||
"cn23" , "cn31" , "cn32" , "cn33" , "cn34" , "cn35" , "cn36" ,
|
||||
"cn37" , "cn41" , "cn42" , "cn43" , "cn44" , "cn45" , "cn46" ,
|
||||
"cn50" , "cn51" , "cn52" , "cn53" , "cn54" , "cn61" , "cn62" ,
|
||||
"cn63" , "cn64" , "cn65" , "cn71" , "cn91" , "cn92" , "cz10a" ,
|
||||
"cz10b" , "cz10c" , "cz10d" , "cz10e" , "cz10f" , "cz611" , "cz612" ,
|
||||
"cz613" , "cz614" , "cz615" , "cz621" , "cz622" , "cz623" , "cz624" ,
|
||||
"cz626" , "cz627" , "czjc" , "czjm" , "czka" , "czkr" , "czli" ,
|
||||
"czmo" , "czol" , "czpa" , "czpl" , "czpr" , "czst" , "czus" ,
|
||||
"czvy" , "czzl" , "fi01" , "fra" , "frb" , "frbl" , "frc" ,
|
||||
"frcp" , "frd" , "fre" , "frf" , "frg" , "frgf" , "frgp" ,
|
||||
"frh" , "fri" , "frj" , "frk" , "frl" , "frm" , "frmf" ,
|
||||
"frmq" , "frn" , "frnc" , "fro" , "frp" , "frpf" , "frpm" ,
|
||||
"frq" , "frr" , "frre" , "frs" , "frt" , "frtf" , "fru" ,
|
||||
"frv" , "frwf" , "fryt" , "laxn" , "lud" , "lug" , "lul" ,
|
||||
"mrnkc" , "nlaw" , "nlcw" , "nlsx" , "no23" , "nzn" , "nzs" ,
|
||||
"omba" , "omsh" , "plds" , "plkp" , "pllb" , "plld" , "pllu" ,
|
||||
"plma" , "plmz" , "plop" , "plpd" , "plpk" , "plpm" , "plsk" ,
|
||||
"plsl" , "plwn" , "plwp" , "plzp" , "shta" , "tteto" , "ttrcm" ,
|
||||
"ttwto" , "twkhq" , "twtnq" , "twtpq" , "twtxq" , "usas" , "usgu" ,
|
||||
"usmp" , "uspr" , "usum" , "usvi" ,
|
||||
"cn11", "cn12", "cn13", "cn14", "cn15", "cn21", "cn22",
|
||||
"cn23", "cn31", "cn32", "cn33", "cn34", "cn35", "cn36",
|
||||
"cn37", "cn41", "cn42", "cn43", "cn44", "cn45", "cn46",
|
||||
"cn50", "cn51", "cn52", "cn53", "cn54", "cn61", "cn62",
|
||||
"cn63", "cn64", "cn65", "cn71", "cn91", "cn92", "cz10a",
|
||||
"cz10b", "cz10c", "cz10d", "cz10e", "cz10f", "cz611", "cz612",
|
||||
"cz613", "cz614", "cz615", "cz621", "cz622", "cz623", "cz624",
|
||||
"cz626", "cz627", "czjc", "czjm", "czka", "czkr", "czli",
|
||||
"czmo", "czol", "czpa", "czpl", "czpr", "czst", "czus",
|
||||
"czvy", "czzl", "fi01", "fra", "frb", "frbl", "frc",
|
||||
"frcp", "frd", "fre", "frf", "frg", "frgf", "frgp",
|
||||
"frh", "fri", "frj", "frk", "frl", "frm", "frmf",
|
||||
"frmq", "frn", "frnc", "fro", "frp", "frpf", "frpm",
|
||||
"frq", "frr", "frre", "frs", "frt", "frtf", "fru",
|
||||
"frv", "frwf", "fryt", "laxn", "lud", "lug", "lul",
|
||||
"mrnkc", "nlaw", "nlcw", "nlsx", "no23", "nzn", "nzs",
|
||||
"omba", "omsh", "plds", "plkp", "pllb", "plld", "pllu",
|
||||
"plma", "plmz", "plop", "plpd", "plpk", "plpm", "plsk",
|
||||
"plsl", "plwn", "plwp", "plzp", "shta", "tteto", "ttrcm",
|
||||
"ttwto", "twkhq", "twtnq", "twtpq", "twtxq", "usas", "usgu",
|
||||
"usmp", "uspr", "usum", "usvi",
|
||||
};
|
||||
static const char* aliases[144] = {
|
||||
"cnbj" , "cntj" , "cnhe" , "cnsx" , "cnmn" , "cnln" , "cnjl" ,
|
||||
"cnhl" , "cnsh" , "cnjs" , "cnzj" , "cnah" , "cnfj" , "cnjx" ,
|
||||
"cnsd" , "cnha" , "cnhb" , "cnhn" , "cngd" , "cngx" , "cnhi" ,
|
||||
"cncq" , "cnsc" , "cngz" , "cnyn" , "cnxz" , "cnsn" , "cngs" ,
|
||||
"cnqh" , "cnnx" , "cnxj" , "twzzzz", "hkzzzz", "mozzzz", "cz110" ,
|
||||
"cz111" , "cz112" , "cz113" , "cz114" , "cz115" , "cz663" , "cz632" ,
|
||||
"cz633" , "cz634" , "cz635" , "cz641" , "cz642" , "cz643" , "cz644" ,
|
||||
"cz646" , "cz647" , "cz31" , "cz64" , "cz41" , "cz52" , "cz51" ,
|
||||
"cz80" , "cz71" , "cz53" , "cz32" , "cz10" , "cz20" , "cz42" ,
|
||||
"cz63" , "cz72" , "axzzzz", "frges" , "frnaq" , "blzzzz", "frara" ,
|
||||
"cpzzzz", "frbfc" , "frbre" , "frcvl" , "frges" , "gfzzzz", "gpzzzz",
|
||||
"frcor" , "frbfc" , "fridf" , "frocc" , "frnaq" , "frges" , "mfzzzz",
|
||||
"mqzzzz", "frocc" , "nczzzz", "frhdf" , "frnor" , "pfzzzz", "pmzzzz",
|
||||
"frnor" , "frpdl" , "rezzzz", "frhdf" , "frnaq" , "tfzzzz", "frpac" ,
|
||||
"frara" , "wfzzzz", "ytzzzz", "laxs" , "lucl" , "luec" , "luca" ,
|
||||
"mr13" , "awzzzz", "cwzzzz", "sxzzzz", "no50" , "nzauk" , "nzcan" ,
|
||||
"ombj" , "omsj" , "pl02" , "pl04" , "pl08" , "pl10" , "pl06" ,
|
||||
"pl12" , "pl14" , "pl16" , "pl20" , "pl18" , "pl22" , "pl26" ,
|
||||
"pl24" , "pl28" , "pl30" , "pl32" , "tazzzz", "tttob" , "ttmrc" ,
|
||||
"tttob" , "twkhh" , "twtnn" , "twnwt" , "twtxg" , "aszzzz", "guzzzz",
|
||||
"mpzzzz", "przzzz", "umzzzz", "vizzzz",
|
||||
"cnbj", "cntj", "cnhe", "cnsx", "cnmn", "cnln", "cnjl",
|
||||
"cnhl", "cnsh", "cnjs", "cnzj", "cnah", "cnfj", "cnjx",
|
||||
"cnsd", "cnha", "cnhb", "cnhn", "cngd", "cngx", "cnhi",
|
||||
"cncq", "cnsc", "cngz", "cnyn", "cnxz", "cnsn", "cngs",
|
||||
"cnqh", "cnnx", "cnxj", "twzzzz", "hkzzzz", "mozzzz", "cz110",
|
||||
"cz111", "cz112", "cz113", "cz114", "cz115", "cz663", "cz632",
|
||||
"cz633", "cz634", "cz635", "cz641", "cz642", "cz643", "cz644",
|
||||
"cz646", "cz647", "cz31", "cz64", "cz41", "cz52", "cz51",
|
||||
"cz80", "cz71", "cz53", "cz32", "cz10", "cz20", "cz42",
|
||||
"cz63", "cz72", "axzzzz", "frges", "frnaq", "blzzzz", "frara",
|
||||
"cpzzzz", "frbfc", "frbre", "frcvl", "frges", "gfzzzz", "gpzzzz",
|
||||
"frcor", "frbfc", "fridf", "frocc", "frnaq", "frges", "mfzzzz",
|
||||
"mqzzzz", "frocc", "nczzzz", "frhdf", "frnor", "pfzzzz", "pmzzzz",
|
||||
"frnor", "frpdl", "rezzzz", "frhdf", "frnaq", "tfzzzz", "frpac",
|
||||
"frara", "wfzzzz", "ytzzzz", "laxs", "lucl", "luec", "luca",
|
||||
"mr13", "awzzzz", "cwzzzz", "sxzzzz", "no50", "nzauk", "nzcan",
|
||||
"ombj", "omsj", "pl02", "pl04", "pl08", "pl10", "pl06",
|
||||
"pl12", "pl14", "pl16", "pl20", "pl18", "pl22", "pl26",
|
||||
"pl24", "pl28", "pl30", "pl32", "tazzzz", "tttob", "ttmrc",
|
||||
"tttob", "twkhh", "twtnn", "twnwt", "twtxg", "aszzzz", "guzzzz",
|
||||
"mpzzzz", "przzzz", "umzzzz", "vizzzz",
|
||||
};
|
||||
return SearchUnicodeReplacement(types, aliases, type);
|
||||
}
|
||||
else if (IsUnicodeKey(key, "tz")) {
|
||||
static const char* types[28] = {
|
||||
"aqams" , "cnckg" , "cnhrb" , "cnkhg" , "cuba" , "egypt" ,
|
||||
"eire" , "est" , "gmt0" , "hongkong", "hst" , "iceland" ,
|
||||
"iran" , "israel" , "jamaica" , "japan" , "libya" , "mst" ,
|
||||
"navajo" , "poland" , "portugal", "prc" , "roc" , "rok" ,
|
||||
"turkey" , "uct" , "usnavajo", "zulu" ,
|
||||
"aqams", "cnckg", "cnhrb", "cnkhg", "cuba", "egypt",
|
||||
"eire", "est", "gmt0", "hongkong", "hst", "iceland",
|
||||
"iran", "israel", "jamaica", "japan", "libya", "mst",
|
||||
"navajo", "poland", "portugal", "prc", "roc", "rok",
|
||||
"turkey", "uct", "usnavajo", "zulu",
|
||||
};
|
||||
static const char* aliases[28] = {
|
||||
"nzakl" , "cnsha" , "cnsha" , "cnurc" , "cuhav" , "egcai" ,
|
||||
"iedub" , "utcw05" , "gmt" , "hkhkg" , "utcw10" , "isrey" ,
|
||||
"irthr" , "jeruslm" , "jmkin" , "jptyo" , "lytip" , "utcw07" ,
|
||||
"usden" , "plwaw" , "ptlis" , "cnsha" , "twtpe" , "krsel" ,
|
||||
"trist" , "utc" , "usden" , "utc" ,
|
||||
"nzakl", "cnsha", "cnsha", "cnurc", "cuhav", "egcai",
|
||||
"iedub", "utcw05", "gmt", "hkhkg", "utcw10", "isrey",
|
||||
"irthr", "jeruslm", "jmkin", "jptyo", "lytip", "utcw07",
|
||||
"usden", "plwaw", "ptlis", "cnsha", "twtpe", "krsel",
|
||||
"trist", "utc", "usden", "utc",
|
||||
};
|
||||
return SearchUnicodeReplacement(types, aliases, type);
|
||||
}
|
||||
@ -1068,14 +1084,16 @@ const char* mozilla::intl::Locale::replaceUnicodeExtensionType(
|
||||
}
|
||||
|
||||
template <size_t Length>
|
||||
static inline bool IsTransformKey(mozilla::Span<const char> key, const char (&str)[Length]) {
|
||||
static inline bool IsTransformKey(
|
||||
mozilla::Span<const char> key, const char (&str)[Length]) {
|
||||
static_assert(Length == TransformKeyLength + 1,
|
||||
"Transform extension key is two characters long");
|
||||
return memcmp(key.data(), str, Length - 1) == 0;
|
||||
}
|
||||
|
||||
template <size_t Length>
|
||||
static inline bool IsTransformType(mozilla::Span<const char> type, const char (&str)[Length]) {
|
||||
static inline bool IsTransformType(
|
||||
mozilla::Span<const char> type, const char (&str)[Length]) {
|
||||
static_assert(Length > TransformKeyLength + 1,
|
||||
"Transform extension type contains more than two characters");
|
||||
return type.size() == (Length - 1) &&
|
||||
@ -1089,7 +1107,7 @@ static inline bool IsTransformType(mozilla::Span<const char> type, const char (&
|
||||
* Spec: https://www.unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files
|
||||
* Spec: https://www.unicode.org/reports/tr35/#t_Extension
|
||||
*/
|
||||
const char* mozilla::intl::Locale::replaceTransformExtensionType(
|
||||
const char* js::intl::LanguageTag::replaceTransformExtensionType(
|
||||
mozilla::Span<const char> key, mozilla::Span<const char> type) {
|
||||
MOZ_ASSERT(key.size() == TransformKeyLength);
|
||||
MOZ_ASSERT(IsCanonicallyCasedTransformKey(key));
|
@ -11,7 +11,6 @@
|
||||
#include "mozilla/ArrayUtils.h"
|
||||
#include "mozilla/Assertions.h"
|
||||
#include "mozilla/Casting.h"
|
||||
#include "mozilla/intl/Locale.h"
|
||||
#include "mozilla/Maybe.h"
|
||||
#include "mozilla/Span.h"
|
||||
#include "mozilla/TextUtils.h"
|
||||
@ -24,9 +23,7 @@
|
||||
|
||||
#include "builtin/Boolean.h"
|
||||
#include "builtin/intl/CommonFunctions.h"
|
||||
#include "builtin/intl/FormatBuffer.h"
|
||||
#include "builtin/intl/LanguageTag.h"
|
||||
#include "builtin/intl/StringAsciiChars.h"
|
||||
#include "builtin/String.h"
|
||||
#include "gc/Rooting.h"
|
||||
#include "js/Conversions.h"
|
||||
@ -45,7 +42,10 @@
|
||||
#include "vm/NativeObject-inl.h"
|
||||
|
||||
using namespace js;
|
||||
using namespace mozilla::intl::LanguageTagLimits;
|
||||
using namespace js::intl::LanguageTagLimits;
|
||||
|
||||
using intl::LanguageTag;
|
||||
using intl::LanguageTagParser;
|
||||
|
||||
const JSClass LocaleObject::class_ = {
|
||||
"Intl.Locale",
|
||||
@ -60,7 +60,7 @@ static inline bool IsLocale(HandleValue v) {
|
||||
}
|
||||
|
||||
// Return the length of the base-name subtags.
|
||||
static size_t BaseNameLength(const mozilla::intl::Locale& tag) {
|
||||
static size_t BaseNameLength(const LanguageTag& tag) {
|
||||
size_t baseNameLength = tag.language().length();
|
||||
if (tag.script().present()) {
|
||||
baseNameLength += 1 + tag.script().length();
|
||||
@ -88,7 +88,7 @@ struct IndexAndLength {
|
||||
|
||||
// Compute the Unicode extension's index and length in the extension subtag.
|
||||
static mozilla::Maybe<IndexAndLength> UnicodeExtensionPosition(
|
||||
const mozilla::intl::Locale& tag) {
|
||||
const LanguageTag& tag) {
|
||||
size_t index = 0;
|
||||
for (const auto& extension : tag.extensions()) {
|
||||
MOZ_ASSERT(!mozilla::IsAsciiUppercaseAlpha(extension[0]),
|
||||
@ -106,14 +106,8 @@ static mozilla::Maybe<IndexAndLength> UnicodeExtensionPosition(
|
||||
}
|
||||
|
||||
static LocaleObject* CreateLocaleObject(JSContext* cx, HandleObject prototype,
|
||||
const mozilla::intl::Locale& tag) {
|
||||
intl::FormatBuffer<char, intl::INITIAL_CHAR_BUFFER_SIZE> buffer(cx);
|
||||
if (auto result = tag.toString(buffer); result.isErr()) {
|
||||
intl::ReportInternalError(cx, result.unwrapErr());
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
RootedString tagStr(cx, buffer.toString(cx));
|
||||
const LanguageTag& tag) {
|
||||
RootedString tagStr(cx, tag.toString(cx));
|
||||
if (!tagStr) {
|
||||
return nullptr;
|
||||
}
|
||||
@ -148,27 +142,9 @@ static LocaleObject* CreateLocaleObject(JSContext* cx, HandleObject prototype,
|
||||
return locale;
|
||||
}
|
||||
|
||||
static inline bool IsValidUnicodeExtensionValue(JSContext* cx,
|
||||
JSLinearString* linear,
|
||||
bool* isValid) {
|
||||
if (linear->length() == 0) {
|
||||
*isValid = false;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!StringIsAscii(linear)) {
|
||||
*isValid = false;
|
||||
return true;
|
||||
}
|
||||
|
||||
intl::StringAsciiChars chars(linear);
|
||||
if (!chars.init(cx)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
*isValid =
|
||||
mozilla::intl::LocaleParser::canParseUnicodeExtensionType(chars).isOk();
|
||||
return true;
|
||||
static inline bool IsValidUnicodeExtensionValue(JSLinearString* linear) {
|
||||
return linear->length() > 0 &&
|
||||
LanguageTagParser::canParseUnicodeExtensionType(linear);
|
||||
}
|
||||
|
||||
/** Iterate through (sep keyword) in a valid, lowercased Unicode extension. */
|
||||
@ -298,7 +274,7 @@ static bool GetBooleanOption(JSContext* cx, HandleObject options,
|
||||
/**
|
||||
* ApplyOptionsToTag ( tag, options )
|
||||
*/
|
||||
static bool ApplyOptionsToTag(JSContext* cx, mozilla::intl::Locale& tag,
|
||||
static bool ApplyOptionsToTag(JSContext* cx, LanguageTag& tag,
|
||||
HandleObject options) {
|
||||
// Steps 1-2 (Already performed in caller).
|
||||
|
||||
@ -310,7 +286,7 @@ static bool ApplyOptionsToTag(JSContext* cx, mozilla::intl::Locale& tag,
|
||||
}
|
||||
|
||||
// Step 4.
|
||||
mozilla::intl::LanguageSubtag language;
|
||||
intl::LanguageSubtag language;
|
||||
if (option && !intl::ParseStandaloneLanguageTag(option, language)) {
|
||||
if (UniqueChars str = QuoteString(cx, option, '"')) {
|
||||
JS_ReportErrorNumberASCII(cx, js::GetErrorMessage, nullptr,
|
||||
@ -326,7 +302,7 @@ static bool ApplyOptionsToTag(JSContext* cx, mozilla::intl::Locale& tag,
|
||||
}
|
||||
|
||||
// Step 6.
|
||||
mozilla::intl::ScriptSubtag script;
|
||||
intl::ScriptSubtag script;
|
||||
if (option && !intl::ParseStandaloneScriptTag(option, script)) {
|
||||
if (UniqueChars str = QuoteString(cx, option, '"')) {
|
||||
JS_ReportErrorNumberASCII(cx, js::GetErrorMessage, nullptr,
|
||||
@ -342,7 +318,7 @@ static bool ApplyOptionsToTag(JSContext* cx, mozilla::intl::Locale& tag,
|
||||
}
|
||||
|
||||
// Step 8.
|
||||
mozilla::intl::RegionSubtag region;
|
||||
intl::RegionSubtag region;
|
||||
if (option && !intl::ParseStandaloneRegionTag(option, region)) {
|
||||
if (UniqueChars str = QuoteString(cx, option, '"')) {
|
||||
JS_ReportErrorNumberASCII(cx, js::GetErrorMessage, nullptr,
|
||||
@ -374,16 +350,8 @@ static bool ApplyOptionsToTag(JSContext* cx, mozilla::intl::Locale& tag,
|
||||
// Step 13.
|
||||
// Optimized to only canonicalize the base-name subtags. All other
|
||||
// canonicalization steps will happen later.
|
||||
auto result = tag.canonicalizeBaseName();
|
||||
if (result.isErr()) {
|
||||
if (result.unwrapErr() ==
|
||||
mozilla::intl::Locale::CanonicalizationError::DuplicateVariant) {
|
||||
JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
|
||||
JSMSG_DUPLICATE_VARIANT_SUBTAG);
|
||||
} else {
|
||||
intl::ReportInternalError(cx);
|
||||
}
|
||||
return false;
|
||||
if (!tag.canonicalizeBaseName(cx)) {
|
||||
// Canonicalization failed: signal failure to the caller instead of
// reporting success with a non-canonical tag.
return false;
|
||||
}
|
||||
}
|
||||
|
||||
@ -394,7 +362,7 @@ static bool ApplyOptionsToTag(JSContext* cx, mozilla::intl::Locale& tag,
|
||||
* ApplyUnicodeExtensionToTag( tag, options, relevantExtensionKeys )
|
||||
*/
|
||||
bool js::intl::ApplyUnicodeExtensionToTag(
|
||||
JSContext* cx, mozilla::intl::Locale& tag,
|
||||
JSContext* cx, LanguageTag& tag,
|
||||
JS::HandleVector<intl::UnicodeExtensionKeyword> keywords) {
|
||||
// If no Unicode extensions were present in the options object, we can skip
|
||||
// everything below and directly return.
|
||||
@ -469,12 +437,12 @@ bool js::intl::ApplyUnicodeExtensionToTag(
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!tag.setUnicodeExtension(newExtension.begin())) {
|
||||
intl::ReportInternalError(cx);
|
||||
// Insert the new Unicode extension string into the language tag.
|
||||
UniqueChars newExtensionChars(newExtension.extractOrCopyRawBuffer());
|
||||
if (!newExtensionChars) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
return tag.setUnicodeExtension(std::move(newExtensionChars));
|
||||
}
|
||||
|
||||
static JS::Result<JSString*> LanguageTagFromMaybeWrappedLocale(JSContext* cx,
|
||||
@ -553,19 +521,12 @@ static bool Locale(JSContext* cx, unsigned argc, Value* vp) {
|
||||
}
|
||||
|
||||
// ApplyOptionsToTag, steps 2 and 9.
|
||||
mozilla::intl::Locale tag;
|
||||
if (!intl::ParseLocale(cx, tagLinearStr, tag)) {
|
||||
LanguageTag tag(cx);
|
||||
if (!LanguageTagParser::parse(cx, tagLinearStr, tag)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (auto result = tag.canonicalizeBaseName(); result.isErr()) {
|
||||
if (result.unwrapErr() ==
|
||||
mozilla::intl::Locale::CanonicalizationError::DuplicateVariant) {
|
||||
JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
|
||||
JSMSG_DUPLICATE_VARIANT_SUBTAG);
|
||||
} else {
|
||||
intl::ReportInternalError(cx);
|
||||
}
|
||||
if (!tag.canonicalizeBaseName(cx)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -586,12 +547,7 @@ static bool Locale(JSContext* cx, unsigned argc, Value* vp) {
|
||||
|
||||
// Steps 15-16.
|
||||
if (calendar) {
|
||||
bool isValid;
|
||||
if (!IsValidUnicodeExtensionValue(cx, calendar, &isValid)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!isValid) {
|
||||
if (!IsValidUnicodeExtensionValue(calendar)) {
|
||||
if (UniqueChars str = QuoteString(cx, calendar, '"')) {
|
||||
JS_ReportErrorNumberASCII(cx, js::GetErrorMessage, nullptr,
|
||||
JSMSG_INVALID_OPTION_VALUE, "calendar",
|
||||
@ -613,12 +569,7 @@ static bool Locale(JSContext* cx, unsigned argc, Value* vp) {
|
||||
|
||||
// Steps 18-19.
|
||||
if (collation) {
|
||||
bool isValid;
|
||||
if (!IsValidUnicodeExtensionValue(cx, collation, &isValid)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!isValid) {
|
||||
if (!IsValidUnicodeExtensionValue(collation)) {
|
||||
if (UniqueChars str = QuoteString(cx, collation, '"')) {
|
||||
JS_ReportErrorNumberASCII(cx, js::GetErrorMessage, nullptr,
|
||||
JSMSG_INVALID_OPTION_VALUE, "collation",
|
||||
@ -703,11 +654,7 @@ static bool Locale(JSContext* cx, unsigned argc, Value* vp) {
|
||||
|
||||
// Steps 28-29.
|
||||
if (numberingSystem) {
|
||||
bool isValid;
|
||||
if (!IsValidUnicodeExtensionValue(cx, numberingSystem, &isValid)) {
|
||||
return false;
|
||||
}
|
||||
if (!isValid) {
|
||||
if (!IsValidUnicodeExtensionValue(numberingSystem)) {
|
||||
if (UniqueChars str = QuoteString(cx, numberingSystem, '"')) {
|
||||
JS_ReportErrorNumberASCII(cx, js::GetErrorMessage, nullptr,
|
||||
JSMSG_INVALID_OPTION_VALUE,
|
||||
@ -729,14 +676,7 @@ static bool Locale(JSContext* cx, unsigned argc, Value* vp) {
|
||||
|
||||
// ApplyOptionsToTag, steps 9 and 13.
|
||||
// ApplyUnicodeExtensionToTag, step 9.
|
||||
if (auto result = tag.canonicalizeExtensions(); result.isErr()) {
|
||||
if (result.unwrapErr() ==
|
||||
mozilla::intl::Locale::CanonicalizationError::DuplicateVariant) {
|
||||
JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
|
||||
JSMSG_DUPLICATE_VARIANT_SUBTAG);
|
||||
} else {
|
||||
intl::ReportInternalError(cx);
|
||||
}
|
||||
if (!tag.canonicalizeExtensions(cx)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -900,25 +840,19 @@ static BaseNamePartsResult BaseNameParts(const CharT* baseName, size_t length) {
|
||||
languageLength = length;
|
||||
}
|
||||
|
||||
// Tell the analysis the |IsStructurallyValid*Tag| functions can't GC.
|
||||
JS::AutoSuppressGCAnalysis nogc;
|
||||
|
||||
IndexAndLength language{0, languageLength};
|
||||
MOZ_ASSERT(
|
||||
mozilla::intl::IsStructurallyValidLanguageTag(language.spanOf(baseName)));
|
||||
MOZ_ASSERT(intl::IsStructurallyValidLanguageTag(language.spanOf(baseName)));
|
||||
|
||||
mozilla::Maybe<IndexAndLength> script{};
|
||||
if (scriptIndex) {
|
||||
script.emplace(scriptIndex, ScriptLength);
|
||||
MOZ_ASSERT(
|
||||
mozilla::intl::IsStructurallyValidScriptTag(script->spanOf(baseName)));
|
||||
MOZ_ASSERT(intl::IsStructurallyValidScriptTag(script->spanOf(baseName)));
|
||||
}
|
||||
|
||||
mozilla::Maybe<IndexAndLength> region{};
|
||||
if (regionIndex) {
|
||||
region.emplace(regionIndex, regionLength);
|
||||
MOZ_ASSERT(
|
||||
mozilla::intl::IsStructurallyValidRegionTag(region->spanOf(baseName)));
|
||||
MOZ_ASSERT(intl::IsStructurallyValidRegionTag(region->spanOf(baseName)));
|
||||
}
|
||||
|
||||
return {language, script, region};
|
||||
@ -942,13 +876,12 @@ static bool Locale_maximize(JSContext* cx, const CallArgs& args) {
|
||||
return false;
|
||||
}
|
||||
|
||||
mozilla::intl::Locale tag;
|
||||
if (!intl::ParseLocale(cx, tagStr, tag)) {
|
||||
LanguageTag tag(cx);
|
||||
if (!LanguageTagParser::parse(cx, tagStr, tag)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!tag.addLikelySubtags()) {
|
||||
intl::ReportInternalError(cx);
|
||||
if (!tag.addLikelySubtags(cx)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -979,13 +912,12 @@ static bool Locale_minimize(JSContext* cx, const CallArgs& args) {
|
||||
return false;
|
||||
}
|
||||
|
||||
mozilla::intl::Locale tag;
|
||||
if (!intl::ParseLocale(cx, tagStr, tag)) {
|
||||
LanguageTag tag(cx);
|
||||
if (!LanguageTagParser::parse(cx, tagStr, tag)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!tag.removeLikelySubtags()) {
|
||||
intl::ReportInternalError(cx);
|
||||
if (!tag.removeLikelySubtags(cx)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -1348,34 +1280,19 @@ bool js::intl_ValidateAndCanonicalizeLanguageTag(JSContext* cx, unsigned argc,
|
||||
return true;
|
||||
}
|
||||
|
||||
mozilla::intl::Locale tag;
|
||||
if (!intl::ParseLocale(cx, tagLinearStr, tag)) {
|
||||
LanguageTag tag(cx);
|
||||
if (!LanguageTagParser::parse(cx, tagLinearStr, tag)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto result = tag.canonicalize();
|
||||
if (result.isErr()) {
|
||||
if (result.unwrapErr() ==
|
||||
mozilla::intl::Locale::CanonicalizationError::DuplicateVariant) {
|
||||
JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
|
||||
JSMSG_DUPLICATE_VARIANT_SUBTAG);
|
||||
} else {
|
||||
intl::ReportInternalError(cx);
|
||||
}
|
||||
if (!tag.canonicalize(cx)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
intl::FormatBuffer<char, intl::INITIAL_CHAR_BUFFER_SIZE> buffer(cx);
|
||||
if (auto result = tag.toString(buffer); result.isErr()) {
|
||||
intl::ReportInternalError(cx, result.unwrapErr());
|
||||
return false;
|
||||
}
|
||||
|
||||
JSString* resultStr = buffer.toString(cx);
|
||||
JSString* resultStr = tag.toString(cx);
|
||||
if (!resultStr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
args.rval().setString(resultStr);
|
||||
return true;
|
||||
}
|
||||
@ -1390,45 +1307,22 @@ bool js::intl_TryValidateAndCanonicalizeLanguageTag(JSContext* cx,
|
||||
return false;
|
||||
}
|
||||
|
||||
mozilla::intl::Locale tag;
|
||||
{
|
||||
if (!StringIsAscii(linear)) {
|
||||
// The caller handles invalid inputs.
|
||||
args.rval().setNull();
|
||||
return true;
|
||||
}
|
||||
LanguageTag tag(cx);
|
||||
bool ok;
|
||||
JS_TRY_VAR_OR_RETURN_FALSE(cx, ok,
|
||||
LanguageTagParser::tryParse(cx, linear, tag));
|
||||
|
||||
intl::StringAsciiChars chars(linear);
|
||||
if (!chars.init(cx)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (mozilla::intl::LocaleParser::tryParse(chars, tag).isErr()) {
|
||||
// The caller handles invalid inputs.
|
||||
args.rval().setNull();
|
||||
return true;
|
||||
}
|
||||
// The caller handles invalid inputs.
|
||||
if (!ok) {
|
||||
args.rval().setNull();
|
||||
return true;
|
||||
}
|
||||
|
||||
auto result = tag.canonicalize();
|
||||
if (result.isErr()) {
|
||||
if (result.unwrapErr() ==
|
||||
mozilla::intl::Locale::CanonicalizationError::DuplicateVariant) {
|
||||
JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
|
||||
JSMSG_DUPLICATE_VARIANT_SUBTAG);
|
||||
} else {
|
||||
intl::ReportInternalError(cx);
|
||||
}
|
||||
if (!tag.canonicalize(cx)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
intl::FormatBuffer<char, intl::INITIAL_CHAR_BUFFER_SIZE> buffer(cx);
|
||||
if (auto result = tag.toString(buffer); result.isErr()) {
|
||||
intl::ReportInternalError(cx, result.unwrapErr());
|
||||
return false;
|
||||
}
|
||||
|
||||
JSString* resultStr = buffer.toString(cx);
|
||||
JSString* resultStr = tag.toString(cx);
|
||||
if (!resultStr) {
|
||||
return false;
|
||||
}
|
||||
@ -1456,11 +1350,7 @@ bool js::intl_ValidateAndCanonicalizeUnicodeExtensionType(JSContext* cx,
|
||||
return false;
|
||||
}
|
||||
|
||||
bool isValid;
|
||||
if (!IsValidUnicodeExtensionValue(cx, unicodeType, &isValid)) {
|
||||
return false;
|
||||
}
|
||||
if (!isValid) {
|
||||
if (!IsValidUnicodeExtensionValue(unicodeType)) {
|
||||
UniqueChars optionChars = EncodeAscii(cx, optionArg.toString());
|
||||
if (!optionChars) {
|
||||
return false;
|
||||
@ -1501,8 +1391,8 @@ bool js::intl_ValidateAndCanonicalizeUnicodeExtensionType(JSContext* cx,
|
||||
MOZ_ASSERT(strlen(unicodeTypeChars.get()) == unicodeTypeLength);
|
||||
|
||||
// Convert into canonical case before searching for replacements.
|
||||
mozilla::intl::AsciiToLowerCase(unicodeTypeChars.get(), unicodeTypeLength,
|
||||
unicodeTypeChars.get());
|
||||
intl::AsciiToLowerCase(unicodeTypeChars.get(), unicodeTypeLength,
|
||||
unicodeTypeChars.get());
|
||||
|
||||
auto key = mozilla::Span(unicodeKey, UnicodeKeyLength);
|
||||
auto type = mozilla::Span(unicodeTypeChars.get(), unicodeTypeLength);
|
||||
@ -1510,7 +1400,7 @@ bool js::intl_ValidateAndCanonicalizeUnicodeExtensionType(JSContext* cx,
|
||||
// Search if there's a replacement for the current Unicode keyword.
|
||||
JSString* result;
|
||||
if (const char* replacement =
|
||||
mozilla::intl::Locale::replaceUnicodeExtensionType(key, type)) {
|
||||
LanguageTag::replaceUnicodeExtensionType(key, type)) {
|
||||
result = NewStringCopyZ<CanGC>(cx, replacement);
|
||||
} else {
|
||||
result = StringToLowerCase(cx, unicodeType);
|
||||
|
@ -11,7 +11,6 @@
|
||||
#include "mozilla/Assertions.h"
|
||||
#include "mozilla/Casting.h"
|
||||
#include "mozilla/FloatingPoint.h"
|
||||
#include "mozilla/intl/Locale.h"
|
||||
#include "mozilla/intl/MeasureUnit.h"
|
||||
#include "mozilla/intl/NumberFormat.h"
|
||||
#include "mozilla/intl/NumberingSystem.h"
|
||||
@ -32,7 +31,6 @@
|
||||
#include "builtin/Array.h"
|
||||
#include "builtin/intl/CommonFunctions.h"
|
||||
#include "builtin/intl/DecimalNumber.h"
|
||||
#include "builtin/intl/FormatBuffer.h"
|
||||
#include "builtin/intl/LanguageTag.h"
|
||||
#include "builtin/intl/MeasureUnitGenerated.h"
|
||||
#include "builtin/intl/RelativeTimeFormat.h"
|
||||
@ -290,14 +288,14 @@ static UniqueChars NumberFormatLocale(JSContext* cx, HandleObject internals) {
|
||||
|
||||
// ICU expects numberingSystem as a Unicode locale extensions on locale.
|
||||
|
||||
mozilla::intl::Locale tag;
|
||||
intl::LanguageTag tag(cx);
|
||||
{
|
||||
RootedLinearString locale(cx, value.toString()->ensureLinear(cx));
|
||||
JSLinearString* locale = value.toString()->ensureLinear(cx);
|
||||
if (!locale) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (!intl::ParseLocale(cx, locale, tag)) {
|
||||
if (!intl::LanguageTagParser::parse(cx, locale, tag)) {
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
@ -328,12 +326,7 @@ static UniqueChars NumberFormatLocale(JSContext* cx, HandleObject internals) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
intl::FormatBuffer<char> buffer(cx);
|
||||
if (auto result = tag.toString(buffer); result.isErr()) {
|
||||
intl::ReportInternalError(cx, result.unwrapErr());
|
||||
return nullptr;
|
||||
}
|
||||
return buffer.extractStringZ();
|
||||
return tag.toStringZ(cx);
|
||||
}
|
||||
|
||||
struct NumberFormatOptions : public mozilla::intl::NumberRangeFormatOptions {
|
||||
|
@ -168,14 +168,14 @@ static mozilla::intl::RelativeTimeFormat* NewRelativeTimeFormatter(
|
||||
|
||||
// ICU expects numberingSystem as a Unicode locale extensions on locale.
|
||||
|
||||
mozilla::intl::Locale tag;
|
||||
intl::LanguageTag tag(cx);
|
||||
{
|
||||
RootedLinearString locale(cx, value.toString()->ensureLinear(cx));
|
||||
JSLinearString* locale = value.toString()->ensureLinear(cx);
|
||||
if (!locale) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (!intl::ParseLocale(cx, locale, tag)) {
|
||||
if (!intl::LanguageTagParser::parse(cx, locale, tag)) {
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
@ -206,13 +206,7 @@ static mozilla::intl::RelativeTimeFormat* NewRelativeTimeFormatter(
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
intl::FormatBuffer<char> buffer(cx);
|
||||
if (auto result = tag.toString(buffer); result.isErr()) {
|
||||
intl::ReportInternalError(cx, result.unwrapErr());
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
UniqueChars locale = buffer.extractStringZ();
|
||||
UniqueChars locale = tag.toStringZ(cx);
|
||||
if (!locale) {
|
||||
return nullptr;
|
||||
}
|
||||
|
@ -377,7 +377,7 @@ bool js::intl::SharedIntlData::getAvailableLocales(
|
||||
// + 4 * Alphanum script subtag
|
||||
// + 1 separator
|
||||
// + 2 * Alpha region subtag
|
||||
using namespace mozilla::intl::LanguageTagLimits;
|
||||
using namespace intl::LanguageTagLimits;
|
||||
static constexpr size_t MinLanguageLength = 2;
|
||||
static constexpr size_t MinLengthForScriptAndRegion =
|
||||
MinLanguageLength + 1 + ScriptLength + 1 + AlphaRegionLength;
|
||||
@ -407,8 +407,7 @@ bool js::intl::SharedIntlData::getAvailableLocales(
|
||||
|
||||
// Continue with the next locale if we didn't find a script subtag.
|
||||
size_t scriptLength = sep - script;
|
||||
if (!mozilla::intl::IsStructurallyValidScriptTag<char>(
|
||||
{script, scriptLength})) {
|
||||
if (!IsStructurallyValidScriptTag<char>({script, scriptLength})) {
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -420,8 +419,7 @@ bool js::intl::SharedIntlData::getAvailableLocales(
|
||||
|
||||
// Continue with the next locale if we didn't find a region subtag.
|
||||
size_t regionLength = (sep ? sep : lang.end()) - region;
|
||||
if (!mozilla::intl::IsStructurallyValidRegionTag<char>(
|
||||
{region, regionLength})) {
|
||||
if (!IsStructurallyValidRegionTag<char>({region, regionLength})) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -1,78 +0,0 @@
|
||||
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
|
||||
* vim: set ts=8 sts=2 et sw=2 tw=80:
|
||||
* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#ifndef builtin_intl_StringAsciiChars_h
|
||||
#define builtin_intl_StringAsciiChars_h
|
||||
|
||||
#include "mozilla/Assertions.h"
|
||||
#include "mozilla/Attributes.h"
|
||||
#include "mozilla/Maybe.h"
|
||||
#include "mozilla/Span.h"
|
||||
#include "mozilla/TextUtils.h"
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include "js/GCAPI.h"
|
||||
#include "js/TypeDecls.h"
|
||||
#include "js/Vector.h"
|
||||
|
||||
#include "vm/StringType.h"
|
||||
|
||||
namespace js::intl {
|
||||
|
||||
/**
|
||||
* String view of an ASCII-only string.
|
||||
*
|
||||
* This holds a reference to a JSLinearString and can produce a string view
|
||||
* into that string. If the string is represented by Latin1 characters, the
|
||||
* span is returned directly. If the string is represented by UTF-16
|
||||
* characters, it copies the char16_t characters into a char array, and then
|
||||
* returns a span based on the copy.
|
||||
*
|
||||
* This allows us to avoid copying for the common use case that the ASCII
|
||||
* characters are represented in Latin1.
|
||||
*/
|
||||
class MOZ_STACK_CLASS StringAsciiChars final {
|
||||
// When copying string characters, use this many bytes of inline storage.
|
||||
static const size_t InlineCapacity = 24;
|
||||
|
||||
JS::AutoCheckCannotGC nogc_;
|
||||
|
||||
JSLinearString* str_;
|
||||
|
||||
mozilla::Maybe<Vector<Latin1Char, InlineCapacity>> ownChars_;
|
||||
|
||||
public:
|
||||
explicit StringAsciiChars(JSLinearString* str) : str_(str) {
|
||||
MOZ_ASSERT(StringIsAscii(str));
|
||||
}
|
||||
|
||||
operator mozilla::Span<const char>() const {
|
||||
if (str_->hasLatin1Chars()) {
|
||||
return mozilla::AsChars(str_->latin1Range(nogc_));
|
||||
}
|
||||
return mozilla::AsChars(mozilla::Span<const Latin1Char>(*ownChars_));
|
||||
}
|
||||
|
||||
[[nodiscard]] bool init(JSContext* cx) {
|
||||
if (str_->hasLatin1Chars()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
ownChars_.emplace(cx);
|
||||
if (!ownChars_->resize(str_->length())) {
|
||||
return false;
|
||||
}
|
||||
|
||||
js::CopyChars(ownChars_->begin(), *str_);
|
||||
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace js::intl
|
||||
|
||||
#endif // builtin_intl_StringAsciiChars_h
|
@ -17,8 +17,7 @@
|
||||
This script extracts information about 1) mappings between deprecated and
|
||||
current Unicode BCP 47 locale identifiers, and 2) deprecated and current
|
||||
BCP 47 Unicode extension value from CLDR, and converts it to C++ mapping
|
||||
code in intl/components/LocaleGenerated.cpp. The code is used in
|
||||
intl/components/Locale.cpp.
|
||||
code in LanguageTagGenerated.cpp. The code is used in LanguageTag.cpp.
|
||||
|
||||
|
||||
Target "tzdata":
|
||||
@ -126,7 +125,7 @@ def writeMappingsBinarySearch(
|
||||
writeMappingHeader(println, description, source, url)
|
||||
println(
|
||||
"""
|
||||
bool mozilla::intl::Locale::{0}({1} {2}) {{
|
||||
bool js::intl::LanguageTag::{0}({1} {2}) {{
|
||||
MOZ_ASSERT({3}({2}.span()));
|
||||
MOZ_ASSERT({4}({2}.span()));
|
||||
""".format(
|
||||
@ -310,7 +309,7 @@ def writeComplexLanguageTagMappings(
|
||||
writeMappingHeader(println, description, source, url)
|
||||
println(
|
||||
"""
|
||||
void mozilla::intl::Locale::performComplexLanguageMappings() {
|
||||
void js::intl::LanguageTag::performComplexLanguageMappings() {
|
||||
MOZ_ASSERT(IsStructurallyValidLanguageTag(language().span()));
|
||||
MOZ_ASSERT(IsCanonicallyCasedLanguageTag(language().span()));
|
||||
""".lstrip()
|
||||
@ -407,7 +406,7 @@ def writeComplexRegionTagMappings(
|
||||
writeMappingHeader(println, description, source, url)
|
||||
println(
|
||||
"""
|
||||
void mozilla::intl::Locale::performComplexRegionMappings() {
|
||||
void js::intl::LanguageTag::performComplexRegionMappings() {
|
||||
MOZ_ASSERT(IsStructurallyValidLanguageTag(language().span()));
|
||||
MOZ_ASSERT(IsCanonicallyCasedLanguageTag(language().span()));
|
||||
MOZ_ASSERT(IsStructurallyValidRegionTag(region().span()));
|
||||
@ -525,7 +524,7 @@ static const char* ToCharPointer(const char* str) {
|
||||
return str;
|
||||
}
|
||||
|
||||
static const char* ToCharPointer(const mozilla::intl::UniqueChars& str) {
|
||||
static const char* ToCharPointer(const js::UniqueChars& str) {
|
||||
return str.get();
|
||||
}
|
||||
|
||||
@ -538,7 +537,7 @@ static bool IsLessThan(const T& a, const U& b) {
|
||||
writeMappingHeader(println, description, source, url)
|
||||
println(
|
||||
"""
|
||||
bool mozilla::intl::Locale::performVariantMappings() {
|
||||
bool js::intl::LanguageTag::performVariantMappings(JSContext* cx) {
|
||||
// The variant subtags need to be sorted for binary search.
|
||||
MOZ_ASSERT(std::is_sorted(variants_.begin(), variants_.end(),
|
||||
IsLessThan<decltype(variants_)::ElementType>));
|
||||
@ -548,9 +547,9 @@ bool mozilla::intl::Locale::performVariantMappings() {
|
||||
};
|
||||
|
||||
auto insertVariantSortedIfNotPresent = [&](const char* variant) {
|
||||
auto* p = std::lower_bound(
|
||||
variants_.begin(), variants_.end(), variant,
|
||||
IsLessThan<decltype(variants_)::ElementType, decltype(variant)>);
|
||||
auto* p = std::lower_bound(variants_.begin(), variants_.end(), variant,
|
||||
IsLessThan<decltype(variants_)::ElementType,
|
||||
decltype(variant)>);
|
||||
|
||||
// Don't insert the replacement when already present.
|
||||
if (p != variants_.end() && strcmp(p->get(), variant) == 0) {
|
||||
@ -558,11 +557,14 @@ bool mozilla::intl::Locale::performVariantMappings() {
|
||||
}
|
||||
|
||||
// Insert the preferred variant in sort order.
|
||||
auto preferred = DuplicateStringToUniqueChars(variant);
|
||||
auto preferred = DuplicateString(cx, variant);
|
||||
if (!preferred) {
|
||||
return false;
|
||||
}
|
||||
return !!variants_.insert(p, std::move(preferred));
|
||||
};
|
||||
|
||||
for (size_t i = 0; i < variants_.length();) {
|
||||
for (size_t i = 0; i < variants_.length(); ) {
|
||||
const char* variant = variants_[i].get();
|
||||
MOZ_ASSERT(IsCanonicallyCasedVariantTag(mozilla::MakeStringSpan(variant)));
|
||||
""".lstrip()
|
||||
@ -655,7 +657,7 @@ def writeLegacyMappingsFunction(println, legacy_mappings, description, source, u
|
||||
writeMappingHeader(println, description, source, url)
|
||||
println(
|
||||
"""\
|
||||
bool mozilla::intl::Locale::updateLegacyMappings() {
|
||||
bool js::intl::LanguageTag::updateLegacyMappings(JSContext* cx) {
|
||||
// We're mapping legacy tags to non-legacy form here.
|
||||
// Other tags remain unchanged.
|
||||
//
|
||||
@ -670,10 +672,8 @@ bool mozilla::intl::Locale::updateLegacyMappings() {
|
||||
}
|
||||
|
||||
for ([[maybe_unused]] const auto& variant : variants()) {
|
||||
MOZ_ASSERT(
|
||||
IsStructurallyValidVariantTag(mozilla::MakeStringSpan(variant.get())));
|
||||
MOZ_ASSERT(
|
||||
IsCanonicallyCasedVariantTag(mozilla::MakeStringSpan(variant.get())));
|
||||
MOZ_ASSERT(IsStructurallyValidVariantTag(mozilla::MakeStringSpan(variant.get())));
|
||||
MOZ_ASSERT(IsCanonicallyCasedVariantTag(mozilla::MakeStringSpan(variant.get())));
|
||||
}
|
||||
|
||||
// The variant subtags need to be sorted for binary search.
|
||||
@ -702,7 +702,10 @@ bool mozilla::intl::Locale::updateLegacyMappings() {
|
||||
}
|
||||
|
||||
// Insert the preferred variant in sort order.
|
||||
auto preferred = DuplicateStringToUniqueChars(variant);
|
||||
auto preferred = DuplicateString(cx, variant);
|
||||
if (!preferred) {
|
||||
return false;
|
||||
}
|
||||
return !!variants_.insert(p, std::move(preferred));
|
||||
};
|
||||
|
||||
@ -921,7 +924,7 @@ def writeSignLanguageMappingsFunction(
|
||||
writeMappingHeader(println, description, source, url)
|
||||
println(
|
||||
"""\
|
||||
bool mozilla::intl::Locale::signLanguageMapping(LanguageSubtag& language,
|
||||
bool js::intl::LanguageTag::signLanguageMapping(LanguageSubtag& language,
|
||||
const RegionSubtag& region) {
|
||||
MOZ_ASSERT(language.equalTo("sgn"));
|
||||
MOZ_ASSERT(IsStructurallyValidRegionTag(region.span()));
|
||||
@ -1643,36 +1646,39 @@ def writeCLDRLanguageTagData(println, data, url):
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
|
||||
#include "mozilla/intl/Locale.h"
|
||||
#include "builtin/intl/LanguageTag.h"
|
||||
#include "util/Text.h"
|
||||
#include "vm/JSContext.h"
|
||||
|
||||
using namespace mozilla::intl::LanguageTagLimits;
|
||||
using namespace js::intl::LanguageTagLimits;
|
||||
|
||||
template <size_t Length, size_t TagLength, size_t SubtagLength>
|
||||
static inline bool HasReplacement(
|
||||
const char (&subtags)[Length][TagLength],
|
||||
const mozilla::intl::LanguageTagSubtag<SubtagLength>& subtag) {
|
||||
const js::intl::LanguageTagSubtag<SubtagLength>& subtag) {
|
||||
MOZ_ASSERT(subtag.length() == TagLength - 1,
|
||||
"subtag must have the same length as the list of subtags");
|
||||
|
||||
const char* ptr = subtag.span().data();
|
||||
return std::binary_search(std::begin(subtags), std::end(subtags), ptr,
|
||||
[](const char* a, const char* b) {
|
||||
return memcmp(a, b, TagLength - 1) < 0;
|
||||
});
|
||||
return memcmp(a, b, TagLength - 1) < 0;
|
||||
});
|
||||
}
|
||||
|
||||
template <size_t Length, size_t TagLength, size_t SubtagLength>
|
||||
static inline const char* SearchReplacement(
|
||||
const char (&subtags)[Length][TagLength], const char* (&aliases)[Length],
|
||||
const mozilla::intl::LanguageTagSubtag<SubtagLength>& subtag) {
|
||||
const char (&subtags)[Length][TagLength],
|
||||
const char* (&aliases)[Length],
|
||||
const js::intl::LanguageTagSubtag<SubtagLength>& subtag) {
|
||||
MOZ_ASSERT(subtag.length() == TagLength - 1,
|
||||
"subtag must have the same length as the list of subtags");
|
||||
|
||||
const char* ptr = subtag.span().data();
|
||||
auto p = std::lower_bound(std::begin(subtags), std::end(subtags), ptr,
|
||||
[](const char* a, const char* b) {
|
||||
return memcmp(a, b, TagLength - 1) < 0;
|
||||
});
|
||||
return memcmp(a, b, TagLength - 1) < 0;
|
||||
});
|
||||
if (p != std::end(subtags) && memcmp(*p, ptr, TagLength - 1) == 0) {
|
||||
return aliases[std::distance(std::begin(subtags), p)];
|
||||
}
|
||||
@ -1689,23 +1695,32 @@ static bool IsAsciiLowercaseAlphanumericOrDash(char c) {
|
||||
}
|
||||
|
||||
static bool IsCanonicallyCasedLanguageTag(mozilla::Span<const char> span) {
|
||||
return std::all_of(span.begin(), span.end(),
|
||||
mozilla::IsAsciiLowercaseAlpha<char>);
|
||||
// Tell the analysis the |std::all_of| function can't GC.
|
||||
JS::AutoSuppressGCAnalysis nogc;
|
||||
|
||||
return std::all_of(span.begin(), span.end(), mozilla::IsAsciiLowercaseAlpha<char>);
|
||||
}
|
||||
|
||||
static bool IsCanonicallyCasedScriptTag(mozilla::Span<const char> span) {
|
||||
// Tell the analysis the |std::all_of| function can't GC.
|
||||
JS::AutoSuppressGCAnalysis nogc;
|
||||
|
||||
return mozilla::IsAsciiUppercaseAlpha(span[0]) &&
|
||||
std::all_of(span.begin() + 1, span.end(),
|
||||
mozilla::IsAsciiLowercaseAlpha<char>);
|
||||
std::all_of(span.begin() + 1, span.end(), mozilla::IsAsciiLowercaseAlpha<char>);
|
||||
}
|
||||
|
||||
static bool IsCanonicallyCasedRegionTag(mozilla::Span<const char> span) {
|
||||
return std::all_of(span.begin(), span.end(),
|
||||
mozilla::IsAsciiUppercaseAlpha<char>) ||
|
||||
// Tell the analysis the |std::all_of| function can't GC.
|
||||
JS::AutoSuppressGCAnalysis nogc;
|
||||
|
||||
return std::all_of(span.begin(), span.end(), mozilla::IsAsciiUppercaseAlpha<char>) ||
|
||||
std::all_of(span.begin(), span.end(), mozilla::IsAsciiDigit<char>);
|
||||
}
|
||||
|
||||
static bool IsCanonicallyCasedVariantTag(mozilla::Span<const char> span) {
|
||||
// Tell the analysis the |std::all_of| function can't GC.
|
||||
JS::AutoSuppressGCAnalysis nogc;
|
||||
|
||||
return std::all_of(span.begin(), span.end(), IsAsciiLowercaseAlphanumeric);
|
||||
}
|
||||
|
||||
@ -1714,8 +1729,7 @@ static bool IsCanonicallyCasedUnicodeKey(mozilla::Span<const char> key) {
|
||||
}
|
||||
|
||||
static bool IsCanonicallyCasedUnicodeType(mozilla::Span<const char> type) {
|
||||
return std::all_of(type.begin(), type.end(),
|
||||
IsAsciiLowercaseAlphanumericOrDash);
|
||||
return std::all_of(type.begin(), type.end(), IsAsciiLowercaseAlphanumericOrDash);
|
||||
}
|
||||
|
||||
static bool IsCanonicallyCasedTransformKey(mozilla::Span<const char> key) {
|
||||
@ -1723,8 +1737,7 @@ static bool IsCanonicallyCasedTransformKey(mozilla::Span<const char> key) {
|
||||
}
|
||||
|
||||
static bool IsCanonicallyCasedTransformType(mozilla::Span<const char> type) {
|
||||
return std::all_of(type.begin(), type.end(),
|
||||
IsAsciiLowercaseAlphanumericOrDash);
|
||||
return std::all_of(type.begin(), type.end(), IsAsciiLowercaseAlphanumericOrDash);
|
||||
}
|
||||
#endif
|
||||
""".rstrip()
|
||||
@ -2036,7 +2049,7 @@ def readCLDRVersionFromICU():
|
||||
|
||||
|
||||
def updateCLDRLangTags(args):
|
||||
""" Update the LocaleGenerated.cpp file. """
|
||||
""" Update the LanguageTagGenerated.cpp file. """
|
||||
version = args.version
|
||||
url = args.url
|
||||
out = args.out
|
||||
@ -3191,14 +3204,16 @@ def writeUnicodeExtensionsMappings(println, mapping, extension):
|
||||
println(
|
||||
"""
|
||||
template <size_t Length>
|
||||
static inline bool Is{0}Key(mozilla::Span<const char> key, const char (&str)[Length]) {{
|
||||
static inline bool Is{0}Key(
|
||||
mozilla::Span<const char> key, const char (&str)[Length]) {{
|
||||
static_assert(Length == {0}KeyLength + 1,
|
||||
"{0} extension key is two characters long");
|
||||
return memcmp(key.data(), str, Length - 1) == 0;
|
||||
}}
|
||||
|
||||
template <size_t Length>
|
||||
static inline bool Is{0}Type(mozilla::Span<const char> type, const char (&str)[Length]) {{
|
||||
static inline bool Is{0}Type(
|
||||
mozilla::Span<const char> type, const char (&str)[Length]) {{
|
||||
static_assert(Length > {0}KeyLength + 1,
|
||||
"{0} extension type contains more than two characters");
|
||||
return type.size() == (Length - 1) &&
|
||||
@ -3247,8 +3262,8 @@ static inline const char* Search{0}Replacement(
|
||||
|
||||
auto p = std::lower_bound(std::begin(types), std::end(types), type,
|
||||
[](const auto& a, const auto& b) {{
|
||||
return Compare{0}Type(a, b) < 0;
|
||||
}});
|
||||
return Compare{0}Type(a, b) < 0;
|
||||
}});
|
||||
if (p != std::end(types) && Compare{0}Type(*p, type) == 0) {{
|
||||
return aliases[std::distance(std::begin(types), p)];
|
||||
}}
|
||||
@ -3270,7 +3285,7 @@ static inline const char* Search{0}Replacement(
|
||||
* Spec: https://www.unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files
|
||||
* Spec: https://www.unicode.org/reports/tr35/#t_Extension
|
||||
*/
|
||||
const char* mozilla::intl::Locale::replace{0}ExtensionType(
|
||||
const char* js::intl::LanguageTag::replace{0}ExtensionType(
|
||||
mozilla::Span<const char> key, mozilla::Span<const char> type) {{
|
||||
MOZ_ASSERT(key.size() == {0}KeyLength);
|
||||
MOZ_ASSERT(IsCanonicallyCased{0}Key(key));
|
||||
@ -3292,11 +3307,11 @@ const char* mozilla::intl::Locale::replace{0}ExtensionType(
|
||||
|
||||
for entries in grouper(subtags, max_entries):
|
||||
entries = (
|
||||
'"{}"'.format(tag).center(length + 2)
|
||||
'"{}"'.format(tag).rjust(length + 2)
|
||||
for tag in entries
|
||||
if tag is not None
|
||||
)
|
||||
println(" {},".format(", ".join(entries)))
|
||||
println(" {},".format(", ".join(entries)))
|
||||
|
||||
println(" };")
|
||||
|
||||
@ -4039,9 +4054,7 @@ if __name__ == "__main__":
|
||||
)
|
||||
parser_cldr_tags.add_argument(
|
||||
"--out",
|
||||
default=os.path.join(
|
||||
topsrcdir, "intl", "components", "src", "LocaleGenerated.cpp"
|
||||
),
|
||||
default="LanguageTagGenerated.cpp",
|
||||
help="Output file (default: %(default)s)",
|
||||
)
|
||||
parser_cldr_tags.add_argument(
|
||||
|
@ -478,6 +478,7 @@ if CONFIG["JS_HAS_INTL_API"]:
|
||||
"builtin/intl/DisplayNames.cpp",
|
||||
"builtin/intl/IntlObject.cpp",
|
||||
"builtin/intl/LanguageTag.cpp",
|
||||
"builtin/intl/LanguageTagGenerated.cpp",
|
||||
"builtin/intl/ListFormat.cpp",
|
||||
"builtin/intl/Locale.cpp",
|
||||
"builtin/intl/NumberFormat.cpp",
|
||||
|
Loading…
Reference in New Issue
Block a user