From 56c46d06a105fd121f7b55c6d43e11e6a5bf83f4 Mon Sep 17 00:00:00 2001 From: Butkovits Atila Date: Sat, 4 Dec 2021 00:58:15 +0200 Subject: [PATCH] Backed out 3 changesets (bug 1719554) for causing bustages complaining about gfxTextRun.cpp. Backed out changeset 6181e40d4da1 (bug 1719554) Backed out changeset c261ede6ae81 (bug 1719554) Backed out changeset 221ec418475c (bug 1719554) --- .clang-format-ignore | 2 +- dom/base/DirectionalityUtils.cpp | 9 +- dom/serializers/nsPlainTextSerializer.cpp | 5 +- gfx/thebes/gfxCoreTextShaper.cpp | 7 +- gfx/thebes/gfxFont.cpp | 10 +- gfx/thebes/gfxFont.h | 10 +- gfx/thebes/gfxFontEntry.cpp | 5 +- gfx/thebes/gfxFontEntry.h | 4 +- gfx/thebes/gfxHarfBuzzShaper.cpp | 15 +- gfx/thebes/gfxPlatform.h | 4 +- gfx/thebes/gfxPlatformFontList.cpp | 2 +- gfx/thebes/gfxPlatformFontList.h | 2 +- gfx/thebes/gfxScriptItemizer.cpp | 20 +- gfx/thebes/gfxScriptItemizer.h | 4 +- gfx/thebes/gfxTextRun.h | 9 +- intl/components/gtest/TestScript.cpp | 40 +-- intl/components/moz.build | 5 +- intl/components/src/BidiClass.h | 47 --- intl/components/src/Script.cpp | 40 +++ intl/components/src/Script.h | 55 ++++ intl/components/src/UnicodeProperties.h | 306 ------------------ intl/lwbrk/LineBreaker.cpp | 19 +- intl/lwbrk/WordBreaker.cpp | 8 +- .../tools/genUnicodePropertyData.pl | 40 ++- intl/unicharutil/util/moz.build | 1 + intl/unicharutil/util/nsBidiUtils.h | 59 +++- intl/unicharutil/util/nsUnicharUtils.cpp | 22 +- intl/unicharutil/util/nsUnicodeProperties.cpp | 3 +- intl/unicharutil/util/nsUnicodeProperties.h | 125 +++++-- .../util/nsUnicodePropertyData.cpp | 4 +- .../util/nsUnicodeScriptCodes.h} | 28 +- layout/base/nsBidiPresUtils.cpp | 98 +++--- layout/base/nsBidiPresUtils.h | 14 +- layout/generic/MathMLTextRunFactory.cpp | 6 +- layout/generic/nsTextFrame.cpp | 4 +- layout/mathml/nsMathMLChar.cpp | 4 +- netwerk/dns/nsIDNService.cpp | 14 +- netwerk/dns/nsIDNService.h | 5 +- toolkit/components/find/nsFind.cpp | 3 +- .../places/tests/gtest/test_casing.cpp | 6 +- tools/rewriting/Generated.txt | 2 +- 41 files changed, 479 insertions(+), 587 deletions(-) delete mode 100644 intl/components/src/BidiClass.h create mode 100644 intl/components/src/Script.cpp create mode 100644 intl/components/src/Script.h delete mode 100644 intl/components/src/UnicodeProperties.h rename intl/{components/src/UnicodeScriptCodes.h => unicharutil/util/nsUnicodeScriptCodes.h} (85%) diff --git a/.clang-format-ignore b/.clang-format-ignore index 65b9e2920315..89f4a0d65833 100644 --- a/.clang-format-ignore +++ b/.clang-format-ignore @@ -35,9 +35,9 @@ layout/style/nsStyleStructList.h gfx/gl/GLConsts.h gfx/webrender_bindings/webrender_ffi_generated.h dom/webgpu/ffi/wgpu_ffi_generated.h -intl/components/src/UnicodeScriptCodes.h intl/unicharutil/util/nsSpecialCasingData.cpp intl/unicharutil/util/nsUnicodePropertyData.cpp +intl/unicharutil/util/nsUnicodeScriptCodes.h media/mp4parse-rust/mp4parse.h security/manager/ssl/StaticHPKPins.h widget/gtk/wayland/gtk-primary-selection-client-protocol.h diff --git a/dom/base/DirectionalityUtils.cpp b/dom/base/DirectionalityUtils.cpp index bd6d2378c433..40c24454317d 100644 --- a/dom/base/DirectionalityUtils.cpp +++ b/dom/base/DirectionalityUtils.cpp @@ -215,7 +215,6 @@ #include "mozilla/dom/Element.h" #include "mozilla/dom/HTMLSlotElement.h" #include "mozilla/dom/ShadowRoot.h" -#include "mozilla/intl/UnicodeProperties.h" #include "nsUnicodeProperties.h" #include "nsTextFragment.h" #include "nsAttrValue.h" @@ -304,12 +303,12 @@ static bool DoesNotAffectDirectionOfAncestors(const Element* aElement) { * Returns the directionality of a Unicode character */ static Directionality GetDirectionFromChar(uint32_t ch) { - switch (intl::UnicodeProperties::GetBidiClass(ch)) { - case intl::BidiClass::RightToLeft: - case intl::BidiClass::RightToLeftArabic: + switch (mozilla::unicode::GetBidiCat(ch)) { + case eCharType_RightToLeft: + case eCharType_RightToLeftArabic: return eDir_RTL; - case intl::BidiClass::LeftToRight: + case eCharType_LeftToRight: return eDir_LTR; default: diff --git a/dom/serializers/nsPlainTextSerializer.cpp b/dom/serializers/nsPlainTextSerializer.cpp index 508bef3c152f..318e93517504 100644 --- a/dom/serializers/nsPlainTextSerializer.cpp +++ b/dom/serializers/nsPlainTextSerializer.cpp @@ -22,6 +22,7 @@ #include "nsContentUtils.h" #include "nsReadableUtils.h" #include "nsUnicharUtils.h" +#include "nsUnicodeProperties.h" #include "nsCRT.h" #include "mozilla/Casting.h" #include "mozilla/EditorUtils.h" @@ -30,8 +31,6 @@ #include "mozilla/dom/HTMLBRElement.h" #include "mozilla/dom/Text.h" #include "mozilla/intl/Segmenter.h" -#include "mozilla/intl/UnicodeProperties.h" -#include "nsUnicodeProperties.h" #include "mozilla/Span.h" #include "mozilla/Preferences.h" #include "mozilla/StaticPrefs_converter.h" @@ -1804,7 +1803,7 @@ int32_t GetUnicharWidth(char32_t aCh) { return 1; } - return intl::UnicodeProperties::IsEastAsianWidthFW(aCh) ? 2 : 1; + return unicode::IsEastAsianWidthFW(aCh) ? 2 : 1; } int32_t GetUnicharStringWidth(Span aString) { diff --git a/gfx/thebes/gfxCoreTextShaper.cpp b/gfx/thebes/gfxCoreTextShaper.cpp index 2d9ac9fca00f..0e24337bf47e 100644 --- a/gfx/thebes/gfxCoreTextShaper.cpp +++ b/gfx/thebes/gfxCoreTextShaper.cpp @@ -69,9 +69,10 @@ gfxCoreTextShaper::~gfxCoreTextShaper() { } } -static bool IsBuggyIndicScript(intl::Script aScript) { - return aScript == intl::Script::BENGALI || aScript == intl::Script::KANNADA || - aScript == intl::Script::ORIYA || aScript == intl::Script::KHMER; +static bool IsBuggyIndicScript(unicode::Script aScript) { + return aScript == unicode::Script::BENGALI || + aScript == unicode::Script::KANNADA || + aScript == unicode::Script::ORIYA || aScript == unicode::Script::KHMER; } bool gfxCoreTextShaper::ShapeText(DrawTarget* aDrawTarget, diff --git a/gfx/thebes/gfxFont.cpp b/gfx/thebes/gfxFont.cpp index 2539544eb325..c11c8a4d28c3 100644 --- a/gfx/thebes/gfxFont.cpp +++ b/gfx/thebes/gfxFont.cpp @@ -1105,10 +1105,10 @@ static void HasLookupRuleWithGlyph(hb_face_t* aFace, hb_tag_t aTableTag, hb_set_destroy(otherLookups); } -nsTHashMap* gfxFont::sScriptTagToCode = nullptr; +nsTHashMap* gfxFont::sScriptTagToCode = nullptr; nsTHashSet* gfxFont::sDefaultFeatures = nullptr; -static inline bool HasSubstitution(uint32_t* aBitVector, intl::Script aScript) { +static inline bool HasSubstitution(uint32_t* aBitVector, Script aScript) { return (aBitVector[static_cast(aScript) >> 5] & (1 << (static_cast(aScript) & 0x1f))) != 0; } @@ -1165,9 +1165,9 @@ void gfxFont::CheckForFeaturesInvolvingSpace() { // Ensure that we don't try to look at script codes beyond what the // current version of ICU (at runtime -- in case of system ICU) // knows about. - Script scriptCount = Script( - std::min(intl::UnicodeProperties::GetMaxNumberOfScripts() + 1, - int(Script::NUM_SCRIPT_CODES))); + Script scriptCount = + Script(std::min(u_getIntPropertyMaxValue(UCHAR_SCRIPT) + 1, + int(Script::NUM_SCRIPT_CODES))); for (Script s = Script::ARABIC; s < scriptCount; s = Script(static_cast(s) + 1)) { hb_script_t script = hb_script_t(GetScriptTagForCode(s)); diff --git a/gfx/thebes/gfxFont.h b/gfx/thebes/gfxFont.h index 64d2b4d1cc7e..7f118f457da2 100644 --- a/gfx/thebes/gfxFont.h +++ b/gfx/thebes/gfxFont.h @@ -24,7 +24,6 @@ #include "mozilla/UniquePtr.h" #include "mozilla/gfx/MatrixFwd.h" #include "mozilla/gfx/Point.h" -#include "mozilla/intl/UnicodeScriptCodes.h" #include "nsCOMPtr.h" #include "nsColor.h" #include "nsTHashMap.h" @@ -38,6 +37,7 @@ #include "nsString.h" #include "nsTArray.h" #include "nsTHashtable.h" +#include "nsUnicodeScriptCodes.h" #include "nscore.h" // Only required for function bodys @@ -672,7 +672,7 @@ class gfxTextRunFactory { class gfxFontShaper { public: typedef mozilla::gfx::DrawTarget DrawTarget; - typedef mozilla::intl::Script Script; + typedef mozilla::unicode::Script Script; enum class RoundingFlags : uint8_t { kRoundX = 0x01, kRoundY = 0x02 }; @@ -731,7 +731,7 @@ MOZ_MAKE_ENUM_CLASS_BITWISE_OPERATORS(gfxFontShaper::RoundingFlags) */ class gfxShapedText { public: - typedef mozilla::intl::Script Script; + typedef mozilla::unicode::Script Script; gfxShapedText(uint32_t aLength, mozilla::gfx::ShapedTextFlags aFlags, uint16_t aAppUnitsPerDevUnit) @@ -1261,7 +1261,7 @@ class gfxShapedText { */ class gfxShapedWord final : public gfxShapedText { public: - typedef mozilla::intl::Script Script; + typedef mozilla::unicode::Script Script; // Create a ShapedWord that can hold glyphs for aLength characters, // with mCharacterGlyphs sized appropriately. @@ -1426,7 +1426,7 @@ class gfxFont { protected: using DrawTarget = mozilla::gfx::DrawTarget; - using Script = mozilla::intl::Script; + using Script = mozilla::unicode::Script; using SVGContextPaint = mozilla::SVGContextPaint; using RoundingFlags = gfxFontShaper::RoundingFlags; diff --git a/gfx/thebes/gfxFontEntry.cpp b/gfx/thebes/gfxFontEntry.cpp index b2abace1e568..c65c99dbf2dd 100644 --- a/gfx/thebes/gfxFontEntry.cpp +++ b/gfx/thebes/gfxFontEntry.cpp @@ -814,7 +814,7 @@ tainted_boolean_hint gfxFontEntry::HasGraphiteSpaceContextuals() { #define FEATURE_SCRIPT_MASK 0x000000ff // script index replaces low byte of tag -static_assert(int(intl::Script::NUM_SCRIPT_CODES) <= FEATURE_SCRIPT_MASK, +static_assert(int(Script::NUM_SCRIPT_CODES) <= FEATURE_SCRIPT_MASK, "Too many script codes"); // high-order three bytes of tag with script in low-order byte @@ -1780,8 +1780,7 @@ void gfxFontFamily::FindFontForChar(GlobalFontMatch* aMatchData) { LogModule* log = gfxPlatform::GetLog(eGfxLog_textrun); if (MOZ_UNLIKELY(MOZ_LOG_TEST(log, LogLevel::Debug))) { - intl::Script script = - intl::UnicodeProperties::GetScriptCode(aMatchData->mCh); + Script script = GetScriptCode(aMatchData->mCh); MOZ_LOG(log, LogLevel::Debug, ("(textrun-systemfallback-fonts) char: u+%6.6x " "script: %d match: [%s]\n", diff --git a/gfx/thebes/gfxFontEntry.h b/gfx/thebes/gfxFontEntry.h index 8d29fc38cfc3..33eb8d83ce44 100644 --- a/gfx/thebes/gfxFontEntry.h +++ b/gfx/thebes/gfxFontEntry.h @@ -23,13 +23,13 @@ #include "mozilla/RefPtr.h" #include "mozilla/TypedEnumBits.h" #include "mozilla/UniquePtr.h" -#include "mozilla/intl/UnicodeScriptCodes.h" #include "nsTHashMap.h" #include "nsDebug.h" #include "nsHashKeys.h" #include "nsISupports.h" #include "nsStringFwd.h" #include "nsTArray.h" +#include "nsUnicodeScriptCodes.h" #include "nscore.h" class FontInfoData; @@ -131,7 +131,7 @@ struct gfxFontFeatureInfo { class gfxFontEntry { public: typedef mozilla::gfx::DrawTarget DrawTarget; - typedef mozilla::intl::Script Script; + typedef mozilla::unicode::Script Script; typedef mozilla::FontWeight FontWeight; typedef mozilla::FontSlantStyle FontSlantStyle; typedef mozilla::FontStretch FontStretch; diff --git a/gfx/thebes/gfxHarfBuzzShaper.cpp b/gfx/thebes/gfxHarfBuzzShaper.cpp index e0e2c7b257c4..e80a7d1d6a76 100644 --- a/gfx/thebes/gfxHarfBuzzShaper.cpp +++ b/gfx/thebes/gfxHarfBuzzShaper.cpp @@ -11,9 +11,8 @@ #include "gfxTextRun.h" #include "mozilla/Sprintf.h" #include "mozilla/intl/String.h" -#include "mozilla/intl/UnicodeProperties.h" -#include "mozilla/intl/UnicodeScriptCodes.h" #include "nsUnicodeProperties.h" +#include "nsUnicodeScriptCodes.h" #include "harfbuzz/hb.h" #include "harfbuzz/hb-ot.h" @@ -982,7 +981,7 @@ static hb_position_t HBGetHKerning(hb_font_t* font, void* font_data, static hb_codepoint_t HBGetMirroring(hb_unicode_funcs_t* ufuncs, hb_codepoint_t aCh, void* user_data) { - return intl::UnicodeProperties::CharMirror(aCh); + return GetMirroredChar(aCh); } static hb_unicode_general_category_t HBGetGeneralCategory( @@ -992,20 +991,18 @@ static hb_unicode_general_category_t HBGetGeneralCategory( static hb_script_t HBGetScript(hb_unicode_funcs_t* ufuncs, hb_codepoint_t aCh, void* user_data) { - return hb_script_t( - GetScriptTagForCode(intl::UnicodeProperties::GetScriptCode(aCh))); + return hb_script_t(GetScriptTagForCode(GetScriptCode(aCh))); } static hb_unicode_combining_class_t HBGetCombiningClass( hb_unicode_funcs_t* ufuncs, hb_codepoint_t aCh, void* user_data) { - return hb_unicode_combining_class_t( - intl::UnicodeProperties::GetCombiningClass(aCh)); + return hb_unicode_combining_class_t(GetCombiningClass(aCh)); } static hb_bool_t HBUnicodeCompose(hb_unicode_funcs_t* ufuncs, hb_codepoint_t a, hb_codepoint_t b, hb_codepoint_t* ab, void* user_data) { - char32_t ch = intl::String::ComposePairNFC(a, b); + char32_t ch = mozilla::intl::String::ComposePairNFC(a, b); if (ch > 0) { *ab = ch; return true; @@ -1028,7 +1025,7 @@ static hb_bool_t HBUnicodeDecompose(hb_unicode_funcs_t* ufuncs, #endif char32_t decomp[2] = {0}; - if (intl::String::DecomposeRawNFD(ab, decomp)) { + if (mozilla::intl::String::DecomposeRawNFD(ab, decomp)) { if (decomp[1] || decomp[0] != ab) { *a = decomp[0]; *b = decomp[1]; diff --git a/gfx/thebes/gfxPlatform.h b/gfx/thebes/gfxPlatform.h index 5ed5239cb884..c81e3693946d 100644 --- a/gfx/thebes/gfxPlatform.h +++ b/gfx/thebes/gfxPlatform.h @@ -8,10 +8,10 @@ #include "mozilla/FontPropertyTypes.h" #include "mozilla/gfx/Types.h" -#include "mozilla/intl/UnicodeScriptCodes.h" #include "nsTArray.h" #include "nsString.h" #include "nsCOMPtr.h" +#include "nsUnicodeScriptCodes.h" #include "gfxTelemetry.h" #include "gfxTypes.h" @@ -186,7 +186,7 @@ class gfxPlatform : public mozilla::layers::MemoryPressureListener { typedef mozilla::gfx::DrawTarget DrawTarget; typedef mozilla::gfx::IntSize IntSize; typedef mozilla::gfx::SourceSurface SourceSurface; - typedef mozilla::intl::Script Script; + typedef mozilla::unicode::Script Script; /** * Return a pointer to the current active platform. diff --git a/gfx/thebes/gfxPlatformFontList.cpp b/gfx/thebes/gfxPlatformFontList.cpp index 1dbb05a6b438..462caa9508e3 100644 --- a/gfx/thebes/gfxPlatformFontList.cpp +++ b/gfx/thebes/gfxPlatformFontList.cpp @@ -975,7 +975,7 @@ gfxFont* gfxPlatformFontList::SystemFindFontForChar( LogModule* log = gfxPlatform::GetLog(eGfxLog_textrun); if (MOZ_UNLIKELY(MOZ_LOG_TEST(log, LogLevel::Warning))) { - Script script = intl::UnicodeProperties::GetScriptCode(aCh); + Script script = mozilla::unicode::GetScriptCode(aCh); MOZ_LOG(log, LogLevel::Warning, ("(textrun-systemfallback-%s) char: u+%6.6x " "script: %d match: [%s]" diff --git a/gfx/thebes/gfxPlatformFontList.h b/gfx/thebes/gfxPlatformFontList.h index 649483160511..c5ba279e741a 100644 --- a/gfx/thebes/gfxPlatformFontList.h +++ b/gfx/thebes/gfxPlatformFontList.h @@ -162,7 +162,7 @@ class gfxPlatformFontList : public gfxFontInfoLoader { typedef mozilla::StretchRange StretchRange; typedef mozilla::SlantStyleRange SlantStyleRange; typedef mozilla::WeightRange WeightRange; - typedef mozilla::intl::Script Script; + typedef mozilla::unicode::Script Script; // For font family lists loaded from user preferences (prefs such as // font.name-list..) that map CSS generics to diff --git a/gfx/thebes/gfxScriptItemizer.cpp b/gfx/thebes/gfxScriptItemizer.cpp index 8f78bd7421eb..fe85f1e87aa1 100644 --- a/gfx/thebes/gfxScriptItemizer.cpp +++ b/gfx/thebes/gfxScriptItemizer.cpp @@ -48,12 +48,11 @@ */ #include "gfxScriptItemizer.h" -#include "mozilla/intl/UnicodeProperties.h" -#include "nsCharTraits.h" +#include "mozilla/intl/Script.h" #include "nsUnicodeProperties.h" +#include "nsCharTraits.h" #include "harfbuzz/hb.h" -using namespace mozilla::intl; using namespace mozilla::unicode; #define MOD(sp) ((sp) % PAREN_STACK_DEPTH) @@ -117,8 +116,7 @@ static inline bool SameScript(Script runScript, Script currCharScript, uint32_t aCurrCh) { return CanMergeWithContext(runScript) || CanMergeWithContext(currCharScript) || currCharScript == runScript || - IsClusterExtender(aCurrCh) || - UnicodeProperties::HasScript(aCurrCh, runScript); + IsClusterExtender(aCurrCh) || HasScript(aCurrCh, runScript); } gfxScriptItemizer::gfxScriptItemizer(const char16_t* src, uint32_t length) @@ -164,7 +162,7 @@ bool gfxScriptItemizer::Next(uint32_t& aRunStart, uint32_t& aRunLimit, // if the character has script=COMMON, otherwise we don't care. uint8_t gc = HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED; - sc = UnicodeProperties::GetScriptCode(ch); + sc = GetScriptCode(ch); if (sc == Script::COMMON) { /* * Paired character handling: @@ -179,12 +177,12 @@ bool gfxScriptItemizer::Next(uint32_t& aRunStart, uint32_t& aRunLimit, */ gc = GetGeneralCategory(ch); if (gc == HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION) { - uint32_t endPairChar = UnicodeProperties::CharMirror(ch); + uint32_t endPairChar = mozilla::unicode::GetMirroredChar(ch); if (endPairChar != ch) { push(endPairChar, scriptCode); } } else if (gc == HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION && - UnicodeProperties::IsMirrored(ch)) { + HasMirroredChar(ch)) { while (STACK_IS_NOT_EMPTY() && TOP().endPairChar != ch) { pop(); } @@ -206,8 +204,8 @@ bool gfxScriptItemizer::Next(uint32_t& aRunStart, uint32_t& aRunLimit, } else if (fallbackScript == Script::UNKNOWN) { // See if the character has a ScriptExtensions property we can // store for use in the event the run remains unresolved. - UnicodeProperties::ScriptExtensionVector extensions; - auto extResult = UnicodeProperties::GetExtensions(ch, extensions); + mozilla::intl::ScriptExtensionVector extensions; + auto extResult = mozilla::intl::Script::GetExtensions(ch, extensions); if (extResult.isOk()) { Script ext = Script(extensions[0]); if (!CanMergeWithContext(ext)) { @@ -222,7 +220,7 @@ bool gfxScriptItemizer::Next(uint32_t& aRunStart, uint32_t& aRunLimit, * pop the matching open character from the stack */ if (gc == HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION && - UnicodeProperties::IsMirrored(ch)) { + HasMirroredChar(ch)) { pop(); } } else { diff --git a/gfx/thebes/gfxScriptItemizer.h b/gfx/thebes/gfxScriptItemizer.h index 6deb37e19cba..b218f5c8d2cf 100644 --- a/gfx/thebes/gfxScriptItemizer.h +++ b/gfx/thebes/gfxScriptItemizer.h @@ -51,13 +51,13 @@ #define GFX_SCRIPTITEMIZER_H #include -#include "mozilla/intl/UnicodeScriptCodes.h" +#include "nsUnicodeScriptCodes.h" #define PAREN_STACK_DEPTH 32 class gfxScriptItemizer { public: - typedef mozilla::intl::Script Script; + typedef mozilla::unicode::Script Script; gfxScriptItemizer(const char16_t* src, uint32_t length); diff --git a/gfx/thebes/gfxTextRun.h b/gfx/thebes/gfxTextRun.h index e59b85ca6c0f..95ab7d9c084c 100644 --- a/gfx/thebes/gfxTextRun.h +++ b/gfx/thebes/gfxTextRun.h @@ -19,7 +19,6 @@ #include "gfxUserFontSet.h" #include "mozilla/MemoryReporting.h" #include "mozilla/RefPtr.h" -#include "mozilla/intl/UnicodeScriptCodes.h" #include "nsPoint.h" #include "nsString.h" #include "nsTArray.h" @@ -27,6 +26,7 @@ #include "nsTextFrameUtils.h" #include "DrawMode.h" #include "harfbuzz/hb.h" +#include "nsUnicodeScriptCodes.h" #include "nsColor.h" #include "nsFrameList.h" #include "X11UndefineNone.h" @@ -901,7 +901,7 @@ class gfxTextRun : public gfxShapedText { class gfxFontGroup final : public gfxTextRunFactory { public: - typedef mozilla::intl::Script Script; + typedef mozilla::unicode::Script Script; typedef gfxShapedText::CompressedGlyph CompressedGlyph; static void @@ -1508,7 +1508,7 @@ class gfxMissingFontRecorder { } // record this script code in our mMissingFonts bitset - void RecordScript(mozilla::intl::Script aScriptCode) { + void RecordScript(mozilla::unicode::Script aScriptCode) { mMissingFonts[static_cast(aScriptCode) >> 5] |= (1 << (static_cast(aScriptCode) & 0x1f)); } @@ -1524,7 +1524,8 @@ class gfxMissingFontRecorder { private: // Number of 32-bit words needed for the missing-script flags static const uint32_t kNumScriptBitsWords = - ((static_cast(mozilla::intl::Script::NUM_SCRIPT_CODES) + 31) / 32); + ((static_cast(mozilla::unicode::Script::NUM_SCRIPT_CODES) + 31) / + 32); uint32_t mMissingFonts[kNumScriptBitsWords]; }; diff --git a/intl/components/gtest/TestScript.cpp b/intl/components/gtest/TestScript.cpp index 72d8cd108722..280028a48d97 100644 --- a/intl/components/gtest/TestScript.cpp +++ b/intl/components/gtest/TestScript.cpp @@ -3,60 +3,60 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #include "gtest/gtest.h" -#include "mozilla/intl/UnicodeProperties.h" -#include "mozilla/intl/UnicodeScriptCodes.h" +#include "mozilla/intl/Script.h" +#include "nsUnicodeScriptCodes.h" namespace mozilla::intl { TEST(IntlScript, GetExtensions) { - UnicodeProperties::ScriptExtensionVector extensions; + ScriptExtensionVector extensions; // 0x0000..0x0040 are Common. for (char32_t ch = 0; ch < 0x0041; ch++) { - ASSERT_TRUE(UnicodeProperties::GetExtensions(ch, extensions).isOk()); + ASSERT_TRUE(Script::GetExtensions(ch, extensions).isOk()); ASSERT_EQ(extensions.length(), 1u); - ASSERT_EQ(Script(extensions[0]), Script::COMMON); + ASSERT_EQ(unicode::Script(extensions[0]), unicode::Script::COMMON); } // 0x0300..0x0341 are Inherited. for (char32_t ch = 0x300; ch < 0x0341; ch++) { - ASSERT_TRUE(UnicodeProperties::GetExtensions(ch, extensions).isOk()); + ASSERT_TRUE(Script::GetExtensions(ch, extensions).isOk()); ASSERT_EQ(extensions.length(), 1u); - ASSERT_EQ(Script(extensions[0]), Script::INHERITED); + ASSERT_EQ(unicode::Script(extensions[0]), unicode::Script::INHERITED); } // 0x1cf7's script code is Common, but its script extension is Beng. - ASSERT_TRUE(UnicodeProperties::GetExtensions(0x1cf7, extensions).isOk()); + ASSERT_TRUE(Script::GetExtensions(0x1cf7, extensions).isOk()); ASSERT_EQ(extensions.length(), 1u); - ASSERT_EQ(Script(extensions[0]), Script::BENGALI); + ASSERT_EQ(unicode::Script(extensions[0]), unicode::Script::BENGALI); // ؿ // https://unicode-table.com/en/063F/ // This character doesn't have any script extension, so the script code is // returned. - ASSERT_TRUE(UnicodeProperties::GetExtensions(0x063f, extensions).isOk()); + ASSERT_TRUE(Script::GetExtensions(0x063f, extensions).isOk()); ASSERT_EQ(extensions.length(), 1u); - ASSERT_EQ(Script(extensions[0]), Script::ARABIC); + ASSERT_EQ(unicode::Script(extensions[0]), unicode::Script::ARABIC); // 0xff65 is the unicode character '・', see https://unicode-table.com/en/FF65/ // Halfwidth Katakana Middle Dot. - ASSERT_TRUE(UnicodeProperties::GetExtensions(0xff65, extensions).isOk()); + ASSERT_TRUE(Script::GetExtensions(0xff65, extensions).isOk()); // 0xff65 should have the following script extensions: // Bopo Hang Hani Hira Kana Yiii. ASSERT_EQ(extensions.length(), 6u); - ASSERT_EQ(Script(extensions[0]), Script::BOPOMOFO); - ASSERT_EQ(Script(extensions[1]), Script::HAN); - ASSERT_EQ(Script(extensions[2]), Script::HANGUL); - ASSERT_EQ(Script(extensions[3]), Script::HIRAGANA); - ASSERT_EQ(Script(extensions[4]), Script::KATAKANA); - ASSERT_EQ(Script(extensions[5]), Script::YI); + ASSERT_EQ(unicode::Script(extensions[0]), unicode::Script::BOPOMOFO); + ASSERT_EQ(unicode::Script(extensions[1]), unicode::Script::HAN); + ASSERT_EQ(unicode::Script(extensions[2]), unicode::Script::HANGUL); + ASSERT_EQ(unicode::Script(extensions[3]), unicode::Script::HIRAGANA); + ASSERT_EQ(unicode::Script(extensions[4]), unicode::Script::KATAKANA); + ASSERT_EQ(unicode::Script(extensions[5]), unicode::Script::YI); // The max code point is 0x10ffff, so 0x110000 should be invalid. // Script::UNKNOWN should be returned for an invalid code point. - ASSERT_TRUE(UnicodeProperties::GetExtensions(0x110000, extensions).isOk()); + ASSERT_TRUE(Script::GetExtensions(0x110000, extensions).isOk()); ASSERT_EQ(extensions.length(), 1u); - ASSERT_EQ(Script(extensions[0]), Script::UNKNOWN); + ASSERT_EQ(unicode::Script(extensions[0]), unicode::Script::UNKNOWN); } } // namespace mozilla::intl diff --git a/intl/components/moz.build b/intl/components/moz.build index 494cbc5ecd7f..4f0a5328bc1e 100644 --- a/intl/components/moz.build +++ b/intl/components/moz.build @@ -5,7 +5,6 @@ # file, You can obtain one at http://mozilla.org/MPL/2.0/. EXPORTS.mozilla.intl = [ "src/Bidi.h", - "src/BidiClass.h", "src/BidiEmbeddingLevel.h", "src/Calendar.h", "src/Collator.h", @@ -28,10 +27,9 @@ EXPORTS.mozilla.intl = [ "src/NumberRangeFormat.h", "src/PluralRules.h", "src/RelativeTimeFormat.h", + "src/Script.h", "src/String.h", "src/TimeZone.h", - "src/UnicodeProperties.h", - "src/UnicodeScriptCodes.h", ] UNIFIED_SOURCES += [ @@ -58,6 +56,7 @@ UNIFIED_SOURCES += [ "src/NumberRangeFormat.cpp", "src/PluralRules.cpp", "src/RelativeTimeFormat.cpp", + "src/Script.cpp", "src/String.cpp", "src/TimeZone.cpp", ] diff --git a/intl/components/src/BidiClass.h b/intl/components/src/BidiClass.h deleted file mode 100644 index f4d31e9e95f3..000000000000 --- a/intl/components/src/BidiClass.h +++ /dev/null @@ -1,47 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -#ifndef intl_components_BidiClass_h_ -#define intl_components_BidiClass_h_ - -namespace mozilla::intl { - -/** - * Read ftp://ftp.unicode.org/Public/UNIDATA/ReadMe-Latest.txt - * section BIDIRECTIONAL PROPERTIES - * for the detailed definition of the following categories - * - * The values here must match the equivalents in %bidicategorycode in - * mozilla/intl/unicharutil/tools/genUnicodePropertyData.pl, - * and must also match the values used by ICU's UCharDirection. - */ -enum class BidiClass : uint8_t { - LeftToRight = 0, - RightToLeft = 1, - EuropeanNumber = 2, - EuropeanNumberSeparator = 3, - EuropeanNumberTerminator = 4, - ArabicNumber = 5, - CommonNumberSeparator = 6, - BlockSeparator = 7, - SegmentSeparator = 8, - WhiteSpaceNeutral = 9, - OtherNeutral = 10, - LeftToRightEmbedding = 11, - LeftToRightOverride = 12, - RightToLeftArabic = 13, - RightToLeftEmbedding = 14, - RightToLeftOverride = 15, - PopDirectionalFormat = 16, - DirNonSpacingMark = 17, - BoundaryNeutral = 18, - FirstStrongIsolate = 19, - LeftToRightIsolate = 20, - RightToLeftIsolate = 21, - PopDirectionalIsolate = 22, - BidiClassCount -}; - -} // namespace mozilla::intl - -#endif diff --git a/intl/components/src/Script.cpp b/intl/components/src/Script.cpp new file mode 100644 index 000000000000..31776f782a36 --- /dev/null +++ b/intl/components/src/Script.cpp @@ -0,0 +1,40 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mozilla/intl/Script.h" + +#include "unicode/uscript.h" + +namespace mozilla::intl { + +// static +ICUResult Script::GetExtensions(char32_t aCodePoint, + ScriptExtensionVector& aExtensions) { + // Clear the vector first. + aExtensions.clear(); + + // We cannot pass aExtensions to uscript_getScriptExtension as USCriptCode + // takes 4 bytes, so create a local UScriptCode array to get the extensions. + UScriptCode ext[kMaxScripts]; + UErrorCode status = U_ZERO_ERROR; + int32_t len = uscript_getScriptExtensions(static_cast(aCodePoint), + ext, kMaxScripts, &status); + if (U_FAILURE(status)) { + // kMaxScripts should be large enough to hold the maximun number of script + // extensions. + MOZ_DIAGNOSTIC_ASSERT(status != U_BUFFER_OVERFLOW_ERROR); + return Err(ToICUError(status)); + } + + if (!aExtensions.reserve(len)) { + return Err(ICUError::OutOfMemory); + } + + for (int32_t i = 0; i < len; i++) { + aExtensions.infallibleAppend(ext[i]); + } + + return Ok(); +} +} // namespace mozilla::intl diff --git a/intl/components/src/Script.h b/intl/components/src/Script.h new file mode 100644 index 000000000000..fd56c47ae278 --- /dev/null +++ b/intl/components/src/Script.h @@ -0,0 +1,55 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#ifndef intl_components_Script_h_ +#define intl_components_Script_h_ + +#include "mozilla/intl/ICU4CGlue.h" +#include "mozilla/Vector.h" + +namespace mozilla::intl { + +// The code point which has the most script extensions is 0x0965, which has 21 +// script extensions, so choose the vector size as 32 to prevent heap +// allocation. +constexpr size_t kMaxScripts = 32; + +// The list of script extensions, it consists of one or more script codes from +// ISO 15924, or mozilla::unicode::Script. +// +// Choose the element type as int16_t to have the same size of +// mozilla::unicode::Script. +// We didn't use mozilla::unicode::Script directly here because we cannot +// include the header in standalone JS shell build. +using ScriptExtensionVector = Vector; + +/** + * This component is a Mozilla-focused API for working with Unicode scripts. + */ +class Script final { + public: + /** + * Get the script extensions for the given code point, and write the script + * extensions to aExtensions vector. If the code point has script extensions, + * the script code (Script::COMMON or Script::INHERITED) will be excluded. + * + * If the code point doesn't have any script extension, then its script code + * will be written to aExtensions vector. + * + * If the code point is invalid, Script::UNKNOWN will be written to + * aExtensions vector. + * + * Note: aExtensions will be cleared after calling this method regardless of + * failure. + * + * See [1] for the script code of the code point, [2] for the script + * extensions. + * + * https://www.unicode.org/Public/UNIDATA/Scripts.txt + * https://www.unicode.org/Public/UNIDATA/ScriptExtensions.txt + */ + static ICUResult GetExtensions(char32_t aCodePoint, + ScriptExtensionVector& aExtensions); +}; +} // namespace mozilla::intl +#endif // intl_components_Script_h_ diff --git a/intl/components/src/UnicodeProperties.h b/intl/components/src/UnicodeProperties.h deleted file mode 100644 index 785bc356f8e6..000000000000 --- a/intl/components/src/UnicodeProperties.h +++ /dev/null @@ -1,306 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -#ifndef intl_components_UnicodeProperties_h_ -#define intl_components_UnicodeProperties_h_ - -#include "mozilla/intl/BidiClass.h" -#include "mozilla/intl/ICU4CGlue.h" -#include "mozilla/intl/UnicodeScriptCodes.h" -#include "mozilla/Vector.h" - -#include "unicode/uchar.h" -#include "unicode/uscript.h" - -namespace mozilla::intl { - -/** - * This component is a Mozilla-focused API for working with text properties. - */ -class UnicodeProperties final { - public: - /** - * Return the BidiClass for the character. - */ - static inline BidiClass GetBidiClass(uint32_t aCh) { - return BidiClass(u_charDirection(aCh)); - } - - /** - * Maps the specified character to a "mirror-image" character. - */ - static inline uint32_t CharMirror(uint32_t aCh) { return u_charMirror(aCh); } - - /** - * Return the general category value for the code point. - */ - static inline uint32_t CharType(uint32_t aCh) { return u_charType(aCh); } - - /** - * Determine whether the code point has the Bidi_Mirrored property. - */ - static inline bool IsMirrored(uint32_t aCh) { return u_isMirrored(aCh); } - - /** - * Returns the combining class of the code point as specified in - * UnicodeData.txt. - */ - static inline uint8_t GetCombiningClass(uint32_t aCh) { - return u_getCombiningClass(aCh); - } - - enum class IntProperty { - BidiPairedBracketType, - EastAsianWidth, - HangulSyllableType, - LineBreak, - NumericType, - }; - - /** - * Get the property value for an enumerated or integer Unicode property for a - * code point. - */ - static inline int32_t GetIntPropertyValue(uint32_t aCh, IntProperty aProp) { - UProperty prop; - switch (aProp) { - case IntProperty::BidiPairedBracketType: - prop = UCHAR_BIDI_PAIRED_BRACKET_TYPE; - break; - case IntProperty::EastAsianWidth: - prop = UCHAR_EAST_ASIAN_WIDTH; - break; - case IntProperty::HangulSyllableType: - prop = UCHAR_HANGUL_SYLLABLE_TYPE; - break; - case IntProperty::LineBreak: - prop = UCHAR_LINE_BREAK; - break; - case IntProperty::NumericType: - prop = UCHAR_NUMERIC_TYPE; - break; - } - return u_getIntPropertyValue(aCh, prop); - } - - /** - * Get the numeric value for a Unicode code point as defined in the - * Unicode Character Database if the input is decimal or a digit, - * otherwise, returns -1. - */ - static inline int8_t GetNumericValue(uint32_t aCh) { - UNumericType type = - UNumericType(GetIntPropertyValue(aCh, IntProperty::NumericType)); - return type == U_NT_DECIMAL || type == U_NT_DIGIT - ? int8_t(u_getNumericValue(aCh)) - : -1; - } - - /** - * Maps the specified character to its paired bracket character. - */ - static inline uint32_t GetBidiPairedBracket(uint32_t aCh) { - return u_getBidiPairedBracket(aCh); - } - - /** - * The given character is mapped to its uppercase equivalent according to - * UnicodeData.txt; if the character has no uppercase equivalent, the - * character itself is returned. - */ - static inline uint32_t ToUpper(uint32_t aCh) { return u_toupper(aCh); } - - /** - * The given character is mapped to its lowercase equivalent according to - * UnicodeData.txt; if the character has no lowercase equivalent, the - * character itself is returned. - */ - static inline uint32_t ToLower(uint32_t aCh) { return u_tolower(aCh); } - - /** - * Check if a code point has the Lowercase Unicode property. - */ - static inline bool IsLowercase(uint32_t aCh) { return u_isULowercase(aCh); } - - /** - * The given character is mapped to its titlecase equivalent according to - * UnicodeData.txt; if the character has no titlecase equivalent, the - * character itself is returned. - */ - static inline uint32_t ToTitle(uint32_t aCh) { return u_totitle(aCh); } - - /** - * The given character is mapped to its case folding equivalent according to - * UnicodeData.txt and CaseFolding.txt; - * if the character has no case folding equivalent, the character - * itself is returned. - */ - static inline uint32_t FoldCase(uint32_t aCh) { - return u_foldCase(aCh, U_FOLD_CASE_DEFAULT); - } - - enum class BinaryProperty { - DefaultIgnorableCodePoint, - Emoji, - EmojiPresentation, - }; - - /** - * Check a binary Unicode property for a code point. - */ - static inline bool HasBinaryProperty(uint32_t aCh, BinaryProperty aProp) { - UProperty prop; - switch (aProp) { - case BinaryProperty::DefaultIgnorableCodePoint: - prop = UCHAR_DEFAULT_IGNORABLE_CODE_POINT; - break; - case BinaryProperty::Emoji: - prop = UCHAR_EMOJI; - break; - case BinaryProperty::EmojiPresentation: - prop = UCHAR_EMOJI_PRESENTATION; - break; - } - return u_hasBinaryProperty(aCh, prop); - } - - /** - * Check if the width of aCh is full width, half width or wide - * excluding emoji. - */ - static inline bool IsEastAsianWidthFHWexcludingEmoji(uint32_t aCh) { - switch (GetIntPropertyValue(aCh, IntProperty::EastAsianWidth)) { - case U_EA_FULLWIDTH: - case U_EA_HALFWIDTH: - return true; - case U_EA_WIDE: - return HasBinaryProperty(aCh, BinaryProperty::Emoji) ? false : true; - case U_EA_AMBIGUOUS: - case U_EA_NARROW: - case U_EA_NEUTRAL: - return false; - } - return false; - } - - /** - * Check if the width of aCh is ambiguous, full width, or wide. - */ - static inline bool IsEastAsianWidthAFW(uint32_t aCh) { - switch (GetIntPropertyValue(aCh, IntProperty::EastAsianWidth)) { - case U_EA_AMBIGUOUS: - case U_EA_FULLWIDTH: - case U_EA_WIDE: - return true; - case U_EA_HALFWIDTH: - case U_EA_NARROW: - case U_EA_NEUTRAL: - return false; - } - return false; - } - - /** - * Check if the width of aCh is full width, or wide. - */ - static inline bool IsEastAsianWidthFW(uint32_t aCh) { - switch (GetIntPropertyValue(aCh, IntProperty::EastAsianWidth)) { - case U_EA_FULLWIDTH: - case U_EA_WIDE: - return true; - case U_EA_AMBIGUOUS: - case U_EA_HALFWIDTH: - case U_EA_NARROW: - case U_EA_NEUTRAL: - return false; - } - return false; - } - - /** - * Check if the CharType of aCh is math or other symbol. - */ - static inline bool IsMathOrMusicSymbol(uint32_t aCh) { - // Keep this function in sync with is_math_symbol in base_chars.py. - return CharType(aCh) == U_MATH_SYMBOL || CharType(aCh) == U_OTHER_SYMBOL; - } - - static inline Script GetScriptCode(uint32_t aCh) { - // We can safely ignore the error code here because uscript_getScript - // returns USCRIPT_INVALID_CODE in the event of an error. - UErrorCode err = U_ZERO_ERROR; - return Script(uscript_getScript(aCh, &err)); - } - - static inline bool HasScript(uint32_t aCh, Script aScript) { - return uscript_hasScript(aCh, UScriptCode(aScript)); - } - - static inline const char* GetScriptShortName(Script aScript) { - return uscript_getShortName(UScriptCode(aScript)); - } - - static inline int32_t GetMaxNumberOfScripts() { - return u_getIntPropertyMaxValue(UCHAR_SCRIPT); - } - - // The code point which has the most script extensions is 0x0965, which has 21 - // script extensions, so choose the vector size as 32 to prevent heap - // allocation. - static constexpr size_t kMaxScripts = 32; - - using ScriptExtensionVector = Vector; - - /** - * Get the script extensions for the given code point, and write the script - * extensions to aExtensions vector. If the code point has script extensions, - * the script code (Script::COMMON or Script::INHERITED) will be excluded. - * - * If the code point doesn't have any script extension, then its script code - * will be written to aExtensions vector. - * - * If the code point is invalid, Script::UNKNOWN will be written to - * aExtensions vector. - * - * Note: aExtensions will be cleared after calling this method regardless of - * failure. - * - * See [1] for the script code of the code point, [2] for the script - * extensions. - * - * https://www.unicode.org/Public/UNIDATA/Scripts.txt - * https://www.unicode.org/Public/UNIDATA/ScriptExtensions.txt - */ - static ICUResult GetExtensions(char32_t aCodePoint, - ScriptExtensionVector& aExtensions) { - // Clear the vector first. - aExtensions.clear(); - - // We cannot pass aExtensions to uscript_getScriptExtension as USCriptCode - // takes 4 bytes, so create a local UScriptCode array to get the extensions. - UScriptCode ext[kMaxScripts]; - UErrorCode status = U_ZERO_ERROR; - int32_t len = uscript_getScriptExtensions(static_cast(aCodePoint), - ext, kMaxScripts, &status); - if (U_FAILURE(status)) { - // kMaxScripts should be large enough to hold the maximun number of script - // extensions. - MOZ_DIAGNOSTIC_ASSERT(status != U_BUFFER_OVERFLOW_ERROR); - return Err(ToICUError(status)); - } - - if (!aExtensions.reserve(len)) { - return Err(ICUError::OutOfMemory); - } - - for (int32_t i = 0; i < len; i++) { - aExtensions.infallibleAppend(Script(ext[i])); - } - - return Ok(); - } -}; - -} // namespace mozilla::intl - -#endif diff --git a/intl/lwbrk/LineBreaker.cpp b/intl/lwbrk/LineBreaker.cpp index 45c073b7bb3f..60352f92c381 100644 --- a/intl/lwbrk/LineBreaker.cpp +++ b/intl/lwbrk/LineBreaker.cpp @@ -11,7 +11,6 @@ #include "nsUnicodeProperties.h" #include "mozilla/ArrayUtils.h" #include "mozilla/intl/Segmenter.h" -#include "mozilla/intl/UnicodeProperties.h" using namespace mozilla::unicode; using namespace mozilla::intl; @@ -463,12 +462,10 @@ static int8_t GetClass(uint32_t u, LineBreakRule aLevel, return CLASS_CLOSE_LIKE_CHARACTER; } if (aIsChineseOrJapanese) { - if (cls == U_LB_POSTFIX_NUMERIC && - UnicodeProperties::IsEastAsianWidthAFW(u)) { + if (cls == U_LB_POSTFIX_NUMERIC && IsEastAsianWidthAFW(u)) { return CLASS_CLOSE_LIKE_CHARACTER; } - if (cls == U_LB_PREFIX_NUMERIC && - UnicodeProperties::IsEastAsianWidthAFW(u)) { + if (cls == U_LB_PREFIX_NUMERIC && IsEastAsianWidthAFW(u)) { return CLASS_OPEN_LIKE_CHARACTER; } if (u == 0x2010 || u == 0x2013 || u == 0x301C || u == 0x30A0) { @@ -488,12 +485,10 @@ static int8_t GetClass(uint32_t u, LineBreakRule aLevel, return CLASS_CLOSE_LIKE_CHARACTER; } if (aIsChineseOrJapanese) { - if (cls == U_LB_POSTFIX_NUMERIC && - UnicodeProperties::IsEastAsianWidthAFW(u)) { + if (cls == U_LB_POSTFIX_NUMERIC && IsEastAsianWidthAFW(u)) { return CLASS_CLOSE_LIKE_CHARACTER; } - if (cls == U_LB_PREFIX_NUMERIC && - UnicodeProperties::IsEastAsianWidthAFW(u)) { + if (cls == U_LB_PREFIX_NUMERIC && IsEastAsianWidthAFW(u)) { return CLASS_OPEN_LIKE_CHARACTER; } if (u == 0x2010 || u == 0x2013 || u == 0x301C || u == 0x30A0) { @@ -518,12 +513,10 @@ static int8_t GetClass(uint32_t u, LineBreakRule aLevel, u == 0xFF01 || u == 0xFF1F) { return CLASS_BREAKABLE; } - if (cls == U_LB_POSTFIX_NUMERIC && - UnicodeProperties::IsEastAsianWidthAFW(u)) { + if (cls == U_LB_POSTFIX_NUMERIC && IsEastAsianWidthAFW(u)) { return CLASS_BREAKABLE; } - if (cls == U_LB_PREFIX_NUMERIC && - UnicodeProperties::IsEastAsianWidthAFW(u)) { + if (cls == U_LB_PREFIX_NUMERIC && IsEastAsianWidthAFW(u)) { return CLASS_BREAKABLE; } if (u == 0x2010 || u == 0x2013 || u == 0x301C || u == 0x30A0) { diff --git a/intl/lwbrk/WordBreaker.cpp b/intl/lwbrk/WordBreaker.cpp index 992cec331ef6..b688947c5cb6 100644 --- a/intl/lwbrk/WordBreaker.cpp +++ b/intl/lwbrk/WordBreaker.cpp @@ -3,17 +3,17 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -#include "mozilla/intl/UnicodeProperties.h" #include "mozilla/intl/WordBreaker.h" #include "mozilla/StaticPrefs_layout.h" #include "nsComplexBreaker.h" #include "nsTArray.h" +#include "nsUnicodeProperties.h" -using mozilla::intl::Script; -using mozilla::intl::UnicodeProperties; using mozilla::intl::WordBreaker; using mozilla::intl::WordRange; using mozilla::unicode::GetGenCategory; +using mozilla::unicode::GetScriptCode; +using mozilla::unicode::Script; #define IS_ASCII(c) (0 == (0xFF80 & (c))) #define ASCII_IS_ALPHA(c) \ @@ -40,7 +40,7 @@ using mozilla::unicode::GetGenCategory; // the script is not supported by the platform, we just won't find any useful // boundaries.) static bool IsScriptioContinua(char16_t aChar) { - Script sc = UnicodeProperties::GetScriptCode(aChar); + Script sc = GetScriptCode(aChar); return sc == Script::THAI || sc == Script::MYANMAR || sc == Script::KHMER || sc == Script::JAVANESE || sc == Script::BALINESE || sc == Script::SUNDANESE || sc == Script::LAO; diff --git a/intl/unicharutil/tools/genUnicodePropertyData.pl b/intl/unicharutil/tools/genUnicodePropertyData.pl index f72f18f1ae0d..c92bb5f57992 100755 --- a/intl/unicharutil/tools/genUnicodePropertyData.pl +++ b/intl/unicharutil/tools/genUnicodePropertyData.pl @@ -47,7 +47,7 @@ # This will generate (or overwrite!) the files # # nsUnicodePropertyData.cpp -# UnicodeScriptCodes.h +# nsUnicodeScriptCodes.h # # in the current directory. @@ -71,7 +71,7 @@ if ($#ARGV != 1) { # This will generate (or overwrite!) the files # # nsUnicodePropertyData.cpp -# UnicodeScriptCodes.h +# nsUnicodeScriptCodes.h # # in the current directory. __EOT @@ -132,7 +132,7 @@ my %idType = ( "Deprecated" => 12 ); -# These match the IdentifierType enum in UnicodeProperties.h. +# These match the IdentifierType enum in nsUnicodeProperties.h. my %mappedIdType = ( "Restricted" => 0, "Allowed" => 1 @@ -292,7 +292,9 @@ my $timestamp = gmtime(); open DATA_TABLES, "> nsUnicodePropertyData.cpp" or die "unable to open nsUnicodePropertyData.cpp for output"; -my $licenseBlock = q[/* This Source Code Form is subject to the terms of the Mozilla Public +my $licenseBlock = q[ +/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ @@ -322,7 +324,7 @@ $versionInfo __END -open HEADER, "> UnicodeScriptCodes.h" or die "unable to open UnicodeScriptCodes.h for output"; +open HEADER, "> nsUnicodeScriptCodes.h" or die "unable to open nsUnicodeScriptCodes.h for output"; print HEADER <<__END; $licenseBlock @@ -336,8 +338,8 @@ $versionInfo * * * * * This file contains MACHINE-GENERATED DATA, do not edit! * * * * * */ -#ifndef intl_components_UnicodeScriptCodes_h_ -#define intl_components_UnicodeScriptCodes_h_ +#ifndef NS_UNICODE_SCRIPT_CODES +#define NS_UNICODE_SCRIPT_CODES __END @@ -349,7 +351,16 @@ sub sprintCharProps2_short return sprintf("{%d,%d},", $verticalOrientation[$usv], $idtype[$usv]); } -&genTables("CharProp2", "", "nsCharProps2", 9, 7, \&sprintCharProps2_short, 16, 1, 1); +my $type = q| +struct nsCharProps2 { + // Currently only 4 bits are defined here, so 4 more could be added without + // affecting the storage requirements for this struct. Or we could pack two + // records per byte, at the cost of a slightly more complex accessor. + unsigned char mVertOrient:2; + unsigned char mIdType:2; +}; +|; +&genTables("CharProp2", $type, "nsCharProps2", 9, 7, \&sprintCharProps2_short, 16, 1, 1); sub sprintHanVariants { @@ -474,7 +485,8 @@ __END close DATA_TABLES; -print HEADER "namespace mozilla::intl {\n"; +print HEADER "namespace mozilla {\n"; +print HEADER "namespace unicode {\n"; print HEADER "enum class Script : int16_t {\n"; for (my $i = 0; $i < scalar @scriptCodeToName; ++$i) { print HEADER " ", $scriptCodeToName[$i], " = ", $i, ",\n"; @@ -482,7 +494,15 @@ for (my $i = 0; $i < scalar @scriptCodeToName; ++$i) { print HEADER "\n NUM_SCRIPT_CODES = ", scalar @scriptCodeToName, ",\n"; print HEADER "\n INVALID = -1\n"; print HEADER "};\n"; -print HEADER "} // namespace mozilla::intl\n\n"; +print HEADER <<__END; + +// mozilla::intl::ScriptExtensionVector assumes sizeof(Script) is equal to +// sizeof(int16_t), so if the data type of Script is changed then +// ScriptExtensionVector needs to be updated accordingly. +static_assert(sizeof(Script) == sizeof(int16_t)); +__END +print HEADER "} // namespace unicode\n"; +print HEADER "} // namespace mozilla\n\n"; print HEADER <<__END; #endif diff --git a/intl/unicharutil/util/moz.build b/intl/unicharutil/util/moz.build index b52920725e4f..301eb9a854ba 100644 --- a/intl/unicharutil/util/moz.build +++ b/intl/unicharutil/util/moz.build @@ -12,6 +12,7 @@ EXPORTS += [ "nsSpecialCasingData.h", "nsUnicharUtils.h", "nsUnicodeProperties.h", + "nsUnicodeScriptCodes.h", ] UNIFIED_SOURCES += [ diff --git a/intl/unicharutil/util/nsBidiUtils.h b/intl/unicharutil/util/nsBidiUtils.h index 74493acbbf68..14f95ec998b0 100644 --- a/intl/unicharutil/util/nsBidiUtils.h +++ b/intl/unicharutil/util/nsBidiUtils.h @@ -6,24 +6,63 @@ #ifndef nsBidiUtils_h__ #define nsBidiUtils_h__ -#include "mozilla/intl/BidiClass.h" - #include "nsString.h" #include "encoding_rs_mem.h" +/** + * Read ftp://ftp.unicode.org/Public/UNIDATA/ReadMe-Latest.txt + * section BIDIRECTIONAL PROPERTIES + * for the detailed definition of the following categories + * + * The values here must match the equivalents in %bidicategorycode in + * mozilla/intl/unicharutil/tools/genUnicodePropertyData.pl, + * and must also match the values used by ICU's UCharDirection. + */ + +enum nsCharType { + eCharType_LeftToRight = 0, + eCharType_RightToLeft = 1, + eCharType_EuropeanNumber = 2, + eCharType_EuropeanNumberSeparator = 3, + eCharType_EuropeanNumberTerminator = 4, + eCharType_ArabicNumber = 5, + eCharType_CommonNumberSeparator = 6, + eCharType_BlockSeparator = 7, + eCharType_SegmentSeparator = 8, + eCharType_WhiteSpaceNeutral = 9, + eCharType_OtherNeutral = 10, + eCharType_LeftToRightEmbedding = 11, + eCharType_LeftToRightOverride = 12, + eCharType_RightToLeftArabic = 13, + eCharType_RightToLeftEmbedding = 14, + eCharType_RightToLeftOverride = 15, + eCharType_PopDirectionalFormat = 16, + eCharType_DirNonSpacingMark = 17, + eCharType_BoundaryNeutral = 18, + eCharType_FirstStrongIsolate = 19, + eCharType_LeftToRightIsolate = 20, + eCharType_RightToLeftIsolate = 21, + eCharType_PopDirectionalIsolate = 22, + eCharType_CharTypeCount +}; + +/** + * This specifies the language directional property of a character set. + */ +typedef enum nsCharType nsCharType; + /** * definitions of bidirection character types by category */ -#define BIDICLASS_IS_RTL(val) \ - (((val) == mozilla::intl::BidiClass::RightToLeft) || \ - ((val) == mozilla::intl::BidiClass::RightToLeftArabic)) +#define CHARTYPE_IS_RTL(val) \ + (((val) == eCharType_RightToLeft) || ((val) == eCharType_RightToLeftArabic)) -#define BIDICLASS_IS_WEAK(val) \ - (((val) == mozilla::intl::BidiClass::EuropeanNumberSeparator) || \ - ((val) == mozilla::intl::BidiClass::EuropeanNumberTerminator) || \ - (((val) > mozilla::intl::BidiClass::ArabicNumber) && \ - ((val) != mozilla::intl::BidiClass::RightToLeftArabic))) +#define CHARTYPE_IS_WEAK(val) \ + (((val) == eCharType_EuropeanNumberSeparator) || \ + ((val) == eCharType_EuropeanNumberTerminator) || \ + (((val) > eCharType_ArabicNumber) && \ + ((val) != eCharType_RightToLeftArabic))) /** * Inspects a Unichar, converting numbers to Arabic or Hindi forms and diff --git a/intl/unicharutil/util/nsUnicharUtils.cpp b/intl/unicharutil/util/nsUnicharUtils.cpp index 38b721ebe431..fb4c4f8d2c6e 100644 --- a/intl/unicharutil/util/nsUnicharUtils.cpp +++ b/intl/unicharutil/util/nsUnicharUtils.cpp @@ -5,9 +5,9 @@ #include "nsUnicharUtils.h" #include "nsUTF8Utils.h" +#include "nsUnicodeProperties.h" #include "mozilla/Likely.h" #include "mozilla/HashFunctions.h" -#include "mozilla/intl/UnicodeProperties.h" // We map x -> x, except for upper-case letters, // which we map to their lower-case equivalents. @@ -33,7 +33,7 @@ static MOZ_ALWAYS_INLINE uint32_t ToLowerCase_inline(uint32_t aChar) { return gASCIIToLower[aChar]; } - return mozilla::intl::UnicodeProperties::ToLower(aChar); + return mozilla::unicode::GetLowercase(aChar); } static MOZ_ALWAYS_INLINE uint32_t @@ -244,8 +244,7 @@ void ToLowerCase(const char16_t* aIn, char16_t* aOut, uint32_t aLen) { for (uint32_t i = 0; i < aLen; i++) { uint32_t ch = aIn[i]; if (i < aLen - 1 && NS_IS_SURROGATE_PAIR(ch, aIn[i + 1])) { - ch = mozilla::intl::UnicodeProperties::ToLower( - SURROGATE_TO_UCS4(ch, aIn[i + 1])); + ch = mozilla::unicode::GetLowercase(SURROGATE_TO_UCS4(ch, aIn[i + 1])); NS_ASSERTION(!IS_IN_BMP(ch), "case mapping crossed BMP/SMP boundary!"); aOut[i++] = H_SURROGATE(ch); aOut[i] = L_SURROGATE(ch); @@ -270,15 +269,14 @@ uint32_t ToUpperCase(uint32_t aChar) { return aChar; } - return mozilla::intl::UnicodeProperties::ToUpper(aChar); + return mozilla::unicode::GetUppercase(aChar); } void ToUpperCase(const char16_t* aIn, char16_t* aOut, uint32_t aLen) { for (uint32_t i = 0; i < aLen; i++) { uint32_t ch = aIn[i]; if (i < aLen - 1 && NS_IS_SURROGATE_PAIR(ch, aIn[i + 1])) { - ch = mozilla::intl::UnicodeProperties::ToUpper( - SURROGATE_TO_UCS4(ch, aIn[i + 1])); + ch = mozilla::unicode::GetUppercase(SURROGATE_TO_UCS4(ch, aIn[i + 1])); NS_ASSERTION(!IS_IN_BMP(ch), "case mapping crossed BMP/SMP boundary!"); aOut[i++] = H_SURROGATE(ch); aOut[i] = L_SURROGATE(ch); @@ -364,7 +362,7 @@ static MOZ_ALWAYS_INLINE uint32_t GetLowerUTF8Codepoint_inline( // we don't go through ToLowerCase here, because we know this isn't // an ASCII character so the ASCII fast-path there is useless - c = mozilla::intl::UnicodeProperties::ToLower(c); + c = mozilla::unicode::GetLowercase(c); *aNext = aStr + 2; return c; @@ -379,7 +377,7 @@ static MOZ_ALWAYS_INLINE uint32_t GetLowerUTF8Codepoint_inline( c += (str[1] & 0x3F) << 6; c += (str[2] & 0x3F); - c = mozilla::intl::UnicodeProperties::ToLower(c); + c = mozilla::unicode::GetLowercase(c); *aNext = aStr + 3; return c; @@ -394,7 +392,7 @@ static MOZ_ALWAYS_INLINE uint32_t GetLowerUTF8Codepoint_inline( c += (str[2] & 0x3F) << 6; c += (str[3] & 0x3F); - c = mozilla::intl::UnicodeProperties::ToLower(c); + c = mozilla::unicode::GetLowercase(c); *aNext = aStr + 4; return c; @@ -516,8 +514,8 @@ uint32_t HashUTF8AsUTF16(const char* aUTF8, uint32_t aLength, bool* aErr) { } bool IsSegmentBreakSkipChar(uint32_t u) { - return intl::UnicodeProperties::IsEastAsianWidthFHWexcludingEmoji(u) && - intl::UnicodeProperties::GetScriptCode(u) != intl::Script::HANGUL; + return unicode::IsEastAsianWidthFHWexcludingEmoji(u) && + unicode::GetScriptCode(u) != unicode::Script::HANGUL; } } // namespace mozilla diff --git a/intl/unicharutil/util/nsUnicodeProperties.cpp b/intl/unicharutil/util/nsUnicodeProperties.cpp index 69edf03e4206..942f65b2da41 100644 --- a/intl/unicharutil/util/nsUnicodeProperties.cpp +++ b/intl/unicharutil/util/nsUnicodeProperties.cpp @@ -177,8 +177,7 @@ enum HSType { }; static HSType GetHangulSyllableType(uint32_t aCh) { - return HSType(intl::UnicodeProperties::GetIntPropertyValue( - aCh, intl::UnicodeProperties::IntProperty::HangulSyllableType)); + return HSType(u_getIntPropertyValue(aCh, UCHAR_HANGUL_SYLLABLE_TYPE)); } void ClusterIterator::Next() { diff --git a/intl/unicharutil/util/nsUnicodeProperties.h b/intl/unicharutil/util/nsUnicodeProperties.h index e2d42a4922ea..970699f92bbd 100644 --- a/intl/unicharutil/util/nsUnicodeProperties.h +++ b/intl/unicharutil/util/nsUnicodeProperties.h @@ -7,19 +7,13 @@ #ifndef NS_UNICODEPROPERTIES_H #define NS_UNICODEPROPERTIES_H -#include "mozilla/intl/UnicodeProperties.h" - #include "nsBidiUtils.h" #include "nsUGenCategory.h" +#include "nsUnicodeScriptCodes.h" #include "harfbuzz/hb.h" -struct nsCharProps2 { - // Currently only 4 bits are defined here, so 4 more could be added without - // affecting the storage requirements for this struct. Or we could pack two - // records per byte, at the cost of a slightly more complex accessor. - unsigned char mVertOrient : 2; - unsigned char mIdType : 2; -}; +#include "unicode/uchar.h" +#include "unicode/uscript.h" const nsCharProps2& GetCharProps2(uint32_t aCh); @@ -63,21 +57,45 @@ const uint32_t kEmojiSkinToneLast = 0x1f3ff; extern const hb_unicode_general_category_t sICUtoHBcategory[]; +inline uint32_t GetMirroredChar(uint32_t aCh) { return u_charMirror(aCh); } + +inline bool HasMirroredChar(uint32_t aCh) { return u_isMirrored(aCh); } + +inline uint8_t GetCombiningClass(uint32_t aCh) { + return u_getCombiningClass(aCh); +} + inline uint8_t GetGeneralCategory(uint32_t aCh) { - return sICUtoHBcategory[intl::UnicodeProperties::CharType(aCh)]; + return sICUtoHBcategory[u_charType(aCh)]; +} + +inline nsCharType GetBidiCat(uint32_t aCh) { + return nsCharType(u_charDirection(aCh)); } inline int8_t GetNumericValue(uint32_t aCh) { - return intl::UnicodeProperties::GetNumericValue(aCh); + UNumericType type = + UNumericType(u_getIntPropertyValue(aCh, UCHAR_NUMERIC_TYPE)); + return type == U_NT_DECIMAL || type == U_NT_DIGIT + ? int8_t(u_getNumericValue(aCh)) + : -1; } inline uint8_t GetLineBreakClass(uint32_t aCh) { - return intl::UnicodeProperties::GetIntPropertyValue( - aCh, intl::UnicodeProperties::IntProperty::LineBreak); + return u_getIntPropertyValue(aCh, UCHAR_LINE_BREAK); } -inline uint32_t GetScriptTagForCode(intl::Script aScriptCode) { - const char* tag = intl::UnicodeProperties::GetScriptShortName(aScriptCode); +inline Script GetScriptCode(uint32_t aCh) { + UErrorCode err = U_ZERO_ERROR; + return Script(uscript_getScript(aCh, &err)); +} + +inline bool HasScript(uint32_t aCh, Script aScript) { + return uscript_hasScript(aCh, UScriptCode(aScript)); +} + +inline uint32_t GetScriptTagForCode(Script aScriptCode) { + const char* tag = uscript_getShortName(UScriptCode(aScriptCode)); if (tag) { return HB_TAG(tag[0], tag[1], tag[2], tag[3]); } @@ -86,22 +104,28 @@ inline uint32_t GetScriptTagForCode(intl::Script aScriptCode) { } inline PairedBracketType GetPairedBracketType(uint32_t aCh) { - return PairedBracketType(intl::UnicodeProperties::GetIntPropertyValue( - aCh, intl::UnicodeProperties::IntProperty::BidiPairedBracketType)); + return PairedBracketType( + u_getIntPropertyValue(aCh, UCHAR_BIDI_PAIRED_BRACKET_TYPE)); } +inline uint32_t GetPairedBracket(uint32_t aCh) { + return u_getBidiPairedBracket(aCh); +} + +inline uint32_t GetUppercase(uint32_t aCh) { return u_toupper(aCh); } + +inline uint32_t GetLowercase(uint32_t aCh) { return u_tolower(aCh); } + inline uint32_t GetTitlecaseForLower( uint32_t aCh) // maps LC to titlecase, UC unchanged { - return intl::UnicodeProperties::IsLowercase(aCh) - ? intl::UnicodeProperties::ToTitle(aCh) - : aCh; + return u_isULowercase(aCh) ? u_totitle(aCh) : aCh; } inline uint32_t GetTitlecaseForAll( uint32_t aCh) // maps both UC and LC to titlecase { - return intl::UnicodeProperties::ToTitle(aCh); + return u_totitle(aCh); } inline uint32_t GetFoldedcase(uint32_t aCh) { @@ -111,22 +135,62 @@ inline uint32_t GetFoldedcase(uint32_t aCh) { if (aCh == 0x0130 || aCh == 0x0131) { return 'i'; } - return intl::UnicodeProperties::FoldCase(aCh); + return u_foldCase(aCh, U_FOLD_CASE_DEFAULT); +} + +inline bool IsEastAsianWidthFHWexcludingEmoji(uint32_t aCh) { + switch (u_getIntPropertyValue(aCh, UCHAR_EAST_ASIAN_WIDTH)) { + case U_EA_FULLWIDTH: + case U_EA_HALFWIDTH: + return true; + case U_EA_WIDE: + return u_hasBinaryProperty(aCh, UCHAR_EMOJI) ? false : true; + case U_EA_AMBIGUOUS: + case U_EA_NARROW: + case U_EA_NEUTRAL: + return false; + } + return false; +} + +inline bool IsEastAsianWidthAFW(uint32_t aCh) { + switch (u_getIntPropertyValue(aCh, UCHAR_EAST_ASIAN_WIDTH)) { + case U_EA_AMBIGUOUS: + case U_EA_FULLWIDTH: + case U_EA_WIDE: + return true; + case U_EA_HALFWIDTH: + case U_EA_NARROW: + case U_EA_NEUTRAL: + return false; + } + return false; +} + +inline bool IsEastAsianWidthFW(uint32_t aCh) { + switch (u_getIntPropertyValue(aCh, UCHAR_EAST_ASIAN_WIDTH)) { + case U_EA_FULLWIDTH: + case U_EA_WIDE: + return true; + case U_EA_AMBIGUOUS: + case U_EA_HALFWIDTH: + case U_EA_NARROW: + case U_EA_NEUTRAL: + return false; + } + return false; } inline bool IsDefaultIgnorable(uint32_t aCh) { - return intl::UnicodeProperties::HasBinaryProperty( - aCh, intl::UnicodeProperties::BinaryProperty::DefaultIgnorableCodePoint); + return u_hasBinaryProperty(aCh, UCHAR_DEFAULT_IGNORABLE_CODE_POINT); } inline EmojiPresentation GetEmojiPresentation(uint32_t aCh) { - if (!intl::UnicodeProperties::HasBinaryProperty( - aCh, intl::UnicodeProperties::BinaryProperty::Emoji)) { + if (!u_hasBinaryProperty(aCh, UCHAR_EMOJI)) { return TextOnly; } - if (intl::UnicodeProperties::HasBinaryProperty( - aCh, intl::UnicodeProperties::BinaryProperty::EmojiPresentation)) { + if (u_hasBinaryProperty(aCh, UCHAR_EMOJI_PRESENTATION)) { return EmojiDefault; } return TextDefault; @@ -207,6 +271,11 @@ uint32_t CountGraphemeClusters(const char16_t* aText, uint32_t aLength); // to the values we care about at runtime. bool IsCombiningDiacritic(uint32_t aCh); +// Keep this function in sync with is_math_symbol in base_chars.py. +inline bool IsMathOrMusicSymbol(uint32_t aCh) { + return u_charType(aCh) == U_MATH_SYMBOL || u_charType(aCh) == U_OTHER_SYMBOL; +} + // Remove diacritics from a character uint32_t GetNaked(uint32_t aCh); diff --git a/intl/unicharutil/util/nsUnicodePropertyData.cpp b/intl/unicharutil/util/nsUnicodePropertyData.cpp index 7cd8eeadc9e4..3d1756230720 100644 --- a/intl/unicharutil/util/nsUnicodePropertyData.cpp +++ b/intl/unicharutil/util/nsUnicodePropertyData.cpp @@ -1,3 +1,5 @@ + +/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ @@ -9,7 +11,7 @@ */ /* - * Created on Thu Nov 25 12:44:10 2021 from UCD data files with version info: + * Created on Fri Oct 29 09:00:15 2021 from UCD data files with version info: * # Unicode Character Database diff --git a/intl/components/src/UnicodeScriptCodes.h b/intl/unicharutil/util/nsUnicodeScriptCodes.h similarity index 85% rename from intl/components/src/UnicodeScriptCodes.h rename to intl/unicharutil/util/nsUnicodeScriptCodes.h index 4bc45eeaa89d..03756198f2d1 100644 --- a/intl/components/src/UnicodeScriptCodes.h +++ b/intl/unicharutil/util/nsUnicodeScriptCodes.h @@ -1,3 +1,5 @@ + +/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ @@ -9,7 +11,7 @@ */ /* - * Created on Thu Dec 2 15:20:26 2021 from UCD data files with version info: + * Created on Thu Nov 18 12:50:48 2021 from UCD data files with version info: * # Unicode Character Database @@ -43,10 +45,20 @@ for the Unicode Character Database, for Version 14.0.0 of the Unicode Standard. * * * * * This file contains MACHINE-GENERATED DATA, do not edit! * * * * * */ -#ifndef intl_components_UnicodeScriptCodes_h_ -#define intl_components_UnicodeScriptCodes_h_ +#ifndef NS_UNICODE_SCRIPT_CODES +#define NS_UNICODE_SCRIPT_CODES -namespace mozilla::intl { + +struct nsCharProps2 { + // Currently only 4 bits are defined here, so 4 more could be added without + // affecting the storage requirements for this struct. Or we could pack two + // records per byte, at the cost of a slightly more complex accessor. + unsigned char mVertOrient:2; + unsigned char mIdType:2; +}; + +namespace mozilla { +namespace unicode { enum class Script : int16_t { COMMON = 0, INHERITED = 1, @@ -251,7 +263,13 @@ enum class Script : int16_t { INVALID = -1 }; -} // namespace mozilla::intl + +// mozilla::intl::ScriptExtensionVector assumes sizeof(Script) is equal to +// sizeof(int16_t), so if the data type of Script is changed then +// ScriptExtensionVector needs to be updated accordingly. +static_assert(sizeof(Script) == sizeof(int16_t)); +} // namespace unicode +} // namespace mozilla #endif /* diff --git a/layout/base/nsBidiPresUtils.cpp b/layout/base/nsBidiPresUtils.cpp index 1406a310f681..8b189635e241 100644 --- a/layout/base/nsBidiPresUtils.cpp +++ b/layout/base/nsBidiPresUtils.cpp @@ -1991,7 +1991,7 @@ void nsBidiPresUtils::RemoveBidiContinuation(BidiParagraphData* aBpd, nsresult nsBidiPresUtils::FormatUnicodeText(nsPresContext* aPresContext, char16_t* aText, int32_t& aTextLength, - intl::BidiClass aBidiClass) { + nsCharType aCharType) { nsresult rv = NS_OK; // ahmed // adjusted for correct numeral shaping @@ -2011,12 +2011,12 @@ nsresult nsBidiPresUtils::FormatUnicodeText(nsPresContext* aPresContext, case IBMBIDI_NUMERAL_REGULAR: - switch (aBidiClass) { - case intl::BidiClass::EuropeanNumber: + switch (aCharType) { + case eCharType_EuropeanNumber: HandleNumbers(aText, aTextLength, IBMBIDI_NUMERAL_ARABIC); break; - case intl::BidiClass::ArabicNumber: + case eCharType_ArabicNumber: HandleNumbers(aText, aTextLength, IBMBIDI_NUMERAL_HINDI); break; @@ -2029,22 +2029,20 @@ nsresult nsBidiPresUtils::FormatUnicodeText(nsPresContext* aPresContext, if (((GET_BIDI_OPTION_DIRECTION(bidiOptions) == IBMBIDI_TEXTDIRECTION_RTL) && (IS_ARABIC_DIGIT(aText[0]))) || - (intl::BidiClass::ArabicNumber == aBidiClass)) { + (eCharType_ArabicNumber == aCharType)) HandleNumbers(aText, aTextLength, IBMBIDI_NUMERAL_HINDI); - } else if (intl::BidiClass::EuropeanNumber == aBidiClass) { + else if (eCharType_EuropeanNumber == aCharType) HandleNumbers(aText, aTextLength, IBMBIDI_NUMERAL_ARABIC); - } break; case IBMBIDI_NUMERAL_PERSIANCONTEXT: if (((GET_BIDI_OPTION_DIRECTION(bidiOptions) == IBMBIDI_TEXTDIRECTION_RTL) && (IS_ARABIC_DIGIT(aText[0]))) || - (intl::BidiClass::ArabicNumber == aBidiClass)) { + (eCharType_ArabicNumber == aCharType)) HandleNumbers(aText, aTextLength, IBMBIDI_NUMERAL_PERSIAN); - } else if (intl::BidiClass::EuropeanNumber == aBidiClass) { + else if (eCharType_EuropeanNumber == aCharType) HandleNumbers(aText, aTextLength, IBMBIDI_NUMERAL_ARABIC); - } break; case IBMBIDI_NUMERAL_NOMINAL: @@ -2076,40 +2074,64 @@ void nsBidiPresUtils::StripBidiControlCharacters(char16_t* aText, aTextLength -= stripLen; } -void nsBidiPresUtils::CalculateBidiClass( - intl::Bidi* aBidiEngine, const char16_t* aText, int32_t& aOffset, - int32_t aBidiClassLimit, int32_t& aRunLimit, int32_t& aRunLength, - int32_t& aRunCount, intl::BidiClass& aBidiClass, - intl::BidiClass& aPrevBidiClass) { +#if 0 // XXX: for the future use ??? +void +RemoveDiacritics(char16_t* aText, + int32_t& aTextLength) +{ + if (aText && (aTextLength > 0) ) { + int32_t offset = 0; + + for (int32_t i = 0; i < aTextLength && aText[i]; i++) { + if (IS_BIDI_DIACRITIC(aText[i]) ) { + ++offset; + continue; + } + aText[i - offset] = aText[i]; + } + aTextLength = i - offset; + aText[aTextLength] = 0; + } +} +#endif + +void nsBidiPresUtils::CalculateCharType(intl::Bidi* aBidiEngine, + const char16_t* aText, int32_t& aOffset, + int32_t aCharTypeLimit, + int32_t& aRunLimit, int32_t& aRunLength, + int32_t& aRunCount, uint8_t& aCharType, + uint8_t& aPrevCharType) + +{ bool strongTypeFound = false; int32_t offset; - intl::BidiClass bidiClass; + nsCharType charType; - aBidiClass = intl::BidiClass::OtherNeutral; + aCharType = eCharType_OtherNeutral; int32_t charLen; - for (offset = aOffset; offset < aBidiClassLimit; offset += charLen) { + for (offset = aOffset; offset < aCharTypeLimit; offset += charLen) { // Make sure we give RTL chartype to all characters that would be classified // as Right-To-Left by a bidi platform. // (May differ from the UnicodeData, eg we set RTL chartype to some NSMs.) charLen = 1; uint32_t ch = aText[offset]; if (IS_HEBREW_CHAR(ch)) { - bidiClass = intl::BidiClass::RightToLeft; + charType = eCharType_RightToLeft; } else if (IS_ARABIC_ALPHABETIC(ch)) { - bidiClass = intl::BidiClass::RightToLeftArabic; + charType = eCharType_RightToLeftArabic; } else { - if (offset + 1 < aBidiClassLimit && + if (offset + 1 < aCharTypeLimit && NS_IS_SURROGATE_PAIR(ch, aText[offset + 1])) { ch = SURROGATE_TO_UCS4(ch, aText[offset + 1]); charLen = 2; } - bidiClass = intl::UnicodeProperties::GetBidiClass(ch); + charType = unicode::GetBidiCat(ch); } - if (!BIDICLASS_IS_WEAK(bidiClass)) { - if (strongTypeFound && (bidiClass != aPrevBidiClass) && - (BIDICLASS_IS_RTL(bidiClass) || BIDICLASS_IS_RTL(aPrevBidiClass))) { + if (!CHARTYPE_IS_WEAK(charType)) { + if (strongTypeFound && (charType != aPrevCharType) && + (CHARTYPE_IS_RTL(charType) || CHARTYPE_IS_RTL(aPrevCharType))) { // Stop at this point to ensure uni-directionality of the text // (from platform's point of view). // Also, don't mix Arabic and Hebrew content (since platform may @@ -2120,18 +2142,18 @@ void nsBidiPresUtils::CalculateBidiClass( break; } - if ((intl::BidiClass::RightToLeftArabic == aPrevBidiClass || - intl::BidiClass::ArabicNumber == aPrevBidiClass) && - intl::BidiClass::EuropeanNumber == bidiClass) { - bidiClass = intl::BidiClass::ArabicNumber; + if ((eCharType_RightToLeftArabic == aPrevCharType || + eCharType_ArabicNumber == aPrevCharType) && + eCharType_EuropeanNumber == charType) { + charType = eCharType_ArabicNumber; } - // Set PrevBidiClass to the last strong type in this frame + // Set PrevCharType to the last strong type in this frame // (for correct numeric shaping) - aPrevBidiClass = bidiClass; + aPrevCharType = charType; strongTypeFound = true; - aBidiClass = bidiClass; + aCharType = charType; } } aOffset = offset; @@ -2166,8 +2188,8 @@ nsresult nsBidiPresUtils::ProcessText(const char16_t* aText, size_t aLength, nscoord totalWidth = 0; int32_t i, start, limit, length; uint32_t visualStart = 0; - intl::BidiClass bidiClass; - intl::BidiClass prevClass = intl::BidiClass::LeftToRight; + uint8_t charType; + uint8_t prevType = eCharType_LeftToRight; for (int nPosResolve = 0; nPosResolve < aPosResolveCount; ++nPosResolve) { aPosResolve[nPosResolve].visualIndex = kNotFound; @@ -2209,17 +2231,17 @@ nsresult nsBidiPresUtils::ProcessText(const char16_t* aText, size_t aLength, } while (subRunCount > 0) { - // CalculateBidiClass can increment subRunCount if the run + // CalculateCharType can increment subRunCount if the run // contains mixed character types - CalculateBidiClass(aBidiEngine, text, lineOffset, typeLimit, subRunLimit, - subRunLength, subRunCount, bidiClass, prevClass); + CalculateCharType(aBidiEngine, text, lineOffset, typeLimit, subRunLimit, + subRunLength, subRunCount, charType, prevType); nsAutoString runVisualText; runVisualText.Assign(text + start, subRunLength); if (int32_t(runVisualText.Length()) < subRunLength) return NS_ERROR_OUT_OF_MEMORY; FormatUnicodeText(aPresContext, runVisualText.BeginWriting(), - subRunLength, bidiClass); + subRunLength, (nsCharType)charType); aprocessor.SetText(runVisualText.get(), subRunLength, dir); width = aprocessor.GetWidth(); diff --git a/layout/base/nsBidiPresUtils.h b/layout/base/nsBidiPresUtils.h index c40240b48ffa..d3fa132110da 100644 --- a/layout/base/nsBidiPresUtils.h +++ b/layout/base/nsBidiPresUtils.h @@ -8,7 +8,6 @@ #define nsBidiPresUtils_h___ #include "gfxContext.h" -#include "mozilla/intl/BidiClass.h" #include "mozilla/intl/BidiEmbeddingLevel.h" #include "nsBidiUtils.h" #include "nsHashKeys.h" @@ -224,7 +223,7 @@ class nsBidiPresUtils { */ static nsresult FormatUnicodeText(nsPresContext* aPresContext, char16_t* aText, int32_t& aTextLength, - mozilla::intl::BidiClass aBidiClass); + nsCharType aCharType); /** * Reorder plain text using the Unicode Bidi algorithm and send it to @@ -563,12 +562,11 @@ class nsBidiPresUtils { */ static void RemoveBidiContinuation(BidiParagraphData* aBpd, nsIFrame* aFrame, int32_t aFirstIndex, int32_t aLastIndex); - static void CalculateBidiClass(mozilla::intl::Bidi* aBidiEngine, - const char16_t* aText, int32_t& aOffset, - int32_t aBidiClassLimit, int32_t& aRunLimit, - int32_t& aRunLength, int32_t& aRunCount, - mozilla::intl::BidiClass& aBidiClass, - mozilla::intl::BidiClass& aPrevBidiClass); + static void CalculateCharType(mozilla::intl::Bidi* aBidiEngine, + const char16_t* aText, int32_t& aOffset, + int32_t aCharTypeLimit, int32_t& aRunLimit, + int32_t& aRunLength, int32_t& aRunCount, + uint8_t& aCharType, uint8_t& aPrevCharType); static void StripBidiControlCharacters(char16_t* aText, int32_t& aTextLength); }; diff --git a/layout/generic/MathMLTextRunFactory.cpp b/layout/generic/MathMLTextRunFactory.cpp index 2d03e35c2d4a..f7d0431b2ccf 100644 --- a/layout/generic/MathMLTextRunFactory.cpp +++ b/layout/generic/MathMLTextRunFactory.cpp @@ -10,12 +10,12 @@ #include "mozilla/BinarySearch.h" #include "mozilla/ComputedStyle.h" #include "mozilla/ComputedStyleInlines.h" -#include "mozilla/intl/UnicodeScriptCodes.h" #include "nsStyleConsts.h" #include "nsTextFrameUtils.h" #include "nsFontMetrics.h" #include "nsDeviceContext.h" +#include "nsUnicodeScriptCodes.h" using namespace mozilla; @@ -564,7 +564,7 @@ void MathMLTextRunFactory::RebuildTextRun( // character is actually available. FontMatchType matchType; RefPtr mathFont = fontGroup->FindFontForChar( - ch2, 0, 0, intl::Script::COMMON, nullptr, &matchType); + ch2, 0, 0, unicode::Script::COMMON, nullptr, &matchType); if (mathFont) { // Don't apply the CSS style if there is a math font for at least one // of the transformed character in this text run. @@ -573,7 +573,7 @@ void MathMLTextRunFactory::RebuildTextRun( // We fallback to the original character. ch2 = ch; if (aMFR) { - aMFR->RecordScript(intl::Script::MATHEMATICAL_NOTATION); + aMFR->RecordScript(unicode::Script::MATHEMATICAL_NOTATION); } } } diff --git a/layout/generic/nsTextFrame.cpp b/layout/generic/nsTextFrame.cpp index 4f126ede2b54..73c0786f75ec 100644 --- a/layout/generic/nsTextFrame.cpp +++ b/layout/generic/nsTextFrame.cpp @@ -8370,8 +8370,8 @@ static bool FindFirstLetterRange(const nsTextFragment* aFrag, // want to allow this to split a ligature. bool allowSplitLigature; - typedef intl::Script Script; - Script script = intl::UnicodeProperties::GetScriptCode(usv); + typedef unicode::Script Script; + Script script = unicode::GetScriptCode(usv); switch (script) { default: allowSplitLigature = true; diff --git a/layout/mathml/nsMathMLChar.cpp b/layout/mathml/nsMathMLChar.cpp index 38aff6d94a8f..57d489dff943 100644 --- a/layout/mathml/nsMathMLChar.cpp +++ b/layout/mathml/nsMathMLChar.cpp @@ -11,7 +11,6 @@ #include "gfxUtils.h" #include "mozilla/dom/Document.h" #include "mozilla/gfx/2D.h" -#include "mozilla/intl/UnicodeScriptCodes.h" #include "mozilla/ComputedStyle.h" #include "mozilla/MathAlgorithms.h" #include "mozilla/UniquePtr.h" @@ -43,6 +42,7 @@ #include #include "gfxMathTable.h" +#include "nsUnicodeScriptCodes.h" using namespace mozilla; using namespace mozilla::gfx; @@ -1541,7 +1541,7 @@ nsresult nsMathMLChar::StretchInternal( // and record missing math script otherwise. gfxMissingFontRecorder* MFR = presContext->MissingFontRecorder(); if (MFR && !fm->GetThebesFontGroup()->GetFirstMathFont()) { - MFR->RecordScript(intl::Script::MATHEMATICAL_NOTATION); + MFR->RecordScript(unicode::Script::MATHEMATICAL_NOTATION); } // If the scale_stretchy_operators option is disabled, we are done. diff --git a/netwerk/dns/nsIDNService.cpp b/netwerk/dns/nsIDNService.cpp index cd4b72dc196c..c0ffa6f9aca9 100644 --- a/netwerk/dns/nsIDNService.cpp +++ b/netwerk/dns/nsIDNService.cpp @@ -11,14 +11,14 @@ #include "nsServiceManagerUtils.h" #include "nsUnicharUtils.h" #include "nsUnicodeProperties.h" +#include "nsUnicodeScriptCodes.h" #include "harfbuzz/hb.h" #include "punycode.h" #include "mozilla/ArrayUtils.h" #include "mozilla/Casting.h" #include "mozilla/TextUtils.h" #include "mozilla/Utf8.h" -#include "mozilla/intl/UnicodeProperties.h" -#include "mozilla/intl/UnicodeScriptCodes.h" +#include "mozilla/intl/Script.h" // Currently we use the non-transitional processing option -- see // http://unicode.org/reports/tr46/ @@ -30,7 +30,6 @@ const bool kIDNA2008_TransitionalProcessing = false; #include "ICUUtils.h" using namespace mozilla; -using namespace mozilla::intl; using namespace mozilla::unicode; using namespace mozilla::net; using mozilla::Preferences; @@ -764,7 +763,7 @@ bool nsIDNService::isLabelSafe(const nsAString& label) { MOZ_ASSERT(idType == IDTYPE_ALLOWED); // Check for mixed script - Script script = UnicodeProperties::GetScriptCode(ch); + Script script = GetScriptCode(ch); if (script != Script::COMMON && script != Script::INHERITED && script != lastScript) { if (illegalScriptCombo(script, savedScript)) { @@ -775,8 +774,7 @@ bool nsIDNService::isLabelSafe(const nsAString& label) { // Check for mixed numbering systems auto genCat = GetGeneralCategory(ch); if (genCat == HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER) { - uint32_t zeroCharacter = - ch - mozilla::intl::UnicodeProperties::GetNumericValue(ch); + uint32_t zeroCharacter = ch - GetNumericValue(ch); if (savedNumberingSystem == 0) { // If we encounter a decimal number, save the zero character from that // numbering system. @@ -793,8 +791,8 @@ bool nsIDNService::isLabelSafe(const nsAString& label) { } // Check for marks whose expected script doesn't match the base script. if (lastScript != Script::INVALID) { - UnicodeProperties::ScriptExtensionVector scripts; - auto extResult = UnicodeProperties::GetExtensions(ch, scripts); + mozilla::intl::ScriptExtensionVector scripts; + auto extResult = mozilla::intl::Script::GetExtensions(ch, scripts); MOZ_ASSERT(extResult.isOk()); if (extResult.isErr()) { return false; diff --git a/netwerk/dns/nsIDNService.h b/netwerk/dns/nsIDNService.h index 09ebd53ed317..9fdfde6de8ad 100644 --- a/netwerk/dns/nsIDNService.h +++ b/netwerk/dns/nsIDNService.h @@ -8,11 +8,11 @@ #include "nsIIDNService.h" #include "nsCOMPtr.h" +#include "nsUnicodeScriptCodes.h" #include "nsWeakReference.h" #include "unicode/uidna.h" #include "mozilla/Mutex.h" -#include "mozilla/intl/UnicodeScriptCodes.h" #include "mozilla/net/IDNBlocklistUtils.h" #include "nsString.h" @@ -147,7 +147,8 @@ class nsIDNService final : public nsIIDNService, * For the "Moderately restrictive" profile, Latin is also allowed * with other scripts except Cyrillic and Greek */ - bool illegalScriptCombo(mozilla::intl::Script script, int32_t& savedScript); + bool illegalScriptCombo(mozilla::unicode::Script script, + int32_t& savedScript); /** * Convert a DNS label from ASCII to Unicode using IDNA2008 diff --git a/toolkit/components/find/nsFind.cpp b/toolkit/components/find/nsFind.cpp index 4dc90a6fce11..fe18c0987562 100644 --- a/toolkit/components/find/nsFind.cpp +++ b/toolkit/components/find/nsFind.cpp @@ -31,7 +31,6 @@ #include "mozilla/dom/HTMLOptionElement.h" #include "mozilla/dom/HTMLSelectElement.h" #include "mozilla/dom/Text.h" -#include "mozilla/intl/UnicodeProperties.h" #include "mozilla/intl/WordBreaker.h" #include "mozilla/StaticPrefs_browser.h" @@ -776,7 +775,7 @@ nsFind::Find(const nsAString& aPatText, nsRange* aSearchRange, // already guaranteed to not be a combining diacritical mark.) c = (t2b ? DecodeChar(t2b, &findex) : CHAR_TO_UNICHAR(t1b[findex])); if (!mMatchDiacritics && IsCombiningDiacritic(c) && - !intl::UnicodeProperties::IsMathOrMusicSymbol(prevChar)) { + !IsMathOrMusicSymbol(prevChar)) { continue; } patc = DecodeChar(patStr, &pindex); diff --git a/toolkit/components/places/tests/gtest/test_casing.cpp b/toolkit/components/places/tests/gtest/test_casing.cpp index 079d64bbd070..1668f2f5a6c3 100644 --- a/toolkit/components/places/tests/gtest/test_casing.cpp +++ b/toolkit/components/places/tests/gtest/test_casing.cpp @@ -5,7 +5,7 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #include "gtest/gtest.h" -#include "mozilla/intl/UnicodeProperties.h" +#include "nsUnicodeProperties.h" // Verify the assertion in SQLFunctions.cpp / nextSearchCandidate that the // only non-ASCII characters that lower-case to ASCII ones are: @@ -15,7 +15,7 @@ TEST(MatchAutocompleteCasing, CaseAssumption) { for (uint32_t c = 128; c < 0x110000; c++) { if (c != 304 && c != 8490) { - ASSERT_GE(mozilla::intl::UnicodeProperties::ToLower(c), 128U); + ASSERT_GE(mozilla::unicode::GetLowercase(c), 128U); } } } @@ -24,6 +24,6 @@ TEST(MatchAutocompleteCasing, CaseAssumption) TEST(MatchAutocompleteCasing, CaseAssumption2) { for (uint32_t c = 0; c < 128; c++) { - ASSERT_LT(mozilla::intl::UnicodeProperties::ToLower(c), 128U); + ASSERT_LT(mozilla::unicode::GetLowercase(c), 128U); } } diff --git a/tools/rewriting/Generated.txt b/tools/rewriting/Generated.txt index 1865e4ac32c9..4bd75637a7b6 100644 --- a/tools/rewriting/Generated.txt +++ b/tools/rewriting/Generated.txt @@ -6,6 +6,6 @@ devtools/client/debugger/node_modules/ dom/tests/ajax/jquery/ dom/tests/ajax/mochikit/ node_modules/ -intl/components/src/UnicodeScriptCodes.h intl/unicharutil/util/nsSpecialCasingData.cpp intl/unicharutil/util/nsUnicodePropertyData.cpp +intl/unicharutil/util/nsUnicodeScriptCodes.h