Bug 1719554 - Unify nsCharType as intl::BidiClass; r=platform-i18n-reviewers,jfkthame,gregtatum

Differential Revision: https://phabricator.services.mozilla.com/D132275
This commit is contained in:
Dan Minor 2021-12-03 20:49:31 +00:00
parent c0ebed22d3
commit 8d9076933e
8 changed files with 119 additions and 121 deletions

View File

@ -304,12 +304,12 @@ static bool DoesNotAffectDirectionOfAncestors(const Element* aElement) {
* Returns the directionality of a Unicode character
*/
static Directionality GetDirectionFromChar(uint32_t ch) {
switch (mozilla::unicode::GetBidiCat(ch)) {
case eCharType_RightToLeft:
case eCharType_RightToLeftArabic:
switch (intl::UnicodeProperties::GetBidiClass(ch)) {
case intl::BidiClass::RightToLeft:
case intl::BidiClass::RightToLeftArabic:
return eDir_RTL;
case eCharType_LeftToRight:
case intl::BidiClass::LeftToRight:
return eDir_LTR;
default:

View File

@ -5,6 +5,7 @@
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
EXPORTS.mozilla.intl = [
"src/Bidi.h",
"src/BidiClass.h",
"src/BidiEmbeddingLevel.h",
"src/Calendar.h",
"src/Collator.h",

View File

@ -0,0 +1,47 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef intl_components_BidiClass_h_
#define intl_components_BidiClass_h_

// uint8_t is the underlying type of BidiClass; include it here so that this
// header compiles on its own, regardless of what the includer pulled in.
#include <cstdint>

namespace mozilla::intl {

/**
 * The bidirectional class of a character.
 *
 * Read ftp://ftp.unicode.org/Public/UNIDATA/ReadMe-Latest.txt
 * section BIDIRECTIONAL PROPERTIES
 * for the detailed definition of the following categories
 *
 * The values here must match the equivalents in %bidicategorycode in
 * mozilla/intl/unicharutil/tools/genUnicodePropertyData.pl,
 * and must also match the values used by ICU's UCharDirection.
 *
 * The numeric order of the enumerators is significant: do not renumber or
 * reorder them without auditing code that compares BidiClass values with
 * relational operators.
 */
enum class BidiClass : uint8_t {
  LeftToRight = 0,
  RightToLeft = 1,
  EuropeanNumber = 2,
  EuropeanNumberSeparator = 3,
  EuropeanNumberTerminator = 4,
  ArabicNumber = 5,
  CommonNumberSeparator = 6,
  BlockSeparator = 7,
  SegmentSeparator = 8,
  WhiteSpaceNeutral = 9,
  OtherNeutral = 10,
  LeftToRightEmbedding = 11,
  LeftToRightOverride = 12,
  RightToLeftArabic = 13,
  RightToLeftEmbedding = 14,
  RightToLeftOverride = 15,
  PopDirectionalFormat = 16,
  DirNonSpacingMark = 17,
  BoundaryNeutral = 18,
  FirstStrongIsolate = 19,
  LeftToRightIsolate = 20,
  RightToLeftIsolate = 21,
  PopDirectionalIsolate = 22,
  // Implicitly 23: one past the last explicitly numbered class above.
  BidiClassCount
};

}  // namespace mozilla::intl

#endif

View File

@ -4,6 +4,8 @@
#ifndef intl_components_UnicodeProperties_h_
#define intl_components_UnicodeProperties_h_
#include "mozilla/intl/BidiClass.h"
#include "unicode/uchar.h"
#include "unicode/uscript.h"
@ -14,6 +16,13 @@ namespace mozilla::intl {
*/
class UnicodeProperties final {
public:
/**
 * Look up the bidirectional class of the character aCh.
 *
 * The result of ICU's u_charDirection() is converted directly to BidiClass.
 */
static inline BidiClass GetBidiClass(uint32_t aCh) {
  const auto icuDirection = u_charDirection(aCh);
  return static_cast<BidiClass>(icuDirection);
}
/**
* Maps the specified character to a "mirror-image" character.
*/

View File

@ -6,63 +6,24 @@
#ifndef nsBidiUtils_h__
#define nsBidiUtils_h__
#include "mozilla/intl/BidiClass.h"
#include "nsString.h"
#include "encoding_rs_mem.h"
/**
* Read ftp://ftp.unicode.org/Public/UNIDATA/ReadMe-Latest.txt
* section BIDIRECTIONAL PROPERTIES
* for the detailed definition of the following categories
*
* The values here must match the equivalents in %bidicategorycode in
* mozilla/intl/unicharutil/tools/genUnicodePropertyData.pl,
* and must also match the values used by ICU's UCharDirection.
*/
enum nsCharType {
eCharType_LeftToRight = 0,
eCharType_RightToLeft = 1,
eCharType_EuropeanNumber = 2,
eCharType_EuropeanNumberSeparator = 3,
eCharType_EuropeanNumberTerminator = 4,
eCharType_ArabicNumber = 5,
eCharType_CommonNumberSeparator = 6,
eCharType_BlockSeparator = 7,
eCharType_SegmentSeparator = 8,
eCharType_WhiteSpaceNeutral = 9,
eCharType_OtherNeutral = 10,
eCharType_LeftToRightEmbedding = 11,
eCharType_LeftToRightOverride = 12,
eCharType_RightToLeftArabic = 13,
eCharType_RightToLeftEmbedding = 14,
eCharType_RightToLeftOverride = 15,
eCharType_PopDirectionalFormat = 16,
eCharType_DirNonSpacingMark = 17,
eCharType_BoundaryNeutral = 18,
eCharType_FirstStrongIsolate = 19,
eCharType_LeftToRightIsolate = 20,
eCharType_RightToLeftIsolate = 21,
eCharType_PopDirectionalIsolate = 22,
eCharType_CharTypeCount
};
/**
* This specifies the language directional property of a character set.
*/
typedef enum nsCharType nsCharType;
/**
* Definitions of bidirectional character types by category
*/
#define CHARTYPE_IS_RTL(val) \
(((val) == eCharType_RightToLeft) || ((val) == eCharType_RightToLeftArabic))
/**
* True when `val` is mozilla::intl::BidiClass::RightToLeft or
* mozilla::intl::BidiClass::RightToLeftArabic.
*
* `val` appears twice in the expansion, so pass an expression without side
* effects.
*/
#define BIDICLASS_IS_RTL(val) \
(((val) == mozilla::intl::BidiClass::RightToLeft) || \
((val) == mozilla::intl::BidiClass::RightToLeftArabic))
#define CHARTYPE_IS_WEAK(val) \
(((val) == eCharType_EuropeanNumberSeparator) || \
((val) == eCharType_EuropeanNumberTerminator) || \
(((val) > eCharType_ArabicNumber) && \
((val) != eCharType_RightToLeftArabic)))
/**
* True when `val` is EuropeanNumberSeparator, EuropeanNumberTerminator, or any
* class that compares greater than ArabicNumber other than RightToLeftArabic.
*
* The `>` comparison relies on the numeric ordering of the
* mozilla::intl::BidiClass enumerators, so this macro must be revisited if
* that enum is ever renumbered.
*
* `val` appears four times in the expansion, so pass an expression without
* side effects.
*/
#define BIDICLASS_IS_WEAK(val) \
(((val) == mozilla::intl::BidiClass::EuropeanNumberSeparator) || \
((val) == mozilla::intl::BidiClass::EuropeanNumberTerminator) || \
(((val) > mozilla::intl::BidiClass::ArabicNumber) && \
((val) != mozilla::intl::BidiClass::RightToLeftArabic)))
/**
* Inspects a Unichar, converting numbers to Arabic or Hindi forms and

View File

@ -60,8 +60,8 @@ inline uint8_t GetGeneralCategory(uint32_t aCh) {
return sICUtoHBcategory[intl::UnicodeProperties::CharType(aCh)];
}
inline nsCharType GetBidiCat(uint32_t aCh) {
return nsCharType(u_charDirection(aCh));
inline int8_t GetNumericValue(uint32_t aCh) {
return intl::UnicodeProperties::GetNumericValue(aCh);
}
inline uint8_t GetLineBreakClass(uint32_t aCh) {

View File

@ -1991,7 +1991,7 @@ void nsBidiPresUtils::RemoveBidiContinuation(BidiParagraphData* aBpd,
nsresult nsBidiPresUtils::FormatUnicodeText(nsPresContext* aPresContext,
char16_t* aText,
int32_t& aTextLength,
nsCharType aCharType) {
intl::BidiClass aBidiClass) {
nsresult rv = NS_OK;
// ahmed
// adjusted for correct numeral shaping
@ -2011,12 +2011,12 @@ nsresult nsBidiPresUtils::FormatUnicodeText(nsPresContext* aPresContext,
case IBMBIDI_NUMERAL_REGULAR:
switch (aCharType) {
case eCharType_EuropeanNumber:
switch (aBidiClass) {
case intl::BidiClass::EuropeanNumber:
HandleNumbers(aText, aTextLength, IBMBIDI_NUMERAL_ARABIC);
break;
case eCharType_ArabicNumber:
case intl::BidiClass::ArabicNumber:
HandleNumbers(aText, aTextLength, IBMBIDI_NUMERAL_HINDI);
break;
@ -2029,20 +2029,22 @@ nsresult nsBidiPresUtils::FormatUnicodeText(nsPresContext* aPresContext,
if (((GET_BIDI_OPTION_DIRECTION(bidiOptions) ==
IBMBIDI_TEXTDIRECTION_RTL) &&
(IS_ARABIC_DIGIT(aText[0]))) ||
(eCharType_ArabicNumber == aCharType))
(intl::BidiClass::ArabicNumber == aBidiClass)) {
HandleNumbers(aText, aTextLength, IBMBIDI_NUMERAL_HINDI);
else if (eCharType_EuropeanNumber == aCharType)
} else if (intl::BidiClass::EuropeanNumber == aBidiClass) {
HandleNumbers(aText, aTextLength, IBMBIDI_NUMERAL_ARABIC);
}
break;
case IBMBIDI_NUMERAL_PERSIANCONTEXT:
if (((GET_BIDI_OPTION_DIRECTION(bidiOptions) ==
IBMBIDI_TEXTDIRECTION_RTL) &&
(IS_ARABIC_DIGIT(aText[0]))) ||
(eCharType_ArabicNumber == aCharType))
(intl::BidiClass::ArabicNumber == aBidiClass)) {
HandleNumbers(aText, aTextLength, IBMBIDI_NUMERAL_PERSIAN);
else if (eCharType_EuropeanNumber == aCharType)
} else if (intl::BidiClass::EuropeanNumber == aBidiClass) {
HandleNumbers(aText, aTextLength, IBMBIDI_NUMERAL_ARABIC);
}
break;
case IBMBIDI_NUMERAL_NOMINAL:
@ -2074,64 +2076,40 @@ void nsBidiPresUtils::StripBidiControlCharacters(char16_t* aText,
aTextLength -= stripLen;
}
#if 0 // XXX: for the future use ???
void
RemoveDiacritics(char16_t* aText,
int32_t& aTextLength)
{
if (aText && (aTextLength > 0) ) {
int32_t offset = 0;
for (int32_t i = 0; i < aTextLength && aText[i]; i++) {
if (IS_BIDI_DIACRITIC(aText[i]) ) {
++offset;
continue;
}
aText[i - offset] = aText[i];
}
aTextLength = i - offset;
aText[aTextLength] = 0;
}
}
#endif
void nsBidiPresUtils::CalculateCharType(intl::Bidi* aBidiEngine,
const char16_t* aText, int32_t& aOffset,
int32_t aCharTypeLimit,
int32_t& aRunLimit, int32_t& aRunLength,
int32_t& aRunCount, uint8_t& aCharType,
uint8_t& aPrevCharType)
{
void nsBidiPresUtils::CalculateBidiClass(
intl::Bidi* aBidiEngine, const char16_t* aText, int32_t& aOffset,
int32_t aBidiClassLimit, int32_t& aRunLimit, int32_t& aRunLength,
int32_t& aRunCount, intl::BidiClass& aBidiClass,
intl::BidiClass& aPrevBidiClass) {
bool strongTypeFound = false;
int32_t offset;
nsCharType charType;
intl::BidiClass bidiClass;
aCharType = eCharType_OtherNeutral;
aBidiClass = intl::BidiClass::OtherNeutral;
int32_t charLen;
for (offset = aOffset; offset < aCharTypeLimit; offset += charLen) {
for (offset = aOffset; offset < aBidiClassLimit; offset += charLen) {
// Make sure we give RTL chartype to all characters that would be classified
// as Right-To-Left by a bidi platform.
// (May differ from the UnicodeData, eg we set RTL chartype to some NSMs.)
charLen = 1;
uint32_t ch = aText[offset];
if (IS_HEBREW_CHAR(ch)) {
charType = eCharType_RightToLeft;
bidiClass = intl::BidiClass::RightToLeft;
} else if (IS_ARABIC_ALPHABETIC(ch)) {
charType = eCharType_RightToLeftArabic;
bidiClass = intl::BidiClass::RightToLeftArabic;
} else {
if (offset + 1 < aCharTypeLimit &&
if (offset + 1 < aBidiClassLimit &&
NS_IS_SURROGATE_PAIR(ch, aText[offset + 1])) {
ch = SURROGATE_TO_UCS4(ch, aText[offset + 1]);
charLen = 2;
}
charType = unicode::GetBidiCat(ch);
bidiClass = intl::UnicodeProperties::GetBidiClass(ch);
}
if (!CHARTYPE_IS_WEAK(charType)) {
if (strongTypeFound && (charType != aPrevCharType) &&
(CHARTYPE_IS_RTL(charType) || CHARTYPE_IS_RTL(aPrevCharType))) {
if (!BIDICLASS_IS_WEAK(bidiClass)) {
if (strongTypeFound && (bidiClass != aPrevBidiClass) &&
(BIDICLASS_IS_RTL(bidiClass) || BIDICLASS_IS_RTL(aPrevBidiClass))) {
// Stop at this point to ensure uni-directionality of the text
// (from platform's point of view).
// Also, don't mix Arabic and Hebrew content (since platform may
@ -2142,18 +2120,18 @@ void nsBidiPresUtils::CalculateCharType(intl::Bidi* aBidiEngine,
break;
}
if ((eCharType_RightToLeftArabic == aPrevCharType ||
eCharType_ArabicNumber == aPrevCharType) &&
eCharType_EuropeanNumber == charType) {
charType = eCharType_ArabicNumber;
if ((intl::BidiClass::RightToLeftArabic == aPrevBidiClass ||
intl::BidiClass::ArabicNumber == aPrevBidiClass) &&
intl::BidiClass::EuropeanNumber == bidiClass) {
bidiClass = intl::BidiClass::ArabicNumber;
}
// Set PrevCharType to the last strong type in this frame
// Set PrevBidiClass to the last strong type in this frame
// (for correct numeric shaping)
aPrevCharType = charType;
aPrevBidiClass = bidiClass;
strongTypeFound = true;
aCharType = charType;
aBidiClass = bidiClass;
}
}
aOffset = offset;
@ -2188,8 +2166,8 @@ nsresult nsBidiPresUtils::ProcessText(const char16_t* aText, size_t aLength,
nscoord totalWidth = 0;
int32_t i, start, limit, length;
uint32_t visualStart = 0;
uint8_t charType;
uint8_t prevType = eCharType_LeftToRight;
intl::BidiClass bidiClass;
intl::BidiClass prevClass = intl::BidiClass::LeftToRight;
for (int nPosResolve = 0; nPosResolve < aPosResolveCount; ++nPosResolve) {
aPosResolve[nPosResolve].visualIndex = kNotFound;
@ -2231,17 +2209,17 @@ nsresult nsBidiPresUtils::ProcessText(const char16_t* aText, size_t aLength,
}
while (subRunCount > 0) {
// CalculateCharType can increment subRunCount if the run
// CalculateBidiClass can increment subRunCount if the run
// contains mixed character types
CalculateCharType(aBidiEngine, text, lineOffset, typeLimit, subRunLimit,
subRunLength, subRunCount, charType, prevType);
CalculateBidiClass(aBidiEngine, text, lineOffset, typeLimit, subRunLimit,
subRunLength, subRunCount, bidiClass, prevClass);
nsAutoString runVisualText;
runVisualText.Assign(text + start, subRunLength);
if (int32_t(runVisualText.Length()) < subRunLength)
return NS_ERROR_OUT_OF_MEMORY;
FormatUnicodeText(aPresContext, runVisualText.BeginWriting(),
subRunLength, (nsCharType)charType);
subRunLength, bidiClass);
aprocessor.SetText(runVisualText.get(), subRunLength, dir);
width = aprocessor.GetWidth();

View File

@ -8,6 +8,7 @@
#define nsBidiPresUtils_h___
#include "gfxContext.h"
#include "mozilla/intl/BidiClass.h"
#include "mozilla/intl/BidiEmbeddingLevel.h"
#include "nsBidiUtils.h"
#include "nsHashKeys.h"
@ -223,7 +224,7 @@ class nsBidiPresUtils {
*/
static nsresult FormatUnicodeText(nsPresContext* aPresContext,
char16_t* aText, int32_t& aTextLength,
nsCharType aCharType);
mozilla::intl::BidiClass aBidiClass);
/**
* Reorder plain text using the Unicode Bidi algorithm and send it to
@ -562,11 +563,12 @@ class nsBidiPresUtils {
*/
static void RemoveBidiContinuation(BidiParagraphData* aBpd, nsIFrame* aFrame,
int32_t aFirstIndex, int32_t aLastIndex);
static void CalculateCharType(mozilla::intl::Bidi* aBidiEngine,
const char16_t* aText, int32_t& aOffset,
int32_t aCharTypeLimit, int32_t& aRunLimit,
int32_t& aRunLength, int32_t& aRunCount,
uint8_t& aCharType, uint8_t& aPrevCharType);
static void CalculateBidiClass(mozilla::intl::Bidi* aBidiEngine,
const char16_t* aText, int32_t& aOffset,
int32_t aBidiClassLimit, int32_t& aRunLimit,
int32_t& aRunLength, int32_t& aRunCount,
mozilla::intl::BidiClass& aBidiClass,
mozilla::intl::BidiClass& aPrevBidiClass);
static void StripBidiControlCharacters(char16_t* aText, int32_t& aTextLength);
};