Bug 1305700 - pt 3 & 4 - Clean up/simplify use of ENABLE_INTL_API conditionals in nsUnicodeProperties (code rearrangement, no change in behavior). r=m_kato

This commit is contained in:
Jonathan Kew 2016-09-28 10:52:51 +01:00
parent 7f21325a4a
commit 594fdb205d
3 changed files with 149 additions and 143 deletions

View File

@ -10,11 +10,6 @@
#include "mozilla/ArrayUtils.h"
#include "nsCharTraits.h"
#if ENABLE_INTL_API
#include "unicode/uchar.h"
#include "unicode/uscript.h"
#endif
// Exclusive upper bound of the Basic Multilingual Plane: code points below
// this value fit in 16 bits.
#define UNICODE_BMP_LIMIT 0x10000
// Exclusive upper bound of the Unicode code space (one past U+10FFFF).
#define UNICODE_LIMIT 0x110000
@ -173,121 +168,70 @@ const hb_unicode_general_category_t sICUtoHBcategory[U_CHAR_CATEGORY_COUNT] = {
};
#endif
#if !ENABLE_INTL_API
// Returns the general category of aCh as a uint8_t.
// With ENABLE_INTL_API, the result of ICU's u_charType() is used as an index
// into sICUtoHBcategory; otherwise the mCategory field of the record returned
// by GetCharProps2() is used.
uint8_t GetGeneralCategory(uint32_t aCh) {
#if ENABLE_INTL_API
return sICUtoHBcategory[u_charType(aCh)];
#else
return GetCharProps2(aCh).mCategory;
#endif
}
// Returns the bidi category of aCh as an nsCharType.
// With ENABLE_INTL_API, the value from ICU's u_charDirection() is converted
// directly to nsCharType; otherwise the mBidiCategory field of the record
// returned by GetCharProps2() is converted.
nsCharType GetBidiCat(uint32_t aCh) {
#if ENABLE_INTL_API
return nsCharType(u_charDirection(aCh));
#else
return nsCharType(GetCharProps2(aCh).mBidiCategory);
#endif
}
// Returns the numeric value of aCh as an int8_t.
// With ENABLE_INTL_API, only characters whose ICU numeric type is
// U_NT_DECIMAL or U_NT_DIGIT yield a value (u_getNumericValue() truncated to
// int8_t); every other character yields -1. Without ICU, the mNumericValue
// field of the GetCharProps2() record is returned as-is.
int8_t GetNumericValue(uint32_t aCh) {
#if ENABLE_INTL_API
UNumericType type =
UNumericType(u_getIntPropertyValue(aCh, UCHAR_NUMERIC_TYPE));
return type == U_NT_DECIMAL || type == U_NT_DIGIT
? int8_t(u_getNumericValue(aCh))
: -1;
#else
return GetCharProps2(aCh).mNumericValue;
#endif
}
// Returns the mirrored counterpart of aCh.
// With ENABLE_INTL_API this is ICU's u_charMirror(); otherwise it is aCh plus
// the entry of sMirrorOffsets selected by the character's mMirrorOffsetIndex
// (from GetCharProps1()).
uint32_t
GetMirroredChar(uint32_t aCh)
{
#if ENABLE_INTL_API
return u_charMirror(aCh);
#else
return aCh + sMirrorOffsets[GetCharProps1(aCh).mMirrorOffsetIndex];
#endif
}
// Returns whether aCh has a mirrored counterpart.
// With ENABLE_INTL_API this is ICU's u_isMirrored(); otherwise it is true
// exactly when the character's mMirrorOffsetIndex (from GetCharProps1()) is
// non-zero.
bool
HasMirroredChar(uint32_t aCh)
{
#if ENABLE_INTL_API
return u_isMirrored(aCh);
#else
return GetCharProps1(aCh).mMirrorOffsetIndex != 0;
#endif
}
// Returns the combining class of aCh.
// With ENABLE_INTL_API this is ICU's u_getCombiningClass(); otherwise it is
// the mCombiningClass field of the record returned by GetCharProps1().
uint8_t
GetCombiningClass(uint32_t aCh)
{
#if ENABLE_INTL_API
return u_getCombiningClass(aCh);
#else
return GetCharProps1(aCh).mCombiningClass;
#endif
}
// Returns the line-break class of aCh as a uint8_t.
// With ENABLE_INTL_API this is ICU's UCHAR_LINE_BREAK property value
// (implicitly narrowed from the int returned by u_getIntPropertyValue());
// otherwise it is the mLineBreak field of the GetCharProps2() record.
uint8_t
GetLineBreakClass(uint32_t aCh)
{
#if ENABLE_INTL_API
return u_getIntPropertyValue(aCh, UCHAR_LINE_BREAK);
#else
return GetCharProps2(aCh).mLineBreak;
#endif
}
// Returns the script of aCh as a Script value.
// With ENABLE_INTL_API the result of ICU's uscript_getScript() is converted
// to Script; the UErrorCode it fills in is not inspected. Without ICU, the
// mScriptCode field of the GetCharProps2() record is converted instead.
Script
GetScriptCode(uint32_t aCh)
{
#if ENABLE_INTL_API
UErrorCode err = U_ZERO_ERROR;
return Script(uscript_getScript(aCh, &err));
#else
return Script(GetCharProps2(aCh).mScriptCode);
#endif
}
// Returns the four-byte tag for aScriptCode, or 0 when the code does not
// identify a known script.
// With ENABLE_INTL_API the tag is built with HB_TAG from the first four
// characters of ICU's short script name; otherwise it is looked up in
// sScriptCodeToTag.
uint32_t
GetScriptTagForCode(Script aScriptCode)
{
#if ENABLE_INTL_API
  // uscript_getShortName() yields a null pointer for an invalid script code,
  // so guard before indexing into the name.
  const char* tag = uscript_getShortName(UScriptCode(aScriptCode));
  if (!tag) {
    return 0;
  }
  return HB_TAG(tag[0], tag[1], tag[2], tag[3]);
#else
  // The unsigned conversion turns negative script codes into very large
  // values, so a single comparison rejects them too. The test must be >=:
  // an index equal to the array length is already one past the last entry.
  const uint32_t index = static_cast<uint32_t>(aScriptCode);
  if (index >= ArrayLength(sScriptCodeToTag)) {
    return 0;
  }
  return sScriptCodeToTag[index];
#endif
}
// Returns the paired-bracket type of aCh.
// With ENABLE_INTL_API this is ICU's UCHAR_BIDI_PAIRED_BRACKET_TYPE property
// value converted to PairedBracketType; otherwise it is the
// mPairedBracketType field of the GetCharProps2() record.
PairedBracketType GetPairedBracketType(uint32_t aCh)
{
#if ENABLE_INTL_API
return PairedBracketType
(u_getIntPropertyValue(aCh, UCHAR_BIDI_PAIRED_BRACKET_TYPE));
#else
return PairedBracketType(GetCharProps2(aCh).mPairedBracketType);
#endif
}
// Returns the paired bracket for aCh.
// With ENABLE_INTL_API this is ICU's u_getBidiPairedBracket(). Without ICU,
// a character whose paired-bracket type is not PAIRED_BRACKET_TYPE_NONE maps
// to its mirrored character, and any other character maps to itself.
uint32_t GetPairedBracket(uint32_t aCh)
{
#if ENABLE_INTL_API
return u_getBidiPairedBracket(aCh);
#else
return GetPairedBracketType(aCh) != PAIRED_BRACKET_TYPE_NONE
? GetMirroredChar(aCh) : aCh;
#endif
}
#if !ENABLE_INTL_API
static inline uint32_t
GetCaseMapValue(uint32_t aCh)
{
@ -302,14 +246,10 @@ GetCaseMapValue(uint32_t aCh)
}
return 0;
}
#endif
uint32_t
GetUppercase(uint32_t aCh)
{
#if ENABLE_INTL_API
return u_toupper(aCh);
#else
uint32_t mapValue = GetCaseMapValue(aCh);
if (mapValue & (kLowerToUpper | kTitleToUpper)) {
return aCh ^ (mapValue & kCaseMapCharMask);
@ -318,15 +258,11 @@ GetUppercase(uint32_t aCh)
return GetUppercase(aCh ^ (mapValue & kCaseMapCharMask));
}
return aCh;
#endif
}
uint32_t
GetLowercase(uint32_t aCh)
{
#if ENABLE_INTL_API
return u_tolower(aCh);
#else
uint32_t mapValue = GetCaseMapValue(aCh);
if (mapValue & kUpperToLower) {
return aCh ^ (mapValue & kCaseMapCharMask);
@ -335,29 +271,21 @@ GetLowercase(uint32_t aCh)
return GetLowercase(aCh ^ (mapValue & kCaseMapCharMask));
}
return aCh;
#endif
}
// Maps a lowercase character to its titlecase form; other characters are
// returned unchanged.
// With ENABLE_INTL_API, u_totitle() is applied only when u_isULowercase()
// reports aCh as lowercase. Without ICU, the case-map value is consulted: if
// it carries kLowerToTitle or kLowerToUpper, the result is aCh XORed with the
// bits selected by kCaseMapCharMask.
uint32_t
GetTitlecaseForLower(uint32_t aCh)
{
#if ENABLE_INTL_API
return u_isULowercase(aCh) ? u_totitle(aCh) : aCh;
#else
uint32_t mapValue = GetCaseMapValue(aCh);
if (mapValue & (kLowerToTitle | kLowerToUpper)) {
return aCh ^ (mapValue & kCaseMapCharMask);
}
return aCh;
#endif
}
uint32_t
GetTitlecaseForAll(uint32_t aCh)
{
#if ENABLE_INTL_API
return u_totitle(aCh);
#else
uint32_t mapValue = GetCaseMapValue(aCh);
if (mapValue & (kLowerToTitle | kLowerToUpper)) {
return aCh ^ (mapValue & kCaseMapCharMask);
@ -366,27 +294,6 @@ GetTitlecaseForAll(uint32_t aCh)
return GetTitlecaseForLower(aCh ^ (mapValue & kCaseMapCharMask));
}
return aCh;
#endif
}
#if 0 // currently unused - bug 857481
// Looks up the Han variant type of aCh in the packed sHanVariantValues table.
// Code points at or above (kHanVariantMaxPlane + 1) * 0x10000 are not looked
// up: v stays 0, so the result is HanVariantType(0).
HanVariantType
GetHanVariant(uint32_t aCh)
{
// In the sHanVariantValues array, data for 4 successive characters
// (2 bits each) is packed into each uint8_t entry, with the value
// for the lowest character stored in the least significant bits.
uint8_t v = 0;
if (aCh < UNICODE_BMP_LIMIT) {
// BMP: page table 0 is indexed by the high bits of the code point; the low
// kHanVariantCharBits bits, divided by 4, select the byte within the page.
v = sHanVariantValues[sHanVariantPages[0][aCh >> kHanVariantCharBits]]
[(aCh & ((1 << kHanVariantCharBits) - 1)) >> 2];
} else if (aCh < (kHanVariantMaxPlane + 1) * 0x10000) {
// Supplementary planes: sHanVariantPlanes maps (plane - 1) to the page
// table to use, which is then indexed by the offset within the plane.
v = sHanVariantValues[sHanVariantPages[sHanVariantPlanes[(aCh >> 16) - 1]]
[(aCh & 0xffff) >> kHanVariantCharBits]]
[(aCh & ((1 << kHanVariantCharBits) - 1)) >> 2];
}
// extract the appropriate 2-bit field from the value
return HanVariantType((v >> ((aCh & 3) * 2)) & 3);
}
#endif

View File

@ -10,6 +10,12 @@
#include "nsBidiUtils.h"
#include "nsIUGenCategory.h"
#include "nsUnicodeScriptCodes.h"
#include "harfbuzz/hb.h"
#if ENABLE_INTL_API
#include "unicode/uchar.h"
#include "unicode/uscript.h"
#endif
const nsCharProps2& GetCharProps2(uint32_t aCh);
@ -19,29 +25,6 @@ namespace unicode {
extern const nsIUGenCategory::nsUGenCategory sDetailedToGeneralCategory[];
// Return whether the char has a mirrored-pair counterpart.
uint32_t GetMirroredChar(uint32_t aCh);
bool HasMirroredChar(uint32_t aChr);
uint8_t GetCombiningClass(uint32_t aCh);
// returns the detailed General Category in terms of HB_UNICODE_* values
uint8_t GetGeneralCategory(uint32_t aCh);
// returns the simplified Gen Category as defined in nsIUGenCategory
// Looks up the detailed category from GetGeneralCategory(aCh) in
// sDetailedToGeneralCategory to obtain the simplified category.
inline nsIUGenCategory::nsUGenCategory GetGenCategory(uint32_t aCh) {
return sDetailedToGeneralCategory[GetGeneralCategory(aCh)];
}
nsCharType GetBidiCat(uint32_t aCh);
uint8_t GetLineBreakClass(uint32_t aCh);
Script GetScriptCode(uint32_t aCh);
uint32_t GetScriptTagForCode(Script aScriptCode);
/* This MUST match the values assigned by genUnicodePropertyData.pl! */
enum VerticalOrientation {
VERTICAL_ORIENTATION_U = 0,
@ -50,10 +33,6 @@ enum VerticalOrientation {
VERTICAL_ORIENTATION_Tr = 3
};
// Returns the vertical orientation of aCh, read from the mVertOrient field
// of the record returned by GetCharProps2().
inline VerticalOrientation GetVerticalOrientation(uint32_t aCh) {
return VerticalOrientation(GetCharProps2(aCh).mVertOrient);
}
/* This MUST match the values assigned by genUnicodePropertyData.pl! */
enum PairedBracketType {
PAIRED_BRACKET_TYPE_NONE = 0,
@ -61,9 +40,6 @@ enum PairedBracketType {
PAIRED_BRACKET_TYPE_CLOSE = 2
};
PairedBracketType GetPairedBracketType(uint32_t aCh);
uint32_t GetPairedBracket(uint32_t aCh);
enum XidmodType {
XIDMOD_RECOMMENDED,
XIDMOD_INCLUSION,
@ -80,10 +56,129 @@ enum XidmodType {
XIDMOD_NOT_CHARS
};
inline XidmodType GetIdentifierModification(uint32_t aCh) {
return XidmodType(GetCharProps2(aCh).mXidmod);
#if ENABLE_INTL_API // ICU is available, so simply forward to its API
extern const hb_unicode_general_category_t sICUtoHBcategory[];
// ICU build: the mirrored counterpart of aCh, as reported by u_charMirror().
inline uint32_t GetMirroredChar(uint32_t aCh)
{
  const uint32_t mirrored = u_charMirror(aCh);
  return mirrored;
}
// ICU build: whether aCh is mirrored, as reported by u_isMirrored().
inline bool HasMirroredChar(uint32_t aCh)
{
  const bool isMirrored = u_isMirrored(aCh);
  return isMirrored;
}
// ICU build: the combining class of aCh, as reported by
// u_getCombiningClass().
inline uint8_t GetCombiningClass(uint32_t aCh)
{
  const uint8_t combiningClass = u_getCombiningClass(aCh);
  return combiningClass;
}
// ICU build: the general category of aCh. The ICU category from u_charType()
// is used as an index into the sICUtoHBcategory mapping table.
inline uint8_t GetGeneralCategory(uint32_t aCh)
{
  const auto icuCategory = u_charType(aCh);
  return sICUtoHBcategory[icuCategory];
}
// ICU build: the bidi category of aCh, obtained by converting the value from
// u_charDirection() directly to nsCharType.
inline nsCharType GetBidiCat(uint32_t aCh)
{
  const auto direction = u_charDirection(aCh);
  return nsCharType(direction);
}
// ICU build: the numeric value of aCh, or -1 when the character's ICU
// numeric type is neither U_NT_DECIMAL nor U_NT_DIGIT.
inline int8_t GetNumericValue(uint32_t aCh)
{
  const UNumericType numericType =
    UNumericType(u_getIntPropertyValue(aCh, UCHAR_NUMERIC_TYPE));
  if (numericType != U_NT_DECIMAL && numericType != U_NT_DIGIT) {
    return -1;
  }
  // u_getNumericValue() is only consulted for decimal/digit characters; its
  // result is converted to int8_t.
  return int8_t(u_getNumericValue(aCh));
}
// ICU build: the UCHAR_LINE_BREAK property value of aCh, narrowed to
// uint8_t.
inline uint8_t GetLineBreakClass(uint32_t aCh)
{
  const int32_t lineBreak = u_getIntPropertyValue(aCh, UCHAR_LINE_BREAK);
  return static_cast<uint8_t>(lineBreak);
}
// ICU build: the script of aCh, converted from the result of
// uscript_getScript(). The status code ICU writes back is not inspected.
inline Script GetScriptCode(uint32_t aCh)
{
  UErrorCode status = U_ZERO_ERROR;
  const auto icuScript = uscript_getScript(aCh, &status);
  return Script(icuScript);
}
// ICU build: the four-byte tag for aScriptCode, built with HB_TAG from the
// first four characters of ICU's short script name. Returns 0 when ICU has
// no short name for the code.
inline uint32_t
GetScriptTagForCode(Script aScriptCode)
{
  // uscript_getShortName() yields a null pointer for an invalid script code,
  // so guard before indexing into the name.
  const char* tag = uscript_getShortName(UScriptCode(aScriptCode));
  if (!tag) {
    return 0;
  }
  return HB_TAG(tag[0], tag[1], tag[2], tag[3]);
}
// ICU build: the UCHAR_BIDI_PAIRED_BRACKET_TYPE property value of aCh,
// converted to PairedBracketType.
inline PairedBracketType GetPairedBracketType(uint32_t aCh)
{
  const int32_t bracketType =
    u_getIntPropertyValue(aCh, UCHAR_BIDI_PAIRED_BRACKET_TYPE);
  return PairedBracketType(bracketType);
}
// ICU build: the paired bracket for aCh, as reported by
// u_getBidiPairedBracket().
inline uint32_t GetPairedBracket(uint32_t aCh)
{
  const uint32_t paired = u_getBidiPairedBracket(aCh);
  return paired;
}
// ICU build: the uppercase mapping of aCh, as reported by u_toupper().
inline uint32_t GetUppercase(uint32_t aCh)
{
  const uint32_t upper = u_toupper(aCh);
  return upper;
}
// ICU build: the lowercase mapping of aCh, as reported by u_tolower().
inline uint32_t GetLowercase(uint32_t aCh)
{
  const uint32_t lower = u_tolower(aCh);
  return lower;
}
// ICU build: maps lowercase characters to titlecase via u_totitle();
// characters that u_isULowercase() does not report as lowercase are
// returned unchanged.
inline uint32_t GetTitlecaseForLower(uint32_t aCh)
{
  if (!u_isULowercase(aCh)) {
    return aCh;
  }
  return u_totitle(aCh);
}
// ICU build: the titlecase mapping of aCh for both uppercase and lowercase
// input, as reported by u_totitle().
inline uint32_t GetTitlecaseForAll(uint32_t aCh)
{
  const uint32_t title = u_totitle(aCh);
  return title;
}
#else // not ENABLE_INTL_API
// GetMirroredChar returns the mirrored counterpart of the char;
// HasMirroredChar returns whether the char has a mirrored-pair counterpart.
uint32_t GetMirroredChar(uint32_t aCh);
bool HasMirroredChar(uint32_t aChr);
uint8_t GetCombiningClass(uint32_t aCh);
// returns the detailed General Category in terms of HB_UNICODE_* values
uint8_t GetGeneralCategory(uint32_t aCh);
nsCharType GetBidiCat(uint32_t aCh);
uint8_t GetLineBreakClass(uint32_t aCh);
Script GetScriptCode(uint32_t aCh);
uint32_t GetScriptTagForCode(Script aScriptCode);
PairedBracketType GetPairedBracketType(uint32_t aCh);
uint32_t GetPairedBracket(uint32_t aCh);
/**
* Return the numeric value of the character. The value returned is the value
* of the Numeric_Value in field 7 of the UCD, or -1 if field 7 is empty.
@ -92,16 +187,25 @@ inline XidmodType GetIdentifierModification(uint32_t aCh) {
*/
int8_t GetNumericValue(uint32_t aCh);
#if 0 // currently unused - bug 857481
// Result type of GetHanVariant(). The four values occupy exactly two bits,
// with HVT_AnyHan equal to the bitwise OR of the two "only" values.
enum HanVariantType {
HVT_NotHan = 0x0,
HVT_SimplifiedOnly = 0x1,
HVT_TraditionalOnly = 0x2,
HVT_AnyHan = 0x3
};
uint32_t GetUppercase(uint32_t aCh);
uint32_t GetLowercase(uint32_t aCh);
uint32_t GetTitlecaseForLower(uint32_t aCh); // maps LC to titlecase, UC unchanged
uint32_t GetTitlecaseForAll(uint32_t aCh); // maps both UC and LC to titlecase
HanVariantType GetHanVariant(uint32_t aCh);
#endif
#endif // !ENABLE_INTL_API
// Maps the detailed general category of aCh (from GetGeneralCategory) to the
// simplified category defined in nsIUGenCategory, using the
// sDetailedToGeneralCategory table.
inline nsIUGenCategory::nsUGenCategory GetGenCategory(uint32_t aCh)
{
  const uint8_t detailedCategory = GetGeneralCategory(aCh);
  return sDetailedToGeneralCategory[detailedCategory];
}
// Returns the vertical orientation of aCh, read from the mVertOrient field
// of the character's GetCharProps2() record.
inline VerticalOrientation GetVerticalOrientation(uint32_t aCh)
{
  const auto& props = GetCharProps2(aCh);
  return VerticalOrientation(props.mVertOrient);
}
// Returns the identifier-modification type of aCh, read from the mXidmod
// field of the character's GetCharProps2() record.
inline XidmodType GetIdentifierModification(uint32_t aCh)
{
  const auto& props = GetCharProps2(aCh);
  return XidmodType(props.mXidmod);
}
uint32_t GetFullWidth(uint32_t aCh);
// This is the reverse function of GetFullWidth which guarantees that
@ -116,14 +220,6 @@ inline bool IsClusterExtender(uint32_t aCh) {
return IsClusterExtender(aCh, GetGeneralCategory(aCh));
}
// Case mappings for the full Unicode range;
// note that it may be worth testing for ASCII chars and taking
// a separate fast-path before calling these, in perf-critical places
uint32_t GetUppercase(uint32_t aCh);
uint32_t GetLowercase(uint32_t aCh);
uint32_t GetTitlecaseForLower(uint32_t aCh); // maps LC to titlecase, UC unchanged
uint32_t GetTitlecaseForAll(uint32_t aCh); // maps both UC and LC to titlecase
// A simple iterator for a string of char16_t codepoints that advances
// by Unicode grapheme clusters
class ClusterIterator

View File

@ -214,6 +214,7 @@ typedef uint8_t DirProp;
// Constant subtracted when combining a surrogate pair: it cancels the 0xd800
// (shifted left by 10) and 0xdc00 bases of the two code units and adds back
// the 0x10000 offset of the supplementary range.
#define SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
// Combines a high (first) and low (second) surrogate code unit into a single
// UTF-32 code point. The arguments are not validated.
#define GET_UTF_32(first, second) (((first)<<10UL)+(second)-SURROGATE_OFFSET)
#if !ENABLE_INTL_API // these are provided by ICU if present in the build
#define UTF_ERROR_VALUE 0xffff
/* definitions with forward iteration --------------------------------------- */
@ -338,6 +339,8 @@ typedef uint8_t DirProp;
#define UTF_BACK_N(s, start, i, n) UTF_BACK_N_SAFE(s, start, i, n)
#define UTF_APPEND_CHAR(s, i, length, c) UTF_APPEND_CHAR_SAFE(s, i, length, c)
#endif // !ENABLE_INTL_API
struct Isolate {
int32_t start1;
int16_t stateImp;