Bug 1383647 - Part 2: Add unicode::Can{Lower,Upper}Case specialized for Latin1 characters. r=jandem

This commit is contained in:
André Bargull 2017-08-01 11:35:35 -07:00
parent 404303b347
commit ae1bd3ebef
2 changed files with 37 additions and 5 deletions

View File

@ -942,10 +942,10 @@ ToLowerCase(JSContext* cx, JSLinearString* str)
// Look for the first character that changes when lowercased.
size_t i = 0;
for (; i < length; i++) {
char16_t c = chars[i];
CharT c = chars[i];
if (!IsSame<CharT, Latin1Char>::value) {
if (unicode::IsLeadSurrogate(c) && i + 1 < length) {
char16_t trail = chars[i + 1];
CharT trail = chars[i + 1];
if (unicode::IsTrailSurrogate(trail)) {
if (unicode::CanLowerCaseNonBMP(c, trail))
break;
@ -1252,10 +1252,10 @@ ToUpperCase(JSContext* cx, JSLinearString* str)
// Look for the first character that changes when uppercased.
size_t i = 0;
for (; i < length; i++) {
char16_t c = chars[i];
CharT c = chars[i];
if (!IsSame<CharT, Latin1Char>::value) {
if (unicode::IsLeadSurrogate(c) && i + 1 < length) {
char16_t trail = chars[i + 1];
CharT trail = chars[i + 1];
if (unicode::IsTrailSurrogate(trail)) {
if (unicode::CanUpperCaseNonBMP(c, trail))
break;
@ -1267,7 +1267,7 @@ ToUpperCase(JSContext* cx, JSLinearString* str)
}
if (unicode::CanUpperCase(c))
break;
if (MOZ_UNLIKELY(c > 0x7f && CanUpperCaseSpecialCasing(static_cast<CharT>(c))))
if (MOZ_UNLIKELY(c > 0x7f && CanUpperCaseSpecialCasing(c)))
break;
}

View File

@ -65,7 +65,11 @@ namespace CharFlag {
// U+00A0
constexpr char16_t NO_BREAK_SPACE = 0x00A0;
// U+00B5; CanUpperCase(JS::Latin1Char) reports this as having an uppercase form.
constexpr char16_t MICRO_SIGN = 0x00B5;
// U+00C0; first value of the 32-value block (0xC0..0xDF) tested by
// CanLowerCase(JS::Latin1Char).
constexpr char16_t LATIN_CAPITAL_LETTER_A_WITH_GRAVE = 0x00C0;
// U+00D7; also used as a bit mask by CanLowerCase(JS::Latin1Char) to reject
// itself and U+00DF within the 0xC0..0xDF block.
constexpr char16_t MULTIPLICATION_SIGN = 0x00D7;
// U+00DF
constexpr char16_t LATIN_SMALL_LETTER_SHARP_S = 0x00DF;
// U+00E0; first value of the 32-value block (0xE0..0xFF) tested by
// CanUpperCase(JS::Latin1Char).
constexpr char16_t LATIN_SMALL_LETTER_A_WITH_GRAVE = 0x00E0;
// U+00F7; explicitly excluded by CanUpperCase(JS::Latin1Char).
constexpr char16_t DIVISION_SIGN = 0x00F7;
// U+00FF
constexpr char16_t LATIN_SMALL_LETTER_Y_WITH_DIAERESIS = 0x00FF;
// U+0130
constexpr char16_t LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE = 0x0130;
// U+0307
constexpr char16_t COMBINING_DOT_ABOVE = 0x0307;
@ -294,6 +298,20 @@ CanUpperCase(char16_t ch)
return CharInfo(ch).upperCase != 0;
}
// Latin-1 overload: returns true iff ToUpperCase(ch) != ch.
//
// Avoids the CharInfo table lookup of the char16_t overload by testing the
// few Latin-1 ranges directly; debug builds cross-check the answer against
// the char16_t overload.
inline bool
CanUpperCase(JS::Latin1Char ch)
{
    // ASCII: only 'a'..'z' change when uppercased.
    if (MOZ_LIKELY(ch < 128))
        return 'a' <= ch && ch <= 'z';

    // Above ASCII: U+00B5 and the block U+00E0..U+00FF have an uppercase
    // form, with U+00F7 (DIVISION SIGN) as the single exception in that block.
    bool hasUpperForm;
    if (ch == MICRO_SIGN) {
        hasUpperForm = true;
    } else if (ch == DIVISION_SIGN) {
        hasUpperForm = false;
    } else {
        // Clearing the low five bits maps every value in 0xE0..0xFF to 0xE0.
        hasUpperForm = (ch & ~0x1F) == LATIN_SMALL_LETTER_A_WITH_GRAVE;
    }

    MOZ_ASSERT(hasUpperForm == CanUpperCase(char16_t(ch)));
    return hasUpperForm;
}
// Returns true iff ToLowerCase(ch) != ch.
inline bool
CanLowerCase(char16_t ch)
@ -303,6 +321,20 @@ CanLowerCase(char16_t ch)
return CharInfo(ch).lowerCase != 0;
}
// Latin-1 overload: returns true iff ToLowerCase(ch) != ch.
//
// Avoids the CharInfo table lookup of the char16_t overload by testing the
// few Latin-1 ranges directly; debug builds cross-check the answer against
// the char16_t overload.
inline bool
CanLowerCase(JS::Latin1Char ch)
{
    // ASCII: only 'A'..'Z' change when lowercased.
    if (MOZ_LIKELY(ch < 128))
        return 'A' <= ch && ch <= 'Z';

    // Above ASCII: U+00C0..U+00DE have a lowercase form, except U+00D7.
    //
    // Clearing the low five bits maps every value in 0xC0..0xDF to 0xC0.
    const bool inCapitalBlock = (ch & ~0x1F) == LATIN_CAPITAL_LETTER_A_WITH_GRAVE;

    // Within 0xC0..0xDF, the only values with every bit of 0xD7 set are 0xD7
    // itself and 0xDF, so this one mask test rejects both of them.
    const bool hasAllMaskBits = (ch & MULTIPLICATION_SIGN) == MULTIPLICATION_SIGN;

    bool hasLowerForm = inCapitalBlock && !hasAllMaskBits;
    MOZ_ASSERT(hasLowerForm == CanLowerCase(char16_t(ch)));
    return hasLowerForm;
}
// Expands to an early `return true` when `lead` equals LEAD and `trail` lies
// in the inclusive range [TRAIL_FROM, TRAIL_TO]. `lead` and `trail` are not
// macro parameters: they are picked up from the scope where the macro is
// expanded. FROM, TO and DIFF are accepted but unused by this expansion.
#define CHECK_RANGE(FROM, TO, LEAD, TRAIL_FROM, TRAIL_TO, DIFF) \
if (lead == LEAD && trail >= TRAIL_FROM && trail <= TRAIL_TO) \
return true;