mirror of
https://github.com/mozilla/gecko-dev.git
synced 2025-01-27 07:34:20 +00:00
Bug 1105644 - Lowercase words before passing them to libhyphen, so as to match patterns fully. r=smontagu
This commit is contained in:
parent
22671df931
commit
32ba31eb54
@ -80,9 +80,47 @@ nsHyphenator::Hyphenate(const nsAString& aString,
|
||||
}
|
||||
|
||||
if (inWord) {
|
||||
const char16_t *begin = aString.BeginReading();
|
||||
NS_ConvertUTF16toUTF8 utf8(begin + wordStart,
|
||||
wordLimit - wordStart);
|
||||
// Convert the word to utf-8 for libhyphen, lowercasing it as we go
|
||||
// so that it will match the (lowercased) patterns (bug 1105644).
|
||||
nsAutoCString utf8;
|
||||
const char16_t* const begin = aString.BeginReading();
|
||||
const char16_t *cur = begin + wordStart;
|
||||
const char16_t *end = begin + wordLimit;
|
||||
while (cur < end) {
|
||||
uint32_t ch = *cur++;
|
||||
|
||||
if (NS_IS_HIGH_SURROGATE(ch)) {
|
||||
if (cur < end && NS_IS_LOW_SURROGATE(*cur)) {
|
||||
ch = SURROGATE_TO_UCS4(ch, *cur++);
|
||||
} else {
|
||||
ch = 0xfffd; // unpaired surrogate, treat as REPLACEMENT CHAR
|
||||
}
|
||||
} else if (NS_IS_LOW_SURROGATE(ch)) {
|
||||
ch = 0xfffd; // unpaired surrogate
|
||||
}
|
||||
|
||||
// XXX What about language-specific casing? Consider Turkish I/i...
|
||||
// In practice, it looks like the current patterns will not be
|
||||
// affected by this, as they treat dotted and undotted i similarly.
|
||||
ch = ToLowerCase(ch);
|
||||
|
||||
if (ch < 0x80) { // U+0000 - U+007F
|
||||
utf8.Append(ch);
|
||||
} else if (ch < 0x0800) { // U+0100 - U+07FF
|
||||
utf8.Append(0xC0 | (ch >> 6));
|
||||
utf8.Append(0x80 | (0x003F & ch));
|
||||
} else if (ch < 0x10000) { // U+0800 - U+D7FF,U+E000 - U+FFFF
|
||||
utf8.Append(0xE0 | (ch >> 12));
|
||||
utf8.Append(0x80 | (0x003F & (ch >> 6)));
|
||||
utf8.Append(0x80 | (0x003F & ch));
|
||||
} else {
|
||||
utf8.Append(0xF0 | (ch >> 18));
|
||||
utf8.Append(0x80 | (0x003F & (ch >> 12)));
|
||||
utf8.Append(0x80 | (0x003F & (ch >> 6)));
|
||||
utf8.Append(0x80 | (0x003F & ch));
|
||||
}
|
||||
}
|
||||
|
||||
nsAutoTArray<char,200> utf8hyphens;
|
||||
utf8hyphens.SetLength(utf8.Length() + 5);
|
||||
char **rep = nullptr;
|
||||
|
Loading…
x
Reference in New Issue
Block a user