mirror of
https://github.com/mozilla/gecko-dev.git
synced 2025-04-11 18:32:31 +00:00
bug 672472 - convert hyphenation-point offsets correctly from Unicode characters to UTF16 code units. r=smontagu
This commit is contained in:
parent
c62529f8bd
commit
938babc448
@ -85,8 +85,19 @@ nsHyphenator::Hyphenate(const nsAString& aString,
|
|||||||
|
|
||||||
PRBool inWord = PR_FALSE;
|
PRBool inWord = PR_FALSE;
|
||||||
PRUint32 wordStart = 0, wordLimit = 0;
|
PRUint32 wordStart = 0, wordLimit = 0;
|
||||||
for (PRUint32 i = 0; i < aString.Length(); i++) {
|
PRUint32 chLen;
|
||||||
PRUnichar ch = aString[i];
|
for (PRUint32 i = 0; i < aString.Length(); i += chLen) {
|
||||||
|
PRUint32 ch = aString[i];
|
||||||
|
chLen = 1;
|
||||||
|
|
||||||
|
if (NS_IS_HIGH_SURROGATE(ch)) {
|
||||||
|
if (i + 1 < aString.Length() && NS_IS_LOW_SURROGATE(aString[i+1])) {
|
||||||
|
ch = SURROGATE_TO_UCS4(ch, aString[i+1]);
|
||||||
|
chLen = 2;
|
||||||
|
} else {
|
||||||
|
NS_WARNING("unpaired surrogate found during hyphenation");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
nsIUGenCategory::nsUGenCategory cat = mCategories->Get(ch);
|
nsIUGenCategory::nsUGenCategory cat = mCategories->Get(ch);
|
||||||
if (cat == nsIUGenCategory::kLetter || cat == nsIUGenCategory::kMark) {
|
if (cat == nsIUGenCategory::kLetter || cat == nsIUGenCategory::kMark) {
|
||||||
@ -94,14 +105,15 @@ nsHyphenator::Hyphenate(const nsAString& aString,
|
|||||||
inWord = PR_TRUE;
|
inWord = PR_TRUE;
|
||||||
wordStart = i;
|
wordStart = i;
|
||||||
}
|
}
|
||||||
wordLimit = i + 1;
|
wordLimit = i + chLen;
|
||||||
if (i < aString.Length() - 1) {
|
if (i + chLen < aString.Length()) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (inWord) {
|
if (inWord) {
|
||||||
NS_ConvertUTF16toUTF8 utf8(aString.BeginReading() + wordStart,
|
const PRUnichar *begin = aString.BeginReading();
|
||||||
|
NS_ConvertUTF16toUTF8 utf8(begin + wordStart,
|
||||||
wordLimit - wordStart);
|
wordLimit - wordStart);
|
||||||
nsAutoTArray<char,200> utf8hyphens;
|
nsAutoTArray<char,200> utf8hyphens;
|
||||||
utf8hyphens.SetLength(utf8.Length() + 5);
|
utf8hyphens.SetLength(utf8.Length() + 5);
|
||||||
@ -113,29 +125,24 @@ nsHyphenator::Hyphenate(const nsAString& aString,
|
|||||||
utf8hyphens.Elements(), nsnull,
|
utf8hyphens.Elements(), nsnull,
|
||||||
&rep, &pos, &cut);
|
&rep, &pos, &cut);
|
||||||
if (!err) {
|
if (!err) {
|
||||||
PRUint32 utf16offset = wordStart;
|
// Surprisingly, hnj_hyphen_hyphenate2 converts the 'hyphens' buffer
|
||||||
const char *cp = utf8.BeginReading();
|
// from utf8 code unit indexing (which would match the utf8 input
|
||||||
while (cp < utf8.EndReading()) {
|
// string directly) to Unicode character indexing.
|
||||||
if (UTF8traits::isASCII(*cp)) { // single-byte utf8 char
|
// We then need to convert this to utf16 code unit offsets for Gecko.
|
||||||
cp++;
|
const char *hyphPtr = utf8hyphens.Elements();
|
||||||
utf16offset++;
|
const PRUnichar *cur = begin + wordStart;
|
||||||
} else if (UTF8traits::is2byte(*cp)) { // 2-byte sequence
|
const PRUnichar *end = begin + wordLimit;
|
||||||
cp += 2;
|
while (cur < end) {
|
||||||
utf16offset++;
|
if (*hyphPtr & 0x01) {
|
||||||
} else if (UTF8traits::is3byte(*cp)) { // 3-byte sequence
|
aHyphens[cur - begin] = PR_TRUE;
|
||||||
cp += 3;
|
|
||||||
utf16offset++;
|
|
||||||
} else { // must be a 4-byte sequence (no need to check validity,
|
|
||||||
// as this was just created with NS_ConvertUTF16toUTF8)
|
|
||||||
NS_ASSERTION(UTF8traits::is4byte(*cp), "unexpected utf8 byte");
|
|
||||||
cp += 4;
|
|
||||||
utf16offset += 2;
|
|
||||||
}
|
}
|
||||||
NS_ASSERTION(cp <= utf8.EndReading(), "incomplete utf8 string?");
|
cur++;
|
||||||
NS_ASSERTION(utf16offset <= aString.Length(), "length mismatch?");
|
if (cur < end && NS_IS_LOW_SURROGATE(*cur) &&
|
||||||
if (utf8hyphens[cp - utf8.BeginReading() - 1] & 0x01) {
|
NS_IS_HIGH_SURROGATE(*(cur-1)))
|
||||||
aHyphens[utf16offset - 1] = PR_TRUE;
|
{
|
||||||
|
cur++;
|
||||||
}
|
}
|
||||||
|
hyphPtr++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user