Bug 1730084 Part 5 - Remove WordBreaker::BreakInBetween(). r=jfkthame

The motivation of this patch is to remove a rarely used API from
WordBreaker. WordBreaker::BreakInBetween() is used only in
nsFind::BreakInBetween() in production, and it can be replaced by
Next().

If the user wants to know whether there is a word break between two
strings, as in the gtest use cases, joining the two strings and
passing the result to Next() is the preferred way.

Note: I deleted the buggy backward word search algorithm in
TestFindWordBreakFromPosition() because the test expectations don't
expect the search to continue in previous fragments. Also, the
buggy part comes from the following code, which had undefined behavior
before Part 4, and does nothing after Part 4.

```
wbk->FindWord(prevFragText.get(), prevFragText.Length(), prevFragText.Length());
```

Differential Revision: https://phabricator.services.mozilla.com/D125151
This commit is contained in:
Ting-Yu Lin 2021-09-13 23:55:33 +00:00
parent bd25bca479
commit f8152b2b24
4 changed files with 42 additions and 70 deletions

View File

@ -20,29 +20,6 @@ already_AddRefed<WordBreaker> WordBreaker::Create() {
return RefPtr<WordBreaker>(new WordBreaker()).forget();
}
bool WordBreaker::BreakInBetween(const char16_t* aText1, uint32_t aTextLen1,
const char16_t* aText2, uint32_t aTextLen2) {
MOZ_ASSERT(nullptr != aText1, "null ptr");
MOZ_ASSERT(nullptr != aText2, "null ptr");
if (!aText1 || !aText2 || (0 == aTextLen1) || (0 == aTextLen2)) return false;
uint8_t c1 = GetClass(aText1[aTextLen1 - 1]);
uint8_t c2 = GetClass(aText2[0]);
if (c1 == c2 && kWbClassScriptioContinua == c1) {
nsAutoString text(aText1, aTextLen1);
text.Append(aText2, aTextLen2);
AutoTArray<uint8_t, 256> breakBefore;
breakBefore.SetLength(aTextLen1 + aTextLen2);
NS_GetComplexLineBreaks(text.get(), text.Length(), breakBefore.Elements());
bool ret = breakBefore[aTextLen1];
return ret;
}
return (c1 != c2);
}
#define IS_ASCII(c) (0 == (0xFF80 & (c)))
#define ASCII_IS_ALPHA(c) \
((('a' <= (c)) && ((c) <= 'z')) || (('A' <= (c)) && ((c) <= 'Z')))

View File

@ -24,9 +24,6 @@ class WordBreaker {
static already_AddRefed<WordBreaker> Create();
bool BreakInBetween(const char16_t* aText1, uint32_t aTextLen1,
const char16_t* aText2, uint32_t aTextLen2);
// Find the word boundary by scanning forward and backward from aPos.
//
// @return WordRange where mBegin equals to the offset to first character in

View File

@ -208,20 +208,31 @@ TEST(WordBreak, TestPrintWordWithBreak)
if (i != numOfFragment - 1) {
NS_ConvertASCIItoUTF16 nextFragText(wb[i + 1]);
if (nextFragText.IsEmpty()) {
// If nextFragText is empty, there's no new possible word break
// opportunity.
continue;
}
bool canBreak = true;
canBreak = wbk->BreakInBetween(fragText.get(), fragText.Length(),
nextFragText.get(), nextFragText.Length());
const auto origFragLen = static_cast<int32_t>(fragText.Length());
fragText.Append(nextFragText);
bool canBreak =
origFragLen ==
wbk->Next(fragText.get(), fragText.Length(), origFragLen - 1);
if (canBreak) {
result.Append('^');
}
fragText.Assign(nextFragText);
}
}
ASSERT_STREQ("This^ ^is^ ^a^ ^internationalization^ ^work^.",
NS_ConvertUTF16toUTF8(result).get());
}
// This function searches a complete word starting from |offset| in wb[fragN].
// If it reaches the end of wb[fragN], and there is no word break opportunity
// between wb[fragN] and wb[fragN+1], it will continue the search in wb[fragN+1]
// until a word break.
void TestFindWordBreakFromPosition(uint32_t fragN, uint32_t offset,
const char* expected) {
uint32_t numOfFragment = sizeof(wb) / sizeof(char*);
@ -232,16 +243,24 @@ void TestFindWordBreakFromPosition(uint32_t fragN, uint32_t offset,
mozilla::intl::WordRange res =
wbk->FindWord(fragText.get(), fragText.Length(), offset);
bool canBreak;
nsAutoString result(Substring(fragText, res.mBegin, res.mEnd - res.mBegin));
if ((uint32_t)fragText.Length() == res.mEnd) {
if ((uint32_t)fragText.Length() <= res.mEnd) {
// if we hit the end of the fragment
nsAutoString curFragText = fragText;
for (uint32_t p = fragN + 1; p < numOfFragment; p++) {
NS_ConvertASCIItoUTF16 nextFragText(wb[p]);
canBreak = wbk->BreakInBetween(curFragText.get(), curFragText.Length(),
nextFragText.get(), nextFragText.Length());
if (nextFragText.IsEmpty()) {
// If nextFragText is empty, there's no new possible word break
// opportunity between curFragText and nextFragText.
continue;
}
const auto origFragLen = static_cast<int32_t>(curFragText.Length());
curFragText.Append(nextFragText);
bool canBreak =
origFragLen ==
wbk->Next(curFragText.get(), curFragText.Length(), origFragLen - 1);
if (canBreak) {
break;
}
@ -253,29 +272,6 @@ void TestFindWordBreakFromPosition(uint32_t fragN, uint32_t offset,
if ((uint32_t)nextFragText.Length() != r.mEnd) {
break;
}
nextFragText.Assign(curFragText);
}
}
if (0 == res.mBegin) {
// if we hit the beginning of the fragment
nsAutoString curFragText = fragText;
for (uint32_t p = fragN; p > 0; p--) {
NS_ConvertASCIItoUTF16 prevFragText(wb[p - 1]);
canBreak = wbk->BreakInBetween(prevFragText.get(), prevFragText.Length(),
curFragText.get(), curFragText.Length());
if (canBreak) {
break;
}
mozilla::intl::WordRange r = wbk->FindWord(
prevFragText.get(), prevFragText.Length(), prevFragText.Length());
result.Insert(Substring(prevFragText, r.mBegin, r.mEnd - r.mBegin), 0);
if (0 != r.mBegin) {
break;
}
prevFragText.Assign(curFragText);
}
}

View File

@ -504,25 +504,27 @@ char32_t nsFind::DecodeChar(const char16_t* t2b, int32_t* index) const {
}
bool nsFind::BreakInBetween(char32_t x, char32_t y) const {
char16_t x16[2], y16[2];
int32_t x16len, y16len;
char16_t text[4];
int32_t textLen;
if (IS_IN_BMP(x)) {
x16[0] = (char16_t)x;
x16len = 1;
text[0] = (char16_t)x;
textLen = 1;
} else {
x16[0] = H_SURROGATE(x);
x16[1] = L_SURROGATE(x);
x16len = 2;
text[0] = H_SURROGATE(x);
text[1] = L_SURROGATE(x);
textLen = 2;
}
const int32_t x16Len = textLen;
if (IS_IN_BMP(y)) {
y16[0] = (char16_t)y;
y16len = 1;
text[textLen] = (char16_t)y;
textLen += 1;
} else {
y16[0] = H_SURROGATE(y);
y16[1] = L_SURROGATE(y);
y16len = 2;
text[textLen] = H_SURROGATE(y);
text[textLen + 1] = L_SURROGATE(y);
textLen += 2;
}
return mWordBreaker->BreakInBetween(x16, x16len, y16, y16len);
return mWordBreaker->Next(text, textLen, x16Len - 1) == x16Len;
}
char32_t nsFind::PeekNextChar(State& aState, bool aAlreadyMatching) const {