Bug 1730084 Part 5 - Remove WordBreaker::BreakInBetween(). r=jfkthame

The motivation of this patch is to remove a rarely used API from WordBreaker. WordBreaker::BreakInBetween() is used only by nsFind::BreakInBetween() in production, and it can be replaced by Next(). If a caller wants to know whether there is a word break between two strings, as in the gtest use cases, the preferred way is to join the two strings and pass the result to Next(). Note: I deleted the buggy forward word search algorithm in TestFindWordBreakFromPosition() because, judging from the test expectations, the test doesn't expect the search to continue into previous fragments. Also, the buggy part comes from the following code, which had undefined behavior before Part 4 and does nothing after Part 4: `wbk->FindWord(prevFragText.get(), prevFragText.Length(), prevFragText.Length());` Differential Revision: https://phabricator.services.mozilla.com/D125151
2024-10-07 18:04:46 +00:00 · 2021-09-13 23:55:33 +00:00 · 2021-09-13 23:55:33 +00:00 · f8152b2b24
commit f8152b2b24
parent bd25bca479
4 changed files with 42 additions and 70 deletions
--- a/intl/lwbrk/WordBreaker.cpp
+++ b/intl/lwbrk/WordBreaker.cpp
@ -20,29 +20,6 @@ already_AddRefed<WordBreaker> WordBreaker::Create() {
  return RefPtr<WordBreaker>(new WordBreaker()).forget();
 }

-bool WordBreaker::BreakInBetween(const char16_t* aText1, uint32_t aTextLen1,
-                                 const char16_t* aText2, uint32_t aTextLen2) {
-  MOZ_ASSERT(nullptr != aText1, "null ptr");
-  MOZ_ASSERT(nullptr != aText2, "null ptr");
-
-  if (!aText1 || !aText2 || (0 == aTextLen1) || (0 == aTextLen2)) return false;
-
-  uint8_t c1 = GetClass(aText1[aTextLen1 - 1]);
-  uint8_t c2 = GetClass(aText2[0]);
-
-  if (c1 == c2 && kWbClassScriptioContinua == c1) {
-    nsAutoString text(aText1, aTextLen1);
-    text.Append(aText2, aTextLen2);
-    AutoTArray<uint8_t, 256> breakBefore;
-    breakBefore.SetLength(aTextLen1 + aTextLen2);
-    NS_GetComplexLineBreaks(text.get(), text.Length(), breakBefore.Elements());
-    bool ret = breakBefore[aTextLen1];
-    return ret;
-  }
-
-  return (c1 != c2);
-}
-
 #define IS_ASCII(c) (0 == (0xFF80 & (c)))
 #define ASCII_IS_ALPHA(c) \
  ((('a' <= (c)) && ((c) <= 'z')) || (('A' <= (c)) && ((c) <= 'Z')))
--- a/intl/lwbrk/WordBreaker.h
+++ b/intl/lwbrk/WordBreaker.h
@ -24,9 +24,6 @@ class WordBreaker {

  static already_AddRefed<WordBreaker> Create();

-  bool BreakInBetween(const char16_t* aText1, uint32_t aTextLen1,
-                      const char16_t* aText2, uint32_t aTextLen2);
-
  // Find the word boundary by scanning forward and backward from aPos.
  //
  // @return WordRange where mBegin equals to the offset to first character in
--- a/intl/lwbrk/gtest/TestBreak.cpp
+++ b/intl/lwbrk/gtest/TestBreak.cpp
@ -208,20 +208,31 @@ TEST(WordBreak, TestPrintWordWithBreak)

    if (i != numOfFragment - 1) {
      NS_ConvertASCIItoUTF16 nextFragText(wb[i + 1]);
+      if (nextFragText.IsEmpty()) {
+        // If nextFragText is empty, there's no new possible word break
+        // opportunity.
+        continue;
+      }

-      bool canBreak = true;
-      canBreak = wbk->BreakInBetween(fragText.get(), fragText.Length(),
-                                     nextFragText.get(), nextFragText.Length());
+      const auto origFragLen = static_cast<int32_t>(fragText.Length());
+      fragText.Append(nextFragText);
+
+      bool canBreak =
+          origFragLen ==
+          wbk->Next(fragText.get(), fragText.Length(), origFragLen - 1);
      if (canBreak) {
        result.Append('^');
      }
-      fragText.Assign(nextFragText);
    }
  }
  ASSERT_STREQ("This^   ^is^ ^a^ ^internationalization^ ^work^.",
               NS_ConvertUTF16toUTF8(result).get());
 }

+// This function searches a complete word starting from |offset| in wb[fragN].
+// If it reaches the end of wb[fragN], and there is no word break opportunity
+// between wb[fragN] and wb[fragN+1], it will continue the search in wb[fragN+1]
+// until a word break.
 void TestFindWordBreakFromPosition(uint32_t fragN, uint32_t offset,
                                   const char* expected) {
  uint32_t numOfFragment = sizeof(wb) / sizeof(char*);
@ -232,16 +243,24 @@ void TestFindWordBreakFromPosition(uint32_t fragN, uint32_t offset,
  mozilla::intl::WordRange res =
      wbk->FindWord(fragText.get(), fragText.Length(), offset);

-  bool canBreak;
  nsAutoString result(Substring(fragText, res.mBegin, res.mEnd - res.mBegin));

-  if ((uint32_t)fragText.Length() == res.mEnd) {
+  if ((uint32_t)fragText.Length() <= res.mEnd) {
    // if we hit the end of the fragment
    nsAutoString curFragText = fragText;
    for (uint32_t p = fragN + 1; p < numOfFragment; p++) {
      NS_ConvertASCIItoUTF16 nextFragText(wb[p]);
-      canBreak = wbk->BreakInBetween(curFragText.get(), curFragText.Length(),
-                                     nextFragText.get(), nextFragText.Length());
+      if (nextFragText.IsEmpty()) {
+        // If nextFragText is empty, there's no new possible word break
+        // opportunity between curFragText and nextFragText.
+        continue;
+      }
+
+      const auto origFragLen = static_cast<int32_t>(curFragText.Length());
+      curFragText.Append(nextFragText);
+      bool canBreak =
+          origFragLen ==
+          wbk->Next(curFragText.get(), curFragText.Length(), origFragLen - 1);
      if (canBreak) {
        break;
      }
@ -253,29 +272,6 @@ void TestFindWordBreakFromPosition(uint32_t fragN, uint32_t offset,
      if ((uint32_t)nextFragText.Length() != r.mEnd) {
        break;
      }
-      nextFragText.Assign(curFragText);
-    }
-  }
-
-  if (0 == res.mBegin) {
-    // if we hit the beginning of the fragment
-    nsAutoString curFragText = fragText;
-    for (uint32_t p = fragN; p > 0; p--) {
-      NS_ConvertASCIItoUTF16 prevFragText(wb[p - 1]);
-      canBreak = wbk->BreakInBetween(prevFragText.get(), prevFragText.Length(),
-                                     curFragText.get(), curFragText.Length());
-      if (canBreak) {
-        break;
-      }
-      mozilla::intl::WordRange r = wbk->FindWord(
-          prevFragText.get(), prevFragText.Length(), prevFragText.Length());
-
-      result.Insert(Substring(prevFragText, r.mBegin, r.mEnd - r.mBegin), 0);
-
-      if (0 != r.mBegin) {
-        break;
-      }
-      prevFragText.Assign(curFragText);
    }
  }

--- a/toolkit/components/find/nsFind.cpp
+++ b/toolkit/components/find/nsFind.cpp
@ -504,25 +504,27 @@ char32_t nsFind::DecodeChar(const char16_t* t2b, int32_t* index) const {
 }

 bool nsFind::BreakInBetween(char32_t x, char32_t y) const {
-  char16_t x16[2], y16[2];
-  int32_t x16len, y16len;
+  char16_t text[4];
+  int32_t textLen;
  if (IS_IN_BMP(x)) {
-    x16[0] = (char16_t)x;
-    x16len = 1;
+    text[0] = (char16_t)x;
+    textLen = 1;
  } else {
-    x16[0] = H_SURROGATE(x);
-    x16[1] = L_SURROGATE(x);
-    x16len = 2;
+    text[0] = H_SURROGATE(x);
+    text[1] = L_SURROGATE(x);
+    textLen = 2;
  }
+
+  const int32_t x16Len = textLen;
  if (IS_IN_BMP(y)) {
-    y16[0] = (char16_t)y;
-    y16len = 1;
+    text[textLen] = (char16_t)y;
+    textLen += 1;
  } else {
-    y16[0] = H_SURROGATE(y);
-    y16[1] = L_SURROGATE(y);
-    y16len = 2;
+    text[textLen] = H_SURROGATE(y);
+    text[textLen + 1] = L_SURROGATE(y);
+    textLen += 2;
  }
-  return mWordBreaker->BreakInBetween(x16, x16len, y16, y16len);
+  return mWordBreaker->Next(text, textLen, x16Len - 1) == x16Len;
 }

 char32_t nsFind::PeekNextChar(State& aState, bool aAlreadyMatching) const {