Bug 1911550 - Make text-transform:capitalize work even if no textrun is available, for <select> intrinsic sizing. a=RyanVM

Original Revision: https://phabricator.services.mozilla.com/D218533 Differential Revision: https://phabricator.services.mozilla.com/D225214
2024-11-26 22:32:46 +00:00 · 2024-10-10 15:34:51 +00:00 · 2024-10-10 15:34:51 +00:00 · b57f0f7bf8
commit b57f0f7bf8
parent c7118866af
3 changed files with 127 additions and 98 deletions
--- a/dom/base/nsLineBreaker.cpp
+++ b/dom/base/nsLineBreaker.cpp
@ -78,63 +78,71 @@ nsLineBreaker::~nsLineBreaker() {
               "Should have Reset() before destruction!");
 }

+/* static */  // True iff aChar should be capitalized; see nsLineBreaker.h.
+bool nsLineBreaker::ShouldCapitalize(uint32_t aChar, bool& aCapitalizeNext) {
+  using mozilla::intl::GeneralCategory;
+  auto category = UnicodeProperties::CharType(aChar);
+  switch (category) {
+    case GeneralCategory::Uppercase_Letter:
+    case GeneralCategory::Lowercase_Letter:
+    case GeneralCategory::Titlecase_Letter:
+    case GeneralCategory::Modifier_Letter:
+    case GeneralCategory::Other_Letter:
+    case GeneralCategory::Decimal_Number:
+    case GeneralCategory::Letter_Number:
+    case GeneralCategory::Other_Number:
+      if (aCapitalizeNext) {  // first letter/number since a boundary
+        aCapitalizeNext = false;  // later chars of this word return false
+        return true;
+      }
+      break;
+    case GeneralCategory::Space_Separator:
+    case GeneralCategory::Line_Separator:
+    case GeneralCategory::Paragraph_Separator:
+    case GeneralCategory::Dash_Punctuation:
+    case GeneralCategory::Initial_Punctuation:
+      /* The categories commented out below are deliberately NOT treated as
+       * word boundaries, so that
+       *   "what colo[u]r" -> "What Colo[u]r?" (rather than "What Colo[U]R?")
+       *   "snake_case" -> "Snake_case" (to match word selection behavior)
+      case GeneralCategory::Open_Punctuation:
+      case GeneralCategory::Close_Punctuation:
+      case GeneralCategory::Connector_Punctuation:
+       */
+      aCapitalizeNext = true;  // word boundary: capitalize what follows
+      break;
+    case GeneralCategory::Final_Punctuation:
+      /* Special-case: U+2019 RIGHT SINGLE QUOTATION MARK doubles as an
+         apostrophe, so it is not a boundary (for examples like "Lowe’s"). */
+      if (aChar != 0x2019) {
+        aCapitalizeNext = true;
+      }
+      break;
+    case GeneralCategory::Other_Punctuation:
+      /* Special-case: exclude ASCII apostrophe, for "Lowe's" etc.,
+         and MIDDLE DOT (U+00B7), for Catalan "l·l". */
+      if (aChar != '\'' && aChar != 0x00B7) {
+        aCapitalizeNext = true;
+      }
+      break;
+    default:  // any other category leaves aCapitalizeNext untouched
+      break;
+  }
+  return false;  // aChar itself is not to be capitalized
+}
+
 static void SetupCapitalization(const char16_t* aWord, uint32_t aLength,
                                bool* aCapitalization) {
  // Capitalize the first alphanumeric character after a space or punctuation.
-  using mozilla::intl::GeneralCategory;
  bool capitalizeNextChar = true;
  for (uint32_t i = 0; i < aLength; ++i) {
    uint32_t ch = aWord[i];
    if (i + 1 < aLength && NS_IS_SURROGATE_PAIR(ch, aWord[i + 1])) {
      ch = SURROGATE_TO_UCS4(ch, aWord[i + 1]);
    }
-    auto category = UnicodeProperties::CharType(ch);
-    switch (category) {
-      case GeneralCategory::Uppercase_Letter:
-      case GeneralCategory::Lowercase_Letter:
-      case GeneralCategory::Titlecase_Letter:
-      case GeneralCategory::Modifier_Letter:
-      case GeneralCategory::Other_Letter:
-      case GeneralCategory::Decimal_Number:
-      case GeneralCategory::Letter_Number:
-      case GeneralCategory::Other_Number:
-        if (capitalizeNextChar) {
-          aCapitalization[i] = true;
-          capitalizeNextChar = false;
-        }
-        break;
-      case GeneralCategory::Space_Separator:
-      case GeneralCategory::Line_Separator:
-      case GeneralCategory::Paragraph_Separator:
-      case GeneralCategory::Dash_Punctuation:
-      case GeneralCategory::Initial_Punctuation:
-        /* These punctuation categories are excluded, for examples like
-         *   "what colo[u]r" -> "What Colo[u]r?" (rather than "What Colo[U]R?")
-         * and
-         *   "snake_case" -> "Snake_case" (to match word selection behavior)
-        case GeneralCategory::Open_Punctuation:
-        case GeneralCategory::Close_Punctuation:
-        case GeneralCategory::Connector_Punctuation:
-         */
-        capitalizeNextChar = true;
-        break;
-      case GeneralCategory::Final_Punctuation:
-        /* Special-case: exclude Unicode single-close-quote/apostrophe,
-           for examples like "Lowe’s" etc. */
-        if (ch != 0x2019) {
-          capitalizeNextChar = true;
-        }
-        break;
-      case GeneralCategory::Other_Punctuation:
-        /* Special-case: exclude ASCII apostrophe, for "Lowe's" etc.,
-           and MIDDLE DOT, for Catalan "l·l". */
-        if (ch != '\'' && ch != 0x00B7) {
-          capitalizeNextChar = true;
-        }
-        break;
-      default:
-        break;
-    }
+    aCapitalization[i] =
+        nsLineBreaker::ShouldCapitalize(ch, capitalizeNextChar);
+
    if (!IS_IN_BMP(ch)) {
      ++i;
    }
--- a/dom/base/nsLineBreaker.h
+++ b/dom/base/nsLineBreaker.h
@ -72,6 +72,13 @@ class nsLineBreaker {
    return mozilla::intl::NS_IsSpace(u);
  }

+  // Helper shared with nsCaseTransformTextRunFactory::TransformString.
+  // Returns true if aChar (a UCS4 code point) should be capitalized.
+  // aCapitalizeNext is in/out state: initialize it to true at start-of-text,
+  // then pass the same variable back in for each successive character; this
+  // function updates it as word boundaries are crossed.
+  static bool ShouldCapitalize(uint32_t aChar, bool& aCapitalizeNext);
+
  // Break opportunities exist at the end of each run of breakable whitespace
  // (see IsSpace above). Break opportunities can also exist between pairs of
  // non-whitespace characters, as determined by mozilla::intl::LineBreaker.
--- a/layout/generic/nsTextRunTransformations.cpp
+++ b/layout/generic/nsTextRunTransformations.cpp
@ -18,6 +18,7 @@
 #include "mozilla/TextEditor.h"
 #include "mozilla/gfx/2D.h"
 #include "nsGkAtoms.h"
+#include "nsLineBreaker.h"
 #include "nsSpecialCasingData.h"
 #include "nsStyleConsts.h"
 #include "nsTextFrameUtils.h"
@ -335,6 +336,10 @@ bool nsCaseTransformTextRunFactory::TransformString(
                                      // tonos added (if it is disjunctive eta)
  const char16_t kGreekUpperEta = 0x0397;

+  // If we're doing capitalization and don't have a textrun, this is the state
+  // to be passed to each call to nsLineBreaker::ShouldCapitalize.
+  bool capitalizeNext = true;
+
  for (uint32_t i = 0; i < length; ++i, ++aOffsetInTextRun) {
    uint32_t ch = str[i];

@ -365,7 +370,7 @@ bool nsCaseTransformTextRunFactory::TransformString(

    bool maskPassword = (charStyle && charStyle->mMaskPassword) || aMaskChar;
    int extraChars = 0;
-    const mozilla::unicode::MultiCharMapping* mcm;
+    const unicode::MultiCharMapping* mcm;
    bool inhibitBreakBefore = false;  // have we just deleted preceding hyphen?

    if (i < length - 1 && NS_IS_SURROGATE_PAIR(ch, str[i + 1])) {
@ -448,7 +453,7 @@ bool nsCaseTransformTextRunFactory::TransformString(
            }
          }

-          cat = mozilla::unicode::GetGenCategory(ch);
+          cat = unicode::GetGenCategory(ch);

          if (languageSpecificCasing == eLSCB_Irish &&
              cat == nsUGenCategory::kLetter) {
@ -528,7 +533,7 @@ bool nsCaseTransformTextRunFactory::TransformString(
            sigmaIndex = uint32_t(-1);
          }

-          mcm = mozilla::unicode::SpecialLower(ch);
+          mcm = unicode::SpecialLower(ch);
          if (mcm) {
            int j = 0;
            while (j < 2 && mcm->mMappedChars[j + 1]) {
@ -664,7 +669,7 @@ bool nsCaseTransformTextRunFactory::TransformString(
            break;
          }

-          mcm = mozilla::unicode::SpecialUpper(ch);
+          mcm = unicode::SpecialUpper(ch);
          if (mcm) {
            int j = 0;
            while (j < 2 && mcm->mMappedChars[j + 1]) {
@ -686,61 +691,70 @@ bool nsCaseTransformTextRunFactory::TransformString(
          }
          break;

-        case StyleTextTransform::CAPITALIZE._0:
+        case StyleTextTransform::CAPITALIZE._0: {
+          if (capitalizeDutchIJ && ch == 'j') {
+            ch = 'J';
+            capitalizeDutchIJ = false;
+            break;
+          }
+          capitalizeDutchIJ = false;
+          // If we have a textrun, its mCapitalize array tells us which chars
+          // are to be capitalized. If not, we track the state locally, and
+          // assume there's no context to be considered.
+          bool doCapitalize = false;
          if (aTextRun) {
-            if (capitalizeDutchIJ && ch == 'j') {
-              ch = 'J';
-              capitalizeDutchIJ = false;
+            if (aOffsetInTextRun < aTextRun->mCapitalize.Length()) {
+              doCapitalize = aTextRun->mCapitalize[aOffsetInTextRun];
+            }
+          } else {
+            doCapitalize = nsLineBreaker::ShouldCapitalize(ch, capitalizeNext);
+          }
+          if (doCapitalize) {
+            if (languageSpecificCasing == eLSCB_Turkish && ch == 'i') {
+              ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
              break;
            }
-            capitalizeDutchIJ = false;
-            if (aOffsetInTextRun < aTextRun->mCapitalize.Length() &&
-                aTextRun->mCapitalize[aOffsetInTextRun]) {
-              if (languageSpecificCasing == eLSCB_Turkish && ch == 'i') {
-                ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
+            if (languageSpecificCasing == eLSCB_Dutch && ch == 'i') {
+              ch = 'I';
+              capitalizeDutchIJ = true;
+              break;
+            }
+            if (languageSpecificCasing == eLSCB_Lithuanian) {
+              /*
+               * # Remove DOT ABOVE after "i" with upper or titlecase
+               *
+               * 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
+               */
+              if (ch == 'i' || ch == 'j' || ch == 0x012F) {
+                seenSoftDotted = true;
+                ch = ToTitleCase(ch);
                break;
              }
-              if (languageSpecificCasing == eLSCB_Dutch && ch == 'i') {
-                ch = 'I';
-                capitalizeDutchIJ = true;
-                break;
-              }
-              if (languageSpecificCasing == eLSCB_Lithuanian) {
-                /*
-                 * # Remove DOT ABOVE after "i" with upper or titlecase
-                 *
-                 * 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
-                 */
-                if (ch == 'i' || ch == 'j' || ch == 0x012F) {
-                  seenSoftDotted = true;
-                  ch = ToTitleCase(ch);
+              if (seenSoftDotted) {
+                seenSoftDotted = false;
+                if (ch == 0x0307) {
+                  ch = uint32_t(-1);
                  break;
                }
-                if (seenSoftDotted) {
-                  seenSoftDotted = false;
-                  if (ch == 0x0307) {
-                    ch = uint32_t(-1);
-                    break;
-                  }
-                }
              }
-
-              mcm = mozilla::unicode::SpecialTitle(ch);
-              if (mcm) {
-                int j = 0;
-                while (j < 2 && mcm->mMappedChars[j + 1]) {
-                  aConvertedString.Append(mcm->mMappedChars[j]);
-                  ++extraChars;
-                  ++j;
-                }
-                ch = mcm->mMappedChars[j];
-                break;
-              }
-
-              ch = ToTitleCase(ch);
            }
+
+            mcm = unicode::SpecialTitle(ch);
+            if (mcm) {
+              int j = 0;
+              while (j < 2 && mcm->mMappedChars[j + 1]) {
+                aConvertedString.Append(mcm->mMappedChars[j]);
+                ++extraChars;
+                ++j;
+              }
+              ch = mcm->mMappedChars[j];
+              break;
+            }
+
+            ch = ToTitleCase(ch);
          }
          break;
+        }

        case StyleTextTransform::MATH_AUTO._0:
          // text-transform: math-auto is used for automatic italicization of
@ -773,7 +787,7 @@ bool nsCaseTransformTextRunFactory::TransformString(

      if (!aCaseTransformsOnly) {
        if (!forceNonFullWidth && (style & StyleTextTransform::FULL_WIDTH)) {
-          ch = mozilla::unicode::GetFullWidth(ch);
+          ch = unicode::GetFullWidth(ch);
        }

        if (style & StyleTextTransform::FULL_SIZE_KANA) {
@ -825,7 +839,7 @@ bool nsCaseTransformTextRunFactory::TransformString(
      }

      if (forceNonFullWidth) {
-        ch = mozilla::unicode::GetFullWidthInverse(ch);
+        ch = unicode::GetFullWidthInverse(ch);
      }
    }