Use GetGeneralCategory in IsPunctuationMark instead of the precompiled ccmap. Bug 731222, r=jfkthame

2024-10-15 06:15:43 +00:00 · 2012-03-01 00:17:27 -08:00 · 2012-03-01 00:17:27 -08:00 · c9433228bb
commit c9433228bb
parent c6228ce7b5
6 changed files with 23 additions and 1370 deletions
--- a/content/base/public/nsContentUtils.h
+++ b/content/base/public/nsContentUtils.h
@@ -422,8 +422,8 @@ public:
  /**
   * Returns true if aChar is of class Ps, Pi, Po, Pf, or Pe.
   */
-  static bool IsPunctuationMark(PRUint32 aChar);
-  static bool IsPunctuationMarkAt(const nsTextFragment* aFrag, PRUint32 aOffset);
+  static bool IsFirstLetterPunctuation(PRUint32 aChar);
+  static bool IsFirstLetterPunctuationAt(const nsTextFragment* aFrag, PRUint32 aOffset);
 
  /**
   * Returns true if aChar is of class Lu, Ll, Lt, Lm, Lo, Nd, Nl or No
--- a/content/base/src/Makefile.in
+++ b/content/base/src/Makefile.in
@@ -210,6 +210,7 @@ INCLUDES	+= \
 		$(NULL)

 DEFINES += -D_IMPL_NS_LAYOUT
+DEFINES += -DHB_DONT_DEFINE_STDINT

 # gcc requires -msse2 for this file since it uses SSE2 intrinsics.  (See bug
 # 585538 comment 12.)
--- a/content/base/src/nsContentUtils.cpp
+++ b/content/base/src/nsContentUtils.cpp
@@ -123,6 +123,7 @@ static NS_DEFINE_CID(kXTFServiceCID, NS_XTFSERVICE_CID);
 #include "nsILineBreaker.h"
 #include "nsIWordBreaker.h"
 #include "nsUnicodeProperties.h"
+#include "harfbuzz/hb-common.h"
 #include "jsdbgapi.h"
 #include "nsIJSRuntimeService.h"
 #include "nsIDOMDocumentXBL.h"
@@ -1117,34 +1118,38 @@ nsContentUtils::CopyNewlineNormalizedUnicodeTo(nsReadingIterator<PRUnichar>& aSr
  return normalizer.GetCharsWritten();
 }

-// Replaced by precompiled CCMap (see bug 180266). To update the list
-// of characters, see one of files included below. As for the way
-// the original list of characters was obtained by Frank Tang, see bug 54467.
-// Updated to fix the regression (bug 263411). The list contains
-// characters of the following Unicode character classes : Ps, Pi, Po, Pf, Pe.
-// (ref.: http://www.w3.org/TR/2004/CR-CSS21-20040225/selector.html#first-letter)
-#include "punct_marks.x-ccmap"
-DEFINE_X_CCMAP(gPuncCharsCCMapExt, const);
+/**
+ * This is used to determine whether a character is in one of the punctuation
+ * mark classes which CSS says should be part of the first-letter.
+ * See http://www.w3.org/TR/CSS2/selector.html#first-letter and
+ *     http://www.w3.org/TR/selectors/#first-letter
+ */

 // static
 bool
-nsContentUtils::IsPunctuationMark(PRUint32 aChar)
+nsContentUtils::IsFirstLetterPunctuation(PRUint32 aChar)
 {
-  return CCMAP_HAS_CHAR_EXT(gPuncCharsCCMapExt, aChar);
+  PRUint8 cat = mozilla::unicode::GetGeneralCategory(aChar);
+
+  return (cat == HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION ||     // Ps
+          cat == HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION ||    // Pe
+          cat == HB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION ||  // Pi
+          cat == HB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION ||    // Pf
+          cat == HB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION);     // Po
 }

 // static
 bool
-nsContentUtils::IsPunctuationMarkAt(const nsTextFragment* aFrag, PRUint32 aOffset)
+nsContentUtils::IsFirstLetterPunctuationAt(const nsTextFragment* aFrag, PRUint32 aOffset)
 {
  PRUnichar h = aFrag->CharAt(aOffset);
  if (!IS_SURROGATE(h)) {
-    return IsPunctuationMark(h);
+    return IsFirstLetterPunctuation(h);
  }
  if (NS_IS_HIGH_SURROGATE(h) && aOffset + 1 < aFrag->GetLength()) {
    PRUnichar l = aFrag->CharAt(aOffset + 1);
    if (NS_IS_LOW_SURROGATE(l)) {
-      return IsPunctuationMark(SURROGATE_TO_UCS4(h, l));
+      return IsFirstLetterPunctuation(SURROGATE_TO_UCS4(h, l));
    }
  }
  return false;
--- a/content/base/src/nsLineBreaker.cpp
+++ b/content/base/src/nsLineBreaker.cpp
@@ -65,7 +65,7 @@ SetupCapitalization(const PRUnichar* aWord, PRUint32 aLength,
  // The only space character a word can contain is NBSP.
  bool capitalizeNextChar = true;
  for (PRUint32 i = 0; i < aLength; ++i) {
-    if (capitalizeNextChar && !nsContentUtils::IsPunctuationMark(aWord[i])) {
+    if (capitalizeNextChar && !nsContentUtils::IsFirstLetterPunctuation(aWord[i])) {
      aCapitalization[i] = true;
      capitalizeNextChar = false;
    }
--- a/layout/generic/nsTextFrameThebes.cpp
+++ b/layout/generic/nsTextFrameThebes.cpp
@@ -6444,7 +6444,7 @@ FindEndOfPunctuationRun(const nsTextFragment* aFrag,
  PRInt32 i;

  for (i = aStart; i < aEnd - aOffset; ++i) {
-    if (nsContentUtils::IsPunctuationMarkAt(aFrag, aOffset + i)) {
+    if (nsContentUtils::IsFirstLetterPunctuationAt(aFrag, aOffset + i)) {
      aIter->SetOriginalOffset(aOffset + i);
      FindClusterEnd(aTextRun, aEnd, aIter);
      i = aIter->GetOriginalOffset() - aOffset;
--- a/layout/generic/punct_marks.x-ccmap
+++ b/layout/generic/punct_marks.x-ccmap