Use GetGeneralCategory in IsPunctuationMark instead of the precompiled ccmap. Bug 731222, r=jfkthame

This commit is contained in:
Simon Montagu 2012-03-01 00:17:27 -08:00
parent c6228ce7b5
commit c9433228bb
6 changed files with 23 additions and 1370 deletions

View File

@ -422,8 +422,8 @@ public:
/**
* Returns true if aChar is of class Ps, Pi, Po, Pf, or Pe.
*/
static bool IsPunctuationMark(PRUint32 aChar);
static bool IsPunctuationMarkAt(const nsTextFragment* aFrag, PRUint32 aOffset);
static bool IsFirstLetterPunctuation(PRUint32 aChar);
static bool IsFirstLetterPunctuationAt(const nsTextFragment* aFrag, PRUint32 aOffset);
/**
* Returns true if aChar is of class Lu, Ll, Lt, Lm, Lo, Nd, Nl or No

View File

@ -210,6 +210,7 @@ INCLUDES += \
$(NULL)
DEFINES += -D_IMPL_NS_LAYOUT
DEFINES += -DHB_DONT_DEFINE_STDINT
# gcc requires -msse2 for this file since it uses SSE2 intrinsics. (See bug
# 585538 comment 12.)

View File

@ -123,6 +123,7 @@ static NS_DEFINE_CID(kXTFServiceCID, NS_XTFSERVICE_CID);
#include "nsILineBreaker.h"
#include "nsIWordBreaker.h"
#include "nsUnicodeProperties.h"
#include "harfbuzz/hb-common.h"
#include "jsdbgapi.h"
#include "nsIJSRuntimeService.h"
#include "nsIDOMDocumentXBL.h"
@ -1117,34 +1118,38 @@ nsContentUtils::CopyNewlineNormalizedUnicodeTo(nsReadingIterator<PRUnichar>& aSr
return normalizer.GetCharsWritten();
}
// Replaced by precompiled CCMap (see bug 180266). To update the list
// of characters, see one of the files included below. For how
// the original list of characters was obtained by Frank Tang, see bug 54467.
// Updated to fix the regression (bug 263411). The list contains
// characters of the following Unicode character classes: Ps, Pi, Po, Pf, Pe.
// (ref.: http://www.w3.org/TR/2004/CR-CSS21-20040225/selector.html#first-letter)
#include "punct_marks.x-ccmap"
DEFINE_X_CCMAP(gPuncCharsCCMapExt, const);
/**
* This is used to determine whether a character is in one of the punctuation
* mark classes which CSS says should be part of the first-letter.
* See http://www.w3.org/TR/CSS2/selector.html#first-letter and
* http://www.w3.org/TR/selectors/#first-letter
*
* @param aChar the character to classify; it is handed unchanged to
*              mozilla::unicode::GetGeneralCategory.
* @return true if the general category reported for aChar is one of
*         Ps, Pe, Pi, Pf or Po; false for every other category.
*/
// static
bool
nsContentUtils::IsPunctuationMark(PRUint32 aChar)
nsContentUtils::IsFirstLetterPunctuation(PRUint32 aChar)
{
return CCMAP_HAS_CHAR_EXT(gPuncCharsCCMapExt, aChar);
// Look the category up once, then compare it against each of the five
// punctuation categories named in the comment above.
PRUint8 cat = mozilla::unicode::GetGeneralCategory(aChar);
return (cat == HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION || // Ps
cat == HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION || // Pe
cat == HB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION || // Pi
cat == HB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION || // Pf
cat == HB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION); // Po
}
// static
bool
nsContentUtils::IsPunctuationMarkAt(const nsTextFragment* aFrag, PRUint32 aOffset)
nsContentUtils::IsFirstLetterPunctuationAt(const nsTextFragment* aFrag, PRUint32 aOffset)
{
PRUnichar h = aFrag->CharAt(aOffset);
if (!IS_SURROGATE(h)) {
return IsPunctuationMark(h);
return IsFirstLetterPunctuation(h);
}
if (NS_IS_HIGH_SURROGATE(h) && aOffset + 1 < aFrag->GetLength()) {
PRUnichar l = aFrag->CharAt(aOffset + 1);
if (NS_IS_LOW_SURROGATE(l)) {
return IsPunctuationMark(SURROGATE_TO_UCS4(h, l));
return IsFirstLetterPunctuation(SURROGATE_TO_UCS4(h, l));
}
}
return false;

View File

@ -65,7 +65,7 @@ SetupCapitalization(const PRUnichar* aWord, PRUint32 aLength,
// The only space character a word can contain is NBSP.
bool capitalizeNextChar = true;
for (PRUint32 i = 0; i < aLength; ++i) {
if (capitalizeNextChar && !nsContentUtils::IsPunctuationMark(aWord[i])) {
if (capitalizeNextChar && !nsContentUtils::IsFirstLetterPunctuation(aWord[i])) {
aCapitalization[i] = true;
capitalizeNextChar = false;
}

View File

@ -6444,7 +6444,7 @@ FindEndOfPunctuationRun(const nsTextFragment* aFrag,
PRInt32 i;
for (i = aStart; i < aEnd - aOffset; ++i) {
if (nsContentUtils::IsPunctuationMarkAt(aFrag, aOffset + i)) {
if (nsContentUtils::IsFirstLetterPunctuationAt(aFrag, aOffset + i)) {
aIter->SetOriginalOffset(aOffset + i);
FindClusterEnd(aTextRun, aEnd, aIter);
i = aIter->GetOriginalOffset() - aOffset;

File diff suppressed because it is too large Load Diff