diff --git a/gfx/thebes/gfxFont.cpp b/gfx/thebes/gfxFont.cpp index 7870aa67cac2..93a3f21ff68c 100644 --- a/gfx/thebes/gfxFont.cpp +++ b/gfx/thebes/gfxFont.cpp @@ -1876,16 +1876,6 @@ gfxFont::Measure(gfxTextRun *aTextRun, // Limiting backtrack here avoids pathological // behavior on long runs with no whitespace. -static bool -IsClusterExtender(PRUint32 aUSV) -{ - PRUint8 category = GetGeneralCategory(aUSV); - return ((category >= HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK && - category <= HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) || - (aUSV >= 0x200c && aUSV <= 0x200d) || // ZWJ, ZWNJ - (aUSV >= 0xff9e && aUSV <= 0xff9f)); // katakana sound marks -} - static bool IsBoundarySpace(PRUnichar aChar, PRUnichar aNextChar) { @@ -3797,95 +3787,25 @@ gfxShapedWord::SetupClusterBoundaries(CompressedGlyph *aGlyphs, gfxTextRun::CompressedGlyph extendCluster; extendCluster.SetComplex(false, true, 0); - HSType hangulState = HST_NONE; + ClusterIterator iter(aString, aLength); - for (PRUint32 i = 0; i < aLength; ++i) { - bool surrogatePair = false; - PRUint32 ch = aString[i]; - if (NS_IS_HIGH_SURROGATE(ch) && - i < aLength - 1 && NS_IS_LOW_SURROGATE(aString[i+1])) - { - ch = SURROGATE_TO_UCS4(ch, aString[i+1]); - surrogatePair = true; + // the ClusterIterator won't be able to tell us if the string + // _begins_ with a cluster-extender, so we handle that here + if (aLength && IsClusterExtender(*aString)) { + *aGlyphs = extendCluster; + } + + while (!iter.AtEnd()) { + // advance iter to the next cluster-start (or end of text) + iter.Next(); + // step past the first char of the cluster, leaving its glyph as is + aString++; + aGlyphs++; + // mark all the rest as cluster-continuations + while (aString < iter) { + *aGlyphs++ = extendCluster; + aString++; } - - PRUint8 category = GetGeneralCategory(ch); - HSType hangulType = HST_NONE; - - // combining marks extend the cluster - if (IsClusterExtender(ch)) { - aGlyphs[i] = extendCluster; - } else if (category == HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER) { - // 
handle special cases in Letter_Other category -#if 0 - // Currently disabled. This would follow the UAX#29 specification - // for extended grapheme clusters, but this is not favored by - // Thai users, at least for editing behavior. - // See discussion of equivalent Pango issue in bug 474068 and - // upstream at https://bugzilla.gnome.org/show_bug.cgi?id=576156. - - if ((ch & ~0xff) == 0x0e00) { - // specific Thai & Lao (U+0Exx) chars that extend the cluster - if ( ch == 0x0e30 || - (ch >= 0x0e32 && ch <= 0x0e33) || - ch == 0x0e45 || - ch == 0x0eb0 || - (ch >= 0x0eb2 && ch <= 0x0eb3)) - { - if (i > 0) { - aTextRun->SetGlyphs(i, extendCluster, nsnull); - } - } - else if ((ch >= 0x0e40 && ch <= 0x0e44) || - (ch >= 0x0ec0 && ch <= 0x0ec4)) - { - // characters that are prepended to the following cluster - if (i < length - 1) { - aTextRun->SetGlyphs(i+1, extendCluster, nsnull); - } - } - } else -#endif - if ((ch & ~0xff) == 0x1100 || - (ch >= 0xa960 && ch <= 0xa97f) || - (ch >= 0xac00 && ch <= 0xd7ff)) - { - // no break within Hangul syllables - hangulType = GetHangulSyllableType(ch); - switch (hangulType) { - case HST_L: - case HST_LV: - case HST_LVT: - if (hangulState == HST_L) { - aGlyphs[i] = extendCluster; - } - break; - case HST_V: - if ( (hangulState != HST_NONE) && - !(hangulState & HST_T)) - { - aGlyphs[i] = extendCluster; - } - break; - case HST_T: - if (hangulState & (HST_V | - HST_T)) - { - aGlyphs[i] = extendCluster; - } - break; - default: - break; - } - } - } - - if (surrogatePair) { - ++i; - aGlyphs[i] = extendCluster; - } - - hangulState = hangulType; } } diff --git a/intl/unicharutil/public/nsUnicodeProperties.h b/intl/unicharutil/public/nsUnicodeProperties.h index bf1cf89c1c3d..71d2befca1de 100644 --- a/intl/unicharutil/public/nsUnicodeProperties.h +++ b/intl/unicharutil/public/nsUnicodeProperties.h @@ -65,6 +65,12 @@ PRInt32 GetScriptCode(PRUint32 aCh); PRUint32 GetScriptTagForCode(PRInt32 aScriptCode); +bool IsClusterExtender(PRUint32 aCh, PRUint8 
aCategory); + +inline bool IsClusterExtender(PRUint32 aCh) { + return IsClusterExtender(aCh, GetGeneralCategory(aCh)); +} + enum HSType { HST_NONE = 0x00, HST_L = 0x01, @@ -88,6 +94,31 @@ enum ShapingType { PRInt32 ScriptShapingType(PRInt32 aScriptCode); +// A simple forward iterator over a buffer of PRUnichar code units that +// advances one grapheme cluster per Next() call; it does not own aText +class ClusterIterator +{ +public: + ClusterIterator(const PRUnichar* aText, PRUint32 aLength) + : mText(aText), mLimit(aText + aLength), mPos(aText) + { } + + operator const PRUnichar* () const { + return mPos; + } + + bool AtEnd() const { + return mPos >= mLimit; + } + + void Next(); + +private: + const PRUnichar* mText; + const PRUnichar* mLimit; + const PRUnichar* mPos; +}; + } // end namespace unicode } // end namespace mozilla diff --git a/intl/unicharutil/src/nsUnicodeProperties.cpp b/intl/unicharutil/src/nsUnicodeProperties.cpp index 352981eba616..6364e082c1ad 100644 --- a/intl/unicharutil/src/nsUnicodeProperties.cpp +++ b/intl/unicharutil/src/nsUnicodeProperties.cpp @@ -41,6 +41,7 @@ #include "mozilla/Util.h" #include "nsMemory.h" +#include "nsCharTraits.h" #include "harfbuzz/hb-unicode.h" @@ -214,6 +215,15 @@ GetHangulSyllableType(PRUint32 aCh) return HST_NONE; } +bool +IsClusterExtender(PRUint32 aCh, PRUint8 aCategory) +{ + return ((aCategory >= HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK && + aCategory <= HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) || + (aCh >= 0x200c && aCh <= 0x200d) || // ZWNJ, ZWJ + (aCh >= 0xff9e && aCh <= 0xff9f)); // halfwidth katakana sound marks +} + // TODO: replace this with a properties file or similar; // expect this to evolve as harfbuzz shaping support matures. 
// @@ -272,6 +282,88 @@ ScriptShapingType(PRInt32 aScriptCode) } } +// Advance mPos to the start of the next grapheme cluster (or to mLimit). +// A cluster is one character (a surrogate pair counts as one), or a run of +// conjoining Hangul jamo forming a syllable, followed by any characters +// for which IsClusterExtender() is true. +void +ClusterIterator::Next() +{ + if (AtEnd()) { + NS_WARNING("ClusterIterator has already reached the end"); + return; + } + + PRUint32 ch = *mPos++; + + // If the cluster begins with a surrogate pair, consume the low half too, + // so that the pair is never split and ch holds the full code point + if (NS_IS_HIGH_SURROGATE(ch) && mPos < mLimit && + NS_IS_LOW_SURROGATE(*mPos)) { + ch = SURROGATE_TO_UCS4(ch, *mPos); + mPos++; + } + + // Handle conjoining Jamo that make Hangul syllables + if ((ch & ~0xff) == 0x1100 || + (ch >= 0xa960 && ch <= 0xa97f) || + (ch >= 0xac00 && ch <= 0xd7ff)) { + HSType hangulState = GetHangulSyllableType(ch); + while (mPos < mLimit) { + ch = *mPos; + HSType hangulType = GetHangulSyllableType(ch); + switch (hangulType) { + case HST_L: + case HST_LV: + case HST_LVT: + if (hangulState == HST_L) { + hangulState = hangulType; + mPos++; + continue; + } + break; + case HST_V: + if ((hangulState != HST_NONE) && !(hangulState & HST_T)) { + hangulState = hangulType; + mPos++; + continue; + } + break; + case HST_T: + if (hangulState & (HST_V | HST_T)) { + hangulState = hangulType; + mPos++; + continue; + } + break; + default: + break; + } + break; + } + } + + // Absorb any cluster-extenders that follow the base of the cluster + while (mPos < mLimit) { + ch = *mPos; + + // Check for surrogate pairs; note that isolated surrogates will just + // be treated as generic (non-cluster-extending) characters here, + // which is fine for cluster-iterating purposes + if (NS_IS_HIGH_SURROGATE(ch) && mPos < mLimit - 1 && + NS_IS_LOW_SURROGATE(*(mPos + 1))) { + ch = SURROGATE_TO_UCS4(ch, *(mPos + 1)); + } + + if (!IsClusterExtender(ch)) { + break; + } + + // step over the extender: two code units if it was a surrogate pair + mPos += (ch > 0xffff) ? 2 : 1; + } +} + } // end namespace unicode } // end namespace mozilla diff --git a/layout/base/Makefile.in b/layout/base/Makefile.in index 33431a27f480..d2a618b07510 100644 --- a/layout/base/Makefile.in +++ b/layout/base/Makefile.in @@ -170,6 +170,7 @@ LOCAL_INCLUDES += \ CXXFLAGS += $(MOZ_CAIRO_CFLAGS) DEFINES += -D_IMPL_NS_LAYOUT +DEFINES += -DHB_DONT_DEFINE_STDINT ifndef MOZ_XUL nsIBoxObject.idl: %: $(topsrcdir)/layout/xul/base/public/% diff --git a/layout/base/nsBidiPresUtils.cpp b/layout/base/nsBidiPresUtils.cpp index 783f5bdf1ae3..b5eaaac1dc3d 100644 --- a/layout/base/nsBidiPresUtils.cpp +++ 
b/layout/base/nsBidiPresUtils.cpp @@ -2005,44 +2005,28 @@ void nsBidiPresUtils::WriteReverse(const PRUnichar* aSrc, PRUint32 aSrcLength, PRUnichar* aDest) { - const PRUnichar* src = aSrc + aSrcLength; - PRUnichar* dest = aDest; - PRUint32 UTF32Char; + PRUnichar* dest = aDest + aSrcLength; + mozilla::unicode::ClusterIterator iter(aSrc, aSrcLength); - while (--src >= aSrc) { - if (NS_IS_LOW_SURROGATE(*src)) { - if (src > aSrc && NS_IS_HIGH_SURROGATE(*(src - 1))) { - UTF32Char = SURROGATE_TO_UCS4(*(src - 1), *src); - --src; - } else { - UTF32Char = UCS2_REPLACEMENT_CHAR; - } - } else if (NS_IS_HIGH_SURROGATE(*src)) { - // paired high surrogates are handled above, so this is a lone high surrogate - UTF32Char = UCS2_REPLACEMENT_CHAR; - } else { - UTF32Char = *src; - } - - UTF32Char = mozilla::unicode::GetMirroredChar(UTF32Char); - - if (IS_IN_BMP(UTF32Char)) { - *(dest++) = UTF32Char; - } else { - *(dest++) = H_SURROGATE(UTF32Char); - *(dest++) = L_SURROGATE(UTF32Char); + while (!iter.AtEnd()) { + iter.Next(); + for (const PRUnichar *cp = iter; cp > aSrc; ) { + // Fill dest back-to-front, keeping order within the cluster. No non-BMP + // mirrored pairs currently exist in Unicode, so surrogates can be ignored + *--dest = mozilla::unicode::GetMirroredChar(*--cp); } + aSrc = iter; } - NS_ASSERTION(dest - aDest == aSrcLength, "Whole string not copied"); + NS_ASSERTION(dest == aDest, "Whole string not copied"); } /* static */ bool nsBidiPresUtils::WriteLogicalToVisual(const PRUnichar* aSrc, - PRUint32 aSrcLength, - PRUnichar* aDest, - nsBidiLevel aBaseDirection, - nsBidi* aBidiEngine) + PRUint32 aSrcLength, + PRUnichar* aDest, + nsBidiLevel aBaseDirection, + nsBidi* aBidiEngine) { const PRUnichar* src = aSrc; nsresult rv = aBidiEngine->SetPara(src, aSrcLength, aBaseDirection, nsnull);