bug 721821 - make nsBidiPresUtils::WriteReverse aware of clusters, so that diacritics in RTL text display correctly in SVG text. r=smontagu

This commit is contained in:
Jonathan Kew 2012-02-28 21:24:04 +00:00
parent 34f61797c8
commit adfa2f07b4
5 changed files with 141 additions and 127 deletions

View File

@ -1876,16 +1876,6 @@ gfxFont::Measure(gfxTextRun *aTextRun,
// Limiting backtrack here avoids pathological
// behavior on long runs with no whitespace.
// Reports whether the Unicode scalar value aUSV should be treated as
// extending the cluster begun by a preceding character rather than
// starting a cluster of its own.
static bool
IsClusterExtender(PRUint32 aUSV)
{
    // Mark categories: everything from SPACING_MARK through
    // NON_SPACING_MARK (inclusive) in the harfbuzz category numbering.
    const PRUint8 generalCategory = GetGeneralCategory(aUSV);
    if (generalCategory >= HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK &&
        generalCategory <= HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) {
        return true;
    }

    // U+200C ZERO WIDTH NON-JOINER and U+200D ZERO WIDTH JOINER
    if (aUSV >= 0x200c && aUSV <= 0x200d) {
        return true;
    }

    // U+FF9E / U+FF9F: katakana sound marks
    return aUSV >= 0xff9e && aUSV <= 0xff9f;
}
static bool
IsBoundarySpace(PRUnichar aChar, PRUnichar aNextChar)
{
@ -3797,95 +3787,25 @@ gfxShapedWord::SetupClusterBoundaries(CompressedGlyph *aGlyphs,
gfxTextRun::CompressedGlyph extendCluster;
extendCluster.SetComplex(false, true, 0);
HSType hangulState = HST_NONE;
ClusterIterator iter(aString, aLength);
for (PRUint32 i = 0; i < aLength; ++i) {
bool surrogatePair = false;
PRUint32 ch = aString[i];
if (NS_IS_HIGH_SURROGATE(ch) &&
i < aLength - 1 && NS_IS_LOW_SURROGATE(aString[i+1]))
{
ch = SURROGATE_TO_UCS4(ch, aString[i+1]);
surrogatePair = true;
// the ClusterIterator won't be able to tell us if the string
// _begins_ with a cluster-extender, so we handle that here
if (aLength && IsClusterExtender(*aString)) {
*aGlyphs = extendCluster;
}
while (!iter.AtEnd()) {
// advance iter to the next cluster-start (or end of text)
iter.Next();
// step past the first char of the cluster
aString++;
aGlyphs++;
// mark all the rest as cluster-continuations
while (aString < iter) {
*aGlyphs++ = extendCluster;
aString++;
}
PRUint8 category = GetGeneralCategory(ch);
HSType hangulType = HST_NONE;
// combining marks extend the cluster
if (IsClusterExtender(ch)) {
aGlyphs[i] = extendCluster;
} else if (category == HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER) {
// handle special cases in Letter_Other category
#if 0
// Currently disabled. This would follow the UAX#29 specification
// for extended grapheme clusters, but this is not favored by
// Thai users, at least for editing behavior.
// See discussion of equivalent Pango issue in bug 474068 and
// upstream at https://bugzilla.gnome.org/show_bug.cgi?id=576156.
if ((ch & ~0xff) == 0x0e00) {
// specific Thai & Lao (U+0Exx) chars that extend the cluster
if ( ch == 0x0e30 ||
(ch >= 0x0e32 && ch <= 0x0e33) ||
ch == 0x0e45 ||
ch == 0x0eb0 ||
(ch >= 0x0eb2 && ch <= 0x0eb3))
{
if (i > 0) {
aTextRun->SetGlyphs(i, extendCluster, nsnull);
}
}
else if ((ch >= 0x0e40 && ch <= 0x0e44) ||
(ch >= 0x0ec0 && ch <= 0x0ec4))
{
// characters that are prepended to the following cluster
if (i < length - 1) {
aTextRun->SetGlyphs(i+1, extendCluster, nsnull);
}
}
} else
#endif
if ((ch & ~0xff) == 0x1100 ||
(ch >= 0xa960 && ch <= 0xa97f) ||
(ch >= 0xac00 && ch <= 0xd7ff))
{
// no break within Hangul syllables
hangulType = GetHangulSyllableType(ch);
switch (hangulType) {
case HST_L:
case HST_LV:
case HST_LVT:
if (hangulState == HST_L) {
aGlyphs[i] = extendCluster;
}
break;
case HST_V:
if ( (hangulState != HST_NONE) &&
!(hangulState & HST_T))
{
aGlyphs[i] = extendCluster;
}
break;
case HST_T:
if (hangulState & (HST_V |
HST_T))
{
aGlyphs[i] = extendCluster;
}
break;
default:
break;
}
}
}
if (surrogatePair) {
++i;
aGlyphs[i] = extendCluster;
}
hangulState = hangulType;
}
}

View File

@ -65,6 +65,12 @@ PRInt32 GetScriptCode(PRUint32 aCh);
PRUint32 GetScriptTagForCode(PRInt32 aScriptCode);
// Cluster-extender test for callers that already know the general
// category (aCategory) of the character aCh.
bool IsClusterExtender(PRUint32 aCh, PRUint8 aCategory);

// Convenience overload: obtains the general category of aCh via
// GetGeneralCategory() and forwards to the two-argument form.
inline bool IsClusterExtender(PRUint32 aCh) {
    const PRUint8 category = GetGeneralCategory(aCh);
    return IsClusterExtender(aCh, category);
}
enum HSType {
HST_NONE = 0x00,
HST_L = 0x01,
@ -88,6 +94,31 @@ enum ShapingType {
PRInt32 ScriptShapingType(PRInt32 aScriptCode);
// Lightweight iterator over a buffer of PRUnichar code units that steps
// through the text one Unicode grapheme cluster at a time.
//
// The iterator does not copy the text; it only keeps pointers into the
// buffer handed to the constructor.
class ClusterIterator
{
public:
    // Start iterating at aText; the iteration range covers aLength
    // code units from that point.
    ClusterIterator(const PRUnichar* aText, PRUint32 aLength)
        : mText(aText)
        , mLimit(aText + aLength)
        , mPos(aText)
    {
    }

    // Implicit conversion to a pointer: yields the current position
    // within the text.
    operator const PRUnichar* () const
    {
        return mPos;
    }

    // True once the current position has reached (or passed) the end
    // of the range.
    bool AtEnd() const
    {
        return !(mPos < mLimit);
    }

    // Advance the current position to the start of the next cluster,
    // or to the end of the text. Defined out of line.
    void Next();

private:
    const PRUnichar* mText;  // start of the buffer, as passed to the ctor
    const PRUnichar* mLimit; // one past the last code unit in the range
    const PRUnichar* mPos;   // current position
};
} // end namespace unicode
} // end namespace mozilla

View File

@ -41,6 +41,7 @@
#include "mozilla/Util.h"
#include "nsMemory.h"
#include "nsCharTraits.h"
#include "harfbuzz/hb-unicode.h"
@ -214,6 +215,15 @@ GetHangulSyllableType(PRUint32 aCh)
return HST_NONE;
}
// Reports whether the character aCh, whose general category is
// aCategory, should be treated as extending the cluster begun by a
// preceding character rather than starting a new one.
bool
IsClusterExtender(PRUint32 aCh, PRUint8 aCategory)
{
    // Mark categories: everything from SPACING_MARK through
    // NON_SPACING_MARK (inclusive) in the harfbuzz category numbering.
    if (aCategory >= HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK &&
        aCategory <= HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) {
        return true;
    }

    // U+200C ZERO WIDTH NON-JOINER and U+200D ZERO WIDTH JOINER
    if (aCh >= 0x200c && aCh <= 0x200d) {
        return true;
    }

    // U+FF9E / U+FF9F: katakana sound marks
    return aCh >= 0xff9e && aCh <= 0xff9f;
}
// TODO: replace this with a properties file or similar;
// expect this to evolve as harfbuzz shaping support matures.
//
@ -272,6 +282,74 @@ ScriptShapingType(PRInt32 aScriptCode)
}
}
void
ClusterIterator::Next()
{
if (AtEnd()) {
NS_WARNING("ClusterIterator has already reached the end");
return;
}
PRUint32 ch = *mPos++;
// Handle conjoining Jamo that make Hangul syllables
if ((ch & ~0xff) == 0x1100 ||
(ch >= 0xa960 && ch <= 0xa97f) ||
(ch >= 0xac00 && ch <= 0xd7ff)) {
HSType hangulState = GetHangulSyllableType(ch);
while (mPos < mLimit) {
ch = *mPos;
HSType hangulType = GetHangulSyllableType(ch);
switch (hangulType) {
case HST_L:
case HST_LV:
case HST_LVT:
if (hangulState == HST_L) {
hangulState = hangulType;
mPos++;
continue;
}
break;
case HST_V:
if ((hangulState != HST_NONE) && !(hangulState & HST_T)) {
hangulState = hangulType;
mPos++;
continue;
}
break;
case HST_T:
if (hangulState & (HST_V | HST_T)) {
hangulState = hangulType;
mPos++;
continue;
}
break;
default:
break;
}
break;
}
}
while (mPos < mLimit) {
ch = *mPos;
// Check for surrogate pairs; note that isolated surrogates will just
// be treated as generic (non-cluster-extending) characters here,
// which is fine for cluster-iterating purposes
if (NS_IS_LOW_SURROGATE(ch) &&
NS_IS_HIGH_SURROGATE(*(mPos - 1))) {
ch = SURROGATE_TO_UCS4(*(mPos - 1), *mPos);
mPos++;
}
if (!IsClusterExtender(ch)) {
break;
}
mPos++;
}
}
} // end namespace unicode
} // end namespace mozilla

View File

@ -170,6 +170,7 @@ LOCAL_INCLUDES += \
CXXFLAGS += $(MOZ_CAIRO_CFLAGS)
DEFINES += -D_IMPL_NS_LAYOUT
DEFINES += -DHB_DONT_DEFINE_STDINT
ifndef MOZ_XUL
nsIBoxObject.idl: %: $(topsrcdir)/layout/xul/base/public/%

View File

@ -2005,44 +2005,28 @@ void nsBidiPresUtils::WriteReverse(const PRUnichar* aSrc,
PRUint32 aSrcLength,
PRUnichar* aDest)
{
const PRUnichar* src = aSrc + aSrcLength;
PRUnichar* dest = aDest;
PRUint32 UTF32Char;
PRUnichar* dest = aDest + aSrcLength;
mozilla::unicode::ClusterIterator iter(aSrc, aSrcLength);
while (--src >= aSrc) {
if (NS_IS_LOW_SURROGATE(*src)) {
if (src > aSrc && NS_IS_HIGH_SURROGATE(*(src - 1))) {
UTF32Char = SURROGATE_TO_UCS4(*(src - 1), *src);
--src;
} else {
UTF32Char = UCS2_REPLACEMENT_CHAR;
}
} else if (NS_IS_HIGH_SURROGATE(*src)) {
// paired high surrogates are handled above, so this is a lone high surrogate
UTF32Char = UCS2_REPLACEMENT_CHAR;
} else {
UTF32Char = *src;
}
UTF32Char = mozilla::unicode::GetMirroredChar(UTF32Char);
if (IS_IN_BMP(UTF32Char)) {
*(dest++) = UTF32Char;
} else {
*(dest++) = H_SURROGATE(UTF32Char);
*(dest++) = L_SURROGATE(UTF32Char);
while (!iter.AtEnd()) {
iter.Next();
for (const PRUnichar *cp = iter; cp > aSrc; ) {
// Here we rely on the fact that there are no non-BMP mirrored pairs
// currently in Unicode, so we don't need to look for surrogates
*--dest = mozilla::unicode::GetMirroredChar(*--cp);
}
aSrc = iter;
}
NS_ASSERTION(dest - aDest == aSrcLength, "Whole string not copied");
NS_ASSERTION(dest == aDest, "Whole string not copied");
}
/* static */
bool nsBidiPresUtils::WriteLogicalToVisual(const PRUnichar* aSrc,
PRUint32 aSrcLength,
PRUnichar* aDest,
nsBidiLevel aBaseDirection,
nsBidi* aBidiEngine)
PRUint32 aSrcLength,
PRUnichar* aDest,
nsBidiLevel aBaseDirection,
nsBidi* aBidiEngine)
{
const PRUnichar* src = aSrc;
nsresult rv = aBidiEngine->SetPara(src, aSrcLength, aBaseDirection, nsnull);