Bug 333659. Relanding intl changes --- new nsILineBreaker API, shouldn't affect current code. r+sr=dbaron

This commit is contained in:
roc+%cs.cmu.edu 2007-01-17 02:14:10 +00:00
parent 573da385ae
commit d0c6652240
3 changed files with 73 additions and 39 deletions

@ -43,11 +43,10 @@
#define NS_LINEBREAKER_NEED_MORE_TEXT -1
// {E86B3375-BF89-11d2-B3AF-00805F8A6670}
// {c3d9f25f-7cea-4a76-a08f-05c431353448}
#define NS_ILINEBREAKER_IID \
{ 0xe86b3375, 0xbf89, 0x11d2, \
{ 0xb3, 0xaf, 0x0, 0x80, 0x5f, 0x8a, 0x66, 0x70 } }
{ 0xc3d9f25f, 0x7cea, 0x4a76, \
{ 0xa0, 0x8f, 0x05, 0xc4, 0x31, 0x35, 0x34, 0x48 } }
class nsILineBreaker : public nsISupports
{
@ -63,6 +62,14 @@ public:
virtual PRInt32 Prev( const PRUnichar* aText, PRUint32 aLen,
PRUint32 aPos) = 0;
// Call this on a word with whitespace at either end. We will apply JISx4051
// rules to find breaks inside the word. aBreakBefore is set to the break-
// before status of each character; aBreakBefore[0] will always be false
// because we never return a break before the first character.
// aLength is the length of the aText array and also the length of the
// aBreakBefore output array.
virtual void GetJISx4051Breaks(const PRUnichar* aText, PRUint32 aLength,
PRPackedBool* aBreakBefore) = 0;
};
NS_DEFINE_STATIC_IID_ACCESSOR(nsILineBreaker, NS_ILINEBREAKER_IID)

@ -47,7 +47,6 @@
#include "rulebrk.h"
#include "nsUnicharUtils.h"
/*
Simplification of Pair Table in JIS X 4051
@ -241,7 +240,7 @@ IS_SPACE(PRUnichar u)
return ((u) == 0x0020 || (u) == 0x0009 || (u) == 0x000a || (u) == 0x000d || (u)==0x200b);
}
PRInt8 nsJISx4051LineBreaker::GetClass(PRUnichar u)
static PRInt8 GetClass(PRUnichar u)
{
PRUint16 h = u & 0xFF00;
PRUint16 l = u & 0x00ff;
@ -332,7 +331,7 @@ PRInt8 nsJISx4051LineBreaker::GetClass(PRUnichar u)
return c;
}
PRBool nsJISx4051LineBreaker::GetPair(PRInt8 c1, PRInt8 c2)
static PRBool GetPair(PRInt8 c1, PRInt8 c2)
{
NS_ASSERTION( c1 < MAX_CLASSES ,"illegal classes 1");
NS_ASSERTION( c2 < MAX_CLASSES ,"illegal classes 2");
@ -361,9 +360,8 @@ NS_IMPL_ISUPPORTS1(nsJISx4051LineBreaker, nsILineBreaker)
#define CHARACTER_CLASS 8 // JIS x4051 class 18 is now map to simplified class 8
#define IS_ASCII_DIGIT(u) (0x0030 <= (u) && (u) <= 0x0039)
PRInt8 nsJISx4051LineBreaker::ContextualAnalysis(
PRUnichar prev, PRUnichar cur, PRUnichar next
)
static PRInt8 ContextualAnalysis(
PRUnichar prev, PRUnichar cur, PRUnichar next)
{
if(U_COMMA == cur)
{
@ -393,7 +391,7 @@ PRInt8 nsJISx4051LineBreaker::ContextualAnalysis(
if(U_SPACE != next)
return CHARACTER_CLASS;
}
return this->GetClass(cur);
return GetClass(cur);
}
@ -435,18 +433,18 @@ ROUTE_CJK_BETWEEN:
PRInt8 c1, c2;
if(NEED_CONTEXTUAL_ANALYSIS(aText1[aTextLen1-1]))
c1 = this->ContextualAnalysis((aTextLen1>1)?aText1[aTextLen1-2]:0,
c1 = ContextualAnalysis((aTextLen1>1)?aText1[aTextLen1-2]:0,
aText1[aTextLen1-1],
aText2[0]);
else
c1 = this->GetClass(aText1[aTextLen1-1]);
c1 = GetClass(aText1[aTextLen1-1]);
if(NEED_CONTEXTUAL_ANALYSIS(aText2[0]))
c2 = this->ContextualAnalysis(aText1[aTextLen1-1],
aText2[0],
(aTextLen2>1)?aText2[1]:0);
c2 = ContextualAnalysis(aText1[aTextLen1-1],
aText2[0],
(aTextLen2>1)?aText2[1]:0);
else
c2 = this->GetClass(aText2[0]);
c2 = GetClass(aText2[0]);
/* Handle cases for THAI */
if((CLASS_THAI == c1) && (CLASS_THAI == c2))
@ -483,11 +481,11 @@ ROUTE_CJK_NEXT:
cur = aPos;
if(NEED_CONTEXTUAL_ANALYSIS(aText[cur]))
{
c1 = this->ContextualAnalysis((cur>0)?aText[cur-1]:0,
aText[cur],
(cur<(aLen-1)) ?aText[cur+1]:0);
c1 = ContextualAnalysis((cur>0)?aText[cur-1]:0,
aText[cur],
(cur<(aLen-1)) ?aText[cur+1]:0);
} else {
c1 = this->GetClass(aText[cur]);
c1 = GetClass(aText[cur]);
}
if(CLASS_THAI == c1)
@ -497,11 +495,11 @@ ROUTE_CJK_NEXT:
{
if(NEED_CONTEXTUAL_ANALYSIS(aText[cur]))
{
c2= this->ContextualAnalysis((cur>0)?aText[cur-1]:0,
aText[cur],
(cur<(aLen-1)) ?aText[cur+1]:0);
c2 = ContextualAnalysis((cur>0)?aText[cur-1]:0,
aText[cur],
(cur<(aLen-1)) ?aText[cur+1]:0);
} else {
c2 = this->GetClass(aText[cur]);
c2 = GetClass(aText[cur]);
}
if(GetPair(c1, c2)) {
@ -539,11 +537,11 @@ ROUTE_CJK_PREV:
PRInt8 c1, c2;
if(NEED_CONTEXTUAL_ANALYSIS(aText[cur-1]))
{
c2 = this->ContextualAnalysis(((cur-1)>0)?aText[cur-2]:0,
aText[cur-1],
(cur<aLen) ?aText[cur]:0);
c2 = ContextualAnalysis(((cur-1)>0)?aText[cur-2]:0,
aText[cur-1],
(cur<aLen) ?aText[cur]:0);
} else {
c2 = this->GetClass(aText[cur-1]);
c2 = GetClass(aText[cur-1]);
}
// To Do:
//
@ -553,11 +551,11 @@ ROUTE_CJK_PREV:
{
if(NEED_CONTEXTUAL_ANALYSIS(aText[cur-1]))
{
c1= this->ContextualAnalysis(((cur-1)>0)?aText[cur-2]:0,
aText[cur-1],
(cur<aLen) ?aText[cur]:0);
c1 = ContextualAnalysis(((cur-1)>0)?aText[cur-2]:0,
aText[cur-1],
(cur<aLen) ?aText[cur]:0);
} else {
c1 = this->GetClass(aText[cur-1]);
c1 = GetClass(aText[cur-1]);
}
if(GetPair(c1, c2)) {
@ -568,3 +566,36 @@ ROUTE_CJK_PREV:
return NS_LINEBREAKER_NEED_MORE_TEXT; // Need more text
}
void
nsJISx4051LineBreaker::GetJISx4051Breaks(const PRUnichar* aChars, PRUint32 aLength,
PRPackedBool* aBreakBefore)
{
PRUint32 cur;
PRInt8 lastClass = -1;
for (cur = 0; cur < aLength; ++cur) {
PRUnichar ch = aChars[cur];
PRInt8 cl;
if (NEED_CONTEXTUAL_ANALYSIS(ch)) {
cl = ContextualAnalysis(cur > 0 ? aChars[cur - 1] : 0,
ch,
cur + 1 < aLength ? aChars[cur + 1] : 0);
} else {
cl = GetClass(ch);
}
PRBool allowBreak;
if (cur > 0) {
if (CLASS_THAI == lastClass && CLASS_THAI == cl) {
allowBreak = 0 == TrbWordBreakPos(aChars, cur, aChars + cur, aLength - cur);
} else {
allowBreak = GetPair(lastClass, cl);
}
} else {
allowBreak = PR_FALSE;
}
aBreakBefore[cur] = allowBreak;
lastClass = cl;
}
}

@ -55,12 +55,8 @@ public:
PRInt32 Prev( const PRUnichar* aText, PRUint32 aLen, PRUint32 aPos);
protected:
PRInt8 GetClass(PRUnichar u);
PRInt8 ContextualAnalysis(PRUnichar prev, PRUnichar cur, PRUnichar next );
PRBool GetPair(PRInt8 c1, PRInt8 c2);
virtual void GetJISx4051Breaks(const PRUnichar* aText, PRUint32 aLength,
PRPackedBool* aBreakBefore);
};
#endif /* nsJISx4501LineBreaker_h__ */