Bug 336959. Use Pango to break inside Thai/Lao runs. Patch by Theppitak Karoonboonyanan, r+sr=roc

This commit is contained in:
roc+@cs.cmu.edu 2007-07-18 20:26:51 -07:00
parent 84f164ad4a
commit 6ac430a20e
14 changed files with 404 additions and 209 deletions

View File

@ -96,11 +96,12 @@ public:
// Returns true when u is accepted by IsComplexASCIIChar() or falls inside
// one of the code point ranges tested below.
static inline PRBool IsComplexChar(PRUnichar u)
{
return IsComplexASCIIChar(u) ||
(0x1100 <= u && u <= 0x11ff) ||
(0x2000 <= u && u <= 0x21ff) ||
(0x2e80 <= u && u <= 0xd7ff) ||
(0xf900 <= u && u <= 0xfaff) ||
(0xff00 <= u && u <= 0xffef);
(0x0e01 <= u && u <= 0x0edf) || // Thai & Lao
(0x1100 <= u && u <= 0x11ff) || // Hangul Jamo
(0x2000 <= u && u <= 0x21ff) || // Punctuation and Symbols
(0x2e80 <= u && u <= 0xd7ff) || // several CJK blocks
(0xf900 <= u && u <= 0xfaff) || // CJK Compatibility Ideographs
(0xff00 <= u && u <= 0xffef); // Halfwidth and Fullwidth Forms
}
// Normally, break opportunities exist at the end of each run of whitespace

View File

@ -106,3 +106,13 @@ endif
include $(topsrcdir)/config/rules.mk
ifdef MOZ_ENABLE_PANGO
CXXFLAGS += \
$(MOZ_PANGO_CFLAGS) \
$(NULL)
EXTRA_DSO_LDOPTS += \
$(MOZ_PANGO_LIBS) \
$(NULL)
endif

View File

@ -43,19 +43,15 @@
#define NS_LINEBREAKER_NEED_MORE_TEXT -1
// {C9C5938E-70EF-4db2-ADEE-E7B2CCFBBEE6}
// {5ae68851-d9a3-49fd-9388-58586dad8044}
#define NS_ILINEBREAKER_IID \
{ 0xc9c5938e, 0x70ef, 0x4db2, \
{ 0xad, 0xee, 0xe7, 0xb2, 0xcc, 0xfb, 0xbe, 0xe6 } }
{ 0x5ae68851, 0xd9a3, 0x49fd, \
{ 0x93, 0x88, 0x58, 0x58, 0x6d, 0xad, 0x80, 0x44 } }
class nsILineBreaker : public nsISupports
{
public:
NS_DECLARE_STATIC_IID_ACCESSOR(NS_ILINEBREAKER_IID)
virtual PRBool BreakInBetween( const PRUnichar* aText1 , PRUint32 aTextLen1,
const PRUnichar* aText2 ,
PRUint32 aTextLen2) = 0;
virtual PRInt32 Next( const PRUnichar* aText, PRUint32 aLen,
PRUint32 aPos) = 0;

View File

@ -52,12 +52,33 @@ REQUIRES = xpcom \
unicharutil \
$(NULL)
CSRCS = rulebrk.c
CPPSRCS = \
nsJISx4501LineBreaker.cpp \
nsSampleWordBreaker.cpp \
nsSemanticUnitScanner.cpp \
$(NULL)
ifdef MOZ_ENABLE_PANGO
CPPSRCS += \
nsPangoBreaker.cpp \
$(NULL)
else
CPPSRCS += \
nsRuleBreaker.cpp \
$(NULL)
CSRCS = rulebrk.c
endif
include $(topsrcdir)/config/rules.mk
ifdef MOZ_ENABLE_PANGO
CXXFLAGS += \
$(MOZ_PANGO_CFLAGS) \
$(NULL)
EXTRA_DSO_LDOPTS += \
$(MOZ_PANGO_LIBS) \
$(NULL)
endif

View File

@ -178,3 +178,38 @@ static const PRUint32 gLBClass30[32] = {
0x51111555, // U+30F8 - U+30FF
};
// Line-break class table for U+0E00 - U+0EFF (Thai and Lao), one entry per
// run of eight code points as noted on each line.
// Each PRUint32 packs eight 4-bit class numbers; the lowest code point of the
// run sits in the least-significant nibble, which is the layout
// GETCLASSFROMTABLE unpacks. Nibble 9 is the COMPLEX class; the other values
// seen here (0, 1, 6, 8) mark the few characters that get an ordinary class
// (e.g. the digits U+0E50 - U+0E59 and U+0ED0 - U+0ED9 carry 6).
static const PRUint32 gLBClass0E[32] = {
0x99999999, // U+0E00 - U+0E07
0x99999999, // U+0E08 - U+0E0F
0x99999999, // U+0E10 - U+0E17
0x99999999, // U+0E18 - U+0E1F
0x99999999, // U+0E20 - U+0E27
0x19999999, // U+0E28 - U+0E2F
0x99999999, // U+0E30 - U+0E37
0x09999999, // U+0E38 - U+0E3F
0x91999999, // U+0E40 - U+0E47
0x89999999, // U+0E48 - U+0E4F
0x66666666, // U+0E50 - U+0E57
0x99991166, // U+0E58 - U+0E5F
0x99999999, // U+0E60 - U+0E67
0x99999999, // U+0E68 - U+0E6F
0x99999999, // U+0E70 - U+0E77
0x99999999, // U+0E78 - U+0E7F
0x99999999, // U+0E80 - U+0E87
0x99999999, // U+0E88 - U+0E8F
0x99999999, // U+0E90 - U+0E97
0x99999999, // U+0E98 - U+0E9F
0x99999999, // U+0EA0 - U+0EA7
0x19999999, // U+0EA8 - U+0EAF
0x99999999, // U+0EB0 - U+0EB7
0x99999999, // U+0EB8 - U+0EBF
0x91999999, // U+0EC0 - U+0EC7
0x99999999, // U+0EC8 - U+0ECF
0x66666666, // U+0ED0 - U+0ED7
0x99999966, // U+0ED8 - U+0EDF
0x99999999, // U+0EE0 - U+0EE7
0x99999999, // U+0EE8 - U+0EEF
0x99999999, // U+0EF0 - U+0EF7
0x99999999, // U+0EF8 - U+0EFF
};

View File

@ -0,0 +1,52 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is mozilla.org code.
*
* The Initial Developer of the Original Code is
* Theppitak Karoonboonyanan <thep@linux.thai.net>.
* Portions created by the Initial Developer are Copyright (C) 2007
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* - Theppitak Karoonboonyanan <thep@linux.thai.net>
*
* Alternatively, the contents of this file may be used under the terms of
* either of the GNU General Public License Version 2 or later (the "GPL"),
* or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#ifndef nsComplexBreaker_h__
#define nsComplexBreaker_h__
#include "nsString.h"
/**
* Find line break opportunities in aText[] of aLength characters,
* filling boolean values indicating line break opportunities for the
* corresponding characters in aBreakBefore[] on return.
*
* @param aText        the UTF-16 text to analyse; must not be null
* @param aLength      number of PRUnichar units in aText
* @param aBreakBefore output array with room for at least aLength entries;
*                     entry i tells whether a line break is allowed before
*                     aText[i]
*/
void
NS_GetComplexLineBreaks(const PRUnichar* aText, PRUint32 aLength,
PRPackedBool* aBreakBefore);
#endif /* nsComplexBreaker_h__ */

View File

@ -42,9 +42,8 @@
#include "pratom.h"
#include "nsLWBRKDll.h"
#include "jisx4501class.h"
#define TH_UNICODE
#include "th_char.h"
#include "rulebrk.h"
#include "nsComplexBreaker.h"
#include "nsTArray.h"
#include "nsUnicharUtils.h"
/*
@ -141,15 +140,16 @@
4. We add THAI characters and make it breakable w/ all ther class
4. We add COMPLEX characters and make them breakable with all other classes
except after class 1 and before class [a]
Class of
Leading Class of Trailing Char Class
Char
1 [a] 7 8 9 [b]15 16 18 THAI
1 [a] 7 8 9 [b]15 16 18 COMPLEX
1 X X X X X X X X X
1 X X X X X X X X X X
[a] X
7 X X
8 X X
@ -158,7 +158,7 @@
15 X X X X
16 X X X
18 X X X X
THAI T
COMPLEX X T
T : need special handling
@ -167,7 +167,7 @@
18 <- 1
1 0000 0001 1111 1111 = 0x01FF
1 0000 0011 1111 1111 = 0x03FF
[a] 0000 0000 0000 0010 = 0x0002
7 0000 0000 0000 0110 = 0x0006
8 0000 0000 0100 0010 = 0x0042
@ -176,7 +176,7 @@
15 0000 0001 0101 0010 = 0x0152
16 0000 0001 1000 0010 = 0x0182
18 0000 0001 1100 0010 = 0x01C2
THAI 0000 0000 0000 0000 = 0x0000
COMPLEX 0000 0010 0000 0010 = 0x0202
5. Now we map the class to number
@ -189,14 +189,14 @@
6: 15
7: 16
8: 18
9: THAI
9: COMPLEX
*/
#define MAX_CLASSES 10
static const PRUint16 gPair[MAX_CLASSES] = {
0x01FF,
0x03FF,
0x0002,
0x0006,
0x0042,
@ -205,7 +205,7 @@ static const PRUint16 gPair[MAX_CLASSES] = {
0x0152,
0x0182,
0x01C2,
0x0000
0x0202
};
@ -215,7 +215,7 @@ GETCLASSFROMTABLE(const PRUint32* t, PRUint16 l)
return ((((t)[(l>>3)]) >> ((l & 0x0007)<<2)) & 0x000f);
}
#define CLASS_THAI 9
#define CLASS_COMPLEX 9
@ -234,6 +234,12 @@ IS_CJK_CHAR(PRUnichar u)
(0xff00 <= (u) && (u) <= 0xffef) );
}
// Returns non-zero when u needs the complex (dictionary/platform based)
// line breaker, i.e. when it lies in the Thai or Lao range U+0E01 - U+0EDF.
// The upper bound matches the range IsComplexChar() labels "Thai & Lao" and
// the characters gLBClass0E assigns to the COMPLEX class; stopping at the end
// of the Thai block would make words consisting only of Lao text take the
// plain whitespace path even though the class table treats them as complex.
static inline int
IS_COMPLEX(PRUnichar u)
{
  return (0x0e01 <= (u) && (u) <= 0x0edf);
}
static inline int
IS_SPACE(PRUnichar u)
{
@ -251,9 +257,9 @@ static PRInt8 GetClass(PRUnichar u)
{
c = GETCLASSFROMTABLE(gLBClass00, l);
}
else if(th_isthai(u))
else if( 0x0E00 == h)
{
c = CLASS_THAI;
c = GETCLASSFROMTABLE(gLBClass0E, l);
}
else if( 0x2000 == h)
{
@ -412,175 +418,64 @@ static PRInt8 ContextualAnalysis(
}
PRBool nsJISx4051LineBreaker::BreakInBetween(
const PRUnichar* aText1 , PRUint32 aTextLen1,
const PRUnichar* aText2 , PRUint32 aTextLen2)
PRInt32 nsJISx4051LineBreaker::WordMove(
const PRUnichar* aText, PRUint32 aLen, PRUint32 aPos, PRInt8 aDirection)
{
if(!aText1 || !aText2 || (0 == aTextLen1) || (0==aTextLen2) ||
NS_IS_HIGH_SURROGATE(aText1[aTextLen1-1]) &&
NS_IS_LOW_SURROGATE(aText2[0]) ) //Do not separate a surrogate pair
{
return PR_FALSE;
PRBool textNeedsJISx4051 = PR_FALSE;
PRInt32 begin, end;
for (begin = aPos; begin > 0 && !IS_SPACE(aText[begin - 1]); --begin) {
if (IS_CJK_CHAR(aText[begin]) || IS_COMPLEX(aText[begin])) {
textNeedsJISx4051 = PR_TRUE;
}
}
for (end = aPos + 1; end < PRInt32(aLen) && !IS_SPACE(aText[end]); ++end) {
if (IS_CJK_CHAR(aText[end]) || IS_COMPLEX(aText[end])) {
textNeedsJISx4051 = PR_TRUE;
}
}
//search for CJK characters until a space is found.
//if CJK char is found before space, use 4051, otherwise western
PRInt32 cur;
PRInt32 ret;
nsAutoTArray<PRPackedBool, 2000> breakState;
if (!textNeedsJISx4051 || !breakState.AppendElements(end - begin)) {
// No complex text character, do not try to do complex line break.
// (This is required for serializers. See Bug #344816.)
// Also fall back to this when out of memory.
if (aDirection < 0) {
ret = (begin == PRInt32(aPos)) ? begin - 1 : begin;
} else {
ret = end;
}
} else {
GetJISx4051Breaks(aText + begin, end - begin, breakState.Elements());
for (cur= aTextLen1-1; cur>=0; cur--)
{
if (IS_SPACE(aText1[cur]))
break;
if (IS_CJK_CHAR(aText1[cur]))
goto ROUTE_CJK_BETWEEN;
ret = aPos;
do {
ret += aDirection;
} while (begin < ret && ret < end && !breakState[ret - begin]);
}
for (cur= 0; cur < (PRInt32)aTextLen2; cur++)
{
if (IS_SPACE(aText2[cur]))
break;
if (IS_CJK_CHAR(aText2[cur]))
goto ROUTE_CJK_BETWEEN;
}
//now apply western rule.
return IS_SPACE(aText1[aTextLen1-1]) || IS_SPACE(aText2[0]);
ROUTE_CJK_BETWEEN:
PRInt8 c1, c2;
if(NEED_CONTEXTUAL_ANALYSIS(aText1[aTextLen1-1]))
c1 = ContextualAnalysis((aTextLen1>1)?aText1[aTextLen1-2]:U_NULL,
aText1[aTextLen1-1],
aText2[0]);
else
c1 = GetClass(aText1[aTextLen1-1]);
if(NEED_CONTEXTUAL_ANALYSIS(aText2[0]))
c2 = ContextualAnalysis(aText1[aTextLen1-1],
aText2[0],
(aTextLen2>1)?aText2[1]:U_NULL);
else
c2 = GetClass(aText2[0]);
/* Handle cases for THAI */
if((CLASS_THAI == c1) && (CLASS_THAI == c2))
{
return (0 == TrbWordBreakPos(aText1, aTextLen1, aText2, aTextLen2));
}
else
{
return GetPair(c1,c2);
}
return ret;
}
PRInt32 nsJISx4051LineBreaker::Next(
const PRUnichar* aText, PRUint32 aLen, PRUint32 aPos)
{
NS_ASSERTION(aText, "aText shouldn't be null");
NS_ASSERTION(aLen > aPos, "Illegal value (length > position)");
//forward check for CJK characters until a space is found.
//if CJK char is found before space, use 4051, otherwise western
PRUint32 cur;
for (cur = aPos; cur < aLen; ++cur)
{
if (IS_SPACE(aText[cur]))
return cur;
if (IS_CJK_CHAR(aText[cur]))
goto ROUTE_CJK_NEXT;
}
return NS_LINEBREAKER_NEED_MORE_TEXT; // Need more text
ROUTE_CJK_NEXT:
PRInt8 c1, c2;
cur = aPos;
if(NEED_CONTEXTUAL_ANALYSIS(aText[cur]))
{
c1 = ContextualAnalysis((cur>0)?aText[cur-1]:U_NULL,
aText[cur],
(cur<(aLen-1)) ?aText[cur+1]:U_NULL);
} else {
c1 = GetClass(aText[cur]);
}
if(CLASS_THAI == c1)
return PRUint32(TrbFollowing(aText, aLen, aPos));
for(cur++; cur <aLen; cur++)
{
if(NEED_CONTEXTUAL_ANALYSIS(aText[cur]))
{
c2 = ContextualAnalysis((cur>0)?aText[cur-1]:U_NULL,
aText[cur],
(cur<(aLen-1)) ?aText[cur+1]:U_NULL);
} else {
c2 = GetClass(aText[cur]);
}
if(GetPair(c1, c2)) {
return cur;
}
c1 = c2;
}
return NS_LINEBREAKER_NEED_MORE_TEXT; // Need more text
PRInt32 nextPos = WordMove(aText, aLen, aPos, 1);
return nextPos < PRInt32(aLen) ? nextPos : NS_LINEBREAKER_NEED_MORE_TEXT;
}
PRInt32 nsJISx4051LineBreaker::Prev(
const PRUnichar* aText, PRUint32 aLen, PRUint32 aPos)
{
NS_ASSERTION(aText, "aText shouldn't be null");
NS_ASSERTION(aLen >= aPos, "Illegal value (length >= position)");
//backward check for CJK characters until a space is found.
//if CJK char is found before space, use 4051, otherwise western
PRUint32 cur;
for (cur = aPos - 1; cur > 0; --cur)
{
if (IS_SPACE(aText[cur]))
{
if (cur != aPos - 1) // XXXldb Why?
++cur;
return cur;
}
if (IS_CJK_CHAR(aText[cur]))
goto ROUTE_CJK_PREV;
}
return NS_LINEBREAKER_NEED_MORE_TEXT; // Need more text
ROUTE_CJK_PREV:
cur = aPos;
PRInt8 c1, c2;
if(NEED_CONTEXTUAL_ANALYSIS(aText[cur-1]))
{
c2 = ContextualAnalysis(((cur-1)>0)?aText[cur-2]:U_NULL,
aText[cur-1],
(cur<aLen) ?aText[cur]:U_NULL);
} else {
c2 = GetClass(aText[cur-1]);
}
// To Do:
//
// Should handle CLASS_THAI here
//
for(cur--; cur > 0; cur--)
{
if(NEED_CONTEXTUAL_ANALYSIS(aText[cur-1]))
{
c1 = ContextualAnalysis(((cur-1)>0)?aText[cur-2]:U_NULL,
aText[cur-1],
(cur<aLen) ?aText[cur]:U_NULL);
} else {
c1 = GetClass(aText[cur-1]);
}
if(GetPair(c1, c2)) {
return cur;
}
c2 = c1;
}
return NS_LINEBREAKER_NEED_MORE_TEXT; // Need more text
PRInt32 prevPos = WordMove(aText, aLen, aPos, -1);
return prevPos > 0 ? prevPos : NS_LINEBREAKER_NEED_MORE_TEXT;
}
void
@ -604,16 +499,29 @@ nsJISx4051LineBreaker::GetJISx4051Breaks(const PRUnichar* aChars, PRUint32 aLeng
PRBool allowBreak;
if (cur > 0) {
if (CLASS_THAI == lastClass && CLASS_THAI == cl) {
allowBreak = 0 == TrbWordBreakPos(aChars, cur, aChars + cur, aLength - cur);
} else {
allowBreak = GetPair(lastClass, cl);
}
NS_ASSERTION(CLASS_COMPLEX != lastClass || CLASS_COMPLEX != cl,
"Loop should have prevented adjacent complex chars here");
allowBreak = GetPair(lastClass, cl);
} else {
allowBreak = PR_FALSE;
}
aBreakBefore[cur] = allowBreak;
lastClass = cl;
if (CLASS_COMPLEX == cl) {
PRUint32 end = cur + 1;
while (end < aLength && CLASS_COMPLEX == GetClass(aChars[end])) {
++end;
}
NS_GetComplexLineBreaks(aChars + cur, end - cur, aBreakBefore + cur);
// restore breakability at chunk begin, which was always set to false
// by the complex line breaker
aBreakBefore[cur] = allowBreak;
cur = end - 1;
}
}
}

View File

@ -48,9 +48,6 @@ public:
nsJISx4051LineBreaker();
virtual ~nsJISx4051LineBreaker();
PRBool BreakInBetween( const PRUnichar* aText1 , PRUint32 aTextLen1,
const PRUnichar* aText2 , PRUint32 aTextLen2);
PRInt32 Next( const PRUnichar* aText, PRUint32 aLen, PRUint32 aPos);
PRInt32 Prev( const PRUnichar* aText, PRUint32 aLen, PRUint32 aPos);
@ -59,6 +56,10 @@ public:
PRPackedBool* aBreakBefore);
virtual void GetJISx4051Breaks(const PRUint8* aText, PRUint32 aLength,
PRPackedBool* aBreakBefore);
private:
PRInt32 WordMove(const PRUnichar* aText, PRUint32 aLen, PRUint32 aPos,
PRInt8 aDirection);
};
#endif /* nsJISx4501LineBreaker_h__ */

View File

@ -0,0 +1,95 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is mozilla.org code.
*
* The Initial Developer of the Original Code is
* Theppitak Karoonboonyanan <thep@linux.thai.net>.
* Portions created by the Initial Developer are Copyright (C) 2007
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* - Theppitak Karoonboonyanan <thep@linux.thai.net>
*
* Alternatively, the contents of this file may be used under the terms of
* either of the GNU General Public License Version 2 or later (the "GPL"),
* or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#include "nsComplexBreaker.h"
#include <pango/pango-break.h>
#include "nsUTF8Utils.h"
#include "nsString.h"
#include "nsTArray.h"
void
NS_GetComplexLineBreaks(const PRUnichar* aText, PRUint32 aLength,
PRPackedBool* aBreakBefore)
{
NS_ASSERTION(aText, "aText shouldn't be null");
nsAutoTArray<PangoLogAttr, 2000> attrBuffer;
if (!attrBuffer.AppendElements(aLength + 1))
{
// out of memory, behave as if there were no complex line breaker
for (PRUint32 i = 0; i < aLength; ++i) {
aBreakBefore[i] = PR_FALSE;
}
}
NS_ConvertUTF16toUTF8 aUTF8(aText, aLength);
const gchar* p = aUTF8.Data();
const gchar* end = p + aUTF8.Length();
PRUint32 u16Offset = 0;
static PangoLanguage* language = pango_language_from_string("en");
while (p < end)
{
PangoLogAttr* attr = attrBuffer.Elements();
pango_get_log_attrs(p, end - p, -1, language, attr, attrBuffer.Length());
while (p < end)
{
aBreakBefore[u16Offset] = attr->is_line_break;
if (NS_IS_LOW_SURROGATE(aText[u16Offset]))
aBreakBefore[++u16Offset] = PR_FALSE; // Skip high surrogate
++u16Offset;
PRUint32 ch = UTF8CharEnumerator::NextChar(&p, end);
++attr;
if (ch == 0) {
// pango_break (pango 1.16.2) only analyses text before the
// first NUL (but sets one extra attr). Workaround loop to call
// pango_break again to analyse after the NUL is done somewhere else
// (gfx/thebes/src/gfxPangoFonts.cpp: SetupClusterBoundaries()).
// So, we do the same here for pango_get_log_attrs.
break;
}
}
}
}

View File

@ -0,0 +1,53 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is mozilla.org code.
*
* The Initial Developer of the Original Code is
* Theppitak Karoonboonyanan <thep@linux.thai.net>.
* Portions created by the Initial Developer are Copyright (C) 2007
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* - Theppitak Karoonboonyanan <thep@linux.thai.net>
*
* Alternatively, the contents of this file may be used under the terms of
* either of the GNU General Public License Version 2 or later (the "GPL"),
* or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#include "nsComplexBreaker.h"
#define TH_UNICODE
#include "rulebrk.h"
void
NS_GetComplexLineBreaks(const PRUnichar* aText, PRUint32 aLength,
PRPackedBool* aBreakBefore)
{
NS_ASSERTION(aText, "aText shouldn't be null");
for (PRUint32 i = 0; i < aLength; i++)
aBreakBefore[i] = (0 == TrbWordBreakPos(aText, i, aText + i, aLength - i));
}

View File

@ -54,9 +54,9 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
<TD></TD>
<TD></TD>
<TD>14</TD>
<TD>2</TD>
<TD>3</TD>
<TD></TD>
<TD BGCOLOR=white>16</TD>
<TD BGCOLOR=white>17</TD>
<TD></TD>
<TD></TD>
<TD></TD>
@ -79,7 +79,7 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
<TD></TD>
<TD>1</TD>
<TD>13</TD>
<TD>1</TD>
<TD>2</TD>
<TD></TD>
<TD>1</TD>
<TD></TD>
@ -89,20 +89,20 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
</TR>
<TR><TH>01_[a]<TH>
<TD></TD>
<TD>27</TD>
<TD>31</TD>
<TD>2</TD>
<TD></TD>
<TD>30</TD>
<TD>32</TD>
<TD>6</TD>
<TD></TD>
<TD BGCOLOR=white>65</TD>
<TD BGCOLOR=white>71</TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD>5</TD>
<TD>22</TD>
<TD>7</TD>
<TD>24</TD>
<TD></TD>
<TD></TD>
<TD></TD>
@ -116,7 +116,7 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
<TD>14</TD>
<TD></TD>
<TD></TD>
<TD>14</TD>
<TD>16</TD>
<TD></TD>
<TD></TD>
<TD>2</TD>
@ -286,11 +286,11 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
<TD></TD>
<TD></TD>
<TD></TD>
<TD>10</TD>
<TD>30</TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD BGCOLOR=white>10</TD>
<TD BGCOLOR=white>30</TD>
<TD></TD>
<TD></TD>
<TD></TD>
@ -303,7 +303,7 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
<TD></TD>
<TD></TD>
<TD></TD>
<TD>10</TD>
<TD>30</TD>
<TD></TD>
<TD></TD>
<TD></TD>
@ -362,22 +362,22 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
</TR>
<TR><TH>08_18<TH>
<TD>10</TD>
<TD>660</TD>
<TD>659</TD>
<TD>4</TD>
<TD>130</TD>
<TD>55</TD>
<TD>940</TD>
<TD>56</TD>
<TD>941</TD>
<TD>2</TD>
<TD BGCOLOR=white>1801</TD>
<TD BGCOLOR=white>1802</TD>
<TD></TD>
<TD>10</TD>
<TD></TD>
<TD></TD>
<TD>367</TD>
<TD>368</TD>
<TD>1</TD>
<TD>5</TD>
<TD>4</TD>
<TD></TD>
<TD>287</TD>
<TD>286</TD>
<TD></TD>
<TD></TD>
<TD>4</TD>
@ -389,17 +389,17 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
<TD>3</TD>
<TD>4</TD>
<TD>6</TD>
<TD>29</TD>
<TD>30</TD>
<TD>5</TD>
<TD>12</TD>
<TD>10</TD>
<TD>273</TD>
<TD>645</TD>
<TD>646</TD>
<TD>1</TD>
<TD>1</TD>
<TD></TD>
</TR>
<TR><TH>09_nbsp<TH>
<TR><TH>09_COMPLEX<TH>
<TD></TD>
<TD></TD>
<TD></TD>
@ -489,7 +489,7 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
<TD BGCOLOR=red>06_15</TD>
<TD BGCOLOR=red>07_16</TD>
<TD BGCOLOR=red>08_18</TD>
<TD BGCOLOR=red>09_nbsp</TD>
<TD BGCOLOR=red>09_COMPLEX</TD>
<TD BGCOLOR=red>X</TD>
</TR>
<TR><TH>00<TH>
@ -557,6 +557,19 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
<TD></TD>
<TD></TD>
</TR>
<TR><TH>0E<TH>
<TD>1</TD>
<TD>6</TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD></TD>
<TD>20</TD>
<TD></TD>
<TD>1</TD>
<TD></TD>
<TD></TD>
</TR>
<TR><TH>20<TH>
<TD></TD>
<TD>5</TD>

View File

@ -400,6 +400,7 @@ printarray("00", "8");
printarray("20", "8");
printarray("21", "8");
printarray("30", "5");
printarray("0E", "9");
#print %rangecount;

View File

@ -190,3 +190,12 @@
2776;2794;18
2798;27AF;18
27B1;27BE;18
0E3F;;1
0E2F;;4
0E46;;4
0E5A;0E5B;4
0E50;0E59;15
0E4F;;18
0EAF;;4
0EC6;;4
0ED0;0ED9;15

View File

@ -18,4 +18,4 @@
18;08_18
19;X
20;X
21;09_nbsp
21;09_COMPLEX