mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-25 05:41:12 +00:00
Treat all empty and incomplete sequences as encoding errors, and some other clean-up. Bug 381412, r=jshin, sr=dveditz, b1.9=jst
This commit is contained in:
parent
0b5f01f577
commit
18da58e54b
@ -79,6 +79,7 @@
|
||||
nsHZToUnicode::nsHZToUnicode() : nsBufferDecoderSupport(1)
|
||||
{
|
||||
mHZState = HZ_STATE_ASCII; // per HZ spec, default to ASCII state
|
||||
mRunLength = 0;
|
||||
}
|
||||
//Overriding the ConvertNoBuff() in nsUCvCnSupport.cpp.
|
||||
NS_IMETHODIMP nsHZToUnicode::ConvertNoBuff(
|
||||
@ -102,8 +103,13 @@ NS_IMETHODIMP nsHZToUnicode::ConvertNoBuff(
|
||||
}
|
||||
if ( *aSrc & 0x80 ) // if it is a 8-bit byte
|
||||
{
|
||||
// The source is a 8-bit GBCode
|
||||
*aDest = mUtil.GBKCharToUnicode(aSrc[0], aSrc[1]);
|
||||
if (UINT8_IN_RANGE(0x81, aSrc[0], 0xFE) &&
|
||||
UINT8_IN_RANGE(0x40, aSrc[1], 0xFE)) {
|
||||
// The source is a 8-bit GBCode
|
||||
*aDest = mUtil.GBKCharToUnicode(aSrc[0], aSrc[1]);
|
||||
} else {
|
||||
*aDest = UCS2_NO_MAPPING;
|
||||
}
|
||||
aSrc += 2;
|
||||
i++;
|
||||
iDestlen++;
|
||||
@ -123,6 +129,7 @@ NS_IMETHODIMP nsHZToUnicode::ConvertNoBuff(
|
||||
// we got a '~{'
|
||||
// we are switching to HZ state
|
||||
mHZState = HZ_STATE_GB;
|
||||
mRunLength = 0;
|
||||
aSrc += 2;
|
||||
i++;
|
||||
break;
|
||||
@ -132,6 +139,12 @@ NS_IMETHODIMP nsHZToUnicode::ConvertNoBuff(
|
||||
mHZState = HZ_STATE_ASCII;
|
||||
aSrc += 2;
|
||||
i++;
|
||||
if (mRunLength == 0) {
|
||||
*aDest = UCS2_NO_MAPPING;
|
||||
iDestlen++;
|
||||
aDest++;
|
||||
}
|
||||
mRunLength = 0;
|
||||
break;
|
||||
case HZLEAD1:
|
||||
// we got a '~~', process like an ASCII, but no state change
|
||||
@ -141,6 +154,7 @@ NS_IMETHODIMP nsHZToUnicode::ConvertNoBuff(
|
||||
i++;
|
||||
iDestlen++;
|
||||
aDest++;
|
||||
mRunLength++;
|
||||
break;
|
||||
case HZLEAD4:
|
||||
// we got a "~\n", which means double-byte mode is maintained across lines; ignore the '~' itself
|
||||
@ -152,6 +166,9 @@ NS_IMETHODIMP nsHZToUnicode::ConvertNoBuff(
|
||||
default:
|
||||
// an undefined ESC sequence '~X' is an illegal combination: skip both bytes and emit UCS2_NO_MAPPING
|
||||
aSrc += 2;
|
||||
*aDest = UCS2_NO_MAPPING;
|
||||
iDestlen++;
|
||||
aDest++;
|
||||
break;
|
||||
};
|
||||
continue;// go for next loop
|
||||
@ -166,6 +183,7 @@ NS_IMETHODIMP nsHZToUnicode::ConvertNoBuff(
|
||||
i++;
|
||||
iDestlen++;
|
||||
aDest++;
|
||||
mRunLength++;
|
||||
break;
|
||||
case HZ_STATE_ASCII:
|
||||
default:
|
||||
|
@ -70,6 +70,7 @@ protected:
|
||||
|
||||
private:
|
||||
PRInt16 mHZState;
|
||||
PRUint32 mRunLength; // length of a run of 8-bit GB-encoded characters
|
||||
|
||||
};
|
||||
|
||||
|
@ -164,6 +164,7 @@ NS_IMETHODIMP nsISO2022CNToUnicode::Convert(const char * aSrc, PRInt32 * aSrcLen
|
||||
case eState_ESC_24_29_A: // ESC $ ) A
|
||||
if(SO == *src) {
|
||||
mState = eState_GB2312_1980;
|
||||
mRunLength = 0;
|
||||
} else {
|
||||
if(dest+5 >= destEnd)
|
||||
goto error1;
|
||||
@ -180,6 +181,12 @@ NS_IMETHODIMP nsISO2022CNToUnicode::Convert(const char * aSrc, PRInt32 * aSrcLen
|
||||
case eState_GB2312_1980: // ESC $ ) A SO
|
||||
if(SI == *src) { // Shift-In (SI)
|
||||
mState = eState_ESC_24_29_A_SO_SI;
|
||||
if (mRunLength == 0) {
|
||||
if(dest+1 >= destEnd)
|
||||
goto error1;
|
||||
*dest++ = 0xFFFD;
|
||||
}
|
||||
mRunLength = 0;
|
||||
} else if(ESC == *src) {
|
||||
mState = eState_ESC;
|
||||
} else {
|
||||
@ -204,6 +211,7 @@ NS_IMETHODIMP nsISO2022CNToUnicode::Convert(const char * aSrc, PRInt32 * aSrcLen
|
||||
|
||||
aLen = destEnd - dest;
|
||||
rv = GB2312_To_Unicode(gb, gbLen, dest, &aLen);
|
||||
++mRunLength;
|
||||
if(rv == NS_OK_UDEC_MOREOUTPUT) {
|
||||
goto error1;
|
||||
} else if(NS_FAILED(rv)) {
|
||||
@ -223,6 +231,7 @@ NS_IMETHODIMP nsISO2022CNToUnicode::Convert(const char * aSrc, PRInt32 * aSrcLen
|
||||
case eState_ESC_24_29_A_SO_SI: // ESC $ ) A SO SI
|
||||
if(SO == *src) {
|
||||
mState = eState_GB2312_1980;
|
||||
mRunLength = 0;
|
||||
} else if(ESC == *src) {
|
||||
mState = eState_ESC;
|
||||
} else {
|
||||
@ -237,6 +246,7 @@ NS_IMETHODIMP nsISO2022CNToUnicode::Convert(const char * aSrc, PRInt32 * aSrcLen
|
||||
case eState_ESC_24_29_G: // ESC $ ) G
|
||||
if(SO == *src) {
|
||||
mState = eState_CNS11643_1;
|
||||
mRunLength = 0;
|
||||
} else {
|
||||
if(dest+5 >= destEnd)
|
||||
goto error1;
|
||||
@ -253,6 +263,12 @@ NS_IMETHODIMP nsISO2022CNToUnicode::Convert(const char * aSrc, PRInt32 * aSrcLen
|
||||
case eState_CNS11643_1: // ESC $ ) G SO
|
||||
if(SI == *src) { // Shift-In (SI)
|
||||
mState = eState_ESC_24_29_G_SO_SI;
|
||||
if (mRunLength == 0) {
|
||||
if(dest+1 >= destEnd)
|
||||
goto error1;
|
||||
*dest++ = 0xFFFD;
|
||||
}
|
||||
mRunLength = 0;
|
||||
} else if(ESC == *src) {
|
||||
mState = eState_ESC;
|
||||
} else {
|
||||
@ -277,6 +293,7 @@ NS_IMETHODIMP nsISO2022CNToUnicode::Convert(const char * aSrc, PRInt32 * aSrcLen
|
||||
|
||||
aLen = destEnd - dest;
|
||||
rv = EUCTW_To_Unicode(cns, cnsLen, dest, &aLen);
|
||||
++mRunLength;
|
||||
if(rv == NS_OK_UDEC_MOREOUTPUT) {
|
||||
goto error1;
|
||||
} else if(NS_FAILED(rv)) {
|
||||
@ -296,6 +313,7 @@ NS_IMETHODIMP nsISO2022CNToUnicode::Convert(const char * aSrc, PRInt32 * aSrcLen
|
||||
case eState_ESC_24_29_G_SO_SI: // ESC $ ) G SO SI
|
||||
if(SO == *src) {
|
||||
mState = eState_CNS11643_1;
|
||||
mRunLength = 0;
|
||||
} else if(ESC == *src) {
|
||||
mState = eState_ESC;
|
||||
} else {
|
||||
@ -341,6 +359,7 @@ NS_IMETHODIMP nsISO2022CNToUnicode::Convert(const char * aSrc, PRInt32 * aSrcLen
|
||||
case eState_ESC_24_2A_H_ESC: // ESC $ * H ESC
|
||||
if(SS2 == *src) {
|
||||
mState = eState_CNS11643_2;
|
||||
mRunLength = 0;
|
||||
} else if('$' == *src) {
|
||||
mState = eState_ESC_24;
|
||||
} else {
|
||||
@ -360,6 +379,12 @@ NS_IMETHODIMP nsISO2022CNToUnicode::Convert(const char * aSrc, PRInt32 * aSrcLen
|
||||
case eState_CNS11643_2: // ESC $ * H ESC SS2
|
||||
if(SI == *src) { // Shift-In (SI)
|
||||
mState = eState_ESC_24_2A_H_ESC_SS2_SI;
|
||||
if (mRunLength == 0) {
|
||||
if(dest+1 >= destEnd)
|
||||
goto error1;
|
||||
*dest++ = 0xFFFD;
|
||||
}
|
||||
mRunLength = 0;
|
||||
} else if(ESC == *src) {
|
||||
mState = eState_ESC_24_2A_H_ESC;
|
||||
} else {
|
||||
@ -386,6 +411,7 @@ NS_IMETHODIMP nsISO2022CNToUnicode::Convert(const char * aSrc, PRInt32 * aSrcLen
|
||||
|
||||
aLen = destEnd - dest;
|
||||
rv = EUCTW_To_Unicode(cns, cnsLen, dest, &aLen);
|
||||
++mRunLength;
|
||||
if(rv == NS_OK_UDEC_MOREOUTPUT) {
|
||||
goto error1;
|
||||
} else if(NS_FAILED(rv)) {
|
||||
@ -417,6 +443,7 @@ NS_IMETHODIMP nsISO2022CNToUnicode::Convert(const char * aSrc, PRInt32 * aSrcLen
|
||||
case eState_ESC_24_2A_H_ESC_SS2_SI_ESC: // ESC $ * H ESC SS2 SI ESC
|
||||
if(SS2 == *src) {
|
||||
mState = eState_CNS11643_2;
|
||||
mRunLength = 0;
|
||||
} else if('$' == *src) {
|
||||
mState = eState_ESC_24;
|
||||
} else {
|
||||
@ -463,6 +490,7 @@ NS_IMETHODIMP nsISO2022CNToUnicode::Convert(const char * aSrc, PRInt32 * aSrcLen
|
||||
case eState_ESC_24_2B_I_ESC: // ESC $ + I ESC
|
||||
if(SS3 == *src) {
|
||||
mState = eState_CNS11643_3;
|
||||
mRunLength = 0;
|
||||
} else if('$' == *src) {
|
||||
mState = eState_ESC_24;
|
||||
} else {
|
||||
@ -482,6 +510,12 @@ NS_IMETHODIMP nsISO2022CNToUnicode::Convert(const char * aSrc, PRInt32 * aSrcLen
|
||||
case eState_CNS11643_3: // ESC $ + I ESC SS3
|
||||
if(SI == *src) { // Shift-In (SI)
|
||||
mState = eState_ESC_24_2B_I_ESC_SS3_SI;
|
||||
if (mRunLength == 0) {
|
||||
if(dest+1 >= destEnd)
|
||||
goto error1;
|
||||
*dest++ = 0xFFFD;
|
||||
}
|
||||
mRunLength = 0;
|
||||
} else if(ESC == *src) {
|
||||
mState = eState_ESC_24_2B_I_ESC;
|
||||
} else {
|
||||
@ -509,6 +543,7 @@ NS_IMETHODIMP nsISO2022CNToUnicode::Convert(const char * aSrc, PRInt32 * aSrcLen
|
||||
|
||||
aLen = destEnd - dest;
|
||||
rv = EUCTW_To_Unicode(cns, cnsLen, dest, &aLen);
|
||||
++mRunLength;
|
||||
if(rv == NS_OK_UDEC_MOREOUTPUT) {
|
||||
goto error1;
|
||||
} else if(NS_FAILED(rv)) {
|
||||
@ -540,6 +575,7 @@ NS_IMETHODIMP nsISO2022CNToUnicode::Convert(const char * aSrc, PRInt32 * aSrcLen
|
||||
case eState_ESC_24_2B_I_ESC_SS3_SI_ESC: // ESC $ + I ESC SS3 SI ESC
|
||||
if(SS3 == *src) {
|
||||
mState = eState_CNS11643_3;
|
||||
mRunLength = 0;
|
||||
} else if('$' == *src) {
|
||||
mState = eState_ESC_24;
|
||||
} else {
|
||||
|
@ -56,7 +56,8 @@ class nsISO2022CNToUnicode : public nsBasicDecoderSupport
|
||||
public:
|
||||
nsISO2022CNToUnicode() :
|
||||
mState(eState_ASCII),
|
||||
mPlaneID(0) { }
|
||||
mPlaneID(0),
|
||||
mRunLength(0) { }
|
||||
|
||||
virtual ~nsISO2022CNToUnicode() {}
|
||||
|
||||
@ -74,6 +75,7 @@ public:
|
||||
{
|
||||
mState = eState_ASCII;
|
||||
mPlaneID = 0;
|
||||
mRunLength = 0;
|
||||
|
||||
return NS_OK;
|
||||
}
|
||||
@ -118,6 +120,9 @@ private:
|
||||
// Plane number for CNS11643 code
|
||||
int mPlaneID;
|
||||
|
||||
// Length of non-ASCII run
|
||||
PRUint32 mRunLength;
|
||||
|
||||
// Decoder handler
|
||||
nsCOMPtr<nsIUnicodeDecoder> mGB2312_Decoder;
|
||||
nsCOMPtr<nsIUnicodeDecoder> mEUCTW_Decoder;
|
||||
|
@ -120,7 +120,7 @@ NS_IMETHODIMP nsShiftJISToUnicode::Convert(
|
||||
{
|
||||
|
||||
case 0:
|
||||
if(*src & 0x80 && *src != (unsigned char)0xa0)
|
||||
if(*src & 0x80)
|
||||
{
|
||||
mData = SJIS_INDEX[*src & 0x7F];
|
||||
if(mData < 0xE000 )
|
||||
@ -130,15 +130,33 @@ NS_IMETHODIMP nsShiftJISToUnicode::Convert(
|
||||
if( mData > 0xFF00)
|
||||
{
|
||||
if(0xFFFD == mData) {
|
||||
// IE convert fd-ff as single byte and convert to
|
||||
// U+f8f1 to U+f8f3
|
||||
if((0xfd == *src) || (0xfe == *src) || (0xff == *src))
|
||||
{
|
||||
*dest++ = (PRUnichar) 0xf8f1 +
|
||||
// IE-compatible handling of undefined codepoints:
|
||||
// 0x80 --> U+0080
|
||||
// 0xa0 --> U+F8F0
|
||||
// 0xfd --> U+F8F1
|
||||
// 0xfe --> U+F8F2
|
||||
// 0xff --> U+F8F3
|
||||
switch (*src) {
|
||||
case 0x80:
|
||||
*dest++ = (PRUnichar) *src;
|
||||
break;
|
||||
|
||||
case 0xa0:
|
||||
*dest++ = (PRUnichar) 0xf8f0;
|
||||
break;
|
||||
|
||||
case 0xfd:
|
||||
case 0xfe:
|
||||
case 0xff:
|
||||
*dest++ = (PRUnichar) 0xf8f1 +
|
||||
(*src - (unsigned char)(0xfd));
|
||||
if(dest >= destEnd)
|
||||
goto error1;
|
||||
break;
|
||||
|
||||
default:
|
||||
*dest++ = 0x30FB;
|
||||
}
|
||||
if(dest >= destEnd)
|
||||
goto error1;
|
||||
} else {
|
||||
*dest++ = mData; // JIS 0201
|
||||
if(dest >= destEnd)
|
||||
@ -517,10 +535,16 @@ NS_IMETHODIMP nsISO2022JPToUnicodeV2::Convert(
|
||||
case mState_ESC_28: // ESC (
|
||||
if( 'B' == *src) {
|
||||
mState = mState_ASCII;
|
||||
if (mRunLength == 0) {
|
||||
goto error2;
|
||||
}
|
||||
mRunLength = 0;
|
||||
} else if ('J' == *src) {
|
||||
mState = mState_JISX0201_1976Roman;
|
||||
mRunLength = 0;
|
||||
} else if ('I' == *src) {
|
||||
mState = mState_JISX0201_1976Kana;
|
||||
mRunLength = 0;
|
||||
} else {
|
||||
if((dest+3) >= destEnd)
|
||||
goto error1;
|
||||
@ -536,10 +560,13 @@ NS_IMETHODIMP nsISO2022JPToUnicodeV2::Convert(
|
||||
case mState_ESC_24: // ESC $
|
||||
if( '@' == *src) {
|
||||
mState = mState_JISX0208_1978;
|
||||
mRunLength = 0;
|
||||
} else if ('A' == *src) {
|
||||
mState = mState_GB2312_1980;
|
||||
mRunLength = 0;
|
||||
} else if ('B' == *src) {
|
||||
mState = mState_JISX0208_1983;
|
||||
mRunLength = 0;
|
||||
} else if ('(' == *src) {
|
||||
mState = mState_ESC_24_28;
|
||||
} else {
|
||||
@ -557,8 +584,10 @@ NS_IMETHODIMP nsISO2022JPToUnicodeV2::Convert(
|
||||
case mState_ESC_24_28: // ESC $ (
|
||||
if( 'C' == *src) {
|
||||
mState = mState_KSC5601_1987;
|
||||
mRunLength = 0;
|
||||
} else if ('D' == *src) {
|
||||
mState = mState_JISX0212_1990;
|
||||
mRunLength = 0;
|
||||
} else {
|
||||
if((dest+4) >= destEnd)
|
||||
goto error1;
|
||||
@ -583,6 +612,7 @@ NS_IMETHODIMP nsISO2022JPToUnicodeV2::Convert(
|
||||
// we may need an if statement here for '\' and '~'
|
||||
// to map them to Yen and Overbar
|
||||
*dest++ = (PRUnichar) *src;
|
||||
++mRunLength;
|
||||
if(dest >= destEnd)
|
||||
goto error1;
|
||||
}
|
||||
@ -595,6 +625,7 @@ NS_IMETHODIMP nsISO2022JPToUnicodeV2::Convert(
|
||||
} else {
|
||||
if((0x21 <= *src) && (*src <= 0x5F)) {
|
||||
*dest++ = (0xFF61-0x0021) + *src;
|
||||
++mRunLength;
|
||||
} else {
|
||||
goto error2;
|
||||
}
|
||||
@ -687,6 +718,7 @@ NS_IMETHODIMP nsISO2022JPToUnicodeV2::Convert(
|
||||
// XXX We need to map from JIS X 0208 1983 to 1987
|
||||
// in the next line before pass to *dest++
|
||||
*dest++ = gJapaneseMap[mData+off];
|
||||
++mRunLength;
|
||||
}
|
||||
mState = mState_JISX0208_1978;
|
||||
if(dest >= destEnd)
|
||||
@ -724,6 +756,7 @@ NS_IMETHODIMP nsISO2022JPToUnicodeV2::Convert(
|
||||
mGB2312Decoder->Convert((const char *)gb, &gbLen,
|
||||
&uni, &uniLen);
|
||||
*dest++ = uni;
|
||||
++mRunLength;
|
||||
}
|
||||
}
|
||||
mState = mState_GB2312_1980;
|
||||
@ -739,6 +772,7 @@ NS_IMETHODIMP nsISO2022JPToUnicodeV2::Convert(
|
||||
goto error2;
|
||||
} else {
|
||||
*dest++ = gJapaneseMap[mData+off];
|
||||
++mRunLength;
|
||||
}
|
||||
mState = mState_JISX0208_1983;
|
||||
if(dest >= destEnd)
|
||||
@ -776,6 +810,7 @@ NS_IMETHODIMP nsISO2022JPToUnicodeV2::Convert(
|
||||
mEUCKRDecoder->Convert((const char *)ksc, &kscLen,
|
||||
&uni, &uniLen);
|
||||
*dest++ = uni;
|
||||
++mRunLength;
|
||||
}
|
||||
}
|
||||
mState = mState_KSC5601_1987;
|
||||
@ -791,6 +826,7 @@ NS_IMETHODIMP nsISO2022JPToUnicodeV2::Convert(
|
||||
goto error2;
|
||||
} else {
|
||||
*dest++ = gJapaneseMap[mData+off];
|
||||
++mRunLength;
|
||||
}
|
||||
mState = mState_JISX0212_1990;
|
||||
if(dest >= destEnd)
|
||||
@ -824,6 +860,7 @@ NS_IMETHODIMP nsISO2022JPToUnicodeV2::Convert(
|
||||
if((0x20 <= *src) && (*src <= 0x7F)) {
|
||||
if (G2_ISO88591 == G2charset) {
|
||||
*dest++ = *src | 0x80;
|
||||
++mRunLength;
|
||||
} else if (G2_ISO88597 == G2charset) {
|
||||
if (!mISO88597Decoder) {
|
||||
// creating a delegate converter (ISO-8859-7)
|
||||
@ -845,6 +882,7 @@ NS_IMETHODIMP nsISO2022JPToUnicodeV2::Convert(
|
||||
mISO88597Decoder->Convert((const char *)&gr, &grLen,
|
||||
&uni, &uniLen);
|
||||
*dest++ = uni;
|
||||
++mRunLength;
|
||||
}
|
||||
} else {// G2charset is G2_unknown (not designated yet)
|
||||
goto error2;
|
||||
@ -864,6 +902,7 @@ NS_IMETHODIMP nsISO2022JPToUnicodeV2::Convert(
|
||||
|
||||
case mState_ERROR:
|
||||
mState = mLastLegalState;
|
||||
mRunLength = 0;
|
||||
goto error2;
|
||||
break;
|
||||
|
||||
|
@ -123,6 +123,7 @@ public:
|
||||
mState = mState_ASCII;
|
||||
mLastLegalState = mState_ASCII;
|
||||
mData = 0;
|
||||
mRunLength = 0;
|
||||
G2charset = G2_unknown;
|
||||
mGB2312Decoder = nsnull;
|
||||
mEUCKRDecoder = nsnull;
|
||||
@ -148,6 +149,7 @@ public:
|
||||
{
|
||||
mState = mState_ASCII;
|
||||
mLastLegalState = mState_ASCII;
|
||||
mRunLength = 0;
|
||||
setMapMode();
|
||||
return NS_OK;
|
||||
}
|
||||
@ -176,6 +178,7 @@ private:
|
||||
mState_ERROR
|
||||
} mState, mLastLegalState;
|
||||
PRInt32 mData;
|
||||
PRInt32 mRunLength; // the length of a non-ASCII run
|
||||
enum {
|
||||
G2_unknown,
|
||||
G2_ISO88591,
|
||||
|
@ -59,6 +59,7 @@ NS_IMETHODIMP nsISO2022KRToUnicode::Convert(const char * aSrc, PRInt32 * aSrcLen
|
||||
}
|
||||
else if(0x0e == *src) { // Shift-Out
|
||||
mState = mState_KSX1001_1992;
|
||||
mRunLength = 0;
|
||||
}
|
||||
else if(*src & 0x80) {
|
||||
*dest++ = 0xFFFD;
|
||||
@ -103,6 +104,12 @@ NS_IMETHODIMP nsISO2022KRToUnicode::Convert(const char * aSrc, PRInt32 * aSrcLen
|
||||
mState = mLastLegalState;
|
||||
if('C' == *src) {
|
||||
mState = mState_ASCII;
|
||||
if (mRunLength == 0) {
|
||||
if(dest+1 >= destEnd)
|
||||
goto error1;
|
||||
*dest++ = 0xFFFD;
|
||||
}
|
||||
mRunLength = 0;
|
||||
}
|
||||
else {
|
||||
if((dest+4) >= destEnd)
|
||||
@ -122,11 +129,18 @@ NS_IMETHODIMP nsISO2022KRToUnicode::Convert(const char * aSrc, PRInt32 * aSrcLen
|
||||
}
|
||||
else if (0x0f == *src) { // Shift-In (SI)
|
||||
mState = mState_ASCII;
|
||||
if (mRunLength == 0) {
|
||||
if(dest+1 >= destEnd)
|
||||
goto error1;
|
||||
*dest++ = 0xFFFD;
|
||||
}
|
||||
mRunLength = 0;
|
||||
}
|
||||
else if ((PRUint8) *src == 0x20 || (PRUint8) *src == 0x09) {
|
||||
// Allow space and tab between SO and SI (i.e. in Hangul segment)
|
||||
mState = mState_KSX1001_1992;
|
||||
*dest++ = (PRUnichar) *src;
|
||||
++mRunLength;
|
||||
if(dest >= destEnd)
|
||||
goto error1;
|
||||
}
|
||||
@ -164,6 +178,7 @@ NS_IMETHODIMP nsISO2022KRToUnicode::Convert(const char * aSrc, PRInt32 * aSrcLen
|
||||
// Convert EUC-KR to unicode.
|
||||
mEUCKRDecoder->Convert((const char *)ksx, &ksxLen, &uni, &uniLen);
|
||||
*dest++ = uni;
|
||||
++mRunLength;
|
||||
}
|
||||
if(dest >= destEnd)
|
||||
goto error1;
|
||||
|
@ -50,6 +50,7 @@ public:
|
||||
mLastLegalState = mState_ASCII;
|
||||
mData = 0;
|
||||
mEUCKRDecoder = nsnull;
|
||||
mRunLength = 0;
|
||||
}
|
||||
|
||||
virtual ~nsISO2022KRToUnicode()
|
||||
@ -71,6 +72,7 @@ public:
|
||||
{
|
||||
mState = mState_ASCII;
|
||||
mLastLegalState = mState_ASCII;
|
||||
mRunLength = 0;
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
@ -87,6 +89,9 @@ private:
|
||||
|
||||
PRUint8 mData;
|
||||
|
||||
// Length of non-ASCII run
|
||||
PRUint32 mRunLength;
|
||||
|
||||
nsIUnicodeDecoder *mEUCKRDecoder;
|
||||
};
|
||||
#endif // nsISO2022KRToUnicode_h__
|
||||
|
@ -787,8 +787,6 @@ PRIVATE PRBool uCheckAndScan4BytesGB18030(
|
||||
(in[2] - 0x81)) * 10 ) + (in[3] - 0x30);
|
||||
|
||||
*inscanlen = 4;
|
||||
if(data >= 0x00010000)
|
||||
return PR_FALSE;
|
||||
*out = (PRUint16) data;
|
||||
*out = (data < 0x00010000) ? data : 0xFFFD;
|
||||
return PR_TRUE;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user