add composed Hangul support

This commit is contained in:
ftang%netscape.com 1999-07-16 13:03:32 +00:00
parent 02afed6d74
commit 04317bc727
4 changed files with 300 additions and 0 deletions

View File

@ -172,6 +172,15 @@ PRIVATE PRBool uCheckAndGenAlways1ByteShiftGL(
PRUint32* outlen
);
/* Generator for one Hangul syllable code unit: checks that `outbuflen` is at
   least 8, then writes a fixed 8-byte sequence to `out` and reports the length
   through `outlen`. `shift` and `state` are accepted but not used by the
   definition. */
PRIVATE PRBool uCnGAlways8BytesComposedHangul(
uShiftTable *shift,
PRInt32* state,
PRUint16 in,
unsigned char* out,
PRUint32 outbuflen,
PRUint32* outlen
);
PRIVATE PRBool uGenAlways2Byte(
PRUint16 in,
unsigned char* out
@ -216,6 +225,7 @@ PRIVATE uGeneratorFunc m_generator[uNumOfCharsetType] =
uCheckAndGen2ByteGRPrefix8EA6,
uCheckAndGen2ByteGRPrefix8EA7,
uCheckAndGenAlways1ByteShiftGL,
uCnGAlways8BytesComposedHangul
};
/*=================================================================================
@ -677,3 +687,54 @@ PRIVATE PRBool uCheckAndGenAlways1ByteShiftGL(
return PR_TRUE;
}
}
#define SBase 0xAC00
#define LCount 19
#define VCount 21
#define TCount 28
#define NCount (VCount * TCount)
/*=================================================================================
  uCnGAlways8BytesComposedHangul

  Writes one precomposed Hangul syllable as a fixed 8-byte sequence made of
  four 2-byte codes, each with lead byte 0xa4:

      a4 d4 | a4 <leading consonant> | a4 <vowel> | a4 <trailing consonant>

  shift, state  not used by this generator
  in            UTF-16 code unit, expected in [SBase, SBase + LCount*NCount)
  out           destination buffer
  outbuflen     number of bytes available in out
  outlen        receives 8 on success; left untouched on failure

  Returns PR_TRUE when 8 bytes were written. Returns PR_FALSE when the buffer
  holds fewer than 8 bytes or when `in` is not a precomposed Hangul syllable.
=================================================================================*/
PRIVATE PRBool uCnGAlways8BytesComposedHangul(
    uShiftTable *shift,
    PRInt32* state,
    PRUint16 in,
    unsigned char* out,
    PRUint32 outbuflen,
    PRUint32* outlen
)
{
    /* LIndex -> trail byte of the 0xa4-row code for the leading consonant */
    static const PRUint8 lMap[LCount] = {
        0xa1, 0xa2, 0xa4, 0xa7, 0xa8, 0xa9, 0xb1, 0xb2, 0xb3, 0xb5,
        0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe
    };
    /* TIndex -> trail byte of the 0xa4-row code for the trailing consonant;
       TIndex 0 (no trailing consonant) is written as 0xd4 */
    static const PRUint8 tMap[TCount] = {
        0xd4, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa9, 0xaa,
        0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb4, 0xb5,
        0xb6, 0xb7, 0xb8, 0xba, 0xbb, 0xbc, 0xbd, 0xbe
    };
    PRUint16 SIndex, LIndex, VIndex, TIndex;

    if (outbuflen < 8)
        return PR_FALSE;

    /* Unicode 2.0 page 3-13, Hangul Syllable Decomposition, item 1 */
    SIndex = in - SBase;

    /* Reject anything outside the syllable block. The subtraction above wraps
       for in < SBase, so this single unsigned comparison covers both ends and
       keeps the table lookups below in bounds. */
    if (SIndex >= LCount * NCount)
        return PR_FALSE;

    /* Unicode 2.0 page 3-14, Hangul Syllable Decomposition, item 2 (modified
       to yield table indices instead of jamo code points) */
    LIndex = SIndex / NCount;
    VIndex = (SIndex % NCount) / TCount;
    TIndex = SIndex % TCount;

    *outlen = 8;
    /* every 2-byte code starts with 0xa4; the first code is always a4 d4 */
    out[0] = out[2] = out[4] = out[6] = 0xa4;
    out[1] = 0xd4;
    /* the jamo trail bytes go in the odd positions, after each 0xa4 lead */
    out[3] = lMap[LIndex];
    out[5] = (unsigned char)(VIndex + 0xbf);
    out[7] = tMap[TIndex];
    return PR_TRUE;
}

View File

@ -165,6 +165,14 @@ PRIVATE PRBool uCheckAndScanAlways1ByteShiftGL(
PRUint32 inbuflen,
PRUint32* inscanlen
);
/* Scanner for an 8-byte Hangul sequence: reads 8 bytes from `in`, stores the
   resulting syllable code unit in `out` and the consumed length in
   `inscanlen`. `shift` and `state` are accepted but not used by the
   definition. */
PRIVATE PRBool uCnSAlways8BytesComposedHangul(
uShiftTable *shift,
PRInt32* state,
unsigned char *in,
PRUint16 *out,
PRUint32 inbuflen,
PRUint32* inscanlen
);
PRIVATE PRBool uScanAlways2Byte(
unsigned char* in,
@ -210,6 +218,7 @@ PRIVATE uScannerFunc m_scanner[uNumOfCharsetType] =
uCheckAndScan2ByteGRPrefix8EA6,
uCheckAndScan2ByteGRPrefix8EA7,
uCheckAndScanAlways1ByteShiftGL,
uCnSAlways8BytesComposedHangul
};
/*=================================================================================
@ -637,3 +646,83 @@ PRIVATE PRBool uCheckAndScanAlways1ByteShiftGL(
return PR_TRUE;
}
/*=================================================================================
=================================================================================*/
#define SBase 0xAC00
#define SCount 11172
#define LCount 19
#define VCount 21
#define TCount 28
#define NCount (VCount * TCount)
/*
 * uCnSAlways8BytesComposedHangul
 *
 * Reads one Hangul syllable stored as a fixed 8-byte sequence of four 2-byte
 * codes, each with lead byte 0xa4:
 *
 *     a4 d4 | a4 <leading consonant> | a4 <vowel> | a4 <trailing consonant>
 *
 * and composes the corresponding precomposed syllable.
 *
 * shift, state  not used by this scanner
 * in            input bytes
 * out           receives the composed UTF-16 code unit on success
 * inbuflen      number of bytes available in `in`
 * inscanlen     receives 8 on success; left untouched on failure
 *
 * Returns PR_TRUE on success. Returns PR_FALSE when fewer than 8 bytes are
 * available, when the fixed a4/d4 framing bytes do not match, or when any of
 * the three jamo bytes is not a valid leading consonant, vowel or trailing
 * consonant.
 */
PRIVATE PRBool uCnSAlways8BytesComposedHangul(
    uShiftTable *shift,
    PRInt32* state,
    unsigned char *in,
    PRUint16 *out,
    PRUint32 inbuflen,
    PRUint32* inscanlen
)
{
    /* trail byte (0xa1..0xbe) -> LIndex; 0xff marks a byte that is not a
       leading consonant */
    static const PRUint8 lMap[] = {
        /* A1   A2   A3   A4   A5   A6   A7 */
            0,   1,0xff,   2,0xff,0xff,   3,
        /* A8   A9   AA   AB   AC   AD   AE   AF */
            4,   5,0xff,0xff,0xff,0xff,0xff,0xff,
        /* B0   B1   B2   B3   B4   B5   B6   B7 */
         0xff,   6,   7,   8,0xff,   9,  10,  11,
        /* B8   B9   BA   BB   BC   BD   BE */
           12,  13,  14,  15,  16,  17,  18
    };
    /* trail byte (0xa1..0xbe) -> TIndex; 0xff marks a byte that is not a
       trailing consonant */
    static const PRUint8 tMap[] = {
        /* A1   A2   A3   A4   A5   A6   A7 */
            1,   2,   3,   4,   5,   6,   7,
        /* A8   A9   AA   AB   AC   AD   AE   AF */
         0xff,   8,   9,  10,  11,  12,  13,  14,
        /* B0   B1   B2   B3   B4   B5   B6   B7 */
           15,  16,  17,0xff,  18,  19,  20,  21,
        /* B8   B9   BA   BB   BC   BD   BE */
           22,0xff,  23,  24,  25,  26,  27
    };
    PRUint16 LIndex, VIndex, TIndex;

    /* need 8 bytes, an a4 d4 prefix, and an a4 lead byte on every jamo code */
    if ((inbuflen < 8) || (0xa4 != in[0]) || (0xd4 != in[1]) ||
        (0xa4 != in[2]) || (0xa4 != in[4]) || (0xa4 != in[6]))
        return PR_FALSE;

    /* Leading consonant. The range test must come first so the table index
       stays inside lMap. */
    if ((in[3] < 0xa1) || (in[3] > 0xbe))
        return PR_FALSE;
    LIndex = lMap[in[3] - 0xa1];
    if (0xff == LIndex)
        return PR_FALSE;

    /* Medial vowel: 0xbf..0xd3 map linearly onto VIndex 0..VCount-1 */
    if ((in[5] < 0xbf) || (in[5] > 0xd3))
        return PR_FALSE;
    VIndex = in[5] - 0xbf;

    /* Trailing consonant: 0xd4 means the syllable has none (TIndex 0) */
    if (0xd4 == in[7]) {
        TIndex = 0;
    } else {
        if ((in[7] < 0xa1) || (in[7] > 0xbe))
            return PR_FALSE;
        TIndex = tMap[in[7] - 0xa1];
        if (0xff == TIndex)
            return PR_FALSE;
    }

    *inscanlen = 8;
    /* Unicode 2.0 page 3-13, Hangul syllable composition, item 5 */
    *out = (PRUint16)((LIndex * VCount + VIndex) * TCount + TIndex + SBase);
    return PR_TRUE;
}

View File

@ -172,6 +172,15 @@ PRIVATE PRBool uCheckAndGenAlways1ByteShiftGL(
PRUint32* outlen
);
/* Generator for one Hangul syllable code unit: checks that `outbuflen` is at
   least 8, then writes a fixed 8-byte sequence to `out` and reports the length
   through `outlen`. `shift` and `state` are accepted but not used by the
   definition. */
PRIVATE PRBool uCnGAlways8BytesComposedHangul(
uShiftTable *shift,
PRInt32* state,
PRUint16 in,
unsigned char* out,
PRUint32 outbuflen,
PRUint32* outlen
);
PRIVATE PRBool uGenAlways2Byte(
PRUint16 in,
unsigned char* out
@ -216,6 +225,7 @@ PRIVATE uGeneratorFunc m_generator[uNumOfCharsetType] =
uCheckAndGen2ByteGRPrefix8EA6,
uCheckAndGen2ByteGRPrefix8EA7,
uCheckAndGenAlways1ByteShiftGL,
uCnGAlways8BytesComposedHangul
};
/*=================================================================================
@ -677,3 +687,54 @@ PRIVATE PRBool uCheckAndGenAlways1ByteShiftGL(
return PR_TRUE;
}
}
#define SBase 0xAC00
#define LCount 19
#define VCount 21
#define TCount 28
#define NCount (VCount * TCount)
/*=================================================================================
  uCnGAlways8BytesComposedHangul

  Writes one precomposed Hangul syllable as a fixed 8-byte sequence made of
  four 2-byte codes, each with lead byte 0xa4:

      a4 d4 | a4 <leading consonant> | a4 <vowel> | a4 <trailing consonant>

  shift, state  not used by this generator
  in            UTF-16 code unit, expected in [SBase, SBase + LCount*NCount)
  out           destination buffer
  outbuflen     number of bytes available in out
  outlen        receives 8 on success; left untouched on failure

  Returns PR_TRUE when 8 bytes were written. Returns PR_FALSE when the buffer
  holds fewer than 8 bytes or when `in` is not a precomposed Hangul syllable.
=================================================================================*/
PRIVATE PRBool uCnGAlways8BytesComposedHangul(
    uShiftTable *shift,
    PRInt32* state,
    PRUint16 in,
    unsigned char* out,
    PRUint32 outbuflen,
    PRUint32* outlen
)
{
    /* LIndex -> trail byte of the 0xa4-row code for the leading consonant */
    static const PRUint8 lMap[LCount] = {
        0xa1, 0xa2, 0xa4, 0xa7, 0xa8, 0xa9, 0xb1, 0xb2, 0xb3, 0xb5,
        0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe
    };
    /* TIndex -> trail byte of the 0xa4-row code for the trailing consonant;
       TIndex 0 (no trailing consonant) is written as 0xd4 */
    static const PRUint8 tMap[TCount] = {
        0xd4, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa9, 0xaa,
        0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb4, 0xb5,
        0xb6, 0xb7, 0xb8, 0xba, 0xbb, 0xbc, 0xbd, 0xbe
    };
    PRUint16 SIndex, LIndex, VIndex, TIndex;

    if (outbuflen < 8)
        return PR_FALSE;

    /* Unicode 2.0 page 3-13, Hangul Syllable Decomposition, item 1 */
    SIndex = in - SBase;

    /* Reject anything outside the syllable block. The subtraction above wraps
       for in < SBase, so this single unsigned comparison covers both ends and
       keeps the table lookups below in bounds. */
    if (SIndex >= LCount * NCount)
        return PR_FALSE;

    /* Unicode 2.0 page 3-14, Hangul Syllable Decomposition, item 2 (modified
       to yield table indices instead of jamo code points) */
    LIndex = SIndex / NCount;
    VIndex = (SIndex % NCount) / TCount;
    TIndex = SIndex % TCount;

    *outlen = 8;
    /* every 2-byte code starts with 0xa4; the first code is always a4 d4 */
    out[0] = out[2] = out[4] = out[6] = 0xa4;
    out[1] = 0xd4;
    /* the jamo trail bytes go in the odd positions, after each 0xa4 lead */
    out[3] = lMap[LIndex];
    out[5] = (unsigned char)(VIndex + 0xbf);
    out[7] = tMap[TIndex];
    return PR_TRUE;
}

View File

@ -165,6 +165,14 @@ PRIVATE PRBool uCheckAndScanAlways1ByteShiftGL(
PRUint32 inbuflen,
PRUint32* inscanlen
);
/* Scanner for an 8-byte Hangul sequence: reads 8 bytes from `in`, stores the
   resulting syllable code unit in `out` and the consumed length in
   `inscanlen`. `shift` and `state` are accepted but not used by the
   definition. */
PRIVATE PRBool uCnSAlways8BytesComposedHangul(
uShiftTable *shift,
PRInt32* state,
unsigned char *in,
PRUint16 *out,
PRUint32 inbuflen,
PRUint32* inscanlen
);
PRIVATE PRBool uScanAlways2Byte(
unsigned char* in,
@ -210,6 +218,7 @@ PRIVATE uScannerFunc m_scanner[uNumOfCharsetType] =
uCheckAndScan2ByteGRPrefix8EA6,
uCheckAndScan2ByteGRPrefix8EA7,
uCheckAndScanAlways1ByteShiftGL,
uCnSAlways8BytesComposedHangul
};
/*=================================================================================
@ -637,3 +646,83 @@ PRIVATE PRBool uCheckAndScanAlways1ByteShiftGL(
return PR_TRUE;
}
/*=================================================================================
=================================================================================*/
#define SBase 0xAC00
#define SCount 11172
#define LCount 19
#define VCount 21
#define TCount 28
#define NCount (VCount * TCount)
/*
 * uCnSAlways8BytesComposedHangul
 *
 * Reads one Hangul syllable stored as a fixed 8-byte sequence of four 2-byte
 * codes, each with lead byte 0xa4:
 *
 *     a4 d4 | a4 <leading consonant> | a4 <vowel> | a4 <trailing consonant>
 *
 * and composes the corresponding precomposed syllable.
 *
 * shift, state  not used by this scanner
 * in            input bytes
 * out           receives the composed UTF-16 code unit on success
 * inbuflen      number of bytes available in `in`
 * inscanlen     receives 8 on success; left untouched on failure
 *
 * Returns PR_TRUE on success. Returns PR_FALSE when fewer than 8 bytes are
 * available, when the fixed a4/d4 framing bytes do not match, or when any of
 * the three jamo bytes is not a valid leading consonant, vowel or trailing
 * consonant.
 */
PRIVATE PRBool uCnSAlways8BytesComposedHangul(
    uShiftTable *shift,
    PRInt32* state,
    unsigned char *in,
    PRUint16 *out,
    PRUint32 inbuflen,
    PRUint32* inscanlen
)
{
    /* trail byte (0xa1..0xbe) -> LIndex; 0xff marks a byte that is not a
       leading consonant */
    static const PRUint8 lMap[] = {
        /* A1   A2   A3   A4   A5   A6   A7 */
            0,   1,0xff,   2,0xff,0xff,   3,
        /* A8   A9   AA   AB   AC   AD   AE   AF */
            4,   5,0xff,0xff,0xff,0xff,0xff,0xff,
        /* B0   B1   B2   B3   B4   B5   B6   B7 */
         0xff,   6,   7,   8,0xff,   9,  10,  11,
        /* B8   B9   BA   BB   BC   BD   BE */
           12,  13,  14,  15,  16,  17,  18
    };
    /* trail byte (0xa1..0xbe) -> TIndex; 0xff marks a byte that is not a
       trailing consonant */
    static const PRUint8 tMap[] = {
        /* A1   A2   A3   A4   A5   A6   A7 */
            1,   2,   3,   4,   5,   6,   7,
        /* A8   A9   AA   AB   AC   AD   AE   AF */
         0xff,   8,   9,  10,  11,  12,  13,  14,
        /* B0   B1   B2   B3   B4   B5   B6   B7 */
           15,  16,  17,0xff,  18,  19,  20,  21,
        /* B8   B9   BA   BB   BC   BD   BE */
           22,0xff,  23,  24,  25,  26,  27
    };
    PRUint16 LIndex, VIndex, TIndex;

    /* need 8 bytes, an a4 d4 prefix, and an a4 lead byte on every jamo code */
    if ((inbuflen < 8) || (0xa4 != in[0]) || (0xd4 != in[1]) ||
        (0xa4 != in[2]) || (0xa4 != in[4]) || (0xa4 != in[6]))
        return PR_FALSE;

    /* Leading consonant. The range test must come first so the table index
       stays inside lMap. */
    if ((in[3] < 0xa1) || (in[3] > 0xbe))
        return PR_FALSE;
    LIndex = lMap[in[3] - 0xa1];
    if (0xff == LIndex)
        return PR_FALSE;

    /* Medial vowel: 0xbf..0xd3 map linearly onto VIndex 0..VCount-1 */
    if ((in[5] < 0xbf) || (in[5] > 0xd3))
        return PR_FALSE;
    VIndex = in[5] - 0xbf;

    /* Trailing consonant: 0xd4 means the syllable has none (TIndex 0) */
    if (0xd4 == in[7]) {
        TIndex = 0;
    } else {
        if ((in[7] < 0xa1) || (in[7] > 0xbe))
            return PR_FALSE;
        TIndex = tMap[in[7] - 0xa1];
        if (0xff == TIndex)
            return PR_FALSE;
    }

    *inscanlen = 8;
    /* Unicode 2.0 page 3-13, Hangul syllable composition, item 5 */
    *out = (PRUint16)((LIndex * VCount + VIndex) * TCount + TIndex + SBase);
    return PR_TRUE;
}