From 2a1d3f7665d48715fa51c6aff2a2782fd4fdd317 Mon Sep 17 00:00:00 2001 From: "jshin%mailaps.org" Date: Thu, 18 Jul 2002 02:40:19 +0000 Subject: [PATCH] Combining EUC-KR and UHC decoders into one (bug 131388) r=yokoyama, sr=alecf, a=scc --- intl/uconv/ucvko/nsCP949ToUnicode.cpp | 18 +++++-- intl/uconv/ucvko/nsEUCKRToUnicode.cpp | 74 +-------------------------- intl/uconv/ucvko/nsEUCKRToUnicode.h | 12 ++--- 3 files changed, 19 insertions(+), 85 deletions(-) diff --git a/intl/uconv/ucvko/nsCP949ToUnicode.cpp b/intl/uconv/ucvko/nsCP949ToUnicode.cpp index 3333c57c631d..55bfdb862043 100644 --- a/intl/uconv/ucvko/nsCP949ToUnicode.cpp +++ b/intl/uconv/ucvko/nsCP949ToUnicode.cpp @@ -19,7 +19,7 @@ * Portions created by the Initial Developer are Copyright (C) 1998 * the Initial Developer. All Rights Reserved. * - * Contributor(s): + * Contributor(s): Jungshik Shin * * * Alternatively, the contents of this file may be used under the terms of @@ -47,6 +47,14 @@ static const PRUint16 g_ASCIIShiftTable[] = { ShiftCell(0,0,0,0,0,0,0,0) }; +// This is necessary to decode 8byte sequence representation of Hangul +// syllables. This representation is unique to EUC-KR and is not used +// in CP949. However, this converter is for both EUC-KR and CP949 +// so this shift table is put here. See bug 131388. +static const PRUint16 g_DecomposedHangulShiftTable[] = { + 0, uDecomposedHangulCharset, + ShiftCell(0, 0, 0, 0, 0, 0, 0, 0), +}; static const PRUint16 g_EUCKRShiftTable[] = { 0, u2BytesGRCharset, @@ -70,14 +78,17 @@ static const PRUint16 g_utCP949NoKSCHangulMapping[] = { static const uRange g_CP949Ranges[] = { { 0x00, 0x7E }, + { 0xA4, 0xA4 }, // 8byte seq. for Hangul syllables not available + // in pre-composed form in KS X 1001 { 0xA1, 0xFE }, - { 0xA1, 0xFE }, + { 0xA1, 0xC6 }, // CP949 extension B. ends at 0xC6. 
{ 0x80, 0xA0 } }; static const PRUint16 *g_CP949ShiftTableSet [] = { g_ASCIIShiftTable, + g_DecomposedHangulShiftTable, g_EUCKRShiftTable, g_CP949HighShiftTable, g_CP949LowShiftTable @@ -85,6 +96,7 @@ static const PRUint16 *g_CP949ShiftTableSet [] = { static const PRUint16 *g_CP949MappingTableSet [] ={ g_AsciiMapping, + g_HangulNullMapping, g_utKSC5601Mapping, g_utCP949NoKSCHangulMapping, g_utCP949NoKSCHangulMapping @@ -97,7 +109,7 @@ static const PRUint16 *g_CP949MappingTableSet [] ={ // Class nsCP949ToUnicode [implementation] nsCP949ToUnicode::nsCP949ToUnicode() -: nsMultiTableDecoderSupport(4, +: nsMultiTableDecoderSupport(sizeof(g_CP949Ranges) / sizeof(g_CP949Ranges[0]), (uRange*) &g_CP949Ranges, (uShiftTable**) &g_CP949ShiftTableSet, (uMappingTable**) &g_CP949MappingTableSet) diff --git a/intl/uconv/ucvko/nsEUCKRToUnicode.cpp b/intl/uconv/ucvko/nsEUCKRToUnicode.cpp index 22ad29084950..3df47f3b3b4a 100644 --- a/intl/uconv/ucvko/nsEUCKRToUnicode.cpp +++ b/intl/uconv/ucvko/nsEUCKRToUnicode.cpp @@ -39,82 +39,10 @@ #include "nsEUCKRToUnicode.h" #include "nsUCvKODll.h" -//---------------------------------------------------------------------- -// Global functions and data [declaration] - -static const PRUint16 g_ASCIIShiftTable[] = { - 0, u1ByteCharset, - ShiftCell(0,0,0,0,0,0,0,0) -}; - -#ifdef MS_EUCKR -static const PRUint16 g_EUCKRMappingTable[] = { -#include "u20ksc.ut" -}; - -static const PRUint16 g_EUCKRShiftTable[] = { - 0, u2BytesCharset, - ShiftCell(0, 0, 0, 0, 0, 0, 0, 0) -}; - -static const uRange g_EUCKRRanges[] = { - { 0x00, 0x7E }, - { 0x81, 0xFE } -}; -#else - -static const PRUint16 g_EUCKRShiftTable[] = { - 0, u2BytesGRCharset, - ShiftCell(0, 0, 0, 0, 0, 0, 0, 0) -}; - -static const uRange g_EUCKRRanges[] = { - { 0x00, 0x7E }, - { 0xA4, 0xA4 }, // 8byte seq. 
for Hangul syllables not available - // in pre-composed form in KS X 1001 - { 0xA1, 0xFE } -}; -#endif - -static const PRUint16 g_DecomposedHangulShiftTable[] = { - 0, uDecomposedHangulCharset, - ShiftCell(0, 0, 0, 0, 0, 0, 0, 0), -}; - - - -static const PRUint16 *g_EUCKRShiftTableSet [] = { - g_ASCIIShiftTable, - g_DecomposedHangulShiftTable, - g_EUCKRShiftTable -}; - -static const PRUint16 *g_EUCKRMappingTableSet [] ={ - g_AsciiMapping, - g_HangulNullMapping, - g_utKSC5601Mapping -}; - - //---------------------------------------------------------------------- // Class nsEUCKRToUnicode [implementation] nsEUCKRToUnicode::nsEUCKRToUnicode() -: nsMultiTableDecoderSupport(3, - (uRange*) &g_EUCKRRanges, - (uShiftTable**) &g_EUCKRShiftTableSet, - (uMappingTable**) &g_EUCKRMappingTableSet) +: nsCP949ToUnicode() { } - -//---------------------------------------------------------------------- -// Subclassing of nsTablesDecoderSupport class [implementation] - -NS_IMETHODIMP nsEUCKRToUnicode::GetMaxLength(const char * aSrc, - PRInt32 aSrcLength, - PRInt32 * aDestLength) -{ - // we are a single byte to Unicode converter, so... - *aDestLength = aSrcLength; - return NS_OK_UDEC_EXACTLENGTH; -} diff --git a/intl/uconv/ucvko/nsEUCKRToUnicode.h b/intl/uconv/ucvko/nsEUCKRToUnicode.h index 309b05200a6c..5fb27091c22a 100644 --- a/intl/uconv/ucvko/nsEUCKRToUnicode.h +++ b/intl/uconv/ucvko/nsEUCKRToUnicode.h @@ -40,6 +40,7 @@ #define nsEUCKRToUnicode_h___ #include "nsUCvKOSupport.h" +#include "nsCP949ToUnicode.h" //---------------------------------------------------------------------- // Class nsEUCKRToUnicode [declaration] @@ -50,7 +51,8 @@ * @created 06/Apr/1999 * @author Catalin Rotaru [CATA] */ -class nsEUCKRToUnicode : public nsMultiTableDecoderSupport +// Just make it an alias to CP949 decoder. bug 131388 +class nsEUCKRToUnicode : public nsCP949ToUnicode { public: @@ -58,14 +60,6 @@ public: * Class constructor. 
*/ nsEUCKRToUnicode(); - -protected: - - //-------------------------------------------------------------------- - // Subclassing of nsDecoderSupport class [declaration] - - NS_IMETHOD GetMaxLength(const char * aSrc, PRInt32 aSrcLength, - PRInt32 * aDestLength); }; #endif /* nsEUCKRToUnicode_h___ */