/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 *
 * The contents of this file are subject to the Netscape Public License
 * Version 1.0 (the "NPL"); you may not use this file except in
 * compliance with the NPL. You may obtain a copy of the NPL at
 * http://www.mozilla.org/NPL/
 *
 * Software distributed under the NPL is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
 * for the specific language governing rights and limitations under the
 * NPL.
 *
 * The Initial Developer of this code under the NPL is Netscape
 * Communications Corporation. Portions created by Netscape are
 * Copyright (C) 1998 Netscape Communications Corporation. All Rights
 * Reserved.
 */
/*----------------------------------------------------------------------------
    Function UCS2ToValueAndInfo
----------------------------------------------------------------------------*/
#include "intlpriv.h"
#include "ugen.h"
#include "umap.h"
#include "csid.h"
#include "xp_mem.h"
#include "xpassert.h"
#include "unicpriv.h"
#include "libi18n.h"
#if defined(XP_WIN) || defined(XP_OS2)
#include "prlink.h"
#endif

/*
 * This table was moved here from npc.c (where it was called npctocsid[])
 * and renamed, as part of getting rid of npc.c.
 *
 * It is read only through intl_GetValidCSID() below. Each slot holds the
 * csid to report for that index; slots whose charset name appears only in
 * a trailing comment are deliberately mapped to CS_UNKNOWN.
 */
PRIVATE int16 csidtable[MAXCSIDINTBL] = {
    /* 0x00 - 0x0F */
    CS_DEFAULT,
    CS_ASCII,
    CS_LATIN1,
    CS_JIS,
    CS_SJIS,
    CS_EUCJP,
    CS_MAC_ROMAN,
    CS_BIG5,
    CS_GB_8BIT,
    CS_CNS_8BIT,
    CS_LATIN2,
    CS_MAC_CE,
    CS_KSC_8BIT,
    CS_2022_KR,
    CS_8859_3,
    CS_8859_4,
    /* 0x10 - 0x1F */
    CS_8859_5,
    CS_8859_6,
    CS_8859_7,
    CS_8859_8,
    CS_8859_9,
    CS_SYMBOL,
    CS_DINGBATS,
    CS_DECTECH,
    CS_CNS11643_1,
    CS_CNS11643_2,
    CS_JISX0208,
    CS_JISX0201,
    CS_KSC5601,
    CS_TIS620,
    CS_JISX0212,
    CS_GB2312,
    /* 0x20 - 0x2F */
    CS_UNKNOWN, /* CS_UCS2 */
    CS_UNKNOWN, /* CS_UCS4 */
    CS_UNKNOWN, /* CS_UTF8 */
    CS_UNKNOWN, /* CS_UTF7 */
    CS_UNKNOWN, /* CS_NPC */
    CS_X_BIG5,
    CS_UNKNOWN, /* USRDEF2 */
    CS_KOI8_R,
    CS_MAC_CYRILLIC,
    CS_CP_1251,
    CS_MAC_GREEK,
    CS_CP_1253,
    CS_CP_1250,
    CS_CP_1254,
    CS_MAC_TURKISH,
    CS_UNKNOWN, /* CS_GB2312_11 */
    /* 0x30 - 0x3F */
    CS_UNKNOWN, /* CS_JISX0208_11 */
    CS_UNKNOWN, /* CS_KSC5601_11 */
    CS_UNKNOWN, /* CS_CNS11643_1110 */
    CS_UNKNOWN, /* CS_UCS2_SWAP */
    CS_UNKNOWN, /* CS_IMAP4_UTF7 */
    CS_CP_850,
    CS_CP_852,
    CS_CP_855,
    CS_CP_857,
    CS_CP_862,
    CS_CP_864,
    CS_CP_866,
    CS_CP_874,
    CS_CP_1257,
    CS_CP_1258,
    CS_ARMSCII8,
    /* 0x40 - 0x4F */
    CS_HZ,
    CS_ISO_2022_CN,
    CS_GB13000,
    CS_BIG5_PLUS,
    CS_UHC,
    CS_CNS11643_3,
    CS_CNS11643_4,
    CS_CNS11643_5,
    CS_CNS11643_6,
    CS_CNS11643_7,
    CS_CNS11643_8,
    CS_CNS11643_9,
    CS_CNS11643_10,
    CS_CNS11643_11,
    CS_CNS11643_12,
    CS_CNS11643_13,
    /* 0x50 - 0x5F */
    CS_CNS11643_14,
    CS_CNS11643_15,
    CS_CNS11643_16,
    CS_VIET_VISCII,
    CS_VIET_VIQR,
    CS_KOI8_U,
    CS_ISO_IR_111,
    CS_8859_6_I,
    CS_8859_6_E,
    CS_8859_8_I,
    CS_8859_8_E,
    CS_JOHAB,
    CS_JOHABFONT,
    CS_VIET_VPS,
    CS_VIET_TCVN,
    CS_VIET_VNI,
    /* 0x60 - 0x6F */
    CS_T61,
    CS_UNKNOWN,
    CS_UNKNOWN,
    CS_UNKNOWN,
    CS_UNKNOWN,
    CS_UNKNOWN,
    CS_UNKNOWN,
    CS_UNKNOWN,
    CS_UNKNOWN,
    CS_UNKNOWN,
    CS_UNKNOWN,
    CS_UNKNOWN,
    CS_UNKNOWN,
    CS_UNKNOWN,
    CS_UNKNOWN,
    CS_UNKNOWN,
    /* 0x70 - 0x7F */
    CS_UNKNOWN,
    CS_UNKNOWN,
    CS_UNKNOWN,
    CS_UNKNOWN,
    CS_UNKNOWN,
    CS_UNKNOWN,
    CS_UNKNOWN,
    CS_UNKNOWN,
    CS_UNKNOWN,
    CS_UNKNOWN,
    CS_UNKNOWN,
    CS_UNKNOWN,
    CS_UNKNOWN,
    CS_UNKNOWN,
    CS_UNKNOWN,
    CS_UNKNOWN,
};

/*
 * Look up the csid stored for index fb. The index is masked with
 * (MAXCSIDINTBL - 1) before use, which only keeps it inside the table when
 * MAXCSIDINTBL is a power of two.
 * NOTE(review): the 0x00 - 0x7F layout above suggests MAXCSIDINTBL is 128;
 * confirm against its definition.
 */
#define intl_GetValidCSID(fb) (csidtable[(fb) & (MAXCSIDINTBL - 1)])

/*
 * The global UCS2 lookup table is divided into 256 rows (selected by the
 * high byte of the UCS2 code point). Each row has 256 entries (selected by
 * the low byte), and each entry has one value and one info byte.
 * The info field contains the csid index.
 */
typedef struct {
    uint16 value[256];
    unsigned char info[256];
} uRowTable;

/* One pointer per row; a NULL pointer means the row holds no mapping. */
PRIVATE uRowTable *uRowTablePtArray[256];

/* Platform-specific table loaders/unloaders (defined per platform below). */
PRIVATE uTable* LoadToUCS2Table(uint16 csid);
PRIVATE void UnloadToUCS2Table(uint16 csid, uTable *utblPtr);
PRIVATE uTable* LoadFromUCS2Table(uint16 csid);
PRIVATE void UnloadFromUCS2Table(uint16 csid, uTable *utblPtr);

/* Row-table helpers. */
PRIVATE void CheckAndAddEntry(uint16 ucs2, uint16 med , uint16 csid);
PRIVATE XP_Bool UCS2ToValueAndInfo(uint16 ucs2, uint16* med, unsigned char* info);
PRIVATE void InitUCS2Table(void);
/* UCS2 Table- is build into the navigator
*/
/*
 * Ucs2Tbl and asciiTbl are small hand-written tables compiled into every
 * build; the loaders return them cast to uTable*. They differ only in the
 * seventh word (0xFFFF vs 0x007F).
 * NOTE(review): that word is presumably the upper bound of the covered
 * range; the uTable layout is declared elsewhere - confirm there.
 */
PRIVATE uint16 Ucs2Tbl[] = {
    0x0001, 0x0004, 0x0005, 0x0008, 0x0000, 0x0000, 0xFFFF, 0x0000
};
PRIVATE uint16 asciiTbl[] = {
    0x0001, 0x0004, 0x0005, 0x0008, 0x0000, 0x0000, 0x007F, 0x0000
};

#ifdef XP_UNIX
/*
 * On UNIX the conversion tables are linked into the binary: each array
 * below is filled by including a generated .uf (from-Unicode) or .ut
 * (to-Unicode) file.
 */
/* Currently, we only support the Latin 1 and Japanese Table. */
/* We will add more table here after the first run */
/*--------------------------------------------------------------------------*/
/* Latin stuff */
PRIVATE uint16 iso88591FromTbl[] = {
#include "8859-1.uf"
};
PRIVATE uint16 iso88591ToTbl[] = {
#include "8859-1.ut"
};
/*--------------------------------------------------------------------------*/
PRIVATE uint16 iso88595FromTbl[] = {
#include "8859-5.uf"
};
PRIVATE uint16 iso88595ToTbl[] = {
#include "8859-5.ut"
};
/*--------------------------------------------------------------------------*/
PRIVATE uint16 iso88597FromTbl[] = {
#include "8859-7.uf"
};
PRIVATE uint16 iso88597ToTbl[] = {
#include "8859-7.ut"
};
/*--------------------------------------------------------------------------*/
PRIVATE uint16 iso88599FromTbl[] = {
#include "8859-9.uf"
};
PRIVATE uint16 iso88599ToTbl[] = {
#include "8859-9.ut"
};
/*--------------------------------------------------------------------------*/
PRIVATE uint16 koi8uFromTbl[] = {
#include "koi8u.uf"
};
PRIVATE uint16 koi8uToTbl[] = {
#include "koi8u.ut"
};
/*--------------------------------------------------------------------------*/
PRIVATE uint16 koi8rFromTbl[] = {
#include "koi8r.uf"
};
PRIVATE uint16 koi8rToTbl[] = {
#include "koi8r.ut"
};
PRIVATE uint16 cp1251FromTbl[] = {
#include "cp1251.uf"
};
PRIVATE uint16 cp1251ToTbl[] = {
#include "cp1251.ut"
};
/*--------------------------------------------------------------------------*/
/* ArmSCII */
PRIVATE uint16 armsciiFromTbl[] = {
#include "armscii.uf"
};
PRIVATE uint16 armsciiToTbl[] = {
#include "armscii.ut"
};
/*--------------------------------------------------------------------------*/
/* Japanese stuff */
PRIVATE uint16 JIS0208FromTbl[] = {
#include "jis0208.uf"
};
PRIVATE uint16 JIS0208ToTbl[] = {
#include "jis0208.ut"
};
PRIVATE uint16 JIS0201FromTbl[] = {
#include "jis0201.uf"
};
PRIVATE uint16 JIS0201ToTbl[] = {
#include "jis0201.ut"
};
PRIVATE uint16 JIS0212FromTbl[] = {
#include "jis0212.uf"
};
PRIVATE uint16 JIS0212ToTbl[] = {
#include "jis0212.ut"
};
PRIVATE uint16 SJISFromTbl[] = {
#include "sjis.uf"
};
PRIVATE uint16 SJISToTbl[] = {
#include "sjis.ut"
};
/*--------------------------------------------------------------------------*/
/* Latin2 Stuff */
PRIVATE uint16 iso88592FromTbl[] = {
#include "8859-2.uf"
};
PRIVATE uint16 iso88592ToTbl[] = {
#include "8859-2.ut"
};
/*--------------------------------------------------------------------------*/
/* Traditional Chinese Stuff */
PRIVATE uint16 CNS11643_1FromTbl[] = {
#include "cns_1.uf"
};
PRIVATE uint16 CNS11643_1ToTbl[] = {
#include "cns_1.ut"
};
PRIVATE uint16 CNS11643_2FromTbl[] = {
#include "cns_2.uf"
};
PRIVATE uint16 CNS11643_2ToTbl[] = {
#include "cns_2.ut"
};
PRIVATE uint16 Big5FromTbl[] = {
#include "big5.uf"
};
PRIVATE uint16 Big5ToTbl[] = {
#include "big5.ut"
};
/*--------------------------------------------------------------------------*/
/* Simplified Chinese Stuff */
PRIVATE uint16 GB2312FromTbl[] = {
#include "gb2312.uf"
};
PRIVATE uint16 GB2312ToTbl[] = {
#include "gb2312.ut"
};
/*--------------------------------------------------------------------------*/
/* Korean Stuff */
/* For UNIX the Korean UNICODE 2.0 table is u20kscgl.u[tf]
   They are GL base table that contains minimun set of Korean table
   that the UNIX actually can handle.
*/
PRIVATE uint16 KSC5601FromTbl[] = {
#include "u20kscgl.uf"
};
PRIVATE uint16 KSC5601ToTbl[] = {
#include "u20kscgl.ut"
};
/*--------------------------------------------------------------------------*/
/* Symbol Stuff */
PRIVATE uint16 SymbolFromTbl[] = {
#include "macsymbo.uf"
};
PRIVATE uint16 SymbolToTbl[] = {
#include "macsymbo.ut"
};
/*--------------------------------------------------------------------------*/
/* Dingbats Stuff */
PRIVATE uint16 DingbatsFromTbl[] = {
#include "macdingb.uf"
};
PRIVATE uint16 DingbatsToTbl[] = {
#include "macdingb.ut"
};
/*--------------------------------------------------------------------------*/
/* T61 Stuff */
PRIVATE uint16 T61FromTbl[] = {
#include "t61.uf"
};
PRIVATE uint16 T61ToTbl[] = {
#include "t61.ut"
};
/*--------------------------------------------------------------------------*/
/* Thai */
PRIVATE uint16 TIS620FromTbl[] = {
#include "cp874.uf"
/* use cp874 until we have a real TIS 620 table */
};
PRIVATE uint16 TIS620ToTbl[] = {
#include "cp874.ut"
/* use cp874 until we have a real TIS 620 table */
};
/*--------------------------------------------------------------------------*/
/*
 * XP_UNIX: return the statically linked "to UCS2" table for csid, or NULL
 * when no table for that csid is linked into this build.
 * Debug builds assert that csid is neither CS_UNKNOWN nor CS_DEFAULT.
 */
PRIVATE uTable* LoadToUCS2Table(uint16 csid)
{
    XP_ASSERT( (CS_UNKNOWN != csid) && (CS_DEFAULT != csid));
    switch(csid) {
    case CS_ASCII:
        return (uTable*) asciiTbl;

    /* Latin stuff */
    case CS_LATIN1:
        return (uTable*) iso88591ToTbl;
    case CS_8859_5:
        return (uTable*) iso88595ToTbl;
    case CS_8859_7:
        return (uTable*) iso88597ToTbl;
    case CS_8859_9:
        return (uTable*) iso88599ToTbl;
    case CS_KOI8_R:
        return (uTable*) koi8rToTbl;
    case CS_KOI8_U:
        return (uTable*) koi8uToTbl;
    case CS_CP_1251:
        return (uTable*) cp1251ToTbl;

    /* ArmSCII */
    case CS_ARMSCII8:
        return (uTable*) armsciiToTbl;

    /* Japanese */
    case CS_JISX0208:
        return (uTable*) JIS0208ToTbl;
    case CS_JISX0201:
        return (uTable*) JIS0201ToTbl;
    case CS_JISX0212:
        return (uTable*) JIS0212ToTbl;
    case CS_SJIS:
        return (uTable*) SJISToTbl;

    /* Latin2 Stuff */
    case CS_LATIN2:
        return (uTable*) iso88592ToTbl;

    /* Traditional Chinese Stuff */
    case CS_CNS11643_1:
        return (uTable*) CNS11643_1ToTbl;
    case CS_CNS11643_2:
        return (uTable*) CNS11643_2ToTbl;
    case CS_BIG5:
    case CS_X_BIG5:
        return (uTable*) Big5ToTbl;

    /* Simplified Chinese Stuff */
    case CS_GB2312:
    case CS_GB_8BIT:
        return (uTable*) GB2312ToTbl;

    /* Korean Stuff */
    case CS_KSC5601:
    case CS_KSC_8BIT:
        return (uTable*) KSC5601ToTbl;

    /* Symbol Stuff */
    case CS_SYMBOL:
        return (uTable*) SymbolToTbl;

    /* Dingbats Stuff */
    case CS_DINGBATS:
        return (uTable*) DingbatsToTbl;

    /* UTF8 and UCS2 share the built-in UCS2 table */
    case CS_UTF8:
    case CS_UCS2:
        return (uTable*) Ucs2Tbl;
    case CS_TIS620:
        return (uTable*) TIS620ToTbl;
    case CS_T61:
        return (uTable*) T61ToTbl;

    /* Other Stuff */
    default:
        /* NOTE(review): XP_ASSERT(TRUE) can never fire. If the intent was
           to flag unsupported csids in debug builds this would have to be
           XP_ASSERT(FALSE) - confirm before changing, since
           INTL_SetUnicodeCSIDList in this file checks for a NULL return. */
        XP_ASSERT(TRUE);
        return NULL;
    }
}

/*
 * XP_UNIX: return the statically linked "from UCS2" table for csid, or NULL
 * when no table for that csid is linked into this build.
 * Debug builds assert that csid is neither CS_UNKNOWN nor CS_DEFAULT.
 */
PRIVATE uTable* LoadFromUCS2Table(uint16 csid)
{
    XP_ASSERT( (CS_UNKNOWN != csid) && (CS_DEFAULT != csid));
    switch(csid) {
    case CS_ASCII:
        return (uTable*) asciiTbl;

    /* Latin stuff */
    case CS_LATIN1:
        return (uTable*) iso88591FromTbl;
    case CS_8859_5:
        return (uTable*) iso88595FromTbl;
    case CS_8859_7:
        return (uTable*) iso88597FromTbl;
    case CS_8859_9:
        return (uTable*) iso88599FromTbl;
    case CS_KOI8_R:
        return (uTable*) koi8rFromTbl;
    case CS_KOI8_U:
        return (uTable*) koi8uFromTbl;
    case CS_CP_1251:
        return (uTable*) cp1251FromTbl;

    /* ArmSCII */
    case CS_ARMSCII8:
        return (uTable*) armsciiFromTbl;

    /* Japanese */
    case CS_JISX0208:
        return (uTable*) JIS0208FromTbl;
    case CS_JISX0201:
        return (uTable*) JIS0201FromTbl;
    case CS_JISX0212:
        return (uTable*) JIS0212FromTbl;
    case CS_SJIS:
        return (uTable*) SJISFromTbl;

    /* Latin2 Stuff */
    case CS_LATIN2:
        return (uTable*) iso88592FromTbl;

    /* Traditional Chinese Stuff */
    case CS_CNS11643_1:
        return (uTable*) CNS11643_1FromTbl;
    case CS_CNS11643_2:
        return (uTable*) CNS11643_2FromTbl;
    case CS_X_BIG5:
    case CS_BIG5:
        return (uTable*) Big5FromTbl;

    /* Simplified Chinese Stuff */
    case CS_GB2312:
    case CS_GB_8BIT:
        return (uTable*) GB2312FromTbl;

    /* Korean Stuff */
    case CS_KSC5601:
    case CS_KSC_8BIT:
        return (uTable*) KSC5601FromTbl;

    /* Symbol Stuff */
    case CS_SYMBOL:
        return (uTable*) SymbolFromTbl;

    /* Dingbats Stuff */
    case CS_DINGBATS:
        return (uTable*) DingbatsFromTbl;

    /* UTF8 and UCS2 share the built-in UCS2 table */
    case CS_UTF8:
    case CS_UCS2:
        return (uTable*) Ucs2Tbl;
    case CS_TIS620:
        return (uTable*) TIS620FromTbl;
    case CS_T61:
        return (uTable*) T61FromTbl;

    /* Other Stuff */
    default:
        /* NOTE(review): same no-op assertion as in LoadToUCS2Table above. */
        XP_ASSERT(TRUE);
        return NULL;
    }
}
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
/* XP_UNIX: nothing to release; both parameters are unused. */
PRIVATE void UnloadToUCS2Table(uint16 csid, uTable *utblPtr)
{
    /* If we link those table in our code. We don't need to do anything to
       unload them */
}
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
/* XP_UNIX: nothing to release; both parameters are unused. */
PRIVATE void UnloadFromUCS2Table(uint16 csid, uTable *utblPtr)
{
    /* If we link those table in our code. We don't need to do anything to
       unload them */
}
#endif /* XP_UNIX */

#ifdef XP_MAC
/*
 * Return whether the Script Manager region code equals verIceland.
 * The region code is fetched once and cached in a function-local static.
 */
PRIVATE XP_Bool isIcelandicRoman()
{
    static int region = -1;   /* -1 means "not queried yet" */
    if(region == -1) {
        region = GetScriptManagerVariable(smRegionCode);
    }
    return (verIceland == region);
}
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
/*
 * XP_MAC: return the conversion table for csid.
 *   from != 0 : the 'UFRM' resource is used (from-UCS2 table)
 *   from == 0 : the 'UTO ' resource is used (to-UCS2 table)
 * CS_ASCII and CS_UCS2/CS_UTF8 are served from the built-in arrays. All
 * other tables are fetched with GetResource using csid as the resource id,
 * then marked non-purgeable and locked so the dereferenced handle can be
 * returned. Returns NULL when GetResource fails.
 */
PRIVATE uTable* LoadUCS2Table(uint16 csid,int from)
{
    /* We need to add reference count here */
    Handle tableHandle;
    XP_ASSERT( (CS_UNKNOWN != csid) && (CS_DEFAULT != csid));
    switch(csid)
    {
    case CS_ASCII:
        return (uTable*) asciiTbl;
        break;   /* unreachable after the return */
    case CS_UCS2:
    case CS_UTF8:
        return (uTable*) Ucs2Tbl;
        break;   /* unreachable after the return */
    case CS_MAC_ROMAN:
        /* Handle MacRoman Variant here */
        if(isIcelandicRoman())
            csid = CS_MAC_ROMAN | 0x1000;   /* if this is Icelandic variant */
        break;
    default:
        break;
    }
    tableHandle = GetResource((from ? 'UFRM' : 'UTO '), csid);
    if(tableHandle == NULL || ResError()!=noErr)
        return NULL;
    /* A NULL master pointer means the resource data is not in memory yet. */
    if(*tableHandle == NULL)
        LoadResource(tableHandle);
    HNoPurge(tableHandle);
    HLock(tableHandle);
    return((uTable*) *tableHandle);
}
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
/*
 * XP_MAC: undo LoadUCS2Table for csid. The handle is looked up again with
 * GetResource (utblPtr is not used), then unlocked and made purgeable.
 * Built-in tables (CS_ASCII, CS_UCS2, CS_UTF8) need no action.
 */
PRIVATE void UnloadUCS2Table(uint16 csid, uTable *utblPtr, int from)
{
    /* We need to add reference count here */
    Handle tableHandle;
    XP_ASSERT( (CS_UNKNOWN != csid) && (CS_DEFAULT != csid));
    switch(csid)
    {
    case CS_ASCII:
    case CS_UCS2:
    case CS_UTF8:
        return;
    case CS_MAC_ROMAN:
        /* Handle MacRoman Variant here */
        if(isIcelandicRoman())
            csid = CS_MAC_ROMAN | 0x1000;   /* if this is Icelandic variant */
        break;
    default:
        break;
    }
    tableHandle = GetResource((from ? 'UFRM' : 'UTO '), csid);
    if(tableHandle == NULL || ResError()!=noErr)
        return;
    HUnlock((Handle) tableHandle);
    HPurge(tableHandle);
}
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
/* XP_MAC: load the to-UCS2 table for csid (LoadUCS2Table with from = FALSE). */
PRIVATE uTable* LoadToUCS2Table(uint16 csid)
{
    XP_ASSERT( (CS_UNKNOWN != csid) && (CS_DEFAULT != csid));
    return LoadUCS2Table(csid, FALSE);
}
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
/* XP_MAC: release the to-UCS2 table for csid. */
PRIVATE void UnloadToUCS2Table(uint16 csid, uTable *utblPtr)
{
    XP_ASSERT( (CS_UNKNOWN != csid) && (CS_DEFAULT != csid));
    UnloadUCS2Table(csid, utblPtr, FALSE);
}
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
/* XP_MAC: load the from-UCS2 table for csid (LoadUCS2Table with from = TRUE). */
PRIVATE uTable* LoadFromUCS2Table(uint16 csid)
{
    XP_ASSERT( (CS_UNKNOWN != csid) && (CS_DEFAULT != csid));
    return LoadUCS2Table(csid, TRUE);
}
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/ PRIVATE void UnloadFromUCS2Table(uint16 csid, uTable *utblPtr) { XP_ASSERT( (CS_UNKNOWN != csid) && (CS_DEFAULT != csid)); UnloadUCS2Table(csid, utblPtr, TRUE); } #endif /* XP_MAC */ #if defined(XP_WIN) #ifdef XP_WIN32 #define UNICODEDLL "UNI3200.DLL" #define LIBRARYLOADOK(l) (l != NULL) #define UNICODE_LOADUCS2TABLE_SYM "UNICODE_LOADUCS2TABLE" #define UNICODE_UNLOADUCS2TABLE_SYM "UNICODE_UNLOADUCS2TABLE" #else #define UNICODEDLL "UNI1600.DLL" #define LIBRARYLOADOK(l) (l >= 32) #define UNICODE_LOADUCS2TABLE_SYM "_UNICODE_LOADUCS2TABLE" #define UNICODE_UNLOADUCS2TABLE_SYM "_UNICODE_UNLOADUCS2TABLE" #endif /* !XP_WIN32 */ PRLibrary* uniLib = NULL; PRIVATE uTable* LoadUCS2Table(uint16 csid, int from) { uTable* ret = NULL; XP_ASSERT( (CS_UNKNOWN != csid) && (CS_DEFAULT != csid)); switch(csid) { case CS_ASCII: ret = (uTable*) asciiTbl; break; case CS_UCS2: case CS_UTF8: ret = (uTable*) Ucs2Tbl; break; default: if(uniLib == NULL ) uniLib = PR_LoadLibrary(UNICODEDLL); if(uniLib) { typedef uTable* (*f) (uint16 i1, int i2); static f p = NULL; if (p == NULL) { p = (f)PR_FindSymbol(uniLib, UNICODE_LOADUCS2TABLE_SYM); } XP_ASSERT(p); if(p) ret = (*p)(csid, from); } break; } return ret; } PRIVATE void UnloadUCS2Table(uint16 csid, uTable* utblPtr, int from) { XP_ASSERT( (CS_UNKNOWN != csid) && (CS_DEFAULT != csid)); switch(csid) { case CS_ASCII: case CS_UCS2: case CS_UTF8: break; default: if(uniLib == NULL ) uniLib = PR_LoadLibrary(UNICODEDLL); if(uniLib) { typedef void (*f) (uint16 i1, uTable* i2, int i3); static f p = NULL; if (p == NULL) { p = (f)PR_FindSymbol(uniLib, UNICODE_UNLOADUCS2TABLE_SYM); } XP_ASSERT(p); if(p) (*p)(csid, utblPtr, from); } break; } } PRIVATE uTable* LoadToUCS2Table(uint16 csid) { XP_ASSERT( (CS_UNKNOWN != csid) && (CS_DEFAULT != csid)); return(LoadUCS2Table(csid,0)); } PRIVATE void UnloadToUCS2Table(uint16 csid, uTable *utblPtr) { XP_ASSERT( (CS_UNKNOWN != csid) && 
(CS_DEFAULT != csid)); UnloadUCS2Table(csid, utblPtr, 0); } PRIVATE uTable* LoadFromUCS2Table(uint16 csid) { XP_ASSERT( (CS_UNKNOWN != csid) && (CS_DEFAULT != csid)); return(LoadUCS2Table(csid,1)); } PRIVATE void UnloadFromUCS2Table(uint16 csid, uTable *utblPtr) { XP_ASSERT( (CS_UNKNOWN != csid) && (CS_DEFAULT != csid)); UnloadUCS2Table(csid, utblPtr, 1); } #endif /* XP_WIN */ #ifdef XP_OS2 /* * The basic design for OS/2 is to place all of the tables inline. * Since we reference most of the from tables during init to form * the row tables, we might as well just put them here. */ /* * Latin 1 */ PRIVATE uint16 cp1252FromTbl[] = { #include "cp1252.uf" }; PRIVATE uint16 cp1252ToTbl[] = { #include "cp1252.ut" }; /* * Latin 2 */ PRIVATE uint16 iso8859_2FromTbl[] = { #include "8859-2.uf" }; PRIVATE uint16 iso8859_2ToTbl[] = { #include "8859-2.ut" }; /* * Japan */ PRIVATE uint16 japanFromTbl[] = { #include "sjis.uf" }; PRIVATE uint16 japanToTbl[] = { #include "sjis.ut" }; /* * China (may need update for GBK) */ PRIVATE uint16 chinaFromTbl[] = { #include "gb2312.uf" }; PRIVATE uint16 chinaToTbl[] = { #include "gb2312.ut" }; /* * Taiwan */ PRIVATE uint16 taiwanFromTbl[] = { #include "big5.uf" }; PRIVATE uint16 taiwanToTbl[] = { #include "big5.ut" }; /* * ISO Codepages */ PRIVATE uint16 iso8859_3FromTbl[] = { #include "8859-3.uf" }; PRIVATE uint16 iso8859_3ToTbl[] = { #include "8859-3.ut" }; PRIVATE uint16 iso8859_4FromTbl[] = { #include "8859-4.uf" }; PRIVATE uint16 iso8859_4ToTbl[] = { #include "8859-4.ut" }; PRIVATE uint16 iso8859_5FromTbl[] = { #include "8859-5.uf" }; PRIVATE uint16 iso8859_5ToTbl[] = { #include "8859-5.ut" }; PRIVATE uint16 iso8859_6FromTbl[] = { #include "8859-6.uf" }; PRIVATE uint16 iso8859_6ToTbl[] = { #include "8859-6.ut" }; PRIVATE uint16 iso8859_7FromTbl[] = { #include "8859-7.uf" }; PRIVATE uint16 iso8859_7ToTbl[] = { #include "8859-7.ut" }; PRIVATE uint16 iso8859_8FromTbl[] = { #include "8859-8.uf" }; PRIVATE uint16 iso8859_8ToTbl[] = { #include 
"8859-8.ut" }; PRIVATE uint16 iso8859_9FromTbl[] = { #include "8859-9.uf" }; PRIVATE uint16 iso8859_9ToTbl[] = { #include "8859-9.ut" }; /* * Windows Codepages */ PRIVATE uint16 cp1250FromTbl[] = { #include "cp1250.uf" }; PRIVATE uint16 cp1250ToTbl[] = { #include "cp1250.ut" }; PRIVATE uint16 cp1251FromTbl[] = { #include "cp1251.uf" }; PRIVATE uint16 cp1251ToTbl[] = { #include "cp1251.ut" }; PRIVATE uint16 cp1253FromTbl[] = { #include "cp1253.uf" }; PRIVATE uint16 cp1253ToTbl[] = { #include "cp1253.ut" }; PRIVATE uint16 cp1254FromTbl[] = { #include "cp1254.uf" }; PRIVATE uint16 cp1254ToTbl[] = { #include "cp1254.ut" }; PRIVATE uint16 cp1257FromTbl[] = { #include "cp1257.uf" }; PRIVATE uint16 cp1257ToTbl[] = { #include "cp1257.ut" }; /* * Russian */ PRIVATE uint16 koi8rFromTbl[] = { #include "koi8r.uf" }; PRIVATE uint16 koi8rToTbl[] = { #include "koi8r.ut" }; /* * OS/2 Codepages */ PRIVATE uint16 cp850FromTbl[] = { #include "cp850.uf" }; PRIVATE uint16 cp850ToTbl[] = { #include "cp850.ut" }; PRIVATE uint16 cp852FromTbl[] = { #include "cp852.uf" }; PRIVATE uint16 cp852ToTbl[] = { #include "cp852.ut" }; PRIVATE uint16 cp855FromTbl[] = { #include "cp855.uf" }; PRIVATE uint16 cp855ToTbl[] = { #include "cp855.ut" }; PRIVATE uint16 cp857FromTbl[] = { #include "cp857.uf" }; PRIVATE uint16 cp857ToTbl[] = { #include "cp857.ut" }; PRIVATE uint16 cp862FromTbl[] = { #include "cp862.uf" }; PRIVATE uint16 cp862ToTbl[] = { #include "cp862.ut" }; PRIVATE uint16 cp864FromTbl[] = { #include "cp864.uf" }; PRIVATE uint16 cp864ToTbl[] = { #include "cp864.ut" }; PRIVATE uint16 cp866FromTbl[] = { #include "cp866.uf" }; PRIVATE uint16 cp866ToTbl[] = { #include "cp866.ut" }; PRIVATE uint16 cp874FromTbl[] = { #include "cp874.uf" }; PRIVATE uint16 cp874ToTbl[] = { #include "cp874.ut" }; /* * Korea */ PRIVATE uint16 koreaFromTbl[] = { #include "ksc5601.uf" }; PRIVATE uint16 koreaToTbl[] = { #include "ksc5601.ut" }; /* * Symbol */ PRIVATE uint16 symbolFromTbl[] = { #include "macsymbo.uf" }; 
PRIVATE uint16 symbolToTbl[] = { #include "macsymbo.ut" }; /* * Mac roman */ PRIVATE uint16 macromanFromTbl[] = { #include "macroman.uf" }; PRIVATE uint16 macromanToTbl[] = { #include "macroman.ut" }; /* * Dingbats Stuff */ PRIVATE uint16 dingbatFromTbl[] = { #include "macdingb.uf" }; PRIVATE uint16 dingbatToTbl[] = { #include "macdingb.ut" }; /* * Return the address of the To table given the codeset */ PRIVATE uTable* LoadToUCS2Table(uint16 csid) { XP_ASSERT( (CS_UNKNOWN != csid) && (CS_DEFAULT != csid)); switch(csid) { case CS_ASCII: case CS_LATIN1: return (uTable*) cp1252ToTbl; case CS_UTF8: case CS_UTF7: case CS_UCS2: return (uTable*) Ucs2Tbl; case CS_LATIN2: return (uTable*) iso8859_2ToTbl; case CS_SJIS: return (uTable*) japanToTbl; case CS_BIG5: return (uTable*) taiwanToTbl; case CS_GB_8BIT: return (uTable*) chinaToTbl; case CS_8859_3: return (uTable*) iso8859_3ToTbl; case CS_8859_4: return (uTable*) iso8859_4ToTbl; case CS_8859_5: return (uTable*) iso8859_5ToTbl; case CS_8859_6: return (uTable*) iso8859_6ToTbl; case CS_8859_7: return (uTable*) iso8859_7ToTbl; case CS_8859_8: return (uTable*) iso8859_8ToTbl; case CS_8859_9: return (uTable*) iso8859_9ToTbl; case CS_CP_1250: return (uTable*) cp1250ToTbl; case CS_CP_1251: return (uTable*) cp1251ToTbl; case CS_CP_1253: return (uTable*) cp1253ToTbl; case CS_CP_1254: return (uTable*) cp1254ToTbl; case CS_CP_1257: return (uTable*) cp1257ToTbl; case CS_CP_850: return (uTable*) cp850ToTbl; case CS_CP_852: return (uTable*) cp852ToTbl; case CS_CP_855: return (uTable*) cp855ToTbl; case CS_CP_857: return (uTable*) cp857ToTbl; case CS_CP_862: return (uTable*) cp862ToTbl; case CS_CP_864: return (uTable*) cp864ToTbl; case CS_CP_866: return (uTable*) cp866ToTbl; case CS_CP_874: return (uTable*) cp874ToTbl; case CS_KOI8_R: return (uTable*) koi8rToTbl; case CS_KSC_8BIT: return (uTable*) koreaToTbl; case CS_MAC_ROMAN: return (uTable*) macromanToTbl; case CS_SYMBOL: return (uTable*) symbolToTbl; case CS_DINGBATS: return (uTable*) 
dingbatToTbl; } return (uTable*) cp1252ToTbl; /* This should not happen */ } /* * Return the address of the From table given the codeset */ PRIVATE uTable* LoadFromUCS2Table(uint16 csid) { XP_ASSERT( (CS_UNKNOWN != csid) && (CS_DEFAULT != csid)); switch(csid) { case CS_ASCII: case CS_LATIN1: return (uTable*) cp1252FromTbl; case CS_UTF8: case CS_UTF7: case CS_UCS2: return (uTable*) Ucs2Tbl; case CS_LATIN2: return (uTable*) iso8859_2FromTbl; case CS_SJIS: return (uTable*) japanFromTbl; case CS_BIG5: return (uTable*) taiwanFromTbl; case CS_GB_8BIT: return (uTable*) chinaFromTbl; case CS_8859_3: return (uTable*) iso8859_3FromTbl; case CS_8859_4: return (uTable*) iso8859_4FromTbl; case CS_8859_5: return (uTable*) iso8859_5FromTbl; case CS_8859_6: return (uTable*) iso8859_6FromTbl; case CS_8859_7: return (uTable*) iso8859_7FromTbl; case CS_8859_8: return (uTable*) iso8859_8FromTbl; case CS_8859_9: return (uTable*) iso8859_9FromTbl; case CS_CP_1250: return (uTable*) cp1250FromTbl; case CS_CP_1251: return (uTable*) cp1251FromTbl; case CS_CP_1253: return (uTable*) cp1253FromTbl; case CS_CP_1254: return (uTable*) cp1254FromTbl; case CS_CP_1257: return (uTable*) cp1257FromTbl; case CS_CP_850: return (uTable*) cp850FromTbl; case CS_CP_852: return (uTable*) cp852FromTbl; case CS_CP_855: return (uTable*) cp855FromTbl; case CS_CP_857: return (uTable*) cp857FromTbl; case CS_CP_862: return (uTable*) cp862FromTbl; case CS_CP_864: return (uTable*) cp864FromTbl; case CS_CP_866: return (uTable*) cp866FromTbl; case CS_CP_874: return (uTable*) cp874FromTbl; case CS_KOI8_R: return (uTable*) koi8rFromTbl; case CS_KSC_8BIT: return (uTable*) koreaFromTbl; case CS_MAC_ROMAN: return (uTable*) macromanFromTbl; case CS_SYMBOL: return (uTable*) symbolFromTbl; case CS_DINGBATS: return (uTable*) dingbatFromTbl; } return (uTable*) cp1252FromTbl; /* This should not happen */ } /* * Null functions since the tables are inline */ PRIVATE void UnloadToUCS2Table(uint16 csid, uTable *utblPtr) {} PRIVATE 
void UnloadFromUCS2Table(uint16 csid, uTable *utblPtr) {} #endif /* XP_OS2 */ /*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ PRIVATE uRowTable* AddAndInitOneRow(uint16 hb) { /* Allocate uRowTablePtArray[hb] and initialize it */ uint16 i; uRowTable *row = XP_ALLOC(sizeof(uRowTable)); if(row == NULL) { XP_ASSERT(row != 0); return NULL; } else { for(i = 0; i < 256 ;i++) { row->value[i] = NOMAPPING; row->info[i] = 0; } uRowTablePtArray[hb] = row; } return row; } /*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ PRIVATE void AddAndInitAllRows(void) { uint16 i; for(i=0;i<256;i++) (void) AddAndInitOneRow(i); } /*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ PRIVATE XP_Bool RowUsed(uint16 rownum) { uint16 c; uRowTable *row = uRowTablePtArray[ rownum] ; for(c=0;c<256;c++) { if(row->value[c] != NOMAPPING) return TRUE; } return FALSE; } /*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ PRIVATE void FreeRow(uint16 row) { XP_FREE(uRowTablePtArray[row]); uRowTablePtArray[row] = NULL; } /*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ PRIVATE void FreeUnusedRows(void) { uint16 i; for(i=0;i<256;i++) { if(! 
RowUsed(i)) FreeRow(i); } } /*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ PRIVATE void CheckAndAddEntry(uint16 ucs2, uint16 med, uint16 csid) { uint16 lb = ucs2 & 0x00FF; uRowTable *row = uRowTablePtArray[ucs2 >> 8]; if(row->value[lb] == NOMAPPING) { row->value[lb]= med; row->info[lb]= (csid & 0xFF); } } /*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ PRIVATE XP_Bool UCS2ToValueAndInfo(uint16 ucs2, uint16* med, unsigned char* info) { uRowTable *uRowTablePtr = uRowTablePtArray[(ucs2 >> 8)]; if( uRowTablePtr == NULL) return FALSE; *med = uRowTablePtr->value[(ucs2 & 0x00ff)]; if(*med == NOMAPPING) { return FALSE; } else { *info = uRowTablePtr->info[(ucs2 & 0x00ff)]; return TRUE; } } /*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ PRIVATE void InitUCS2Table(void) { int16 i; for(i=0;i<256; i++) uRowTablePtArray[i] = NULL; } /*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ PUBLIC XP_Bool UCS2_To_Other( uint16 ucs2, unsigned char *out, uint32 outbuflen, uint32* outlen, int16 *outcsid ) { uint16 med; unsigned char info; uShiftTable* shiftTable; #ifdef XP_MAC if(ucs2 == 0x000a) ucs2 = 0x000d; #endif if(UCS2ToValueAndInfo(ucs2, &med, &info)) { *outcsid = intl_GetValidCSID(info); XP_ASSERT(*outcsid != CS_UNKNOWN); shiftTable = InfoToShiftTable(info); XP_ASSERT(shiftTable); return uGenerate(shiftTable, (int32*)0, med, out,outbuflen, outlen); } return FALSE; } PRIVATE int16* unicodeCSIDList = NULL; PRIVATE unsigned char** unicodeCharsetNameList = NULL; PRIVATE uint16 numOfUnicodeList = 0; PUBLIC int16* INTL_GetUnicodeCSIDList(int16 * outnum) { 
*outnum = numOfUnicodeList; return unicodeCSIDList; } PUBLIC unsigned char **INTL_GetUnicodeCharsetList(int16 * outnum) { *outnum = numOfUnicodeList; return unicodeCharsetNameList; } /*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ PUBLIC void INTL_SetUnicodeCSIDList(uint16 numOfItems, int16* csidlist) { int i; uTable* utbl; /* This function should be called once only */ XP_ASSERT(unicodeCSIDList == NULL); XP_ASSERT(unicodeCharsetNameList == NULL); unicodeCSIDList = XP_ALLOC(sizeof(int16) * numOfItems); /* needs to handle no memory */ XP_ASSERT(unicodeCSIDList != NULL); unicodeCharsetNameList = XP_ALLOC(sizeof(unsigned char*) * numOfItems); /* needs to handle no memory*/ XP_ASSERT(unicodeCharsetNameList != NULL); numOfUnicodeList = numOfItems; InitUCS2Table(); AddAndInitAllRows(); /* Add the first table */ for(i = 0 ; i < numOfItems; i++) { unicodeCSIDList[i] = csidlist[i]; unicodeCharsetNameList[i]= INTL_CsidToCharsetNamePt(csidlist[i]); if( (csidlist[i] != CS_UTF8 ) && ((utbl = LoadFromUCS2Table(csidlist[i])) != NULL)) { uMapIterate(utbl,CheckAndAddEntry, csidlist[i]); UnloadFromUCS2Table(csidlist[i],utbl); } } FreeUnusedRows(); } /*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ typedef struct UnicodeConverterPriv UnicodeConverterPriv; typedef UnicodeConverterPriv* INTL_UnicodeToStrIteratorPriv; struct UnicodeConverterPriv { INTL_Unicode *ustr; uint32 ustrlen; }; /*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ /* Pricate Function Declartion */ PRIVATE XP_Bool UnicodeToStrWithFallback_p( uint16 ucs2, unsigned char *out, uint32 outbuflen, uint32* outlen, uint16 *outcsid ); PRIVATE XP_Bool UnicodeToStrFirst_p( uint16 ucs2, unsigned char *out, 
uint32 outbuflen, uint32* outlen, uint16 *outcsid ); PRIVATE XP_Bool UnicodeToStrNext_p( uint16 ucs2, unsigned char *out, uint32 outbuflen, uint32* outlen, uint16 lastcsid ); /*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ /* the return of FLASE of this funciton only mean one thing - the outbuf is not enough for this conversion */ PRIVATE XP_Bool UnicodeToStrWithFallback_p( uint16 ucs2, unsigned char *out, uint32 outbuflen, uint32* outlen, uint16 *outcsid) { if(! UCS2_To_Other(ucs2, out, outbuflen, outlen, (int16*)outcsid)) { if(outbuflen > 2) { #ifdef XP_MAC *outcsid = CS_MAC_ROMAN; #else *outcsid = CS_LATIN1; #endif out[0]= '?'; *outlen =1; return TRUE; } else return FALSE; } return TRUE; } /*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ PRIVATE XP_Bool UnicodeToStrFirst_p( uint16 ucs2, unsigned char *out, uint32 outbuflen, uint32* outlen, uint16 *outcsid) { return UnicodeToStrWithFallback_p(ucs2,out,outbuflen,outlen,outcsid); } /*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ PRIVATE XP_Bool UnicodeToStrNext_p( uint16 ucs2, unsigned char *out, uint32 outbuflen, uint32* outlen, uint16 lastcsid) { uint16 thiscsid; XP_Bool retval = UnicodeToStrWithFallback_p(ucs2,out,outbuflen,outlen,&thiscsid); return (retval && (thiscsid == lastcsid)); } /*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ PUBLIC INTL_UnicodeToStrIterator INTL_UnicodeToStrIteratorCreate( INTL_Unicode* ustr, uint32 ustrlen, INTL_Encoding_ID *encoding, unsigned char* dest, uint32 destbuflen ) { UnicodeConverterPriv* priv=0; priv=XP_ALLOC(sizeof(UnicodeConverterPriv)); 
if(priv) { priv->ustrlen = ustrlen; priv->ustr = ustr; (void)INTL_UnicodeToStrIterate((INTL_UnicodeToStrIterator)priv, encoding, dest, destbuflen); } else { *encoding = 0; dest[0] = '\0'; } return (INTL_UnicodeToStrIterator)priv; } /*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ PUBLIC int INTL_UnicodeToStrIterate( INTL_UnicodeToStrIterator iterator, INTL_Encoding_ID *encoding, unsigned char* dest, uint32 destbuflen ) { unsigned char* orig = dest; UnicodeConverterPriv* priv = (UnicodeConverterPriv*)iterator; if(destbuflen < 2) /* we want to make sure there at least two byte in the buffer */ return 0; /* first one for the first char, second one for the NULL */ destbuflen -= 1; /* resever one byte for NULL terminator */ if((priv == NULL) || ((priv->ustrlen) == 0)) { *encoding = 0; dest[0]='\0'; return 0; } else { uint32 len = 0; if(UnicodeToStrFirst_p(*(priv->ustr), dest,destbuflen,&len,encoding)) { do{ dest += len; destbuflen -= len; priv->ustr += 1; priv->ustrlen -= 1 ; } while( (destbuflen > 0) && ((priv->ustrlen > 0)) && UnicodeToStrNext_p(*(priv->ustr), dest, destbuflen, &len, *encoding)); } dest[0] = '\0'; return (orig != dest); } } /*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ PUBLIC void INTL_UnicodeToStrIteratorDestroy( INTL_UnicodeToStrIterator iterator ) { UnicodeConverterPriv* priv = (UnicodeConverterPriv*)iterator; if(priv) XP_FREE(priv); } /*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ PUBLIC uint32 INTL_UnicodeLen(INTL_Unicode *ustr) { uint32 i; for(i=0;*ustr++;i++) ; return i; } /*--------------------------------------------------------------------------*/ 
/*--------------------------------------------------------------------------*/ PUBLIC uint32 INTL_UnicodeToStrLen( INTL_Encoding_ID encoding, INTL_Unicode* ustr, uint32 ustrlen ) { /* for now, put a dump algorithm to caculate the length */ return ustrlen * ((encoding & MULTIBYTE) ? 4 : 1) + 1; } /*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ PRIVATE int LoadUCS2TableSet(uint16 csid, uTableSet* tableset,int from) { UnicodeTableSet* set; int i; XP_ASSERT( (CS_UNKNOWN != csid) && (CS_DEFAULT != csid)); for(i=0;irange[i].intercsid=CS_DEFAULT; tableset->tables[i]=NULL; tableset->shift[i] = NULL; tableset->range[i].min = 0xff; tableset->range[i].max = 0x00; } set = GetUnicodeTableSet(csid); /* If the conversion is a combination of several csid conversion, We try */ /* to load all of them now. */ /* Otherwise, we simply load the one for the csid */ if(set == NULL) { tableset->range[0].intercsid=csid; if(from) tableset->tables[0]=LoadFromUCS2Table(csid); else tableset->tables[0]=LoadToUCS2Table(csid); tableset->shift[0] = GetShiftTableFromCsid(csid); tableset->range[0].min = 0x00; tableset->range[0].max = 0xff; return 1; } else { for(i=0;((irange[i].intercsid != CS_DEFAULT));i++) { tableset->range[i].intercsid=set->range[i].intercsid; tableset->range[i].min = set->range[i].min; tableset->range[i].max = set->range[i].max; if(from) tableset->tables[i]=LoadFromUCS2Table(set->range[i].intercsid); else tableset->tables[i]=LoadToUCS2Table(set->range[i].intercsid); tableset->shift[i] = GetShiftTableFromCsid(set->range[i].intercsid); XP_ASSERT(tableset->shift[i]); XP_ASSERT(tableset->tables[i]); } return i; } } /*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ PRIVATE void UnloadUCS2TableSet(uTableSet *tableset,int from) { int i; if(tableset == NULL) return; 
for(i=0;irange[i].intercsid != CS_DEFAULT) && (tableset->tables[i] != NULL)) { if(from) UnloadFromUCS2Table(tableset->range[i].intercsid, tableset->tables[i]); else UnloadToUCS2Table(tableset->range[i].intercsid, tableset->tables[i]); } tableset->range[i].intercsid=CS_DEFAULT; tableset->tables[i]=NULL; tableset->shift[i] = NULL; } } /*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ /* * utf8_to_local_encoding - UTF8 to Locally Encoded segment * * Convert a utf8 string to a Locally Encoded string. * Convert as characters until the encoding changes or * input/output space runs out. * * The segment is NOT NULL TERMINATED * * inputs: utf8 string & length * buffer (pre-allocated) to hold Locally Encoded string * pointer to return encoding csid * pointer to return strlen of Encoded string * * output: values written to Locally Encoded string buffer * encoding csid set: * >0 if successful * -1 if not unicode * -2 if no local encoding * length of utf8 string converted returned * strlen of Locally Encoded string * */ PUBLIC int utf8_to_local_encoding(const unsigned char *utf8p, const int utf8len, unsigned char *LE_string, int LE_string_len, int *LE_written_len, int16 *LE_string_csid) { int parsed_len = 0; int written_len = 0; int16 i, utf8_char_len; uint16 ucs2_char; int16 seg_encoding; int16 out_char_encoding; uint32 out_char_len; unsigned char tmpbuf[10]; XP_Bool result; /* * get segment encoding (encoding of first character) */ utf8_char_len = utf8_to_ucs2_char(utf8p, (int16)utf8len, &ucs2_char); if (utf8_char_len == -1) { /* its not unicode/utf8 but try to convert */ /* it anyway so the user can see something */ seg_encoding = -1; } else if (utf8_char_len == -2) /* not enough input characters */ return 0; else { result = UCS2_To_Other(ucs2_char, tmpbuf, 10, &out_char_len, &seg_encoding); if (result == FALSE) /* failed to convert */ seg_encoding = -2; /* no local 
encoding */
  }

  /*
   * loop converting the string
   */
  while (1) {
    /*
     * convert utf8 to UCS2
     */
    utf8_char_len = utf8_to_ucs2_char(utf8p+parsed_len, (int16)(utf8len-parsed_len), &ucs2_char);
    if (utf8_char_len == -1) { /* not utf8 */
      /* pass the raw byte through, tagged with pseudo-encoding -1 */
      utf8_char_len = 1;
      out_char_encoding = -1;
      tmpbuf[0] = *(utf8p+parsed_len);
      out_char_len = 1;
    }
    else if (utf8_char_len == -2) /* no input/output space */
      break;
    else {
      /*
       * convert UCS2 to local encoding
       */
      result = UCS2_To_Other(ucs2_char, tmpbuf, (uint16)10, &out_char_len, &out_char_encoding);
      if (result == FALSE) { /* failed to convert */
        out_char_encoding = -2; /* no local encoding */
        tmpbuf[0] = '?'; /* place holder */
        out_char_len = 1;
      }
    }

    /* stop if not the same encoding */
    if (out_char_encoding != seg_encoding)
      break;

    /* stop if out of space for output characters */
    if ((written_len+out_char_len) >= LE_string_len-1)
      break;

    /*
     * add this character to the segment
     */
    /* NOTE(review): the text of this file appears to be truncated at this
       point.  The header of the copy loop breaks off after "i", and what
       follows (which uses dest/destbuflen rather than LE_string) looks like
       the tail of a different function.  The missing span must be restored
       from a pristine copy of the file; nothing below up to the closing
       brace should be relied on as written. */
    for (i=0; i 1); /* cannot map one, gen the fallback */
          *dest++ = '?';
          destbuflen--;
        }
      }
    }
  XP_ASSERT(destbuflen > 0);
  *dest = '\0'; /* NULL terminate it */
}
/*
  intl_check_unicode_question
  Used by INTL_UnicodeToEncodingStr

  Returns how many of the first ustrlen units of ustr are equal to
  0x003F.
*/
PRIVATE uint32 intl_check_unicode_question(
  INTL_Unicode* ustr,
  uint32 ustrlen
)
{
  INTL_Unicode* p;
  uint32 count = 0;
  /* ustrlen is used as the countdown; p walks the units */
  for(p=ustr; ustrlen > 0 ;ustrlen--, p++)
    if(*p == 0x003F)
      count++;
  return count;
}
/*
  intl_check_unknown_unicode
  Used by INTL_UnicodeToEncodingStr

  Returns how many '?' bytes buf contains before its terminating '\0'.
*/
PRIVATE uint32 intl_check_unknown_unicode(unsigned char* buf)
{
  unsigned char* p;
  uint32 count = 0;
  for(p=buf; *p != '\0'; p++)
    if(*p == '?')
      count++;
  return count;
}
/*
  INTL_UnicodeToEncodingStr
  This is a trial-and-error function which may waste a lot of time
  in "THE WORST CASE".
  However, it does its best in the best case and the average case.
  IMPORTANT ASSUMPTION: an unknown Unicode character falls back to '?'
*/ PUBLIC INTL_Encoding_ID INTL_UnicodeToEncodingStr( INTL_Unicode* ustr, uint32 ustrlen, unsigned char* dest, uint32 destbuflen ) { INTL_Encoding_ID latin1_encoding, encoding, min_error_encoding, last_convert_encoding; uint32 min, question; int16 *encodingList; int16 itemCount; int16 idx; #ifdef XP_MAC encoding = latin1_encoding = CS_MAC_ROMAN; #else encoding = latin1_encoding = CS_LATIN1; #endif /* Ok, let's try them with Latin 1 first. I believe this is for most of the case */ INTL_UnicodeToStr(encoding,ustr,ustrlen,dest,destbuflen); /* Try to find the '?' in the converted string */ min = intl_check_unknown_unicode(dest); if(min == 0) /* No '?' in the converted string, it could be convert to Latin 1 */ return encoding; /* The origional Unicode may contaion some '?' in unicode. Let's count it */ question = intl_check_unicode_question(ustr,ustrlen ); /* The number of '?' in the converted string match the number in unicode */ if(min == question) return encoding; last_convert_encoding = min_error_encoding = encoding; encodingList = INTL_GetUnicodeCSIDList(&itemCount); for(idx = 0; idx < itemCount ; idx++) { encoding = encodingList[idx]; /* Let's ignore the following three csid the latin1 (we already try it Symbol an Dingbat */ if((encoding != latin1_encoding) && (encoding != CS_SYMBOL) && (encoding != CS_DINGBATS)) { uint32 unknowInThis; last_convert_encoding = encoding; INTL_UnicodeToStr(encoding,ustr,ustrlen,dest,destbuflen); unknowInThis = intl_check_unknown_unicode(dest); /* The number of '?' in the converted string match the number in unicode */ if(unknowInThis == question) /* what a perfect candidcate */ return encoding; /* The number of '?' is less then the previous smallest */ if(unknowInThis < min) { /* let's remember the encoding and the number of '?' */ min = unknowInThis; min_error_encoding = encoding; } } } /* The min_error_encoding is not the last one we try to convert to. 
We need to convert it again */ if(min_error_encoding != last_convert_encoding) INTL_UnicodeToStr(min_error_encoding,ustr,ustrlen,dest,destbuflen); return min_error_encoding; } /*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ PUBLIC uint32 INTL_StrToUnicodeLen( INTL_Encoding_ID encoding, unsigned char* src ) { XP_ASSERT( (CS_UNKNOWN != encoding) && (CS_DEFAULT != encoding)); /* for now, put a dump algorithm to caculate the length */ return INTL_TextToUnicodeLen(encoding, src, XP_STRLEN((char*)src)); } /*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ PUBLIC uint32 INTL_StrToUnicode( INTL_Encoding_ID encoding, unsigned char* src, INTL_Unicode* ustr, uint32 ubuflen ) { uint32 len = XP_STRLEN((char*)src); XP_ASSERT( (CS_UNKNOWN != encoding) && (CS_DEFAULT != encoding)); return INTL_TextToUnicode(encoding,src,len,ustr,ubuflen); } PUBLIC uint32 INTL_TextToUnicodeLen( INTL_Encoding_ID encoding, unsigned char* src, uint32 srclen ) { XP_ASSERT( (CS_UNKNOWN != encoding) && (CS_DEFAULT != encoding)); /* for now, put a dump algorithm to caculate the length */ return srclen + 1; } PUBLIC uint32 INTL_TextToUnicode( INTL_Encoding_ID encoding, unsigned char* src, uint32 srclen, INTL_Unicode* ustr, uint32 ubuflen ) { /* * Use the Netscape conversion tables */ uint32 validlen,scanlen; uint16 num, med; uTableSet tableset; XP_ASSERT( (CS_UNKNOWN != encoding) && (CS_DEFAULT != encoding)); num = LoadUCS2TableSet(encoding, &tableset,FALSE); for(validlen=0; ((srclen > 0) && (ubuflen > 1)); srclen -= scanlen, src += scanlen, ustr++, ubuflen--,validlen++) { uint16 i; scanlen = 0; if(*src < 0x20) { *ustr = (INTL_Unicode)(*src); scanlen = 1; continue; } for(i=0;i