mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-12-30 20:48:52 +00:00
1431 lines
51 KiB
C
1431 lines
51 KiB
C
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
|
|
*
|
|
* The contents of this file are subject to the Netscape Public License
|
|
* Version 1.0 (the "NPL"); you may not use this file except in
|
|
* compliance with the NPL. You may obtain a copy of the NPL at
|
|
* http://www.mozilla.org/NPL/
|
|
*
|
|
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
|
* for the specific language governing rights and limitations under the
|
|
* NPL.
|
|
*
|
|
* The Initial Developer of this code under the NPL is Netscape
|
|
* Communications Corporation. Portions created by Netscape are
|
|
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
|
* Reserved.
|
|
*/
|
|
/* fe_ccc.c */
|
|
/* Test harness code to be replaced by FE specific code */
|
|
|
|
#ifdef XP_OS2
|
|
#define INCL_DOS
|
|
#endif
|
|
#include "intlpriv.h"
|
|
|
|
#include <stdio.h>
|
|
#include "xp.h"
|
|
#include "intl_csi.h"
|
|
|
|
#ifdef XP_MAC
|
|
#include "resgui.h"
|
|
#endif
|
|
|
|
/* for XP_GetString() */
|
|
#include "xpgetstr.h"
|
|
extern int MK_OUT_OF_MEMORY;
|
|
|
|
|
|
/*
|
|
IMPORTANT NOTE:
|
|
|
|
mz_euc2euc
|
|
mz_b52b5
|
|
mz_cns2cns
|
|
mz_ksc2ksc
|
|
mz_sjis2sjis
|
|
mz_utf82utf8
|
|
|
|
are now replaced by mz_mbNullConv
|
|
we should eventually replace mz_hz2gb as well, once the hz -> gb conversion has been extracted
|
|
*/
|
|
/*
 * Forward declarations of converters referenced by the cscvt_tbl tables
 * below.  All of them take the conversion object, an input buffer and its
 * size in bytes, and return an unsigned char buffer.
 *
 *   mz_euctwtob5  - used for CS_CNS_8BIT -> CS_BIG5 entries
 *   mz_b5toeuctw  - used for CS_BIG5 -> CS_CNS_8BIT entries
 *   mz_hz2gb      - used for CS_HZ / CS_GB_8BIT -> CS_GB_8BIT entries
 *   mz_mbNullConv - used for same-charset multibyte entries
 */
MODULE_PRIVATE unsigned char* mz_euctwtob5( CCCDataObject obj, const unsigned char *in, int32 insize);
MODULE_PRIVATE unsigned char* mz_b5toeuctw( CCCDataObject obj, const unsigned char *in, int32 insize);

PRIVATE unsigned char *
mz_hz2gb(CCCDataObject obj, const unsigned char *kscbuf, int32 kscbufsz);

PRIVATE unsigned char *
mz_mbNullConv(CCCDataObject obj, const unsigned char *buf, int32 bufsz);
|
|
|
|
/* intl_CharLenFunc is designed to used with mz_mbNullConv */
|
|
typedef int16 (*intl_CharLenFunc) ( unsigned char ch);
|
|
PRIVATE int16 intl_CharLen_SJIS( unsigned char ch);
|
|
PRIVATE int16 intl_CharLen_EUC_JP( unsigned char ch);
|
|
PRIVATE int16 intl_CharLen_CGK( unsigned char ch);
|
|
PRIVATE int16 intl_CharLen_CNS_8BIT( unsigned char ch);
|
|
PRIVATE int16 intl_CharLen_UTF8( unsigned char ch);
|
|
PRIVATE int16 intl_CharLen_SingleByte(unsigned char ch);
|
|
PRIVATE int16 intl_CharLen_T61(unsigned char ch);
|
|
|
|
/*
 * Selector values passed as the last field of cscvt_tbl entries.
 * The numbering follows the order of the entries in intl_char_len_func[]
 * (presumably used as an index into it -- confirm against mz_mbNullConv),
 * so keep the two in sync.
 */
#define INTL_CHARLEN_SJIS 0
#define INTL_CHARLEN_EUC_JP 1
#define INTL_CHARLEN_CGK 2
#define INTL_CHARLEN_CNS_8BIT 3
#define INTL_CHARLEN_UTF8 4
#define INTL_CHARLEN_SINGLEBYTE 5
#define INTL_CHARLEN_T61 6
/* a conversion flag for JIS, set if converting hankaku (1byte) kana to zenkaku (2byte) */
#define INTL_SEND_HANKAKU_KANA 128
|
|
|
|
/*
 * Table of character-length routines.  The entry order matches the
 * INTL_CHARLEN_* constants (SJIS = 0 ... T61 = 6); do not reorder one
 * without the other.
 */
PRIVATE intl_CharLenFunc intl_char_len_func[]=
{
    intl_CharLen_SJIS,
    intl_CharLen_EUC_JP,
    intl_CharLen_CGK,
    intl_CharLen_CNS_8BIT,
    intl_CharLen_UTF8,
    intl_CharLen_SingleByte,
    intl_CharLen_T61,
};
|
|
|
|
#ifdef XP_UNIX
|
|
PRIVATE XP_Bool haveBig5 = FALSE;
|
|
PRIVATE XP_Bool have88595 = FALSE;
|
|
PRIVATE XP_Bool have1251 = FALSE;
|
|
PRIVATE XP_Bool haveKOI8R = FALSE;
|
|
PRIVATE XP_Bool haveKOI8U = FALSE;
|
|
#endif
|
|
|
|
PRIVATE int16 *availableFontCharSets = NULL;
|
|
|
|
|
|
/* Table that maps the FROM char, codeset to all other relevant info:
|
|
* - TO character codeset
|
|
* - Fonts (fixed & proportional) for TO character codeset
|
|
* - Type of conversion (func for Win/Mac, value for X)
|
|
* - Argument for conversion routine. Routine-defined.
|
|
*
|
|
* Not all of these may be available. Depends upon available fonts,
|
|
* scripts, codepages, etc. Need to query system to build valid table.
|
|
*
|
|
* What info do I need to make the font change API on the 3 platforms?
|
|
* Is just a 32bit font ID sufficient?
|
|
*
|
|
* Some X Windows can render Japanese in either EUC or SJIS, how do we
|
|
* choose?
|
|
*/
|
|
/* The ***first*** match of a "FROM" encoding (1st col.) will be
|
|
* used as the URL->native encoding. Be careful of the
|
|
* ordering.
|
|
* Additional entries for the same "FROM" encoding, specifies
|
|
* how to convert going out (e.g., sending mail, news or forms).
|
|
*/
|
|
|
|
/*
|
|
What does the flag mean?
|
|
|
|
On Mac, the flag for One2OneCCC is the resource number of a 256-byte mapping table
|
|
On all platforms, the flag for mz_mbNullConv is an INTL_CHARLEN_* value that selects an intl_CharLenFunc routine
|
|
|
|
*/
|
|
#ifdef XP_MAC
|
|
/*
 * Mac conversion table.
 *
 * Each row is { FROM charset, TO charset, <third field>, converter, flag }.
 * The third field's meaning is defined by cscvt_t (declared elsewhere).
 * A converter of 0 appears on the rows that need no conversion routine.
 * For One2OneCCC rows the flag is an xlat_* mapping-table resource; for
 * mz_mbNullConv / mz_AnyToAnyThroughUCS2 rows it is an INTL_CHARLEN_* value.
 *
 * Row order is significant: the first row matching a FROM charset is used
 * as the URL->native conversion.  The { 0, 0, ... } row ends the table.
 */
MODULE_PRIVATE cscvt_t cscvt_tbl[] = {
    /* SINGLE BYTE */
    /* LATIN1 */
    {CS_LATIN1, CS_MAC_ROMAN, 0, (CCCFunc)One2OneCCC, xlat_LATIN1_TO_MAC_ROMAN},
    {CS_ASCII, CS_MAC_ROMAN, 0, (CCCFunc)One2OneCCC, xlat_LATIN1_TO_MAC_ROMAN},
    {CS_MAC_ROMAN, CS_MAC_ROMAN, 0, (CCCFunc)0, 0},
    {CS_MAC_ROMAN, CS_LATIN1, 0, (CCCFunc)One2OneCCC, xlat_MAC_ROMAN_TO_LATIN1},
    {CS_MAC_ROMAN, CS_ASCII, 0, (CCCFunc)One2OneCCC, xlat_MAC_ROMAN_TO_LATIN1},

    /* LATIN2 */
    {CS_LATIN2, CS_MAC_CE, 0, (CCCFunc)One2OneCCC, xlat_LATIN2_TO_MAC_CE},
    {CS_MAC_CE, CS_MAC_CE, 0, (CCCFunc)0, 0},
    {CS_MAC_CE, CS_LATIN2, 0, (CCCFunc)One2OneCCC, xlat_MAC_CE_TO_LATIN2},
    {CS_MAC_CE, CS_ASCII, 0, (CCCFunc)One2OneCCC, xlat_MAC_CE_TO_LATIN2},

    {CS_CP_1250, CS_MAC_CE, 0, (CCCFunc)One2OneCCC, xlat_CP_1250_TO_MAC_CE},
    {CS_MAC_CE, CS_CP_1250, 0, (CCCFunc)One2OneCCC, xlat_MAC_CE_TO_CP_1250},

    /* CYRILLIC */
    {CS_8859_5, CS_MAC_CYRILLIC,0, (CCCFunc)One2OneCCC, xlat_8859_5_TO_MAC_CYRILLIC},
    {CS_MAC_CYRILLIC,CS_MAC_CYRILLIC, 0, (CCCFunc)0, 0},
    {CS_MAC_CYRILLIC,CS_8859_5, 0, (CCCFunc)One2OneCCC, xlat_MAC_CYRILLIC_TO_8859_5},
    {CS_MAC_CYRILLIC,CS_ASCII, 0, (CCCFunc)One2OneCCC, xlat_MAC_CYRILLIC_TO_8859_5},

    {CS_CP_1251, CS_MAC_CYRILLIC,0, (CCCFunc)One2OneCCC, xlat_CP_1251_TO_MAC_CYRILLIC},
    {CS_MAC_CYRILLIC,CS_CP_1251, 0, (CCCFunc)One2OneCCC, xlat_MAC_CYRILLIC_TO_CP_1251},

    {CS_KOI8_R, CS_MAC_CYRILLIC,0, (CCCFunc)One2OneCCC, xlat_KOI8_R_TO_MAC_CYRILLIC},
    {CS_MAC_CYRILLIC,CS_KOI8_R, 0, (CCCFunc)One2OneCCC, xlat_MAC_CYRILLIC_TO_KOI8_R},

    /* GREEK */
    {CS_8859_7, CS_MAC_GREEK, 0, (CCCFunc)One2OneCCC, xlat_8859_7_TO_MAC_GREEK},
    {CS_MAC_GREEK, CS_MAC_GREEK, 0, (CCCFunc)0, 0},
    {CS_MAC_GREEK, CS_8859_7, 0, (CCCFunc)One2OneCCC, xlat_MAC_GREEK_TO_8859_7},
    {CS_MAC_GREEK, CS_ASCII, 0, (CCCFunc)One2OneCCC, xlat_MAC_GREEK_TO_8859_7},

    {CS_CP_1253, CS_MAC_GREEK, 0, (CCCFunc)One2OneCCC, xlat_CP_1253_TO_MAC_GREEK},
    {CS_MAC_GREEK, CS_CP_1253, 0, (CCCFunc)One2OneCCC, xlat_MAC_GREEK_TO_CP_1253},

    /* TURKISH */
    {CS_8859_9, CS_MAC_TURKISH, 0, (CCCFunc)One2OneCCC, xlat_8859_9_TO_MAC_TURKISH},
    {CS_MAC_TURKISH,CS_MAC_TURKISH, 0, (CCCFunc)0, 0},
    {CS_MAC_TURKISH,CS_8859_9, 0, (CCCFunc)One2OneCCC, xlat_MAC_TURKISH_TO_8859_9},
    {CS_MAC_TURKISH,CS_ASCII, 0, (CCCFunc)One2OneCCC, xlat_MAC_TURKISH_TO_8859_9},

    /* THAI */
    {CS_TIS620, CS_TIS620, 0, (CCCFunc)0, 0},

    /* MULTIBYTE */
    /* JAPANESE */
    {CS_SJIS, CS_SJIS, 1, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_SJIS },
    {CS_SJIS, CS_JIS, 1, (CCCFunc)mz_sjis2jis, 0},
    {CS_JIS, CS_SJIS, 1, (CCCFunc)jis2other, 0},
    {CS_EUCJP, CS_SJIS, 1, (CCCFunc)mz_euc2sjis, 0},
    {CS_JIS, CS_EUCJP, 1, (CCCFunc)jis2other, 1},
    {CS_EUCJP, CS_JIS, 1, (CCCFunc)mz_euc2jis, 0},
    {CS_SJIS, CS_EUCJP, 1, (CCCFunc)mz_sjis2euc, 0},
    /* auto-detect Japanese conversions */
    {CS_SJIS_AUTO, CS_SJIS, 1, (CCCFunc)autoJCCC, 0},
    {CS_JIS_AUTO, CS_SJIS, 1, (CCCFunc)autoJCCC, 0},
    {CS_EUCJP_AUTO, CS_SJIS, 1, (CCCFunc)autoJCCC, 0},

    /* KOREAN */
    {CS_KSC_8BIT, CS_KSC_8BIT, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK },
    {CS_2022_KR, CS_KSC_8BIT, 0, (CCCFunc)mz_iso2euckr, 0},
    {CS_KSC_8BIT, CS_2022_KR, 0, (CCCFunc)mz_euckr2iso, 0},
    /* auto-detect Korean conversions */
    {CS_KSC_8BIT_AUTO, CS_KSC_8BIT,1, (CCCFunc)autoKCCC, 0},
    {(CS_2022_KR|CS_AUTO) , CS_KSC_8BIT,1, (CCCFunc)autoKCCC, 0},
    {CS_KSC5601, CS_KSC_8BIT, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},
    {CS_KSC_8BIT, CS_KSC5601, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},

    /* SIMPLIFIED CHINESE */
    {CS_GB_8BIT, CS_GB_8BIT, 0, (CCCFunc)mz_hz2gb, 0},
    {CS_GB2312, CS_GB_8BIT, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},
    {CS_GB_8BIT, CS_GB2312, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},

    {CS_HZ, CS_GB_8BIT, 0, (CCCFunc)mz_hz2gb, 0},
    /* we need gb2hz routine to complete hz support */

    /* TRADITIONAL CHINESE */
    {CS_BIG5, CS_BIG5, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK },
    {CS_BIG5, CS_CNS_8BIT, 0, (CCCFunc)mz_b5toeuctw, 0},
    {CS_CNS_8BIT, CS_BIG5, 0, (CCCFunc)mz_euctwtob5, 0},
    {CS_X_BIG5, CS_BIG5, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},
    {CS_BIG5, CS_X_BIG5, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},

    /* UNICODE */
    {CS_UTF8, CS_UTF8, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_UTF8 },

    {CS_T61, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_T61},
    {CS_UTF8, CS_T61, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},

    {CS_UTF8, CS_UCS2, 0, (CCCFunc)mz_utf82ucs, 0},
    {CS_UTF8, CS_UTF7, 0, (CCCFunc)mz_utf82utf7, 0},
    {CS_UTF8, CS_UCS2_SWAP, 0, (CCCFunc)mz_utf82ucsswap, 0},
    {CS_UTF8, CS_IMAP4_UTF7, 0, (CCCFunc)mz_utf82imap4utf7, 0},
    {CS_UCS2, CS_UTF8, 0, (CCCFunc)mz_ucs2utf8, 0},
    {CS_UCS2, CS_UTF7, 0, (CCCFunc)mz_ucs2utf7, 0},
    {CS_UCS2_SWAP, CS_UTF8, 0, (CCCFunc)mz_ucs2utf8, 0},
    {CS_UCS2_SWAP, CS_UTF7, 0, (CCCFunc)mz_ucs2utf7, 0},
    {CS_UTF7, CS_UTF8, 0, (CCCFunc)mz_utf72utf8, 0},
    {CS_IMAP4_UTF7, CS_UTF8, 0, (CCCFunc)mz_imap4utf72utf8, 0},

    {CS_MAC_ROMAN, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
    {CS_LATIN1, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
    {CS_MAC_CE, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
    {CS_MAC_CYRILLIC, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
    {CS_KOI8_R, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
    {CS_MAC_GREEK, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
    {CS_MAC_TURKISH, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
    {CS_SJIS, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SJIS},
    {CS_KSC_8BIT, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_CGK},
    {CS_BIG5, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_CGK},
    {CS_GB_8BIT, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_CGK},
    {CS_TIS620, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},

    {CS_UTF8, CS_MAC_ROMAN, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
    {CS_UTF8, CS_LATIN1, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
    {CS_UTF8, CS_MAC_CE, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
    {CS_UTF8, CS_MAC_CYRILLIC,0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
    {CS_UTF8, CS_KOI8_R, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
    {CS_UTF8, CS_MAC_GREEK, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
    {CS_UTF8, CS_MAC_TURKISH, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
    {CS_UTF8, CS_SJIS, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
    {CS_UTF8, CS_KSC_8BIT, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
    {CS_UTF8, CS_BIG5, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
    {CS_UTF8, CS_GB_8BIT, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
    {CS_UTF8, CS_TIS620, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},

    {CS_USER_DEFINED_ENCODING, CS_USER_DEFINED_ENCODING, 0, (CCCFunc)0, 0},
    {0, 0, 1, (CCCFunc)0, 0}
};
|
|
|
|
#endif /* XP_MAC */
|
|
|
|
#if defined(XP_WIN) || defined(XP_OS2)
|
|
/*
 * Windows / OS/2 conversion table.
 *
 * Each row is { FROM charset, TO charset, <third field>, converter, flag }.
 * The third field's meaning is defined by cscvt_t (declared elsewhere).
 * A converter of 0 appears on the rows that need no conversion routine.
 * For mz_mbNullConv / mz_AnyToAnyThroughUCS2 / viet_any_2_any rows the
 * flag is an INTL_CHARLEN_* value.
 *
 * Row order is significant: the first row matching a FROM charset is used
 * as the URL->native conversion.  The { 0, 0, ... } row ends the table.
 * Rows inside #ifdef XP_OS2 exist only in the OS/2 build.
 */
MODULE_PRIVATE cscvt_t cscvt_tbl[] = {
    /* SINGLE BYTE */
    /* LATIN1 */
    {CS_LATIN1, CS_LATIN1, 0, (CCCFunc)0, 0},
    {CS_LATIN1, CS_ASCII, 0, (CCCFunc)0, 0},
    {CS_ASCII, CS_LATIN1, 0, (CCCFunc)0, 0},
    {CS_ASCII, CS_ASCII, 0, (CCCFunc)0, 0},

    /* LATIN2 */
    {CS_CP_1250, CS_CP_1250, 0, (CCCFunc)0, 0},
    {CS_CP_1250, CS_LATIN2, 0, (CCCFunc)One2OneCCC, 0},
    {CS_LATIN2, CS_CP_1250, 0, (CCCFunc)One2OneCCC, 0},
    {CS_LATIN2, CS_LATIN2, 0, (CCCFunc)0, 0},
    {CS_LATIN2, CS_ASCII, 0, (CCCFunc)0, 0},

    /* CYRILLIC */
    {CS_CP_1251, CS_CP_1251, 0, (CCCFunc)0, 0},
    {CS_8859_5, CS_CP_1251, 0, (CCCFunc)One2OneCCC, 0},
    {CS_CP_1251, CS_8859_5, 0, (CCCFunc)One2OneCCC, 0},
    /* NOTE(review): exact duplicate of the first CYRILLIC row; kept as-is, confirm whether another pair was intended */
    {CS_CP_1251, CS_CP_1251, 0, (CCCFunc)0, 0},

    {CS_KOI8_R, CS_CP_1251, 0, (CCCFunc)One2OneCCC, 0},
    {CS_CP_1251, CS_KOI8_R, 0, (CCCFunc)One2OneCCC, 0},

    /* ARMENIAN */
    {CS_ARMSCII8, CS_ARMSCII8, 0, (CCCFunc)0, 0},

    /* THAI */
    {CS_TIS620, CS_TIS620, 0, (CCCFunc)0, 0},

    /* GREEK */
    {CS_CP_1253, CS_CP_1253, 0, (CCCFunc)0, 0},
    {CS_CP_1253, CS_8859_7, 0, (CCCFunc)One2OneCCC, 0},
    {CS_8859_7, CS_CP_1253, 0, (CCCFunc)One2OneCCC, 0},
    {CS_8859_7, CS_8859_7, 0, (CCCFunc)0, 0},

    /* TURKISH */
#ifdef XP_OS2
    {CS_CP_1254, CS_CP_1254, 0, (CCCFunc)0, 0},
    {CS_CP_1254, CS_8859_9, 0, (CCCFunc)One2OneCCC, 0},
    {CS_8859_9, CS_CP_1254, 0, (CCCFunc)One2OneCCC, 0},
#endif
    {CS_8859_9, CS_8859_9, 0, (CCCFunc)0, 0},

    /* MULTIBYTE */
    /* JAPANESE */
    {CS_SJIS, CS_SJIS, 1, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_SJIS},
    {CS_SJIS, CS_JIS, 1, (CCCFunc)mz_sjis2jis, 0},
    {CS_JIS, CS_SJIS, 1, (CCCFunc)jis2other, 0},
    {CS_EUCJP, CS_SJIS, 1, (CCCFunc)mz_euc2sjis, 0},
    {CS_JIS, CS_EUCJP, 1, (CCCFunc)jis2other, 1},
    {CS_EUCJP, CS_JIS, 1, (CCCFunc)mz_euc2jis, 0},
    {CS_SJIS, CS_EUCJP, 1, (CCCFunc)mz_sjis2euc, 0},
    /* auto-detect Japanese conversions */
    {CS_SJIS_AUTO, CS_SJIS, 1, (CCCFunc)autoJCCC, 0},
    {CS_JIS_AUTO, CS_SJIS, 1, (CCCFunc)autoJCCC, 0},
    {CS_EUCJP_AUTO, CS_SJIS, 1, (CCCFunc)autoJCCC, 0},

    /* KOREAN */
    {CS_KSC_8BIT, CS_KSC_8BIT, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},
    {CS_2022_KR, CS_KSC_8BIT, 0, (CCCFunc)mz_iso2euckr, 0},
    {CS_KSC_8BIT, CS_2022_KR, 0, (CCCFunc)mz_euckr2iso, 0},
    /* auto-detect Korean conversions */
    {CS_KSC_8BIT_AUTO, CS_KSC_8BIT,1, (CCCFunc)autoKCCC, 0},
    {(CS_2022_KR|CS_AUTO) , CS_KSC_8BIT,1, (CCCFunc)autoKCCC, 0},
    {CS_KSC5601, CS_KSC_8BIT, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},
    {CS_KSC_8BIT, CS_KSC5601, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},

    /* SIMPLIFIED CHINESE */
    {CS_GB_8BIT, CS_GB_8BIT, 0, (CCCFunc)mz_hz2gb, 0},
    {CS_GB2312, CS_GB_8BIT, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},
    {CS_GB_8BIT, CS_GB2312, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},

    {CS_HZ, CS_GB_8BIT, 0, (CCCFunc)mz_hz2gb, 0},
    /* we need gb2hz routine to complete hz support */

    /* TRADITIONAL CHINESE */
    {CS_BIG5, CS_BIG5, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},

    {CS_BIG5, CS_CNS_8BIT, 0, (CCCFunc)mz_b5toeuctw, 0},
    {CS_CNS_8BIT, CS_BIG5, 0, (CCCFunc)mz_euctwtob5, 0},

    {CS_X_BIG5, CS_BIG5, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},
    {CS_BIG5, CS_X_BIG5, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},


    /* UNICODE */
    {CS_UTF8, CS_UTF8, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_UTF8},
    /*************** Vietnamese ****************/
    {CS_VIET_VIQR, CS_VIET_VIQR, 0, (CCCFunc)0, 0},
    {CS_VIET_VISCII, CS_VIET_VISCII, 0, (CCCFunc)0, 0},
    {CS_VIET_VPS, CS_VIET_VPS, 0, (CCCFunc)0, 0},
    {CS_VIET_TCVN, CS_VIET_TCVN, 0, (CCCFunc)0, 0},
    {CS_VIET_VNI, CS_VIET_VNI, 0, (CCCFunc)0, 0},
    {CS_CP_1258, CS_CP_1258, 0, (CCCFunc)0, 0},

    {CS_VIET_VIQR, CS_VIET_VISCII, 0, (CCCFunc)viet_any_2_any, INTL_CHARLEN_SINGLEBYTE},
    {CS_VIET_VIQR, CS_VIET_VPS, 0, (CCCFunc)viet_any_2_any, INTL_CHARLEN_SINGLEBYTE},
    {CS_VIET_VIQR, CS_VIET_TCVN, 0, (CCCFunc)viet_any_2_any, INTL_CHARLEN_SINGLEBYTE},
    {CS_VIET_VIQR, CS_VIET_VNI, 0, (CCCFunc)viet_any_2_any, INTL_CHARLEN_SINGLEBYTE},
    {CS_VIET_VIQR, CS_CP_1258, 0, (CCCFunc)viet_any_2_any, INTL_CHARLEN_SINGLEBYTE},

    {CS_VIET_VISCII, CS_VIET_VIQR, 0, (CCCFunc)viet_any_2_any, INTL_CHARLEN_SINGLEBYTE},
    {CS_VIET_VISCII, CS_VIET_VPS, 0, (CCCFunc)viet_any_2_any, INTL_CHARLEN_SINGLEBYTE},
    {CS_VIET_VISCII, CS_VIET_TCVN, 0, (CCCFunc)viet_any_2_any, INTL_CHARLEN_SINGLEBYTE},
    {CS_VIET_VISCII, CS_VIET_VNI, 0, (CCCFunc)viet_any_2_any, INTL_CHARLEN_SINGLEBYTE},
    {CS_VIET_VISCII, CS_CP_1258, 0, (CCCFunc)viet_any_2_any, INTL_CHARLEN_SINGLEBYTE},

    {CS_VIET_VPS, CS_VIET_VIQR, 0, (CCCFunc)viet_any_2_any, INTL_CHARLEN_SINGLEBYTE},
    {CS_VIET_VPS, CS_VIET_VISCII, 0, (CCCFunc)viet_any_2_any, INTL_CHARLEN_SINGLEBYTE},
    {CS_VIET_VPS, CS_VIET_TCVN, 0, (CCCFunc)viet_any_2_any, INTL_CHARLEN_SINGLEBYTE},
    {CS_VIET_VPS, CS_VIET_VNI, 0, (CCCFunc)viet_any_2_any, INTL_CHARLEN_SINGLEBYTE},
    {CS_VIET_VPS, CS_CP_1258, 0, (CCCFunc)viet_any_2_any, INTL_CHARLEN_SINGLEBYTE},

    {CS_VIET_TCVN, CS_VIET_VIQR, 0, (CCCFunc)viet_any_2_any, INTL_CHARLEN_SINGLEBYTE},
    {CS_VIET_TCVN, CS_VIET_VISCII, 0, (CCCFunc)viet_any_2_any, INTL_CHARLEN_SINGLEBYTE},
    {CS_VIET_TCVN, CS_VIET_VPS, 0, (CCCFunc)viet_any_2_any, INTL_CHARLEN_SINGLEBYTE},
    {CS_VIET_TCVN, CS_VIET_VNI, 0, (CCCFunc)viet_any_2_any, INTL_CHARLEN_SINGLEBYTE},
    {CS_VIET_TCVN, CS_CP_1258, 0, (CCCFunc)viet_any_2_any, INTL_CHARLEN_SINGLEBYTE},

    {CS_VIET_VNI, CS_VIET_VIQR, 0, (CCCFunc)viet_any_2_any, INTL_CHARLEN_SINGLEBYTE},
    {CS_VIET_VNI, CS_VIET_VISCII, 0, (CCCFunc)viet_any_2_any, INTL_CHARLEN_SINGLEBYTE},
    {CS_VIET_VNI, CS_VIET_VPS, 0, (CCCFunc)viet_any_2_any, INTL_CHARLEN_SINGLEBYTE},
    {CS_VIET_VNI, CS_VIET_TCVN, 0, (CCCFunc)viet_any_2_any, INTL_CHARLEN_SINGLEBYTE},
    {CS_VIET_VNI, CS_CP_1258, 0, (CCCFunc)viet_any_2_any, INTL_CHARLEN_SINGLEBYTE},

    {CS_CP_1258, CS_VIET_VIQR, 0, (CCCFunc)viet_any_2_any, INTL_CHARLEN_SINGLEBYTE},
    {CS_CP_1258, CS_VIET_VISCII, 0, (CCCFunc)viet_any_2_any, INTL_CHARLEN_SINGLEBYTE},
    {CS_CP_1258, CS_VIET_VPS, 0, (CCCFunc)viet_any_2_any, INTL_CHARLEN_SINGLEBYTE},
    {CS_CP_1258, CS_VIET_TCVN, 0, (CCCFunc)viet_any_2_any, INTL_CHARLEN_SINGLEBYTE},
    {CS_CP_1258, CS_VIET_VNI, 0, (CCCFunc)viet_any_2_any, INTL_CHARLEN_SINGLEBYTE},

    {CS_VIET_VIQR, CS_UTF8, 0, (CCCFunc)viet_any_2_any, INTL_CHARLEN_SINGLEBYTE},
    {CS_VIET_VISCII, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
    {CS_UTF8, CS_VIET_VISCII, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},


    {CS_T61, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_T61},
    {CS_UTF8, CS_T61, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},

    {CS_UTF8, CS_UCS2, 0, (CCCFunc)mz_utf82ucs, 0},
    {CS_UTF8, CS_UTF7, 0, (CCCFunc)mz_utf82utf7, 0},
    {CS_UTF8, CS_UCS2_SWAP, 0, (CCCFunc)mz_utf82ucsswap, 0},
    {CS_UCS2, CS_UTF8, 0, (CCCFunc)mz_ucs2utf8, 0},
    {CS_UCS2, CS_UTF7, 0, (CCCFunc)mz_ucs2utf7, 0},
    {CS_UTF8, CS_IMAP4_UTF7, 0, (CCCFunc)mz_utf82imap4utf7, 0},
    {CS_UCS2_SWAP, CS_UTF8, 0, (CCCFunc)mz_ucs2utf8, 0},
    {CS_UCS2_SWAP, CS_UTF7, 0, (CCCFunc)mz_ucs2utf7, 0},
    {CS_UTF7, CS_UTF8, 0, (CCCFunc)mz_utf72utf8, 0},
    {CS_IMAP4_UTF7, CS_UTF8, 0, (CCCFunc)mz_imap4utf72utf8, 0},

    {CS_LATIN1, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
    {CS_CP_1250, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
    {CS_CP_1251, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
    {CS_CP_1253, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
    {CS_8859_9, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
    {CS_SJIS, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SJIS},
    {CS_KSC_8BIT, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_CGK},
    {CS_BIG5, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_CGK},
    {CS_GB_8BIT, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_CGK},
    {CS_TIS620, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},

    {CS_UTF8, CS_LATIN1, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
    {CS_UTF8, CS_CP_1250, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
    {CS_UTF8, CS_CP_1251, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
    {CS_UTF8, CS_CP_1253, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
    {CS_UTF8, CS_8859_9, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
    {CS_UTF8, CS_SJIS, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
    {CS_UTF8, CS_KSC_8BIT, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
    {CS_UTF8, CS_BIG5, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
    {CS_UTF8, CS_GB_8BIT, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
    {CS_UTF8, CS_TIS620, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},

#ifdef XP_OS2
    /*
     * Define additional codepage conversions for OS/2. All of these use the unicode
     * based conversion tables.
     */
    /* Thai */
    {CS_CP_874, CS_CP_874, 0, (CCCFunc)0, 0},
    {CS_CP_874, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
    {CS_UTF8, CS_CP_874, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},

    /* Baltic */
    {CS_CP_1257, CS_CP_1257, 0, (CCCFunc)0, 0},
    {CS_CP_1257, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
    {CS_UTF8, CS_CP_1257, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},

    /* Hebrew */
    {CS_CP_862, CS_CP_862, 0, (CCCFunc)0, 0},
    {CS_CP_862, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
    {CS_UTF8, CS_CP_862, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},

    /* Arabic */
    {CS_CP_864, CS_CP_864, 0, (CCCFunc)0, 0},
    {CS_CP_864, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
    {CS_UTF8, CS_CP_864, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},

    /* PC codepages - Default convert to windows codepages */
    {CS_CP_850, CS_LATIN1, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
    {CS_CP_852, CS_LATIN2, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
    {CS_CP_855, CS_CP_1251, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
    {CS_CP_857, CS_CP_1254, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
    {CS_CP_866, CS_CP_1251, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},

    {CS_CP_850, CS_CP_850, 0, (CCCFunc)0, 0},
    {CS_CP_852, CS_CP_852, 0, (CCCFunc)0, 0},
    {CS_CP_855, CS_CP_855, 0, (CCCFunc)0, 0},
    {CS_CP_857, CS_CP_857, 0, (CCCFunc)0, 0},
    {CS_CP_866, CS_CP_866, 0, (CCCFunc)0, 0},

    {CS_LATIN1, CS_CP_850, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
    {CS_LATIN2, CS_CP_852, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
    {CS_CP_1251, CS_CP_855, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
    {CS_CP_1254, CS_CP_857, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
    {CS_CP_1251, CS_CP_866, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},

    {CS_CP_850, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
    {CS_CP_852, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
    {CS_CP_855, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
    {CS_CP_857, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
    {CS_CP_866, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
    {CS_KOI8_R, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},

    {CS_UTF8, CS_CP_850, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
    {CS_UTF8, CS_CP_852, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
    {CS_UTF8, CS_CP_855, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
    {CS_UTF8, CS_CP_857, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
    {CS_UTF8, CS_CP_866, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
    {CS_UTF8, CS_KOI8_R, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},

    {CS_MAC_ROMAN, CS_LATIN1, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
    {CS_LATIN1, CS_MAC_ROMAN, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
#endif /* XP_OS2 */


    {CS_USER_DEFINED_ENCODING, CS_USER_DEFINED_ENCODING, 0, (CCCFunc)0, 0},
    {0, 0, 1, (CCCFunc)0, 0}
};
|
|
|
|
#endif /* XP_WIN || XP_OS2 */
|
|
#ifdef XP_UNIX
|
|
/*
 * Unix conversion table.
 *
 * Each row is { FROM charset, TO charset, <third field>, converter, flag }.
 * The third field's meaning is defined by cscvt_t (declared elsewhere).
 * A NULL / 0 converter appears on the rows that need no conversion routine.
 * For mz_mbNullConv / mz_AnyToAnyThroughUCS2 rows the flag is an
 * INTL_CHARLEN_* value.
 *
 * Row order is significant: the first row matching a FROM charset is used
 * as the URL->native conversion.  The { 0, 0, ... } row ends the table.
 */
MODULE_PRIVATE cscvt_t cscvt_tbl[] = {
    /* SINGLE BYTE */
    /* LATIN1 */
    {CS_LATIN1, CS_LATIN1, 0, (CCCFunc)One2OneCCC, 0},
    {CS_LATIN1, CS_ASCII, 0, NULL, 0},
    {CS_ASCII, CS_LATIN1, 0, NULL, 0},

    /* LATIN2 */
    {CS_LATIN2, CS_LATIN2, 0, NULL, 0},
    {CS_LATIN2, CS_ASCII, 0, NULL, 0},
    {CS_LATIN2, CS_CP_1250, 0, (CCCFunc)One2OneCCC, 0},
    {CS_CP_1250, CS_LATIN2, 0, (CCCFunc)One2OneCCC, 0},

    /* CYRILLIC */
    {CS_KOI8_R, CS_KOI8_R, 0, NULL, 0},
    {CS_8859_5, CS_8859_5, 0, NULL, 0},
    {CS_CP_1251, CS_CP_1251, 0, NULL, 0},

    {CS_8859_5, CS_KOI8_R, 0, (CCCFunc)One2OneCCC, 0},
    {CS_KOI8_R, CS_8859_5, 0, (CCCFunc)One2OneCCC, 0},

    {CS_CP_1251, CS_8859_5, 0, (CCCFunc)One2OneCCC, 0},
    {CS_8859_5, CS_CP_1251, 0, (CCCFunc)One2OneCCC, 0},
    {CS_CP_1251, CS_KOI8_R, 0, (CCCFunc)One2OneCCC, 0},
    {CS_KOI8_R, CS_CP_1251, 0, (CCCFunc)One2OneCCC, 0},

    /* UKRAINIAN */
    {CS_KOI8_U, CS_KOI8_U, 0, (CCCFunc)0, 0},
    {CS_KOI8_U, CS_KOI8_R, 0, (CCCFunc)One2OneCCC, 0},
    {CS_KOI8_R, CS_KOI8_U, 0, (CCCFunc)One2OneCCC, 0},
    {CS_KOI8_U, CS_8859_5, 0, (CCCFunc)One2OneCCC, 0},
    {CS_8859_5, CS_KOI8_U, 0, (CCCFunc)One2OneCCC, 0},
    {CS_KOI8_U, CS_MAC_CYRILLIC, 0, (CCCFunc)One2OneCCC, 0},
    {CS_MAC_CYRILLIC, CS_KOI8_U, 0, (CCCFunc)One2OneCCC, 0},
    {CS_KOI8_U, CS_CP_1251, 0, (CCCFunc)One2OneCCC, 0},
    {CS_CP_1251, CS_KOI8_U, 0, (CCCFunc)One2OneCCC, 0},


    /* ARMENIAN */
    {CS_ARMSCII8, CS_ARMSCII8, 0, (CCCFunc)0, 0},

    /* THAI */
    {CS_TIS620, CS_TIS620, 0, (CCCFunc)0, 0},

    /* GREEK */
    {CS_8859_7, CS_8859_7, 0, NULL, 0},
    {CS_8859_7, CS_CP_1253, 0, (CCCFunc)One2OneCCC, 0},
    {CS_CP_1253, CS_8859_7, 0, (CCCFunc)One2OneCCC, 0},

    /* TURKISH */
    {CS_8859_9, CS_8859_9, 0, NULL, 0},

    /* MULTIBYTE */
    /* JAPANESE */
    {CS_EUCJP, CS_EUCJP, 1, mz_mbNullConv, INTL_CHARLEN_EUC_JP},
    {CS_JIS, CS_EUCJP, 1, jis2other, 1},
    {CS_SJIS, CS_EUCJP, 1, mz_sjis2euc, 0},
    {CS_EUCJP, CS_SJIS, 1, mz_euc2sjis, 0},
    {CS_JIS, CS_SJIS, 1, jis2other, 0},
    {CS_SJIS, CS_SJIS, 1, mz_mbNullConv, INTL_CHARLEN_SJIS},
    {CS_EUCJP, CS_JIS, 1, mz_euc2jis, 0},
    {CS_SJIS, CS_JIS, 1, mz_sjis2jis, 0},
    /* auto-detect Japanese conversions */
    {CS_JIS_AUTO, CS_EUCJP, 1, autoJCCC, 1},
    {CS_SJIS_AUTO, CS_EUCJP, 1, autoJCCC, 0},
    {CS_EUCJP_AUTO, CS_EUCJP, 1, autoJCCC, 0},
    {CS_EUCJP_AUTO, CS_SJIS, 1, autoJCCC, 0},
    {CS_JIS_AUTO, CS_SJIS, 1, autoJCCC, 0},
    {CS_SJIS_AUTO, CS_SJIS, 1, autoJCCC, 0},

    /* KOREAN */
    {CS_KSC_8BIT, CS_KSC_8BIT, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},
    {CS_2022_KR, CS_KSC_8BIT, 0, (CCCFunc)mz_iso2euckr, 0},
    {CS_KSC_8BIT, CS_2022_KR, 0, (CCCFunc)mz_euckr2iso, 0},
    /* auto-detect Korean conversions */
    {CS_KSC_8BIT_AUTO, CS_KSC_8BIT,1, (CCCFunc)autoKCCC, 0},
    {(CS_2022_KR|CS_AUTO) , CS_KSC_8BIT,1, (CCCFunc)autoKCCC, 0},
    {CS_KSC5601, CS_KSC_8BIT, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},
    {CS_KSC_8BIT, CS_KSC5601, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},

    /* SIMPLIFIED CHINESE */
    {CS_GB_8BIT, CS_GB_8BIT, 0, (CCCFunc)mz_hz2gb, 0},
    {CS_GB2312, CS_GB_8BIT, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},
    {CS_GB_8BIT, CS_GB2312, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},

    {CS_HZ, CS_GB_8BIT, 0, (CCCFunc)mz_hz2gb, 0},
    /* we need gb2hz routine to complete hz support */

    /* TRADITIONAL CHINESE */
    {CS_CNS_8BIT, CS_CNS_8BIT, 0, mz_mbNullConv, INTL_CHARLEN_CNS_8BIT},

    {CS_BIG5, CS_CNS_8BIT, 0, mz_b5toeuctw, 0},
    {CS_CNS_8BIT, CS_BIG5, 0, mz_euctwtob5, 0},
    {CS_X_BIG5, CS_CNS_8BIT, 0, mz_b5toeuctw, 0},
    {CS_CNS_8BIT, CS_X_BIG5, 0, mz_euctwtob5, 0},

    {CS_BIG5, CS_BIG5, 0, mz_mbNullConv, INTL_CHARLEN_CGK},
    {CS_X_BIG5, CS_BIG5, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},
    {CS_BIG5, CS_X_BIG5, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},

    {CS_USRDEF2, CS_USRDEF2, 0, NULL, 0},

    /* UNICODE */
    {CS_UTF8, CS_UTF8, 0, mz_mbNullConv, INTL_CHARLEN_UTF8},

    {CS_T61, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_T61},
    {CS_UTF8, CS_T61, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},

    {CS_UTF8, CS_UCS2, 0, (CCCFunc)mz_utf82ucs, 0},
    {CS_UTF8, CS_UTF7, 0, (CCCFunc)mz_utf82utf7, 0},
    {CS_UTF8, CS_UCS2_SWAP, 0, (CCCFunc)mz_utf82ucsswap, 0},
    {CS_UTF8, CS_IMAP4_UTF7, 0, (CCCFunc)mz_utf82imap4utf7, 0},
    {CS_UCS2, CS_UTF8, 0, (CCCFunc)mz_ucs2utf8, 0},
    {CS_UCS2, CS_UTF7, 0, (CCCFunc)mz_ucs2utf7, 0},
    {CS_UCS2_SWAP, CS_UTF8, 0, (CCCFunc)mz_ucs2utf8, 0},
    {CS_UCS2_SWAP, CS_UTF7, 0, (CCCFunc)mz_ucs2utf7, 0},
    {CS_UTF7, CS_UTF8, 0, (CCCFunc)mz_utf72utf8, 0},

    {CS_LATIN1, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
    {CS_LATIN2, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
    {CS_8859_5, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
    {CS_KOI8_R, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
    {CS_KOI8_U, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
    {CS_8859_7, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
    {CS_8859_9, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
    {CS_SJIS, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SJIS},
    {CS_EUCJP, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_EUC_JP},
    {CS_KSC_8BIT, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_CGK},
    {CS_BIG5, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_CGK},
    {CS_CNS_8BIT, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_CNS_8BIT},
    {CS_GB_8BIT, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_CGK},
    {CS_TIS620, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},

    {CS_UTF8, CS_LATIN1, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
    {CS_UTF8, CS_LATIN2, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
    {CS_UTF8, CS_8859_5, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
    {CS_UTF8, CS_KOI8_R, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
    {CS_UTF8, CS_KOI8_U, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
    {CS_UTF8, CS_8859_7, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
    {CS_UTF8, CS_8859_9, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
    {CS_UTF8, CS_SJIS, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
    {CS_UTF8, CS_EUCJP, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
    {CS_UTF8, CS_KSC_8BIT, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
    {CS_UTF8, CS_BIG5, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
    {CS_UTF8, CS_CNS_8BIT, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
    {CS_UTF8, CS_GB_8BIT, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
    {CS_UTF8, CS_TIS620, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},

    {CS_IMAP4_UTF7, CS_UTF8, 0, (CCCFunc)mz_imap4utf72utf8, 0},
    {0, 0, 0, NULL, 0}

};
|
|
|
|
#endif /* XP_UNIX */
|
|
|
|
|
|
/*
|
|
* this routine is needed to make sure parser and layout see whole
|
|
* characters, not partial characters
|
|
*/
|
|
|
|
/*
 * Decoder states used by mz_hz2gb.  The current state is written with
 * INTL_SetCCCCvtflag at the end of a call and read back with
 * INTL_GetCCCCvtflag at the start of the next one.
 */
typedef enum {
    kHZSingle,          /* single-byte mode, nothing pending */
    kHZSingleTild,      /* single-byte mode, a '~' has just been consumed */
    kHZSingleTildLFCR,  /* single-byte mode, '~' followed by CR/LF; further CR/LF are swallowed */
    kHZDouble,          /* double-byte mode, waiting for the first byte of a pair */
    kHZDoubleGet1,      /* double-byte mode, first byte of a pair already emitted */
    kHZDoubleTild,      /* double-byte mode, a '~' has just been consumed */
    kGBDoubleGet1       /* hacky state which allows GB to pass through: a hi-bit byte was emitted in single-byte mode */
} HZ_STATE;
|
|
|
|
PRIVATE unsigned char *
|
|
mz_hz2gb(CCCDataObject obj, const unsigned char *inbuf, int32 inbufsize)
|
|
{
|
|
HZ_STATE state;
|
|
unsigned char *outbuf;
|
|
unsigned char *out;
|
|
const unsigned char *in;
|
|
int32 i;
|
|
|
|
state = (HZ_STATE)INTL_GetCCCCvtflag(obj);
|
|
|
|
out = outbuf = XP_ALLOC(inbufsize + 1 + 1); /* 1 for unconverted, 4 for fake escape 1 for NULL */
|
|
XP_ASSERT(NULL != out);
|
|
|
|
if((kHZDoubleGet1 == state) ||(kGBDoubleGet1 == state))
|
|
{
|
|
/* If we have anything in the unconverted buffer, let's output it
|
|
please notice the usage of uncoverted buffer in this routine is different from other,
|
|
it is really "converted, but not output yet" buffer
|
|
*/
|
|
unsigned char* u = INTL_GetCCCUncvtbuf(obj);
|
|
XP_ASSERT(NULL != u);
|
|
*out++ = *u;
|
|
}
|
|
|
|
for(in = inbuf, i = 0; i < inbufsize; i++, in++)
|
|
{
|
|
switch(state)
|
|
{
|
|
case kHZSingle:
|
|
if(0x80 & *in) {
|
|
state = kGBDoubleGet1; /* change state */
|
|
*out++ = *in; /* get 1 double byte, output it */
|
|
}
|
|
else if('~' == *in) {
|
|
state = kHZSingleTild; /* change state */
|
|
} else {
|
|
*out++ = *in; /* normal single byte, output it, do not change state */
|
|
}
|
|
break;
|
|
|
|
case kHZSingleTild:
|
|
switch(*in)
|
|
{
|
|
case '{':
|
|
state = kHZDouble; /* change state */
|
|
break;
|
|
case '~':
|
|
*out++ = '~'; /* a ~~ , generate a ~ */
|
|
state = kHZSingle; /* change state back to normal */
|
|
break;
|
|
case LF: /* continue in ASCII mode */
|
|
case CR: /* continue in ASCII mode */
|
|
state = kHZSingleTildLFCR;
|
|
break;
|
|
default:
|
|
*out++ = '~';
|
|
i--; in--; /* unread it */
|
|
state = kHZSingle;
|
|
break;
|
|
}
|
|
break;
|
|
|
|
case kHZSingleTildLFCR:
|
|
switch(*in)
|
|
{
|
|
case LF: /* continue in ASCII mode */
|
|
case CR: /* continue in ASCII mode */
|
|
/* eat all following CR or LF */
|
|
break;
|
|
|
|
default:
|
|
i--; in--; /* unread it */
|
|
state = kHZSingle;
|
|
break;
|
|
}
|
|
break;
|
|
|
|
case kHZDouble:
|
|
if('~' == *in) {
|
|
state = kHZDoubleTild; /* change state, generate nothing */
|
|
} else {
|
|
*out++ = 0x80 | *in; /* get one byte in double byte, output it */
|
|
state = kHZDoubleGet1; /* change state */
|
|
}
|
|
break;
|
|
|
|
case kHZDoubleGet1:
|
|
*out++ = 0x80 | *in; /* get the 2nd byte in double byte character, output it with hi-bit on */
|
|
state = kHZDouble; /* change state */
|
|
break;
|
|
|
|
case kHZDoubleTild:
|
|
if('}' == *in) {
|
|
state = kHZSingle; /* change state, output nothing */
|
|
} else {
|
|
*out++ = 0x80 | '~'; /* not the escape sequence, output the ~ with hi-bit on */
|
|
*out++ = 0x80 | *in; /* now output it with hi-bit on*/
|
|
state = kHZDouble; /* change state */
|
|
}
|
|
break;
|
|
|
|
case kGBDoubleGet1:
|
|
*out++ = *in; /* get the 2nd byte in double byte character, output it */
|
|
state = kHZSingle; /* change state */
|
|
break;
|
|
|
|
|
|
default:
|
|
XP_ASSERT(0);
|
|
state = kHZSingle; /* change state so the program won't stop */
|
|
break;
|
|
}
|
|
}
|
|
|
|
|
|
if((kHZDoubleGet1 == state) ||(kGBDoubleGet1 == state))
|
|
{
|
|
/* We need to make sure we always return in the boundary of a character,
|
|
So we back off one byte, store it in the unconverted buf
|
|
*/
|
|
unsigned char* u = INTL_GetCCCUncvtbuf(obj);
|
|
XP_ASSERT(NULL != u);
|
|
|
|
out--; /* back up */
|
|
*u = *out; /* store it into unconvert buffer */
|
|
*(u+1) = '\0'; /* null terminate the unconvertered buffer */
|
|
}
|
|
INTL_SetCCCCvtflag(obj, ((int32)state));
|
|
|
|
|
|
*out = '\0';
|
|
INTL_SetCCCLen(obj, out - outbuf);
|
|
|
|
return outbuf;
|
|
}
|
|
|
|
|
|
|
|
/* mz_mbNullConv
|
|
* this routine is needed to make sure parser and layout see whole
|
|
* characters, not partial characters
|
|
*/
|
|
/* This routine is designed to replace the following routines:
|
|
mz_euc2euc
|
|
mz_b52b5
|
|
mz_cns2cns
|
|
mz_ksc2ksc
|
|
mz_sjis2sjis
|
|
mz_utf82utf8
|
|
mz_gb2gb
|
|
*/
|
|
PRIVATE unsigned char *
|
|
mz_mbNullConv(CCCDataObject obj, const unsigned char *buf, int32 bufsz)
|
|
{
|
|
int32 left_over;
|
|
int32 len;
|
|
unsigned char *p;
|
|
unsigned char *ret;
|
|
int32 total;
|
|
intl_CharLenFunc CharLenFunc = intl_char_len_func[INTL_GetCCCCvtflag(obj)];
|
|
int charlen = 0;
|
|
|
|
/* Get the unconverted buffer */
|
|
unsigned char *uncvtbuf = INTL_GetCCCUncvtbuf(obj);
|
|
int32 uncvtsz = strlen((char *)uncvtbuf);
|
|
|
|
/* return in the input is nonsense */
|
|
if ((!obj) || (! buf) || (bufsz < 0))
|
|
return NULL;
|
|
|
|
/* Allocate Output Buffer */
|
|
total = uncvtsz + bufsz;
|
|
ret = (unsigned char *) XP_ALLOC(total + 1);
|
|
if (!ret)
|
|
{
|
|
INTL_SetCCCRetval(obj, MK_OUT_OF_MEMORY);
|
|
return NULL;
|
|
}
|
|
|
|
/* Copy unconverted buffer into the output bufer */
|
|
memcpy(ret, uncvtbuf, uncvtsz);
|
|
/* Copy the current input buffer into the output buffer */
|
|
memcpy(ret+uncvtsz, buf, bufsz);
|
|
|
|
/* Walk through the buffer and figure out the left_over length */
|
|
for (p=ret, len=total, left_over=0; len > 0; p += charlen, len -= charlen)
|
|
{
|
|
if((charlen = CharLenFunc(*p)) > 1)
|
|
{ /* count left_over only if it is multibyte char */
|
|
if(charlen > len) /* count left_over only if the len is less than charlen */
|
|
left_over = len;
|
|
};
|
|
}
|
|
|
|
/* Copy the left over into the uncvtbuf */
|
|
if(left_over)
|
|
memcpy(uncvtbuf, p - charlen, left_over);
|
|
/* Null terminated the uncvtbuf */
|
|
uncvtbuf[left_over] = '\0';
|
|
|
|
/* Null terminate the return buffer and set the length */
|
|
INTL_SetCCCLen(obj, total - left_over);
|
|
ret[total - left_over] = 0;
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
buf -> mz_mbNullConv -> frombuf -> INTL_TextToUnicode -> ucs2buf
|
|
-> INTL_UnicodeToStr -> tobuf
|
|
*/
|
|
/*
 * mz_AnyToAnyThroughUCS2
 *
 * Convert buf from the "from" charset of obj to its "to" charset by going
 * through UCS2:
 *
 *   buf -> mz_mbNullConv -> fromBuf -> INTL_TextToUnicode -> ucs2Buf
 *       -> INTL_UnicodeToStr -> toBuf
 *
 * mz_mbNullConv trims the input to whole characters first.
 *
 * Returns the converted buffer obtained from XP_ALLOC; its length is
 * reported through INTL_SetCCCLen.  Returns NULL on failure.  All
 * intermediate buffers are released on every path, and an allocation
 * failure here sets the return value of obj to MK_OUT_OF_MEMORY.
 */
MODULE_PRIVATE unsigned char* mz_AnyToAnyThroughUCS2(CCCDataObject obj, const unsigned char *buf, int32 bufsz)
{
    /* buffers */
    unsigned char* fromBuf = NULL;
    INTL_Unicode*  ucs2Buf = NULL;
    unsigned char* toBuf = NULL;
    /* buffers' length */
    uint32 ucs2BufLen = 0;
    uint32 fromBufLen = 0;
    uint32 toBufLen = 0;
    /* from & to csid */
    uint16 fromCsid = INTL_GetCCCFromCSID(obj);
    uint16 toCsid = INTL_GetCCCToCSID(obj);

    /* get the fromBuf: input cut at a character boundary */
    fromBuf = mz_mbNullConv(obj, buf, bufsz);
    if (!fromBuf)
        return NULL;

    /* map fromBuf -> ucs2Buf */
    fromBufLen = INTL_GetCCCLen(obj);
    ucs2BufLen = INTL_TextToUnicodeLen(fromCsid, fromBuf, fromBufLen);

    ucs2Buf = XP_ALLOC((ucs2BufLen + 1) * 2);
    if (!ucs2Buf)
    {
        INTL_SetCCCRetval(obj, MK_OUT_OF_MEMORY);
        XP_FREE(fromBuf);
        return NULL;
    }

    /* be careful: the return value is HOW MANY UNICODE CHARACTERS are in
       ucs2Buf, not how many bytes */
    ucs2BufLen = INTL_TextToUnicode(fromCsid, fromBuf, fromBufLen, ucs2Buf, ucs2BufLen);

    /* map ucs2Buf -> toBuf; this length is in BYTES */
    toBufLen = INTL_UnicodeToStrLen(toCsid, ucs2Buf, ucs2BufLen);

    toBuf = XP_ALLOC(toBufLen + 1);
    if (toBuf)
    {
        INTL_UnicodeToStr(toCsid, ucs2Buf, ucs2BufLen, toBuf, toBufLen);
        INTL_SetCCCLen(obj, XP_STRLEN((char*)toBuf));
    }
    else
    {
        INTL_SetCCCRetval(obj, MK_OUT_OF_MEMORY);
    }

    /* clean up the intermediate buffers on success and failure alike */
    XP_FREE(ucs2Buf);
    XP_FREE(fromBuf);

    return toBuf;
}
|
|
|
|
|
|
/* Length in bytes of the Shift_JIS character whose first byte is ch:
   0x81-0x9f and 0xe0-0xfc start a 2-byte character, anything else is 1. */
PRIVATE int16 intl_CharLen_SJIS( unsigned char ch)
{
    if ((ch >= 0x81) && (ch <= 0x9f))
        return 2;
    if ((ch >= 0xe0) && (ch <= 0xfc))
        return 2;
    return 1;
}
|
|
/* Length in bytes of the EUC-JP character whose first byte is ch:
   0x8f starts a 3-byte character, 0x8e and 0xa1-0xfe a 2-byte one,
   anything else is 1. */
PRIVATE int16 intl_CharLen_EUC_JP( unsigned char ch)
{
    if (ch == 0x8f)
        return 3;
    if ((ch == 0x8e) || ((ch >= 0xa1) && (ch <= 0xfe)))
        return 2;
    return 1;
}
|
|
/* Length in bytes of a character whose first byte is ch, for the charsets
   that use this entry: 0xa1-0xfe starts a 2-byte character, else 1. */
PRIVATE int16 intl_CharLen_CGK( unsigned char ch)
{
    if ((ch < 0xa1) || (ch > 0xfe))
        return 1;
    return 2;
}
|
|
/* Length in bytes of the 8-bit CNS character whose first byte is ch:
   0xa1-0xfe starts a 2-byte character, 0x8e a 4-byte one, else 1. */
PRIVATE int16 intl_CharLen_CNS_8BIT( unsigned char ch)
{
    if ((ch >= 0xa1) && (ch <= 0xfe))
        return 2;
    if (ch == 0x8e)
        return 4;
    return 1;
}
|
|
/* Length in bytes of the UTF-8 sequence whose first byte is ch:
     0xc0-0xdf -> 2,  0xe0-0xef -> 3,  0xf0-0xf7 -> 4,  anything else -> 1.
   The 4-byte lead bytes used to be reported as length 1, which allowed a
   4-byte sequence to be split at a chunk boundary.  Continuation bytes and
   invalid lead bytes are still reported as 1 so that a walk over malformed
   input keeps advancing. */
PRIVATE int16 intl_CharLen_UTF8( unsigned char ch)
{
    if ((ch >= 0xc0) && (ch <= 0xdf))
        return 2;
    if ((ch >= 0xe0) && (ch <= 0xef))
        return 3;
    if ((ch >= 0xf0) && (ch <= 0xf7))
        return 4;
    return 1;
}
|
|
/* Character length for single-byte charsets: always 1, whatever ch is. */
PRIVATE int16 intl_CharLen_SingleByte( unsigned char ch)
{
    (void)ch;   /* the first byte does not matter here */
    return 1;
}
|
|
/* Length in bytes of the T.61 character whose first byte is ch:
   0xC0-0xCF starts a 2-byte character, anything else is 1. */
PRIVATE int16 intl_CharLen_T61( unsigned char ch)
{
    if ((ch < 0xC0) || (ch > 0xCF))
        return 1;
    return 2;
}
|
|
|
|
|
|
/*
|
|
INTL_DefaultWinCharSetID,
|
|
Based on DefaultDocCSID, it determines which Win CSID to use for Display
|
|
*/
|
|
/*
 * Return the window charset ID to use for context.  A non-zero window
 * CSID recorded in the document's charset info wins; otherwise the result
 * is derived from the default document charset via INTL_DocToWinCharSetID.
 */
PUBLIC int16 INTL_DefaultWinCharSetID(iDocumentContext context)
{
    if (context) {
        INTL_CharSetInfo csi = LO_GetDocumentCharacterSetInfo(context);
        int16 win_csid = INTL_GetCSIWinCSID(csi);

        if (win_csid)
            return win_csid;
    }

    return INTL_DocToWinCharSetID(INTL_DefaultDocCharSetID(context));
}
|
|
|
|
/*
|
|
INTL_DocToWinCharSetID,
|
|
Based on DefaultDocCSID, it determines which Win CSID to use for Display
|
|
*/
|
|
/*
|
|
|
|
To Do: (ftang)
|
|
|
|
We should separate the DocToWinCharSetID logic from the cscvt_t table
|
|
for Cyrillic users.
|
|
|
|
*/
|
|
/*
 * Map a document charset ID to the window charset ID used for display.
 *
 * The CS_AUTO bit is stripped from csid and cscvt_tbl is searched for the
 * first usable entry converting from it; that entry's "to" charset is the
 * answer.  On XP_UNIX some entries are skipped depending on which fonts
 * were reported.  When the default document charset is one of the
 * Vietnamese ones, a later entry converting to exactly that charset is
 * preferred if one exists.  CS_FE_ASCII is returned when nothing matches.
 */
PUBLIC int16 INTL_DocToWinCharSetID(int16 csid)
{
    cscvt_t *entry;
    int16 from_csid;
    int16 to_csid = 0;
    int16 viet_csid;

    /* Only a Vietnamese default view encoding changes the lookup (for
       now); any other default is treated as "no preference". */
    viet_csid = INTL_DefaultDocCharSetID(0);
    if ((viet_csid != CS_VIET_VISCII) &&
        (viet_csid != CS_VIET_VPS) &&
        (viet_csid != CS_VIET_TCVN) &&
        (viet_csid != CS_VIET_VNI) &&
        (viet_csid != CS_CP_1258) &&
        (viet_csid != CS_VIET_VIQR)) {
        viet_csid = 0;
    }

    from_csid = csid & ~CS_AUTO;    /* remove auto bit */

    for (entry = cscvt_tbl; entry->from_csid; entry++)
    {
        if (entry->from_csid != from_csid)
            continue;

#ifdef XP_UNIX
        /* disgusting hack: skip targets ruled out by the reported fonts */
        if (((entry->to_csid == CS_CNS_8BIT) && (TRUE == haveBig5)) ||
            ((entry->to_csid == CS_8859_5) && (FALSE == have88595)) ||
            ((entry->to_csid == CS_KOI8_R) && (FALSE == haveKOI8R)) ||
            ((entry->to_csid == CS_KOI8_U) && (FALSE == haveKOI8U)) ||
            ((entry->to_csid == CS_CP_1251) && (FALSE == have1251)))
        {
            continue;
        }
#endif

        /* first usable rule: this is the answer unless a later rule
           targets the preferred Vietnamese charset */
        to_csid = entry->to_csid;
        if (viet_csid) {
            cscvt_t *later;

            for (later = entry + 1; later->from_csid; later++) {
                if ((later->from_csid == from_csid) &&
                    (later->to_csid == viet_csid)) {
                    to_csid = later->to_csid;
                    break;
                }
            }
        }
        break;
    }

    return to_csid == 0 ? CS_FE_ASCII : to_csid;
}
|
|
|
|
|
|
XP_Bool
|
|
INTL_CanAutoSelect(int16 csid)
|
|
{
|
|
register cscvt_t *cscvtp;
|
|
|
|
cscvtp = cscvt_tbl;
|
|
while (cscvtp->from_csid) {
|
|
if (cscvtp->from_csid == csid) {
|
|
return (cscvtp->autoselect);
|
|
}
|
|
cscvtp++;
|
|
}
|
|
return FALSE;
|
|
}
|
|
|
|
|
|
PUBLIC int16
|
|
INTL_DefaultTextAttributeCharSetID(iDocumentContext context)
|
|
{
|
|
if (context)
|
|
{
|
|
INTL_CharSetInfo c = LO_GetDocumentCharacterSetInfo(context);
|
|
if (INTL_GetCSIWinCSID(c))
|
|
return INTL_GetCSIWinCSID(c);
|
|
}
|
|
|
|
return INTL_DefaultWinCharSetID(context);
|
|
}
|
|
|
|
#ifdef XP_UNIX
|
|
int16 *intl_RearrangeUnicodeFontCSIDList(uint16 len, int16* inlist)
|
|
{
|
|
int i;
|
|
int outidx, mbidx,miscidx;
|
|
int16 *outlist = XP_ALLOC(sizeof(int16) * len);
|
|
int16 *mblist = XP_ALLOC(sizeof(int16) * len);
|
|
int16 *misclist = XP_ALLOC(sizeof(int16) * len);
|
|
XP_ASSERT(NULL != outlist);
|
|
XP_ASSERT(NULL != mblist);
|
|
XP_ASSERT(NULL != misclist);
|
|
/* pick up single byte charset first, multibyte charset second, and
|
|
finally, symbol and dingbat
|
|
*/
|
|
outidx = mbidx = miscidx = 0;
|
|
for(i=0;i<len;i++)
|
|
{
|
|
if((CS_SYMBOL == inlist[i]) ||
|
|
(CS_DINGBATS == inlist[i]) ||
|
|
(CS_USER_DEFINED_ENCODING == inlist[i]) ||
|
|
(CS_USRDEF2 == inlist[i])
|
|
)
|
|
{
|
|
misclist[miscidx++] = inlist[i];
|
|
} else if ( MULTIBYTE == ( CODESET_MASK & inlist[i] )) {
|
|
mblist[mbidx++] = inlist[i];
|
|
} else {
|
|
outlist[outidx++] = inlist[i];
|
|
}
|
|
}
|
|
for(i = 0; i < mbidx; i++)
|
|
outlist[outidx++] = mblist[i];
|
|
for(i = 0; i < miscidx; i++)
|
|
outlist[outidx++] = misclist[i];
|
|
XP_FREE(mblist);
|
|
XP_FREE(misclist);
|
|
return outlist;
|
|
}
|
|
#endif
|
|
void
|
|
INTL_ReportFontCharSets(int16 *charsets)
|
|
{
|
|
uint16 len;
|
|
|
|
if (!charsets)
|
|
{
|
|
return;
|
|
}
|
|
|
|
if (availableFontCharSets)
|
|
{
|
|
free(availableFontCharSets);
|
|
}
|
|
|
|
availableFontCharSets = charsets;
|
|
|
|
while (*charsets)
|
|
{
|
|
#ifdef XP_UNIX
|
|
switch(*charsets)
|
|
{
|
|
case CS_X_BIG5:
|
|
haveBig5 = TRUE;
|
|
break;
|
|
case CS_8859_5:
|
|
have88595 = TRUE;
|
|
break;
|
|
case CS_CP_1251:
|
|
have1251 = TRUE;
|
|
break;
|
|
case CS_KOI8_R:
|
|
haveKOI8R = TRUE;
|
|
break;
|
|
case CS_KOI8_U:
|
|
haveKOI8U = TRUE;
|
|
break;
|
|
}
|
|
#endif
|
|
charsets++;
|
|
}
|
|
len = (charsets - availableFontCharSets);
|
|
|
|
#ifdef XP_UNIX
|
|
{
|
|
int16 *fontcsidlist = intl_RearrangeUnicodeFontCSIDList(len, availableFontCharSets);
|
|
XP_ASSERT(NULL != fontcsidlist);
|
|
INTL_SetUnicodeCSIDList(len, fontcsidlist);
|
|
XP_FREE(fontcsidlist);
|
|
}
|
|
#endif
|
|
|
|
}
|
|
|
|
/* Code for CSID Iterator */
|
|
|
|
#define NUMOFCSIDINITERATOR 15
|
|
struct INTL_CSIDIteratorPriv
|
|
{
|
|
int16 cur;
|
|
int16 csidlist[NUMOFCSIDINITERATOR];
|
|
};
|
|
typedef struct INTL_CSIDIteratorPriv INTL_CSIDIteratorPriv;
|
|
|
|
#ifdef MOZ_MAIL_NEWS
|
|
|
|
PRIVATE void intl_FillTryIMAP4SearchIterator(INTL_CSIDIteratorPriv* p, int16 csid);

/*
 * Fill p->csidlist with the charsets to try for an IMAP4 search on text
 * in csid:
 *   1. the default mail charset for csid,
 *   2. the default news charset for csid, if it differs,
 *   3. every charset cscvt_tbl can convert csid to (CS_AUTO bit removed),
 *      without duplicates.
 * The list is terminated by 0.  At most NUMOFCSIDINITERATOR - 1 charsets
 * are stored so that the terminator always fits inside the array.
 */
PRIVATE void intl_FillTryIMAP4SearchIterator(INTL_CSIDIteratorPriv* p, int16 csid)
{
    int idx = 0;
    cscvt_t *cscvtp;

    p->csidlist[idx++] = INTL_DefaultMailCharSetID(csid);   /* add mailcsid first */
    p->csidlist[idx++] = INTL_DefaultNewsCharSetID(csid);   /* if the news csid is different add it */
    if (p->csidlist[0] == p->csidlist[1])
        idx--;

    /* Add all the csids that we know how to convert to (without the
       CS_AUTO bit); stop while one slot is still free for the terminator. */
    for (cscvtp = cscvt_tbl;
         cscvtp->from_csid && (idx < NUMOFCSIDINITERATOR - 1);
         cscvtp++)
    {
        int16 foundcsid;
        XP_Bool notInTheList = TRUE;
        int i;

        if ((cscvtp->from_csid & ~CS_AUTO) != (csid & ~CS_AUTO))
            continue;

        foundcsid = cscvtp->to_csid & ~CS_AUTO;
        for (i = 0; i < idx; i++)
        {
            if (foundcsid == p->csidlist[i])
            {
                notInTheList = FALSE;
                break;
            }
        }
        if (notInTheList)
            p->csidlist[idx++] = foundcsid;
    }

    XP_ASSERT(idx < NUMOFCSIDINITERATOR);
    p->csidlist[idx] = 0;   /* terminate the list by 0 */
}
|
|
|
|
/*
 * Create a CSID iterator and store its handle in *iterator.
 *
 * csid  charset the iteration is based on (the CS_AUTO bit is ignored)
 * flag  kind of iteration; only csiditerate_TryIMAP4Search is known
 *
 * *iterator is set to NULL when the allocation fails.  For an unknown
 * flag the iterator is created empty, so INTL_CSIDIteratorNext reports
 * the end at once instead of reading an uninitialized list.
 */
PUBLIC void INTL_CSIDIteratorCreate( INTL_CSIDIterator* iterator, int16 csid, int flag)
{
    INTL_CSIDIteratorPriv* priv =
        (INTL_CSIDIteratorPriv*) XP_ALLOC(sizeof(INTL_CSIDIteratorPriv));

    *iterator = (INTL_CSIDIterator) priv;
    if (priv)
    {
        priv->cur = 0;
        priv->csidlist[0] = 0;      /* start out as an empty list */
        switch (flag)
        {
        case csiditerate_TryIMAP4Search:
            intl_FillTryIMAP4SearchIterator(priv, (int16)(csid & ~CS_AUTO));
            break;
        default:
            XP_ASSERT(FALSE);
            break;
        }
    }
    return;
}
|
|
|
|
#endif /* MOZ_MAIL_NEWS */
|
|
|
|
/*
 * Release the iterator behind *iterator and reset the handle to NULL.
 */
PUBLIC void INTL_CSIDIteratorDestroy(INTL_CSIDIterator* iterator)
{
    INTL_CSIDIteratorPriv* doomed;

    doomed = (INTL_CSIDIteratorPriv*) *iterator;
    XP_FREE(doomed);
    *iterator = NULL;
}
|
|
|
|
/*
 * Fetch the next charset from the iterator.
 *
 * Returns TRUE and stores the charset in *pCsid while entries remain.
 * Returns FALSE, leaving *pCsid untouched, at the end of the list.  The
 * end is sticky: the position is not advanced past the terminating 0, so
 * further calls keep returning FALSE instead of reading beyond the list.
 * A NULL handle (for example after a failed create) also yields FALSE.
 */
PUBLIC XP_Bool INTL_CSIDIteratorNext( INTL_CSIDIterator* iterator, int16* pCsid)
{
    INTL_CSIDIteratorPriv* priv = (INTL_CSIDIteratorPriv*) *iterator;
    int16 csid;

    if (NULL == priv)
        return FALSE;

    /* never index outside csidlist, whatever state cur is in */
    if ((priv->cur < 0) || (priv->cur >= NUMOFCSIDINITERATOR))
        return FALSE;

    csid = priv->csidlist[priv->cur];
    if (0 == csid)
        return FALSE;       /* end of list; stay on the terminator */

    priv->cur++;
    *pCsid = csid;
    return TRUE;
}
|
|
|
|
|
|
#ifdef XP_OS2
|
|
/*
|
|
* Map Netscape charset to OS/2 codepage
|
|
*/
|
|
|
|
/*
|
|
 * This is trickier than you think.  For a given charset, first entry should
|
|
* be windows codepage, second entry should be OS/2 codepage.
|
|
*/
|
|
|
|
static uint16 CS2CodePage[] = {
|
|
CS_LATIN1 , 1004, /* 2 */
|
|
CS_ASCII , 1252, /* 1 */
|
|
CS_UTF8 , 1208, /* 290 */
|
|
CS_SJIS , 943, /* 260 */
|
|
CS_8859_3 , 913, /* 14 */
|
|
CS_8859_4 , 914, /* 15 */
|
|
CS_8859_5 , 915, /* 16 ISO Cyrillic */
|
|
CS_8859_6 , 1089, /* 17 ISO Arabic */
|
|
CS_8859_7 , 813, /* 18 ISO Greek */
|
|
CS_8859_8 , 916, /* 19 ISO Hebrew */
|
|
CS_8859_9 , 920, /* 20 */
|
|
CS_BIG5 , 950, /* 263 */
|
|
CS_GB2312 , 1386, /* 287 */
|
|
CS_CP_1250 , 1250, /* 44 CS_CP_1250 is window Centrl Europe */
|
|
CS_CP_1251 , 1251, /* 41 CS_CP_1251 is window Cyrillic */
|
|
CS_LATIN2 , 912, /* 10 */
|
|
CS_CP_1253 , 1253, /* 43 CS_CP_1253 is window Greek */
|
|
CS_CP_1254 , 1254, /* 45 CS_CP_1254 is window Turkish */
|
|
CS_CP_1257 , 1257, /* 61 Windows Baltic */
|
|
CS_CP_1258 , 1258, /* 62 Windows Vietnamese */
|
|
CS_CP_850 , 850, /* 53 PC Latin 1 */
|
|
CS_CP_852 , 852, /* 54 PC Latin 2 */
|
|
CS_CP_855 , 855, /* 55 PC Cyrillic */
|
|
CS_CP_857 , 857, /* 56 PC Turkish */
|
|
CS_CP_862 , 862, /* 57 PC Hebrew */
|
|
CS_CP_864 , 864, /* 58 PC Arabic */
|
|
CS_CP_866 , 866, /* 59 PC Russian */
|
|
CS_CP_874 , 874, /* 60 PC Thai */
|
|
CS_EUCJP , 930, /* 261 */
|
|
CS_GB_8BIT , 1386, /* 264 */
|
|
CS_KOI8_R , 878, /* 39 */
|
|
CS_KSC5601 , 949, /* 284 */
|
|
CS_MAC_CE , 1282, /* 11 */
|
|
CS_MAC_CYRILLIC, 1283, /* 40 */
|
|
CS_MAC_GREEK , 1280, /* 42 */
|
|
CS_MAC_ROMAN , 1275, /* 6 */
|
|
CS_MAC_TURKISH, 1281, /* 46 */
|
|
CS_UCS2 , 1200, /* 810 */
|
|
CS_USRDEF2 , 1252, /* 38 */
|
|
0, 0,
|
|
};
|
|
|
|
/*
|
|
* MapCpToCsNum: Search table and return netscape codeset name
|
|
*/
|
|
/*
 * Look up codepage cpid in CS2CodePage and return the charset ID of the
 * first pair that carries it, or 0 when the table has no such codepage.
 */
uint16 INTL_MapCpToCsNum(uint16 cpid) {
    uint16 *pair;

    for (pair = CS2CodePage; *pair; pair += 2) {
        if (pair[1] == cpid)
            return pair[0];
    }
    return 0;
}
|
|
|
|
|
|
/*
|
|
* MapCsToCpNum: Search table and return codepage
|
|
*/
|
|
/*
 * Look up charset csid in CS2CodePage and return the codepage of the
 * first pair that carries it, or 0 when the table has no such charset.
 */
uint16 INTL_MapCsToCpNum(uint16 csid) {
    uint16 *pair;

    for (pair = CS2CodePage; *pair; pair += 2) {
        if (pair[0] == csid)
            return pair[1];
    }
    return 0;
}
|
|
|
|
|
|
/*
|
|
* Map from process codepage to default charset
|
|
*/
|
|
int16 INTL_MenuFontCSID(void) {
|
|
ULONG codepage, xxx;
|
|
|
|
DosQueryCp(4, &codepage, &xxx);
|
|
return INTL_MapCpToCsNum(codepage);
|
|
}
|
|
|
|
|
|
/*
|
|
 * This returns the ID for the menu font (always 0 here).
|
|
*/
|
|
/* Menu font ID: this implementation always reports 0. */
int INTL_MenuFontID()
{
    int menu_font_id = 0;

    return menu_font_id;
}
|
|
|
|
#endif /* XP_OS2 */
|
|
|
|
|
|
#if defined(MOZ_MAIL_NEWS)
|
|
/*
|
|
* Access a conversion flag for hankaku->zenkaku kana conversion for mail.
|
|
*/
|
|
/*
 * Tell whether obj converts to CS_JIS (CS_AUTO bit ignored) and has the
 * INTL_SEND_HANKAKU_KANA bit set in its conversion flag.
 */
XP_Bool INTL_GetCCCCvtflag_SendHankakuKana(CCCDataObject obj)
{
    /* the flag only has this meaning for conversions to JIS */
    if (CS_JIS != (INTL_GetCCCToCSID(obj) & ~CS_AUTO))
        return 0;

    return (INTL_SEND_HANKAKU_KANA & INTL_GetCCCCvtflag(obj)) != 0;
}
|
|
|
|
/*
 * Set (flag true) or clear (flag false) the INTL_SEND_HANKAKU_KANA bit in
 * the conversion flag of obj.  Nothing happens unless obj converts to
 * CS_JIS (CS_AUTO bit ignored).
 */
void INTL_SetCCCCvtflag_SendHankakuKana(CCCDataObject obj, XP_Bool flag)
{
    int32 cvtflag;

    if (CS_JIS != (INTL_GetCCCToCSID(obj) & ~CS_AUTO))
        return;

    cvtflag = INTL_GetCCCCvtflag(obj);
    if (flag)
        cvtflag = INTL_SEND_HANKAKU_KANA | cvtflag;
    else
        cvtflag = ~INTL_SEND_HANKAKU_KANA & cvtflag;
    INTL_SetCCCCvtflag(obj, cvtflag);
}
|
|
#endif /* MOZ_MAIL_NEWS */
|
|
|
|
|
|
static int16 xp_res_csid = CS_DEFAULT;
|
|
INTLCharSetID INTL_GetCharSetID(INTL_CharSetID_Selector selector)
|
|
{
|
|
if( INTL_XPResourcesCsidSel == selector)
|
|
{
|
|
if(CS_DEFAULT == xp_res_csid)
|
|
{
|
|
xp_res_csid = INTL_CharSetNameToID(
|
|
INTL_ResourceCharSet());
|
|
}
|
|
return xp_res_csid;
|
|
}
|
|
return FE_GetCharSetID(selector);
|
|
}
|