mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-12-16 19:48:18 +00:00
1181 lines
40 KiB
C
1181 lines
40 KiB
C
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
|
|
*
|
|
* The contents of this file are subject to the Netscape Public License
|
|
* Version 1.0 (the "NPL"); you may not use this file except in
|
|
* compliance with the NPL. You may obtain a copy of the NPL at
|
|
* http://www.mozilla.org/NPL/
|
|
*
|
|
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
|
* for the specific language governing rights and limitations under the
|
|
* NPL.
|
|
*
|
|
* The Initial Developer of this code under the NPL is Netscape
|
|
* Communications Corporation. Portions created by Netscape are
|
|
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
|
* Reserved.
|
|
*/
|
|
/* fe_ccc.c */
|
|
/* Test harness code to be replaced by FE specific code */
|
|
|
|
#ifdef XP_OS2
|
|
#define INCL_DOS
|
|
#endif
|
|
#include "intlpriv.h"
|
|
|
|
#include <stdio.h>
|
|
#include "xp.h"
|
|
#include "intl_csi.h"
|
|
|
|
#ifdef XP_MAC
|
|
#include "resgui.h"
|
|
#endif
|
|
|
|
/* for XP_GetString() */
|
|
#include "xpgetstr.h"
|
|
extern int MK_OUT_OF_MEMORY;
|
|
|
|
|
|
/*
|
|
IMPORTANT NOTE:
|
|
|
|
mz_euc2euc
|
|
mz_b52b5
|
|
mz_cns2cns
|
|
mz_ksc2ksc
|
|
mz_sjis2sjis
|
|
mz_utf82utf8
|
|
|
|
are now replaced by mz_mbNullConv
|
|
we should eventually replace mz_hz2gb as well, once the hz -> gb conversion has been extracted from it
|
|
*/
|
|
PRIVATE unsigned char *
|
|
mz_hz2gb(CCCDataObject obj, const unsigned char *kscbuf, int32 kscbufsz);
|
|
PRIVATE unsigned char *
|
|
mz_gb2gb(CCCDataObject obj, const unsigned char *kscbuf, int32 kscbufsz);
|
|
|
|
PRIVATE unsigned char *
|
|
mz_mbNullConv(CCCDataObject obj, const unsigned char *buf, int32 bufsz);
|
|
|
|
PRIVATE unsigned char *
|
|
mz_AnyToAnyThroughUCS2(CCCDataObject obj, const unsigned char *buf, int32 bufsz);
|
|
|
|
/* intl_CharLenFunc is designed to used with mz_mbNullConv */
|
|
typedef int16 (*intl_CharLenFunc) ( unsigned char ch);
|
|
PRIVATE int16 intl_CharLen_SJIS( unsigned char ch);
|
|
PRIVATE int16 intl_CharLen_EUC_JP( unsigned char ch);
|
|
PRIVATE int16 intl_CharLen_CGK( unsigned char ch);
|
|
PRIVATE int16 intl_CharLen_CNS_8BIT( unsigned char ch);
|
|
PRIVATE int16 intl_CharLen_UTF8( unsigned char ch);
|
|
PRIVATE int16 intl_CharLen_SingleByte(unsigned char ch);
|
|
|
|
#define INTL_CHARLEN_SJIS 0
|
|
#define INTL_CHARLEN_EUC_JP 1
|
|
#define INTL_CHARLEN_CGK 2
|
|
#define INTL_CHARLEN_CNS_8BIT 3
|
|
#define INTL_CHARLEN_UTF8 4
|
|
#define INTL_CHARLEN_SINGLEBYTE 5
|
|
|
|
PRIVATE intl_CharLenFunc intl_char_len_func[]=
|
|
{
|
|
intl_CharLen_SJIS,
|
|
intl_CharLen_EUC_JP,
|
|
intl_CharLen_CGK,
|
|
intl_CharLen_CNS_8BIT,
|
|
intl_CharLen_UTF8,
|
|
intl_CharLen_SingleByte,
|
|
};
|
|
|
|
#ifdef XP_UNIX
|
|
PRIVATE XP_Bool haveBig5 = FALSE;
|
|
PRIVATE XP_Bool have88595 = FALSE;
|
|
PRIVATE XP_Bool have1251 = FALSE;
|
|
PRIVATE XP_Bool haveKOI8R = FALSE;
|
|
#endif
|
|
|
|
PRIVATE int16 *availableFontCharSets = NULL;
|
|
|
|
|
|
/* Table that maps the FROM char, codeset to all other relevant info:
|
|
* - TO character codeset
|
|
* - Fonts (fixed & proportional) for TO character codeset
|
|
* - Type of conversion (func for Win/Mac, value for X)
|
|
* - Argument for conversion routine. Routine-defined.
|
|
*
|
|
* Not all of these may be available. Depends upon available fonts,
|
|
* scripts, codepages, etc. Need to query system to build valid table.
|
|
*
|
|
* What info do I need to make the font change API on the 3 platforms?
|
|
* Is just a 32bit font ID sufficient?
|
|
*
|
|
* Some X Windows can render Japanese in either EUC or SJIS, how do we
|
|
* choose?
|
|
*/
|
|
/* The ***first*** match of a "FROM" encoding (1st col.) will be
|
|
* used as the URL->native encoding. Be careful of the
|
|
* ordering.
|
|
* Additional entries for the same "FROM" encoding, specifies
|
|
* how to convert going out (e.g., sending mail, news or forms).
|
|
*/
|
|
|
|
/*
|
|
What does the flag mean?
|
|
|
|
For Mac, the flag in One2OneCCC entries is the resource number of a 256-byte mapping table
|
|
For all platforms, the flag in mz_mbNullConv entries is an index into intl_char_len_func[] (one of the INTL_CHARLEN_* values) that selects the intl_CharLenFunc routine
|
|
|
|
*/
|
|
#ifdef XP_MAC
|
|
/*
 * Macintosh conversion table.
 *
 * Columns: FROM csid, TO csid, a 0/1 field (not interpreted in this file),
 * converter routine (0 where no routine is supplied), and the converter's
 * flag argument.  For One2OneCCC the flag is an xlat_* mapping-table
 * resource; for mz_mbNullConv and mz_AnyToAnyThroughUCS2 it is an
 * INTL_CHARLEN_* index.  Row order matters: the first row matching a FROM
 * encoding is the one used for incoming data.  The all-zero csid row ends
 * the table.
 */
MODULE_PRIVATE cscvt_t cscvt_tbl[] = {
  /* SINGLE BYTE */
  /* LATIN1 */
  {CS_LATIN1,        CS_MAC_ROMAN,     0, (CCCFunc)One2OneCCC, xlat_LATIN1_TO_MAC_ROMAN},
  {CS_ASCII,         CS_MAC_ROMAN,     0, (CCCFunc)One2OneCCC, xlat_LATIN1_TO_MAC_ROMAN},
  {CS_MAC_ROMAN,     CS_MAC_ROMAN,     0, (CCCFunc)0,          0},
  {CS_MAC_ROMAN,     CS_LATIN1,        0, (CCCFunc)One2OneCCC, xlat_MAC_ROMAN_TO_LATIN1},
  {CS_MAC_ROMAN,     CS_ASCII,         0, (CCCFunc)One2OneCCC, xlat_MAC_ROMAN_TO_LATIN1},

  /* LATIN2 */
  {CS_LATIN2,        CS_MAC_CE,        0, (CCCFunc)One2OneCCC, xlat_LATIN2_TO_MAC_CE},
  {CS_MAC_CE,        CS_MAC_CE,        0, (CCCFunc)0,          0},
  {CS_MAC_CE,        CS_LATIN2,        0, (CCCFunc)One2OneCCC, xlat_MAC_CE_TO_LATIN2},
  {CS_MAC_CE,        CS_ASCII,         0, (CCCFunc)One2OneCCC, xlat_MAC_CE_TO_LATIN2},

  {CS_CP_1250,       CS_MAC_CE,        0, (CCCFunc)One2OneCCC, xlat_CP_1250_TO_MAC_CE},
  {CS_MAC_CE,        CS_CP_1250,       0, (CCCFunc)One2OneCCC, xlat_MAC_CE_TO_CP_1250},

  /* CYRILLIC */
  {CS_8859_5,        CS_MAC_CYRILLIC,  0, (CCCFunc)One2OneCCC, xlat_8859_5_TO_MAC_CYRILLIC},
  {CS_MAC_CYRILLIC,  CS_MAC_CYRILLIC,  0, (CCCFunc)0,          0},
  {CS_MAC_CYRILLIC,  CS_8859_5,        0, (CCCFunc)One2OneCCC, xlat_MAC_CYRILLIC_TO_8859_5},
  {CS_MAC_CYRILLIC,  CS_ASCII,         0, (CCCFunc)One2OneCCC, xlat_MAC_CYRILLIC_TO_8859_5},

  {CS_CP_1251,       CS_MAC_CYRILLIC,  0, (CCCFunc)One2OneCCC, xlat_CP_1251_TO_MAC_CYRILLIC},
  {CS_MAC_CYRILLIC,  CS_CP_1251,       0, (CCCFunc)One2OneCCC, xlat_MAC_CYRILLIC_TO_CP_1251},

  {CS_KOI8_R,        CS_MAC_CYRILLIC,  0, (CCCFunc)One2OneCCC, xlat_KOI8_R_TO_MAC_CYRILLIC},
  {CS_MAC_CYRILLIC,  CS_KOI8_R,        0, (CCCFunc)One2OneCCC, xlat_MAC_CYRILLIC_TO_KOI8_R},

  /* GREEK */
  {CS_8859_7,        CS_MAC_GREEK,     0, (CCCFunc)One2OneCCC, xlat_8859_7_TO_MAC_GREEK},
  {CS_MAC_GREEK,     CS_MAC_GREEK,     0, (CCCFunc)0,          0},
  {CS_MAC_GREEK,     CS_8859_7,        0, (CCCFunc)One2OneCCC, xlat_MAC_GREEK_TO_8859_7},
  {CS_MAC_GREEK,     CS_ASCII,         0, (CCCFunc)One2OneCCC, xlat_MAC_GREEK_TO_8859_7},

  {CS_CP_1253,       CS_MAC_GREEK,     0, (CCCFunc)One2OneCCC, xlat_CP_1253_TO_MAC_GREEK},
  {CS_MAC_GREEK,     CS_CP_1253,       0, (CCCFunc)One2OneCCC, xlat_MAC_GREEK_TO_CP_1253},

  /* TURKISH */
  {CS_8859_9,        CS_MAC_TURKISH,   0, (CCCFunc)One2OneCCC, xlat_8859_9_TO_MAC_TURKISH},
  {CS_MAC_TURKISH,   CS_MAC_TURKISH,   0, (CCCFunc)0,          0},
  {CS_MAC_TURKISH,   CS_8859_9,        0, (CCCFunc)One2OneCCC, xlat_MAC_TURKISH_TO_8859_9},
  {CS_MAC_TURKISH,   CS_ASCII,         0, (CCCFunc)One2OneCCC, xlat_MAC_TURKISH_TO_8859_9},

  /* MULTIBYTE */
  /* JAPANESE */
  {CS_SJIS,          CS_SJIS,          1, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_SJIS},
  {CS_SJIS,          CS_JIS,           1, (CCCFunc)mz_sjis2jis,   0},
  {CS_JIS,           CS_SJIS,          1, (CCCFunc)jis2other,     0},
  {CS_EUCJP,         CS_SJIS,          1, (CCCFunc)mz_euc2sjis,   0},
  {CS_JIS,           CS_EUCJP,         1, (CCCFunc)jis2other,     1},
  {CS_EUCJP,         CS_JIS,           1, (CCCFunc)mz_euc2jis,    0},
  {CS_SJIS,          CS_EUCJP,         1, (CCCFunc)mz_sjis2euc,   0},
  /* auto-detect Japanese conversions */
  {CS_SJIS_AUTO,     CS_SJIS,          1, (CCCFunc)autoJCCC,      0},
  {CS_JIS_AUTO,      CS_SJIS,          1, (CCCFunc)autoJCCC,      0},
  {CS_EUCJP_AUTO,    CS_SJIS,          1, (CCCFunc)autoJCCC,      0},

  /* KOREAN */
  {CS_KSC_8BIT,      CS_KSC_8BIT,      0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},
  {CS_2022_KR,       CS_KSC_8BIT,      0, (CCCFunc)mz_iso2euckr,  0},
  {CS_KSC_8BIT,      CS_2022_KR,       0, (CCCFunc)mz_euckr2iso,  0},
  /* auto-detect Korean conversions */
  {CS_KSC_8BIT_AUTO,     CS_KSC_8BIT,  1, (CCCFunc)autoKCCC,      0},
  {(CS_2022_KR|CS_AUTO), CS_KSC_8BIT,  1, (CCCFunc)autoKCCC,      0},

  /* SIMPLIFIED CHINESE */
  {CS_GB_8BIT,       CS_GB_8BIT,       0, (CCCFunc)mz_gb2gb,      0},

  /* TRADITIONAL CHINESE */
  {CS_BIG5,          CS_BIG5,          0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},
#ifdef FEATURE_BIG5CNS
  {CS_BIG5,          CS_CNS_8BIT,      0, (CCCFunc)mz_b52cns,     0},
  {CS_CNS_8BIT,      CS_BIG5,          0, (CCCFunc)mz_cns2b5,     0},
#endif

  /* UNICODE */
  {CS_UTF8,          CS_UTF8,          0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_UTF8},

  {CS_UTF8,          CS_UCS2,          0, (CCCFunc)mz_utf82ucs,       0},
  {CS_UTF8,          CS_UTF7,          0, (CCCFunc)mz_utf82utf7,      0},
  {CS_UTF8,          CS_UCS2_SWAP,     0, (CCCFunc)mz_utf82ucsswap,   0},
  {CS_UTF8,          CS_IMAP4_UTF7,    0, (CCCFunc)mz_utf82imap4utf7, 0},
  {CS_UCS2,          CS_UTF8,          0, (CCCFunc)mz_ucs2utf8,       0},
  {CS_UCS2,          CS_UTF7,          0, (CCCFunc)mz_ucs2utf7,       0},
  {CS_UCS2_SWAP,     CS_UTF8,          0, (CCCFunc)mz_ucs2utf8,       0},
  {CS_UCS2_SWAP,     CS_UTF7,          0, (CCCFunc)mz_ucs2utf7,       0},
  {CS_UTF7,          CS_UTF8,          0, (CCCFunc)mz_utf72utf8,      0},
  {CS_IMAP4_UTF7,    CS_UTF8,          0, (CCCFunc)mz_imap4utf72utf8, 0},

  /* native encodings -> UTF8, going through UCS2 */
  {CS_MAC_ROMAN,     CS_UTF8,          0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
  {CS_MAC_CE,        CS_UTF8,          0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
  {CS_MAC_CYRILLIC,  CS_UTF8,          0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
  {CS_KOI8_R,        CS_UTF8,          0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
  {CS_MAC_GREEK,     CS_UTF8,          0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
  {CS_MAC_TURKISH,   CS_UTF8,          0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
  {CS_SJIS,          CS_UTF8,          0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SJIS},
  {CS_KSC_8BIT,      CS_UTF8,          0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_CGK},
  {CS_BIG5,          CS_UTF8,          0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_CGK},
  {CS_GB_8BIT,       CS_UTF8,          0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_CGK},

  /* UTF8 -> native encodings, going through UCS2 */
  {CS_UTF8,          CS_MAC_ROMAN,     0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
  {CS_UTF8,          CS_MAC_CE,        0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
  {CS_UTF8,          CS_MAC_CYRILLIC,  0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
  {CS_UTF8,          CS_KOI8_R,        0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
  {CS_UTF8,          CS_MAC_GREEK,     0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
  {CS_UTF8,          CS_MAC_TURKISH,   0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
  {CS_UTF8,          CS_SJIS,          0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
  {CS_UTF8,          CS_KSC_8BIT,      0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
  {CS_UTF8,          CS_BIG5,          0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
  {CS_UTF8,          CS_GB_8BIT,       0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},

  {CS_USER_DEFINED_ENCODING, CS_USER_DEFINED_ENCODING, 0, (CCCFunc)0, 0},
  {0,                0,                1, (CCCFunc)0,          0}
};
|
|
|
|
#endif /* XP_MAC */
|
|
|
|
#if defined(XP_WIN) || defined(XP_OS2)
|
|
/*
 * Windows / OS/2 conversion table.
 *
 * Columns: FROM csid, TO csid, a 0/1 field (not interpreted in this file),
 * converter routine (0 where no routine is supplied), and the converter's
 * flag argument (an INTL_CHARLEN_* index for mz_mbNullConv and
 * mz_AnyToAnyThroughUCS2).  Row order matters: the first row matching a
 * FROM encoding is the one used for incoming data.  The all-zero csid row
 * ends the table.
 */
MODULE_PRIVATE cscvt_t cscvt_tbl[] = {
  /* SINGLE BYTE */
  /* LATIN1 */
  {CS_LATIN1,     CS_LATIN1,      0, (CCCFunc)0,          0},
  {CS_LATIN1,     CS_ASCII,       0, (CCCFunc)0,          0},
  {CS_ASCII,      CS_LATIN1,      0, (CCCFunc)0,          0},
  {CS_ASCII,      CS_ASCII,       0, (CCCFunc)0,          0},

  /* LATIN2 */
  {CS_CP_1250,    CS_CP_1250,     0, (CCCFunc)0,          0},
  {CS_CP_1250,    CS_LATIN2,      0, (CCCFunc)One2OneCCC, 0},
  {CS_LATIN2,     CS_CP_1250,     0, (CCCFunc)One2OneCCC, 0},
  {CS_LATIN2,     CS_LATIN2,      0, (CCCFunc)0,          0},
  {CS_LATIN2,     CS_ASCII,       0, (CCCFunc)0,          0},

  /* CYRILLIC */
  {CS_CP_1251,    CS_CP_1251,     0, (CCCFunc)0,          0},
  {CS_8859_5,     CS_CP_1251,     0, (CCCFunc)One2OneCCC, 0},
  {CS_CP_1251,    CS_8859_5,      0, (CCCFunc)One2OneCCC, 0},
  /* NOTE(review): this row repeats the first Cyrillic row exactly.  By
   * analogy with the Greek group below it may have been meant to be
   * CS_8859_5 -> CS_8859_5 - confirm before changing. */
  {CS_CP_1251,    CS_CP_1251,     0, (CCCFunc)0,          0},

  {CS_KOI8_R,     CS_CP_1251,     0, (CCCFunc)One2OneCCC, 0},
  {CS_CP_1251,    CS_KOI8_R,      0, (CCCFunc)One2OneCCC, 0},

  /* ARMENIAN */
  {CS_ARMSCII8,   CS_ARMSCII8,    0, (CCCFunc)0,          0},

  /* GREEK */
  {CS_CP_1253,    CS_CP_1253,     0, (CCCFunc)0,          0},
  {CS_CP_1253,    CS_8859_7,      0, (CCCFunc)One2OneCCC, 0},
  {CS_8859_7,     CS_CP_1253,     0, (CCCFunc)One2OneCCC, 0},
  {CS_8859_7,     CS_8859_7,      0, (CCCFunc)0,          0},

  /* TURKISH */
#ifdef XP_OS2
  {CS_CP_1254,    CS_CP_1254,     0, (CCCFunc)0,          0},
  {CS_CP_1254,    CS_8859_9,      0, (CCCFunc)One2OneCCC, 0},
  {CS_8859_9,     CS_CP_1254,     0, (CCCFunc)One2OneCCC, 0},
#endif
  {CS_8859_9,     CS_8859_9,      0, (CCCFunc)0,          0},

  /* MULTIBYTE */
  /* JAPANESE */
  {CS_SJIS,       CS_SJIS,        1, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_SJIS},
  {CS_SJIS,       CS_JIS,         1, (CCCFunc)mz_sjis2jis,   0},
  {CS_JIS,        CS_SJIS,        1, (CCCFunc)jis2other,     0},
  {CS_EUCJP,      CS_SJIS,        1, (CCCFunc)mz_euc2sjis,   0},
  {CS_JIS,        CS_EUCJP,       1, (CCCFunc)jis2other,     1},
  {CS_EUCJP,      CS_JIS,         1, (CCCFunc)mz_euc2jis,    0},
  {CS_SJIS,       CS_EUCJP,       1, (CCCFunc)mz_sjis2euc,   0},
  /* auto-detect Japanese conversions */
  {CS_SJIS_AUTO,  CS_SJIS,        1, (CCCFunc)autoJCCC,      0},
  {CS_JIS_AUTO,   CS_SJIS,        1, (CCCFunc)autoJCCC,      0},
  {CS_EUCJP_AUTO, CS_SJIS,        1, (CCCFunc)autoJCCC,      0},

  /* KOREAN */
  {CS_KSC_8BIT,   CS_KSC_8BIT,    0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},
  {CS_2022_KR,    CS_KSC_8BIT,    0, (CCCFunc)mz_iso2euckr,  0},
  {CS_KSC_8BIT,   CS_2022_KR,     0, (CCCFunc)mz_euckr2iso,  0},
  /* auto-detect Korean conversions */
  {CS_KSC_8BIT_AUTO,     CS_KSC_8BIT, 1, (CCCFunc)autoKCCC,  0},
  {(CS_2022_KR|CS_AUTO), CS_KSC_8BIT, 1, (CCCFunc)autoKCCC,  0},
  {CS_KSC5601,    CS_KSC_8BIT,    0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},
  {CS_KSC_8BIT,   CS_KSC5601,     0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},

  /* SIMPLIFIED CHINESE */
  {CS_GB_8BIT,    CS_GB_8BIT,     0, (CCCFunc)mz_gb2gb,      0},
  {CS_GB2312,     CS_GB_8BIT,     0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},
  {CS_GB_8BIT,    CS_GB2312,      0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},

  /* TRADITIONAL CHINESE */
  {CS_BIG5,       CS_BIG5,        0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},
#ifdef FEATURE_BIG5CNS
  {CS_BIG5,       CS_CNS_8BIT,    0, (CCCFunc)mz_b52cns,     0},
  {CS_CNS_8BIT,   CS_BIG5,        0, (CCCFunc)mz_cns2b5,     0},
#endif
  {CS_X_BIG5,     CS_BIG5,        0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},
  {CS_BIG5,       CS_X_BIG5,      0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},

  /* UNICODE */
  {CS_UTF8,       CS_UTF8,        0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_UTF8},

  {CS_UTF8,       CS_UCS2,        0, (CCCFunc)mz_utf82ucs,       0},
  {CS_UTF8,       CS_UTF7,        0, (CCCFunc)mz_utf82utf7,      0},
  {CS_UTF8,       CS_UCS2_SWAP,   0, (CCCFunc)mz_utf82ucsswap,   0},
  {CS_UCS2,       CS_UTF8,        0, (CCCFunc)mz_ucs2utf8,       0},
  {CS_UCS2,       CS_UTF7,        0, (CCCFunc)mz_ucs2utf7,       0},
  {CS_UTF8,       CS_IMAP4_UTF7,  0, (CCCFunc)mz_utf82imap4utf7, 0},
  {CS_UCS2_SWAP,  CS_UTF8,        0, (CCCFunc)mz_ucs2utf8,       0},
  {CS_UCS2_SWAP,  CS_UTF7,        0, (CCCFunc)mz_ucs2utf7,       0},
  {CS_UTF7,       CS_UTF8,        0, (CCCFunc)mz_utf72utf8,      0},
  {CS_IMAP4_UTF7, CS_UTF8,        0, (CCCFunc)mz_imap4utf72utf8, 0},

  /* native encodings -> UTF8, going through UCS2 */
  {CS_LATIN1,     CS_UTF8,        0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
  {CS_CP_1250,    CS_UTF8,        0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
  {CS_CP_1251,    CS_UTF8,        0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
  {CS_CP_1253,    CS_UTF8,        0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
  {CS_8859_9,     CS_UTF8,        0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
  {CS_SJIS,       CS_UTF8,        0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SJIS},
  {CS_KSC_8BIT,   CS_UTF8,        0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_CGK},
  {CS_BIG5,       CS_UTF8,        0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_CGK},
  {CS_GB_8BIT,    CS_UTF8,        0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_CGK},

  /* UTF8 -> native encodings, going through UCS2 */
  {CS_UTF8,       CS_LATIN1,      0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
  {CS_UTF8,       CS_CP_1250,     0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
  {CS_UTF8,       CS_CP_1251,     0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
  {CS_UTF8,       CS_CP_1253,     0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
  {CS_UTF8,       CS_8859_9,      0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
  {CS_UTF8,       CS_SJIS,        0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
  {CS_UTF8,       CS_KSC_8BIT,    0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
  {CS_UTF8,       CS_BIG5,        0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
  {CS_UTF8,       CS_GB_8BIT,     0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},

#ifdef XP_OS2
  /*
   * Define additional codepage conversions for OS/2.  All of these use the
   * unicode based conversion tables.
   */
  /* Thai */
  {CS_CP_874,     CS_CP_874,      0, (CCCFunc)0,          0},
  {CS_CP_874,     CS_UTF8,        0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
  {CS_UTF8,       CS_CP_874,      0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},

  /* Baltic */
  {CS_CP_1257,    CS_CP_1257,     0, (CCCFunc)0,          0},
  {CS_CP_1257,    CS_UTF8,        0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
  {CS_UTF8,       CS_CP_1257,     0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},

  /* Hebrew */
  {CS_CP_862,     CS_CP_862,      0, (CCCFunc)0,          0},
  {CS_CP_862,     CS_UTF8,        0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
  {CS_UTF8,       CS_CP_862,      0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},

  /* Arabic */
  {CS_CP_864,     CS_CP_864,      0, (CCCFunc)0,          0},
  {CS_CP_864,     CS_UTF8,        0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
  {CS_UTF8,       CS_CP_864,      0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},

  /* PC codepages - Default convert to windows codepages */
  {CS_CP_850,     CS_LATIN1,      0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
  {CS_CP_852,     CS_LATIN2,      0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
  {CS_CP_855,     CS_CP_1251,     0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
  {CS_CP_857,     CS_CP_1254,     0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
  {CS_CP_866,     CS_CP_1251,     0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},

  {CS_CP_850,     CS_CP_850,      0, (CCCFunc)0,          0},
  {CS_CP_852,     CS_CP_852,      0, (CCCFunc)0,          0},
  {CS_CP_855,     CS_CP_855,      0, (CCCFunc)0,          0},
  {CS_CP_857,     CS_CP_857,      0, (CCCFunc)0,          0},
  {CS_CP_866,     CS_CP_866,      0, (CCCFunc)0,          0},

  {CS_LATIN1,     CS_CP_850,      0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
  {CS_LATIN2,     CS_CP_852,      0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
  {CS_CP_1251,    CS_CP_855,      0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
  {CS_CP_1254,    CS_CP_857,      0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
  {CS_CP_1251,    CS_CP_866,      0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},

  {CS_CP_850,     CS_UTF8,        0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
  {CS_CP_852,     CS_UTF8,        0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
  {CS_CP_855,     CS_UTF8,        0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
  {CS_CP_857,     CS_UTF8,        0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
  {CS_CP_866,     CS_UTF8,        0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
  {CS_KOI8_R,     CS_UTF8,        0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},

  {CS_UTF8,       CS_CP_850,      0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
  {CS_UTF8,       CS_CP_852,      0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
  {CS_UTF8,       CS_CP_855,      0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
  {CS_UTF8,       CS_CP_857,      0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
  {CS_UTF8,       CS_CP_866,      0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
  {CS_UTF8,       CS_KOI8_R,      0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},

  {CS_MAC_ROMAN,  CS_LATIN1,      0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
  {CS_LATIN1,     CS_MAC_ROMAN,   0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
#endif /* XP_OS2 */

  {CS_USER_DEFINED_ENCODING, CS_USER_DEFINED_ENCODING, 0, (CCCFunc)0, 0},
  {0,             0,              1, (CCCFunc)0,          0}
};
|
|
|
|
#endif /* XP_WIN || XP_OS2 */
|
|
#ifdef XP_UNIX
|
|
/*
 * Unix conversion table.
 *
 * Columns: FROM csid, TO csid, a 0/1 field (not interpreted in this file),
 * converter routine (NULL/0 where no routine is supplied), and the
 * converter's flag argument (an INTL_CHARLEN_* index for mz_mbNullConv and
 * mz_AnyToAnyThroughUCS2).  Row order matters: the first row matching a
 * FROM encoding is the one used for incoming data.  The all-zero row ends
 * the table.
 */
MODULE_PRIVATE cscvt_t cscvt_tbl[] = {
  /* SINGLE BYTE */
  /* LATIN1 */
  {CS_LATIN1,     CS_LATIN1,      0, (CCCFunc)One2OneCCC, 0},
  {CS_LATIN1,     CS_ASCII,       0, NULL,                0},
  {CS_ASCII,      CS_LATIN1,      0, NULL,                0},

  /* LATIN2 */
  {CS_LATIN2,     CS_LATIN2,      0, NULL,                0},
  {CS_LATIN2,     CS_ASCII,       0, NULL,                0},
  {CS_LATIN2,     CS_CP_1250,     0, (CCCFunc)One2OneCCC, 0},
  {CS_CP_1250,    CS_LATIN2,      0, (CCCFunc)One2OneCCC, 0},

  /* CYRILLIC */
  {CS_KOI8_R,     CS_KOI8_R,      0, NULL,                0},
  {CS_8859_5,     CS_8859_5,      0, NULL,                0},
  {CS_CP_1251,    CS_CP_1251,     0, NULL,                0},

  {CS_8859_5,     CS_KOI8_R,      0, (CCCFunc)One2OneCCC, 0},
  {CS_KOI8_R,     CS_8859_5,      0, (CCCFunc)One2OneCCC, 0},

  {CS_CP_1251,    CS_8859_5,      0, (CCCFunc)One2OneCCC, 0},
  {CS_8859_5,     CS_CP_1251,     0, (CCCFunc)One2OneCCC, 0},
  {CS_CP_1251,    CS_KOI8_R,      0, (CCCFunc)One2OneCCC, 0},
  {CS_KOI8_R,     CS_CP_1251,     0, (CCCFunc)One2OneCCC, 0},

  /* ARMENIAN */
  {CS_ARMSCII8,   CS_ARMSCII8,    0, (CCCFunc)0,          0},

  /* GREEK */
  {CS_8859_7,     CS_8859_7,      0, NULL,                0},
  {CS_8859_7,     CS_CP_1253,     0, (CCCFunc)One2OneCCC, 0},
  {CS_CP_1253,    CS_8859_7,      0, (CCCFunc)One2OneCCC, 0},

  /* TURKISH */
  {CS_8859_9,     CS_8859_9,      0, NULL,                0},

  /* MULTIBYTE */
  /* JAPANESE */
  {CS_EUCJP,      CS_EUCJP,       1, mz_mbNullConv,       INTL_CHARLEN_EUC_JP},
  {CS_JIS,        CS_EUCJP,       1, jis2other,           1},
  {CS_SJIS,       CS_EUCJP,       1, mz_sjis2euc,         0},
  {CS_EUCJP,      CS_SJIS,        1, mz_euc2sjis,         0},
  {CS_JIS,        CS_SJIS,        1, jis2other,           0},
  {CS_SJIS,       CS_SJIS,        1, mz_mbNullConv,       INTL_CHARLEN_SJIS},
  {CS_EUCJP,      CS_JIS,         1, mz_euc2jis,          0},
  {CS_SJIS,       CS_JIS,         1, mz_sjis2jis,         0},
  /* auto-detect Japanese conversions */
  {CS_JIS_AUTO,   CS_EUCJP,       1, autoJCCC,            1},
  {CS_SJIS_AUTO,  CS_EUCJP,       1, autoJCCC,            0},
  {CS_EUCJP_AUTO, CS_EUCJP,       1, autoJCCC,            0},
  {CS_EUCJP_AUTO, CS_SJIS,        1, autoJCCC,            0},
  {CS_JIS_AUTO,   CS_SJIS,        1, autoJCCC,            0},
  {CS_SJIS_AUTO,  CS_SJIS,        1, autoJCCC,            0},

  /* KOREAN */
  {CS_KSC_8BIT,   CS_KSC_8BIT,    0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},
  {CS_2022_KR,    CS_KSC_8BIT,    0, (CCCFunc)mz_iso2euckr,  0},
  {CS_KSC_8BIT,   CS_2022_KR,     0, (CCCFunc)mz_euckr2iso,  0},
  /* auto-detect Korean conversions */
  {CS_KSC_8BIT_AUTO,     CS_KSC_8BIT, 1, (CCCFunc)autoKCCC,  0},
  {(CS_2022_KR|CS_AUTO), CS_KSC_8BIT, 1, (CCCFunc)autoKCCC,  0},

  /* SIMPLIFIED CHINESE */
  {CS_GB_8BIT,    CS_GB_8BIT,     0, (CCCFunc)mz_gb2gb,   0},

  /* TRADITIONAL CHINESE */
  {CS_CNS_8BIT,   CS_CNS_8BIT,    0, mz_mbNullConv,       INTL_CHARLEN_CNS_8BIT},
#ifdef FEATURE_BIG5CNS
  {CS_BIG5,       CS_CNS_8BIT,    0, mz_b52cns,           0},
  {CS_CNS_8BIT,   CS_BIG5,        0, mz_cns2b5,           0},
#endif
  {CS_BIG5,       CS_BIG5,        0, mz_mbNullConv,       INTL_CHARLEN_CGK},

  {CS_USRDEF2,    CS_USRDEF2,     0, NULL,                0},

  /* UNICODE */
  {CS_UTF8,       CS_UTF8,        0, mz_mbNullConv,       INTL_CHARLEN_UTF8},
  {CS_UTF8,       CS_UCS2,        0, (CCCFunc)mz_utf82ucs,       0},
  {CS_UTF8,       CS_UTF7,        0, (CCCFunc)mz_utf82utf7,      0},
  {CS_UTF8,       CS_UCS2_SWAP,   0, (CCCFunc)mz_utf82ucsswap,   0},
  {CS_UTF8,       CS_IMAP4_UTF7,  0, (CCCFunc)mz_utf82imap4utf7, 0},
  {CS_UCS2,       CS_UTF8,        0, (CCCFunc)mz_ucs2utf8,       0},
  {CS_UCS2,       CS_UTF7,        0, (CCCFunc)mz_ucs2utf7,       0},
  {CS_UCS2_SWAP,  CS_UTF8,        0, (CCCFunc)mz_ucs2utf8,       0},
  {CS_UCS2_SWAP,  CS_UTF7,        0, (CCCFunc)mz_ucs2utf7,       0},
  {CS_UTF7,       CS_UTF8,        0, (CCCFunc)mz_utf72utf8,      0},

  /* native encodings -> UTF8, going through UCS2 */
  {CS_LATIN1,     CS_UTF8,        0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
  {CS_LATIN2,     CS_UTF8,        0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
  {CS_8859_5,     CS_UTF8,        0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
  {CS_KOI8_R,     CS_UTF8,        0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
  {CS_8859_7,     CS_UTF8,        0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
  {CS_8859_9,     CS_UTF8,        0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
  {CS_SJIS,       CS_UTF8,        0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SJIS},
  {CS_EUCJP,      CS_UTF8,        0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_EUC_JP},
  {CS_KSC_8BIT,   CS_UTF8,        0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_CGK},
  {CS_BIG5,       CS_UTF8,        0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_CGK},
  {CS_CNS_8BIT,   CS_UTF8,        0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_CNS_8BIT},
  {CS_GB_8BIT,    CS_UTF8,        0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_CGK},

  /* UTF8 -> native encodings, going through UCS2 */
  {CS_UTF8,       CS_LATIN1,      0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
  {CS_UTF8,       CS_LATIN2,      0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
  {CS_UTF8,       CS_8859_5,      0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
  {CS_UTF8,       CS_KOI8_R,      0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
  {CS_UTF8,       CS_8859_7,      0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
  {CS_UTF8,       CS_8859_9,      0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
  {CS_UTF8,       CS_SJIS,        0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
  {CS_UTF8,       CS_EUCJP,       0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
  {CS_UTF8,       CS_KSC_8BIT,    0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
  {CS_UTF8,       CS_BIG5,        0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
  {CS_UTF8,       CS_CNS_8BIT,    0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
  {CS_UTF8,       CS_GB_8BIT,     0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},

  {CS_IMAP4_UTF7, CS_UTF8,        0, (CCCFunc)mz_imap4utf72utf8, 0},
  {0,             0,              0, NULL,                0}

};
|
|
|
|
#endif /* XP_UNIX */
|
|
|
|
|
|
/*
|
|
* this routine is needed to make sure parser and layout see whole
|
|
* characters, not partial characters
|
|
*/
|
|
PRIVATE unsigned char *
|
|
mz_gb2gb(CCCDataObject obj, const unsigned char *gbbuf, int32 gbbufsz)
|
|
{
|
|
return mz_hz2gb(obj, gbbuf, gbbufsz);
|
|
}
|
|
PRIVATE unsigned char *
|
|
mz_hz2gb(CCCDataObject obj, const unsigned char *gbbuf, int32 gbbufsz)
|
|
{
|
|
unsigned char *start, *p, *q;
|
|
unsigned char *output;
|
|
int i, j, len;
|
|
unsigned char *uncvtbuf = INTL_GetCCCUncvtbuf(obj);
|
|
|
|
q = output = XP_ALLOC(strlen((char*)uncvtbuf) + gbbufsz + 1);
|
|
if (q == NULL)
|
|
return NULL;
|
|
|
|
start = NULL;
|
|
|
|
for (j = 0; j < 2; j++)
|
|
{
|
|
len = 0;
|
|
if (j == 0)
|
|
len = strlen((char *)uncvtbuf);
|
|
if (len)
|
|
p = (unsigned char *) uncvtbuf;
|
|
else
|
|
{
|
|
p = (unsigned char *) gbbuf ;
|
|
len = gbbufsz;
|
|
j = 100; /* quit this loop next time */
|
|
}
|
|
for (i = 0; i < len;)
|
|
{
|
|
if (start)
|
|
{
|
|
if (*p == '~' && *(p+1) == '}') /* switch back to ASCII mode */
|
|
{
|
|
for (; start < p; start++)
|
|
*q++ = *start | 0x80;
|
|
p += 2;
|
|
i += 2;
|
|
start = NULL;
|
|
}
|
|
else if (*p == 0x0D && *(p+1) == 0x0A) /* Unix or Mac return */
|
|
{
|
|
for (; start < p; start++)
|
|
*q++ = *start | 0x80;
|
|
i += 2;
|
|
*q++ = *p++; /* 0x0D */
|
|
*q++ = *p++; /* 0x0A */
|
|
start = NULL; /* reset start if we see normal line return */
|
|
}
|
|
else if (*p == 0x0A) /* Unix or Mac return */
|
|
{
|
|
for (; start < p; start++)
|
|
*q++ = *start | 0x80;
|
|
i ++;
|
|
*q++ = *p++; /* LF */
|
|
start = NULL; /* reset start if we see normal line return */
|
|
}
|
|
else if (*p == 0x0D) /* Unix or Mac return */
|
|
{
|
|
for (; start < p; start++)
|
|
*q++ = *start | 0x80;
|
|
i ++;
|
|
*q++ = *p++; /* LF */
|
|
start = NULL; /* reset start if we see normal line return */
|
|
}
|
|
else
|
|
{
|
|
i ++ ;
|
|
p ++ ;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (*p == '~' && *(p+1) == '{') /* switch to GB mode */
|
|
{
|
|
start = p + 2;
|
|
p += 2;
|
|
i += 2;
|
|
}
|
|
else if (*p == '~' && *(p+1) == 0x0D && *(p+2) == 0x0A) /* line-continuation marker */
|
|
{
|
|
i += 3;
|
|
p += 3;
|
|
}
|
|
else if (*p == '~' && *(p+1) == 0x0A) /* line-continuation marker */
|
|
{
|
|
i += 2;
|
|
p += 2;
|
|
}
|
|
else if (*p == '~' && *(p+1) == 0x0D) /* line-continuation marker */
|
|
{
|
|
i += 2;
|
|
p += 2;
|
|
}
|
|
else if (*p == '~' && *(p+1) == '~') /* ~~ means ~ */
|
|
{
|
|
*q++ = '~';
|
|
p += 2;
|
|
i += 2;
|
|
}
|
|
else
|
|
{
|
|
i ++;
|
|
*q++ = *p++;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
*q = '\0';
|
|
INTL_SetCCCLen(obj, q - output);
|
|
if (start)
|
|
{
|
|
|
|
/* Consider UNCVTBUF_SIZE is only 8 byte long, it's not enough
|
|
for HZ anyway. Let's convert leftover to GB first and deal with
|
|
unfinished buffer in the coming block.
|
|
*/
|
|
INTL_SetCCCLen(obj, INTL_GetCCCLen(obj) + p - start);
|
|
for (; start < p; start++)
|
|
*q++ = *start | 0x80;
|
|
*q = '\0';
|
|
|
|
q = uncvtbuf;
|
|
XP_STRCPY((char *)q, "~{");
|
|
}
|
|
|
|
return output;
|
|
}
|
|
|
|
|
|
|
|
/* mz_mbNullConv
|
|
* this routine is needed to make sure parser and layout see whole
|
|
* characters, not partial characters
|
|
*/
|
|
/* This routine is designed to replace the following routine:
|
|
mz_euc2euc
|
|
mz_b52b5
|
|
mz_cns2cns
|
|
mz_ksc2ksc
|
|
mz_sjis2sjis
|
|
mz_utf82utf8
|
|
|
|
It should also replace
|
|
mz_gb2gb
|
|
but currently mz_gb2gb also handle hz to gb. We need to move that functionality out of mz_gb2gb
|
|
*/
|
|
PRIVATE unsigned char *
|
|
mz_mbNullConv(CCCDataObject obj, const unsigned char *buf, int32 bufsz)
|
|
{
|
|
int32 left_over;
|
|
int32 len;
|
|
unsigned char *p;
|
|
unsigned char *ret;
|
|
int32 total;
|
|
intl_CharLenFunc CharLenFunc = intl_char_len_func[INTL_GetCCCCvtflag(obj)];
|
|
int charlen = 0;
|
|
|
|
/* Get the unconverted buffer */
|
|
unsigned char *uncvtbuf = INTL_GetCCCUncvtbuf(obj);
|
|
int32 uncvtsz = strlen((char *)uncvtbuf);
|
|
|
|
/* return in the input is nonsense */
|
|
if ((!obj) || (! buf) || (bufsz < 0))
|
|
return NULL;
|
|
|
|
/* Allocate Output Buffer */
|
|
total = uncvtsz + bufsz;
|
|
ret = (unsigned char *) XP_ALLOC(total + 1);
|
|
if (!ret)
|
|
{
|
|
INTL_SetCCCRetval(obj, MK_OUT_OF_MEMORY);
|
|
return NULL;
|
|
}
|
|
|
|
/* Copy unconverted buffer into the output bufer */
|
|
memcpy(ret, uncvtbuf, uncvtsz);
|
|
/* Copy the current input buffer into the output buffer */
|
|
memcpy(ret+uncvtsz, buf, bufsz);
|
|
|
|
/* Walk through the buffer and figure out the left_over length */
|
|
for (p=ret, len=total, left_over=0; len > 0; p += charlen, len -= charlen)
|
|
{
|
|
if((charlen = CharLenFunc(*p)) > 1)
|
|
{ /* count left_over only if it is multibyte char */
|
|
if(charlen > len) /* count left_over only if the len is less than charlen */
|
|
left_over = len;
|
|
};
|
|
}
|
|
|
|
/* Copy the left over into the uncvtbuf */
|
|
if(left_over)
|
|
memcpy(uncvtbuf, p - charlen, left_over);
|
|
/* Null terminated the uncvtbuf */
|
|
uncvtbuf[left_over] = '\0';
|
|
|
|
/* Null terminate the return buffer and set the length */
|
|
INTL_SetCCCLen(obj, total - left_over);
|
|
ret[total - left_over] = 0;
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
buf -> mz_mbNullConv -> frombuf -> INTL_TextToUnicode -> ucs2buf
|
|
-> INTL_UnicodeToStr -> tobuf
|
|
*/
|
|
PRIVATE unsigned char* mz_AnyToAnyThroughUCS2(CCCDataObject obj, const unsigned char *buf, int32 bufsz)
|
|
{
|
|
/* buffers */
|
|
unsigned char* fromBuf = NULL;
|
|
INTL_Unicode* ucs2Buf = NULL;
|
|
unsigned char* toBuf = NULL;
|
|
/* buffers' length */
|
|
uint32 ucs2BufLen = 0;
|
|
uint32 fromBufLen = 0;
|
|
uint32 toBufLen = 0;
|
|
/* from & to csid */
|
|
uint16 fromCsid = INTL_GetCCCFromCSID(obj);
|
|
uint16 toCsid = INTL_GetCCCToCSID(obj);
|
|
|
|
/* get the fromBuf */
|
|
if( !( fromBuf = mz_mbNullConv( obj, buf, bufsz) ) )
|
|
return NULL;
|
|
|
|
/* map fromBuf -> ucs2Buf */
|
|
fromBufLen = INTL_GetCCCLen(obj);
|
|
ucs2BufLen = INTL_TextToUnicodeLen( fromCsid, fromBuf, fromBufLen );
|
|
|
|
if( !( ucs2Buf = XP_ALLOC( (ucs2BufLen + 1 ) * 2)) ){
|
|
return NULL;
|
|
}
|
|
|
|
/* be care, the return value is HOW MANY UNICODE IN THIS UCS2BUF, not how many bytes */
|
|
ucs2BufLen = INTL_TextToUnicode( fromCsid, fromBuf, fromBufLen, ucs2Buf, ucs2BufLen );
|
|
|
|
/* map ucs2Buf -> toBuf */
|
|
toBufLen = INTL_UnicodeToStrLen( toCsid, ucs2Buf, ucs2BufLen ); /* we get BYTES here :) */
|
|
|
|
if( !( toBuf = XP_ALLOC( toBufLen + 1 ) ) )
|
|
return NULL;
|
|
|
|
INTL_UnicodeToStr( toCsid, ucs2Buf, ucs2BufLen, toBuf, toBufLen );
|
|
|
|
|
|
/* clean up after myself */
|
|
free( fromBuf );
|
|
free( ucs2Buf );
|
|
|
|
/* In order to let the caller know how long the buffer is, i have to set its tail NULL. */
|
|
toBuf[ toBufLen ] = 0;
|
|
return toBuf;
|
|
}
|
|
|
|
|
|
/*
 * Length in bytes of a Shift_JIS character, judged from its first byte:
 * 2 when the byte is in 0x81-0x9f or 0xe0-0xfc, otherwise 1.
 */
PRIVATE int16 intl_CharLen_SJIS( unsigned char ch)
{
  if ((ch >= 0x81) && (ch <= 0x9f))
    return 2;
  if ((ch >= 0xe0) && (ch <= 0xfc))
    return 2;
  return 1;
}
|
|
/*
 * Length in bytes of an EUC-JP character, judged from its first byte:
 * 2 for 0xa1-0xfe and for 0x8e, 3 for 0x8f, otherwise 1.
 */
PRIVATE int16 intl_CharLen_EUC_JP( unsigned char ch)
{
  if (((ch >= 0xa1) && (ch <= 0xfe)) || (ch == 0x8e))
    return 2;
  if (ch == 0x8f)
    return 3;
  return 1;
}
|
|
/*
 * Length in bytes of a character whose two-byte lead bytes are 0xa1-0xfe:
 * 2 for a first byte in that range, otherwise 1.
 */
PRIVATE int16 intl_CharLen_CGK( unsigned char ch)
{
  if ((ch < 0xa1) || (ch > 0xfe))
    return 1;
  return 2;
}
|
|
/*
 * Length in bytes of a CNS 8-bit character, judged from its first byte:
 * 2 for 0xa1-0xfe, 4 for 0x8e, otherwise 1.
 */
PRIVATE int16 intl_CharLen_CNS_8BIT( unsigned char ch)
{
  if ((ch >= 0xa1) && (ch <= 0xfe))
    return 2;
  if (ch == 0x8e)
    return 4;
  return 1;
}
|
|
/*
 * Length in bytes of a UTF-8 character, judged from its first byte:
 * 2 for 0xc0-0xdf, 3 for 0xe0-0xef, otherwise 1.
 * Lead bytes of longer sequences (0xf0 and above) are reported as 1.
 */
PRIVATE int16 intl_CharLen_UTF8( unsigned char ch)
{
  if ((ch >= 0xc0) && (ch <= 0xdf))
    return 2;
  if ((ch >= 0xe0) && (ch <= 0xef))
    return 3;
  return 1;
}
|
|
/*
 * Character-length function for single-byte character sets:
 * every character is one byte long, whatever its value.
 */
PRIVATE int16 intl_CharLen_SingleByte( unsigned char ch)
{
  const int16 oneByte = 1;

  return oneByte;
}
|
|
|
|
|
|
/*
|
|
INTL_DefaultWinCharSetID,
|
|
Based on DefaultDocCSID, it determines which Win CSID to use for Display
|
|
*/
|
|
PUBLIC int16 INTL_DefaultWinCharSetID(iDocumentContext context)
|
|
{
|
|
|
|
if (context) {
|
|
INTL_CharSetInfo csi = LO_GetDocumentCharacterSetInfo(context);
|
|
if (INTL_GetCSIWinCSID(csi))
|
|
return INTL_GetCSIWinCSID(csi);
|
|
}
|
|
|
|
return INTL_DocToWinCharSetID(INTL_DefaultDocCharSetID(context));
|
|
}
|
|
|
|
/*
|
|
INTL_DocToWinCharSetID,
|
|
Based on DefaultDocCSID, it determines which Win CSID to use for Display
|
|
*/
|
|
/*
|
|
|
|
To Do: (ftang)
|
|
|
|
We should seperate the DocToWinCharSetID logic from the cscvt_t table
|
|
for Cyrillic users.
|
|
|
|
*/
|
|
PUBLIC int16 INTL_DocToWinCharSetID(int16 csid)
|
|
{
|
|
cscvt_t *cscvtp;
|
|
int16 from_csid = 0, to_csid = 0;
|
|
|
|
from_csid = csid & ~CS_AUTO; /* remove auto bit */
|
|
|
|
/* Look-up conversion method given FROM and TO char. code sets */
|
|
cscvtp = cscvt_tbl;
|
|
while (cscvtp->from_csid)
|
|
{
|
|
if (cscvtp->from_csid == from_csid)
|
|
{
|
|
/*
|
|
* disgusting hack...
|
|
*/
|
|
#ifdef XP_UNIX
|
|
if (((cscvtp->to_csid == CS_CNS_8BIT) && (TRUE == haveBig5)) ||
|
|
((cscvtp->to_csid == CS_8859_5) && (FALSE == have88595)) ||
|
|
((cscvtp->to_csid == CS_KOI8_R) && (FALSE == haveKOI8R)) ||
|
|
((cscvtp->to_csid == CS_CP_1251) && (FALSE == have1251)) )
|
|
{
|
|
cscvtp++;
|
|
continue;
|
|
}
|
|
#endif
|
|
to_csid = cscvtp->to_csid;
|
|
break ;
|
|
}
|
|
cscvtp++;
|
|
}
|
|
return to_csid == 0 ? CS_FE_ASCII: to_csid ;
|
|
}
|
|
|
|
|
|
XP_Bool
|
|
INTL_CanAutoSelect(int16 csid)
|
|
{
|
|
register cscvt_t *cscvtp;
|
|
|
|
cscvtp = cscvt_tbl;
|
|
while (cscvtp->from_csid) {
|
|
if (cscvtp->from_csid == csid) {
|
|
return (cscvtp->autoselect);
|
|
}
|
|
cscvtp++;
|
|
}
|
|
return FALSE;
|
|
}
|
|
|
|
|
|
PUBLIC int16
|
|
INTL_DefaultTextAttributeCharSetID(iDocumentContext context)
|
|
{
|
|
if (context)
|
|
{
|
|
INTL_CharSetInfo c = LO_GetDocumentCharacterSetInfo(context);
|
|
if (INTL_GetCSIWinCSID(c))
|
|
return INTL_GetCSIWinCSID(c);
|
|
}
|
|
|
|
return INTL_DefaultWinCharSetID(context);
|
|
}
|
|
|
|
void
|
|
INTL_ReportFontCharSets(int16 *charsets)
|
|
{
|
|
uint16 len;
|
|
|
|
if (!charsets)
|
|
{
|
|
return;
|
|
}
|
|
|
|
if (availableFontCharSets)
|
|
{
|
|
free(availableFontCharSets);
|
|
}
|
|
|
|
availableFontCharSets = charsets;
|
|
|
|
while (*charsets)
|
|
{
|
|
#ifdef XP_UNIX
|
|
switch(*charsets)
|
|
{
|
|
case CS_X_BIG5:
|
|
haveBig5 = TRUE;
|
|
break;
|
|
case CS_8859_5:
|
|
have88595 = TRUE;
|
|
break;
|
|
case CS_CP_1251:
|
|
have1251 = TRUE;
|
|
break;
|
|
case CS_KOI8_R:
|
|
haveKOI8R = TRUE;
|
|
break;
|
|
}
|
|
#endif
|
|
charsets++;
|
|
}
|
|
len = (charsets - availableFontCharSets);
|
|
|
|
#ifdef XP_UNIX
|
|
INTL_SetUnicodeCSIDList(len, availableFontCharSets);
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
/* Code for CSID Iterator */
|
|
|
|
#define NUMOFCSIDINITERATOR 15
|
|
struct INTL_CSIDIteratorPriv
|
|
{
|
|
int16 cur;
|
|
int16 csidlist[NUMOFCSIDINITERATOR];
|
|
};
|
|
typedef struct INTL_CSIDIteratorPriv INTL_CSIDIteratorPriv;
|
|
|
|
#ifdef MOZ_MAIL_NEWS
|
|
|
|
PRIVATE void intl_FillTryIMAP4SearchIterator(INTL_CSIDIteratorPriv* p, int16 csid);

/*
 * intl_FillTryIMAP4SearchIterator
 *
 * Fills p->csidlist with the csids to try for csid, in this order:
 *   1. the default mail csid for csid,
 *   2. the default news csid for csid, if different from the mail csid,
 *   3. every to_csid (CS_AUTO bit removed) of the cscvt_tbl entries whose
 *      from_csid matches csid (CS_AUTO bit ignored), skipping duplicates.
 * The list is terminated by a 0 entry.
 *
 * At most NUMOFCSIDINITERATOR - 1 csids are stored so that the terminator
 * always fits inside the array.  Previously the list could be filled to
 * NUMOFCSIDINITERATOR entries and the terminator was then written one element
 * past the end of csidlist; the old XP_ASSERT was also inverted and fired on
 * every insertion that did not fill the list.
 */
PRIVATE void intl_FillTryIMAP4SearchIterator(INTL_CSIDIteratorPriv* p, int16 csid)
{
  /* the last slot is reserved for the 0 terminator */
  const int maxEntries = NUMOFCSIDINITERATOR - 1;
  int idx = 0;
  cscvt_t *cscvtp;

  p->csidlist[idx++] = INTL_DefaultMailCharSetID(csid);   /* add mailcsid first */
  p->csidlist[idx++] = INTL_DefaultNewsCharSetID(csid);   /* If the news csid is different add it */
  if (p->csidlist[0] == p->csidlist[1])
    idx--;

  /* Add all the csids that we know how to convert to (without the CS_AUTO bit on) */
  for (cscvtp = cscvt_tbl; cscvtp->from_csid && (idx < maxEntries); cscvtp++)
  {
    int16   foundcsid;
    XP_Bool notInTheList = TRUE;
    int     i;

    if ((cscvtp->from_csid & ~CS_AUTO) != (csid & ~CS_AUTO))
      continue;

    foundcsid = cscvtp->to_csid & ~CS_AUTO;
    for (i = 0; i < idx; i++)
    {
      if (foundcsid == p->csidlist[i])
      {
        notInTheList = FALSE;
        break;
      }
    }

    if (notInTheList)
      p->csidlist[idx++] = foundcsid;
  }

  XP_ASSERT(idx < NUMOFCSIDINITERATOR);
  p->csidlist[idx] = 0;   /* terminate the list by 0 */
}
|
|
|
|
/*
 * INTL_CSIDIteratorCreate
 *
 * Allocates a csid iterator and stores its handle in *iterator; the handle is
 * NULL if the allocation fails.  For flag csiditerate_TryIMAP4Search the
 * iterator is filled by intl_FillTryIMAP4SearchIterator() for csid with the
 * CS_AUTO bit removed.  Any other flag asserts.
 *
 * The list starts out empty (0-terminated), so an unsupported flag yields an
 * empty iterator rather than one that walks uninitialised memory.
 */
PUBLIC void INTL_CSIDIteratorCreate( INTL_CSIDIterator* iterator, int16 csid, int flag)
{
  INTL_CSIDIteratorPriv* priv =
      (INTL_CSIDIteratorPriv*) XP_ALLOC(sizeof(INTL_CSIDIteratorPriv));

  *iterator = (INTL_CSIDIterator) priv;
  if (!priv)
    return;

  priv->cur = 0;
  priv->csidlist[0] = 0;   /* empty list until a filler runs */

  switch (flag)
  {
  case csiditerate_TryIMAP4Search:
    intl_FillTryIMAP4SearchIterator(priv, (int16)(csid & ~CS_AUTO));
    break;
  default:
    XP_ASSERT(FALSE);
    break;
  }
}
|
|
|
|
#endif /* MOZ_MAIL_NEWS */
|
|
|
|
/*
 * INTL_CSIDIteratorDestroy
 *
 * Frees the iterator state referenced by *iterator with XP_FREE and resets
 * the caller's handle to NULL.
 */
PUBLIC void INTL_CSIDIteratorDestroy(INTL_CSIDIterator* iterator)
{
  INTL_CSIDIteratorPriv* state = (INTL_CSIDIteratorPriv*) *iterator;

  XP_FREE(state);
  *iterator = NULL;
}
|
|
|
|
/*
 * INTL_CSIDIteratorNext
 *
 * Stores the next csid of the iterator in *pCsid and returns TRUE, or returns
 * FALSE (leaving *pCsid untouched) when the list is exhausted.
 *
 * Also returns FALSE for a NULL handle, which is what
 * INTL_CSIDIteratorCreate() leaves behind when its allocation fails.  The
 * cursor is no longer moved past the 0 terminator or the end of the array, so
 * calling this again after it has returned FALSE keeps returning FALSE
 * instead of reading beyond the list.
 */
PUBLIC XP_Bool INTL_CSIDIteratorNext( INTL_CSIDIterator* iterator, int16* pCsid)
{
  INTL_CSIDIteratorPriv* priv = (INTL_CSIDIteratorPriv*) *iterator;
  int16 csid;

  if (!priv)
    return FALSE;

  if ((priv->cur < 0) || (priv->cur >= NUMOFCSIDINITERATOR))
    return FALSE;

  csid = priv->csidlist[priv->cur];
  if (0 == csid)
    return FALSE;   /* stay parked on the terminator */

  priv->cur++;
  *pCsid = csid;
  return TRUE;
}
|
|
|
|
|
|
#ifdef XP_OS2
|
|
/*
|
|
* Map Netscape charset to OS/2 codepage
|
|
*/
|
|
|
|
/*
 * This is trickier than you think. For a given charset, the first entry should
 * be the Windows codepage, the second entry should be the OS/2 codepage.
 */
|
|
|
|
static uint16 CS2CodePage[] = {
|
|
CS_LATIN1 , 1004, /* 2 */
|
|
CS_ASCII , 1252, /* 1 */
|
|
CS_UTF8 , 1208, /* 290 */
|
|
CS_SJIS , 943, /* 260 */
|
|
CS_8859_3 , 913, /* 14 */
|
|
CS_8859_4 , 914, /* 15 */
|
|
CS_8859_5 , 915, /* 16 ISO Cyrillic */
|
|
CS_8859_6 , 1089, /* 17 ISO Arabic */
|
|
CS_8859_7 , 813, /* 18 ISO Greek */
|
|
CS_8859_8 , 916, /* 19 ISO Hebrew */
|
|
CS_8859_9 , 920, /* 20 */
|
|
CS_BIG5 , 950, /* 263 */
|
|
CS_GB2312 , 1386, /* 287 */
|
|
CS_CP_1250 , 1250, /* 44 CS_CP_1250 is window Centrl Europe */
|
|
CS_CP_1251 , 1251, /* 41 CS_CP_1251 is window Cyrillic */
|
|
CS_LATIN2 , 912, /* 10 */
|
|
CS_CP_1253 , 1253, /* 43 CS_CP_1253 is window Greek */
|
|
CS_CP_1254 , 1254, /* 45 CS_CP_1254 is window Turkish */
|
|
CS_CP_1257 , 1257, /* 61 Windows Baltic */
|
|
CS_CP_1258 , 1258, /* 62 Windows Vietnamese */
|
|
CS_CP_850 , 850, /* 53 PC Latin 1 */
|
|
CS_CP_852 , 852, /* 54 PC Latin 2 */
|
|
CS_CP_855 , 855, /* 55 PC Cyrillic */
|
|
CS_CP_857 , 857, /* 56 PC Turkish */
|
|
CS_CP_862 , 862, /* 57 PC Hebrew */
|
|
CS_CP_864 , 864, /* 58 PC Arabic */
|
|
CS_CP_866 , 866, /* 59 PC Russian */
|
|
CS_CP_874 , 874, /* 60 PC Thai */
|
|
CS_EUCJP , 930, /* 261 */
|
|
CS_GB_8BIT , 1386, /* 264 */
|
|
CS_KOI8_R , 878, /* 39 */
|
|
CS_KSC5601 , 949, /* 284 */
|
|
CS_MAC_CE , 1282, /* 11 */
|
|
CS_MAC_CYRILLIC, 1283, /* 40 */
|
|
CS_MAC_GREEK , 1280, /* 42 */
|
|
CS_MAC_ROMAN , 1275, /* 6 */
|
|
CS_MAC_TURKISH, 1281, /* 46 */
|
|
CS_UCS2 , 1200, /* 810 */
|
|
CS_USRDEF2 , 1252, /* 38 */
|
|
0, 0,
|
|
};
|
|
|
|
/*
|
|
* MapCpToCsNum: Search table and return netscape codeset name
|
|
*/
|
|
uint16 INTL_MapCpToCsNum(uint16 cpid) {
|
|
uint16 * up;
|
|
|
|
up = CS2CodePage;
|
|
while (*up) {
|
|
if (up[1] == cpid) {
|
|
return up[0];
|
|
}
|
|
up += 2;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*
|
|
* MapCsToCpNum: Search table and return codepage
|
|
*/
|
|
uint16 INTL_MapCsToCpNum(uint16 csid) {
|
|
uint16 * up;
|
|
|
|
up = CS2CodePage;
|
|
while (*up) {
|
|
if (up[0] == csid) {
|
|
return up[1];
|
|
}
|
|
up += 2;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*
|
|
* Map from process codepage to default charset
|
|
*/
|
|
int16 INTL_MenuFontCSID(void) {
|
|
ULONG codepage, xxx;
|
|
|
|
DosQueryCp(4, &codepage, &xxx);
|
|
return INTL_MapCpToCsNum(codepage);
|
|
}
|
|
|
|
|
|
/*
 * INTL_MenuFontID: returns the ID for the menu font.
 * This implementation always returns 0.
 */
int INTL_MenuFontID() {
  int menuFontId = 0;

  return menuFontId;
}
|
|
|
|
#endif /* XP_OS2 */
|