mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-01 06:35:42 +00:00
2652 lines
89 KiB
C
2652 lines
89 KiB
C
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
|
|
*
|
|
* The contents of this file are subject to the Netscape Public License
|
|
* Version 1.0 (the "NPL"); you may not use this file except in
|
|
* compliance with the NPL. You may obtain a copy of the NPL at
|
|
* http://www.mozilla.org/NPL/
|
|
*
|
|
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
|
* for the specific language governing rights and limitations under the
|
|
* NPL.
|
|
*
|
|
* The Initial Developer of this code under the NPL is Netscape
|
|
* Communications Corporation. Portions created by Netscape are
|
|
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
|
* Reserved.
|
|
*/
|
|
|
|
|
|
/* libi18n.h */
|
|
|
|
|
|
#ifndef INTL_LIBI18N_H
|
|
#define INTL_LIBI18N_H
|
|
|
|
#include "xp.h"
|
|
/* Default the document-context handle type to MWContext * unless the
 * including file has already supplied its own definition. */
#if !defined(iDocumentContext)
#define iDocumentContext MWContext *
#endif
|
|
/* Short aliases for two project types used in this header. */
#define Stream  NET_StreamClass
#define URL     URL_Struct
|
|
#include "csid.h"
|
|
|
|
/*
 * UNICVTAPI expands to __declspec(dllexport) only when both _UNICVT_DLL_
 * and XP_WIN32 are defined; in every other configuration it expands to
 * nothing.
 */
#if defined(_UNICVT_DLL_) && defined(XP_WIN32)
#define UNICVTAPI __declspec(dllexport)
#else /* _UNICVT_DLL_ is undefined, or this is not an XP_WIN32 build */
#define UNICVTAPI
#endif
|
|
|
|
/* Enum for INTL_CSIDIteratorCreate */
|
|
/* Selector value accepted by INTL_CSIDIteratorCreate. */
enum { csiditerate_TryIMAP4Search = 1 };
|
|
|
|
|
|
|
|
/*
|
|
* To be called when backend catches charset info on <meta ... charset=...> tag.
|
|
* This will force netlib to go get fresh data again either through cache or
|
|
* network.
|
|
*/
|
|
/* Meta-charset handling states; the values are sequential starting at 0. */
enum {
  METACHARSET_NONE            = 0,
  METACHARSET_HASCHARSET      = 1,
  METACHARSET_REQUESTRELAYOUT = 2,
  METACHARSET_FORCERELAYOUT   = 3,
  METACHARSET_RELAYOUTDONE    = 4
};
|
|
|
|
XP_BEGIN_PROTOS
|
|
|
|
|
|
/*=======================================================*/
|
|
/* Character Code Conversion (CCC).
|
|
*
|
|
*
|
|
* CCCDataObject accessor functions are
|
|
* built as a table to allow access from a DLL
|
|
*
|
|
* Note: new functions must be added at the end
|
|
* or old apps using the new dll will fail
|
|
*/
|
|
/**@name Character Code Conversion (CCC) */
|
|
/*@{*/
|
|
|
|
/**
|
|
* Function Prototype for the codeset conversion function.
|
|
*
|
|
* @param obj Specifies the converter object
|
|
* @param src Specifies the text to be converted
|
|
* @param srclen Specifies the length of src
|
|
* @return the converted text. The length of the converted result could be
|
|
* accessed via INTL_GetCCCLen(obj)
|
|
* @see INTL_GetCCCLen
|
|
* @see INTL_SetCCCCvtfunc
|
|
*
|
|
*/
|
|
/* Conversion callback: given the converter object, the source text and its
 * length, returns the converted text. */
typedef unsigned char *(*CCCFunc)(CCCDataObject obj,
                                  const unsigned char *src,
                                  int32 srclen);
|
|
|
|
/**
|
|
* Function Prototype for the Report Auto Detect Result function.
|
|
*
|
|
* @param closure Specifies the closure which associated with the converter
|
|
* object by calling INTL_SetCCCReportAutoDetect
|
|
* @param obj Specifies the converter object
|
|
* @param doc_csid Specifies the auto-detected document csid
|
|
* @see INTL_SetCCCReportAutoDetect
|
|
*
|
|
*/
|
|
/* Auto-detect report callback: receives the registered closure, the
 * converter object and the detected document csid. */
typedef void (*CCCRADFunc)(void *closure,
                           CCCDataObject obj,
                           uint16 doc_csid);
|
|
|
|
/**
|
|
* Opaque converter object.
|
|
*
|
|
* This struct is an opaque converter object.
|
|
*/
|
|
/*
 * Public view of a converter object: only the pointer to the table of
 * private accessor functions is exposed here.
 */
struct OpaqueCCCDataObject { /* WARNING: MUST MATCH REAL STRUCT */
  struct INTL_CCCFuncs *funcs_pointer;  /* converter's private functions table */
};
|
|
|
|
/**
|
|
* This structure hold the private functions of a conversion object.
|
|
*
|
|
* <B>WARNING: THIS STRUCT AND THE TABLE MUST BE IN SYNC WITH EACH OTHER </B>
|
|
*/
|
|
struct INTL_CCCFuncs {
|
|
/** The private function of INTL_SetCCCReportAutoDetect. */
|
|
void (*set_report_autodetect)(CCCDataObject, CCCRADFunc, void*);
|
|
/** The private function of INTL_CallCCCReportAutoDetect. */
|
|
void (*call_report_autodetect)(CCCDataObject, uint16);
|
|
/** The private function of INTL_SetCCCCvtfunc. */
|
|
void (*set_cvtfunc)(CCCDataObject, CCCFunc);
|
|
/** The private function of INTL_GetCCCCvtfunc. */
|
|
CCCFunc (*get_cvtfunc)(CCCDataObject);
|
|
/** The private function of INTL_SetCCCJismode. */
|
|
void (*set_jismode)(CCCDataObject,int32);
|
|
/** The private function of INTL_GetCCCJismode. */
|
|
int32 (*get_jismode)(CCCDataObject);
|
|
/** The private function of INTL_SetCCCCvtflag. */
|
|
void (*set_cvtflag)(CCCDataObject,int32);
|
|
/** The private function of INTL_GetCCCCvtflag. */
|
|
int32 (*get_cvtflag)(CCCDataObject);
|
|
/** The private function of INTL_GetCCCUncvtbuf. */
|
|
unsigned char* (*get_uncvtbuf)(CCCDataObject);
|
|
/** The private function of INTL_SetCCCDefaultCSID. */
|
|
void (*set_default_doc_csid)(CCCDataObject, uint16);
|
|
/** The private function of INTL_GetCCCDefaultCSID. */
|
|
uint16 (*get_default_doc_csid)(CCCDataObject);
|
|
/** The private function of INTL_SetCCCFromCSID. */
|
|
void (*set_from_csid)(CCCDataObject, uint16);
|
|
/** The private function of INTL_GetCCCFromCSID. */
|
|
uint16 (*get_from_csid)(CCCDataObject);
|
|
/** The private function of INTL_SetCCCToCSID. */
|
|
void (*set_to_csid)(CCCDataObject, uint16);
|
|
/** The private function of INTL_GetCCCToCSID. */
|
|
uint16 (*get_to_csid)(CCCDataObject);
|
|
/** The private function of INTL_SetCCCRetval. */
|
|
void (*set_retval)(CCCDataObject, int);
|
|
/** The private function of INTL_GetCCCRetval. */
|
|
int (*get_retval)(CCCDataObject);
|
|
/** The private function of INTL_SetCCCLen. */
|
|
void (*set_len)(CCCDataObject, int32);
|
|
/** The private function of INTL_GetCCCLen. */
|
|
int32 (*get_len)(CCCDataObject);
|
|
};
|
|
|
|
/**
|
|
* Create and initialize Character Code Converter Object.
|
|
*
|
|
* Create and initialize character code converter.
|
|
* It also set up a converter if a doc_csid is known (by DOC_CSID_KNOWN).
|
|
* Caller is responsible for deallocation of an allocated memory.
|
|
*
|
|
* @param c Pointer to an i18n private data structure.
|
|
* @param default_doc_csid Default doc_csid to be used.
|
|
* @return CCCDataObject Created character code converter object pointer.
|
|
*/
|
|
/* Creates a document converter object; the caller deallocates the result. */
PUBLIC CCCDataObject
INTL_CreateDocumentCCC(INTL_CharSetInfo c,         /* i18n private data structure */
                       uint16 default_doc_csid);   /* default doc_csid to use */
|
|
|
|
/**
|
|
* Look for a converter from one charset to another.
|
|
*
|
|
* If the from_csid is CS_DEFAULT, this function uses the ID returned by
|
|
* INTL_GetCCCDefaultCSID. If the to_csid is zero, this function uses the ID
|
|
* returned by INTL_DocToWinCharSetID for the from_csid determined above.
|
|
* If found, the converter function is stored in the given character code
|
|
* conversion object.
|
|
*
|
|
* @param from_csid Specifies the charset ID to convert from
|
|
* @param to_csid Specifies the charset ID to convert to
|
|
* @param obj Specifies the character code converter object
|
|
* @return 1 for success, 0 for failure
|
|
* @see INTL_CreateCharCodeConverter, INTL_CallCharCodeConverter
|
|
*/
|
|
PUBLIC int INTL_GetCharCodeConverter(
|
|
int16 from_csid,
|
|
int16 to_csid,
|
|
CCCDataObject obj
|
|
);
|
|
|
|
/**
|
|
* Set up charset internal data by meta charset.
|
|
*
|
|
* Given a charset name, this will set up i18n private charset info
|
|
* which is obtained by a given context.
|
|
* Input charset name should be obtained from HTML META tag.
|
|
*
|
|
* @param context Context to be set up.
|
|
* @param charset_tag Charset name as an input (e.g. iso-8859-1).
|
|
* @see INTL_CSIReportMetaCharsetTag
|
|
*/
|
|
PUBLIC void INTL_CCCReportMetaCharsetTag(
|
|
MWContext *context,
|
|
char *charset_tag
|
|
);
|
|
|
|
/**
|
|
* Passes some more text to the character code converter.
|
|
*
|
|
* The character code converter object keeps track of the current state as it
|
|
* receives data to convert. If partial characters are received, they are
|
|
* buffered until this function is called again.
|
|
* INTL_GetCharCodeConverter must first be called before calling this function.
|
|
*
|
|
* In some cases, the text is converted in place (in the input buffer).
|
|
*
|
|
* @param obj Specifies the character code converter object
|
|
* @param str Specifies the text to be converted
|
|
* @param len Specifies the length in bytes of the text
|
|
* @return The converted text, null terminated
|
|
* @see INTL_GetCharCodeConverter
|
|
*/
|
|
PUBLIC unsigned char *INTL_CallCharCodeConverter(
|
|
CCCDataObject obj,
|
|
const unsigned char *str,
|
|
int32 len
|
|
);
|
|
|
|
/**
|
|
* Initialize and set up a character code converter for a mail charset.
|
|
*
|
|
* Allocate memory and initialize for character code converter.
|
|
* From/To charset is determined by given context or by parsing the source
|
|
* buffer in case of HTML.
|
|
* After charsets are determined, it set up a converter function.
|
|
* Caller is responsible for deallocation of an allocated memory.
|
|
*
|
|
* @param context Context to access charset info.
|
|
* @param isHTML If TRUE then the input stream is parsed for meta tag.
|
|
* @param buffer Source buffer.
|
|
* @param buffer_size the length of the source buffer.
|
|
* @return CCCDataObject Created character code converter object pointer.
|
|
* @see INTL_CreateCharCodeConverter
|
|
*/
|
|
/* Creates a converter for a mail charset; the caller deallocates the result. */
PUBLIC CCCDataObject
INTL_CreateDocToMailConverter(iDocumentContext context,  /* source of charset info */
                              XP_Bool isHTML,            /* TRUE: parse input for a meta tag */
                              unsigned char *buffer,     /* source buffer */
                              uint32 buffer_size);       /* length of the source buffer */
|
|
|
|
/**
|
|
* Create a character code converter object used for codeset conversion.
|
|
*
|
|
* @return The new character code converter object
|
|
* @see INTL_CreateDocumentCCC, INTL_GetCharCodeConverter,
|
|
* INTL_DestroyCharCodeConverter
|
|
* @deprecated Obsolescent. Please use INTL_CreateDocumentCCC.
|
|
*/
|
|
/* Obsolescent constructor; this header recommends INTL_CreateDocumentCCC instead. */
PUBLIC CCCDataObject INTL_CreateCharCodeConverter(void);
|
|
|
|
/**
|
|
* Frees the given character code conversion object.
|
|
*
|
|
* This function destroys the code conversion object created by
|
|
* INTL_CreateCharCodeConverter.
|
|
*
|
|
* @param obj Specifies the character code conversion object to free
|
|
* @see INTL_CreateCharCodeConverter
|
|
*/
|
|
PUBLIC void INTL_DestroyCharCodeConverter(
|
|
CCCDataObject obj
|
|
);
|
|
|
|
/**
|
|
* Converts a piece of text from one charset to another.
|
|
*
|
|
* This function does not do charset ID auto-detection. The caller must pass
|
|
* the from/to charset IDs. This function does not keep state. Don't use it to
|
|
* convert a stream of data. Only use this when you want to convert a string,
|
|
* and you have no way to hold on to the converter object.
|
|
*
|
|
* If the string gets converted in place (use the input buffer), then this
|
|
* function returns NULL.
|
|
*
|
|
* @param fromcsid Specifies the charset ID to convert from
|
|
* @param tocsid Specifies the charset ID to convert to
|
|
* @param pSrc Specifies the input text
|
|
* @param block_size Specifies the number of bytes in the input text
|
|
* @return The converted text, null terminated, or NULL if converted in place
|
|
* @see INTL_CallCharCodeConverter
|
|
*/
|
|
PUBLIC unsigned char *INTL_ConvertLineWithoutAutoDetect(
|
|
int16 fromcsid,
|
|
int16 tocsid,
|
|
unsigned char *pSrc,
|
|
uint32 block_size
|
|
);
|
|
|
|
/**
|
|
* Returns the window charset ID corresponding to the given document charset ID.
|
|
*
|
|
* This function searches a built-in table to find the first entry that
|
|
* matches the given document charset ID. If no such entry is found, it
|
|
* returns CS_FE_ASCII.
|
|
*
|
|
* @param csid Specifies the document charset ID
|
|
* @return The corresponding window charset ID
|
|
*/
|
|
/* Maps a document charset ID to its window charset ID. */
PUBLIC int16
INTL_DocToWinCharSetID(int16 csid);   /* document charset ID */
|
|
|
|
/**
|
|
* Return the charset used in internet message from a specified charset.
|
|
*
|
|
* In the current implementation of Communicator, we assume there is a many to
|
|
* one relationship between a encoding and a encoding used on internet mail
|
|
* message. This routines is used to get the outgoing encoding for a specified
|
|
* encoding. The caller then can convert the text of the specified encoding to
|
|
* the return encoding and before send out the internet message. Usually the
|
|
* relationship is the same as the newsgroup posting and this one. However, for
|
|
* some region/country like Korean, it is not the same. In such region/country,
|
|
* they use different encodings in internet mail message and newsgroup posting.
|
|
* In that case INTL_DefaultNewsCharSetID should be used instead.
|
|
*
|
|
* Issues: The current model assume the text of a particular encoding is always
|
|
* sending out as one encoding. Such assumption break when people want send out
|
|
* message in different Cyrillic, Chinese, or Unicode encoding. Therefore, we
|
|
* may change this architecture in the near future.
|
|
*
|
|
* The mapping are:
|
|
* <UL>
|
|
* <LI>CS_ASCII: CS_ASCII
|
|
* <LI>CS_LATIN1: CS_LATIN1
|
|
* <LI>CS_JIS: CS_JIS
|
|
* <LI>CS_SJIS: CS_JIS
|
|
* <LI>CS_EUCJP: CS_JIS
|
|
* <LI>CS_JIS_AUTO: CS_JIS
|
|
* <LI>CS_SJIS_AUTO: CS_JIS
|
|
* <LI>CS_EUCJP_AUTO: CS_JIS
|
|
* <LI>CS_KSC_8BIT: CS_2022_KR [Note 1]
|
|
* <LI>CS_KSC_8BIT_AUTO: CS_2022_KR [Note 1]
|
|
* <LI>CS_GB_8BIT: CS_GB_8BIT
|
|
* <LI>CS_BIG5: CS_BIG5
|
|
* <LI>CS_CNS_8BIT: CS_BIG5
|
|
* <LI>CS_MAC_ROMAN: CS_LATIN1
|
|
* <LI>CS_LATIN2: CS_LATIN2
|
|
* <LI>CS_MAC_CE: CS_LATIN2
|
|
* <LI>CS_CP_1250: CS_LATIN2
|
|
* <LI>CS_8859_5: CS_KOI8_R [Note 2]
|
|
* <LI>CS_KOI8_R: CS_KOI8_R [Note 2]
|
|
* <LI>CS_MAC_CYRILLIC: CS_KOI8_R [Note 2]
|
|
* <LI>CS_CP_1251: CS_KOI8_R [Note 2]
|
|
* <LI>CS_8859_7: CS_8859_7
|
|
* <LI>CS_CP_1253: CS_8859_7
|
|
* <LI>CS_MAC_GREEK: CS_8859_7
|
|
* <LI>CS_8859_9: CS_8859_9
|
|
* <LI>CS_MAC_TURKISH: CS_8859_9
|
|
* <LI>CS_UTF8: CS_UTF7
|
|
* <LI>CS_UTF7: CS_UTF7
|
|
* <LI>CS_UCS2: CS_UTF7
|
|
* <LI>CS_UCS2_SWAP: CS_UTF7
|
|
* </UL>
|
|
* Note:
|
|
* <OL>
|
|
* <LI>For INTL_DefaultNewsCharSetID, this value is different
|
|
* <LI>The value is the one specified in preference
|
|
* "intl.mailcharset.cyrillic". The default value is CS_KOI8_R. See
|
|
* <A HREF=http://people.netscape.com/ftang/cyrillicmail.html>
|
|
* http://people.netscape.com/ftang/cyrillicmail.html</A> for details.
|
|
* </OL>
|
|
*
|
|
* @param csid Specifies the encoding
|
|
* @return the encoding should be send out for the internet mail message.
|
|
* @see INTL_DefaultNewsCharSetID
|
|
*/
|
|
/* csid is the source encoding; the result is the encoding to send internet mail in. */
PUBLIC int16 INTL_DefaultMailCharSetID(int16 csid);
|
|
|
|
/**
|
|
* Return the charset used in internet newsgroup posting from a specified charset.
|
|
*
|
|
* In the current implementation of Communicator, we assume there is a many to
|
|
* one relationship between a encoding and a encoding used on internet
|
|
* newsgroup posting. This routines is used to get the outgoing encoding for a
|
|
* specified encoding. The caller then can convert the text of the specified
|
|
* encoding to the return encoding and before post the message to the
|
|
* newsgroup. Usually the relationship is the same as the internet mail message
|
|
* and this one. However, for some region/country like Korean, it is not the
|
|
* same. In such region/country, they use different encodings in internet mail
|
|
* message and newsgroup posting. In that case INTL_DefaultMailCharSetID should
|
|
* be used instead.
|
|
*
|
|
* Issues: The current model assume the text of a particular encoding is always
|
|
* sending out as one encoding. Such assumption break when people want send out
|
|
* message in different Cyrillic, Chinese, or Unicode encoding. Therefore, we
|
|
* may change this architecture in the near future.
|
|
*
|
|
* The mapping are:
|
|
* <UL>
|
|
* <LI>ASCII: CS_ASCII
|
|
* <LI>LATIN1: CS_LATIN1
|
|
* <LI>JIS: CS_JIS
|
|
* <LI>SJIS: CS_JIS
|
|
* <LI>EUCJP: CS_JIS
|
|
* <LI>JIS_AUTO: CS_JIS
|
|
* <LI>SJIS_AUTO: CS_JIS
|
|
* <LI>EUCJP_AUTO: CS_JIS
|
|
* <LI>KSC_8BIT: CS_KSC_8BIT [Note 1]
|
|
* <LI>KSC_8BIT_AUTO: CS_KSC_8BIT [Note 1]
|
|
* <LI>GB_8BIT: CS_GB_8BIT
|
|
* <LI>BIG5: CS_BIG5
|
|
* <LI>CNS_8BIT: CS_BIG5
|
|
* <LI>MAC_ROMAN: CS_LATIN1
|
|
* <LI>LATIN2: CS_LATIN2
|
|
* <LI>MAC_CE: CS_LATIN2
|
|
* <LI>CP_1250: CS_LATIN2
|
|
* <LI>8859_5: CS_KOI8_R [Note 2]
|
|
* <LI>KOI8_R: CS_KOI8_R [Note 2]
|
|
* <LI>MAC_CYRILLIC: CS_KOI8_R [Note 2]
|
|
* <LI>CP_1251: CS_KOI8_R [Note 2]
|
|
* <LI>8859_7: CS_8859_7
|
|
* <LI>CP_1253: CS_8859_7
|
|
* <LI>MAC_GREEK: CS_8859_7
|
|
* <LI>8859_9: CS_8859_9
|
|
* <LI>MAC_TURKISH: CS_8859_9
|
|
* <LI>UTF8: CS_UTF7
|
|
* <LI>UTF7: CS_UTF7
|
|
* <LI>UCS2: CS_UTF7
|
|
* <LI>UCS2_SWAP: CS_UTF7
|
|
* </UL>
|
|
* Note:
|
|
* <OL>
|
|
* <LI>For INTL_DefaultMailCharSetID, this value is different
|
|
* <LI>The value is the one specified in preference
|
|
* "intl.mailcharset.cyrillic". The default value is CS_KOI8_R. See
|
|
* <A HREF=http://people.netscape.com/ftang/cyrillicmail.html>
|
|
* http://people.netscape.com/ftang/cyrillicmail.html</A> for details.
|
|
* </OL>
|
|
*
|
|
* @param csid Specifies the encoding
|
|
* @return the encoding should be send out for the internet newsgroup.
|
|
* @see INTL_DefaultMailCharSetID
|
|
*/
|
|
/* csid is the source encoding; the result is the encoding to post to newsgroups in. */
PUBLIC int16 INTL_DefaultNewsCharSetID(int16 csid);
|
|
|
|
/**
|
|
* Tell libi18n which font charset IDs are available in the front end.
|
|
*
|
|
* The front end (FE) calls this function to inform libi18n of the charset IDs
|
|
* of the fonts that are currently available.
|
|
*
|
|
* This function calls INTL_SetUnicodeCSIDList to set up the Unicode
|
|
* machinery.
|
|
*
|
|
* The front end must allocate space for this array using malloc/calloc. If
|
|
* this function is called more than once, the array passed in a previous call
|
|
* is freed by this function. However, the front end is responsible for
|
|
* freeing the array at exit time.
|
|
*
|
|
* @param charsets Specifies a null-terminated array of charset IDs
|
|
*/
|
|
PUBLIC void INTL_ReportFontCharSets(
|
|
int16 *charsets
|
|
);
|
|
|
|
/**
|
|
* Get the "Unconverted Buffer" from the Converter Object.
|
|
*
|
|
* @param obj Specifies the converter object
|
|
* @return the unconverted buffer in the converter object
|
|
*/
|
|
/* Returns the converter's unconverted buffer via its get_uncvtbuf accessor.
 * The argument is parenthesized so that expression arguments such as &x or
 * (p + 1) expand correctly. */
#define INTL_GetCCCUncvtbuf(obj) \
  (((obj)->funcs_pointer->get_uncvtbuf)(obj))
|
|
|
|
/**
|
|
* Set the "conversion result length" to the converter object.
|
|
*
|
|
* @param obj Specifies the converter object
|
|
* @param len Specifies the length of current conversion result.
|
|
* @see INTL_GetCCCLen
|
|
*/
|
|
/* Forwards to the converter's set_len accessor. */
#define INTL_SetCCCLen(obj, len) \
  (((obj)->funcs_pointer->set_len)((obj), (len)))
|
|
|
|
/**
|
|
* Get the "conversion result length" from the converter object.
|
|
*
|
|
* @param obj Specifies the converter object
|
|
* @return the length of conversion result stored in the converter object
|
|
* @see INTL_SetCCCLen
|
|
*/
|
|
/* Forwards to the converter's get_len accessor. */
#define INTL_GetCCCLen(obj) \
  (((obj)->funcs_pointer->get_len)(obj))
|
|
|
|
/**
|
|
* Set a private flag "Jismode" to the converter object.
|
|
*
|
|
* There are no reason any code outside libi18n should call this.
|
|
* We are considering move this into intlpriv.h.
|
|
* Don't call this macro unless you are changing libi18n.
|
|
*
|
|
* The name "jismode" refers to the ISO 2022 state (JIS mode).
|
|
* This is what the field was first used for.
|
|
* It is now used for other purposes as well, so the name is no longer
|
|
* appropriate.
|
|
*
|
|
* @param obj Specifies the converter object
|
|
* @param jismode Specifies the Jismode
|
|
* @see INTL_GetCCCJismode
|
|
*/
|
|
/* Forwards to the converter's set_jismode accessor. */
#define INTL_SetCCCJismode(obj, jismode) \
  (((obj)->funcs_pointer->set_jismode)((obj), (jismode)))
|
|
/**
|
|
* Get a private flag "Jismode" from the converter object.
|
|
*
|
|
* There are no reason any code outside libi18n should call this.
|
|
* We are considering move this into intlpriv.h.
|
|
* Don't call this macro unless you are changing libi18n
|
|
*
|
|
* The name "jismode" refers to the ISO 2022 state (JIS mode).
|
|
* This is what the field was first used for.
|
|
* It is now used for other purposes as well, so the name is no longer
|
|
* appropriate.
|
|
*
|
|
* @param obj Specifies the converter object
|
|
* @return the Jismode stored in the converter object
|
|
* @see INTL_SetCCCJismode
|
|
*/
|
|
/* Forwards to the converter's get_jismode accessor. */
#define INTL_GetCCCJismode(obj) \
  (((obj)->funcs_pointer->get_jismode)(obj))
|
|
|
|
/**
|
|
* Set a private flag "Cvtflag" to the converter object.
|
|
*
|
|
* There are no reason any code outside libi18n should call this.
|
|
* We are considering move this into intlpriv.h.
|
|
* Don't call this macro unless you are changing libi18n
|
|
*
|
|
* @param obj Specifies the converter object
|
|
* @param cvtflag Specifies the Cvtflag
|
|
* @see INTL_GetCCCCvtflag
|
|
*/
|
|
/* Forwards to the converter's set_cvtflag accessor. */
#define INTL_SetCCCCvtflag(obj, cvtflag) \
  (((obj)->funcs_pointer->set_cvtflag)((obj), (cvtflag)))
|
|
/**
|
|
* Get a private flag "Cvtflag" from the converter object.
|
|
*
|
|
* There are no reason any code outside libi18n should call this.
|
|
* We are considering move this into intlpriv.h.
|
|
* Don't call this macro unless you are changing libi18n
|
|
*
|
|
* @param obj Specifies the converter object
|
|
* @return the Cvtflag stored in the converter object
|
|
* @see INTL_SetCCCCvtflag
|
|
*/
|
|
/* Forwards to the converter's get_cvtflag accessor. */
#define INTL_GetCCCCvtflag(obj) \
  (((obj)->funcs_pointer->get_cvtflag)(obj))
|
|
|
|
/**
|
|
* Set the "Convert To CSID" to the converter object.
|
|
*
|
|
* There are no reason any code outside libi18n should call this.
|
|
* We are considering move this into intlpriv.h.
|
|
* Don't call this macro unless you are changing libi18n
|
|
*
|
|
* @param obj Specifies the converter object
|
|
* @param to_csid Specifies the Convert To CSID
|
|
* @see INTL_GetCCCToCSID
|
|
*/
|
|
/* Forwards to the converter's set_to_csid accessor. */
#define INTL_SetCCCToCSID(obj, to_csid) \
  (((obj)->funcs_pointer->set_to_csid)((obj), (to_csid)))
|
|
/**
|
|
* Get the "Convert To CSID" from the converter object.
|
|
*
|
|
* @param obj Specifies the converter object
|
|
* @return the "Convert To CSID" stored in the converter object
|
|
* @see INTL_SetCCCToCSID
|
|
*/
|
|
/* Forwards to the converter's get_to_csid accessor. */
#define INTL_GetCCCToCSID(obj) \
  (((obj)->funcs_pointer->get_to_csid)(obj))
|
|
|
|
/**
|
|
* Set the "Convert From CSID" to the converter object.
|
|
*
|
|
* There are no reason any code outside libi18n should call this.
|
|
* We are considering move this into intlpriv.h.
|
|
* Don't call this macro unless you are changing libi18n
|
|
*
|
|
* @param obj Specifies the converter object
|
|
* @param from_csid Specifies the Convert From CSID
|
|
* @see INTL_GetCCCFromCSID
|
|
*/
|
|
/* Forwards to the converter's set_from_csid accessor. */
#define INTL_SetCCCFromCSID(obj, from_csid) \
  (((obj)->funcs_pointer->set_from_csid)((obj), (from_csid)))
|
|
/**
|
|
* Get the "Convert From CSID" from the converter object.
|
|
*
|
|
* @param obj Specifies the converter object
|
|
* @return the "Convert From CSID" stored in the converter object
|
|
* @see INTL_SetCCCFromCSID
|
|
*/
|
|
/* Forwards to the converter's get_from_csid accessor. */
#define INTL_GetCCCFromCSID(obj) \
  (((obj)->funcs_pointer->get_from_csid)(obj))
|
|
|
|
/**
|
|
* Set the "Return Value" to the converter object.
|
|
*
|
|
* There are no reason any code outside libi18n should call this.
|
|
* We are considering move this into intlpriv.h.
|
|
* Don't call this macro unless you are changing libi18n
|
|
*
|
|
* @param obj Specifies the converter object
|
|
* @param retval Specifies the "Return Value"
|
|
* @see INTL_GetCCCRetval
|
|
*/
|
|
/* Forwards to the converter's set_retval accessor. */
#define INTL_SetCCCRetval(obj, retval) \
  (((obj)->funcs_pointer->set_retval)((obj), (retval)))
|
|
/**
|
|
* Get the "Return Value" from the converter object.
|
|
*
|
|
* There are no reason any code outside libi18n should call this.
|
|
* We are considering move this into intlpriv.h.
|
|
* Don't call this macro unless you are changing libi18n
|
|
*
|
|
* @param obj Specifies the converter object
|
|
* @return the "Return Value" stored in the converter object
|
|
* @see INTL_SetCCCRetval
|
|
*/
|
|
/* Forwards to the converter's get_retval accessor. */
#define INTL_GetCCCRetval(obj) \
  (((obj)->funcs_pointer->get_retval)(obj))
|
|
|
|
/**
|
|
* Set the "Conversion Function" to the converter object.
|
|
*
|
|
* There are no reason any code outside libi18n should call this.
|
|
* We are considering move this into intlpriv.h.
|
|
* Don't call this macro unless you are changing libi18n
|
|
*
|
|
* @param obj Specifies the converter object
|
|
* @param func Specifies the "Conversion Function" stored in the converter
|
|
* object
|
|
* @see INTL_GetCCCCvtfunc
|
|
*/
|
|
/* Forwards to the converter's set_cvtfunc accessor. */
#define INTL_SetCCCCvtfunc(obj, func) \
  (((obj)->funcs_pointer->set_cvtfunc)((obj), (func)))
|
|
|
|
/**
|
|
* Get the "Conversion Function" from the converter object.
|
|
*
|
|
* @param obj Specifies the converter object
|
|
* @return the "Conversion Function" stored in the converter object
|
|
* @see INTL_SetCCCCvtfunc
|
|
*/
|
|
/* Forwards to the converter's get_cvtfunc accessor. */
#define INTL_GetCCCCvtfunc(obj) \
  (((obj)->funcs_pointer->get_cvtfunc)(obj))
|
|
|
|
/**
|
|
* Set the "Report Auto Detect Result Function" to the converter object.
|
|
*
|
|
* @param obj Specifies the converter object
|
|
* @param func Specifies the "Auto Detect Result Reporting Function"
|
|
* @param closure Specifies the closure which will be pass to the "Auto
|
|
* Detect Result Reporting Function"
|
|
* @see INTL_CallCCCReportAutoDetect
|
|
*/
|
|
/* Forwards to the converter's set_report_autodetect accessor. */
#define INTL_SetCCCReportAutoDetect(obj, func, closure) \
  (((obj)->funcs_pointer->set_report_autodetect)((obj), (func), (closure)))
|
|
|
|
/**
|
|
* Call the "Report Auto Detect Result Function" associated with the
|
|
* converter object.
|
|
*
|
|
* @param obj Specifies the converter object
|
|
* @param doc_csid Specifies the document csid which be auto detected
|
|
* @see INTL_SetCCCReportAutoDetect
|
|
*/
|
|
/* Forwards to the converter's call_report_autodetect accessor. */
#define INTL_CallCCCReportAutoDetect(obj, doc_csid) \
  (((obj)->funcs_pointer->call_report_autodetect)((obj), (doc_csid)))
|
|
|
|
/**
|
|
* Set the "Default Document CSID" to the converter object.
|
|
*
|
|
* There are no reason any code outside libi18n should call this.
|
|
* We are considering move this into intlpriv.h.
|
|
* Don't call this macro unless you are changing libi18n
|
|
*
|
|
* @param obj Specifies the converter object
|
|
* @param default_doc_csid Specifies the Default Document CSID
|
|
* @see INTL_GetCCCDefaultCSID
|
|
*/
|
|
/* Forwards to the converter's set_default_doc_csid accessor. */
#define INTL_SetCCCDefaultCSID(obj, default_doc_csid) \
  (((obj)->funcs_pointer->set_default_doc_csid)((obj), (default_doc_csid)))
|
|
|
|
/**
|
|
* Get the "Default Document CSID" from the converter object.
|
|
*
|
|
* There are no reason any code outside libi18n should call this.
|
|
* We are considering move this into intlpriv.h.
|
|
* Don't call this macro unless you are changing libi18n
|
|
*
|
|
* @param obj Specifies the converter object
|
|
* @return the Default Document CSID stored in the converter object
|
|
* @see INTL_SetCCCDefaultCSID
|
|
*/
|
|
/* Forwards to the converter's get_default_doc_csid accessor. */
#define INTL_GetCCCDefaultCSID(obj) \
  (((obj)->funcs_pointer->get_default_doc_csid)(obj))
|
|
|
|
/*@}*/
|
|
/*=======================================================*/
|
|
/**@name CharSetID and Charset Name Mapping */
|
|
/*@{*/
|
|
/**
|
|
* Returns the preferred MIME charset name corresponding to the given
|
|
* charset ID.
|
|
*
|
|
* Charset names are registered by IANA (Internet Assigned Numbers Authority).
|
|
* The current charset name database can be found at:
|
|
*
|
|
* <A HREF=ftp://ftp.isi.edu/in-notes/iana/assignments/character-sets>
|
|
* ftp://ftp.isi.edu/in-notes/iana/assignments/character-sets</A>.
|
|
*
|
|
* This function returns the charset name for the given Character Set ID
|
|
* which in most cases corresponds to the "(preferred MIME name)" registered
|
|
* with IANA. This function may return private names not found in the
|
|
* registry. Private names start with "x-". See INTL_CharSetNameToID for
|
|
* information about charset IDs.
|
|
*
|
|
* @param charSetID Specifies the charset ID
|
|
* @param charset_return Returns the corresponding charset name, max 128 bytes
|
|
* @see INTL_CharSetNameToID
|
|
*/
|
|
PUBLIC void INTL_CharSetIDToName(
|
|
int16 charSetID,
|
|
char *charset_return
|
|
);
|
|
|
|
/**
|
|
* Returns the charset ID corresponding to the given charset name.
|
|
*
|
|
* The charset ID is a private 16-bit integer, described in
|
|
* ns/include/csid.h. If the given charset is unknown, CS_UNKNOWN is returned.
|
|
* If the given charset is NULL, CS_DEFAULT is returned. Charset names are not
|
|
* case-sensitive. See INTL_CharSetIDToName for a description of charset names.
|
|
*
|
|
* @param charset Specifies the charset name
|
|
* @return the corresponding charset ID
|
|
* @see INTL_CharSetIDToName
|
|
*/
|
|
/* Maps a charset name to its charset ID (CS_UNKNOWN if unknown, CS_DEFAULT if NULL). */
PUBLIC int16
INTL_CharSetNameToID(char *charset);   /* charset name, not case-sensitive */
|
|
|
|
/**
|
|
* Returns a pointer to the preferred MIME charset name corresponding
|
|
* to the given charset ID.
|
|
*
|
|
* This function is similar to INTL_CharSetIDToName. It returns a pointer to
|
|
* the charset name. See INTL_CharSetIDToName for other details.
|
|
*
|
|
* @param charSetID Specifies the charset ID
|
|
* @return The corresponding charset name
|
|
* @see INTL_CharSetIDToName
|
|
*/
|
|
PUBLIC unsigned char *INTL_CsidToCharsetNamePt(
|
|
int16 charSetID
|
|
);
|
|
|
|
/**
|
|
* Returns the Java charset name corresponding to the given charset ID.
|
|
*
|
|
* The Java charset name is one that JDK 1.1 and up will understand.
|
|
* The Java name is defined in
|
|
* <A HREF=
|
|
* http://java.sun.com/products/jdk/1.1/docs/guide/intl/intl.doc.html#25303>
|
|
* http://java.sun.com/products/jdk/1.1/docs/guide/intl/intl.doc.html#25303</A>
|
|
*
|
|
* @param charSetID Specifies the charset ID
|
|
* @param charset_return Returns the corresponding Java charset name,
|
|
* max 128 bytes
|
|
* @see INTL_CharSetIDToJavaCharSetName
|
|
*/
|
|
PUBLIC void INTL_CharSetIDToJavaName(
|
|
int16 charSetID,
|
|
char *charset_return
|
|
);
|
|
|
|
/**
|
|
* Returns the Java charset name corresponding to the given charset ID.
|
|
*
|
|
* The Java charset name is a name used in JDK 1.1 and up.
|
|
* The Java name is defined in
|
|
* <A HREF=
|
|
* http://java.sun.com/products/jdk/1.1/docs/guide/intl/intl.doc.html#25303>
|
|
* http://java.sun.com/products/jdk/1.1/docs/guide/intl/intl.doc.html#25303</A>
|
|
*
|
|
* @param charSetID Specifies the charset ID
|
|
* @return the corresponding Java charset name
|
|
* @see INTL_CharSetIDToJavaName
|
|
*/
|
|
/* Returns the Java charset name for charSetID. */
PUBLIC const char * PR_CALLBACK
INTL_CharSetIDToJavaCharSetName(int16 charSetID);   /* charset ID to look up */
|
|
|
|
/**
|
|
* Returns a pointer to the Java charset name corresponding to
|
|
* the given charset ID.
|
|
*
|
|
* This function is similar to INTL_CharSetIDToJavaCharSetName. See
|
|
* INTL_CharSetIDToJavaCharSetName for further details.
|
|
*
|
|
* @param charSetID Specifies the charset ID
|
|
* @return The corresponding Java charset name
|
|
* @see INTL_CharSetIDToJavaCharSetName
|
|
*/
|
|
PUBLIC unsigned char *INTL_CsidToJavaCharsetNamePt(
|
|
int16 charSetID
|
|
);
|
|
|
|
/*@}*/
|
|
/*=======================================================*/
|
|
/**@name Character Set Properties */
|
|
/*@{*/
|
|
|
|
/**
|
|
* Returns whether or not auto-detection is available for the given charset ID.
|
|
*
|
|
* For example, this routine will return TRUE for any of the Japanese charset
|
|
* IDs, since a Japanese auto-detection routine is available.
|
|
*
|
|
* @param csid Specifies the charset ID
|
|
* @return Whether or not auto-detection is available for the charset ID
|
|
* @see INTL_GetCharCodeConverter
|
|
*/
|
|
/* Reports whether auto-detection is available for the given charset ID. */
PUBLIC XP_Bool
INTL_CanAutoSelect(int16 csid);   /* charset ID to query */
|
|
|
|
/**
 * Returns the charset type.
 *
 * Returns the type of the given charset ID, obtained by masking the ID with
 * 0x700. The charset types are defined in csid.h.
 *
 * <UL>
 * <LI>SINGLEBYTE: single-byte charset (e.g. ISO-8859-1, MacRoman)
 * <LI>MULTIBYTE:  multi-byte charset (e.g. Shift-JIS, Big5)
 * <LI>STATEFUL:   stateful charset (e.g. ISO-2022-JP, UTF-7)
 * <LI>WIDECHAR:   wide character charset (e.g. UCS-2, UCS-4)
 * </UL>
 *
 * The argument is parenthesized in the expansion so that an arbitrary
 * expression (for example <code>a | b</code>) is masked as a whole rather
 * than being split by operator precedence. It is evaluated exactly once.
 *
 * @param charsetid  Specifies the charset ID.
 * @return           The charset type.
 */
#define INTL_CharSetType(charsetid) ((charsetid) & 0x700)
|
|
|
|
/*@}*/
|
|
/*=======================================================*/
|
|
/**@name Finding Character Boundaries */
|
|
/*@{*/
|
|
|
|
/**
|
|
* Returns the number of bytes in the given character.
|
|
*
|
|
* This function checks for zero bytes within the text, returning the actual
|
|
* length even if the preceding byte(s) would normally indicate a longer
|
|
* multibyte character.
|
|
*
|
|
* @param charSetID Specifies the charset ID of the text
|
|
* @param pstr Specifies the 1st byte of the character
|
|
* @return The number of bytes in the given character
|
|
* @see INTL_IsLeadByte
|
|
*/
|
|
PUBLIC int INTL_CharLen(
|
|
int charSetID,
|
|
unsigned char *pstr
|
|
);
|
|
|
|
/**
|
|
* Returns number of bytes in given character, minus 1.
|
|
*
|
|
* This function returns the number of bytes in a character that starts with
|
|
* the given byte, minus 1. I.e. for a single-byte character, it returns zero.
|
|
* For a double-byte character, it returns 1. And so on. Hence, this function
|
|
* returns a non-zero value if the given byte is the "lead byte" of a multibyte
|
|
* character.
|
|
* This function should not be confused with Windows API isleadbyte().
|
|
*
|
|
* @param charSetID Specifies the charset ID of the text
|
|
* @param ch Specifies the first byte of a character in the text
|
|
* @return The number of bytes in the given character, minus 1
|
|
* @see INTL_CharLen
|
|
*/
|
|
PUBLIC int
|
|
#ifdef NSPR20
|
|
PR_CALLBACK
|
|
#endif
|
|
INTL_IsLeadByte(
|
|
int charSetID,
|
|
unsigned char ch
|
|
);
|
|
|
|
/**
|
|
* Returns a pointer to the 1st byte of the next character.
|
|
*
|
|
* This function checks for zero bytes and returns pstr+1 if any are found,
|
|
* even if the preceding byte(s) would normally indicate a longer character.
|
|
*
|
|
* @param charSetID Specifies the charset ID of the text
|
|
* @param pstr Specifies the 1st byte of any previous character
|
|
* @return The 1st byte of the next character
|
|
* @see INTL_CharLen
|
|
*/
|
|
PUBLIC char *INTL_NextChar(
|
|
int charSetID,
|
|
char *pstr
|
|
);
|
|
|
|
/**
|
|
* Returns the number of the byte pointed to by the given position.
|
|
*
|
|
* Determines whether the byte at the given position is the 1st, 2nd, 3rd
|
|
* or 4th byte of the character at that position. The pstr pointer must point
|
|
* to the first byte of any preceding character in the string. The pos
|
|
* position must be greater than zero, and is the index into pstr plus one.
|
|
* I.e. the byte at pstr[0] has pos 1.
|
|
*
|
|
* If pos points to the only byte in a single-byte character, this function
|
|
* returns zero. Otherwise, if pos points to the 1st byte, it returns 1. If
|
|
* pos points to the 2nd byte, it returns 2. And so on.
|
|
*
|
|
* @param charSetID Specifies the charset ID of the given text
|
|
* @param pstr Specifies the beginning of a character in the string
|
|
* @param pos Specifies the byte position within the string
|
|
* @return The number of the byte at the given position
|
|
* @see INTL_CharLen
|
|
*/
|
|
PUBLIC int INTL_NthByteOfChar(
|
|
int charSetID,
|
|
char *pstr,
|
|
int pos
|
|
);
|
|
|
|
/**
|
|
* Returns the byte index of the next character.
|
|
*
|
|
* Given the position of a character in some text, this function returns the
|
|
* position of the next character.
|
|
*
|
|
* @param charSetID Specifies the charset ID of the text
|
|
* @param text Specifies the beginning of the text
|
|
* @param pos Specifies the current position within the text
|
|
* @return The position of the next character
|
|
* @see INTL_PrevCharIdxInText
|
|
*/
|
|
PUBLIC int INTL_NextCharIdxInText(
|
|
int16 charSetID,
|
|
unsigned char *text,
|
|
int pos
|
|
);
|
|
|
|
/**
|
|
* Returns the byte index of the previous character.
|
|
*
|
|
* Given the position of a character in some text, this function returns the
|
|
* position of the previous character.
|
|
*
|
|
* @param charSetID Specifies the charset ID of the text
|
|
* @param text Specifies the beginning of the text
|
|
* @param pos Specifies the current position within the text
|
|
* @return The position of the previous character
|
|
* @see INTL_NextCharIdxInText
|
|
*/
|
|
PUBLIC int INTL_PrevCharIdxInText(
|
|
int16 charSetID,
|
|
unsigned char *text,
|
|
int pos
|
|
);
|
|
|
|
|
|
/**
|
|
* Convert number of bytes to number of characters.
|
|
*
|
|
* Given a number of bytes in a given string, this function determines the
|
|
* number of characters.
|
|
*
|
|
* @param charSetID Specifies the charset ID of the text
|
|
* @param text Specifies the text
|
|
* @param byteCount Specifies the number of bytes
|
|
* @return The number of characters
|
|
* @see INTL_TextCharLenToByteCount
|
|
*/
|
|
PUBLIC int32 INTL_TextByteCountToCharLen(
|
|
int16 charSetID,
|
|
unsigned char *text,
|
|
uint32 byteCount
|
|
);
|
|
|
|
/**
|
|
* Convert number of characters to number of bytes.
|
|
*
|
|
* Given a number of characters in a given string, this function determines the
|
|
* number of bytes.
|
|
*
|
|
* @param charSetID Specifies the charset ID of the text
|
|
* @param text Specifies the text
|
|
* @param charLen Specifies the number of characters
|
|
* @return The number of bytes
|
|
* @see INTL_TextByteCountToCharLen
|
|
*/
|
|
PUBLIC int32 INTL_TextCharLenToByteCount(
|
|
int16 charSetID,
|
|
unsigned char *text,
|
|
uint32 charLen
|
|
);
|
|
|
|
|
|
/**
|
|
* Returns the byte index of the next character.
|
|
*
|
|
* Given the position of any byte of any character in some text, this function
|
|
* returns the position of the 1st byte of the next character. The
|
|
* difference between this function and INTL_NextCharIdxInText is that this
|
|
* function will accept the position of any byte of a character rather than
|
|
* just the 1st byte of a character.
|
|
*
|
|
* @param charSetID Specifies the charset ID of the text
|
|
* @param str Specifies the beginning of the text
|
|
* @param pos Specifies any byte of any character
|
|
* @return The index of the next character
|
|
* @see INTL_NextCharIdxInText, INTL_PrevCharIdx
|
|
*/
|
|
PUBLIC int INTL_NextCharIdx(
|
|
int16 charSetID,
|
|
unsigned char *str,
|
|
int pos
|
|
);
|
|
|
|
/**
|
|
* Returns the byte index of the previous character.
|
|
*
|
|
* Given the position of any byte of any character in some text, this function
|
|
* returns the position of the 1st byte of the previous character. The
|
|
* difference between this function and INTL_PrevCharIdxInText is that this
|
|
* function will accept the position of any byte of a character rather than
|
|
* just the 1st byte of a character.
|
|
*
|
|
* @param charSetID Specifies the charset ID of the text
|
|
* @param str Specifies the beginning of the text
|
|
* @param pos Specifies any byte of any character
|
|
* @return The index of the previous character
|
|
* @see INTL_PrevCharIdxInText, INTL_NextCharIdx
|
|
*/
|
|
PUBLIC int INTL_PrevCharIdx(
|
|
int16 charSetID,
|
|
unsigned char *str,
|
|
int pos
|
|
);
|
|
|
|
/*@}*/
|
|
/*=======================================================*/
|
|
/**@name Single-Byte Charset Conversion Tables (Obsolescent) */
|
|
/*@{*/
|
|
|
|
/**
|
|
* Free a single-byte charset conversion table.
|
|
*
|
|
* This is not really a public function. However, ns/sun-java/awt/macos needs
|
|
* it, so we have to put it here.
|
|
*
|
|
* @see INTL_GetSingleByteTable
|
|
* @version DEPRECATED. Obsolescent. Use INTL_DestroyCharCodeConverter instead.
|
|
*/
|
|
MODULE_PRIVATE void INTL_FreeSingleByteTable(char **cvthdl);
|
|
|
|
/**
|
|
* Get a single-byte charset conversion table.
|
|
*
|
|
* This is not really a public function. However, ns/sun-java/awt/macos needs
|
|
* it, so we have to put it here.
|
|
*
|
|
* @see INTL_FreeSingleByteTable
|
|
* @see INTL_LockTable
|
|
* @version DEPRECATED. Obsolescent. Use INTL_GetCharCodeConverter instead.
|
|
*/
|
|
MODULE_PRIVATE char **INTL_GetSingleByteTable(
|
|
int16 fromcsid,
|
|
int16 tocsid,
|
|
int32 func_ctx
|
|
);
|
|
|
|
/**
|
|
* Lock the given single-byte charset conversion table in memory.
|
|
*
|
|
* This is not really a public function. However, ns/sun-java/awt/macos needs
|
|
* it, so we have to put it here.
|
|
*
|
|
* @see INTL_GetSingleByteTable
|
|
* @version DEPRECATED. Obsolescent. See INTL_GetSingleByteTable.
|
|
*/
|
|
MODULE_PRIVATE char *INTL_LockTable(char **cvthdl);
|
|
|
|
/*@}*/
|
|
/*=======================================================*/
|
|
/**@name HTTP Headers */
|
|
/*@{*/
|
|
|
|
/**
|
|
* Return the AcceptLanguage preference.
|
|
*
|
|
* Get the HTTP Accept-Language header from preference settings.
|
|
*
|
|
* @return Accept-Language header (null-terminated string).
|
|
* @see INTL_GetAcceptCharset
|
|
*/
|
|
PUBLIC char *INTL_GetAcceptLanguage(void);
|
|
|
|
/**
|
|
* Return the AcceptCharset preference.
|
|
*
|
|
* Get the HTTP Accept-Charset header from preference settings.
|
|
*
|
|
* @return Accept-Charset header (null-terminated string).
|
|
* @see INTL_GetAcceptLanguage
|
|
*/
|
|
PUBLIC char *INTL_GetAcceptCharset(void);
|
|
|
|
/*@}*/
|
|
/*=======================================================*/
|
|
/**@name Message Header Processing */
|
|
/*@{*/
|
|
|
|
/**
 * Decode and convert message header.
 *
 * This is a convenience macro that calls INTL_DecodeMimePartIIStr. It is
 * similar to INTL_DecodeMimePartIIStr, with the exception that it always
 * attempts to allocate a new buffer instead of returning the original input
 * buffer where the decoding/conversion may have been performed in place:
 * if INTL_DecodeMimePartIIStr returns NULL or returns b itself, r is set to
 * XP_STRDUP(b) instead.
 *
 * The whole expansion is a single parenthesized expression whose value is r,
 * so the macro can be used both as a statement and on the right-hand side of
 * an assignment.
 *
 * Note: r must be a modifiable lvalue, and both r and b are evaluated more
 * than once, so neither argument should have side effects.
 *
 * @param r  Returns the decoded/converted message header
 * @param b  Specifies the message header
 * @param c  Specifies the target window charset ID
 * @param f  Passed as the dontConvert argument of INTL_DecodeMimePartIIStr:
 *           if true, the header is only decoded; if false, it is decoded
 *           and converted into the window charset
 * @return   the decoded/converted message header (r)
 * @see INTL_DecodeMimePartIIStr
 */
#define INTL_DECODE_MIME_PART_II(r,b,c,f) \
    ((r) = INTL_DecodeMimePartIIStr((b),(c),(f)), \
     ((NULL != (r)) && ((r) != (b))) ? (r) : ((r) = XP_STRDUP(b)))
|
|
|
|
/**
|
|
* Decode and convert message header.
|
|
*
|
|
* If the message header contains an RFC 2047 encoded-word, that word is
|
|
* decoded. Then it performs charset conversion if the dontConvert parameter is
|
|
* false. Otherwise, it will only decode the string and return. The conversion
|
|
* may happen later in the process. The flag is needed to work around a double
|
|
* conversion problem.
|
|
*
|
|
* @param header Specifies the message string to be decoded/converted.
|
|
* @param wincsid Specifies the target window charset ID.
|
|
* @param dontConvert Specifies whether to convert the string into the wincsid
|
|
* or not. If the value is true, then it will only decode
|
|
* any RFC 2047 encoded-words, without converting their
|
|
* charsets. If the value is false, then it will decode RFC
|
|
* 2047 encoded-words AND convert them into the specified
|
|
* wincsid.
|
|
* @return Decoded and/or converted message header. If the return value is
|
|
* different from the input buffer, the caller must free the output
|
|
* buffer by calling XP_FREE when it is no longer needed.
|
|
* @see INTL_DECODE_MIME_PART_II
|
|
* @see INTL_EncodeMimePartIIStr
|
|
* @see INTL_EncodeMimePartIIStr_VarLen
|
|
*/
|
|
PUBLIC char *INTL_DecodeMimePartIIStr(
|
|
const char *header,
|
|
int16 wincsid,
|
|
XP_Bool dontConvert
|
|
);
|
|
|
|
/**
|
|
* Convert and encode message header.
|
|
*
|
|
* Convert the string into an encoding used in Internet messages and encode
|
|
* them as per RFC 2047. It will (1) perform the codeset conversion and
|
|
* (2) RFC 1522 encoding algorithm (if bUseMime is true or the internet message
|
|
* encoding is ISO-2022-KR or ISO-2022-JP). This is a restricted version of
* INTL_EncodeMimePartIIStr_VarLen which always uses 72 for encodedWordSize.
|
|
*
|
|
* @param header Specifies the RFC 1522 string to be encoded.
|
|
* @param wincsid Specifies the source encoding
|
|
* @param bUseMime Specifies whether to apply the RFC 1522 rule or not. If the
* value is true or the internet message encoding is
* ISO-2022-JP or ISO-2022-KR, then it performs RFC 1522
* encoding after converting the text into the internet
* message encoding. Otherwise, it only converts the text
* into the internet message encoding.
|
|
* @return the encoded/converted header. The caller needs to free this by
* calling XP_FREE when the result is no longer needed.
|
|
* @see INTL_DecodeMimePartIIStr
|
|
* @see INTL_EncodeMimePartIIStr_VarLen
|
|
*/
|
|
PUBLIC char *INTL_EncodeMimePartIIStr(
|
|
char *header,
|
|
int16 wincsid,
|
|
XP_Bool bUseMime
|
|
);
|
|
|
|
/**
|
|
* Convert and encode text into RFC 1522 header.
|
|
*
|
|
* Convert the string into the encoding used in internet message and encode
|
|
* them into RFC 1522 form. It will (1) perform the codeset conversion and
|
|
* (2) RFC 1522 encoding algorithm (if bUseMime is true or the internet message
|
|
* encoding is ISO-2022-KR or ISO-2022-JP). It is the same as
* INTL_EncodeMimePartIIStr except that it allows an encodedWordSize value
* other than 72.
|
|
*
|
|
* @param header Specifies the RFC 1522 string to be encoded.
|
|
* @param wincsid Specifies the source encoding
|
|
* @param bUseMime Specifies whether to apply the RFC 1522 rule or not. If the
* value is true or the internet message encoding is
* ISO-2022-JP or ISO-2022-KR, then it performs RFC 1522
* encoding after converting the text into the internet
* message encoding. Otherwise, it only converts the text
* into the internet message encoding.
|
|
* @param encodedWordSize Specifies the maximum length of encoded word.
|
|
* @return the encoded/converted header. The caller needs to free this by
* calling XP_FREE when the result is no longer needed.
|
|
* @see INTL_DecodeMimePartIIStr
|
|
* @see INTL_EncodeMimePartIIStr
|
|
*/
|
|
PUBLIC char *INTL_EncodeMimePartIIStr_VarLen(
|
|
char * header,
|
|
int16 wincsid,
|
|
XP_Bool bUseMime,
|
|
int encodedWordSize
|
|
);
|
|
|
|
/**
|
|
* [OBSOLETE!!!] We should use the INTL_DecodeMimePartIIStr instead of this.
|
|
* We keep this Macro until we change all the callers.
|
|
* Please do not use this in the future.
|
|
*/
|
|
#define IntlDecodeMimePartIIStr INTL_DecodeMimePartIIStr
|
|
|
|
/**
|
|
* [OBSOLETE!!!] We should use the INTL_EncodeMimePartIIStr instead of this.
|
|
* We keep this Macro until we change all the callers.
|
|
* Please do not use this in the future.
|
|
*/
|
|
#define IntlEncodeMimePartIIStr INTL_EncodeMimePartIIStr
|
|
|
|
|
|
|
|
/**
|
|
* Set a private flag that records whether we are sending mail or news.
|
|
*
|
|
* A flag is used inside libi18n to remember whether we are sending mail or
|
|
* news. This is because the mail encoding and the news encoding are different
* for Korean.
|
|
* Note that this should be used carefully since it depends on
|
|
* the current mail/news implementation.
|
|
* This is really a hack. It will be removed in the future.
|
|
*
|
|
* @param toNews Boolean value to be set to the private flag.
|
|
*/
|
|
PUBLIC void
|
|
INTL_MessageSendToNews(XP_Bool toNews);
|
|
|
|
|
|
/**
|
|
* Convert a string from RFC1522 encoded header and normalize it, by dropping
|
|
* the case of the character.
|
|
*
|
|
* The return value could be used with INTL_StrContains, INTL_StrIs,
|
|
* INTL_StrBeginWith or INTL_StrEndWith to perform string matching. This
|
|
* function will normalize a string by dropping the case of character according
|
|
* to the csid the caller passed in. It will also ignore CR and LF characters.
|
|
*
|
|
* @param csid Specifies the encoding of rfc1522header
* @param rfc1522header Specifies the RFC 1522 encoded header to be normalized.
|
|
* @return a normalized string which can be used with INTL_StrContains,
* INTL_StrIs, INTL_StrBeginWith and INTL_StrEndWith. The caller
* should free it by calling XP_FREE when it is no longer needed.
|
|
* @see INTL_GetNormalizeStr
|
|
* @see INTL_StrContains
|
|
* @see INTL_StrIs
|
|
* @see INTL_StrBeginWith
|
|
* @see INTL_StrEndWith
|
|
*/
|
|
PUBLIC unsigned char* INTL_GetNormalizeStrFromRFC1522(
|
|
int16 csid,
|
|
unsigned char* rfc1522header
|
|
);
|
|
|
|
|
|
/*@}*/
|
|
/*=======================================================*/
|
|
/**@name Unicode (UCS-2) Strings */
|
|
/*@{*/
|
|
|
|
/**
|
|
* Unicode character typedef.
|
|
*
|
|
* This is used to represent a 16-bit Unicode (UCS-2) character.
|
|
*/
|
|
typedef uint16 INTL_Unicode;
|
|
|
|
/**
|
|
* Return the length of a Unicode string.
|
|
*
|
|
* The given Unicode string must be terminated by U+0000.
|
|
*
|
|
* @param ustr Specifies the Unicode string
|
|
* @return The length of ustr in UCS-2 units, not bytes
|
|
*/
|
|
PUBLIC uint32 INTL_UnicodeLen(INTL_Unicode *ustr);
|
|
|
|
/*@}*/
|
|
/*=======================================================*/
|
|
/**@name Compound Strings */
|
|
/*@{*/
|
|
|
|
/**
|
|
* A typedef for encoding IDs (charset IDs).
|
|
*
|
|
* These are equivalent to charset IDs in the current code base.
|
|
*/
|
|
typedef uint16 INTL_Encoding_ID;
|
|
|
|
/*
|
|
* See comment below.
|
|
*/
|
|
typedef struct INTL_CompoundStr INTL_CompoundStr;
|
|
|
|
/**
|
|
* Compound String.
|
|
*
|
|
* A Compound String is constructed as a linked list. Each node has two fields
|
|
* and a pointer to the next node. The two fields store a pointer to a
|
|
* uniformly encoded piece of text and the encoding of that text.
|
|
*/
|
|
struct INTL_CompoundStr {
|
|
/** Encoding (charset ID) of the text held in this node. */
|
|
INTL_Encoding_ID encoding;
|
|
/** Text of this node, uniformly encoded in the encoding given above. */
|
|
unsigned char *text;
|
|
/** Next node in the linked list, or NULL if this is the last node. */
|
|
INTL_CompoundStr *next;
|
|
};
|
|
|
|
/**
|
|
* INTL_CompoundStrIterator should really be opaque, but we need to change the
|
|
* callers first.
|
|
*/
|
|
typedef INTL_CompoundStr *INTL_CompoundStrIterator;
|
|
|
|
/**
|
|
* Construct an INTL_CompoundStr, given some text and its encoding.
|
|
*
|
|
* Use this with INTL_CompoundStrCat to create multi-encoding
|
|
* INTL_CompoundStrs.
|
|
*
|
|
* @param inencoding Specifies the encoding of intext.
|
|
* @param intext Specifies the text to be stored. Null-terminated string.
|
|
* @return INTL_CompoundStr. The caller should use INTL_CompoundStrDestroy to
|
|
* destroy it when it is no longer needed.
|
|
* @see INTL_CompoundStrDestroy
|
|
*/
|
|
PUBLIC INTL_CompoundStr* INTL_CompoundStrFromStr(
|
|
INTL_Encoding_ID inencoding,
|
|
unsigned char* intext
|
|
);
|
|
|
|
/**
|
|
* Convert the given Unicode string to an INTL_CompoundStr.
|
|
*
|
|
* This routine uses information provided by the front end through
|
|
* INTL_SetUnicodeCSIDList. It converts from Unicode to substrings in the
|
|
* encodings that the front end said were available (in the font system).
|
|
*
|
|
* @param inunicode Specifies the Unicode text to be converted.
|
|
* @param inlen Specifies the length of inunicode in UCS-2 units,
|
|
* not bytes.
|
|
* @return INTL_CompoundStr. The caller should use INTL_CompoundStrDestroy to
|
|
* destroy it when it is no longer needed.
|
|
* @see INTL_CompoundStrDestroy
|
|
*/
|
|
PUBLIC INTL_CompoundStr* INTL_CompoundStrFromUnicode(
|
|
INTL_Unicode* inunicode,
|
|
uint32 inlen
|
|
);
|
|
|
|
/**
|
|
* Destroy an INTL_CompoundStr.
|
|
*
|
|
* This function destroys the INTL_CompoundStr created by
|
|
* INTL_CompoundStrFromStr or INTL_CompoundStrFromUnicode.
|
|
*
|
|
* @param This Specifies the INTL_CompoundStr to be destroyed.
|
|
* @see INTL_CompoundStrFromStr
|
|
* @see INTL_CompoundStrFromUnicode
|
|
*/
|
|
PUBLIC void INTL_CompoundStrDestroy(INTL_CompoundStr* This);
|
|
|
|
/**
|
|
* Concatenate two INTL_CompoundStrs.
|
|
*
|
|
* @param s1 Specifies the first INTL_CompoundStr and returns the
|
|
* concatenated INTL_CompoundStr
|
|
* @param s2 Specifies the second INTL_CompoundStr
|
|
* @see INTL_CompoundStrDestroy
|
|
*/
|
|
PUBLIC void INTL_CompoundStrCat(
|
|
INTL_CompoundStr* s1,
|
|
INTL_CompoundStr* s2
|
|
);
|
|
|
|
/**
|
|
* Clone an INTL_CompoundStr.
|
|
*
|
|
* This function clones an INTL_CompoundStr.
|
|
*
|
|
* @param s1 Specifies the INTL_CompoundStr to be cloned
|
|
* @return a cloned INTL_CompoundStr. The caller should use
|
|
* INTL_CompoundStrDestroy to destroy it when it is no longer needed.
|
|
* @see INTL_CompoundStrDestroy
|
|
*/
|
|
PUBLIC INTL_CompoundStr* INTL_CompoundStrClone(INTL_CompoundStr* s1);
|
|
|
|
/**
|
|
* Start iterating an INTL_CompoundStr.
|
|
*
|
|
* Initialize the iterating state and perform the first iteration of an
|
|
* INTL_CompoundStr.
|
|
*
|
|
* @param This Specifies the INTL_CompoundStr to be iterated
|
|
* @param outencoding Returns the encoding of the first node
|
|
* @param outtext Returns the text of the first node. The caller should
|
|
* not free it.
|
|
* @return INTL_CompoundStrIterator. The state of the iteration. Should be
|
|
* passed to INTL_CompoundStrNextStr. NULL if the iteration is
|
|
* finished.
|
|
* @see INTL_CompoundStrNextStr
|
|
*/
|
|
PUBLIC INTL_CompoundStrIterator INTL_CompoundStrFirstStr(
|
|
INTL_CompoundStr* This,
|
|
INTL_Encoding_ID *outencoding,
|
|
unsigned char** outtext
|
|
);
|
|
|
|
/**
|
|
* Iterating INTL_CompoundStr.
|
|
*
|
|
* This function iterates through the INTL_CompoundStr for the given
|
|
* INTL_CompoundStrIterator.
|
|
*
|
|
* @param iterator Specifies the INTL_CompoundStrIterator
|
|
* @param outencoding Returns the encoding of the current node
|
|
* @param outtext Returns the text of the current node. The caller should
|
|
* not free it.
|
|
* @return INTL_CompoundStrIterator. The state of the iteration. Should be
|
|
* passed to INTL_CompoundStrNextStr. NULL if the iteration is
|
|
* finished.
|
|
* @see INTL_CompoundStrFirstStr
|
|
*/
|
|
PUBLIC INTL_CompoundStrIterator INTL_CompoundStrNextStr(
|
|
INTL_CompoundStrIterator iterator,
|
|
INTL_Encoding_ID *outencoding,
|
|
unsigned char** outtext
|
|
);
|
|
|
|
/*@}*/
|
|
/*=======================================================*/
|
|
/**@name Unicode Conversion */
|
|
/*@{*/
|
|
/**
|
|
* An opaque data object used to iterate through Unicode text for
|
|
* conversion to font encodings.
|
|
*
|
|
* See also the functions that use this object.
|
|
*
|
|
* @see INTL_UnicodeToStrIteratorCreate
|
|
* @see INTL_UnicodeToStrIterate
|
|
* @see INTL_UnicodeToStrIteratorDestroy
|
|
*
|
|
*/
|
|
typedef void* INTL_UnicodeToStrIterator ;
|
|
|
|
/**
|
|
* Create an INTL_UnicodeToStrIterator and iterate through it once.
|
|
*
|
|
* This function creates an INTL_UnicodeToStrIterator and iterates through it
|
|
* once to get the first element of Unicode text for font encoding conversion.
|
|
* The function uses the prioritized Character Set ID list (CSIDList) to
|
|
* decide which font encoding it will convert to. The iteration stops if the
|
|
* whole Unicode string is converted. Otherwise, it continues iterating and
|
|
* uses the next charset in the CSIDlist to convert the Unicode text.
|
|
*
|
|
* @param ustr Specifies Unicode string to be converted
|
|
* @param ustrlen Specifies length of ustr in UCS-2 units not bytes
|
|
* @param encoding Returns the encoding of the first element.
|
|
* Returns 0 if there are no more to iterate.
|
|
* @param dest Specifies the buffer for output and returns the
|
|
* converted string for the first iteration
|
|
* @param destbuflen Specifies the length of dest in bytes
|
|
* @return Iterator which keeps the iteration state
|
|
* @see INTL_GetUnicodeCSIDList
|
|
* @see INTL_SetUnicodeCSIDList
|
|
* @see INTL_UnicodeToStrIterate
|
|
* @see INTL_UnicodeToStrIteratorDestroy
|
|
* @see INTL_GetUnicodeCharsetList
|
|
*/
|
|
PUBLIC INTL_UnicodeToStrIterator INTL_UnicodeToStrIteratorCreate(
|
|
INTL_Unicode* ustr,
|
|
uint32 ustrlen,
|
|
INTL_Encoding_ID *encoding,
|
|
unsigned char* dest,
|
|
uint32 destbuflen
|
|
);
|
|
|
|
/**
|
|
* Iterate through a Unicode object and convert to font encoding.
|
|
*
|
|
* Iterate the INTL_UnicodeToStrIterator to get Unicode to font encoding
|
|
* conversion.
|
|
*
|
|
* @param iterator Specifies iterator that keeps the last iteration state
|
|
* @param encoding Returns the encoding of the first element. Returns 0
|
|
* if there are no more to iterate.
|
|
* @param dest Specifies the buffer for output and returns the
|
|
* converted string for the current iteration
|
|
* @param destbuflen Specifies the length of dest in bytes
|
|
* @return 0 if there are no more elements to iterate.
|
|
* @see INTL_GetUnicodeCSIDList
|
|
* @see INTL_SetUnicodeCSIDList
|
|
* @see INTL_UnicodeToStrIteratorCreate
|
|
* @see INTL_UnicodeToStrIteratorDestroy
|
|
* @see INTL_GetUnicodeCharsetList
|
|
*/
|
|
PUBLIC int INTL_UnicodeToStrIterate(
|
|
INTL_UnicodeToStrIterator iterator,
|
|
INTL_Encoding_ID *encoding,
|
|
unsigned char* dest,
|
|
uint32 destbuflen
|
|
);
|
|
|
|
/**
|
|
* Destroy an INTL_UnicodeToStrIterator.
|
|
*
|
|
* This function destroys the INTL_UnicodeToStrIterator created by
|
|
* INTL_UnicodeToStrIteratorCreate.
|
|
*
|
|
* @param iterator Specifies the iterator to be destroyed
|
|
* @see INTL_GetUnicodeCSIDList
|
|
* @see INTL_SetUnicodeCSIDList
|
|
* @see INTL_UnicodeToStrIteratorCreate
|
|
* @see INTL_UnicodeToStrIterate
|
|
* @see INTL_GetUnicodeCharsetList
|
|
*/
|
|
PUBLIC void INTL_UnicodeToStrIteratorDestroy(
|
|
INTL_UnicodeToStrIterator iterator
|
|
);
|
|
|
|
/**
|
|
* Return memory requirement for INTL_UnicodeToStr.
|
|
*
|
|
* Returns the maximum memory required for text converted from a Unicode
|
|
* string to a specified encoding. Call this to prepare memory for
|
|
* INTL_UnicodeToStr.
|
|
*
|
|
* @param encoding Specifies the target encoding
|
|
* @param ustr Specifies the buffer containing UCS-2 data
|
|
* @param ustrlen Specifies the valid length of ustr in UCS-2 units
|
|
* not bytes
|
|
* @return Number of bytes needed to store the converted result
|
|
* @see INTL_UnicodeToStr
|
|
*/
|
|
PUBLIC uint32 INTL_UnicodeToStrLen(
|
|
INTL_Encoding_ID encoding,
|
|
INTL_Unicode* ustr,
|
|
uint32 ustrlen
|
|
);
|
|
|
|
/**
|
|
* Convert Unicode string to a specified encoding.
|
|
*
|
|
* The caller needs to call INTL_UnicodeToStrLen first to prepare memory and
|
|
* pass into dest.
|
|
*
|
|
* @param encoding Specifies the target encoding
|
|
* @param ustr Specifies the buffer containing UCS-2 data
|
|
* @param ustrlen Specifies the valid length of ustr in UCS-2 units
|
|
* not bytes
|
|
* @param dest Specifies the buffer for the converted text and
|
|
* returns the converted text
|
|
* @param destbuflen Specifies the size of dest in bytes
|
|
* @see INTL_UnicodeToStrLen
|
|
*/
|
|
PUBLIC void INTL_UnicodeToStr(
|
|
INTL_Encoding_ID encoding,
|
|
INTL_Unicode* ustr,
|
|
uint32 ustrlen,
|
|
unsigned char* dest,
|
|
uint32 destbuflen
|
|
);
|
|
|
|
/**
|
|
* Convert Unicode to text in one encoding by trial and error.
|
|
*
|
|
* This routine tries to convert the given Unicode string into text of one
|
|
* non-Unicode encoding. This is a trial and error function which may be
|
|
* slow in "THE WORST CASE". However, it does its best in the best case and
|
|
* average case.
|
|
*
|
|
* @param ustr Specifies the buffer containing UCS-2 data
|
|
* @param ustrlen Specifies the valid length of ustr in UCS-2 units
|
|
* not bytes
|
|
* @param dest Specifies the buffer for the converted text and
* returns the converted text
* @param destbuflen Specifies the size of dest in bytes
|
|
* @return Encoding of the converted text
|
|
*/
|
|
PUBLIC INTL_Encoding_ID INTL_UnicodeToEncodingStr(
|
|
INTL_Unicode* ustr,
|
|
uint32 ustrlen,
|
|
unsigned char* dest,
|
|
uint32 destbuflen
|
|
);
|
|
|
|
/**
|
|
* Return memory requirement for INTL_StrToUnicode.
|
|
*
|
|
* Return the maximum memory requirement for text converted from the
|
|
* specified encoding to Unicode. Call this to prepare memory for
|
|
* INTL_StrToUnicode. Unlike INTL_TextToUnicodeLen, this function takes its
* input as a null-terminated string.
|
|
*
|
|
* @param encoding Specifies the encoding of text in src
|
|
* @param src Specifies the text to be converted
|
|
* @return Size of Unicode to store the converted output (in
|
|
* UCS-2 units not bytes)
|
|
* @see INTL_StrToUnicode
|
|
* @see INTL_TextToUnicodeLen
|
|
*/
|
|
PUBLIC uint32 INTL_StrToUnicodeLen(
|
|
INTL_Encoding_ID encoding,
|
|
unsigned char* src
|
|
);
|
|
|
|
/**
|
|
* Convert non-Unicode text to Unicode.
|
|
*
|
|
* The caller needs to call INTL_StrToUnicodeLen first to prepare memory and
|
|
* pass into ustr. Unlike INTL_TextToUnicode, this function takes its input
* as a null-terminated string.
|
|
*
|
|
* @param encoding Specifies the encoding of text in src
|
|
* @param src Specifies the text to be converted
|
|
* @param ustr Specifies the buffer for Unicode and returns the converted
|
|
* Unicode
|
|
* @param ubuflen Specifies the size of the ustr in UCS-2 units not bytes
|
|
* @return Size of the converted Unicode (in UCS-2 units not bytes)
|
|
* @see INTL_StrToUnicodeLen
|
|
* @see INTL_TextToUnicode
|
|
*/
|
|
PUBLIC uint32 INTL_StrToUnicode(
|
|
INTL_Encoding_ID encoding,
|
|
unsigned char* src,
|
|
INTL_Unicode* ustr,
|
|
uint32 ubuflen
|
|
);
|
|
|
|
/**
|
|
* Return memory requirement for INTL_TextToUnicode.
|
|
*
|
|
* Return the maximum memory requirement for text converted from a specified
|
|
* encoding to Unicode. Call this to prepare memory for INTL_TextToUnicode.
* Unlike INTL_StrToUnicodeLen, this function takes its input as a pointer
* and a length rather than as a null-terminated string.
|
|
*
|
|
* @param encoding Specifies the encoding of text in src
|
|
* @param src Specifies the text to be converted
|
|
* @param srclen Specifies the number of bytes in src
|
|
* @return Size of Unicode to store the converted output (in UCS-2
|
|
* units not bytes)
|
|
* @see INTL_TextToUnicode
|
|
* @see INTL_StrToUnicodeLen
|
|
*/
|
|
PUBLIC uint32 INTL_TextToUnicodeLen(
|
|
INTL_Encoding_ID encoding,
|
|
unsigned char* src,
|
|
uint32 srclen
|
|
);
|
|
|
|
/**
|
|
* Convert text from non-Unicode to Unicode.
|
|
*
|
|
* The caller needs to call INTL_TextToUnicodeLen first to prepare memory and
|
|
* pass into ustr. Unlike INTL_StrToUnicode, this function takes its input as
* a pointer and a length rather than as a null-terminated string.
|
|
*
|
|
* @param encoding Specifies the encoding of text in src
|
|
* @param src Specifies the text to be converted
|
|
* @param srclen Specifies the number of bytes in src
|
|
* @param ustr Specifies the buffer for the Unicode string and returns
|
|
* the converted Unicode string
|
|
* @param ubuflen Specifies the size of the ustr in the UCS-2 units not
|
|
* bytes
|
|
* @return Size of converted Unicode (in UCS-2 units not bytes)
|
|
* @see INTL_TextToUnicodeLen
|
|
* @see INTL_StrToUnicode
|
|
*/
|
|
PUBLIC uint32 INTL_TextToUnicode(
|
|
INTL_Encoding_ID encoding,
|
|
unsigned char* src,
|
|
uint32 srclen,
|
|
INTL_Unicode* ustr,
|
|
uint32 ubuflen
|
|
);
|
|
|
|
|
|
/**
|
|
* Initialize Unicode conversion routines from a list of Character Set IDs (CSIDs)
|
|
* for Unicode rendering.
|
|
*
|
|
* It should only be called once in the application life time. It should be
|
|
* called by front end before calling any other Unicode conversion functions.
|
|
* The list could be retrieved through INTL_GetUnicodeCSIDList or
|
|
* INTL_GetUnicodeCharsetList.
|
|
*
|
|
* @param numOfItems Specifies the number of valid items in the csidlist
|
|
* @param csidlist Specifies a prioritized list of csid to be used for
|
|
* Unicode to font charset conversion. The function will
|
|
* make a copy of the list the caller passes in. The caller
|
|
* may free the passed-in list after this function returns.
|
|
* @see INTL_GetUnicodeCSIDList
|
|
* @see INTL_UnicodeToStrIteratorCreate
|
|
* @see INTL_UnicodeToStrIterate
|
|
* @see INTL_UnicodeToStrIteratorDestroy
|
|
* @see INTL_GetUnicodeCharsetList
|
|
*/
|
|
PUBLIC void INTL_SetUnicodeCSIDList(
|
|
uint16 numOfItems,
|
|
int16 *csidlist);
|
|
|
|
/**
|
|
* Returns a list of Character Set ID (CSID) used for converting Unicode
|
|
* to font encoding.
|
|
*
|
|
* The list is set in the initialization time by the front end through
|
|
* INTL_SetUnicodeCSIDList. The only difference between INTL_GetUnicodeCSIDList
|
|
* and INTL_GetUnicodeCharsetList is that INTL_GetUnicodeCSIDList returns a
|
|
* list of CSIDs and the INTL_GetUnicodeCharsetList returns a list of charset
|
|
* names (strings).
|
|
*
|
|
* @param outnum Returns the number of items in the returned CSID array.
|
|
* @return Array of CSIDs. Caller should not change or free the returned array.
|
|
* @see INTL_SetUnicodeCSIDList
|
|
* @see INTL_UnicodeToStrIteratorCreate
|
|
* @see INTL_UnicodeToStrIterate
|
|
* @see INTL_UnicodeToStrIteratorDestroy
|
|
* @see INTL_GetUnicodeCharsetList
|
|
*/
|
|
PUBLIC int16* INTL_GetUnicodeCSIDList(int16 * outnum);
|
|
|
|
/**
|
|
* Return a list of charset names (strings) used for converting Unicode to font
|
|
* encoding.
|
|
*
|
|
* The list is set in the initialization time by front end through
|
|
* INTL_SetUnicodeCSIDList. The only difference between INTL_GetUnicodeCSIDList
|
|
* and INTL_GetUnicodeCharsetList is that INTL_GetUnicodeCSIDList returns a
|
|
* list of CSIDs and INTL_GetUnicodeCharsetList returns a list of charset
|
|
* names (strings).
|
|
*
|
|
* @param outnum Returns the number of items in the returned charset array
|
|
* @return Array of charset names. Caller should not change or free the
|
|
* returned array.
|
|
* @see INTL_GetUnicodeCSIDList
|
|
* @see INTL_SetUnicodeCSIDList
|
|
* @see INTL_UnicodeToStrIteratorCreate
|
|
* @see INTL_UnicodeToStrIterate
|
|
* @see INTL_UnicodeToStrIteratorDestroy
|
|
*/
|
|
PUBLIC unsigned char **INTL_GetUnicodeCharsetList(int16 * outnum);
|
|
|
|
/**
|
|
* Converts a UTF-8 sub-string to the appropriate font encoding.
|
|
*
|
|
* Converts characters until the encoding changes or
|
|
* input/output space runs out.
|
|
*
|
|
* The segment is NOT NULL TERMINATED
|
|
*
|
|
* @param utf8p Specifies the UTF-8 string
|
|
* @param utf8len Specifies the length of utf8p
|
|
* @param LE_string Specifies and returns the (pre-allocated) buffer
|
|
* for the string converted to the font encoding
|
|
* @param LE_string_len Specifies the length of the buffer for LE_string
|
|
* @param LE_written_len Returns the valid length of the return LE_string
|
|
* @param LE_string_csid Returns the CSID of the return LE_string:
|
|
* <UL>
|
|
* <LI>
|
|
* >0 if successful (valid CSID).
|
|
* <LI>
|
|
* -1 if not Unicode.
|
|
* <LI>
|
|
* -2 if no font encoding.
|
|
* </UL>
|
|
* @return Length of converted UTF-8 string
|
|
*/
|
|
PUBLIC int utf8_to_local_encoding(
|
|
const unsigned char *utf8p,
|
|
const int utf8len,
|
|
unsigned char *LE_string,
|
|
int LE_string_len,
|
|
int *LE_written_len,
|
|
int16 *LE_string_csid
|
|
);
|
|
|
|
/**
|
|
* Convert text from UTF-8 to UCS-2 encoding.
|
|
*
|
|
* UCS-2 is the abbreviation for the two byte form of Unicode.
|
|
* UTF-8 is a transformation encoding for Unicode.
|
|
* For more information about UTF-8 look at RFC 2279 in
|
|
* <A HREF=ftp://ds.internic.net/rfc/rfc2279.txt>
|
|
* ftp://ds.internic.net/rfc/rfc2279.txt</A> .
|
|
* For more information about UCS-2, look at <A HREF=http://www.unicode.org>
|
|
* http://www.unicode.org</A>.
|
|
*
|
|
* @param utf8p Specifies the UTF-8 text buffer. It is NULL terminated.
|
|
* @param num_chars Returns the length of the converted UCS-2 in UCS-2 units
|
|
* not bytes
|
|
* @return UCS-2 string, NULL terminated by U+0000, or NULL. The
|
|
* caller should free it by calling XP_FREE when it is no
|
|
* longer needed.
|
|
* @see INTL_UCS2ToUTF8
|
|
*/
|
|
PUBLIC UNICVTAPI uint16 *INTL_UTF8ToUCS2(
|
|
const unsigned char *utf8p,
|
|
int32 *num_chars
|
|
);
|
|
|
|
/**
|
|
* Convert text from UCS-2 to UTF-8 encoding.
|
|
*
|
|
* UCS-2 is the abbreviation for the two byte form of Unicode.
|
|
* UTF-8 is a transformation encoding for Unicode.
|
|
* For more information about UTF-8 look at RFC 2279 in
|
|
* <A HREF=ftp://ds.internic.net/rfc/rfc2279.txt>
|
|
* ftp://ds.internic.net/rfc/rfc2279.txt</A> .
|
|
* For more information about UCS-2, look at <A HREF= http://www.unicode.org>
|
|
* http://www.unicode.org</A>.
|
|
*
|
|
* @param ucs2p Specifies the UCS-2 text buffer
|
|
* @param num_chars Specifies the length of ucs2p, in UCS-2 units not bytes
|
|
* @return NULL terminated UTF-8 string or NULL. The caller should
|
|
* free it by calling XP_FREE when it is no longer needed.
|
|
* @see INTL_UTF8ToUCS2
|
|
*/
|
|
PUBLIC UNICVTAPI unsigned char *INTL_UCS2ToUTF8(
|
|
const uint16 *ucs2p,
|
|
int32 num_chars
|
|
);
|
|
/*@}*/
|
|
/*=======================================================*/
|
|
/**@name String Comparison */
|
|
/*@{*/
|
|
|
|
/**
|
|
* Case insensitive comparison.
|
|
*
|
|
* This function is multibyte charset safe. It will consider characters
|
|
* boundary correctly. It also ignores case by considering the charset
|
|
* it used.
|
|
*
|
|
* @param charSetID Specifies the encoding of text1 and text2.
|
|
* @param text1 Specifies address of text1.
|
|
* @param text2 Specifies address of text2.
|
|
* @param charlen Returns the length in bytes of text1.
|
|
* @return true if the text1 and text2 point to the same character,
|
|
* ignoring the case, false otherwise.
|
|
* @see INTL_MatchOneCaseChar
|
|
* @see INTL_Strstr
|
|
* @see INTL_Strcasestr
|
|
*/
|
|
PUBLIC XP_Bool INTL_MatchOneChar(
|
|
int16 charSetID,
|
|
unsigned char *text1,
|
|
unsigned char *text2,
|
|
int *charlen
|
|
);
|
|
|
|
/**
|
|
* Case sensitive comparison.
|
|
*
|
|
* This function is multibyte charset safe. It will consider characters
|
|
* boundary correctly.
|
|
*
|
|
* @param charSetID Specifies the encoding of text1 and text2.
|
|
* @param text1 Specifies address of text1.
|
|
* @param text2 Specifies address of text2.
|
|
* @param charlen Returns length in bytes of text1.
|
|
* @return true if the text1 and text2 point to the same character (same case),
|
|
* false otherwise.
|
|
* @see INTL_MatchOneChar
|
|
* @see INTL_Strstr
|
|
* @see INTL_Strcasestr
|
|
*/
|
|
PUBLIC XP_Bool INTL_MatchOneCaseChar(
|
|
int16 charSetID,
|
|
unsigned char *text1,
|
|
unsigned char *text2,
|
|
int *charlen
|
|
);
|
|
|
|
/**
|
|
* Case sensitive sub-string search.
|
|
*
|
|
* This function is multibyte charset safe. It will consider characters
|
|
* boundary correctly.
|
|
*
|
|
* @param charSetID Specifies the encoding of s1 and s2.
|
|
* @param s1 Specifies the first string
|
|
* @param s2 Specifies the second string
|
|
* @return NULL if s1 does not contain s2,
|
|
* otherwise, return the address of the sub-string in s1.
|
|
* @see INTL_MatchOneChar
|
|
* @see INTL_MatchOneCaseChar
|
|
* @see INTL_Strcasestr
|
|
*/
|
|
PUBLIC char *INTL_Strstr(
|
|
int16 charSetID,
|
|
const char *s1,
|
|
const char *s2
|
|
);
|
|
|
|
/**
|
|
* Case insensitive sub-string search.
|
|
*
|
|
* This function is multibyte charset safe. It will consider characters
|
|
* boundary correctly. It also ignores case by considering the charset it
|
|
* used.
|
|
*
|
|
* @param charSetID Specifies the encoding of s1 and s2.
|
|
* @param s1 Specifies the first string
|
|
* @param s2 Specifies the second string
|
|
* @return NULL if s1 does not contain s2,
|
|
* otherwise, return the address of the sub-string in s1.
|
|
* @see INTL_MatchOneChar
|
|
* @see INTL_MatchOneCaseChar
|
|
* @see INTL_Strstr
|
|
*/
|
|
PUBLIC char *INTL_Strcasestr(
|
|
int16 charSetID,
|
|
const char *s1,
|
|
const char *s2
|
|
);
|
|
|
|
|
|
/*
|
|
Functions to support correct mail/news comparison:
|
|
INTL_GetNormalizeStr
|
|
INTL_GetNormalizeStrFromRFC1522
|
|
INTL_StrContains
|
|
INTL_StrIs
|
|
INTL_StrBeginWith
|
|
INTL_StrEndWith
|
|
|
|
Example:
|
|
|
|
XP_Bool MailHeaderContains(csid, header, str)
|
|
{
|
|
XP_Bool result = FALSE;
|
|
unsigned char* n_str = INTL_GetNormalizeStr(csid, str);
|
|
unsigned char* n_header = INTL_GetNormalizeStrFromRFC1522(csid, header);
|
|
|
|
if((NULL != n_str) && (NULL != n_header))
|
|
result = INTL_StrContains(csid, n_header, n_str);
|
|
if(n_str)
|
|
XP_FREE(n_str);
|
|
if(n_header)
|
|
XP_FREE(n_header);
|
|
return result;
|
|
}
|
|
|
|
*/
|
|
|
|
/**
|
|
* Normalize a string, by dropping the case of the characters.
|
|
*
|
|
* The return value could be used with INTL_StrContains, INTL_StrIs,
|
|
* INTL_StrBeginWith or INTL_StrEndWith to perform string matching. This
|
|
* function normalizes a string by dropping the case of character according to
|
|
* the charSetID the caller passed in. It also ignores CR and LF characters.
|
|
*
|
|
* @param charSetID Specifies the encoding of str
|
|
* @param str Specifies the to-be-normalized string.
|
|
* @return a normalized string which could be used in INTL_StrContains,
|
|
* INTL_StrIs, INTL_StrBeginWith and INTL_StrEndWith. The caller should
|
|
* free it by calling XP_FREE when it is not needed.
|
|
* @see INTL_GetNormalizeStrFromRFC1522
|
|
* @see INTL_StrContains
|
|
* @see INTL_StrIs
|
|
* @see INTL_StrBeginWith
|
|
* @see INTL_StrEndWith
|
|
*/
|
|
PUBLIC unsigned char* INTL_GetNormalizeStr(
|
|
int16 charSetID,
|
|
unsigned char* str
|
|
);
|
|
|
|
/**
|
|
* Test if string s1 contains string s2.
|
|
*
|
|
* This function is multibyte charset safe. It will consider characters
|
|
* boundary correctly. To do string matching with ignoring the case of
|
|
* character, call INTL_GetNormalizeStr (or INTL_GetNormalizeStrFromRFC1522)
|
|
* before calling this function.
|
|
*
|
|
* @param charSetID Specifies the encoding for s1 and s2.
|
|
* @param str1 Specifies the first string (s1)
|
|
* @param str2 Specifies the second string (s2)
|
|
* @return true if s1 contains s2,
|
|
* false otherwise
|
|
* @see INTL_GetNormalizeStr
|
|
* @see INTL_GetNormalizeStrFromRFC1522
|
|
* @see INTL_StrIs
|
|
* @see INTL_StrBeginWith
|
|
* @see INTL_StrEndWith
|
|
*/
|
|
PUBLIC XP_Bool INTL_StrContains(
|
|
int16 charSetID,
|
|
unsigned char* str1,
|
|
unsigned char* str2
|
|
);
|
|
|
|
/**
|
|
* Test if string s1 is string s2.
|
|
*
|
|
* This function is multibyte charset safe. It will consider character boundaries
|
|
* correctly. To do string matching with ignoring the case of character, call
|
|
* INTL_GetNormalizeStr (or INTL_GetNormalizeStrFromRFC1522) before calling this
|
|
* function.
|
|
*
|
|
* @param charSetID Specifies the encoding for s1 and s2.
|
|
* @param str1 Specifies the first string (s1)
|
|
* @param str2 Specifies the second string (s2)
|
|
* @return true if the two strings are equal, false otherwise
|
|
* @see INTL_GetNormalizeStr
|
|
* @see INTL_GetNormalizeStrFromRFC1522
|
|
* @see INTL_StrContains
|
|
* @see INTL_StrBeginWith
|
|
* @see INTL_StrEndWith
|
|
*/
|
|
PUBLIC XP_Bool INTL_StrIs(
|
|
int16 charSetID,
|
|
unsigned char* str1,
|
|
unsigned char* str2
|
|
);
|
|
|
|
/**
|
|
* Test if string s1 begins with string s2.
|
|
*
|
|
* This function is multibyte charset safe. It will consider characters
|
|
* boundary correctly. To do string matching with ignoring the case of
|
|
* character, call INTL_GetNormalizeStr (or INTL_GetNormalizeStrFromRFC1522)
|
|
* before calling this function.
|
|
*
|
|
* @param charSetID Specifies the encoding for s1 and s2.
|
|
* @param str1 Specifies the first string (s1)
|
|
* @param str2 Specifies the second string (s2)
|
|
* @return true if the first string begins with the second string,
|
|
* false otherwise
|
|
* @see INTL_GetNormalizeStr
|
|
* @see INTL_GetNormalizeStrFromRFC1522
|
|
* @see INTL_StrContains
|
|
* @see INTL_StrIs
|
|
* @see INTL_StrEndWith
|
|
*/
|
|
PUBLIC XP_Bool INTL_StrBeginWith(
|
|
int16 charSetID,
|
|
unsigned char* str1,
|
|
unsigned char* str2
|
|
);
|
|
|
|
/**
|
|
* Test if string s1 ends with string s2.
|
|
*
|
|
* This function is multibyte charset safe. It will consider characters
|
|
* boundary correctly. To do string matching with ignoring the case of
|
|
* character, call INTL_GetNormalizeStr (or INTL_GetNormalizeStrFromRFC1522)
|
|
* before calling this function.
|
|
*
|
|
* @param charSetID Specifies the encoding for s1 and s2.
|
|
* @param str1 Specifies the first string (s1)
|
|
* @param str2 Specifies the second string (s2)
|
|
* @return true if the first string ends with the second string, false
|
|
* otherwise.
|
|
* @see INTL_GetNormalizeStr
|
|
* @see INTL_GetNormalizeStrFromRFC1522
|
|
* @see INTL_StrContains
|
|
* @see INTL_StrIs
|
|
* @see INTL_StrBeginWith
|
|
*/
|
|
PUBLIC XP_Bool INTL_StrEndWith(
|
|
int16 charSetID,
|
|
unsigned char* str1,
|
|
unsigned char* str2
|
|
);
|
|
|
|
/**
|
|
* Return a (hacky) XPAT pattern for NNTP server for searching pre
|
|
* RFC 1522 message header.
|
|
*
|
|
* This is a hacky function which tries to work around another HACK!!! The
|
|
* problem it tries to solve is to search on NNTP, internet newsgroup server.
|
|
* Unfortunately, the NNTP server does not have non-ASCII text searching
|
|
* command. The only functionality in the NNTP protocol we could use is the
|
|
* XPAT extension of NNTP (see
|
|
* <A HREF=ftp://ds.internic.net/internet-drafts/draft-ietf-nntpext-imp-01.txt>
|
|
* ftp://ds.internic.net/internet-drafts/draft-ietf-nntpext-imp-01.txt</A> or
|
|
* <A HREF=ftp://ds.internic.net/internet-drafts/draft-barber-nntp-imp-07.txt>
|
|
* ftp://ds.internic.net/internet-drafts/draft-barber-nntp-imp-07.txt</A> ).
|
|
* XPAT use wildmat regular expression (see <A HREF=
|
|
* http://oac.hsc.uth.tmc.edu/oac_sysadmin/services/INN/man/wildmat.3.html>
|
|
* http://oac.hsc.uth.tmc.edu/oac_sysadmin/services/INN/man/wildmat.3.html</A>
|
|
* for details) to provide string matching. Unfortunately, wildmat is not
|
|
* designed to support non-ASCII text. It works for English headers but not for
|
|
* header in other language like Japanese, French, or German. The problem is
|
|
* the XPAT/wildmat cannot deal with (1) ISO-2022-xx encoding nor (2) RFC 1522
|
|
* header. To work around the limitation in the protocol, we put together this
|
|
* function to support the first limitation as possible as we can. This
|
|
* function take one search string, and return a XPAT pattern which could then
|
|
* be used to send to NNTP XPAT as search argument. However, there are some
|
|
* limitations here. (1) It may cause NNTP to return more messages than it should,
|
|
* the reason is the XPAT won't respect to the multibyte character boundary
|
|
* when it try to match the string. To improve this in the future, the client
|
|
* double check the header after it receive message from the server and narrow
|
|
* it down to the correct case. (2) The pattern it generated won't match RFC
|
|
* 1522 header so it could return less message than it should. This is because
|
|
* there is more than one XPAT pattern that could match the string in the case of RFC 1522
|
|
* header. To improve this in the future, the client side should send several
|
|
* possible XPAT patterns (with the pattern returned by this function), collect
|
|
* the result, and then double checking in the client side. Of course, improve
|
|
* the NNTP protocol itself is the real solution. But the improvement stated
|
|
* above is also needed for the server support the current NNTP protocol. This
|
|
* function (1) convert the text from the encoding the argument specified into
|
|
* the encoding used in the corresponding internet newsgroup, (2) strip out
|
|
* leading or trailing ISO-2022 escape sequence if present, (3) escape the
|
|
* wildmat special characters (any characters which is not from 0-9, a-z, A-Z),
|
|
* and return.
|
|
*
|
|
* @param winCharSetID Specifies the encoding of searchString.
|
|
* @param searchString Specifies the string to be searched for through the NNTP XPAT
|
|
* command.
|
|
* @return the pattern that should be sent to the NNTP XPAT command for searching
|
|
* non-ASCII header. The caller need to free this by calling XP_FREE
|
|
* when the result is no longer needed.
|
|
*/
|
|
PUBLIC unsigned char* INTL_FormatNNTPXPATInNonRFC1522Format(
|
|
int16 winCharSetID,
|
|
unsigned char* searchString
|
|
);
|
|
|
|
/*@}*/
|
|
/*=======================================================*/
|
|
/**@name Charset ID Iterator */
|
|
/*@{*/
|
|
|
|
|
|
/**
|
|
* An object that can iterate through a list of charset IDs.
|
|
*
|
|
* @see INTL_CSIDIteratorCreate
|
|
* @see INTL_CSIDIteratorDestroy
|
|
* @see INTL_CSIDIteratorNext
|
|
*/
|
|
typedef void* INTL_CSIDIterator;
|
|
|
|
/**
|
|
* Returns a new iterator object to search charset IDs for a particular
|
|
* conversion.
|
|
*
|
|
* This function searches a built-in table to look for charset converters
|
|
* that could be used for a particular purpose. The only purpose currently
|
|
* supported is the IMAP4 conversion. This function puts the mail and news
|
|
* charset IDs corresponding to the given charset ID at the top of the list
|
|
* of IDs to try. After that, it inserts the "to" charset IDs of all entries
|
|
* matching the given "from" ID.
|
|
*
|
|
* @param iterator Returns a new iterator object
|
|
* @param charSetID Specifies the charset ID to convert from
|
|
* @param flag Specifies the type of conversion
|
|
* Currently, the only valid value is
|
|
* csiditerate_TryIMAP4Search .
|
|
*
|
|
* @see INTL_CSIDIteratorNext, INTL_CSIDIteratorDestroy
|
|
*/
|
|
PUBLIC void INTL_CSIDIteratorCreate(
|
|
INTL_CSIDIterator *iterator,
|
|
int16 charSetID,
|
|
int flag
|
|
);
|
|
|
|
/**
|
|
* Frees the given iterator, and sets given pointer to NULL.
|
|
*
|
|
* This function destroys the object created by INTL_CSIDIteratorCreate.
|
|
*
|
|
* @param iterator Specifies the iterator object to destroy
|
|
* @see INTL_CSIDIteratorCreate
|
|
*/
|
|
PUBLIC void INTL_CSIDIteratorDestroy(
|
|
INTL_CSIDIterator *iterator
|
|
);
|
|
|
|
/**
|
|
* Returns the next charset ID in the given iterator, if any.
|
|
*
|
|
* The return value is TRUE if a charset ID was found. The charset ID
|
|
* is returned in pCharSetID. Otherwise, the return value is FALSE, and
|
|
* pCharSetID remains untouched.
|
|
*
|
|
* @param iterator Specifies the iterator object
|
|
* @param pCharSetID Returns the next charset ID
|
|
* @return TRUE if a charset ID was found and returned, otherwise FALSE
|
|
* @see INTL_CSIDIteratorCreate, INTL_CSIDIteratorDestroy
|
|
*/
|
|
PUBLIC XP_Bool INTL_CSIDIteratorNext(
|
|
INTL_CSIDIterator *iterator,
|
|
int16 *pCharSetID
|
|
);
|
|
|
|
/*@}*/
|
|
/*=======================================================*/
|
|
/**@name Line/Word Breaking */
|
|
/*@{*/
|
|
|
|
/**
|
|
* Line breaking information.
|
|
*
|
|
* <UL>
|
|
* <LI>
|
|
* PROHIBIT_NOWHERE -
|
|
* It is a breakable character. A break is allowed before
|
|
* or after this character. This class is for all
|
|
* Kanji ideographic character.
|
|
* <LI>
|
|
* PROHIBIT_BEGIN_OF_LINE -
|
|
* It should not appear at the beginning of a line.
|
|
* <LI>
|
|
* PROHIBIT_END_OF_LINE -
|
|
* It should not appear at the end of a line.
|
|
* <LI>
|
|
* PROHIBIT_WORD_BREAK -
|
|
* It is a non-breakable character. A break is not allowed
|
|
* if the next (or previous) character is also
|
|
* PROHIBIT_WORD_BREAK.
|
|
* </UL>
|
|
*
|
|
* @see INTL_KinsokuClass
|
|
*/
|
|
enum LINE_WRAP_PROHIBIT_CLASS{
|
|
PROHIBIT_NOWHERE,
|
|
PROHIBIT_BEGIN_OF_LINE,
|
|
PROHIBIT_END_OF_LINE,
|
|
PROHIBIT_WORD_BREAK
|
|
};
|
|
|
|
/**
|
|
* Basic Japanese word breaking information.
|
|
*
|
|
* <UL>
|
|
* <LI>
|
|
* SEVEN_BIT_CHAR - e.g. ASCII
|
|
* <LI>
|
|
* HALFWIDTH_PRONOUNCE_CHAR - e.g. Japanese Katakana
|
|
* <LI>
|
|
* FULLWIDTH_ASCII_CHAR - e.g. ASCII in JIS
|
|
* <LI>
|
|
* FULLWIDTH_PRONOUNCE_CHAR - e.g. Japanese Hiragana, Katakana
|
|
* <LI>
|
|
* KANJI_CHAR - ideographic
|
|
* <LI>
|
|
* UNCLASSIFIED_CHAR - others
|
|
* </UL>
|
|
*
|
|
* @see INTL_CharClass
|
|
*/
|
|
enum WORD_BREAK_CLASS{
|
|
SEVEN_BIT_CHAR,
|
|
HALFWIDTH_PRONOUNCE_CHAR,
|
|
FULLWIDTH_ASCII_CHAR,
|
|
FULLWIDTH_PRONOUNCE_CHAR,
|
|
KANJI_CHAR,
|
|
UNCLASSIFIED_CHAR
|
|
};
|
|
/**
|
|
* Returns the code point that represents the non-breaking space character.
|
|
*
|
|
* The current implementation returns the same value regardless of the given
|
|
* charset. However, the return value is platform dependent.
|
|
* The information then is used by parser and layout code.
|
|
*
|
|
* Use this function with caution as it is tied to
|
|
* the current HTML parser implementation.
|
|
*
|
|
* @param winCharSetID Specifies the window charset id.
|
|
* @return the code point of the non-breaking space as a
|
|
* C style NULL terminated string.
|
|
*
|
|
*/
|
|
PUBLIC const char *INTL_NonBreakingSpace(
|
|
uint16 winCharSetID
|
|
);
|
|
|
|
/**
|
|
* Returns information for basic Japanese word breaking.
|
|
*
|
|
* Given a character pointer and charset, returns a word breaking
|
|
* character class for the given character.
|
|
* It is necessary to pass a pointer because the
|
|
* character may be more than one byte.
|
|
*
|
|
* In the future, the definition of word breaking classes needs to be
|
|
* extended.
|
|
*
|
|
* @param winCharSetID Specifies the window charset ID
|
|
* @param pstr Specifies the pointer to the character
|
|
* @return Character class for word breaking:
|
|
* <UL>
|
|
* <LI>
|
|
* SEVEN_BIT_CHAR - e.g. ASCII
|
|
* <LI>
|
|
* HALFWIDTH_PRONOUNCE_CHAR - e.g. Japanese Katakana
|
|
* <LI>
|
|
* FULLWIDTH_ASCII_CHAR - e.g. ASCII in JIS
|
|
* <LI>
|
|
* FULLWIDTH_PRONOUNCE_CHAR - e.g. Japanese Hiragana, Katakana
|
|
* <LI>
|
|
* KANJI_CHAR - ideographic
|
|
* <LI>
|
|
* UNCLASSIFIED_CHAR - others
|
|
* </UL>
|
|
* @see INTL_KinsokuClass
|
|
* @see WORD_BREAK_CLASS
|
|
*/
|
|
PUBLIC int INTL_CharClass(
|
|
int winCharSetID,
|
|
unsigned char *pstr
|
|
);
|
|
|
|
/**
|
|
* Returns line breaking information.
|
|
*
|
|
* Given a character pointer and charset, returns a line breaking
|
|
* character class for the given character.
|
|
* It is necessary to pass a pointer because the
|
|
* character may be more than one byte.
|
|
*
|
|
*
|
|
* Please notice that the function currently only supports multibyte charsets.
|
|
* If this is called for an ASCII charset, it always returns PROHIBIT_WORD_BREAK.
|
|
*
|
|
* References for line breaking:
|
|
* <UL>
|
|
* <LI>
|
|
* Japanese Standard Association,
|
|
* JIS X 4501 1995 - Japanese Industrial Standard -
|
|
* Line Composition rules for Japanese documents
|
|
* <LI>
|
|
* Ken Lunde,
|
|
* Understanding Japanese Information Processing,
|
|
* O'Reilly & Associates, Inc.,
|
|
* ISBN:1-56592-043-0,
|
|
* pp.148
|
|
* <LI>
|
|
* Nadine Kano,
|
|
* Developing International Software For Windows 95 and Windows NT,
|
|
* Microsoft Press,
|
|
* ISBN:1-556-15-840-8,
|
|
* pp.239-244
|
|
* </UL>
|
|
*
|
|
*
|
|
* @param winCharSetID Specifies window charset ID.
|
|
* @param pstr Specifies the pointer to the character
|
|
* @return the kinsoku class for line breaking:
|
|
* <UL>
|
|
* <LI>
|
|
* PROHIBIT_NOWHERE -
|
|
* It is a breakable character. A break is allowed before
|
|
* or after this character. This class is for all
|
|
* Kanji ideographic character.
|
|
* <LI>
|
|
* PROHIBIT_BEGIN_OF_LINE -
|
|
* It should not appear at the beginning of a line.
|
|
* <LI>
|
|
* PROHIBIT_END_OF_LINE -
|
|
* It should not appear at the end of a line.
|
|
* <LI>
|
|
* PROHIBIT_WORD_BREAK -
|
|
* It is a non-breakable character. A break is not allowed
|
|
* if the next (or previous) character is also
|
|
* PROHIBIT_WORD_BREAK.
|
|
* </UL>
|
|
* @see INTL_CharClass
|
|
* @see LINE_WRAP_PROHIBIT_CLASS
|
|
*/
|
|
PUBLIC int INTL_KinsokuClass(
|
|
int16 winCharSetID,
|
|
unsigned char *pstr
|
|
);
|
|
|
|
/**
|
|
* Returns the column width of the given character.
|
|
*
|
|
* In some countries, old terminals use full-width and half-width characters.
|
|
* This function returns the number of "columns" taken up by the given
|
|
* character. For example, in Japan, normal characters take up 2 columns,
|
|
* while half-width characters take up 1 column each.
|
|
*
|
|
* Returns 1 for charsets that do not distinguish between half-width and
|
|
* full-width characters.
|
|
*
|
|
* @param winCharSetID Specifies the charset ID of the text
|
|
* @param pstr Specifies the character
|
|
* @return The column width of the given character
|
|
* @see INTL_IsHalfWidth
|
|
*/
|
|
PUBLIC int INTL_ColumnWidth(
|
|
int winCharSetID,
|
|
unsigned char *pstr
|
|
);
|
|
|
|
/**
|
|
* Truncates a long string by replacing excess characters in the middle
|
|
* with "...".
|
|
*
|
|
* The output_return pointer may be the same as the input pointer.
|
|
*
|
|
* @param winCharSetID Specifies the charset ID of the text
|
|
* @param input Specifies the text to be mid-truncated
|
|
* @param output_return Returns the mid-truncated text
|
|
* @param max_length Specifies the desired number of bytes to be placed in
|
|
* the output buffer, minus 1 for null terminator
|
|
*/
|
|
PUBLIC void INTL_MidTruncateString(
|
|
int16 winCharSetID,
|
|
const char *input,
|
|
char *output_return,
|
|
int max_length
|
|
);
|
|
|
|
/**
|
|
* Returns whether or not the given character is a half-width character.
|
|
*
|
|
* In some countries, certain characters are normal width on old terminals,
|
|
* while other characters are half-width. For example, normal Japanese
|
|
* characters are considered normal width, while "hankaku kana" are
|
|
* half-width, as are the ASCII characters.
|
|
*
|
|
* @param winCharSetID Specifies the charset ID of the text
|
|
* @param pstr Specifies the character
|
|
* @return
|
|
* 0 if the given character is ASCII or the charset does not normally
|
|
* distinguish between half-width and full-width,
|
|
* 1 if the given character is half-width
|
|
* @see INTL_ColumnWidth
|
|
*/
|
|
PUBLIC int INTL_IsHalfWidth(
|
|
uint16 winCharSetID,
|
|
unsigned char *pstr
|
|
);
|
|
|
|
/*@}*/
|
|
/*=======================================================*/
|
|
/**@name Document Context Handling */
|
|
/*@{*/
|
|
/**
|
|
* Request a re-layout of the document.
|
|
*
|
|
* Libi18n calls this function in those cases where a different document
|
|
* encoding is detected after document conversion and layout has begun.
|
|
* This can occur because the parsing and layout of the document begins
|
|
* immediately when the document data begins to stream in - at which time
|
|
* all the data needed to determine the charset may not be available. If
|
|
* this occurs, the layout engine needs to be notified to pull the data from
|
|
* the source (cache) again so the data will be converted by the correct
|
|
* character codeset conversion module in the data stream.
|
|
*
|
|
* @param context Specifies the context that should be laid out again.
|
|
*/
|
|
PUBLIC void
|
|
INTL_Relayout(iDocumentContext context);
|
|
|
|
/**
|
|
* Returns name of the document charset.
|
|
*
|
|
* The returned string is suitable for use in the window brought up by
|
|
* View | Page Info (previously known as Document Info). It also provides
|
|
* information such as whether this charset was auto-detected.
|
|
*
|
|
* @param doc_context Specifies the document context
|
|
* @return Name (string) of the document charset
|
|
*/
|
|
PUBLIC char *INTL_CharSetDocInfo(
|
|
iDocumentContext doc_context
|
|
);
|
|
|
|
/**
|
|
* Get the UI charset encoding setting.
|
|
*
|
|
* Gets the currently selected charset encoding for this document
|
|
* (not the global default and not the detected document encoding).
|
|
*
|
|
* @param context Specifies document context
|
|
* @return Document charset ID selected by the user
|
|
*
|
|
*/
|
|
PUBLIC uint16 FE_DefaultDocCharSetID(
|
|
iDocumentContext context
|
|
);
|
|
|
|
/**
|
|
* Change the default document charset ID.
|
|
*
|
|
* This function is currently only implemented and called by the Windows
|
|
* platform. It will be removed in the future to keep the consistency between
|
|
* platforms.
|
|
*
|
|
* @param defaultDocCharSetID Specifies the new default document charset ID
|
|
* @version DEPRECATED. Do not use this function.
|
|
*/
|
|
#if defined(XP_WIN) || defined(XP_OS2)
|
|
PUBLIC void
|
|
INTL_ChangeDefaultCharSetID(int16 defaultDocCharSetID);
|
|
#endif
|
|
|
|
/**
|
|
* Return default charset from preference or from current encoding
|
|
* menu selection.
|
|
*
|
|
* @param context Specifies the context
|
|
* @return Default document charset ID. If the context is NULL
|
|
* then it returns default charset from the user preference.
|
|
* If the context is specified then it returns current
|
|
* encoding menu selection.
|
|
*/
|
|
PUBLIC int16
|
|
INTL_DefaultDocCharSetID(iDocumentContext context);
|
|
|
|
/**
|
|
* Returns the default window charset ID for the given document context.
|
|
*
|
|
* If context is NULL, or the context's window charset ID is zero, this
|
|
* function calls INTL_DefaultWinCharSetID, passing the same context.
|
|
*
|
|
* @param context Specifies the document context
|
|
* @return The default window charset ID for this document context
|
|
* @see INTL_DefaultWinCharSetID
|
|
*/
|
|
PUBLIC int16 INTL_DefaultTextAttributeCharSetID(
|
|
iDocumentContext context
|
|
);
|
|
|
|
/**
|
|
* Returns the default window charset ID for the given document context.
|
|
*
|
|
* If context is NULL, or if the context's window charset ID is zero, this
|
|
* function calls INTL_DefaultDocCharSetID, passing the same context, and then
|
|
* calls INTL_DocToWinCharSetID on the result.
|
|
*
|
|
* @param context Specifies the document context
|
|
* @return Default window charset ID for this document context
|
|
* @see INTL_DefaultDocCharSetID, INTL_DocToWinCharSetID
|
|
*/
|
|
PUBLIC int16 INTL_DefaultWinCharSetID(
|
|
iDocumentContext context
|
|
);
|
|
/**
|
|
* Set up the charset conversion stream module.
|
|
*
|
|
* This function gets the charset info object from the context, and then
|
|
* picks up the relayout flag and the document charset ID before calling
|
|
* INTL_CSIInitialize. It then creates the appropriate charset converter
|
|
* to convert from the document to window charset. The stream is set up
|
|
* by setting the various function pointers (put, abort, complete, etc).
|
|
* It then hooks up to the next stream module "INTERNAL_PARSER", the HTML
|
|
* parser and layout engine. This is done by rewriting URL_s' content_type
|
|
* field.
|
|
*
|
|
* @param format_out Specifies the type of stream
|
|
* @param data_obj Ignored
|
|
* @param URL_s Specifies the URL object
|
|
* @param window_id Specifies the context
|
|
* @return Stream object corresponding to this charset conversion module
|
|
* @see INTL_CSIInitialize, NET_StreamBuilder
|
|
*/
|
|
PUBLIC Stream *INTL_ConvCharCode(
|
|
int format_out,
|
|
void *data_obj,
|
|
URL *URL_s,
|
|
iDocumentContext window_id
|
|
);
|
|
|
|
/**
|
|
* Converts mail charset to display charset used by current window.
|
|
*
|
|
* It decides which display charset to use based on current default language.
|
|
* Caller is responsible for deallocating memory.
|
|
*
|
|
* @param context the context (window ID).
|
|
* @param bit7buff Source buffer.
|
|
* @param block_size the length of the source buffer.
|
|
* @return Destination buffer. If NULL, this means either conversion failed or
|
|
* did single-byte to single-byte conversion.
|
|
*/
|
|
PUBLIC unsigned char *INTL_ConvMailToWinCharCode(
|
|
iDocumentContext context,
|
|
unsigned char *bit7buff,
|
|
uint32 block_size
|
|
);
|
|
|
|
/*@}*/
|
|
/*=======================================================*/
|
|
/**@name Platform Independent String Resources */
|
|
/*@{*/
|
|
/**
|
|
* Return the Charset name of the translated resource.
|
|
*
|
|
* @return MIME charset of the cross-platform string resource and FE
|
|
* resources
|
|
* @see XP_GetString
|
|
* @see XP_GetStringForHTML
|
|
*/
|
|
PUBLIC char *
|
|
INTL_ResourceCharSet(void);
|
|
|
|
/*@}*/
|
|
/*=======================================================*/
|
|
|
|
XP_END_PROTOS
|
|
|
|
#endif /* INTL_LIBI18N_H */
|