mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-05 08:35:26 +00:00
294 lines
9.0 KiB
C
294 lines
9.0 KiB
C
|
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
|
||
|
*
|
||
|
* The contents of this file are subject to the Netscape Public License
|
||
|
* Version 1.0 (the "NPL"); you may not use this file except in
|
||
|
* compliance with the NPL. You may obtain a copy of the NPL at
|
||
|
* http://www.mozilla.org/NPL/
|
||
|
*
|
||
|
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||
|
* for the specific language governing rights and limitations under the
|
||
|
* NPL.
|
||
|
*
|
||
|
* The Initial Developer of this code under the NPL is Netscape
|
||
|
* Communications Corporation. Portions created by Netscape are
|
||
|
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||
|
* Reserved.
|
||
|
*/
|
||
|
/* jis2oth.c */
|
||
|
/* other: SJIS or EUC */
|
||
|
|
||
|
#include "intlpriv.h"
|
||
|
|
||
|
|
||
|
extern int MK_OUT_OF_MEMORY;
|
||
|
|
||
|
|
||
|
/* net_jis2other(obj, jisbuf, jisbufsz, uncvtbuf)
|
||
|
* Args:
|
||
|
* jisbuf: Ptr to a buf of JIS chars
|
||
|
* jisbufsz: Size in bytes of jisbuf
|
||
|
* jismode: Ptr to encoding mode, use as arg for next call to
|
||
|
* jis2other() for rest of current SJIS data. First call should
|
||
|
* initialize mode to ASCII (0).
|
||
|
* uncvtbuf: If entire buffer was converted, uncvtbuf[0] will be nul,
|
||
|
* else this points to SJIS chars that were NOT converted
|
||
|
* and jis2other() with additional SJIS chars appended.
|
||
|
* obj->cvtflag: Specifies converting to either EUC or SJIS.
|
||
|
* Return:
|
||
|
* Returns NULL on failure, otherwise it returns a pointer to a buffer of
|
||
|
* converted JIS characters. Caller must XP_FREE() this memory.
|
||
|
*
|
||
|
* Description:
|
||
|
*
|
||
|
* Allocate destination buffer (for SJIS or EUC).
|
||
|
*
|
||
|
* Set JIS mode state based upon ESC sequences. Also if NL or CR,
|
||
|
* mode is reset to JIS-Roman.
|
||
|
*
|
||
|
* If JIS mode is JIS x208 and converting to EUC, set 8th bits of next 2 bytes.
|
||
|
*
|
||
|
* If JIS mode is JIS x208-1983 and converting to SJIS, use the
|
||
|
* JIS to SJIS algorithm.
|
||
|
*
|
||
|
* If JIS mode is JIS x212 and converting to EUC, output SS3 and set 8th
|
||
|
* bits of next 2 bytes. (This mode only set when converting to EUC.)
|
||
|
*
|
||
|
* If JIS Half-width Katakana and converting to EUC, output SS2 followed
|
||
|
* by the 2 bytes w/8th bits set.
|
||
|
*
|
||
|
* If JIS Half-width Katakana and converting to SJIS, output the 2 bytes
|
||
|
* w/8th bits set.
|
||
|
*
|
||
|
* If any other JIS mode, then assume Latin1 and just copy the next byte.
|
||
|
*
|
||
|
* If either SJIS buffer does not contain complete JIS char or JIS buffer
|
||
|
* is full, then return unconverted SJIS to caller. Caller should
|
||
|
* append more data and recall jis2other.
|
||
|
*/
|
||
|
|
||
|
|
||
|
MODULE_PRIVATE unsigned char *
|
||
|
jis2other( CCCDataObject obj,
|
||
|
const unsigned char *jisbuf, /* JIS buffer for conversion*/
|
||
|
int32 jisbufsz) /* JIS buffer size in bytes */
|
||
|
{
|
||
|
unsigned char *tobuf = NULL;
|
||
|
int32 tobufsz;
|
||
|
unsigned char *tobufp, *jisp; /* current byte in bufs */
|
||
|
unsigned char *tobufep, *jisep; /* end of buffers */
|
||
|
int32 uncvtlen;
|
||
|
unsigned char *uncvtbuf = INTL_GetCCCUncvtbuf(obj);
|
||
|
|
||
|
#define sjisbuf tobuf
|
||
|
#define sjisbufsz tobufsz
|
||
|
#define sjisp tobufp
|
||
|
#define sjisep tobufep
|
||
|
|
||
|
#define eucbufsz tobufsz
|
||
|
#define eucbuf tobuf
|
||
|
#define eucp tobufp
|
||
|
#define eucep tobufep
|
||
|
/* Allocate a dest buffer: */
|
||
|
/* JIS is usually longer than SJIS or EUC because of ESC seq.
|
||
|
*
|
||
|
* In the worst case (all Roman), converted SJIS will be the same
|
||
|
* length as the original JIS + 1 for nul byte
|
||
|
*
|
||
|
* In the worst case ( <ESC> ( I <rest Half-width Kana>... ),
|
||
|
* converted EUC will be 2X - 2 the size of the original JIS + 1 for nul
|
||
|
* byte.
|
||
|
*/
|
||
|
uncvtlen = strlen((char *)uncvtbuf);
|
||
|
if (!INTL_GetCCCCvtflag(obj))
|
||
|
tobufsz = jisbufsz + uncvtlen + 1;
|
||
|
else
|
||
|
tobufsz = (jisbufsz + uncvtlen) << 1;
|
||
|
|
||
|
if (!tobufsz) {
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
if ((tobuf = (unsigned char *)XP_ALLOC(tobufsz)) == (unsigned char *)NULL) {
|
||
|
INTL_SetCCCRetval(obj, MK_OUT_OF_MEMORY);
|
||
|
return(NULL);
|
||
|
}
|
||
|
/* Initialize pointers, etc. */
|
||
|
jisp = (unsigned char *)jisbuf;
|
||
|
jisep = jisp + jisbufsz - 1;
|
||
|
|
||
|
#define uncvtp tobufp /* use tobufp as temp */
|
||
|
/* If prev. unconverted chars, append unconverted
|
||
|
* chars w/new chars and try to process.
|
||
|
*/
|
||
|
if (uncvtbuf[0] != '\0') {
|
||
|
uncvtp = uncvtbuf + uncvtlen;
|
||
|
while (uncvtp < (uncvtbuf + UNCVTBUF_SIZE) &&
|
||
|
jisp <= jisep)
|
||
|
*uncvtp++ = *jisp++;
|
||
|
*uncvtp = '\0'; /* nul terminate */
|
||
|
jisp = uncvtbuf; /* process unconverted first */
|
||
|
jisep = uncvtp - 1;
|
||
|
}
|
||
|
#undef uncvtp
|
||
|
|
||
|
tobufp = tobuf;
|
||
|
tobufep = tobufp + tobufsz - 2; /* save space for terminating null */
|
||
|
|
||
|
WHILELOOP:
|
||
|
/* While JIS data && space in SJIS buf. */
|
||
|
while ((tobufp <= tobufep) && (jisp <= jisep)) {
|
||
|
if (*jisp == ESC) {
|
||
|
if ((jisep - jisp) < 2) /* Incomplete ESC seq in JIS buf? */
|
||
|
break;
|
||
|
switch (jisp[1]) {
|
||
|
case '(':
|
||
|
switch (jisp[2]) {
|
||
|
case 'J': /* JIS X 0201-Roman */
|
||
|
case 'B': /* ASCII */
|
||
|
INTL_SetCCCJismode(obj, JIS_Roman);
|
||
|
jisp += 3; /* remove ESC seq. */
|
||
|
break;
|
||
|
case 'I': /* Half-width katakana */
|
||
|
INTL_SetCCCJismode(obj, JIS_HalfKana);
|
||
|
jisp += 3; /* remove ESC seq. */
|
||
|
break;
|
||
|
default: /* pass thru invalid ESC seq. */
|
||
|
*tobufp++ = *jisp++;
|
||
|
*tobufp++ = *jisp++;
|
||
|
}
|
||
|
break;
|
||
|
case DOLLAR:
|
||
|
switch (jisp[2]) {
|
||
|
case 'B': /* JIS X 0208-1983 */
|
||
|
case '@': /* JIS X 0208-1978 (old-JIS) */
|
||
|
INTL_SetCCCJismode(obj, JIS_208_83);
|
||
|
jisp += 3; /* remove rest of ESC seq. */
|
||
|
break;
|
||
|
case '(':
|
||
|
if ((jisep - jisp) < 3) /* Full ESC seq in buf? */
|
||
|
goto abortwhile;
|
||
|
switch (jisp[3]) {
|
||
|
case 'D': /* JIS X 0212-1990 */
|
||
|
if (!INTL_GetCCCCvtflag(obj)) /* No JIS212 in SJIS */
|
||
|
INTL_SetCCCJismode(obj, JIS_208_83);
|
||
|
else
|
||
|
INTL_SetCCCJismode(obj, JIS_212_90);
|
||
|
jisp += 4; /* remove rest of ESC seq. */
|
||
|
break;
|
||
|
default: /* pass thru invalid ESC seq. */
|
||
|
*tobufp++ = *jisp++;
|
||
|
*tobufp++ = *jisp++;
|
||
|
break;
|
||
|
}
|
||
|
break;
|
||
|
default: /* pass thru invalid ESC seq. */
|
||
|
*tobufp++ = *jisp++;
|
||
|
*tobufp++ = *jisp++;
|
||
|
}
|
||
|
break;
|
||
|
case '-':
|
||
|
switch (jisp[2]) {
|
||
|
case 'A': /* ISO8859-1 */
|
||
|
INTL_SetCCCJismode(obj, JIS_Roman);
|
||
|
jisp += 3; /* remove rest of ESC seq. */
|
||
|
break;
|
||
|
default: /* pass thru invalid ESC seq. */
|
||
|
*tobufp++ = *jisp++;
|
||
|
*tobufp++ = *jisp++;
|
||
|
}
|
||
|
break;
|
||
|
default: /* pass thru invalid ESC seq. */
|
||
|
*tobufp++ = *jisp++;
|
||
|
}
|
||
|
} else if (*jisp == NL || *jisp == CR) {
|
||
|
INTL_SetCCCJismode(obj, JIS_Roman);
|
||
|
*tobufp++ = *jisp++;
|
||
|
} else if (INTL_GetCCCJismode(obj) == JIS_208_83) {
|
||
|
if ((jisp+1) > jisep) /* Incomplete 2Byte char in JIS buf? */
|
||
|
break;
|
||
|
|
||
|
if (INTL_GetCCCCvtflag(obj)) { /* Convert JIS 208 to EUC */
|
||
|
*eucp++ = *jisp | 0x80;
|
||
|
jisp++;
|
||
|
*eucp++ = *jisp | 0x80;
|
||
|
jisp++;
|
||
|
|
||
|
} else { /* Convert JIS-208 to SJIS: Same as */
|
||
|
/* euc2sjis.c's EUC208-to-SJIS algorithm */
|
||
|
/* except JIS 8th bit is clear. */
|
||
|
if (*jisp < 0x5F) /* Convert 1st SJIS byte */
|
||
|
*sjisp++ = ((*jisp + 1) >> 1) + 0x70;
|
||
|
else
|
||
|
*sjisp++ = ((*jisp + 1) >> 1) + 0xB0;
|
||
|
/* Convert 2nd SJIS byte */
|
||
|
|
||
|
if ((*jisp++) & 1) { /* if 1st JIS byte is odd */
|
||
|
if (*jisp > 0x5F)
|
||
|
*sjisp = *jisp + 0x20;
|
||
|
else
|
||
|
*sjisp = *jisp + 0x1F;
|
||
|
} else {
|
||
|
*sjisp = *jisp + 0x7E;
|
||
|
}
|
||
|
sjisp++;
|
||
|
jisp++;
|
||
|
}
|
||
|
} else if (INTL_GetCCCJismode(obj) == JIS_212_90) {
|
||
|
/* only "to EUC" supports 212 */
|
||
|
if ((jisp+1) > jisep) /* Incomplete 2Byte char in JIS buf? */
|
||
|
break;
|
||
|
*eucp++ = SS3;
|
||
|
*eucp++ = *jisp | 0x80;
|
||
|
jisp++;
|
||
|
*eucp++ = *jisp | 0x80;
|
||
|
jisp++;
|
||
|
} else if (INTL_GetCCCJismode(obj) == JIS_HalfKana) {
|
||
|
if (INTL_GetCCCCvtflag(obj)) {
|
||
|
*eucp++ = SS2;
|
||
|
}
|
||
|
*tobufp++ = *jisp | 0x80; /* Set 8th bit for EUC & SJIS */
|
||
|
jisp++;
|
||
|
} else {
|
||
|
/* Unknown type: no conversion */
|
||
|
*tobufp++ = *jisp++;
|
||
|
}
|
||
|
}
|
||
|
abortwhile:
|
||
|
|
||
|
if (uncvtbuf[0] != '\0') {
|
||
|
/* Just processed unconverted chars:
|
||
|
* jisp pts to 1st unprocessed char in
|
||
|
* jisbuf. Some may have been processed
|
||
|
* while processing unconverted chars,
|
||
|
* so set up ptrs not to process them
|
||
|
* twice.
|
||
|
*/
|
||
|
/* If nothing was converted, this can
|
||
|
* only happen if there was not
|
||
|
* enough JIS data. Stop and get
|
||
|
* more data.
|
||
|
*/
|
||
|
if (jisp == uncvtbuf) { /* Nothing converted */
|
||
|
*tobufp = '\0';
|
||
|
return(NULL);
|
||
|
}
|
||
|
jisep = (unsigned char *)jisbuf + jisbufsz - 1 ;
|
||
|
jisp = (unsigned char *)jisbuf + (jisp - uncvtbuf - uncvtlen);
|
||
|
uncvtbuf[0] = '\0'; /* No more uncoverted chars. */
|
||
|
goto WHILELOOP; /* Process new data */
|
||
|
}
|
||
|
|
||
|
*tobufp = '\0'; /* null terminate dest. data */
|
||
|
INTL_SetCCCLen(obj, tobufp - tobuf); /* length not counting null */
|
||
|
|
||
|
if (jisp <= jisep) { /* uncoverted JIS? */
|
||
|
tobufp = uncvtbuf; /* reuse the tobufp as a TEMP */
|
||
|
while (jisp <= jisep)
|
||
|
*tobufp++ = *jisp++;
|
||
|
*tobufp = '\0'; /* null terminate */
|
||
|
}
|
||
|
return(tobuf);
|
||
|
}
|
||
|
|