gecko-dev/lib/libi18n/jis2oth.c

/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 *
 * The contents of this file are subject to the Netscape Public License
 * Version 1.0 (the "NPL"); you may not use this file except in
 * compliance with the NPL.  You may obtain a copy of the NPL at
 * http://www.mozilla.org/NPL/
 *
 * Software distributed under the NPL is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
 * for the specific language governing rights and limitations under the
 * NPL.
 *
 * The Initial Developer of this code under the NPL is Netscape
 * Communications Corporation.  Portions created by Netscape are
 * Copyright (C) 1998 Netscape Communications Corporation.  All Rights
 * Reserved.
 */
/*	jis2oth.c	*/
/*	other: SJIS or EUC */

#include "intlpriv.h"


extern int MK_OUT_OF_MEMORY;


/* net_jis2other(obj, jisbuf, jisbufsz, uncvtbuf)
 * Args:
 *	jisbuf:		Ptr to a buf of JIS chars
 *	jisbufsz:	Size in bytes of jisbuf
 *	jismode:	Ptr to encoding mode, use as arg for next call to
 *		jis2other() for rest of current SJIS data.  First call should
 *		initialize mode to ASCII (0).
 *	uncvtbuf:	If entire buffer was converted, uncvtbuf[0] will be nul,
 *		else this points to SJIS chars that were NOT converted
 *		and jis2other() with additional SJIS chars appended.
 *	obj->cvtflag:	Specifies converting to either EUC or SJIS.
 * Return:
 *	Returns NULL on failure, otherwise it returns a pointer to a buffer of
 *	converted JIS characters.  Caller must XP_FREE() this memory.
 *
 * Description:
 *
 *	Allocate destination buffer (for SJIS or EUC).
 *
 *	Set JIS mode state based upon ESC sequences.  Also if NL or CR,
 *	mode is reset to JIS-Roman.
 *
 *  If JIS mode is JIS x208 and converting to EUC, set 8th bits of next 2 bytes.
 *
 *  If JIS mode is JIS x208-1983 and converting to SJIS, use the
 *	JIS to SJIS algorithm.
 *
 *  If JIS mode is JIS x212 and converting to EUC, output SS3 and set 8th
 *	bits of next 2 bytes.  (This mode only set when converting to EUC.)
 *
 *	If JIS Half-width Katakana and converting to EUC, output SS2 followed
 *	by the 2 bytes w/8th bits set.
 *
 *	If JIS Half-width Katakana and converting to SJIS, output the 2 bytes
 *	w/8th bits set.
 *
 *	If any other JIS mode, then assume Latin1 and just copy the next byte.
 *
 *	If either SJIS buffer does not contain complete JIS char or JIS buffer
 *	is full, then return unconverted SJIS to caller.  Caller should
 *	append more data and recall jis2other.
 */


MODULE_PRIVATE unsigned char *
jis2other(	CCCDataObject		obj,
			const unsigned char	*jisbuf,	/* JIS buffer for conversion*/
			int32				jisbufsz)	/* JIS buffer size in bytes	*/
{
 	unsigned char			*tobuf = NULL;
 	int32					tobufsz;
 	unsigned char	*tobufp, *jisp;		/* current byte in bufs	*/
 	unsigned char	*tobufep, *jisep;	/* end of buffers		*/
 	int32					uncvtlen;
	unsigned char *uncvtbuf = INTL_GetCCCUncvtbuf(obj);

#define sjisbuf		tobuf
#define sjisbufsz	tobufsz
#define sjisp		tobufp
#define sjisep		tobufep

#define eucbufsz	tobufsz
#define eucbuf		tobuf
#define eucp		tobufp
#define eucep		tobufep
 										/* Allocate a dest buffer:		*/
		/* JIS is usually longer than SJIS or EUC because of ESC seq.
		 *
		 * In the worst case (all Roman), converted SJIS will be the same
		 * length as the original JIS + 1 for nul byte
		 *
		 * In the worst case ( <ESC> ( I <rest Half-width Kana>... ),
		 * converted EUC will be 2X - 2 the size of the original JIS + 1 for nul
		 * byte.
		 */
	uncvtlen = strlen((char *)uncvtbuf);
	if (!INTL_GetCCCCvtflag(obj))
		tobufsz = jisbufsz + uncvtlen + 1;
	else
		tobufsz = (jisbufsz + uncvtlen) << 1;

	if (!tobufsz) {
		return NULL;
	}

	if ((tobuf = (unsigned char *)XP_ALLOC(tobufsz)) == (unsigned char *)NULL) {
		INTL_SetCCCRetval(obj, MK_OUT_OF_MEMORY);
		return(NULL);
	}
										/* Initialize pointers, etc.	*/
 	jisp = (unsigned char *)jisbuf;
 	jisep = jisp + jisbufsz - 1;

#define uncvtp	tobufp	/* use tobufp as temp */
							/* If prev. unconverted chars, append unconverted
							 * chars w/new chars and try to process.
							 */
 	if (uncvtbuf[0] != '\0') {
 		uncvtp = uncvtbuf + uncvtlen;
 		while (uncvtp < (uncvtbuf + UNCVTBUF_SIZE) &&
														jisp <= jisep)
 			*uncvtp++ = *jisp++;
 		*uncvtp = '\0';						/* nul terminate	*/
 		jisp = uncvtbuf;				/* process unconverted first */
 		jisep = uncvtp - 1;
 	}
#undef uncvtp

 	tobufp = tobuf;
 	tobufep = tobufp + tobufsz - 2;		/* save space for terminating null */

WHILELOOP:
									/* While JIS data && space in SJIS buf. */
 	while ((tobufp <= tobufep) && (jisp <= jisep)) {
		if (*jisp == ESC) {
 			if ((jisep - jisp) < 2)	/* Incomplete ESC seq in JIS buf?		*/
 				break;
			switch (jisp[1]) {
				case '(':
					switch (jisp[2]) {
						case 'J':				/* JIS X 0201-Roman			*/
						case 'B':				/* ASCII	*/
							INTL_SetCCCJismode(obj, JIS_Roman);
							jisp += 3;			/* remove ESC seq.	*/
							break;
						case 'I':				/* Half-width katakana	*/
							INTL_SetCCCJismode(obj, JIS_HalfKana);
							jisp += 3;			/* remove ESC seq.	*/
							break;
						default:				/* pass thru invalid ESC seq. */
							*tobufp++ = *jisp++;
							*tobufp++ = *jisp++;
					}
					break;
				case DOLLAR:
					switch (jisp[2]) {
				   		case 'B':				/* JIS X 0208-1983	*/
						case '@':				/* JIS X 0208-1978 (old-JIS) */
							INTL_SetCCCJismode(obj, JIS_208_83);
							jisp += 3;			/* remove rest of ESC seq.	*/
							break;
						case '(':
 							if ((jisep - jisp) < 3)	/* Full ESC seq in buf? */
 								goto abortwhile;
							switch (jisp[3]) {
								case 'D':			/* JIS X 0212-1990	*/
									if (!INTL_GetCCCCvtflag(obj))	/* No JIS212 in SJIS */
										INTL_SetCCCJismode(obj, JIS_208_83);
									else
										INTL_SetCCCJismode(obj, JIS_212_90);
									jisp += 4;	/* remove rest of ESC seq.	*/
									break;
								default:		/* pass thru invalid ESC seq. */
									*tobufp++ = *jisp++;
									*tobufp++ = *jisp++;
									break;
							}
							break;
						default:				/* pass thru invalid ESC seq. */
							*tobufp++ = *jisp++;
							*tobufp++ = *jisp++;
					}
					break;
				case '-':
					switch (jisp[2]) {
						case 'A':				/* ISO8859-1			*/
							INTL_SetCCCJismode(obj, JIS_Roman);
							jisp += 3;			/* remove rest of ESC seq.	*/
							break;
						default:				/* pass thru invalid ESC seq. */
							*tobufp++ = *jisp++;
							*tobufp++ = *jisp++;
					}
					break;
				default:						/* pass thru invalid ESC seq. */
					*tobufp++ = *jisp++;
			}
		} else if (*jisp == NL || *jisp == CR) {
			INTL_SetCCCJismode(obj, JIS_Roman);
			*tobufp++ = *jisp++;
		} else if (INTL_GetCCCJismode(obj) == JIS_208_83) {
 			if ((jisp+1) > jisep)		/* Incomplete 2Byte char in JIS buf? */
 				break;

			if (INTL_GetCCCCvtflag(obj)) {				/* Convert JIS 208 to EUC		*/
				*eucp++ = *jisp | 0x80;
				jisp++;
				*eucp++ = *jisp | 0x80;
				jisp++;

			} else {				/* Convert JIS-208 to SJIS: 	Same as	*/
									/* euc2sjis.c's EUC208-to-SJIS algorithm */
									/* except JIS 8th bit is clear. */
				if (*jisp < 0x5F)			/* Convert 1st SJIS byte	*/
					*sjisp++ = ((*jisp + 1) >> 1) + 0x70;
				else
					*sjisp++ = ((*jisp + 1) >> 1) + 0xB0;
											/* Convert 2nd SJIS byte	*/

				if ((*jisp++) & 1) {		/* if 1st JIS byte is odd	*/
					if (*jisp > 0x5F)
						*sjisp = *jisp + 0x20;
					else
						*sjisp = *jisp + 0x1F;
				} else {
					*sjisp = *jisp + 0x7E;
				}
				sjisp++;
				jisp++;
			}
		} else if (INTL_GetCCCJismode(obj) == JIS_212_90) {
										/* only "to EUC" supports 212	*/
 			if ((jisp+1) > jisep)		/* Incomplete 2Byte char in JIS buf? */
 				break;
			*eucp++ = SS3;
			*eucp++ = *jisp | 0x80;
			jisp++;
			*eucp++ = *jisp | 0x80;
			jisp++;
		} else if (INTL_GetCCCJismode(obj) == JIS_HalfKana) {
				if (INTL_GetCCCCvtflag(obj)) {
					*eucp++ = SS2;
				}
				*tobufp++ = *jisp | 0x80;		/* Set 8th bit for EUC & SJIS */
				jisp++;
		} else {
											/* Unknown type: no conversion	*/
			*tobufp++ = *jisp++;
		}
	}
abortwhile:

 	if (uncvtbuf[0] != '\0') {
										/* Just processed unconverted chars:
										 * jisp pts to 1st unprocessed char in
 										 * jisbuf.  Some may have been processed
 										 * while processing unconverted chars,
 										 * so set up ptrs not to process them
 										 * twice.
 										 */
										/* If nothing was converted, this can
										 * only happen if there was not
										 * enough JIS data.  Stop and get
										 * more data.
										 */
		if (jisp == uncvtbuf) {	/* Nothing converted */
			*tobufp = '\0';
			return(NULL);
		}
 		jisep = (unsigned char *)jisbuf + jisbufsz - 1 ;
 		jisp = (unsigned char *)jisbuf + (jisp - uncvtbuf - uncvtlen);
 		uncvtbuf[0] = '\0';			/* No more uncoverted chars.	*/
 		goto WHILELOOP;					/* Process new data				*/
 	}

	*tobufp = '\0';						/* null terminate dest. data */
	INTL_SetCCCLen(obj,  tobufp - tobuf);			/* length not counting null	*/

 	if (jisp <= jisep) {				/* uncoverted JIS?		*/
		tobufp = uncvtbuf;				/* reuse the tobufp as a TEMP */
 		while (jisp <= jisep)
 			*tobufp++ = *jisp++;
 		*tobufp = '\0';					/* null terminate		*/
 	}
	return(tobuf);
}