gecko-dev/lib/libi18n/csstrlen.c

/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 *
 * The contents of this file are subject to the Netscape Public License
 * Version 1.0 (the "NPL"); you may not use this file except in
 * compliance with the NPL.  You may obtain a copy of the NPL at
 * http://www.mozilla.org/NPL/
 *
 * Software distributed under the NPL is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
 * for the specific language governing rights and limitations under the
 * NPL.
 *
 * The Initial Developer of this code under the NPL is Netscape
 * Communications Corporation.  Portions created by Netscape are
 * Copyright (C) 1998 Netscape Communications Corporation.  All Rights
 * Reserved.
 */
/*	csstrlen.c	*/
/*
	Routines that tell you information about one csid
*/
#include "intlpriv.h"

/*	csinfoindex and csinfo_tbl work together for performance improvement.
	Whenever you add an entry inside csinfo_tbl, you also need to change
	csinfoindex.
*/
/*	Maximum number of distinct first-byte ranges stored per charset row */
#define MAX_FIRSTBYTE_RANGE  3
/*	Per-charset description of multibyte characters.
	Each enc[] slot describes one range of first-byte values; the lookup
	loops in this file stop at the first slot whose bytes field is 0, so
	unused slots are left zeroed.
*/
typedef struct {
    struct {
        unsigned char bytes;       /* number of bytes for range */
        unsigned char columns;     /* number of columns for range */
        unsigned char range[2];    /* Multibyte first byte range, compared inclusively: [0]=low, [1]=high */
    } enc[MAX_FIRSTBYTE_RANGE];
} csinfo_t;

/*	Table of first-byte ranges, one row per family of multibyte charsets.
	Rows are selected through csinfoindex (see INTL_GETTBLINDEX), so the
	row order must stay in step with the values stored there.
	The final all-zero row is not referenced by any csinfoindex entry.
	NOTE(review): rows 5 and 6 both carry the "For UTF8" label although
	csinfoindex maps different csid low bytes (0x22 and 0xc0) to them -
	confirm which charset row 6 really describes.
*/
PRIVATE csinfo_t csinfo_tbl[] =
{
/* b = bytes; c = columns                                               */
/*				b c  range 1       b c  range 2       b c  range 3     */
	/*  0 */ {{{2,2,{0x81,0x9f}}, {2,2,{0xe0,0xfc}}, {0,0,{0x00,0x00}}}},	/* For SJIS */
	/*  1 */ {{{2,2,{0xa1,0xfe}}, {2,1,{0x8e,0x8e}}, {3,2,{0x8f,0x8f}}}},	/* For EUC_JP */
	/*  2 */ {{{2,2,{0xa1,0xfe}}, {0,0,{0x00,0x00}}, {0,0,{0x00,0x00}}}},	/* For BIG5 GB KSC */
	/*  3 */ {{{2,2,{0xa1,0xfe}}, {4,2,{0x8e,0x8e}}, {0,0,{0x00,0x00}}}},	/* For CNS_8BIT */
	/*  4 */ {{{2,2,{0x21,0x7e}}, {0,0,{0x00,0x00}}, {0,0,{0x00,0x00}}}},	/* For 2 Byte GL */
	/*  5 */ {{{2,2,{0xC0,0xDF}}, {3,2,{0xE0,0xEF}}, {0,0,{0x00,0x00}}}},	/* For UTF8 */
	/*  6 */ {{{2,1,{0xC0,0xCF}}, {0,0,{0x00,0x00}}, {0,0,{0x00,0x00}}}},	/* For UTF8 */
	/*  7 */ {{{0,0,{0x00,0x00}}, {0,0,{0x00,0x00}}, {0,0,{0x00,0x00}}}}	/* all-zero row, unused by csinfoindex */
};
/*	Array to index from the lower 8 bits of csid into the index of csinfo_tbl.
	-1 means the csid has no row in csinfo_tbl.  Entries currently set:
		0x04 -> 0    0x05 -> 1    0x07, 0x08, 0x0c -> 2    0x09 -> 3
		0x18, 0x19, 0x1a, 0x1b, 0x1d, 0x1e -> 4    0x22 -> 5    0xc0 -> 6
*/
PRIVATE int csinfoindex[256] =
{/*	0	1	2	3	4	5	6	7	8	9	a	b	c	d	e	f	*/
	-1,	-1,	-1,	-1,	0,	1,	-1,	2,	2,	3,	-1,	-1,	2,	-1,	-1,	-1,	/*	0x00 */
	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	4,	4,	4,	4,	-1,	4,	4,	-1,	/*	0x10 */
	-1,	-1,	5,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	/*	0x20 */
	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	/*	0x30 */
	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	/*	0x40 */
	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	/*	0x50 */
	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	/*	0x60 */
	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	/*	0x70 */
	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	/*	0x80 */
	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	/*	0x90 */
	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	/*	0xa0 */
	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	/*	0xb0 */
	 6,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	/*	0xc0 */
	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	/*	0xd0 */
	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	/*	0xe0 */
	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	-1,	/*	0xf0 */
};
/*	Row index in csinfo_tbl for a csid (or -1); only the low 8 bits of csid are used */
#define INTL_GETTBLINDEX(csid)	(csinfoindex[ (csid) & 0x00FF ])

/*	Return the csinfo_tbl row describing csid, or NULL when csinfoindex
	holds a negative value (no row) for that csid.
*/
PRIVATE csinfo_t* intl_GetInfoTbl(int16 csid)
{
	int row = INTL_GETTBLINDEX(csid);

	return (row >= 0) ? &csinfo_tbl[row] : NULL;
}

/***********************************************************
 INTL_MidTruncateString  truncate a string removing the
						 middle
 Input:  int16 csid       Char Set ID
         char *input      un-truncated, NUL terminated string
         int   max_length maximum number of bytes, not counting
                          the terminating NUL, to leave in output

 Output: char *output     pointer to truncated string buffer; it
                          must have room for max_length + 1 bytes
                          and may be the same pointer as input
                          (the string is then truncated in place)

 If input already fits it is copied unchanged.  Otherwise the
 result is <head>...<tail>, where both cuts fall on character
 boundaries as reported by INTL_NextChar.  When max_length is
 too small to hold the "..." (max_length < 3) the result is
 just the leading whole characters that fit, and a negative
 max_length leaves output untouched.
***********************************************************/

PUBLIC void
INTL_MidTruncateString (int16 csid, const char *input, char *output, int max_length)
{
  char *begin_part, *p, *next, *dst;
  int L = strlen (input);
  int begin_len, mid, rem;

  /*
   * If it fits then no need to truncate
   */
  if (L <= max_length)
    {
      /* strcpy on one and the same buffer is undefined, and pointless */
      if (output != input)
        strcpy (output, input);
      return;
    }

  /*
   * No room for the "...": writing it anyway would overrun output,
   * so keep only the leading whole characters that fit.
   */
  if (max_length < 3)
    {
      if (max_length < 0)
        return; /* output cannot even hold a terminator */

      begin_len = 0;
      for (p = (char *) input; *p; p = next)
        {
          next = INTL_NextChar (csid, p);
          if (next - input > max_length)
            break;
          begin_len = next - input;
        }
      if (output != input)
        strncpy (output, input, begin_len);
      output[begin_len] = '\0';
      return;
    }

  /*
   * find the 1st half
   */
  mid = (max_length - 3) / 2; /* approx 1st half */
  /* find 1st half to whole char */
  begin_part = (char *) input;
  for (p = begin_part; *p && p <= ((char *) input + mid);
       p = INTL_NextChar (csid, p))
    begin_part = p; /* remember last good point before mid */
  /* exact mid point */
  begin_len = begin_part - input;

  /*
   * find the remainder
   */
  rem = L - mid; /* approx remainder */
  /* find remainder to whole char; p ends on the first byte of the tail */
  for (p = begin_part; *p && p < ((char *) input + rem);
       p = INTL_NextChar (csid, p))
    continue;

  /*
   * Assemble head + "..." + tail.
   *
   * Because L > max_length >= 2 * mid + 3, the tail starts at an offset
   * of at least L - mid, which is greater than begin_len + 3.  So when
   * output == input the head is already in place, the "..." cannot
   * overwrite the tail, and a forward byte copy of the tail is safe;
   * no temporary buffer (and no allocation that could fail) is needed.
   */
  if (output != input)
    strncpy (output, input, begin_len);
  dst = output + begin_len;
  strncpy (dst, "...", 3);
  dst += 3;
  do
    {
      *dst++ = *p; /* copies the terminating NUL as well */
    }
  while (*p++ != '\0');
}
/***********************************************************
 INTL_NextChar  step over one character
 Input:  int (int16) charsetid		Char Set ID
         char *pstr				   points at the first byte of a multibyte
                      			   char or at a normal single byte char
 Output: return next char position.
         The result is pstr + 1 for single byte charsets, when *pstr
         is NUL, when the charset has no table row, when the first byte
         is in no listed range, or when the string ends before the
         multibyte character is complete.  Otherwise it is pstr plus the
         byte count of the matching range.
***********************************************************/
PUBLIC char * INTL_NextChar(int charsetid, char *pstr)
{
	csinfo_t	*tbl;
	unsigned char	lead;
	int		r;
	int		n;

	/* If no csid, assume it's not multibyte */
	if ((INTL_CharSetType(charsetid) == SINGLEBYTE) || (*pstr == 0))
		return pstr + 1;

	lead = *pstr;
	tbl = intl_GetInfoTbl((int16)charsetid);
	if (tbl == NULL)
		return pstr + 1;

	for (r = 0; r < MAX_FIRSTBYTE_RANGE && tbl->enc[r].bytes > 0; r++)
	{
		if ((lead < tbl->enc[r].range[0]) || (lead > tbl->enc[r].range[1]))
			continue;

		/* count how many of the expected bytes are really there */
		n = 0;
		while (pstr[n] && n < tbl->enc[r].bytes)
			n++;

		/* a character cut short by the terminator is stepped over as one byte */
		return (n < tbl->enc[r].bytes) ? pstr + 1 : pstr + n;
	}
	return pstr + 1;
}

/********************************************************
 INTL_IsLeadByte  classify a byte as multibyte first byte
 Input:  int charsetid			 Char Set ID
         unsigned char ch		 the byte to test
 Output: 0, if the charset is single byte, ch is 0, the charset has
            no table row, or ch is in none of the first byte ranges
         otherwise (bytes - 1) of the first matching range, i.e. the
            number of bytes that follow ch in the character
*********************************************************/

PUBLIC
int PR_CALLBACK
INTL_IsLeadByte(int charsetid, unsigned char ch)
{
	csinfo_t	*tbl;
	int		r;

	/* If no csid, assume it's not multibyte */
	if ((INTL_CharSetType(charsetid) == SINGLEBYTE) || (ch == 0))
		return 0;

	tbl = intl_GetInfoTbl((int16)charsetid);
	if (tbl == NULL)
		return 0;

	r = 0;
	while (r < MAX_FIRSTBYTE_RANGE && tbl->enc[r].bytes > 0)
	{
		if ((ch >= tbl->enc[r].range[0]) && (ch <= tbl->enc[r].range[1]))
			return tbl->enc[r].bytes - 1;
		r++;
	}
	return 0;
}

/*
	INTL_CharLen
		Input: 	charsetid - Char Set ID
				pstr      - points at the first byte of a character
		output: number of bytes in the character at pstr; 0 when pstr is
				NULL or points at the terminator.  A multibyte character
				cut short by the terminator yields only the bytes present.
*/
PUBLIC int
INTL_CharLen(int charsetid, unsigned char *pstr)
{
	int expected;
	int present;

	if ((pstr == NULL) || (*pstr == 0))
		return 0;

	expected = 1 + INTL_IsLeadByte(charsetid, *pstr);

	/* the first byte is known to be there; count the following ones */
	present = 1;
	while ((present < expected) && (pstr[present] != 0))
		present++;

	return present;
}

/*
	INTL_ColumnWidth
		Input: 	charsetid - Char Set ID
				str       - points at the first byte of a character
		output: 0 when str is NULL or empty; otherwise the columns value
				of the first table range containing the first byte, or 1
				for single byte charsets, charsets without a table row
				and bytes outside every range.
*/
PUBLIC int
INTL_ColumnWidth(int charsetid, unsigned char *str)
{
	csinfo_t	*tbl;
	unsigned char	lead;
	int		r;

	if ((!str) || (!*str))
		return 0;

	if (INTL_CharSetType(charsetid) == SINGLEBYTE)
		return 1;

	tbl = intl_GetInfoTbl((int16)charsetid);
	if (tbl == NULL)
		return 1;

	lead = *str;
	r = 0;
	while ((r < MAX_FIRSTBYTE_RANGE) && tbl->enc[r].bytes)
	{
		if ((lead >= tbl->enc[r].range[0]) && (lead <= tbl->enc[r].range[1]))
			return tbl->enc[r].columns;
		r++;
	}

	return 1;
}

/********************************************************
 INTL_NthByteOfChar  which byte of a character is at pos
 Input:  int (int16) charsetid	Char Set ID
         char *pstr				start of the text; must point at the first
								byte of a character
		 int  pos				1 based byte position inside pstr
 Output: 0,   if pos is on a single byte character, lies at or beyond
              the end of the string, is less than 1, pstr is NULL or
              empty, or the charset is single byte
 		 n,   (1, 2, 3, ...) if pos is on the n-th byte of a multibyte
              character
 Note:   The original header said this only works for ShiftJis type
         multibyte, not for JIS or EUC; the code itself relies solely
         on INTL_CharLen, so that remark may be out of date.
*********************************************************/
PUBLIC int
INTL_NthByteOfChar(int charsetid, char *pstr, int pos)
{
	int	start;		/* offset of the character covering pos */
	int	next;		/* offset of the character after it */

	pos--;			/* work with a 0 based offset from here on */

	if (INTL_CharSetType(charsetid) == SINGLEBYTE)
		return 0;
	if ((!pstr) || (!*pstr) || (pos < 0))
		return 0;

	/* walk character by character until we step past pos */
	start = 0;
	for (next = 0; pstr[next] && (next <= pos); )
	{
		start = next;
		next += INTL_CharLen(charsetid, (unsigned char *) &pstr[start]);
	}

	/* the string ended before reaching pos */
	if (next <= pos)
		return 0;

	/* single byte characters always report 0 */
	if (INTL_CharLen(charsetid, (unsigned char *) &pstr[start]) < 2)
		return 0;

	return pos - start + 1;
}

/*
	INTL_IsHalfWidth
		Input: 	win_csid - window csid
				pstr     - points at the first byte of a character
		output: 1 when win_csid is CS_SJIS and the first byte is in
				0xa1..0xdf, or when win_csid is CS_EUCJP and the first
				byte is 0x8e (presumably the half-width katakana forms);
				0 in every other case.
*/
PUBLIC int
INTL_IsHalfWidth(uint16 win_csid, unsigned char *pstr)
{
	int	lead = *pstr;

	switch (win_csid)
	{
	case CS_SJIS:
		return ((lead >= 0xa1) && (lead <= 0xdf)) ? 1 : 0;
	case CS_EUCJP:
		return (lead == 0x8e) ? 1 : 0;
	default:
		return 0;
	}
}


/*
	INTL_NextCharIdxInText
		Input: 	csid - window csid
				text - point to a text buffer
				pos  - original index position; must be the first byte
				       of a character
		output: index of the position of next character, i.e. pos plus
				INTL_CharLen of the character at pos
		Called by lo_next_character in layfind.c (per the original header)
*/
PUBLIC int INTL_NextCharIdxInText(int16 csid, unsigned char *text, int pos)
{
	unsigned char *cur = text + pos;
	int len = INTL_CharLen(csid, cur);

	return pos + len;
}
/*
	INTL_PrevCharIdxInText
		Input: 	csid - window csid
				text - point to a text buffer
				pos  - original index position
		output: index of the position of previous character; pos - 1 for
				single byte charsets and for pos <= 0 (nothing precedes
				the start of the buffer, so text is not examined)
		Method: scan backward to a byte in the ASCII range whose
				character ends before pos, then walk forward character
				by character until the next step would reach pos.
*/
PUBLIC int INTL_PrevCharIdxInText(int16 csid, unsigned char *text, int pos)
{
	int rev, ff, thislen;

	/*	pos <= 0 would otherwise start the forward walk at a negative
		index and read in front of the buffer. */
	if ((INTL_CharSetType(csid) == SINGLEBYTE) || (pos <= 0))
		return pos - 1;

	/*	First, backward to character in ASCII range */
	for (rev = pos - 1; rev > 0; rev--)
	{
		if (((text[rev] & 0x80) == 0) &&
		    ((rev + INTL_CharLen(csid, text + rev)) < pos))
			break;
	}

	/*	Then forward till we cross the position. */
	for (ff = rev; ff < pos; ff += thislen)
	{
		thislen = INTL_CharLen(csid, text + ff);
		/*	INTL_CharLen reports 0 on a NUL byte; step over it as one
			byte so the walk always makes progress. */
		if (thislen < 1)
			thislen = 1;
		if ((ff + thislen) >= pos)
			break;
	}
	return ff;
}

/*
	INTL_NextCharIdx
		Input: 	csid - window csid
				str  - point to a text buffer
				pos  - 0 based position
		output: 0 based next char position
		Note: this one works for any position no matter it's legal or not;
			  pos may be in the middle of a multibyte character.  For
			  single byte charsets, negative pos, or a position that
			  INTL_NthByteOfChar reports as 0, the answer is pos + 1.
*/

PUBLIC int INTL_NextCharIdx(int16 csid, unsigned char *str, int pos)
{
	int nth;
	unsigned char *first;

	if ((INTL_CharSetType(csid) == SINGLEBYTE) || (pos < 0))
		return pos + 1;

	/* INTL_NthByteOfChar takes a 1 based position */
	nth = INTL_NthByteOfChar(csid, (char *) str, pos + 1);
	if (nth == 0)
		return pos + 1;

	/* back up to the first byte of the character, then skip all of it */
	first = str + pos - nth + 1;
	return pos + INTL_CharLen(csid, first) - nth + 1;
}
/*
	INTL_PrevCharIdx
		Input: 	csid - window csid
				str  - point to a text buffer
				pos  - 0 based position
		output: 0 based prev char position; pos - 1 for single byte
				charsets and for pos <= 0
		Note: this one works for any position no matter it's legal or not.
			  DEBUG builds additionally trace a message and move pos back
			  to the first byte of its character when pos is not on a
			  first byte.
*/
PUBLIC int INTL_PrevCharIdx(int16 csid, unsigned char *str, int pos)
{
	int nth;

	if ((INTL_CharSetType(csid) == SINGLEBYTE) || (pos <= 0))
		return pos - 1;

#ifdef DEBUG
	nth = INTL_NthByteOfChar(csid, (char *) str, pos+1);
	if (nth > 1)
	{
		XP_TRACE(("Wrong position passed to INTL_PrevCharIdx"));
		pos -= (nth - 1);
	}
#endif

	/* step one byte back, then snap to the first byte of that character */
	pos--;
	nth = INTL_NthByteOfChar(csid, (char *) str, pos+1);
	return (nth > 1) ? (pos - nth + 1) : pos;
}


/*
	INTL_TextByteCountToCharLen
		Input: 	csid      - Char Set ID
				text      - point to a text buffer
				byteCount - number of bytes to examine
		output: number of characters found while stepping through the
				first byteCount bytes (a character that starts inside
				the span is counted even if it extends past it).
				byteCount itself for single byte charsets and for
				charsets without a table row.
*/
PUBLIC
int32  INTL_TextByteCountToCharLen(int16 csid, unsigned char* text, uint32 byteCount)
{
	csinfo_t	*tbl;
	uint32		offset;
	uint32		nchars;

	/* quickly return if it is zero */
	if (byteCount == 0)
		return 0;

	/* for single byte csid, byteCount equal to charLen */
	if (INTL_CharSetType(csid) == SINGLEBYTE)
		return byteCount;

	tbl = intl_GetInfoTbl(csid);
	if (tbl == NULL)
	{
		/* it should not come to here */
		XP_ASSERT(byteCount);
		return byteCount;
	}

	offset = 0;
	nchars = 0;
	while (offset < byteCount)
	{
		unsigned char lead = text[offset];
		int width = 1;		/* used when no range matches */
		int r;

		/* every range is examined; the last match decides the width */
		for (r = 0; r < MAX_FIRSTBYTE_RANGE && tbl->enc[r].bytes > 0; r++)
		{
			if ((lead >= tbl->enc[r].range[0]) && (lead <= tbl->enc[r].range[1]))
				width = tbl->enc[r].bytes;
		}

		nchars++;
		offset += width;
	}
	return nchars;
}


/*
	INTL_TextCharLenToByteCount
		Input: 	csid    - Char Set ID
				text    - point to a text buffer
				charLen - number of characters to measure
		output: number of bytes occupied by the first charLen characters
				of text, using the byte counts from the charset's table
				row.  charLen itself for single byte charsets and for
				charsets without a table row.
*/
PUBLIC
int32  INTL_TextCharLenToByteCount(int16 csid, unsigned char* text, uint32 charLen)
{
	csinfo_t	*tbl;
	uint32		offset;
	uint32		nchars;

	/* quickly return if it is zero */
	if (charLen == 0)
		return 0;

	/* for single byte csid, byteCount equal to charLen */
	if (INTL_CharSetType(csid) == SINGLEBYTE)
		return charLen;

	tbl = intl_GetInfoTbl(csid);
	if (tbl == NULL)
	{
		/* it should not come to here */
		XP_ASSERT(charLen);
		return charLen;
	}

	offset = 0;
	nchars = 0;
	while (nchars < charLen)
	{
		unsigned char lead = text[offset];
		int width = 1;		/* used when no range matches */
		int r;

		/* every range is examined; the last match decides the width */
		for (r = 0; r < MAX_FIRSTBYTE_RANGE && tbl->enc[r].bytes > 0; r++)
		{
			if ((lead >= tbl->enc[r].range[0]) && (lead <= tbl->enc[r].range[1]))
				width = tbl->enc[r].bytes;
		}

		nchars++;
		offset += width;
	}
	return offset;
}