mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-12 14:37:50 +00:00
70553064dd
--HG-- rename : intl/uconv/src/8859-1.uf => intl/uconv/8859-1.uf rename : intl/uconv/src/8859-1.ut => intl/uconv/8859-1.ut rename : intl/uconv/src/cp1252.uf => intl/uconv/cp1252.uf rename : intl/uconv/src/cp1252.ut => intl/uconv/cp1252.ut rename : intl/uconv/src/macroman.uf => intl/uconv/macroman.uf rename : intl/uconv/src/macroman.ut => intl/uconv/macroman.ut rename : intl/uconv/src/nsCP1252ToUnicode.cpp => intl/uconv/nsCP1252ToUnicode.cpp rename : intl/uconv/src/nsCP1252ToUnicode.h => intl/uconv/nsCP1252ToUnicode.h rename : intl/uconv/src/nsConverterInputStream.cpp => intl/uconv/nsConverterInputStream.cpp rename : intl/uconv/src/nsConverterInputStream.h => intl/uconv/nsConverterInputStream.h rename : intl/uconv/src/nsConverterOutputStream.cpp => intl/uconv/nsConverterOutputStream.cpp rename : intl/uconv/src/nsConverterOutputStream.h => intl/uconv/nsConverterOutputStream.h rename : intl/uconv/src/nsISO88591ToUnicode.cpp => intl/uconv/nsISO88591ToUnicode.cpp rename : intl/uconv/src/nsISO88591ToUnicode.h => intl/uconv/nsISO88591ToUnicode.h rename : intl/uconv/src/nsMacRomanToUnicode.cpp => intl/uconv/nsMacRomanToUnicode.cpp rename : intl/uconv/src/nsMacRomanToUnicode.h => intl/uconv/nsMacRomanToUnicode.h rename : intl/uconv/src/nsReplacementToUnicode.cpp => intl/uconv/nsReplacementToUnicode.cpp rename : intl/uconv/src/nsReplacementToUnicode.h => intl/uconv/nsReplacementToUnicode.h rename : intl/uconv/src/nsScriptableUConv.cpp => intl/uconv/nsScriptableUConv.cpp rename : intl/uconv/src/nsScriptableUConv.h => intl/uconv/nsScriptableUConv.h rename : intl/uconv/src/nsTextToSubURI.cpp => intl/uconv/nsTextToSubURI.cpp rename : intl/uconv/src/nsTextToSubURI.h => intl/uconv/nsTextToSubURI.h rename : intl/uconv/src/nsUConvModule.cpp => intl/uconv/nsUConvModule.cpp rename : intl/uconv/src/nsUTF8ConverterService.cpp => intl/uconv/nsUTF8ConverterService.cpp rename : intl/uconv/src/nsUTF8ConverterService.h => intl/uconv/nsUTF8ConverterService.h rename : 
intl/uconv/src/nsUTF8ToUnicode.cpp => intl/uconv/nsUTF8ToUnicode.cpp rename : intl/uconv/src/nsUTF8ToUnicode.h => intl/uconv/nsUTF8ToUnicode.h rename : intl/uconv/src/nsUTF8ToUnicodeSSE2.cpp => intl/uconv/nsUTF8ToUnicodeSSE2.cpp rename : intl/uconv/src/nsUnicodeToCP1252.cpp => intl/uconv/nsUnicodeToCP1252.cpp rename : intl/uconv/src/nsUnicodeToCP1252.h => intl/uconv/nsUnicodeToCP1252.h rename : intl/uconv/src/nsUnicodeToISO88591.cpp => intl/uconv/nsUnicodeToISO88591.cpp rename : intl/uconv/src/nsUnicodeToISO88591.h => intl/uconv/nsUnicodeToISO88591.h rename : intl/uconv/src/nsUnicodeToMacRoman.cpp => intl/uconv/nsUnicodeToMacRoman.cpp rename : intl/uconv/src/nsUnicodeToMacRoman.h => intl/uconv/nsUnicodeToMacRoman.h rename : intl/uconv/src/nsUnicodeToUTF8.cpp => intl/uconv/nsUnicodeToUTF8.cpp rename : intl/uconv/src/nsUnicodeToUTF8.h => intl/uconv/nsUnicodeToUTF8.h
155 lines
4.7 KiB
C++
155 lines
4.7 KiB
C++
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
|
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
//----------------------------------------------------------------------
|
|
// Global functions and data [declaration]
|
|
#include "nsUnicodeToUTF8.h"
|
|
|
|
// XPCOM boilerplate for nsUnicodeToUTF8, naming nsIUnicodeEncoder as the
// interface it is registered with.
// NOTE(review): presumably expands to the AddRef/Release/QueryInterface
// definitions for the class -- confirm against the macro's definition.
NS_IMPL_ISUPPORTS(nsUnicodeToUTF8, nsIUnicodeEncoder)
|
|
|
|
//----------------------------------------------------------------------
|
|
// nsUnicodeToUTF8 class [implementation]
|
|
|
|
/**
 * Reports an upper bound on the number of UTF-8 bytes needed to encode
 * aSrcLength UTF-16 code units.
 *
 * @param aSrc        [in] the UTF-16 input (not inspected here)
 * @param aSrcLength  [in] number of UTF-16 code units in the input
 * @param aDestLength [out] receives the byte bound
 * @return always NS_OK
 */
NS_IMETHODIMP nsUnicodeToUTF8::GetMaxLength(const char16_t * aSrc,
                                            int32_t aSrcLength,
                                            int32_t * aDestLength)
{
  // A single UTF-16 code unit never needs more than 3 bytes of UTF-8 (a
  // surrogate pair is 2 units for 4 bytes, which is still under 3 per unit).
  const int32_t kMaxBytesPerCodeUnit = 3;

  // A high surrogate may be left over from the previous buffer. If the first
  // unit of this buffer does not complete the pair, the leftover is flushed
  // as a 3-byte replacement character on top of the regular output.
  const int32_t kLeftoverSurrogateBytes = 3;

  *aDestLength = kMaxBytesPerCodeUnit * aSrcLength + kLeftoverSurrogateBytes;
  return NS_OK;
}
|
|
|
|
/**
 * Encodes a buffer of UTF-16 code units as UTF-8.
 *
 * A high surrogate that ends a buffer is remembered in mHighSurrogate and
 * completed (or replaced by U+FFFD) at the start of the next call. Unpaired
 * surrogates are emitted as the replacement character U+FFFD (EF BF BD).
 *
 * @param aSrc        [in] UTF-16 input
 * @param aSrcLength  [in/out] on entry, number of code units at aSrc; it is
 *                    rewritten to the number of units consumed only when
 *                    NS_OK_UENC_MOREOUTPUT is returned
 * @param aDest       [out] buffer receiving the UTF-8 bytes
 * @param aDestLength [in/out] on entry, capacity of aDest in bytes; on
 *                    return, number of bytes written
 * @return NS_OK when all input was encoded;
 *         NS_OK_UENC_MOREINPUT when the input is exhausted while a high
 *         surrogate is still waiting for its partner;
 *         NS_OK_UENC_MOREOUTPUT when aDest cannot hold the next character.
 */
NS_IMETHODIMP nsUnicodeToUTF8::Convert(const char16_t * aSrc,
                                       int32_t * aSrcLength,
                                       char * aDest,
                                       int32_t * aDestLength)
{
  const char16_t * src = aSrc;
  const char16_t * srcEnd = aSrc + *aSrcLength;
  char * dest = aDest;
  int32_t destLen = *aDestLength;
  uint32_t n;

  //complete remaining of last conversion
  if (mHighSurrogate) {
    // With no input there is nothing to pair the pending surrogate with, so
    // keep waiting. This must be an emptiness test: the code below reads
    // *src, which is only valid when at least one code unit is available.
    if (src == srcEnd) {
      *aDestLength = 0;
      return NS_OK_UENC_MOREINPUT;
    }
    // Reserve room for the worst case (a 4-byte supplementary character)
    // before touching the pending state.
    if (*aDestLength < 4) {
      *aSrcLength = 0;
      *aDestLength = 0;
      return NS_OK_UENC_MOREOUTPUT;
    }
    if (*src < (char16_t)0xdc00 || *src > (char16_t)0xdfff) { //not a pair
      // The pending high surrogate is unpaired; *src is left for the main
      // loop below to encode on its own.
      *dest++ = (char)0xef; //replacement character
      *dest++ = (char)0xbf;
      *dest++ = (char)0xbd;
      destLen -= 3;
    } else {
      // Combine the saved high surrogate with this low surrogate.
      n = ((mHighSurrogate - (char16_t)0xd800) << 10) +
          (*src - (char16_t)0xdc00) + 0x10000;
      *dest++ = (char)0xf0 | (n >> 18);
      *dest++ = (char)0x80 | ((n >> 12) & 0x3f);
      *dest++ = (char)0x80 | ((n >> 6) & 0x3f);
      *dest++ = (char)0x80 | (n & 0x3f);
      ++src;
      destLen -= 4;
    }
    mHighSurrogate = 0;
  }

  while (src < srcEnd) {
    if ( *src <= 0x007f) {
      // 1-byte sequence (ASCII)
      if (destLen < 1)
        goto error_more_output;
      *dest++ = (char)*src;
      --destLen;
    } else if (*src <= 0x07ff) {
      // 2-byte sequence
      if (destLen < 2)
        goto error_more_output;
      *dest++ = (char)0xc0 | (*src >> 6);
      *dest++ = (char)0x80 | (*src & 0x003f);
      destLen -= 2;
    } else if (*src >= (char16_t)0xd800 && *src <= (char16_t)0xdfff) {
      if (*src >= (char16_t)0xdc00) { //not a pair
        // A low surrogate with no preceding high surrogate.
        if (destLen < 3)
          goto error_more_output;
        *dest++ = (char)0xef; //replacement character
        *dest++ = (char)0xbf;
        *dest++ = (char)0xbd;
        destLen -= 3;
        ++src;
        continue;
      }
      if ((src+1) >= srcEnd) {
        //we need another surrogate to complete this unicode char
        mHighSurrogate = *src;
        *aDestLength = dest - aDest;
        return NS_OK_UENC_MOREINPUT;
      }
      //handle surrogate
      if (destLen < 4)
        goto error_more_output;
      if (*(src+1) < (char16_t)0xdc00 || *(src+1) > 0xdfff) { //not a pair
        // Only the high surrogate is replaced; the following unit is
        // encoded by the next loop iteration.
        *dest++ = (char)0xef; //replacement character
        *dest++ = (char)0xbf;
        *dest++ = (char)0xbd;
        destLen -= 3;
      } else {
        n = ((*src - (char16_t)0xd800) << 10) + (*(src+1) - (char16_t)0xdc00) + (uint32_t)0x10000;
        *dest++ = (char)0xf0 | (n >> 18);
        *dest++ = (char)0x80 | ((n >> 12) & 0x3f);
        *dest++ = (char)0x80 | ((n >> 6) & 0x3f);
        *dest++ = (char)0x80 | (n & 0x3f);
        destLen -= 4;
        // Skip the low surrogate; the shared ++src below skips the high one.
        ++src;
      }
    } else {
      if (destLen < 3)
        goto error_more_output;
      //treat rest of the character as BMP
      *dest++ = (char)0xe0 | (*src >> 12);
      *dest++ = (char)0x80 | ((*src >> 6) & 0x003f);
      *dest++ = (char)0x80 | (*src & 0x003f);
      destLen -= 3;
    }
    ++src;
  }

  *aDestLength = dest - aDest;
  return NS_OK;

error_more_output:
  // Report how much was consumed and produced so the caller can resume from
  // the first unconverted code unit with a fresh output buffer.
  *aSrcLength = src - aSrc;
  *aDestLength = dest - aDest;
  return NS_OK_UENC_MOREOUTPUT;
}
|
|
|
|
/**
 * Flushes encoder state at the end of the input.
 *
 * If a high surrogate is still pending in mHighSurrogate, it is written out
 * as the 3-byte replacement character EF BF BD and the pending state is
 * cleared.
 *
 * @param aDest       [out] buffer receiving the flushed bytes
 * @param aDestLength [in/out] on entry, capacity of aDest in bytes; on
 *                    return, number of bytes written (0 or 3)
 * @return NS_OK on success, or NS_OK_UENC_MOREOUTPUT when a surrogate is
 *         pending but aDest has room for fewer than 3 bytes (the pending
 *         surrogate is kept in that case).
 */
NS_IMETHODIMP nsUnicodeToUTF8::Finish(char * aDest, int32_t * aDestLength)
{
  // Nothing buffered: nothing to write.
  if (!mHighSurrogate) {
    *aDestLength = 0;
    return NS_OK;
  }

  // The replacement character needs 3 bytes.
  if (*aDestLength < 3) {
    *aDestLength = 0;
    return NS_OK_UENC_MOREOUTPUT;
  }

  // The lone high surrogate becomes the replacement character.
  aDest[0] = (char)0xef;
  aDest[1] = (char)0xbf;
  aDest[2] = (char)0xbd;

  mHighSurrogate = 0;
  *aDestLength = 3;
  return NS_OK;
}
|