gecko-dev/intl/uconv/nsUnicodeToUTF8.cpp
Birunthan Mohanathas 70553064dd Bug 1038537 - Part 7: Flatten intl/uconv/src/ directory. r=smontagu
--HG--
rename : intl/uconv/src/8859-1.uf => intl/uconv/8859-1.uf
rename : intl/uconv/src/8859-1.ut => intl/uconv/8859-1.ut
rename : intl/uconv/src/cp1252.uf => intl/uconv/cp1252.uf
rename : intl/uconv/src/cp1252.ut => intl/uconv/cp1252.ut
rename : intl/uconv/src/macroman.uf => intl/uconv/macroman.uf
rename : intl/uconv/src/macroman.ut => intl/uconv/macroman.ut
rename : intl/uconv/src/nsCP1252ToUnicode.cpp => intl/uconv/nsCP1252ToUnicode.cpp
rename : intl/uconv/src/nsCP1252ToUnicode.h => intl/uconv/nsCP1252ToUnicode.h
rename : intl/uconv/src/nsConverterInputStream.cpp => intl/uconv/nsConverterInputStream.cpp
rename : intl/uconv/src/nsConverterInputStream.h => intl/uconv/nsConverterInputStream.h
rename : intl/uconv/src/nsConverterOutputStream.cpp => intl/uconv/nsConverterOutputStream.cpp
rename : intl/uconv/src/nsConverterOutputStream.h => intl/uconv/nsConverterOutputStream.h
rename : intl/uconv/src/nsISO88591ToUnicode.cpp => intl/uconv/nsISO88591ToUnicode.cpp
rename : intl/uconv/src/nsISO88591ToUnicode.h => intl/uconv/nsISO88591ToUnicode.h
rename : intl/uconv/src/nsMacRomanToUnicode.cpp => intl/uconv/nsMacRomanToUnicode.cpp
rename : intl/uconv/src/nsMacRomanToUnicode.h => intl/uconv/nsMacRomanToUnicode.h
rename : intl/uconv/src/nsReplacementToUnicode.cpp => intl/uconv/nsReplacementToUnicode.cpp
rename : intl/uconv/src/nsReplacementToUnicode.h => intl/uconv/nsReplacementToUnicode.h
rename : intl/uconv/src/nsScriptableUConv.cpp => intl/uconv/nsScriptableUConv.cpp
rename : intl/uconv/src/nsScriptableUConv.h => intl/uconv/nsScriptableUConv.h
rename : intl/uconv/src/nsTextToSubURI.cpp => intl/uconv/nsTextToSubURI.cpp
rename : intl/uconv/src/nsTextToSubURI.h => intl/uconv/nsTextToSubURI.h
rename : intl/uconv/src/nsUConvModule.cpp => intl/uconv/nsUConvModule.cpp
rename : intl/uconv/src/nsUTF8ConverterService.cpp => intl/uconv/nsUTF8ConverterService.cpp
rename : intl/uconv/src/nsUTF8ConverterService.h => intl/uconv/nsUTF8ConverterService.h
rename : intl/uconv/src/nsUTF8ToUnicode.cpp => intl/uconv/nsUTF8ToUnicode.cpp
rename : intl/uconv/src/nsUTF8ToUnicode.h => intl/uconv/nsUTF8ToUnicode.h
rename : intl/uconv/src/nsUTF8ToUnicodeSSE2.cpp => intl/uconv/nsUTF8ToUnicodeSSE2.cpp
rename : intl/uconv/src/nsUnicodeToCP1252.cpp => intl/uconv/nsUnicodeToCP1252.cpp
rename : intl/uconv/src/nsUnicodeToCP1252.h => intl/uconv/nsUnicodeToCP1252.h
rename : intl/uconv/src/nsUnicodeToISO88591.cpp => intl/uconv/nsUnicodeToISO88591.cpp
rename : intl/uconv/src/nsUnicodeToISO88591.h => intl/uconv/nsUnicodeToISO88591.h
rename : intl/uconv/src/nsUnicodeToMacRoman.cpp => intl/uconv/nsUnicodeToMacRoman.cpp
rename : intl/uconv/src/nsUnicodeToMacRoman.h => intl/uconv/nsUnicodeToMacRoman.h
rename : intl/uconv/src/nsUnicodeToUTF8.cpp => intl/uconv/nsUnicodeToUTF8.cpp
rename : intl/uconv/src/nsUnicodeToUTF8.h => intl/uconv/nsUnicodeToUTF8.h
2014-07-24 10:56:39 -07:00

155 lines
4.7 KiB
C++

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
//----------------------------------------------------------------------
// Global functions and data [declaration]
#include "nsUnicodeToUTF8.h"
// NOTE(review): presumably expands to the nsISupports boilerplate for
// nsUnicodeToUTF8 with nsIUnicodeEncoder as its only exposed interface --
// confirm against the macro's definition.
NS_IMPL_ISUPPORTS(nsUnicodeToUTF8, nsIUnicodeEncoder)
//----------------------------------------------------------------------
// nsUnicodeToUTF8 class [implementation]
/**
 * Reports an upper bound on the number of UTF-8 bytes needed to encode
 * aSrcLength UTF-16 code units.
 *
 * @param aSrc        UTF-16 input; not inspected by this estimate.
 * @param aSrcLength  number of UTF-16 code units that will be converted.
 * @param aDestLength [out] receives the worst-case byte count.
 * @return NS_OK, unconditionally.
 */
NS_IMETHODIMP nsUnicodeToUTF8::GetMaxLength(const char16_t * aSrc,
                                            int32_t aSrcLength,
                                            int32_t * aDestLength)
{
  // A single UTF-16 code unit never needs more than 3 UTF-8 bytes (a
  // surrogate pair is two units for 4 bytes, which is still within 3 per
  // unit).
  const int32_t bytesForInput = 3 * aSrcLength;

  // A high surrogate may have been carried over from the previous buffer.
  // If the first unit of this buffer does not complete the pair, the
  // carried-over half is emitted as a 3-byte sequence on top of whatever
  // this buffer produces.
  const int32_t bytesForCarriedSurrogate = 3;

  *aDestLength = bytesForInput + bytesForCarriedSurrogate;
  return NS_OK;
}
/**
 * Encodes a buffer of UTF-16 code units as UTF-8.
 *
 * A high surrogate that ends one buffer is remembered in mHighSurrogate and
 * combined with the first code unit of the next call. Unpaired surrogates
 * are replaced with U+FFFD (bytes EF BF BD).
 *
 * @param aSrc        UTF-16 input.
 * @param aSrcLength  [in/out] on entry, the number of code units in aSrc.
 *                    Written only when NS_OK_UENC_MOREOUTPUT is returned,
 *                    in which case it receives the number of code units
 *                    consumed so far.
 * @param aDest       output buffer for the UTF-8 bytes.
 * @param aDestLength [in/out] on entry, the capacity of aDest in bytes; on
 *                    return, the number of bytes written.
 * @return NS_OK when all input was converted;
 *         NS_OK_UENC_MOREINPUT when the input ended on a high surrogate (or
 *         a high surrogate is pending and no input was supplied);
 *         NS_OK_UENC_MOREOUTPUT when aDest is too small for the next
 *         character.
 */
NS_IMETHODIMP nsUnicodeToUTF8::Convert(const char16_t * aSrc,
                                       int32_t * aSrcLength,
                                       char * aDest,
                                       int32_t * aDestLength)
{
  const char16_t * src = aSrc;
  const char16_t * srcEnd = aSrc + *aSrcLength;
  char * dest = aDest;
  int32_t destLen = *aDestLength;
  uint32_t n;

  // Complete the surrogate pair left over from the previous call.
  if (mHighSurrogate) {
    // With no input there is nothing to pair the pending surrogate with
    // (and *src must not be read), so keep it and ask for more input.
    if (src >= srcEnd) {
      *aDestLength = 0;
      return NS_OK_UENC_MOREINPUT;
    }
    // Require room for the larger of the two outcomes (4-byte pair) up
    // front; nothing is consumed or written if it does not fit.
    if (*aDestLength < 4) {
      *aSrcLength = 0;
      *aDestLength = 0;
      return NS_OK_UENC_MOREOUTPUT;
    }
    if (*src < (char16_t)0xdc00 || *src > (char16_t)0xdfff) { // not a pair
      // The pending high surrogate is unpaired: emit U+FFFD for it. src is
      // deliberately not advanced so the current unit is encoded by the
      // main loop below.
      *dest++ = (char)0xef; // replacement character
      *dest++ = (char)0xbf;
      *dest++ = (char)0xbd;
      destLen -= 3;
    } else {
      // Combine the two halves into a supplementary-plane code point.
      n = ((mHighSurrogate - (char16_t)0xd800) << 10) +
          (*src - (char16_t)0xdc00) + 0x10000;
      *dest++ = (char)0xf0 | (n >> 18);
      *dest++ = (char)0x80 | ((n >> 12) & 0x3f);
      *dest++ = (char)0x80 | ((n >> 6) & 0x3f);
      *dest++ = (char)0x80 | (n & 0x3f);
      ++src;
      destLen -= 4;
    }
    mHighSurrogate = 0;
  }

  while (src < srcEnd) {
    if (*src <= 0x007f) {
      // One byte: 0xxxxxxx
      if (destLen < 1)
        goto error_more_output;
      *dest++ = (char)*src;
      --destLen;
    } else if (*src <= 0x07ff) {
      // Two bytes: 110xxxxx 10xxxxxx
      if (destLen < 2)
        goto error_more_output;
      *dest++ = (char)0xc0 | (*src >> 6);
      *dest++ = (char)0x80 | (*src & 0x003f);
      destLen -= 2;
    } else if (*src >= (char16_t)0xd800 && *src <= (char16_t)0xdfff) {
      if (*src >= (char16_t)0xdc00) { // not a pair
        // A low surrogate with no preceding high surrogate.
        if (destLen < 3)
          goto error_more_output;
        *dest++ = (char)0xef; // replacement character
        *dest++ = (char)0xbf;
        *dest++ = (char)0xbd;
        destLen -= 3;
        ++src;
        continue;
      }
      if ((src+1) >= srcEnd) {
        // We need another surrogate to complete this character; stash the
        // high half until the next Convert() or Finish() call.
        mHighSurrogate = *src;
        *aDestLength = dest - aDest;
        return NS_OK_UENC_MOREINPUT;
      }
      // Handle the surrogate pair; reserve the 4 bytes a valid pair needs.
      if (destLen < 4)
        goto error_more_output;
      if (*(src+1) < (char16_t)0xdc00 || *(src+1) > 0xdfff) { // not a pair
        // Unpaired high surrogate: replace it and let the following unit
        // be encoded on its own in the next iteration.
        *dest++ = (char)0xef; // replacement character
        *dest++ = (char)0xbf;
        *dest++ = (char)0xbd;
        destLen -= 3;
      } else {
        n = ((*src - (char16_t)0xd800) << 10) +
            (*(src+1) - (char16_t)0xdc00) + (uint32_t)0x10000;
        *dest++ = (char)0xf0 | (n >> 18);
        *dest++ = (char)0x80 | ((n >> 12) & 0x3f);
        *dest++ = (char)0x80 | ((n >> 6) & 0x3f);
        *dest++ = (char)0x80 | (n & 0x3f);
        destLen -= 4;
        ++src; // skip the low surrogate; the shared ++src skips the high one
      }
    } else {
      // Remaining BMP characters take three bytes: 1110xxxx 10xxxxxx 10xxxxxx
      if (destLen < 3)
        goto error_more_output;
      *dest++ = (char)0xe0 | (*src >> 12);
      *dest++ = (char)0x80 | ((*src >> 6) & 0x003f);
      *dest++ = (char)0x80 | (*src & 0x003f);
      destLen -= 3;
    }
    ++src;
  }

  *aDestLength = dest - aDest;
  return NS_OK;

error_more_output:
  // Report how much was consumed and produced so the caller can resume.
  *aSrcLength = src - aSrc;
  *aDestLength = dest - aDest;
  return NS_OK_UENC_MOREOUTPUT;
}
/**
 * Flushes any state left over once the input is exhausted.
 *
 * A high surrogate still held in mHighSurrogate can no longer be paired, so
 * it is written out as U+FFFD (bytes EF BF BD) and the pending state is
 * cleared.
 *
 * @param aDest       output buffer.
 * @param aDestLength [in/out] on entry, the capacity of aDest in bytes; on
 *                    return, the number of bytes written (0 or 3).
 * @return NS_OK on success; NS_OK_UENC_MOREOUTPUT when a surrogate is
 *         pending but aDest has fewer than 3 bytes available (the pending
 *         surrogate is kept in that case).
 */
NS_IMETHODIMP nsUnicodeToUTF8::Finish(char * aDest, int32_t * aDestLength)
{
  // Nothing buffered: there are no trailing bytes to produce.
  if (!mHighSurrogate) {
    *aDestLength = 0;
    return NS_OK;
  }

  // The replacement character needs three bytes of output space.
  if (*aDestLength < 3) {
    *aDestLength = 0;
    return NS_OK_UENC_MOREOUTPUT;
  }

  // Unpaired high surrogate: emit the replacement character.
  aDest[0] = (char)0xef;
  aDest[1] = (char)0xbf;
  aDest[2] = (char)0xbd;

  mHighSurrogate = 0;
  *aDestLength = 3;
  return NS_OK;
}