mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-25 05:41:12 +00:00
166 lines
4.9 KiB
C++
166 lines
4.9 KiB
C++
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
|
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
//----------------------------------------------------------------------
|
|
// Global functions and data [declaration]
|
|
#include "nsUnicodeToUTF8.h"
|
|
#include "mozilla/CheckedInt.h"
|
|
|
|
NS_IMPL_ISUPPORTS(nsUnicodeToUTF8, nsIUnicodeEncoder)
|
|
|
|
//----------------------------------------------------------------------
|
|
// nsUnicodeToUTF8 class [implementation]
|
|
|
|
NS_IMETHODIMP nsUnicodeToUTF8::GetMaxLength(const char16_t* aSrc,
|
|
int32_t aSrcLength,
|
|
int32_t* aDestLength)
|
|
{
|
|
MOZ_ASSERT(aDestLength);
|
|
|
|
// aSrc is interpreted as UTF16, 3 is normally enough.
|
|
// But when previous buffer only contains part of the surrogate pair, we
|
|
// need to complete it here. If the first word in following buffer is not
|
|
// in valid surrogate range, we need to convert the remaining of last buffer
|
|
// to 3 bytes.
|
|
mozilla::CheckedInt32 length = aSrcLength;
|
|
length *= 3;
|
|
length += 3;
|
|
|
|
if (!length.isValid()) {
|
|
return NS_ERROR_OUT_OF_MEMORY;
|
|
}
|
|
|
|
*aDestLength = length.value();
|
|
return NS_OK;
|
|
}
|
|
|
|
NS_IMETHODIMP nsUnicodeToUTF8::Convert(const char16_t* aSrc,
|
|
int32_t* aSrcLength,
|
|
char* aDest,
|
|
int32_t* aDestLength)
|
|
{
|
|
const char16_t* src = aSrc;
|
|
const char16_t* srcEnd = aSrc + *aSrcLength;
|
|
char* dest = aDest;
|
|
int32_t destLen = *aDestLength;
|
|
uint32_t n;
|
|
|
|
//complete remaining of last conversion
|
|
if (mHighSurrogate) {
|
|
if (src < srcEnd) {
|
|
*aDestLength = 0;
|
|
return NS_OK_UENC_MOREINPUT;
|
|
}
|
|
if (*aDestLength < 4) {
|
|
*aSrcLength = 0;
|
|
*aDestLength = 0;
|
|
return NS_OK_UENC_MOREOUTPUT;
|
|
}
|
|
if (*src < (char16_t)0xdc00 || *src > (char16_t)0xdfff) { //not a pair
|
|
*dest++ = (char)0xef; //replacement character
|
|
*dest++ = (char)0xbf;
|
|
*dest++ = (char)0xbd;
|
|
destLen -= 3;
|
|
} else {
|
|
n = ((mHighSurrogate - (char16_t)0xd800) << 10) +
|
|
(*src - (char16_t)0xdc00) + 0x10000;
|
|
*dest++ = (char)0xf0 | (n >> 18);
|
|
*dest++ = (char)0x80 | ((n >> 12) & 0x3f);
|
|
*dest++ = (char)0x80 | ((n >> 6) & 0x3f);
|
|
*dest++ = (char)0x80 | (n & 0x3f);
|
|
++src;
|
|
destLen -= 4;
|
|
}
|
|
mHighSurrogate = 0;
|
|
}
|
|
|
|
while (src < srcEnd) {
|
|
if ( *src <= 0x007f) {
|
|
if (destLen < 1)
|
|
goto error_more_output;
|
|
*dest++ = (char)*src;
|
|
--destLen;
|
|
} else if (*src <= 0x07ff) {
|
|
if (destLen < 2)
|
|
goto error_more_output;
|
|
*dest++ = (char)0xc0 | (*src >> 6);
|
|
*dest++ = (char)0x80 | (*src & 0x003f);
|
|
destLen -= 2;
|
|
} else if (*src >= (char16_t)0xd800 && *src <= (char16_t)0xdfff) {
|
|
if (*src >= (char16_t)0xdc00) { //not a pair
|
|
if (destLen < 3)
|
|
goto error_more_output;
|
|
*dest++ = (char)0xef; //replacement character
|
|
*dest++ = (char)0xbf;
|
|
*dest++ = (char)0xbd;
|
|
destLen -= 3;
|
|
++src;
|
|
continue;
|
|
}
|
|
if ((src+1) >= srcEnd) {
|
|
//we need another surrogate to complete this unicode char
|
|
mHighSurrogate = *src;
|
|
*aDestLength = dest - aDest;
|
|
return NS_OK_UENC_MOREINPUT;
|
|
}
|
|
//handle surrogate
|
|
if (destLen < 4)
|
|
goto error_more_output;
|
|
if (*(src+1) < (char16_t)0xdc00 || *(src+1) > 0xdfff) { //not a pair
|
|
*dest++ = (char)0xef; //replacement character
|
|
*dest++ = (char)0xbf;
|
|
*dest++ = (char)0xbd;
|
|
destLen -= 3;
|
|
} else {
|
|
n = ((*src - (char16_t)0xd800) << 10) + (*(src+1) - (char16_t)0xdc00) + (uint32_t)0x10000;
|
|
*dest++ = (char)0xf0 | (n >> 18);
|
|
*dest++ = (char)0x80 | ((n >> 12) & 0x3f);
|
|
*dest++ = (char)0x80 | ((n >> 6) & 0x3f);
|
|
*dest++ = (char)0x80 | (n & 0x3f);
|
|
destLen -= 4;
|
|
++src;
|
|
}
|
|
} else {
|
|
if (destLen < 3)
|
|
goto error_more_output;
|
|
//treat rest of the character as BMP
|
|
*dest++ = (char)0xe0 | (*src >> 12);
|
|
*dest++ = (char)0x80 | ((*src >> 6) & 0x003f);
|
|
*dest++ = (char)0x80 | (*src & 0x003f);
|
|
destLen -= 3;
|
|
}
|
|
++src;
|
|
}
|
|
|
|
*aDestLength = dest - aDest;
|
|
return NS_OK;
|
|
|
|
error_more_output:
|
|
*aSrcLength = src - aSrc;
|
|
*aDestLength = dest - aDest;
|
|
return NS_OK_UENC_MOREOUTPUT;
|
|
}
|
|
|
|
NS_IMETHODIMP nsUnicodeToUTF8::Finish(char * aDest, int32_t * aDestLength)
|
|
{
|
|
char * dest = aDest;
|
|
|
|
if (mHighSurrogate) {
|
|
if (*aDestLength < 3) {
|
|
*aDestLength = 0;
|
|
return NS_OK_UENC_MOREOUTPUT;
|
|
}
|
|
*dest++ = (char)0xef; //replacement character
|
|
*dest++ = (char)0xbf;
|
|
*dest++ = (char)0xbd;
|
|
mHighSurrogate = 0;
|
|
*aDestLength = 3;
|
|
return NS_OK;
|
|
}
|
|
|
|
*aDestLength = 0;
|
|
return NS_OK;
|
|
}
|