Move the core of NS_ConvertUCS2toUTF8 into character sinks in nsUTF8Utils.h, and use them to make ToNewUTF8String faster. Fix bug in surrogate handling in the moved code. Fix null-termination bug in UTF8ToNewUnicode. b=206682 r=jag sr=alecf a=brendan

This commit is contained in:
dbaron%dbaron.org 2003-05-22 21:25:43 +00:00
parent f6ad24ed15
commit 06133b6d3c
10 changed files with 424 additions and 258 deletions

View File

@ -45,6 +45,7 @@
#include "nsString.h"
#include "nsReadableUtils.h"
#include "nsDebug.h"
#include "nsUTF8Utils.h"
#ifndef nsCharTraits_h___
#include "nsCharTraits.h"
@ -54,8 +55,10 @@
#include "prdtoa.h"
#endif
#ifdef DEBUG
static const char* kPossibleNull = "Error: possible unintended null in string";
static const char* kNullPointerError = "Error: unexpected null ptr";
#endif
static const char* kWhitespace="\b\t\r\n ";
const nsBufferHandle<char>*
@ -1084,111 +1087,47 @@ PRBool nsCString::EqualsWithConversion(const char* aCString,PRBool aIgnoreCase,P
//----------------------------------------------------------------------
NS_ConvertUCS2toUTF8::NS_ConvertUCS2toUTF8( const nsAString& aString )
NS_ConvertUCS2toUTF8::NS_ConvertUCS2toUTF8( const PRUnichar* aString )
{
nsAString::const_iterator start; aString.BeginReading(start);
nsAString::const_iterator end; aString.EndReading(end);
while (start != end) {
nsReadableFragment<PRUnichar> frag(start.fragment());
Append(frag.mStart, frag.mEnd - frag.mStart);
start.advance(start.size_forward());
}
if (!aString)
// Leave us as an uninitialized nsCAutoString.
return;
Init(nsDependentString(aString));
}
void
NS_ConvertUCS2toUTF8::Append( const PRUnichar* aString, PRUint32 aLength )
NS_ConvertUCS2toUTF8::NS_ConvertUCS2toUTF8( const PRUnichar* aString, PRUint32 aLength )
{
// Handle null string by just leaving us as a brand-new
// uninitialized nsCAutoString.
if (! aString)
if (!aString)
// Leave us as an uninitialized nsCAutoString.
return;
Init(Substring(aString, aString + aLength));
}
// Calculate how many bytes we need
const PRUnichar* p;
PRInt32 count, utf8len;
for (p = aString, utf8len = 0, count = aLength; 0 != count && 0 != (*p); count--, p++)
{
if (! ((*p) & 0xFF80))
utf8len += 1; // 0000 0000 - 0000 007F
else if (! ((*p) & 0xF800))
utf8len += 2; // 0000 0080 - 0000 07FF
else
utf8len += 3; // 0000 0800 - 0000 FFFF
// Note: Surrogate pair needs 4 bytes, but in this calculation
// we count it as 6 bytes. It will waste 2 bytes per surrogate pair
void NS_ConvertUCS2toUTF8::Init( const nsAString& aString )
{
// Compute space required: do this once so we don't incur multiple
// allocations. This "optimization" is probably of dubious value...
nsAString::const_iterator start, end;
CalculateUTF8Size calculator;
copy_string(aString.BeginReading(start), aString.EndReading(end), calculator);
PRUint32 count = calculator.Size();
if (count) {
// Grow the buffer if we need to.
SetLength(count);
// All ready? Time to convert
ConvertUCS2toUTF8 converter(mStr);
copy_string(aString.BeginReading(start), aString.EndReading(end), converter);
mLength = converter.Size();
if (mLength != count) {
NS_ERROR("Input invalid or incorrect length was calculated");
Truncate();
}
// Make sure our buffer's big enough, so we don't need to do
// multiple allocations.
if(mLength+PRUint32(utf8len+1) > sizeof(mBuffer))
SetCapacity(mLength+utf8len+1);
// |SetCapacity| normally doesn't guarantee the use we are putting it to here (see its interface comment in nsAString.h),
// we can only use it since our local implementation, |nsCString::SetCapacity|, is known to do what we want
char* out = mStr+mLength;
PRUint32 ucs4=0;
for (p = aString, count = aLength; 0 != count && 0 != (*p); count--, p++)
{
if (0 == ucs4)
{
if (! ((*p) & 0xFF80))
{
*out++ = (char)*p;
}
else if (! ((*p) & 0xF800))
{
*out++ = 0xC0 | (char)((*p) >> 6);
*out++ = 0x80 | (char)(0x003F & (*p));
}
else
{
if (0xD800 == (0xFC00 & (*p)))
{
// D800- DBFF - High Surrogate
// N = (H- D800) *400 + 10000 + ...
ucs4 = 0x10000 | ((0x03FF & (*p)) << 10);
}
else if (0xDC00 == (0xFC00 & (*p)))
{
// DC00- DFFF - Low Surrogate
// error here. We should hit High Surrogate first
// Do not output any thing in this case
}
else
{
*out++ = 0xE0 | (char)((*p) >> 12);
*out++ = 0x80 | (char)(0x003F & (*p >> 6));
*out++ = 0x80 | (char)(0x003F & (*p) );
}
}
}
else
{
if (0xDC00 == (0xFC00 & (*p)))
{
// DC00- DFFF - Low Surrogate
// N += ( L - DC00 )
ucs4 |= (0x03FF & (*p));
// 0001 0000-001F FFFF
*out++ = 0xF0 | (char)(ucs4 >> 18);
*out++ = 0x80 | (char)(0x003F & (ucs4 >> 12));
*out++ = 0x80 | (char)(0x003F & (ucs4 >> 6));
*out++ = 0x80 | (char)(0x003F & ucs4) ;
}
else
{
// Got a High Surrogate but no low surrogate
// output nothing.
}
ucs4 = 0;
}
}
*out = '\0'; // null terminate
mLength += utf8len;
}
}
NS_LossyConvertUCS2toASCII::NS_LossyConvertUCS2toASCII( const nsAString& aString )

View File

@ -431,24 +431,15 @@ class NS_COM NS_ConvertUCS2toUTF8
*/
{
public:
friend NS_COM char* ToNewUTF8String( const nsAString& aSource );
public:
explicit
NS_ConvertUCS2toUTF8( const PRUnichar* aString )
explicit NS_ConvertUCS2toUTF8( const PRUnichar* aString );
NS_ConvertUCS2toUTF8( const PRUnichar* aString, PRUint32 aLength );
explicit NS_ConvertUCS2toUTF8( const nsAString& aString )
{
Append( aString, ~PRUint32(0) /* MAXINT */);
Init(aString);
}
NS_ConvertUCS2toUTF8( const PRUnichar* aString, PRUint32 aLength )
{
Append( aString, aLength );
}
explicit NS_ConvertUCS2toUTF8( const nsAString& aString );
protected:
void Append( const PRUnichar* aString, PRUint32 aLength );
void Init( const nsAString& aString );
private:
// NOT TO BE IMPLEMENTED

View File

@ -54,8 +54,10 @@
#include "prdtoa.h"
#endif
#ifdef DEBUG
static const char* kPossibleNull = "Error: possible unintended null in string";
static const char* kNullPointerError = "Error: unexpected null ptr";
#endif
static const char* kWhitespace="\b\t\r\n ";
const nsBufferHandle<PRUnichar>*

View File

@ -54,6 +54,10 @@ class UTF8traits
#define PLANE1_BASE 0x00010000
#define UCS2_REPLACEMENT_CHAR 0xfffd
/**
* A character sink (see |copy_string| in nsAlgorithm.h) for converting
* UTF-8 to UCS2 (really UTF-16).
*/
class ConvertUTF8toUCS2
{
public:
@ -181,12 +185,21 @@ class ConvertUTF8toUCS2
return p - start;
}
// Stores a zero element at the current output position to terminate
// the converted string. |mBuffer| is not advanced and no bounds check
// is made here.
void write_terminator()
{
*mBuffer = buffer_type(0);
}
private:
buffer_type* mStart;
buffer_type* mBuffer;
PRBool mErrorEncountered;
};
/**
* A character sink (see |copy_string| in nsAlgorithm.h) for computing
* the length of a UTF-8 string.
*/
class CalculateUTF8Length
{
public:
@ -242,4 +255,148 @@ class CalculateUTF8Length
PRBool mErrorEncountered;
};
/**
 * A character sink (see |copy_string| in nsAlgorithm.h) for converting
 * UCS2 (really UTF-16) to UTF-8.
 *
 * The output buffer is supplied by the caller and is written without
 * any bounds checking.  |CalculateUTF8Size| (below) must therefore make
 * exactly the same decisions as this class for every input, so that the
 * size it reports equals the number of bytes written here.
 */
class ConvertUCS2toUTF8
{
  public:
    typedef nsAString::char_type  value_type;
    typedef nsACString::char_type buffer_type;

    // The error handling here is more lenient than that in
    // |ConvertUTF8toUCS2|, but it's that way for backwards
    // compatibility: an unpaired surrogate produces no output
    // instead of a replacement character.

    ConvertUCS2toUTF8( buffer_type* aBuffer )
      : mStart(aBuffer), mBuffer(aBuffer) {}

    // Number of bytes written to the buffer so far.
    size_t Size() const { return mBuffer - mStart; }

    /**
     * Converts the |N| UTF-16 code units starting at |start| and appends
     * the resulting UTF-8 bytes at the current output position.
     *
     * Always returns |N|.  A surrogate pair must lie entirely within one
     * call: a high surrogate in the last position of the fragment is
     * reported with NS_ERROR and dropped.
     */
    PRUint32 write( const value_type* start, PRUint32 N )
      {
        for (const value_type *p = start, *end = start + N; p < end; ++p)
          {
            const value_type c = *p;

            if (!(c & 0xFF80)) // U+0000 - U+007F
              {
                *mBuffer++ = buffer_type(c);
              }
            else if (!(c & 0xF800)) // U+0080 - U+07FF
              {
                *mBuffer++ = buffer_type(0xC0 | (c >> 6));
                *mBuffer++ = buffer_type(0x80 | (0x003F & c));
              }
            else if (0xD800 == (0xFC00 & c)) // U+D800 - U+DBFF
              {
                // High surrogate: look at the next unit before consuming it.
                if (p + 1 == end)
                  {
                    NS_ERROR("Surrogate pair split between fragments");
                    return N;
                  }

                const value_type low = *(p + 1);
                if (0xDC00 == (0xFC00 & low)) // U+DC00 - U+DFFF
                  {
                    // N = (H - D800) * 400 + 10000 + (L - DC00)
                    const PRUint32 ucs4 = (0x10000 + ((0x03FF & c) << 10))
                                          | (0x03FF & low);
                    ++p; // the low surrogate belongs to this pair

                    // 0001 0000 - 001F FFFF
                    *mBuffer++ = buffer_type(0xF0 | (ucs4 >> 18));
                    *mBuffer++ = buffer_type(0x80 | (0x003F & (ucs4 >> 12)));
                    *mBuffer++ = buffer_type(0x80 | (0x003F & (ucs4 >> 6)));
                    *mBuffer++ = buffer_type(0x80 | (0x003F & ucs4));
                  }
                else
                  {
                    NS_ERROR("got a High Surrogate but no low surrogate");
                    // Output nothing for the lone high surrogate.  The
                    // following unit is NOT consumed, so a valid
                    // character after the stray surrogate is converted
                    // on the next iteration instead of being lost.
                  }
              }
            else if (0xDC00 == (0xFC00 & c)) // U+DC00 - U+DFFF
              {
                NS_ERROR("got a low Surrogate but no high surrogate");
                // output nothing.
              }
            else // U+0800 - U+D7FF, U+E000 - U+FFFF
              {
                *mBuffer++ = buffer_type(0xE0 | (c >> 12));
                *mBuffer++ = buffer_type(0x80 | (0x003F & (c >> 6)));
                *mBuffer++ = buffer_type(0x80 | (0x003F & c));
              }
          }

        return N;
      }

    // Stores a terminating zero byte at the current output position
    // without advancing it; the caller must have left room for it.
    void write_terminator()
      {
        *mBuffer = buffer_type(0);
      }

  private:
    buffer_type* mStart;   // first byte of the output buffer
    buffer_type* mBuffer;  // next byte to be written
};

/**
 * A character sink (see |copy_string| in nsAlgorithm.h) for computing
 * the number of bytes a UCS2 (really UTF-16) would occupy in UTF-8.
 *
 * This mirrors the branch structure of |ConvertUCS2toUTF8::write|,
 * including what is skipped for malformed input, so that the size
 * reported here matches the number of bytes that class writes.
 */
class CalculateUTF8Size
{
  public:
    typedef nsAString::char_type value_type;

    CalculateUTF8Size()
      : mSize(0) { }

    // Number of UTF-8 bytes needed for everything seen so far,
    // not counting a terminator.
    size_t Size() const { return mSize; }

    /**
     * Adds the UTF-8 size of the |N| UTF-16 code units starting at
     * |start| to the running total.  Always returns |N|.
     */
    PRUint32 write( const value_type* start, PRUint32 N )
      {
        // Assume UCS2 surrogate pairs won't be spread across fragments.
        for (const value_type *p = start, *end = start + N; p < end; ++p)
          {
            const value_type c = *p;

            if (!(c & 0xFF80)) // U+0000 - U+007F
              {
                mSize += 1;
              }
            else if (!(c & 0xF800)) // U+0080 - U+07FF
              {
                mSize += 2;
              }
            else if (0xD800 == (0xFC00 & c)) // U+D800 - U+DBFF
              {
                if (p + 1 == end)
                  {
                    NS_ERROR("Surrogate pair split between fragments");
                    return N;
                  }

                if (0xDC00 == (0xFC00 & *(p + 1))) // U+DC00 - U+DFFF
                  {
                    ++p; // consume the low half of the pair
                    mSize += 4;
                  }
                else
                  {
                    NS_ERROR("got a high Surrogate but no low surrogate");
                    // Count nothing for the lone surrogate and leave the
                    // following unit to be counted on the next iteration.
                  }
              }
            else if (0xDC00 == (0xFC00 & c)) // U+DC00 - U+DFFF
              {
                NS_ERROR("got a low Surrogate but no high surrogate");
              }
            else // U+0800 - U+D7FF, U+E000 - U+FFFF
              {
                mSize += 3;
              }
          }

        return N;
      }

  private:
    size_t mSize; // running total of UTF-8 bytes
};
#endif /* !defined(nsUTF8Utils_h_) */

View File

@ -209,24 +209,18 @@ NS_COM
char*
ToNewUTF8String( const nsAString& aSource )
{
// XXX The conversion code in NS_ConvertUCS2toUTF8 needs to be
// refactored so that we can use it here without a double-copy.
NS_ConvertUCS2toUTF8 temp(aSource);
nsAString::const_iterator start, end;
CalculateUTF8Size calculator;
copy_string(aSource.BeginReading(start), aSource.EndReading(end),
calculator);
char* result;
if (temp.GetOwnsBuffer()) {
// We allocated. Trick the string into not freeing its buffer to
// avoid an extra allocation.
result = temp.mStr;
char *result = NS_STATIC_CAST(char*,
nsMemory::Alloc(calculator.Size() + 1));
temp.mStr=0;
temp.SetOwnsBuffer(PR_FALSE);
}
else {
// We didn't allocate a buffer, so we need to copy it out of the
// nsCAutoString's storage.
result = ToNewCString(temp);
}
ConvertUCS2toUTF8 converter(result);
copy_string(aSource.BeginReading(start), aSource.EndReading(end),
converter).write_terminator();
NS_ASSERTION(calculator.Size() == converter.Size(), "length mismatch");
return result;
}
@ -285,7 +279,7 @@ UTF8ToNewUnicode( const nsACString& aSource )
ConvertUTF8toUCS2 converter(result);
copy_string(aSource.BeginReading(start), aSource.EndReading(end),
converter);
converter).write_terminator();
NS_ASSERTION(calculator.Length() == converter.Length(), "length mismatch");
return result;

View File

@ -45,6 +45,7 @@
#include "nsString.h"
#include "nsReadableUtils.h"
#include "nsDebug.h"
#include "nsUTF8Utils.h"
#ifndef nsCharTraits_h___
#include "nsCharTraits.h"
@ -54,8 +55,10 @@
#include "prdtoa.h"
#endif
#ifdef DEBUG
static const char* kPossibleNull = "Error: possible unintended null in string";
static const char* kNullPointerError = "Error: unexpected null ptr";
#endif
static const char* kWhitespace="\b\t\r\n ";
const nsBufferHandle<char>*
@ -1084,111 +1087,47 @@ PRBool nsCString::EqualsWithConversion(const char* aCString,PRBool aIgnoreCase,P
//----------------------------------------------------------------------
NS_ConvertUCS2toUTF8::NS_ConvertUCS2toUTF8( const nsAString& aString )
NS_ConvertUCS2toUTF8::NS_ConvertUCS2toUTF8( const PRUnichar* aString )
{
nsAString::const_iterator start; aString.BeginReading(start);
nsAString::const_iterator end; aString.EndReading(end);
while (start != end) {
nsReadableFragment<PRUnichar> frag(start.fragment());
Append(frag.mStart, frag.mEnd - frag.mStart);
start.advance(start.size_forward());
}
if (!aString)
// Leave us as an uninitialized nsCAutoString.
return;
Init(nsDependentString(aString));
}
void
NS_ConvertUCS2toUTF8::Append( const PRUnichar* aString, PRUint32 aLength )
NS_ConvertUCS2toUTF8::NS_ConvertUCS2toUTF8( const PRUnichar* aString, PRUint32 aLength )
{
// Handle null string by just leaving us as a brand-new
// uninitialized nsCAutoString.
if (! aString)
if (!aString)
// Leave us as an uninitialized nsCAutoString.
return;
Init(Substring(aString, aString + aLength));
}
// Calculate how many bytes we need
const PRUnichar* p;
PRInt32 count, utf8len;
for (p = aString, utf8len = 0, count = aLength; 0 != count && 0 != (*p); count--, p++)
{
if (! ((*p) & 0xFF80))
utf8len += 1; // 0000 0000 - 0000 007F
else if (! ((*p) & 0xF800))
utf8len += 2; // 0000 0080 - 0000 07FF
else
utf8len += 3; // 0000 0800 - 0000 FFFF
// Note: Surrogate pair needs 4 bytes, but in this calculation
// we count it as 6 bytes. It will waste 2 bytes per surrogate pair
void NS_ConvertUCS2toUTF8::Init( const nsAString& aString )
{
// Compute space required: do this once so we don't incur multiple
// allocations. This "optimization" is probably of dubious value...
nsAString::const_iterator start, end;
CalculateUTF8Size calculator;
copy_string(aString.BeginReading(start), aString.EndReading(end), calculator);
PRUint32 count = calculator.Size();
if (count) {
// Grow the buffer if we need to.
SetLength(count);
// All ready? Time to convert
ConvertUCS2toUTF8 converter(mStr);
copy_string(aString.BeginReading(start), aString.EndReading(end), converter);
mLength = converter.Size();
if (mLength != count) {
NS_ERROR("Input invalid or incorrect length was calculated");
Truncate();
}
// Make sure our buffer's big enough, so we don't need to do
// multiple allocations.
if(mLength+PRUint32(utf8len+1) > sizeof(mBuffer))
SetCapacity(mLength+utf8len+1);
// |SetCapacity| normally doesn't guarantee the use we are putting it to here (see its interface comment in nsAString.h),
// we can only use it since our local implementation, |nsCString::SetCapacity|, is known to do what we want
char* out = mStr+mLength;
PRUint32 ucs4=0;
for (p = aString, count = aLength; 0 != count && 0 != (*p); count--, p++)
{
if (0 == ucs4)
{
if (! ((*p) & 0xFF80))
{
*out++ = (char)*p;
}
else if (! ((*p) & 0xF800))
{
*out++ = 0xC0 | (char)((*p) >> 6);
*out++ = 0x80 | (char)(0x003F & (*p));
}
else
{
if (0xD800 == (0xFC00 & (*p)))
{
// D800- DBFF - High Surrogate
// N = (H- D800) *400 + 10000 + ...
ucs4 = 0x10000 | ((0x03FF & (*p)) << 10);
}
else if (0xDC00 == (0xFC00 & (*p)))
{
// DC00- DFFF - Low Surrogate
// error here. We should hit High Surrogate first
// Do not output any thing in this case
}
else
{
*out++ = 0xE0 | (char)((*p) >> 12);
*out++ = 0x80 | (char)(0x003F & (*p >> 6));
*out++ = 0x80 | (char)(0x003F & (*p) );
}
}
}
else
{
if (0xDC00 == (0xFC00 & (*p)))
{
// DC00- DFFF - Low Surrogate
// N += ( L - DC00 )
ucs4 |= (0x03FF & (*p));
// 0001 0000-001F FFFF
*out++ = 0xF0 | (char)(ucs4 >> 18);
*out++ = 0x80 | (char)(0x003F & (ucs4 >> 12));
*out++ = 0x80 | (char)(0x003F & (ucs4 >> 6));
*out++ = 0x80 | (char)(0x003F & ucs4) ;
}
else
{
// Got a High Surrogate but no low surrogate
// output nothing.
}
ucs4 = 0;
}
}
*out = '\0'; // null terminate
mLength += utf8len;
}
}
NS_LossyConvertUCS2toASCII::NS_LossyConvertUCS2toASCII( const nsAString& aString )

View File

@ -431,24 +431,15 @@ class NS_COM NS_ConvertUCS2toUTF8
*/
{
public:
friend NS_COM char* ToNewUTF8String( const nsAString& aSource );
public:
explicit
NS_ConvertUCS2toUTF8( const PRUnichar* aString )
explicit NS_ConvertUCS2toUTF8( const PRUnichar* aString );
NS_ConvertUCS2toUTF8( const PRUnichar* aString, PRUint32 aLength );
explicit NS_ConvertUCS2toUTF8( const nsAString& aString )
{
Append( aString, ~PRUint32(0) /* MAXINT */);
Init(aString);
}
NS_ConvertUCS2toUTF8( const PRUnichar* aString, PRUint32 aLength )
{
Append( aString, aLength );
}
explicit NS_ConvertUCS2toUTF8( const nsAString& aString );
protected:
void Append( const PRUnichar* aString, PRUint32 aLength );
void Init( const nsAString& aString );
private:
// NOT TO BE IMPLEMENTED

View File

@ -54,8 +54,10 @@
#include "prdtoa.h"
#endif
#ifdef DEBUG
static const char* kPossibleNull = "Error: possible unintended null in string";
static const char* kNullPointerError = "Error: unexpected null ptr";
#endif
static const char* kWhitespace="\b\t\r\n ";
const nsBufferHandle<PRUnichar>*

View File

@ -54,6 +54,10 @@ class UTF8traits
#define PLANE1_BASE 0x00010000
#define UCS2_REPLACEMENT_CHAR 0xfffd
/**
* A character sink (see |copy_string| in nsAlgorithm.h) for converting
* UTF-8 to UCS2 (really UTF-16).
*/
class ConvertUTF8toUCS2
{
public:
@ -181,12 +185,21 @@ class ConvertUTF8toUCS2
return p - start;
}
// Stores a zero element at the current output position to terminate
// the converted string. |mBuffer| is not advanced and no bounds check
// is made here.
void write_terminator()
{
*mBuffer = buffer_type(0);
}
private:
buffer_type* mStart;
buffer_type* mBuffer;
PRBool mErrorEncountered;
};
/**
* A character sink (see |copy_string| in nsAlgorithm.h) for computing
* the length of a UTF-8 string.
*/
class CalculateUTF8Length
{
public:
@ -242,4 +255,148 @@ class CalculateUTF8Length
PRBool mErrorEncountered;
};
/**
 * A character sink (see |copy_string| in nsAlgorithm.h) for converting
 * UCS2 (really UTF-16) to UTF-8.
 *
 * The output buffer is supplied by the caller and is written without
 * any bounds checking.  |CalculateUTF8Size| (below) must therefore make
 * exactly the same decisions as this class for every input, so that the
 * size it reports equals the number of bytes written here.
 */
class ConvertUCS2toUTF8
{
  public:
    typedef nsAString::char_type  value_type;
    typedef nsACString::char_type buffer_type;

    // The error handling here is more lenient than that in
    // |ConvertUTF8toUCS2|, but it's that way for backwards
    // compatibility: an unpaired surrogate produces no output
    // instead of a replacement character.

    ConvertUCS2toUTF8( buffer_type* aBuffer )
      : mStart(aBuffer), mBuffer(aBuffer) {}

    // Number of bytes written to the buffer so far.
    size_t Size() const { return mBuffer - mStart; }

    /**
     * Converts the |N| UTF-16 code units starting at |start| and appends
     * the resulting UTF-8 bytes at the current output position.
     *
     * Always returns |N|.  A surrogate pair must lie entirely within one
     * call: a high surrogate in the last position of the fragment is
     * reported with NS_ERROR and dropped.
     */
    PRUint32 write( const value_type* start, PRUint32 N )
      {
        for (const value_type *p = start, *end = start + N; p < end; ++p)
          {
            const value_type c = *p;

            if (!(c & 0xFF80)) // U+0000 - U+007F
              {
                *mBuffer++ = buffer_type(c);
              }
            else if (!(c & 0xF800)) // U+0080 - U+07FF
              {
                *mBuffer++ = buffer_type(0xC0 | (c >> 6));
                *mBuffer++ = buffer_type(0x80 | (0x003F & c));
              }
            else if (0xD800 == (0xFC00 & c)) // U+D800 - U+DBFF
              {
                // High surrogate: look at the next unit before consuming it.
                if (p + 1 == end)
                  {
                    NS_ERROR("Surrogate pair split between fragments");
                    return N;
                  }

                const value_type low = *(p + 1);
                if (0xDC00 == (0xFC00 & low)) // U+DC00 - U+DFFF
                  {
                    // N = (H - D800) * 400 + 10000 + (L - DC00)
                    const PRUint32 ucs4 = (0x10000 + ((0x03FF & c) << 10))
                                          | (0x03FF & low);
                    ++p; // the low surrogate belongs to this pair

                    // 0001 0000 - 001F FFFF
                    *mBuffer++ = buffer_type(0xF0 | (ucs4 >> 18));
                    *mBuffer++ = buffer_type(0x80 | (0x003F & (ucs4 >> 12)));
                    *mBuffer++ = buffer_type(0x80 | (0x003F & (ucs4 >> 6)));
                    *mBuffer++ = buffer_type(0x80 | (0x003F & ucs4));
                  }
                else
                  {
                    NS_ERROR("got a High Surrogate but no low surrogate");
                    // Output nothing for the lone high surrogate.  The
                    // following unit is NOT consumed, so a valid
                    // character after the stray surrogate is converted
                    // on the next iteration instead of being lost.
                  }
              }
            else if (0xDC00 == (0xFC00 & c)) // U+DC00 - U+DFFF
              {
                NS_ERROR("got a low Surrogate but no high surrogate");
                // output nothing.
              }
            else // U+0800 - U+D7FF, U+E000 - U+FFFF
              {
                *mBuffer++ = buffer_type(0xE0 | (c >> 12));
                *mBuffer++ = buffer_type(0x80 | (0x003F & (c >> 6)));
                *mBuffer++ = buffer_type(0x80 | (0x003F & c));
              }
          }

        return N;
      }

    // Stores a terminating zero byte at the current output position
    // without advancing it; the caller must have left room for it.
    void write_terminator()
      {
        *mBuffer = buffer_type(0);
      }

  private:
    buffer_type* mStart;   // first byte of the output buffer
    buffer_type* mBuffer;  // next byte to be written
};

/**
 * A character sink (see |copy_string| in nsAlgorithm.h) for computing
 * the number of bytes a UCS2 (really UTF-16) would occupy in UTF-8.
 *
 * This mirrors the branch structure of |ConvertUCS2toUTF8::write|,
 * including what is skipped for malformed input, so that the size
 * reported here matches the number of bytes that class writes.
 */
class CalculateUTF8Size
{
  public:
    typedef nsAString::char_type value_type;

    CalculateUTF8Size()
      : mSize(0) { }

    // Number of UTF-8 bytes needed for everything seen so far,
    // not counting a terminator.
    size_t Size() const { return mSize; }

    /**
     * Adds the UTF-8 size of the |N| UTF-16 code units starting at
     * |start| to the running total.  Always returns |N|.
     */
    PRUint32 write( const value_type* start, PRUint32 N )
      {
        // Assume UCS2 surrogate pairs won't be spread across fragments.
        for (const value_type *p = start, *end = start + N; p < end; ++p)
          {
            const value_type c = *p;

            if (!(c & 0xFF80)) // U+0000 - U+007F
              {
                mSize += 1;
              }
            else if (!(c & 0xF800)) // U+0080 - U+07FF
              {
                mSize += 2;
              }
            else if (0xD800 == (0xFC00 & c)) // U+D800 - U+DBFF
              {
                if (p + 1 == end)
                  {
                    NS_ERROR("Surrogate pair split between fragments");
                    return N;
                  }

                if (0xDC00 == (0xFC00 & *(p + 1))) // U+DC00 - U+DFFF
                  {
                    ++p; // consume the low half of the pair
                    mSize += 4;
                  }
                else
                  {
                    NS_ERROR("got a high Surrogate but no low surrogate");
                    // Count nothing for the lone surrogate and leave the
                    // following unit to be counted on the next iteration.
                  }
              }
            else if (0xDC00 == (0xFC00 & c)) // U+DC00 - U+DFFF
              {
                NS_ERROR("got a low Surrogate but no high surrogate");
              }
            else // U+0800 - U+D7FF, U+E000 - U+FFFF
              {
                mSize += 3;
              }
          }

        return N;
      }

  private:
    size_t mSize; // running total of UTF-8 bytes
};
#endif /* !defined(nsUTF8Utils_h_) */

View File

@ -209,24 +209,18 @@ NS_COM
char*
ToNewUTF8String( const nsAString& aSource )
{
// XXX The conversion code in NS_ConvertUCS2toUTF8 needs to be
// refactored so that we can use it here without a double-copy.
NS_ConvertUCS2toUTF8 temp(aSource);
nsAString::const_iterator start, end;
CalculateUTF8Size calculator;
copy_string(aSource.BeginReading(start), aSource.EndReading(end),
calculator);
char* result;
if (temp.GetOwnsBuffer()) {
// We allocated. Trick the string into not freeing its buffer to
// avoid an extra allocation.
result = temp.mStr;
char *result = NS_STATIC_CAST(char*,
nsMemory::Alloc(calculator.Size() + 1));
temp.mStr=0;
temp.SetOwnsBuffer(PR_FALSE);
}
else {
// We didn't allocate a buffer, so we need to copy it out of the
// nsCAutoString's storage.
result = ToNewCString(temp);
}
ConvertUCS2toUTF8 converter(result);
copy_string(aSource.BeginReading(start), aSource.EndReading(end),
converter).write_terminator();
NS_ASSERTION(calculator.Size() == converter.Size(), "length mismatch");
return result;
}
@ -285,7 +279,7 @@ UTF8ToNewUnicode( const nsACString& aSource )
ConvertUTF8toUCS2 converter(result);
copy_string(aSource.BeginReading(start), aSource.EndReading(end),
converter);
converter).write_terminator();
NS_ASSERTION(calculator.Length() == converter.Length(), "length mismatch");
return result;