mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-26 06:11:37 +00:00
bug 183156 : replace UCS2 in function/method names with UTF16 and update the
document accordingly. r=jag, sr=alecf
This commit is contained in:
parent
06645ab757
commit
b33261e746
@ -516,9 +516,10 @@ foo::GetShortName( nsAString& aResult ) const
|
||||
If your string happens to be wide,
|
||||
you'll need to convert it before you can <span class="code">printf</span> something reasonable.
|
||||
If it's just for debugging,
|
||||
you probably wouldn't care if something odd was printed in the case of a UCS2 character that didn't have
|
||||
an ASCII equivalent.
|
||||
The simplest thing in this case is to make a temporary conversion using <span class="code">NS_ConvertUCS2toUTF8</span>.
|
||||
you probably wouldn't care if something odd was printed in the case of a Unicode character that didn't have
|
||||
an ASCII equivalent. (If you have a UTF-8 terminal, the result is
|
||||
perfectly legible and nothing odd is printed.)
|
||||
The simplest thing in this case is to make a temporary conversion using <span class="code">NS_ConvertUTF16toUTF8</span>.
|
||||
The result is conveniently flat already, so getting the pointer is simple.
|
||||
Remember not to hold onto the pointer you get out of this beyond the lifetime of the temporary.
|
||||
</dd>
|
||||
@ -534,14 +535,14 @@ void PrintSomeStrings( const nsAString& aString, const PRUnichar* aKey, const ns
|
||||
printf("%s\n", <span class="notice">PromiseFlatCString(</span>aCString<span class="notice">).get()</span>); // GOOD
|
||||
|
||||
// the simplest way to get a |printf|-able |const char*| out of a string
|
||||
printf("%s\n", <span class="notice">NS_ConvertUCS2toUTF8(</span>aKey<span class="notice">).get()</span>); // GOOD
|
||||
printf("%s\n", <span class="notice">NS_ConvertUTF16toUTF8(</span>aKey<span class="notice">).get()</span>); // GOOD
|
||||
|
||||
// works just as well with a formal wide string type...
|
||||
printf("%s\n", <span class="notice">NS_ConvertUCS2toUTF8(</span>aString<span class="notice">).get()</span>);
|
||||
printf("%s\n", <span class="notice">NS_ConvertUTF16toUTF8(</span>aString<span class="notice">).get()</span>);
|
||||
|
||||
|
||||
// But don't hold onto the pointer longer than the lifetime of the temporary!
|
||||
<span class="warning">const char* cstring = NS_ConvertUCS2toUTF8(aKey).get(); // BAD! |cstring| is dangling
|
||||
<span class="warning">const char* cstring = NS_ConvertUTF16toUTF8(aKey).get(); // BAD! |cstring| is dangling
|
||||
printf("%s\n", cstring);</span>
|
||||
}
|
||||
</pre>
|
||||
@ -555,6 +556,15 @@ void PrintSomeStrings( const nsAString& aString, const PRUnichar* aKey, const ns
|
||||
Some of the URLs may be out-dated or moved.
|
||||
The messages are in order from oldest to newest.
|
||||
</p>
|
||||
<p class="editnote">[Note : In June, 2003, these emails were modified
|
||||
to better reflect what is stored in 'wide' string
|
||||
classes (UTF-16 string instead of UCS-2) and what
|
||||
related methods do as a part of the patch for <a href=
|
||||
"http://bugzilla.mozilla.org/show_bug.cgi?id=183156"
|
||||
title="replace UCS2 in function/class/method names with UTF16">bug 183156</a>.
|
||||
Therefore, they're a little different from the original emails
|
||||
written by <a href="http://ScottCollins.net/">Scott Collins</a>]
|
||||
</p>
|
||||
<hr>
|
||||
<pre>
|
||||
Date: Thu, 13 Apr 2000 19:41:47 -0400
|
||||
@ -570,19 +580,25 @@ rambling, and for the fact that this message may accidentally mix
|
||||
discussion of how things <strong>are</strong> and how they will be.
|
||||
|
||||
<p>There are many different possible encodings. Three in common use in
|
||||
the Mozilla source base are: ASCII, UCS2, and UTF8. In ASCII, every
|
||||
the Mozilla source base are: ASCII, UTF-16, and UTF-8. In ASCII, every
|
||||
<!--the Mozilla source base are: ASCII, UCS2, and UTF8. In ASCII, every-->
|
||||
character fits in 7-bits and is typically stored in an 8-bit byte. We
|
||||
usually represent ASCII strings with <span class="code">nsCString</span>s, <span class="code">nsXPIDLCString</span>s,
|
||||
or <span class="code">char</span> string literals. In UCS2, characters occupy 16 bits each.
|
||||
We usually represent UCS2 strings as <span class="code">nsString</span>s, etc., i.e., two-byte
|
||||
or `wide' strings. UTF8 is a multi-byte encoding. A character might
|
||||
occupy one, two, or three bytes. It is easiest to store and
|
||||
or <span class="code">char</span> string literals. In UTF-16, characters occupy one 16-bit code unit (
|
||||
<a href="http://www.unicode.org/glossary/index.html#BMP_character">
|
||||
<abbr title="Basic Multilingual Plane">BMP</abbr> characters</a>)
|
||||
or two 16-bit code units
|
||||
(<a href="http://www.unicode.org/glossary/index.html#supplementary_character">
|
||||
<abbr title="Supplementary Plane : Plane 1 through 16">non-BMP</abbr> characters</a>).
|
||||
We usually represent UTF-16 strings as <span class="code">nsString</span>s, etc., i.e., two-byte
|
||||
or `wide' strings. UTF-8 is a multi-byte encoding. A character might
|
||||
occupy one, two, three, or four bytes. It is easiest to store and
|
||||
manipulate such a string within a single-byte or `narrow' string
|
||||
implementation.
|
||||
|
||||
<p>None of our current string implementations know the encoding of the
|
||||
data they hold at any given moment. An <span class="code">nsCString</span> might legitimately
|
||||
hold data encoded in ASCII, UTF8, or even EBCDIC for that matter.
|
||||
hold data encoded in ASCII, UTF-8 or even EBCDIC for that matter.
|
||||
|
||||
<p>Operations that convert from one encoding to another, or operations
|
||||
that are encoding sensitive (e.g., <span class="code">to_upper</span>), rightly belong in
|
||||
@ -590,7 +606,7 @@ i18n. The fact that our current string interfaces automatically and
|
||||
implicitly convert between wide and narrow strings is actually the
|
||||
source of many errors in two particular categories: (1) unintended
|
||||
extra work, (2) mistaken re-encoding, e.g., accidentally `converting'
|
||||
a UTF8 string to UCS2 by pretending the UTF8 string is ASCII and then
|
||||
a UTF-8 string to UTF-16 by pretending the UTF-8 string is ASCII and then
|
||||
padding with <span class="code">'\0'</span>s.
|
||||
|
||||
<p>We've known these were bad for a long time, and have been trying to
|
||||
@ -600,7 +616,7 @@ ramifications.
|
||||
|
||||
<div class="source-code">
|
||||
<pre>
|
||||
void foo( const nsString& aUCS2string );
|
||||
void foo( const nsString& aUTF16string );
|
||||
|
||||
foo("hello"); // works! constructs a temporary |nsString| by
|
||||
// converting the ASCII literal with padding.
|
||||
@ -620,13 +636,13 @@ foo( nsAutoString("hello") );
|
||||
<p>which still copy/converts, but at least it probably doesn't need to do
|
||||
a heap allocation. In the best of all worlds, no conversion, copying,
|
||||
or allocation would be necessary. To do that, you would need to be
|
||||
able to directly specify a UCS2 string, e.g., with the <span class="code">L"hello"</span>
|
||||
able to directly specify a UTF-16 string, e.g., with the <span class="code">L"hello"</span>
|
||||
notation, and wrap that in an interface that just held a pointer.
|
||||
E.g., something like
|
||||
|
||||
<div class="source-code">
|
||||
<pre>
|
||||
void foo( const nsAReadableString& aUCS2string );
|
||||
void foo( const nsAReadableString& aUTF16string );
|
||||
|
||||
foo( nsLiteralString(L"hello") );
|
||||
</pre>
|
||||
@ -675,10 +691,10 @@ class that derives from <span class="code">nsAutoString</span>, but allows const
|
||||
|
||||
<div class="source-code">
|
||||
<pre>
|
||||
class NS_ConvertASCIItoUCS2 : public nsAutoString
|
||||
class NS_ConvertASCIItoUTF16 : public nsAutoString
|
||||
{
|
||||
public:
|
||||
NS_ConvertASCIItoUCS2( const char* );
|
||||
NS_ConvertASCIItoUTF16( const char* );
|
||||
// ...
|
||||
};
|
||||
</pre>
|
||||
@ -688,7 +704,7 @@ class NS_ConvertASCIItoUCS2 : public nsAutoString
|
||||
|
||||
<div class="source-code">
|
||||
<pre>
|
||||
foo( NS_ConvertASCIItoUCS2("hello") );
|
||||
foo( NS_ConvertASCIItoUTF16("hello") );
|
||||
</pre>
|
||||
</div>
|
||||
|
||||
@ -697,8 +713,8 @@ acts like a function call to an explicit encoding conversion. It <strong>is</st
|
||||
a function call to an explicit encoding conversion. We think that
|
||||
this naming pattern has room for growth. In the meeting, we concluded
|
||||
that the best representation for encoding conversions is a family of
|
||||
functions, and <span class="code">NS_ConvertASCIItoUCS2</span> fits right in. We think that
|
||||
XPCOM probably can't live without the ASCII to UCS2 conversion (though
|
||||
functions, and <span class="code">NS_ConvertASCIItoUTF16</span> fits right in. We think that
|
||||
XPCOM probably can't live without the ASCII to UTF-16 conversion (though
|
||||
as explicit as possible) but that all others rightly belong in i18n
|
||||
land.
|
||||
|
||||
@ -710,19 +726,19 @@ the `WithConversion' form must be used. E.g.,
|
||||
|
||||
<div class="source-code">
|
||||
<pre>
|
||||
nsString aUCS2string;
|
||||
nsString aUTF16string;
|
||||
nsCString anASCIIstring;
|
||||
// ...
|
||||
|
||||
aUCS2string += anASCIIstring; // Currently legal, but not for long
|
||||
aUCS2string.Append(anASCIIstring); // same
|
||||
aUTF16string += anASCIIstring; // Currently legal, but not for long
|
||||
aUTF16string.Append(anASCIIstring); // same
|
||||
|
||||
aUCS2string.AppendWithConversion(anASCIIstring); // the new way
|
||||
aUTF16string.AppendWithConversion(anASCIIstring); // the new way
|
||||
|
||||
if ( aUCS2string == anASCIIstring ) // Sorry, this is going away too
|
||||
if ( aUTF16string == anASCIIstring ) // Sorry, this is going away too
|
||||
// ...
|
||||
|
||||
if ( aUCS2string.EqualsWithConversion(anASCIIstring) )
|
||||
if ( aUTF16string.EqualsWithConversion(anASCIIstring) )
|
||||
// ...
|
||||
</pre>
|
||||
</div>
|
||||
@ -747,8 +763,8 @@ unrelated to encoding issues, so I'll defer it to another post.
|
||||
|
||||
<div class="source-code">
|
||||
<pre>
|
||||
xxxConvertingASCIItoUCS2
|
||||
xxxConvertingUCS2toASCII
|
||||
xxxConvertingASCIItoUTF16
|
||||
xxxConvertingUTF16toASCII
|
||||
</pre>
|
||||
</div>
|
||||
|
||||
@ -781,7 +797,7 @@ appealing, but more likely to work, like
|
||||
|
||||
<div class="source-code">
|
||||
<pre>
|
||||
NS_ConvertASCIItoUCS2("Hello")
|
||||
NS_ConvertASCIItoUTF16("Hello")
|
||||
</pre>
|
||||
</div>
|
||||
|
||||
@ -800,7 +816,7 @@ often we are converting constant literal strings, and why.
|
||||
`WithConversion' forms where appropriate. I was also converting
|
||||
things to use <span class="code">NS_ConvertToString</span> where appropriate; unless I get
|
||||
talked out of it, I want to switch midstream to
|
||||
<span class="code">NS_ConvertASCIItoUCS2</span>, then go back and fix up the
|
||||
<span class="code">NS_ConvertASCIItoUTF16</span>, then go back and fix up the
|
||||
<span class="code">NS_ConvertToString</span> instances later. I've set things up so I can
|
||||
check in as I go. After all these conversions have been done, I'll be
|
||||
able to throw the switch (what switch? NEW_STRING_APIS) which will
|
||||
@ -815,8 +831,8 @@ reasoning.)
|
||||
<ul>
|
||||
<li>how really annoying this whole topic is
|
||||
<li>how bad <span class="code">L"xxx"</span> is
|
||||
<li>whether to move forward with <span class="code">NS_ConvertASCIItoUCS2</span>
|
||||
<li>whether we should move to xxxConvertingASCIItoUCS2 etc instead
|
||||
<li>whether to move forward with <span class="code">NS_ConvertASCIItoUTF16</span>
|
||||
<li>whether we should move to xxxConvertingASCIItoUTF16 etc instead
|
||||
of `WithConverting'
|
||||
<li>arguments about where encoding conversions should live
|
||||
<li>arguments about whether going between 1 and 2 byte storage is an
|
||||
@ -908,7 +924,7 @@ standard as we move forward.
|
||||
#define NS_LITERAL_STRING(s) nsLiteralString(L##s, \
|
||||
(sizeof(L##s)/sizeof(wchar_t))-1)
|
||||
#else
|
||||
#define NS_LITERAL_STRING(s) NS_ConvertASCIItoUCS2(s, \
|
||||
#define NS_LITERAL_STRING(s) NS_ConvertASCIItoUTF16(s, \
|
||||
sizeof(s)-1)
|
||||
#endif
|
||||
</pre>
|
||||
@ -1045,7 +1061,7 @@ example I gave above, that is, the one with <span class="code">AssignWithConvers
|
||||
|
||||
<p><span class="code">Assign</span> still exists. <span class="code">AssignWithConversion</span> takes on that
|
||||
functionality for assignments that require encoding transformations
|
||||
(e.g., from ASCII to UCS2). <span class="code">SetString</span> is gone, since it was always
|
||||
(e.g., from ASCII to UTF-16). <span class="code">SetString</span> is gone, since it was always
|
||||
a synonym for <span class="code">Assign</span>.
|
||||
|
||||
<p>Learn more about the general APIs for strings that we are trying to
|
||||
@ -1263,7 +1279,7 @@ strings semantics
|
||||
<p>In a later message, Chris Waterson asks a related question
|
||||
<pre class="email-quote">
|
||||
>scc: should we add <span class="code">operator PRUnichar*()</span> to
|
||||
>NS_ConvertASCIItoUCS2?
|
||||
>NS_ConvertASCIItoUTF16?
|
||||
</pre>
|
||||
|
||||
<p>And I reply:
|
||||
@ -1999,7 +2015,7 @@ Subject: Re: how to free an nsString::ToNewCString
|
||||
|
||||
<hr>
|
||||
|
||||
<p>You use several <span class="code">NS_ConvertASCIItoUCS2("...").get()</span>, these should be
|
||||
<p>You use several <span class="code">NS_ConvertASCIItoUTF16("...").get()</span>, these should be
|
||||
|
||||
NS_LITERAL_STRING("...").get()
|
||||
|
||||
@ -2037,7 +2053,7 @@ DoSomething( nsAWritableString& answer )
|
||||
if ( localFile )
|
||||
{
|
||||
|
||||
localFile->SetPersistentDescriptor(NS_ConvertUCS2toUTF8(path));
|
||||
localFile->SetPersistentDescriptor(NS_ConvertUTF16toUTF8(path));
|
||||
|
||||
nsXPIDLString converted_path;
|
||||
localFile->GetUnicodePath(getter_Copies(converted_path));
|
||||
|
@ -1087,7 +1087,7 @@ PRBool nsCString::EqualsWithConversion(const char* aCString,PRBool aIgnoreCase,P
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
NS_ConvertUCS2toUTF8::NS_ConvertUCS2toUTF8( const PRUnichar* aString )
|
||||
NS_ConvertUTF16toUTF8::NS_ConvertUTF16toUTF8( const PRUnichar* aString )
|
||||
{
|
||||
if (!aString)
|
||||
// Leave us as an uninitialized nsCAutoString.
|
||||
@ -1095,7 +1095,7 @@ NS_ConvertUCS2toUTF8::NS_ConvertUCS2toUTF8( const PRUnichar* aString )
|
||||
Init(aString, nsCharTraits<PRUnichar>::length(aString));
|
||||
}
|
||||
|
||||
NS_ConvertUCS2toUTF8::NS_ConvertUCS2toUTF8( const PRUnichar* aString, PRUint32 aLength )
|
||||
NS_ConvertUTF16toUTF8::NS_ConvertUTF16toUTF8( const PRUnichar* aString, PRUint32 aLength )
|
||||
{
|
||||
if (!aString)
|
||||
// Leave us as an uninitialized nsCAutoString.
|
||||
@ -1103,13 +1103,13 @@ NS_ConvertUCS2toUTF8::NS_ConvertUCS2toUTF8( const PRUnichar* aString, PRUint32 a
|
||||
Init(aString, aLength);
|
||||
}
|
||||
|
||||
NS_ConvertUCS2toUTF8::NS_ConvertUCS2toUTF8( const nsASingleFragmentString& aString )
|
||||
NS_ConvertUTF16toUTF8::NS_ConvertUTF16toUTF8( const nsASingleFragmentString& aString )
|
||||
{
|
||||
nsASingleFragmentString::const_char_iterator start;
|
||||
Init(aString.BeginReading(start), aString.Length());
|
||||
}
|
||||
|
||||
NS_ConvertUCS2toUTF8::NS_ConvertUCS2toUTF8( const nsAString& aString )
|
||||
NS_ConvertUTF16toUTF8::NS_ConvertUTF16toUTF8( const nsAString& aString )
|
||||
{
|
||||
// Compute space required: do this once so we don't incur multiple
|
||||
// allocations. This "optimization" is probably of dubious value...
|
||||
@ -1128,7 +1128,7 @@ NS_ConvertUCS2toUTF8::NS_ConvertUCS2toUTF8( const nsAString& aString )
|
||||
|
||||
// All ready? Time to convert
|
||||
|
||||
ConvertUCS2toUTF8 converter(mStr);
|
||||
ConvertUTF16toUTF8 converter(mStr);
|
||||
copy_string(aString.BeginReading(start), aString.EndReading(end),
|
||||
converter).write_terminator();
|
||||
mLength = converter.Size();
|
||||
@ -1140,7 +1140,7 @@ NS_ConvertUCS2toUTF8::NS_ConvertUCS2toUTF8( const nsAString& aString )
|
||||
}
|
||||
}
|
||||
|
||||
void NS_ConvertUCS2toUTF8::Init( const PRUnichar* aString, PRUint32 aLength )
|
||||
void NS_ConvertUTF16toUTF8::Init( const PRUnichar* aString, PRUint32 aLength )
|
||||
{
|
||||
// Compute space required: do this once so we don't incur multiple
|
||||
// allocations. This "optimization" is probably of dubious value...
|
||||
@ -1157,7 +1157,7 @@ void NS_ConvertUCS2toUTF8::Init( const PRUnichar* aString, PRUint32 aLength )
|
||||
|
||||
// All ready? Time to convert
|
||||
|
||||
ConvertUCS2toUTF8 converter(mStr);
|
||||
ConvertUTF16toUTF8 converter(mStr);
|
||||
converter.write(aString, aLength);
|
||||
mLength = converter.Size();
|
||||
mStr[mLength] = char_type(0);
|
||||
@ -1169,7 +1169,7 @@ void NS_ConvertUCS2toUTF8::Init( const PRUnichar* aString, PRUint32 aLength )
|
||||
}
|
||||
}
|
||||
|
||||
NS_LossyConvertUCS2toASCII::NS_LossyConvertUCS2toASCII( const nsAString& aString )
|
||||
NS_LossyConvertUTF16toASCII::NS_LossyConvertUTF16toASCII( const nsAString& aString )
|
||||
{
|
||||
SetCapacity(aString.Length());
|
||||
|
||||
|
@ -422,32 +422,33 @@ public:
|
||||
// NS_DEF_DERIVED_STRING_OPERATOR_PLUS(nsCAutoString, char)
|
||||
|
||||
/**
|
||||
* A helper class that converts a UCS2 string to UTF8
|
||||
* A helper class that converts a UTF-16 string to UTF-8
|
||||
*/
|
||||
class NS_COM NS_ConvertUCS2toUTF8
|
||||
class NS_COM NS_ConvertUTF16toUTF8
|
||||
: public nsCAutoString
|
||||
/*
|
||||
...
|
||||
*/
|
||||
{
|
||||
public:
|
||||
explicit NS_ConvertUCS2toUTF8( const PRUnichar* aString );
|
||||
NS_ConvertUCS2toUTF8( const PRUnichar* aString, PRUint32 aLength );
|
||||
explicit NS_ConvertUCS2toUTF8( const nsAString& aString );
|
||||
explicit NS_ConvertUCS2toUTF8( const nsASingleFragmentString& aString );
|
||||
explicit NS_ConvertUTF16toUTF8( const PRUnichar* aString );
|
||||
NS_ConvertUTF16toUTF8( const PRUnichar* aString, PRUint32 aLength );
|
||||
explicit NS_ConvertUTF16toUTF8( const nsAString& aString );
|
||||
explicit NS_ConvertUTF16toUTF8( const nsASingleFragmentString& aString );
|
||||
|
||||
protected:
|
||||
void Init( const PRUnichar* aString, PRUint32 aLength );
|
||||
|
||||
private:
|
||||
// NOT TO BE IMPLEMENTED
|
||||
NS_ConvertUCS2toUTF8( char );
|
||||
NS_ConvertUTF16toUTF8( char );
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* A helper class that converts a UCS2 string to ASCII in a lossy manner
|
||||
* A helper class that converts a UTF-16 string to ASCII in a lossy manner
|
||||
*/
|
||||
class NS_COM NS_LossyConvertUCS2toASCII
|
||||
class NS_COM NS_LossyConvertUTF16toASCII
|
||||
: public nsCAutoString
|
||||
/*
|
||||
...
|
||||
@ -455,21 +456,25 @@ class NS_COM NS_LossyConvertUCS2toASCII
|
||||
{
|
||||
public:
|
||||
explicit
|
||||
NS_LossyConvertUCS2toASCII( const PRUnichar* aString )
|
||||
NS_LossyConvertUTF16toASCII( const PRUnichar* aString )
|
||||
{
|
||||
AppendWithConversion( aString, ~PRUint32(0) /* MAXINT */);
|
||||
}
|
||||
|
||||
NS_LossyConvertUCS2toASCII( const PRUnichar* aString, PRUint32 aLength )
|
||||
NS_LossyConvertUTF16toASCII( const PRUnichar* aString, PRUint32 aLength )
|
||||
{
|
||||
AppendWithConversion( aString, aLength );
|
||||
}
|
||||
|
||||
explicit NS_LossyConvertUCS2toASCII( const nsAString& aString );
|
||||
explicit NS_LossyConvertUTF16toASCII( const nsAString& aString );
|
||||
|
||||
private:
|
||||
// NOT TO BE IMPLEMENTED
|
||||
NS_LossyConvertUCS2toASCII( char );
|
||||
NS_LossyConvertUTF16toASCII( char );
|
||||
};
|
||||
|
||||
// Backward compatibility
|
||||
typedef NS_ConvertUTF16toUTF8 NS_ConvertUCS2toUTF8;
|
||||
typedef NS_LossyConvertUTF16toASCII NS_LossyConvertUCS2toASCII;
|
||||
|
||||
#endif /* !defined(nsString_h__) */
|
||||
|
@ -1331,12 +1331,12 @@ nsAutoString::nsAutoString(const CBufDescriptor& aBuffer) : nsString() {
|
||||
}
|
||||
|
||||
void
|
||||
NS_ConvertASCIItoUCS2::Init( const char* aCString, PRUint32 aLength )
|
||||
NS_ConvertASCIItoUTF16::Init( const char* aCString, PRUint32 aLength )
|
||||
{
|
||||
AppendWithConversion(aCString,aLength);
|
||||
}
|
||||
|
||||
NS_ConvertASCIItoUCS2::NS_ConvertASCIItoUCS2( const nsACString& aCString )
|
||||
NS_ConvertASCIItoUTF16::NS_ConvertASCIItoUTF16( const nsACString& aCString )
|
||||
{
|
||||
SetCapacity(aCString.Length());
|
||||
|
||||
@ -1351,7 +1351,7 @@ NS_ConvertASCIItoUCS2::NS_ConvertASCIItoUCS2( const nsACString& aCString )
|
||||
}
|
||||
}
|
||||
|
||||
NS_ConvertUTF8toUCS2::NS_ConvertUTF8toUCS2( const nsACString& aCString )
|
||||
NS_ConvertUTF8toUTF16::NS_ConvertUTF8toUTF16( const nsACString& aCString )
|
||||
{
|
||||
// Compute space required: do this once so we don't incur multiple
|
||||
// allocations. This "optimization" is probably of dubious value...
|
||||
@ -1370,36 +1370,36 @@ NS_ConvertUTF8toUCS2::NS_ConvertUTF8toUCS2( const nsACString& aCString )
|
||||
|
||||
// All ready? Time to convert
|
||||
|
||||
ConvertUTF8toUCS2 converter(mUStr);
|
||||
ConvertUTF8toUTF16 converter(mUStr);
|
||||
copy_string(aCString.BeginReading(start), aCString.EndReading(end),
|
||||
converter).write_terminator();
|
||||
mLength = converter.Length();
|
||||
if (mLength != count)
|
||||
{
|
||||
NS_ERROR("Input wasn't UTF8 or incorrect length was calculated");
|
||||
NS_ERROR("Input wasn't UTF-8 or incorrect length was calculated");
|
||||
Truncate();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
NS_ConvertUTF8toUCS2::NS_ConvertUTF8toUCS2( const nsASingleFragmentCString& aCString )
|
||||
NS_ConvertUTF8toUTF16::NS_ConvertUTF8toUTF16( const nsASingleFragmentCString& aCString )
|
||||
{
|
||||
nsASingleFragmentCString::const_char_iterator start;
|
||||
Init(aCString.BeginReading(start), aCString.Length());
|
||||
}
|
||||
|
||||
NS_ConvertUTF8toUCS2::NS_ConvertUTF8toUCS2( const char* aCString )
|
||||
NS_ConvertUTF8toUTF16::NS_ConvertUTF8toUTF16( const char* aCString )
|
||||
{
|
||||
Init(aCString, nsCharTraits<char>::length(aCString));
|
||||
}
|
||||
|
||||
NS_ConvertUTF8toUCS2::NS_ConvertUTF8toUCS2( const char* aCString, PRUint32 aLength )
|
||||
NS_ConvertUTF8toUTF16::NS_ConvertUTF8toUTF16( const char* aCString, PRUint32 aLength )
|
||||
{
|
||||
Init(aCString, aLength);
|
||||
}
|
||||
|
||||
void
|
||||
NS_ConvertUTF8toUCS2::Init( const char* aCString, PRUint32 aLength )
|
||||
NS_ConvertUTF8toUTF16::Init( const char* aCString, PRUint32 aLength )
|
||||
{
|
||||
// Compute space required: do this once so we don't incur multiple
|
||||
// allocations. This "optimization" is probably of dubious value...
|
||||
@ -1416,7 +1416,7 @@ NS_ConvertUTF8toUCS2::Init( const char* aCString, PRUint32 aLength )
|
||||
|
||||
// All ready? Time to convert
|
||||
|
||||
ConvertUTF8toUCS2 converter(mUStr);
|
||||
ConvertUTF8toUTF16 converter(mUStr);
|
||||
converter.write(aCString, aLength);
|
||||
mLength = converter.Length();
|
||||
mUStr[mLength] = char_type(0);
|
||||
|
@ -484,7 +484,7 @@ public:
|
||||
|
||||
// NS_DEF_DERIVED_STRING_OPERATOR_PLUS(nsAutoString, PRUnichar)
|
||||
|
||||
class NS_COM NS_ConvertASCIItoUCS2
|
||||
class NS_COM NS_ConvertASCIItoUTF16
|
||||
: public nsAutoString
|
||||
/*
|
||||
...
|
||||
@ -492,21 +492,21 @@ class NS_COM NS_ConvertASCIItoUCS2
|
||||
{
|
||||
public:
|
||||
explicit
|
||||
NS_ConvertASCIItoUCS2( const nsACString& aCString );
|
||||
NS_ConvertASCIItoUTF16( const nsACString& aCString );
|
||||
|
||||
explicit
|
||||
NS_ConvertASCIItoUCS2( const nsAFlatCString& aCString )
|
||||
NS_ConvertASCIItoUTF16( const nsAFlatCString& aCString )
|
||||
{
|
||||
Init( aCString.get(), aCString.Length() );
|
||||
}
|
||||
|
||||
explicit
|
||||
NS_ConvertASCIItoUCS2( const char* aCString )
|
||||
NS_ConvertASCIItoUTF16( const char* aCString )
|
||||
{
|
||||
Init( aCString, ~PRUint32(0) /* MAXINT */ );
|
||||
}
|
||||
|
||||
NS_ConvertASCIItoUCS2( const char* aCString, PRUint32 aLength )
|
||||
NS_ConvertASCIItoUTF16( const char* aCString, PRUint32 aLength )
|
||||
{
|
||||
Init( aCString, aLength );
|
||||
}
|
||||
@ -523,23 +523,28 @@ class NS_COM NS_ConvertASCIItoUCS2
|
||||
|
||||
private:
|
||||
// NOT TO BE IMPLEMENTED
|
||||
NS_ConvertASCIItoUCS2( PRUnichar );
|
||||
NS_ConvertASCIItoUTF16( PRUnichar );
|
||||
};
|
||||
|
||||
class NS_COM NS_ConvertUTF8toUCS2
|
||||
|
||||
class NS_COM NS_ConvertUTF8toUTF16
|
||||
: public nsAutoString
|
||||
{
|
||||
public:
|
||||
explicit NS_ConvertUTF8toUCS2( const nsACString& aCString );
|
||||
explicit NS_ConvertUTF8toUCS2( const nsASingleFragmentCString& aCString );
|
||||
explicit NS_ConvertUTF8toUCS2( const char* aCString );
|
||||
NS_ConvertUTF8toUCS2( const char* aCString, PRUint32 aLength );
|
||||
explicit NS_ConvertUTF8toUTF16( const nsACString& aCString );
|
||||
explicit NS_ConvertUTF8toUTF16( const nsASingleFragmentCString& aCString );
|
||||
explicit NS_ConvertUTF8toUTF16( const char* aCString );
|
||||
NS_ConvertUTF8toUTF16( const char* aCString, PRUint32 aLength );
|
||||
|
||||
protected:
|
||||
void Init( const char* aCString, PRUint32 aLength );
|
||||
|
||||
private:
|
||||
NS_ConvertUTF8toUCS2( PRUnichar );
|
||||
NS_ConvertUTF8toUTF16( PRUnichar );
|
||||
};
|
||||
|
||||
// Backward compatibility
|
||||
typedef NS_ConvertASCIItoUTF16 NS_ConvertASCIItoUCS2;
|
||||
typedef NS_ConvertUTF8toUTF16 NS_ConvertUTF8toUCS2;
|
||||
|
||||
#endif /* !defined(nsString2_h__) */
|
||||
|
@ -69,8 +69,8 @@ literal_string( const nsACString::char_type* aPtr, PRUint32 aLength )
|
||||
#define NS_NAMED_MULTILINE_LITERAL_STRING(n,s) nsDependentString n(NS_REINTERPRET_CAST(const nsAString::char_type*, s), PRUint32((sizeof(s)/sizeof(wchar_t))-1))
|
||||
#else
|
||||
#define NS_LL(s) s
|
||||
#define NS_MULTILINE_LITERAL_STRING(s) NS_ConvertASCIItoUCS2(s, PRUint32(sizeof(s)-1))
|
||||
#define NS_NAMED_MULTILINE_LITERAL_STRING(n,s) NS_ConvertASCIItoUCS2 n(s, PRUint32(sizeof(s)-1))
|
||||
#define NS_MULTILINE_LITERAL_STRING(s) NS_ConvertASCIItoUTF16(s, PRUint32(sizeof(s)-1))
|
||||
#define NS_NAMED_MULTILINE_LITERAL_STRING(n,s) NS_ConvertASCIItoUTF16 n(s, PRUint32(sizeof(s)-1))
|
||||
#endif
|
||||
|
||||
/*
|
||||
|
@ -61,7 +61,7 @@
|
||||
* wide version of this class, since wide |printf| is not generally available. That means
|
||||
* to get a wide version of your formatted data, you must, e.g.,
|
||||
*
|
||||
* CopyASCIItoUCS2(nsPrintfCString("%f", 13.917"), myStr);
|
||||
* CopyASCIItoUTF16(nsPrintfCString("%f", 13.917), myStr);
|
||||
*
|
||||
* That's another good reason to avoid this class for anything but numbers ... as strings can be
|
||||
* much more efficiently handled with |NS_LITERAL_[C]STRING| and |nsLiteral[C]String|.
|
||||
|
@ -45,8 +45,8 @@ class nsCString;
|
||||
NS_COM size_t Distance( const nsReadingIterator<PRUnichar>&, const nsReadingIterator<PRUnichar>& );
|
||||
NS_COM size_t Distance( const nsReadingIterator<char>&, const nsReadingIterator<char>& );
|
||||
|
||||
NS_COM void CopyUCS2toASCII( const nsAString& aSource, nsACString& aDest );
|
||||
NS_COM void CopyASCIItoUCS2( const nsACString& aSource, nsAString& aDest );
|
||||
NS_COM void CopyUTF16toASCII( const nsAString& aSource, nsACString& aDest );
|
||||
NS_COM void CopyASCIItoUTF16( const nsACString& aSource, nsAString& aDest );
|
||||
|
||||
NS_COM void CopyUTF16toUTF8( const nsAString& aSource, nsACString& aDest );
|
||||
NS_COM void CopyUTF8toUTF16( const nsACString& aSource, nsAString& aDest );
|
||||
@ -60,6 +60,14 @@ NS_COM void AppendUTF8toUTF16( const nsACString& aSource, nsAString& aDest );
|
||||
NS_COM void AppendUTF16toUTF8( const PRUnichar* aSource, nsACString& aDest );
|
||||
NS_COM void AppendUTF8toUTF16( const char* aSource, nsAString& aDest );
|
||||
|
||||
// Backward compatibility
|
||||
inline
|
||||
NS_COM void CopyUCS2toASCII( const nsAString& aSource, nsACString& aDest )
|
||||
{ CopyUTF16toASCII(aSource, aDest); }
|
||||
inline
|
||||
NS_COM void CopyASCIItoUCS2( const nsACString& aSource, nsAString& aDest )
|
||||
{ CopyASCIItoUTF16(aSource, aDest); }
|
||||
|
||||
/**
|
||||
* Returns a new |char| buffer containing a zero-terminated copy of |aSource|.
|
||||
*
|
||||
@ -88,11 +96,14 @@ NS_COM char* ToNewCString( const nsACString& aSource );
|
||||
/**
|
||||
* Returns a new |char| buffer containing a zero-terminated copy of |aSource|.
|
||||
*
|
||||
* Allocates and returns a new |char| buffer which you must free with |nsMemory::Free|.
|
||||
* Performs a encoding conversion by converting 16-bit wide characters down to UTF8 encoded 8-bits wide string copying |aSource| to your new buffer.
|
||||
* The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls.
|
||||
* Allocates and returns a new |char| buffer which you must free with
|
||||
* |nsMemory::Free|.
|
||||
* Performs an encoding conversion from a UTF-16 string to a UTF-8 string
|
||||
* copying |aSource| to your new buffer.
|
||||
* The new buffer is zero-terminated, but that may not help you if |aSource|
|
||||
* contains embedded nulls.
|
||||
*
|
||||
* @param aSource a 16-bit wide string
|
||||
* @param aSource a UTF-16 string (made of PRUnichar's)
|
||||
* @return a new |char| buffer you must free with |nsMemory::Free|.
|
||||
*/
|
||||
|
||||
@ -100,12 +111,15 @@ NS_COM char* ToNewUTF8String( const nsAString& aSource );
|
||||
|
||||
|
||||
/**
|
||||
* Returns a new |PRUnichar| buffer containing a zero-terminated copy of |aSource|.
|
||||
* Returns a new |PRUnichar| buffer containing a zero-terminated copy of
|
||||
* |aSource|.
|
||||
*
|
||||
* Allocates and returns a new |PRUnichar| buffer which you must free with |nsMemory::Free|.
|
||||
* The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls.
|
||||
* Allocates and returns a new |PRUnichar| buffer which you must free with
|
||||
* |nsMemory::Free|.
|
||||
* The new buffer is zero-terminated, but that may not help you if |aSource|
|
||||
* contains embedded nulls.
|
||||
*
|
||||
* @param aSource a 16-bit wide string
|
||||
* @param aSource a UTF-16 string
|
||||
* @return a new |PRUnichar| buffer you must free with |nsMemory::Free|.
|
||||
*/
|
||||
NS_COM PRUnichar* ToNewUnicode( const nsAString& aSource );
|
||||
@ -129,28 +143,27 @@ NS_COM PRUnichar* ToNewUnicode( const nsACString& aSource );
|
||||
* of |aSource|.
|
||||
*
|
||||
* Allocates and returns a new |PRUnichar| buffer which you must free with
|
||||
* |nsMemory::Free|. Performs an encoding conversion by 0-padding
|
||||
* 8-bit wide characters up to 16-bits wide while copying |aSource| to
|
||||
* your new buffer. This conversion is not well defined; but it
|
||||
* reproduces legacy string behavior. The new buffer is
|
||||
* zero-terminated, but that may not help you if |aSource| contains
|
||||
* embedded nulls.
|
||||
* |nsMemory::Free|. Performs an encoding conversion from UTF-8 to UTF-16
|
||||
* while copying |aSource| to your new buffer. This conversion is well defined
|
||||
* for a valid UTF-8 string. The new buffer is zero-terminated, but that
|
||||
* may not help you if |aSource| contains embedded nulls.
|
||||
*
|
||||
* @param aSource an 8-bit wide string, UTF-8 encoded
|
||||
* @return a new |PRUnichar| buffer you must free with |nsMemory::Free|.
|
||||
* (UTF-16 encoded)
|
||||
*/
|
||||
NS_COM PRUnichar* UTF8ToNewUnicode( const nsACString& aSource );
|
||||
|
||||
/**
|
||||
* Copies |aLength| 16-bit characters from the start of |aSource| to the
|
||||
* Copies |aLength| 16-bit code units from the start of |aSource| to the
|
||||
* |PRUnichar| buffer |aDest|.
|
||||
*
|
||||
* After this operation |aDest| is not null terminated.
|
||||
*
|
||||
* @param aSource a 16-bit wide string
|
||||
* @param aSource a UTF-16 string
|
||||
* @param aSrcOffset start offset in the source string
|
||||
* @param aDest a |PRUnichar| buffer
|
||||
* @param aLength the number of 16-bit characters to copy
|
||||
* @param aLength the number of 16-bit code units to copy
|
||||
* @return pointer to destination buffer - identical to |aDest|
|
||||
*/
|
||||
NS_COM PRUnichar* CopyUnicodeTo( const nsAString& aSource,
|
||||
@ -208,7 +221,7 @@ NS_COM PRBool IsASCII( const nsACString& aString );
|
||||
* XXX This is neither bullet-proof nor an all-purpose UTF-8 validator.
|
||||
* It is mainly written to replace, and is roughly equivalent to,
|
||||
*
|
||||
* str.Equals(NS_ConvertUCS2toUTF8(NS_ConvertUTF8toUCS2(str)))
|
||||
* str.Equals(NS_ConvertUTF16toUTF8(NS_ConvertUTF8toUTF16(str)))
|
||||
*
|
||||
* (see bug 191541)
|
||||
* As such, it does not check for non-UTF-8 7bit encodings such as
|
||||
|
@ -62,15 +62,15 @@ class UTF8traits
|
||||
|
||||
/**
|
||||
* A character sink (see |copy_string| in nsAlgorithm.h) for converting
|
||||
* UTF-8 to UCS2 (really UTF-16).
|
||||
* UTF-8 to UTF-16.
|
||||
*/
|
||||
class ConvertUTF8toUCS2
|
||||
class ConvertUTF8toUTF16
|
||||
{
|
||||
public:
|
||||
typedef nsACString::char_type value_type;
|
||||
typedef nsAString::char_type buffer_type;
|
||||
|
||||
ConvertUTF8toUCS2( buffer_type* aBuffer )
|
||||
ConvertUTF8toUTF16( buffer_type* aBuffer )
|
||||
: mStart(aBuffer), mBuffer(aBuffer), mErrorEncountered(PR_FALSE) {}
|
||||
|
||||
size_t Length() const { return mBuffer - mStart; }
|
||||
@ -267,19 +267,19 @@ class CalculateUTF8Length
|
||||
|
||||
/**
|
||||
* A character sink (see |copy_string| in nsAlgorithm.h) for converting
|
||||
* UCS2 (really UTF-16) to UTF-8.
|
||||
* UTF-16 to UTF-8.
|
||||
*/
|
||||
class ConvertUCS2toUTF8
|
||||
class ConvertUTF16toUTF8
|
||||
{
|
||||
public:
|
||||
typedef nsAString::char_type value_type;
|
||||
typedef nsACString::char_type buffer_type;
|
||||
|
||||
// The error handling here is more lenient than that in
|
||||
// |ConvertUTF8toUCS2|, but it's that way for backwards
|
||||
// |ConvertUTF8toUTF16|, but it's that way for backwards
|
||||
// compatibility.
|
||||
|
||||
ConvertUCS2toUTF8( buffer_type* aBuffer )
|
||||
ConvertUTF16toUTF8( buffer_type* aBuffer )
|
||||
: mStart(aBuffer), mBuffer(aBuffer) {}
|
||||
|
||||
size_t Size() const { return mBuffer - mStart; }
|
||||
@ -363,7 +363,7 @@ class ConvertUCS2toUTF8
|
||||
|
||||
/**
|
||||
* A character sink (see |copy_string| in nsAlgorithm.h) for computing
|
||||
* the number of bytes a UCS2 (really UTF-16) would occupy in UTF-8.
|
||||
* the number of bytes a UTF-16 string would occupy in UTF-8.
|
||||
*/
|
||||
class CalculateUTF8Size
|
||||
{
|
||||
|
@ -148,7 +148,7 @@ class LossyConvertEncoding
|
||||
|
||||
NS_COM
|
||||
void
|
||||
CopyUCS2toASCII( const nsAString& aSource, nsACString& aDest )
|
||||
CopyUTF16toASCII( const nsAString& aSource, nsACString& aDest )
|
||||
{
|
||||
// right now, this won't work on multi-fragment destinations
|
||||
aDest.SetLength(aSource.Length());
|
||||
@ -163,7 +163,7 @@ CopyUCS2toASCII( const nsAString& aSource, nsACString& aDest )
|
||||
|
||||
NS_COM
|
||||
void
|
||||
CopyASCIItoUCS2( const nsACString& aSource, nsAString& aDest )
|
||||
CopyASCIItoUTF16( const nsACString& aSource, nsAString& aDest )
|
||||
{
|
||||
// right now, this won't work on multi-fragment destinations
|
||||
aDest.SetLength(aSource.Length());
|
||||
@ -239,7 +239,7 @@ AppendUTF16toUTF8( const nsAString& aSource, nsACString& aDest )
|
||||
|
||||
// All ready? Time to convert
|
||||
|
||||
ConvertUCS2toUTF8 converter(dest.get());
|
||||
ConvertUTF16toUTF8 converter(dest.get());
|
||||
copy_string(aSource.BeginReading(source_start),
|
||||
aSource.EndReading(source_end), converter);
|
||||
|
||||
@ -258,7 +258,7 @@ AppendUTF16toUTF8( const nsAString& aSource, nsACString& aDest )
|
||||
// rare situation.
|
||||
|
||||
aDest.Replace(old_dest_length, count,
|
||||
NS_ConvertUCS2toUTF8(aSource));
|
||||
NS_ConvertUTF16toUTF8(aSource));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -294,7 +294,7 @@ AppendUTF8toUTF16( const nsACString& aSource, nsAString& aDest )
|
||||
|
||||
// All ready? Time to convert
|
||||
|
||||
ConvertUTF8toUCS2 converter(dest.get());
|
||||
ConvertUTF8toUTF16 converter(dest.get());
|
||||
copy_string(aSource.BeginReading(source_start),
|
||||
aSource.EndReading(source_end), converter);
|
||||
|
||||
@ -312,7 +312,7 @@ AppendUTF8toUTF16( const nsACString& aSource, nsAString& aDest )
|
||||
// this rare situation.
|
||||
|
||||
aDest.Replace(old_dest_length, count,
|
||||
NS_ConvertUTF8toUCS2(aSource));
|
||||
NS_ConvertUTF8toUTF16(aSource));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -376,7 +376,7 @@ ToNewUTF8String( const nsAString& aSource )
|
||||
char *result = NS_STATIC_CAST(char*,
|
||||
nsMemory::Alloc(calculator.Size() + 1));
|
||||
|
||||
ConvertUCS2toUTF8 converter(result);
|
||||
ConvertUTF16toUTF8 converter(result);
|
||||
copy_string(aSource.BeginReading(start), aSource.EndReading(end),
|
||||
converter).write_terminator();
|
||||
NS_ASSERTION(calculator.Size() == converter.Size(), "length mismatch");
|
||||
@ -436,7 +436,7 @@ UTF8ToNewUnicode( const nsACString& aSource )
|
||||
PRUnichar *result = NS_STATIC_CAST(PRUnichar*,
|
||||
nsMemory::Alloc(sizeof(PRUnichar) * (calculator.Length() + 1)));
|
||||
|
||||
ConvertUTF8toUCS2 converter(result);
|
||||
ConvertUTF8toUTF16 converter(result);
|
||||
copy_string(aSource.BeginReading(start), aSource.EndReading(end),
|
||||
converter).write_terminator();
|
||||
NS_ASSERTION(calculator.Length() == converter.Length(), "length mismatch");
|
||||
@ -616,7 +616,7 @@ IsUTF8( const nsACString& aString )
|
||||
}
|
||||
}
|
||||
else
|
||||
return PR_FALSE; // Not UTF8 string
|
||||
return PR_FALSE; // Not UTF-8 string
|
||||
}
|
||||
|
||||
while (ptr < fragmentEnd && state)
|
||||
@ -632,7 +632,7 @@ IsUTF8( const nsACString& aString )
|
||||
|
||||
if ( !UTF8traits::isInSeq(c) || overlong && c <= olupper ||
|
||||
surrogate && slower <= c || nonchar && !state )
|
||||
return PR_FALSE; // Not UTF8 string
|
||||
return PR_FALSE; // Not UTF-8 string
|
||||
overlong = surrogate = PR_FALSE;
|
||||
}
|
||||
}
|
||||
|
@ -339,7 +339,7 @@ PRInt32 UTF8InputStream::Fill(nsresult * aErrorCode)
|
||||
"Ouch. I would overflow my buffer if I wasn't so careful.");
|
||||
if (PRInt32(dstLen) > mUnicharData->GetBufferSize()) return 0;
|
||||
|
||||
ConvertUTF8toUCS2 converter(mUnicharData->GetBuffer());
|
||||
ConvertUTF8toUTF16 converter(mUnicharData->GetBuffer());
|
||||
|
||||
nsASingleFragmentCString::const_char_iterator start = mByteData->GetBuffer();
|
||||
nsASingleFragmentCString::const_char_iterator end = mByteData->GetBuffer() + srcLen;
|
||||
|
@ -516,9 +516,10 @@ foo::GetShortName( nsAString& aResult ) const
|
||||
If your string happens to be wide,
|
||||
you'll need to convert it before you can <span class="code">printf</span> something reasonable.
|
||||
If it's just for debugging,
|
||||
you probably wouldn't care if something odd was printed in the case of a UCS2 character that didn't have
|
||||
an ASCII equivalent.
|
||||
The simplest thing in this case is to make a temporary conversion using <span class="code">NS_ConvertUCS2toUTF8</span>.
|
||||
you probably wouldn't care if something odd was printed in the case of a Unicode character that didn't have
|
||||
an ASCII equivalent. (If you have a UTF-8 terminal, the result is
|
||||
perfectly legible and nothing odd is printed.)
|
||||
The simplest thing in this case is to make a temporary conversion using <span class="code">NS_ConvertUTF16toUTF8</span>.
|
||||
The result is conveniently flat already, so getting the pointer is simple.
|
||||
Remember not to hold onto the pointer you get out of this beyond the lifetime of the temporary.
|
||||
</dd>
|
||||
@ -534,14 +535,14 @@ void PrintSomeStrings( const nsAString& aString, const PRUnichar* aKey, const ns
|
||||
printf("%s\n", <span class="notice">PromiseFlatCString(</span>aCString<span class="notice">).get()</span>); // GOOD
|
||||
|
||||
// the simplest way to get a |printf|-able |const char*| out of a string
|
||||
printf("%s\n", <span class="notice">NS_ConvertUCS2toUTF8(</span>aKey<span class="notice">).get()</span>); // GOOD
|
||||
printf("%s\n", <span class="notice">NS_ConvertUTF16toUTF8(</span>aKey<span class="notice">).get()</span>); // GOOD
|
||||
|
||||
// works just as well with a formal wide string type...
|
||||
printf("%s\n", <span class="notice">NS_ConvertUCS2toUTF8(</span>aString<span class="notice">).get()</span>);
|
||||
printf("%s\n", <span class="notice">NS_ConvertUTF16toUTF8(</span>aString<span class="notice">).get()</span>);
|
||||
|
||||
|
||||
// But don't hold onto the pointer longer than the lifetime of the temporary!
|
||||
<span class="warning">const char* cstring = NS_ConvertUCS2toUTF8(aKey).get(); // BAD! |cstring| is dangling
|
||||
<span class="warning">const char* cstring = NS_ConvertUTF16toUTF8(aKey).get(); // BAD! |cstring| is dangling
|
||||
printf("%s\n", cstring);</span>
|
||||
}
|
||||
</pre>
|
||||
@ -555,6 +556,15 @@ void PrintSomeStrings( const nsAString& aString, const PRUnichar* aKey, const ns
|
||||
Some of the URLs may be out-dated or moved.
|
||||
The messages are in order from oldest to newest.
|
||||
</p>
|
||||
<p class="editnote">[Note : In June, 2003, these emails were modified
|
||||
to better reflect what is stored in 'wide' string
|
||||
classes (UTF-16 string instead of UCS-2) and what
|
||||
related methods do as a part of the patch for <a href=
|
||||
"http://bugzilla.mozilla.org/show_bug.cgi?id=183156"
|
||||
title="replace UCS2 in function/class/method names with UTF16">bug 183156</a>.
|
||||
Therefore, they're a little different from the original emails
|
||||
written by <a href="http://ScottCollins.net/">Scott Collins</a>]
|
||||
</p>
|
||||
<hr>
|
||||
<pre>
|
||||
Date: Thu, 13 Apr 2000 19:41:47 -0400
|
||||
@ -570,19 +580,25 @@ rambling, and for the fact that this message may accidentally mix
|
||||
discussion of how things <strong>are</strong> and how they will be.
|
||||
|
||||
<p>There are many different possible encodings. Three in common use in
|
||||
the Mozilla source base are: ASCII, UCS2, and UTF8. In ASCII, every
|
||||
the Mozilla source base are: ASCII, UTF-16, and UTF-8. In ASCII, every
|
||||
<!--the Mozilla source base are: ASCII, UCS2, and UTF8. In ASCII, every-->
|
||||
character fits in 7-bits and is typically stored in an 8-bit byte. We
|
||||
usually represent ASCII strings with <span class="code">nsCString</span>s, <span class="code">nsXPIDLCString</span>s,
|
||||
or <span class="code">char</span> string literals. In UCS2, characters occupy 16 bits each.
|
||||
We usually represent UCS2 strings as <span class="code">nsString</span>s, etc., i.e., two-byte
|
||||
or `wide' strings. UTF8 is a multi-byte encoding. A character might
|
||||
occupy one, two, or three bytes. It is easiest to store and
|
||||
or <span class="code">char</span> string literals. In UTF-16, characters occupy one 16-bit code unit (
|
||||
<a href="http://www.unicode.org/glossary/index.html#BMP_character">
|
||||
<abbr title="Basic Multilingual Plane">BMP</abbr> characters</a>)
|
||||
or two 16-bit code units
|
||||
(<a href="http://www.unicode.org/glossary/index.html#supplementary_character">
|
||||
<abbr title="Supplementary Plane : Plane 1 through 16">non-BMP</abbr> characters</a>).
|
||||
We usually represent UTF-16 strings as <span class="code">nsString</span>s, etc., i.e., two-byte
|
||||
or `wide' strings. UTF-8 is a multi-byte encoding. A character might
|
||||
occupy one, two, three, or four bytes. It is easiest to store and
|
||||
manipulate such a string within a single-byte or `narrow' string
|
||||
implementation.
|
||||
|
||||
<p>None of our current string implementations know the encoding of the
|
||||
data they hold at any given moment. An <span class="code">nsCString</span> might legitimately
|
||||
hold data encoded in ASCII, UTF8, or even EBCDIC for that matter.
|
||||
hold data encoded in ASCII, UTF-8 or even EBCDIC for that matter.
|
||||
|
||||
<p>Operations that convert from one encoding to another, or operations
|
||||
that are encoding sensitive (e.g., <span class="code">to_upper</span>), rightly belong in
|
||||
@ -590,7 +606,7 @@ i18n. The fact that our current string interfaces automatically and
|
||||
implicitly convert between wide and narrow strings is actually the
|
||||
source of many errors in two particular categories: (1) unintended
|
||||
extra work, (2) mistaken re-encoding, e.g., accidentally `converting'
|
||||
a UTF8 string to UCS2 by pretending the UTF8 string is ASCII and then
|
||||
a UTF-8 string to UTF-16 by pretending the UTF-8 string is ASCII and then
|
||||
padding with <span class="code">'\0'</span>s.
|
||||
|
||||
<p>We've known these were bad for a long time, and have been trying to
|
||||
@ -600,7 +616,7 @@ ramifications.
|
||||
|
||||
<div class="source-code">
|
||||
<pre>
|
||||
void foo( const nsString& aUCS2string );
|
||||
void foo( const nsString& aUTF16string );
|
||||
|
||||
foo("hello"); // works! constructs a temporary |nsString| by
|
||||
// converting the ASCII literal with padding.
|
||||
@ -620,13 +636,13 @@ foo( nsAutoString("hello") );
|
||||
<p>which still copy/converts, but at least it probably doesn't need to do
|
||||
a heap allocation. In the best of all worlds, no conversion, copying,
|
||||
or allocation would be necessary. To do that, you would need to be
|
||||
able to directly specify a UCS2 string, e.g., with the <span class="code">L"hello"</span>
|
||||
able to directly specify a UTF-16 string, e.g., with the <span class="code">L"hello"</span>
|
||||
notation, and wrap that in an interface that just held a pointer.
|
||||
E.g., something like
|
||||
|
||||
<div class="source-code">
|
||||
<pre>
|
||||
void foo( const nsAReadableString& aUCS2string );
|
||||
void foo( const nsAReadableString& aUTF16string );
|
||||
|
||||
foo( nsLiteralString(L"hello") );
|
||||
</pre>
|
||||
@ -675,10 +691,10 @@ class that derives from <span class="code">nsAutoString</span>, but allows const
|
||||
|
||||
<div class="source-code">
|
||||
<pre>
|
||||
class NS_ConvertASCIItoUCS2 : public nsAutoString
|
||||
class NS_ConvertASCIItoUTF16 : public nsAutoString
|
||||
{
|
||||
public:
|
||||
NS_ConvertASCIItoUCS2( const char* );
|
||||
NS_ConvertASCIItoUTF16( const char* );
|
||||
// ...
|
||||
};
|
||||
</pre>
|
||||
@ -688,7 +704,7 @@ class NS_ConvertASCIItoUCS2 : public nsAutoString
|
||||
|
||||
<div class="source-code">
|
||||
<pre>
|
||||
foo( NS_ConvertASCIItoUCS2("hello") );
|
||||
foo( NS_ConvertASCIItoUTF16("hello") );
|
||||
</pre>
|
||||
</div>
|
||||
|
||||
@ -697,8 +713,8 @@ acts like a function call to an explicit encoding conversion. It <strong>is</st
|
||||
a function call to an explicit encoding conversion. We think that
|
||||
this naming pattern has room for growth. In the meeting, we concluded
|
||||
that the best representation for encoding conversions is a family of
|
||||
functions, and <span class="code">NS_ConvertASCIItoUCS2</span> fits right in. We think that
|
||||
XPCOM probably can't live without the ASCII to UCS2 conversion (though
|
||||
functions, and <span class="code">NS_ConvertASCIItoUTF16</span> fits right in. We think that
|
||||
XPCOM probably can't live without the ASCII to UTF-16 conversion (though
|
||||
as explicit as possible) but that all others rightly belong in i18n
|
||||
land.
|
||||
|
||||
@ -710,19 +726,19 @@ the `WithConversion' form must be used. E.g.,
|
||||
|
||||
<div class="source-code">
|
||||
<pre>
|
||||
nsString aUCS2string;
|
||||
nsString aUTF16string;
|
||||
nsCString anASCIIstring;
|
||||
// ...
|
||||
|
||||
aUCS2string += anASCIIstring; // Currently legal, but not for long
|
||||
aUCS2string.Append(anASCIIstring); // same
|
||||
aUTF16string += anASCIIstring; // Currently legal, but not for long
|
||||
aUTF16string.Append(anASCIIstring); // same
|
||||
|
||||
aUCS2string.AppendWithConversion(anASCIIstring); // the new way
|
||||
aUTF16string.AppendWithConversion(anASCIIstring); // the new way
|
||||
|
||||
if ( aUCS2string == anASCIIstring ) // Sorry, this is going away too
|
||||
if ( aUTF16string == anASCIIstring ) // Sorry, this is going away too
|
||||
// ...
|
||||
|
||||
if ( aUCS2string.EqualsWithConversion(anASCIIstring) )
|
||||
if ( aUTF16string.EqualsWithConversion(anASCIIstring) )
|
||||
// ...
|
||||
</pre>
|
||||
</div>
|
||||
@ -747,8 +763,8 @@ unrelated to encoding issues, so I'll defer it to another post.
|
||||
|
||||
<div class="source-code">
|
||||
<pre>
|
||||
xxxConvertingASCIItoUCS2
|
||||
xxxConvertingUCS2toASCII
|
||||
xxxConvertingASCIItoUTF16
|
||||
xxxConvertingUTF16toASCII
|
||||
</pre>
|
||||
</div>
|
||||
|
||||
@ -781,7 +797,7 @@ appealing, but more likely to work, like
|
||||
|
||||
<div class="source-code">
|
||||
<pre>
|
||||
NS_ConvertASCIItoUCS2("Hello")
|
||||
NS_ConvertASCIItoUTF16("Hello")
|
||||
</pre>
|
||||
</div>
|
||||
|
||||
@ -800,7 +816,7 @@ often we are converting constant literal strings, and why.
|
||||
`WithConversion' forms where appropriate. I was also converting
|
||||
things to use <span class="code">NS_ConvertToString</span> where appropriate; unless I get
|
||||
talked out of it, I want to switch midstream to
|
||||
<span class="code">NS_ConvertASCIItoUCS2</span>, then go back and fix up the
|
||||
<span class="code">NS_ConvertASCIItoUTF16</span>, then go back and fix up the
|
||||
<span class="code">NS_ConvertToString</span> instances later. I've set things up so I can
|
||||
check in as I go. After all these conversions have been done, I'll be
|
||||
able to throw the switch (what switch? NEW_STRING_APIS) which will
|
||||
@ -815,8 +831,8 @@ reasoning.)
|
||||
<ul>
|
||||
<li>how really annoying this whole topic is
|
||||
<li>how bad <span class="code">L"xxx"</span> is
|
||||
<li>whether to move forward with <span class="code">NS_ConvertASCIItoUCS2</span>
|
||||
<li>whether we should move to xxxConvertingASCIItoUCS2 etc instead
|
||||
<li>whether to move forward with <span class="code">NS_ConvertASCIItoUTF16</span>
|
||||
<li>whether we should move to xxxConvertingASCIItoUTF16 etc instead
|
||||
of `WithConverting'
|
||||
<li>arguments about where encoding conversions should live
|
||||
<li>arguments about whether going between 1 and 2 byte storage is an
|
||||
@ -908,7 +924,7 @@ standard as we move forward.
|
||||
#define NS_LITERAL_STRING(s) nsLiteralString(L##s, \
|
||||
(sizeof(L##s)/sizeof(wchar_t))-1)
|
||||
#else
|
||||
#define NS_LITERAL_STRING(s) NS_ConvertASCIItoUCS2(s, \
|
||||
#define NS_LITERAL_STRING(s) NS_ConvertASCIItoUTF16(s, \
|
||||
sizeof(s)-1)
|
||||
#endif
|
||||
</pre>
|
||||
@ -1045,7 +1061,7 @@ example I gave above, that is, the one with <span class="code">AssignWithConvers
|
||||
|
||||
<p><span class="code">Assign</span> still exists. <span class="code">AssignWithConversion</span> takes on that
|
||||
functionality for assignments that require encoding transformations
|
||||
(e.g., from ASCII to UCS2). <span class="code">SetString</span> is gone, since it was always
|
||||
(e.g., from ASCII to UTF-16). <span class="code">SetString</span> is gone, since it was always
|
||||
a synonym for <span class="code">Assign</span>.
|
||||
|
||||
<p>Learn more about the general APIs for strings that we are trying to
|
||||
@ -1263,7 +1279,7 @@ strings semantics
|
||||
<p>In a later message, Chris Waterson asks a related question
|
||||
<pre class="email-quote">
|
||||
>scc: should we add <span class="code">operator PRUnichar*()</span> to
|
||||
>NS_ConvertASCIItoUCS2?
|
||||
>NS_ConvertASCIItoUTF16?
|
||||
</pre>
|
||||
|
||||
<p>And I reply:
|
||||
@ -1999,7 +2015,7 @@ Subject: Re: how to free an nsString::ToNewCString
|
||||
|
||||
<hr>
|
||||
|
||||
<p>You use several <span class="code">NS_ConvertASCIItoUCS2("...").get()</span>, these should be
|
||||
<p>You use several <span class="code">NS_ConvertASCIItoUTF16("...").get()</span>, these should be
|
||||
|
||||
NS_LITERAL_STRING("...").get()
|
||||
|
||||
@ -2037,7 +2053,7 @@ DoSomething( nsAWritableString& answer )
|
||||
if ( localFile )
|
||||
{
|
||||
|
||||
localFile->SetPersistentDescriptor(NS_ConvertUCS2toUTF8(path));
|
||||
localFile->SetPersistentDescriptor(NS_ConvertUTF16toUTF8(path));
|
||||
|
||||
nsXPIDLString converted_path;
|
||||
localFile->GetUnicodePath(getter_Copies(converted_path));
|
||||
|
@ -1087,7 +1087,7 @@ PRBool nsCString::EqualsWithConversion(const char* aCString,PRBool aIgnoreCase,P
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
NS_ConvertUCS2toUTF8::NS_ConvertUCS2toUTF8( const PRUnichar* aString )
|
||||
NS_ConvertUTF16toUTF8::NS_ConvertUTF16toUTF8( const PRUnichar* aString )
|
||||
{
|
||||
if (!aString)
|
||||
// Leave us as an uninitialized nsCAutoString.
|
||||
@ -1095,7 +1095,7 @@ NS_ConvertUCS2toUTF8::NS_ConvertUCS2toUTF8( const PRUnichar* aString )
|
||||
Init(aString, nsCharTraits<PRUnichar>::length(aString));
|
||||
}
|
||||
|
||||
NS_ConvertUCS2toUTF8::NS_ConvertUCS2toUTF8( const PRUnichar* aString, PRUint32 aLength )
|
||||
NS_ConvertUTF16toUTF8::NS_ConvertUTF16toUTF8( const PRUnichar* aString, PRUint32 aLength )
|
||||
{
|
||||
if (!aString)
|
||||
// Leave us as an uninitialized nsCAutoString.
|
||||
@ -1103,13 +1103,13 @@ NS_ConvertUCS2toUTF8::NS_ConvertUCS2toUTF8( const PRUnichar* aString, PRUint32 a
|
||||
Init(aString, aLength);
|
||||
}
|
||||
|
||||
NS_ConvertUCS2toUTF8::NS_ConvertUCS2toUTF8( const nsASingleFragmentString& aString )
|
||||
NS_ConvertUTF16toUTF8::NS_ConvertUTF16toUTF8( const nsASingleFragmentString& aString )
|
||||
{
|
||||
nsASingleFragmentString::const_char_iterator start;
|
||||
Init(aString.BeginReading(start), aString.Length());
|
||||
}
|
||||
|
||||
NS_ConvertUCS2toUTF8::NS_ConvertUCS2toUTF8( const nsAString& aString )
|
||||
NS_ConvertUTF16toUTF8::NS_ConvertUTF16toUTF8( const nsAString& aString )
|
||||
{
|
||||
// Compute space required: do this once so we don't incur multiple
|
||||
// allocations. This "optimization" is probably of dubious value...
|
||||
@ -1128,7 +1128,7 @@ NS_ConvertUCS2toUTF8::NS_ConvertUCS2toUTF8( const nsAString& aString )
|
||||
|
||||
// All ready? Time to convert
|
||||
|
||||
ConvertUCS2toUTF8 converter(mStr);
|
||||
ConvertUTF16toUTF8 converter(mStr);
|
||||
copy_string(aString.BeginReading(start), aString.EndReading(end),
|
||||
converter).write_terminator();
|
||||
mLength = converter.Size();
|
||||
@ -1140,7 +1140,7 @@ NS_ConvertUCS2toUTF8::NS_ConvertUCS2toUTF8( const nsAString& aString )
|
||||
}
|
||||
}
|
||||
|
||||
void NS_ConvertUCS2toUTF8::Init( const PRUnichar* aString, PRUint32 aLength )
|
||||
void NS_ConvertUTF16toUTF8::Init( const PRUnichar* aString, PRUint32 aLength )
|
||||
{
|
||||
// Compute space required: do this once so we don't incur multiple
|
||||
// allocations. This "optimization" is probably of dubious value...
|
||||
@ -1157,7 +1157,7 @@ void NS_ConvertUCS2toUTF8::Init( const PRUnichar* aString, PRUint32 aLength )
|
||||
|
||||
// All ready? Time to convert
|
||||
|
||||
ConvertUCS2toUTF8 converter(mStr);
|
||||
ConvertUTF16toUTF8 converter(mStr);
|
||||
converter.write(aString, aLength);
|
||||
mLength = converter.Size();
|
||||
mStr[mLength] = char_type(0);
|
||||
@ -1169,7 +1169,7 @@ void NS_ConvertUCS2toUTF8::Init( const PRUnichar* aString, PRUint32 aLength )
|
||||
}
|
||||
}
|
||||
|
||||
NS_LossyConvertUCS2toASCII::NS_LossyConvertUCS2toASCII( const nsAString& aString )
|
||||
NS_LossyConvertUTF16toASCII::NS_LossyConvertUTF16toASCII( const nsAString& aString )
|
||||
{
|
||||
SetCapacity(aString.Length());
|
||||
|
||||
|
@ -422,32 +422,33 @@ public:
|
||||
// NS_DEF_DERIVED_STRING_OPERATOR_PLUS(nsCAutoString, char)
|
||||
|
||||
/**
|
||||
* A helper class that converts a UCS2 string to UTF8
|
||||
* A helper class that converts a UTF-16 string to UTF-8
|
||||
*/
|
||||
class NS_COM NS_ConvertUCS2toUTF8
|
||||
class NS_COM NS_ConvertUTF16toUTF8
|
||||
: public nsCAutoString
|
||||
/*
|
||||
...
|
||||
*/
|
||||
{
|
||||
public:
|
||||
explicit NS_ConvertUCS2toUTF8( const PRUnichar* aString );
|
||||
NS_ConvertUCS2toUTF8( const PRUnichar* aString, PRUint32 aLength );
|
||||
explicit NS_ConvertUCS2toUTF8( const nsAString& aString );
|
||||
explicit NS_ConvertUCS2toUTF8( const nsASingleFragmentString& aString );
|
||||
explicit NS_ConvertUTF16toUTF8( const PRUnichar* aString );
|
||||
NS_ConvertUTF16toUTF8( const PRUnichar* aString, PRUint32 aLength );
|
||||
explicit NS_ConvertUTF16toUTF8( const nsAString& aString );
|
||||
explicit NS_ConvertUTF16toUTF8( const nsASingleFragmentString& aString );
|
||||
|
||||
protected:
|
||||
void Init( const PRUnichar* aString, PRUint32 aLength );
|
||||
|
||||
private:
|
||||
// NOT TO BE IMPLEMENTED
|
||||
NS_ConvertUCS2toUTF8( char );
|
||||
NS_ConvertUTF16toUTF8( char );
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* A helper class that converts a UCS2 string to ASCII in a lossy manner
|
||||
* A helper class that converts a UTF-16 string to ASCII in a lossy manner
|
||||
*/
|
||||
class NS_COM NS_LossyConvertUCS2toASCII
|
||||
class NS_COM NS_LossyConvertUTF16toASCII
|
||||
: public nsCAutoString
|
||||
/*
|
||||
...
|
||||
@ -455,21 +456,25 @@ class NS_COM NS_LossyConvertUCS2toASCII
|
||||
{
|
||||
public:
|
||||
explicit
|
||||
NS_LossyConvertUCS2toASCII( const PRUnichar* aString )
|
||||
NS_LossyConvertUTF16toASCII( const PRUnichar* aString )
|
||||
{
|
||||
AppendWithConversion( aString, ~PRUint32(0) /* MAXINT */);
|
||||
}
|
||||
|
||||
NS_LossyConvertUCS2toASCII( const PRUnichar* aString, PRUint32 aLength )
|
||||
NS_LossyConvertUTF16toASCII( const PRUnichar* aString, PRUint32 aLength )
|
||||
{
|
||||
AppendWithConversion( aString, aLength );
|
||||
}
|
||||
|
||||
explicit NS_LossyConvertUCS2toASCII( const nsAString& aString );
|
||||
explicit NS_LossyConvertUTF16toASCII( const nsAString& aString );
|
||||
|
||||
private:
|
||||
// NOT TO BE IMPLEMENTED
|
||||
NS_LossyConvertUCS2toASCII( char );
|
||||
NS_LossyConvertUTF16toASCII( char );
|
||||
};
|
||||
|
||||
// Backward compatibility
|
||||
typedef NS_ConvertUTF16toUTF8 NS_ConvertUCS2toUTF8;
|
||||
typedef NS_LossyConvertUTF16toASCII NS_LossyConvertUCS2toASCII;
|
||||
|
||||
#endif /* !defined(nsString_h__) */
|
||||
|
@ -1331,12 +1331,12 @@ nsAutoString::nsAutoString(const CBufDescriptor& aBuffer) : nsString() {
|
||||
}
|
||||
|
||||
void
|
||||
NS_ConvertASCIItoUCS2::Init( const char* aCString, PRUint32 aLength )
|
||||
NS_ConvertASCIItoUTF16::Init( const char* aCString, PRUint32 aLength )
|
||||
{
|
||||
AppendWithConversion(aCString,aLength);
|
||||
}
|
||||
|
||||
NS_ConvertASCIItoUCS2::NS_ConvertASCIItoUCS2( const nsACString& aCString )
|
||||
NS_ConvertASCIItoUTF16::NS_ConvertASCIItoUTF16( const nsACString& aCString )
|
||||
{
|
||||
SetCapacity(aCString.Length());
|
||||
|
||||
@ -1351,7 +1351,7 @@ NS_ConvertASCIItoUCS2::NS_ConvertASCIItoUCS2( const nsACString& aCString )
|
||||
}
|
||||
}
|
||||
|
||||
NS_ConvertUTF8toUCS2::NS_ConvertUTF8toUCS2( const nsACString& aCString )
|
||||
NS_ConvertUTF8toUTF16::NS_ConvertUTF8toUTF16( const nsACString& aCString )
|
||||
{
|
||||
// Compute space required: do this once so we don't incur multiple
|
||||
// allocations. This "optimization" is probably of dubious value...
|
||||
@ -1370,36 +1370,36 @@ NS_ConvertUTF8toUCS2::NS_ConvertUTF8toUCS2( const nsACString& aCString )
|
||||
|
||||
// All ready? Time to convert
|
||||
|
||||
ConvertUTF8toUCS2 converter(mUStr);
|
||||
ConvertUTF8toUTF16 converter(mUStr);
|
||||
copy_string(aCString.BeginReading(start), aCString.EndReading(end),
|
||||
converter).write_terminator();
|
||||
mLength = converter.Length();
|
||||
if (mLength != count)
|
||||
{
|
||||
NS_ERROR("Input wasn't UTF8 or incorrect length was calculated");
|
||||
NS_ERROR("Input wasn't UTF-8 or incorrect length was calculated");
|
||||
Truncate();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
NS_ConvertUTF8toUCS2::NS_ConvertUTF8toUCS2( const nsASingleFragmentCString& aCString )
|
||||
NS_ConvertUTF8toUTF16::NS_ConvertUTF8toUTF16( const nsASingleFragmentCString& aCString )
|
||||
{
|
||||
nsASingleFragmentCString::const_char_iterator start;
|
||||
Init(aCString.BeginReading(start), aCString.Length());
|
||||
}
|
||||
|
||||
NS_ConvertUTF8toUCS2::NS_ConvertUTF8toUCS2( const char* aCString )
|
||||
NS_ConvertUTF8toUTF16::NS_ConvertUTF8toUTF16( const char* aCString )
|
||||
{
|
||||
Init(aCString, nsCharTraits<char>::length(aCString));
|
||||
}
|
||||
|
||||
NS_ConvertUTF8toUCS2::NS_ConvertUTF8toUCS2( const char* aCString, PRUint32 aLength )
|
||||
NS_ConvertUTF8toUTF16::NS_ConvertUTF8toUTF16( const char* aCString, PRUint32 aLength )
|
||||
{
|
||||
Init(aCString, aLength);
|
||||
}
|
||||
|
||||
void
|
||||
NS_ConvertUTF8toUCS2::Init( const char* aCString, PRUint32 aLength )
|
||||
NS_ConvertUTF8toUTF16::Init( const char* aCString, PRUint32 aLength )
|
||||
{
|
||||
// Compute space required: do this once so we don't incur multiple
|
||||
// allocations. This "optimization" is probably of dubious value...
|
||||
@ -1416,7 +1416,7 @@ NS_ConvertUTF8toUCS2::Init( const char* aCString, PRUint32 aLength )
|
||||
|
||||
// All ready? Time to convert
|
||||
|
||||
ConvertUTF8toUCS2 converter(mUStr);
|
||||
ConvertUTF8toUTF16 converter(mUStr);
|
||||
converter.write(aCString, aLength);
|
||||
mLength = converter.Length();
|
||||
mUStr[mLength] = char_type(0);
|
||||
|
@ -484,7 +484,7 @@ public:
|
||||
|
||||
// NS_DEF_DERIVED_STRING_OPERATOR_PLUS(nsAutoString, PRUnichar)
|
||||
|
||||
class NS_COM NS_ConvertASCIItoUCS2
|
||||
class NS_COM NS_ConvertASCIItoUTF16
|
||||
: public nsAutoString
|
||||
/*
|
||||
...
|
||||
@ -492,21 +492,21 @@ class NS_COM NS_ConvertASCIItoUCS2
|
||||
{
|
||||
public:
|
||||
explicit
|
||||
NS_ConvertASCIItoUCS2( const nsACString& aCString );
|
||||
NS_ConvertASCIItoUTF16( const nsACString& aCString );
|
||||
|
||||
explicit
|
||||
NS_ConvertASCIItoUCS2( const nsAFlatCString& aCString )
|
||||
NS_ConvertASCIItoUTF16( const nsAFlatCString& aCString )
|
||||
{
|
||||
Init( aCString.get(), aCString.Length() );
|
||||
}
|
||||
|
||||
explicit
|
||||
NS_ConvertASCIItoUCS2( const char* aCString )
|
||||
NS_ConvertASCIItoUTF16( const char* aCString )
|
||||
{
|
||||
Init( aCString, ~PRUint32(0) /* MAXINT */ );
|
||||
}
|
||||
|
||||
NS_ConvertASCIItoUCS2( const char* aCString, PRUint32 aLength )
|
||||
NS_ConvertASCIItoUTF16( const char* aCString, PRUint32 aLength )
|
||||
{
|
||||
Init( aCString, aLength );
|
||||
}
|
||||
@ -523,23 +523,28 @@ class NS_COM NS_ConvertASCIItoUCS2
|
||||
|
||||
private:
|
||||
// NOT TO BE IMPLEMENTED
|
||||
NS_ConvertASCIItoUCS2( PRUnichar );
|
||||
NS_ConvertASCIItoUTF16( PRUnichar );
|
||||
};
|
||||
|
||||
class NS_COM NS_ConvertUTF8toUCS2
|
||||
|
||||
class NS_COM NS_ConvertUTF8toUTF16
|
||||
: public nsAutoString
|
||||
{
|
||||
public:
|
||||
explicit NS_ConvertUTF8toUCS2( const nsACString& aCString );
|
||||
explicit NS_ConvertUTF8toUCS2( const nsASingleFragmentCString& aCString );
|
||||
explicit NS_ConvertUTF8toUCS2( const char* aCString );
|
||||
NS_ConvertUTF8toUCS2( const char* aCString, PRUint32 aLength );
|
||||
explicit NS_ConvertUTF8toUTF16( const nsACString& aCString );
|
||||
explicit NS_ConvertUTF8toUTF16( const nsASingleFragmentCString& aCString );
|
||||
explicit NS_ConvertUTF8toUTF16( const char* aCString );
|
||||
NS_ConvertUTF8toUTF16( const char* aCString, PRUint32 aLength );
|
||||
|
||||
protected:
|
||||
void Init( const char* aCString, PRUint32 aLength );
|
||||
|
||||
private:
|
||||
NS_ConvertUTF8toUCS2( PRUnichar );
|
||||
NS_ConvertUTF8toUTF16( PRUnichar );
|
||||
};
|
||||
|
||||
// Backward compatibility
|
||||
typedef NS_ConvertASCIItoUTF16 NS_ConvertASCIItoUCS2;
|
||||
typedef NS_ConvertUTF8toUTF16 NS_ConvertUTF8toUCS2;
|
||||
|
||||
#endif /* !defined(nsString2_h__) */
|
||||
|
@ -69,8 +69,8 @@ literal_string( const nsACString::char_type* aPtr, PRUint32 aLength )
|
||||
#define NS_NAMED_MULTILINE_LITERAL_STRING(n,s) nsDependentString n(NS_REINTERPRET_CAST(const nsAString::char_type*, s), PRUint32((sizeof(s)/sizeof(wchar_t))-1))
|
||||
#else
|
||||
#define NS_LL(s) s
|
||||
#define NS_MULTILINE_LITERAL_STRING(s) NS_ConvertASCIItoUCS2(s, PRUint32(sizeof(s)-1))
|
||||
#define NS_NAMED_MULTILINE_LITERAL_STRING(n,s) NS_ConvertASCIItoUCS2 n(s, PRUint32(sizeof(s)-1))
|
||||
#define NS_MULTILINE_LITERAL_STRING(s) NS_ConvertASCIItoUTF16(s, PRUint32(sizeof(s)-1))
|
||||
#define NS_NAMED_MULTILINE_LITERAL_STRING(n,s) NS_ConvertASCIItoUTF16 n(s, PRUint32(sizeof(s)-1))
|
||||
#endif
|
||||
|
||||
/*
|
||||
|
@ -61,7 +61,7 @@
|
||||
* wide version of this class, since wide |printf| is not generally available. That means
|
||||
* to get a wide version of your formatted data, you must, e.g.,
|
||||
*
|
||||
* CopyASCIItoUCS2(nsPrintfCString("%f", 13.917"), myStr);
|
||||
CopyASCIItoUTF16(nsPrintfCString("%f", 13.917), myStr);
|
||||
*
|
||||
* That's another good reason to avoid this class for anything but numbers ... as strings can be
|
||||
* much more efficiently handled with |NS_LITERAL_[C]STRING| and |nsLiteral[C]String|.
|
||||
|
@ -45,8 +45,8 @@ class nsCString;
|
||||
NS_COM size_t Distance( const nsReadingIterator<PRUnichar>&, const nsReadingIterator<PRUnichar>& );
|
||||
NS_COM size_t Distance( const nsReadingIterator<char>&, const nsReadingIterator<char>& );
|
||||
|
||||
NS_COM void CopyUCS2toASCII( const nsAString& aSource, nsACString& aDest );
|
||||
NS_COM void CopyASCIItoUCS2( const nsACString& aSource, nsAString& aDest );
|
||||
NS_COM void CopyUTF16toASCII( const nsAString& aSource, nsACString& aDest );
|
||||
NS_COM void CopyASCIItoUTF16( const nsACString& aSource, nsAString& aDest );
|
||||
|
||||
NS_COM void CopyUTF16toUTF8( const nsAString& aSource, nsACString& aDest );
|
||||
NS_COM void CopyUTF8toUTF16( const nsACString& aSource, nsAString& aDest );
|
||||
@ -60,6 +60,14 @@ NS_COM void AppendUTF8toUTF16( const nsACString& aSource, nsAString& aDest );
|
||||
NS_COM void AppendUTF16toUTF8( const PRUnichar* aSource, nsACString& aDest );
|
||||
NS_COM void AppendUTF8toUTF16( const char* aSource, nsAString& aDest );
|
||||
|
||||
// Backward compatibility
|
||||
inline
|
||||
NS_COM void CopyUCS2toASCII( const nsAString& aSource, nsACString& aDest )
|
||||
{ CopyUTF16toASCII(aSource, aDest); }
|
||||
inline
|
||||
NS_COM void CopyASCIItoUCS2( const nsACString& aSource, nsAString& aDest )
|
||||
{ CopyASCIItoUTF16(aSource, aDest); }
|
||||
|
||||
/**
|
||||
* Returns a new |char| buffer containing a zero-terminated copy of |aSource|.
|
||||
*
|
||||
@ -88,11 +96,14 @@ NS_COM char* ToNewCString( const nsACString& aSource );
|
||||
/**
|
||||
* Returns a new |char| buffer containing a zero-terminated copy of |aSource|.
|
||||
*
|
||||
* Allocates and returns a new |char| buffer which you must free with |nsMemory::Free|.
|
||||
* Performs a encoding conversion by converting 16-bit wide characters down to UTF8 encoded 8-bits wide string copying |aSource| to your new buffer.
|
||||
* The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls.
|
||||
* Allocates and returns a new |char| buffer which you must free with
|
||||
* |nsMemory::Free|.
|
||||
* Performs an encoding conversion from a UTF-16 string to a UTF-8 string
|
||||
* copying |aSource| to your new buffer.
|
||||
* The new buffer is zero-terminated, but that may not help you if |aSource|
|
||||
* contains embedded nulls.
|
||||
*
|
||||
* @param aSource a 16-bit wide string
|
||||
* @param aSource a UTF-16 string (made of PRUnichar's)
|
||||
* @return a new |char| buffer you must free with |nsMemory::Free|.
|
||||
*/
|
||||
|
||||
@ -100,12 +111,15 @@ NS_COM char* ToNewUTF8String( const nsAString& aSource );
|
||||
|
||||
|
||||
/**
|
||||
* Returns a new |PRUnichar| buffer containing a zero-terminated copy of |aSource|.
|
||||
* Returns a new |PRUnichar| buffer containing a zero-terminated copy of
|
||||
* |aSource|.
|
||||
*
|
||||
* Allocates and returns a new |PRUnichar| buffer which you must free with |nsMemory::Free|.
|
||||
* The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls.
|
||||
* Allocates and returns a new |PRUnichar| buffer which you must free with
|
||||
* |nsMemory::Free|.
|
||||
* The new buffer is zero-terminated, but that may not help you if |aSource|
|
||||
* contains embedded nulls.
|
||||
*
|
||||
* @param aSource a 16-bit wide string
|
||||
* @param aSource a UTF-16 string
|
||||
* @return a new |PRUnichar| buffer you must free with |nsMemory::Free|.
|
||||
*/
|
||||
NS_COM PRUnichar* ToNewUnicode( const nsAString& aSource );
|
||||
@ -129,28 +143,27 @@ NS_COM PRUnichar* ToNewUnicode( const nsACString& aSource );
|
||||
* of |aSource|.
|
||||
*
|
||||
* Allocates and returns a new |PRUnichar| buffer which you must free with
|
||||
* |nsMemory::Free|. Performs an encoding conversion by 0-padding
|
||||
* 8-bit wide characters up to 16-bits wide while copying |aSource| to
|
||||
* your new buffer. This conversion is not well defined; but it
|
||||
* reproduces legacy string behavior. The new buffer is
|
||||
* zero-terminated, but that may not help you if |aSource| contains
|
||||
* embedded nulls.
|
||||
* |nsMemory::Free|. Performs an encoding conversion from UTF-8 to UTF-16
|
||||
* while copying |aSource| to your new buffer. This conversion is well defined
|
||||
* for a valid UTF-8 string. The new buffer is zero-terminated, but that
|
||||
* may not help you if |aSource| contains embedded nulls.
|
||||
*
|
||||
* @param aSource an 8-bit wide string, UTF-8 encoded
|
||||
* @return a new |PRUnichar| buffer you must free with |nsMemory::Free|.
|
||||
* (UTF-16 encoded)
|
||||
*/
|
||||
NS_COM PRUnichar* UTF8ToNewUnicode( const nsACString& aSource );
|
||||
|
||||
/**
|
||||
* Copies |aLength| 16-bit characters from the start of |aSource| to the
|
||||
* Copies |aLength| 16-bit code units from the start of |aSource| to the
|
||||
* |PRUnichar| buffer |aDest|.
|
||||
*
|
||||
* After this operation |aDest| is not null terminated.
|
||||
*
|
||||
* @param aSource a 16-bit wide string
|
||||
* @param aSource a UTF-16 string
|
||||
* @param aSrcOffset start offset in the source string
|
||||
* @param aDest a |PRUnichar| buffer
|
||||
* @param aLength the number of 16-bit characters to copy
|
||||
* @param aLength the number of 16-bit code units to copy
|
||||
* @return pointer to destination buffer - identical to |aDest|
|
||||
*/
|
||||
NS_COM PRUnichar* CopyUnicodeTo( const nsAString& aSource,
|
||||
@ -208,7 +221,7 @@ NS_COM PRBool IsASCII( const nsACString& aString );
|
||||
* XXX This is neither bullet-proof nor an all-purpose UTF-8 validator.
|
||||
* It is mainly written to replace, and is roughly equivalent to,
|
||||
*
|
||||
* str.Equals(NS_ConvertUCS2toUTF8(NS_ConvertUTF8toUCS2(str)))
|
||||
* str.Equals(NS_ConvertUTF16toUTF8(NS_ConvertUTF8toUTF16(str)))
|
||||
*
|
||||
* (see bug 191541)
|
||||
* As such, it does not check for non-UTF-8 7bit encodings such as
|
||||
|
@ -62,15 +62,15 @@ class UTF8traits
|
||||
|
||||
/**
|
||||
* A character sink (see |copy_string| in nsAlgorithm.h) for converting
|
||||
* UTF-8 to UCS2 (really UTF-16).
|
||||
* UTF-8 to UTF-16.
|
||||
*/
|
||||
class ConvertUTF8toUCS2
|
||||
class ConvertUTF8toUTF16
|
||||
{
|
||||
public:
|
||||
typedef nsACString::char_type value_type;
|
||||
typedef nsAString::char_type buffer_type;
|
||||
|
||||
ConvertUTF8toUCS2( buffer_type* aBuffer )
|
||||
ConvertUTF8toUTF16( buffer_type* aBuffer )
|
||||
: mStart(aBuffer), mBuffer(aBuffer), mErrorEncountered(PR_FALSE) {}
|
||||
|
||||
size_t Length() const { return mBuffer - mStart; }
|
||||
@ -267,19 +267,19 @@ class CalculateUTF8Length
|
||||
|
||||
/**
|
||||
* A character sink (see |copy_string| in nsAlgorithm.h) for converting
|
||||
* UCS2 (really UTF-16) to UTF-8.
|
||||
* UTF-16 to UTF-8.
|
||||
*/
|
||||
class ConvertUCS2toUTF8
|
||||
class ConvertUTF16toUTF8
|
||||
{
|
||||
public:
|
||||
typedef nsAString::char_type value_type;
|
||||
typedef nsACString::char_type buffer_type;
|
||||
|
||||
// The error handling here is more lenient than that in
|
||||
// |ConvertUTF8toUCS2|, but it's that way for backwards
|
||||
// |ConvertUTF8toUTF16|, but it's that way for backwards
|
||||
// compatibility.
|
||||
|
||||
ConvertUCS2toUTF8( buffer_type* aBuffer )
|
||||
ConvertUTF16toUTF8( buffer_type* aBuffer )
|
||||
: mStart(aBuffer), mBuffer(aBuffer) {}
|
||||
|
||||
size_t Size() const { return mBuffer - mStart; }
|
||||
@ -363,7 +363,7 @@ class ConvertUCS2toUTF8
|
||||
|
||||
/**
|
||||
* A character sink (see |copy_string| in nsAlgorithm.h) for computing
|
||||
* the number of bytes a UCS2 (really UTF-16) would occupy in UTF-8.
|
||||
* the number of bytes a UTF-16 string would occupy in UTF-8.
|
||||
*/
|
||||
class CalculateUTF8Size
|
||||
{
|
||||
|
@ -148,7 +148,7 @@ class LossyConvertEncoding
|
||||
|
||||
NS_COM
|
||||
void
|
||||
CopyUCS2toASCII( const nsAString& aSource, nsACString& aDest )
|
||||
CopyUTF16toASCII( const nsAString& aSource, nsACString& aDest )
|
||||
{
|
||||
// right now, this won't work on multi-fragment destinations
|
||||
aDest.SetLength(aSource.Length());
|
||||
@ -163,7 +163,7 @@ CopyUCS2toASCII( const nsAString& aSource, nsACString& aDest )
|
||||
|
||||
NS_COM
|
||||
void
|
||||
CopyASCIItoUCS2( const nsACString& aSource, nsAString& aDest )
|
||||
CopyASCIItoUTF16( const nsACString& aSource, nsAString& aDest )
|
||||
{
|
||||
// right now, this won't work on multi-fragment destinations
|
||||
aDest.SetLength(aSource.Length());
|
||||
@ -239,7 +239,7 @@ AppendUTF16toUTF8( const nsAString& aSource, nsACString& aDest )
|
||||
|
||||
// All ready? Time to convert
|
||||
|
||||
ConvertUCS2toUTF8 converter(dest.get());
|
||||
ConvertUTF16toUTF8 converter(dest.get());
|
||||
copy_string(aSource.BeginReading(source_start),
|
||||
aSource.EndReading(source_end), converter);
|
||||
|
||||
@ -258,7 +258,7 @@ AppendUTF16toUTF8( const nsAString& aSource, nsACString& aDest )
|
||||
// rare situation.
|
||||
|
||||
aDest.Replace(old_dest_length, count,
|
||||
NS_ConvertUCS2toUTF8(aSource));
|
||||
NS_ConvertUTF16toUTF8(aSource));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -294,7 +294,7 @@ AppendUTF8toUTF16( const nsACString& aSource, nsAString& aDest )
|
||||
|
||||
// All ready? Time to convert
|
||||
|
||||
ConvertUTF8toUCS2 converter(dest.get());
|
||||
ConvertUTF8toUTF16 converter(dest.get());
|
||||
copy_string(aSource.BeginReading(source_start),
|
||||
aSource.EndReading(source_end), converter);
|
||||
|
||||
@ -312,7 +312,7 @@ AppendUTF8toUTF16( const nsACString& aSource, nsAString& aDest )
|
||||
// this rare situation.
|
||||
|
||||
aDest.Replace(old_dest_length, count,
|
||||
NS_ConvertUTF8toUCS2(aSource));
|
||||
NS_ConvertUTF8toUTF16(aSource));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -376,7 +376,7 @@ ToNewUTF8String( const nsAString& aSource )
|
||||
char *result = NS_STATIC_CAST(char*,
|
||||
nsMemory::Alloc(calculator.Size() + 1));
|
||||
|
||||
ConvertUCS2toUTF8 converter(result);
|
||||
ConvertUTF16toUTF8 converter(result);
|
||||
copy_string(aSource.BeginReading(start), aSource.EndReading(end),
|
||||
converter).write_terminator();
|
||||
NS_ASSERTION(calculator.Size() == converter.Size(), "length mismatch");
|
||||
@ -436,7 +436,7 @@ UTF8ToNewUnicode( const nsACString& aSource )
|
||||
PRUnichar *result = NS_STATIC_CAST(PRUnichar*,
|
||||
nsMemory::Alloc(sizeof(PRUnichar) * (calculator.Length() + 1)));
|
||||
|
||||
ConvertUTF8toUCS2 converter(result);
|
||||
ConvertUTF8toUTF16 converter(result);
|
||||
copy_string(aSource.BeginReading(start), aSource.EndReading(end),
|
||||
converter).write_terminator();
|
||||
NS_ASSERTION(calculator.Length() == converter.Length(), "length mismatch");
|
||||
@ -616,7 +616,7 @@ IsUTF8( const nsACString& aString )
|
||||
}
|
||||
}
|
||||
else
|
||||
return PR_FALSE; // Not UTF8 string
|
||||
return PR_FALSE; // Not UTF-8 string
|
||||
}
|
||||
|
||||
while (ptr < fragmentEnd && state)
|
||||
@ -632,7 +632,7 @@ IsUTF8( const nsACString& aString )
|
||||
|
||||
if ( !UTF8traits::isInSeq(c) || overlong && c <= olupper ||
|
||||
surrogate && slower <= c || nonchar && !state )
|
||||
return PR_FALSE; // Not UTF8 string
|
||||
return PR_FALSE; // Not UTF-8 string
|
||||
overlong = surrogate = PR_FALSE;
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user