gecko-dev/xpcom/string/nsString.h
Henri Sivonen 3edc601325 Bug 1402247 - Use encoding_rs for XPCOM string encoding conversions. r=Nika,erahm,froydnj.
Correctness improvements:

 * UTF errors are handled safely per spec instead of dangerously truncating
   strings.

 * There are fewer converter implementations.

Performance improvements:

 * The old code did exact buffer length math, which meant doing UTF math twice
   on each input string (once for length calculation and another time for
   conversion). Exact length math is more complicated when handling errors
   properly, which the old code didn't do. The new code does UTF math on the
   string content only once (when converting) but risks allocating more than
   once. There are heuristics in place to lower the probability of
   reallocation in cases where the double math avoidance isn't enough of a
   saving to absorb an allocation and memcpy.

 * Previously, in UTF-16 <-> UTF-8 conversions, an ASCII prefix was optimized
   but a single non-ASCII code point pessimized the rest of the string. The
   new code tries to get back on the fast ASCII path.

 * UTF-16 to Latin1 conversion guarantees less about handling of out-of-range
   input to eliminate an operation from the inner loop on x86/x86_64.

 * When assigning to a pre-existing string, the new code tries to reuse the
   old buffer instead of first releasing the old buffer and then allocating a
   new one.

 * When reallocating from the new code, the memcpy covers only the data that
   is part of the logical length of the old string instead of memcpying the
   whole capacity. (For old callers, the old excess memcpy behavior is preserved
   because of bogus callers. See bug 1472113.)

 * UTF-8 strings in XPConnect that are in the Latin1 range are passed to
   SpiderMonkey as Latin1.

New features:

 * Conversion between UTF-8 and Latin1 is added in order to enable faster
   future interop between Rust code (or otherwise UTF-8-using code) and text
   node and SpiderMonkey code that uses Latin1.

MozReview-Commit-ID: JaJuExfILM9
2018-08-14 14:43:42 +03:00

149 lines
3.9 KiB
C++

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef nsString_h___
#define nsString_h___
#include "mozilla/Attributes.h"
#include "nsStringFwd.h"
#include "nsAString.h"
#include "nsDependentSubstring.h"
#include "nsReadableUtils.h"
// enable support for the obsolete string API if not explicitly disabled
#ifndef MOZ_STRING_WITH_OBSOLETE_API
#define MOZ_STRING_WITH_OBSOLETE_API 1
#endif
#include "nsTString.h"
// Compile-time checks on the string character types and on the sizes of the
// literal-string types. A failure here stops the build with the message given.

// 16-bit strings: both char16_t and nsString's character type must occupy
// exactly two bytes.
static_assert(sizeof(char16_t) == 2, "size of char16_t must be 2");
static_assert(sizeof(nsString::char_type) == 2,
              "size of nsString::char_type must be 2");

// Converting -1 to the character type yields a value greater than zero only
// when that type is unsigned.
static_assert(nsString::char_type(-1) > nsString::char_type(0),
              "nsString::char_type must be unsigned");

// 8-bit strings: the character type must occupy exactly one byte.
static_assert(sizeof(nsCString::char_type) == 1,
              "size of nsCString::char_type must be 1");

// The messages below require identical layout between each literal string type
// and the corresponding string type; what is actually verified is that their
// sizes match.
static_assert(sizeof(nsTLiteralString<char>) == sizeof(nsTString<char>),
              "nsLiteralCString can masquerade as nsCString, "
              "so they must have identical layout");
// The message names the 16-bit types the same way the 8-bit check above does
// (nsLiteralString / nsString rather than the template name).
static_assert(sizeof(nsTLiteralString<char16_t>) == sizeof(nsTString<char16_t>),
              "nsLiteralString can masquerade as nsString, "
              "so they must have identical layout");
/**
 * Helper that performs a lossy UTF-16 to ASCII conversion.
 *
 * The class publicly derives from nsAutoCString, so the constructed object is
 * itself the 8-bit result: every constructor appends the converted input to
 * `*this` via LossyAppendUTF16toASCII.
 */
class NS_LossyConvertUTF16toASCII : public nsAutoCString
{
public:
  /**
   * Converts the string that starts at aString.
   *
   * No length is supplied; the extent of the input is whatever
   * mozilla::MakeStringSpan determines for the pointer (presumably up to a
   * zero terminator -- confirm against MakeStringSpan).
   */
  explicit NS_LossyConvertUTF16toASCII(const char16ptr_t aString)
  {
    const auto source = mozilla::MakeStringSpan(aString);
    LossyAppendUTF16toASCII(source, *this);
  }

  /**
   * Converts the aLength code units that start at aString.
   */
  NS_LossyConvertUTF16toASCII(const char16ptr_t aString, uint32_t aLength)
  {
    // Substring() is called with a plain char16_t pointer, hence the cast.
    const char16_t* const units = static_cast<const char16_t*>(aString);
    LossyAppendUTF16toASCII(Substring(units, aLength), *this);
  }

  /**
   * Converts the contents of an existing 16-bit string.
   */
  explicit NS_LossyConvertUTF16toASCII(const nsAString& aString)
  {
    LossyAppendUTF16toASCII(aString, *this);
  }

private:
  // Intentionally deleted and never implemented: a call that resolves to this
  // single-`char` overload fails to compile.
  NS_LossyConvertUTF16toASCII(char) = delete;
};
/**
 * Helper that converts an ASCII string to UTF-16.
 *
 * The class publicly derives from nsAutoString, so the constructed object is
 * itself the 16-bit result: every constructor appends the converted input to
 * `*this` via AppendASCIItoUTF16.
 */
class NS_ConvertASCIItoUTF16 : public nsAutoString
{
public:
  /**
   * Converts the C string that starts at aCString.
   *
   * No length is supplied; the extent of the input is whatever
   * mozilla::MakeStringSpan determines for the pointer (presumably up to a
   * zero terminator -- confirm against MakeStringSpan).
   */
  explicit NS_ConvertASCIItoUTF16(const char* aCString)
  {
    const auto source = mozilla::MakeStringSpan(aCString);
    AppendASCIItoUTF16(source, *this);
  }

  /**
   * Converts the aLength characters that start at aCString.
   */
  NS_ConvertASCIItoUTF16(const char* aCString, uint32_t aLength)
  {
    AppendASCIItoUTF16(
      Substring(aCString, aLength),
      *this);
  }

  /**
   * Converts the contents of an existing 8-bit string.
   */
  explicit NS_ConvertASCIItoUTF16(const nsACString& aCString)
  {
    AppendASCIItoUTF16(aCString, *this);
  }

private:
  // Intentionally deleted and never implemented: a call that resolves to this
  // single-`char16_t` overload fails to compile.
  NS_ConvertASCIItoUTF16(char16_t) = delete;
};
/**
 * Helper that converts a UTF-16 string to UTF-8.
 *
 * The class publicly derives from nsAutoCString, so the constructed object is
 * itself the 8-bit result: every constructor appends the converted input to
 * `*this` via AppendUTF16toUTF8.
 */
class NS_ConvertUTF16toUTF8 : public nsAutoCString
{
public:
  /**
   * Converts the string that starts at aString.
   *
   * No length is supplied; the extent of the input is whatever
   * mozilla::MakeStringSpan determines for the pointer (presumably up to a
   * zero terminator -- confirm against MakeStringSpan).
   */
  explicit NS_ConvertUTF16toUTF8(const char16ptr_t aString)
  {
    const auto source = mozilla::MakeStringSpan(aString);
    AppendUTF16toUTF8(source, *this);
  }

  /**
   * Converts the aLength code units that start at aString.
   */
  NS_ConvertUTF16toUTF8(const char16ptr_t aString, uint32_t aLength)
  {
    // Substring() is called with a plain char16_t pointer, hence the cast.
    const char16_t* const units = static_cast<const char16_t*>(aString);
    AppendUTF16toUTF8(Substring(units, aLength), *this);
  }

  /**
   * Converts the contents of an existing 16-bit string.
   */
  explicit NS_ConvertUTF16toUTF8(const nsAString& aString)
  {
    AppendUTF16toUTF8(aString, *this);
  }

private:
  // Intentionally deleted and never implemented: a call that resolves to this
  // single-`char` overload fails to compile.
  NS_ConvertUTF16toUTF8(char) = delete;
};
/**
 * Helper that converts a UTF-8 string to UTF-16.
 *
 * The class publicly derives from nsAutoString, so the constructed object is
 * itself the 16-bit result: every constructor appends the converted input to
 * `*this` via AppendUTF8toUTF16.
 */
class NS_ConvertUTF8toUTF16 : public nsAutoString
{
public:
  /**
   * Converts the C string that starts at aCString.
   *
   * No length is supplied; the extent of the input is whatever
   * mozilla::MakeStringSpan determines for the pointer (presumably up to a
   * zero terminator -- confirm against MakeStringSpan).
   */
  explicit NS_ConvertUTF8toUTF16(const char* aCString)
  {
    const auto source = mozilla::MakeStringSpan(aCString);
    AppendUTF8toUTF16(source, *this);
  }

  /**
   * Converts the aLength bytes that start at aCString.
   */
  NS_ConvertUTF8toUTF16(const char* aCString, uint32_t aLength)
  {
    AppendUTF8toUTF16(
      Substring(aCString, aLength),
      *this);
  }

  /**
   * Converts the contents of an existing 8-bit string.
   */
  explicit NS_ConvertUTF8toUTF16(const nsACString& aCString)
  {
    AppendUTF8toUTF16(aCString, *this);
  }

private:
  // Intentionally deleted and never implemented: a call that resolves to this
  // single-`char16_t` overload fails to compile.
  NS_ConvertUTF8toUTF16(char16_t) = delete;
};
// the following are included/declared for backwards compatibility
#include "nsDependentString.h"
#include "nsLiteralString.h"
#include "nsPromiseFlatString.h"
#endif // !defined(nsString_h___)