gecko-dev/xpcom/string/nsString.h

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

172 lines
5.0 KiB
C
Raw Normal View History

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
2012-05-21 11:12:37 +00:00
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef nsString_h___
#define nsString_h___
#include <ostream>
#include "mozilla/Attributes.h"
#include "nsStringFwd.h"
#include "nsAString.h"
#include "nsDependentSubstring.h"
#include "nsReadableUtils.h"
#include "nsTString.h"
static_assert(sizeof(char16_t) == 2, "size of char16_t must be 2");
Bug 895322 - Part 1: Replace the usages of MOZ_STATIC_ASSERT with C++11 static_assert; r=Waldo This patch was mostly generated by running the following scripts on the codebase, with some manual changes made afterwards: # static_assert.sh #!/bin/bash # Command to convert an NSPR integer type to the equivalent standard integer type function convert() { echo "Converting $1 to $2..." find . ! -wholename "*nsprpub*" \ ! -wholename "*security/nss*" \ ! -wholename "*/.hg*" \ ! -wholename "obj-ff-dbg*" \ ! -name nsXPCOMCID.h \ ! -name prtypes.h \ -type f \ \( -iname "*.cpp" \ -o -iname "*.h" \ -o -iname "*.cc" \ -o -iname "*.mm" \) | \ xargs -n 1 `dirname $0`/assert_replacer.py #sed -i -e "s/\b$1\b/$2/g" } convert MOZ_STATIC_ASSERT static_assert hg rev --no-backup mfbt/Assertions.h \ media/webrtc/signaling/src/sipcc/core/includes/ccapi.h \ modules/libmar/src/mar_private.h \ modules/libmar/src/mar.h # assert_replacer.py #!/usr/bin/python import sys import re pattern = re.compile(r"\bMOZ_STATIC_ASSERT\b") def replaceInPlace(fname): print fname f = open(fname, "rw+") lines = f.readlines() for i in range(0, len(lines)): while True: index = re.search(pattern, lines[i]) if index != None: index = index.start() lines[i] = lines[i][0:index] + "static_assert" + lines[i][index+len("MOZ_STATIC_ASSERT"):] for j in range(i + 1, len(lines)): if lines[j].find(" ", index) == index: lines[j] = lines[j][0:index] + lines[j][index+4:] else: break else: break f.seek(0, 0) f.truncate() f.write("".join(lines)) f.close() argc = len(sys.argv) for i in range(1, argc): replaceInPlace(sys.argv[i]) --HG-- extra : rebase_source : 4b4a4047d82f2c205b9fad8d56dfc3f1afc0b045
2013-07-18 17:59:53 +00:00
static_assert(sizeof(nsString::char_type) == 2,
"size of nsString::char_type must be 2");
static_assert(nsString::char_type(-1) > nsString::char_type(0),
"nsString::char_type must be unsigned");
Bug 895322 - Part 1: Replace the usages of MOZ_STATIC_ASSERT with C++11 static_assert; r=Waldo This patch was mostly generated by running the following scripts on the codebase, with some manual changes made afterwards: # static_assert.sh #!/bin/bash # Command to convert an NSPR integer type to the equivalent standard integer type function convert() { echo "Converting $1 to $2..." find . ! -wholename "*nsprpub*" \ ! -wholename "*security/nss*" \ ! -wholename "*/.hg*" \ ! -wholename "obj-ff-dbg*" \ ! -name nsXPCOMCID.h \ ! -name prtypes.h \ -type f \ \( -iname "*.cpp" \ -o -iname "*.h" \ -o -iname "*.cc" \ -o -iname "*.mm" \) | \ xargs -n 1 `dirname $0`/assert_replacer.py #sed -i -e "s/\b$1\b/$2/g" } convert MOZ_STATIC_ASSERT static_assert hg rev --no-backup mfbt/Assertions.h \ media/webrtc/signaling/src/sipcc/core/includes/ccapi.h \ modules/libmar/src/mar_private.h \ modules/libmar/src/mar.h # assert_replacer.py #!/usr/bin/python import sys import re pattern = re.compile(r"\bMOZ_STATIC_ASSERT\b") def replaceInPlace(fname): print fname f = open(fname, "rw+") lines = f.readlines() for i in range(0, len(lines)): while True: index = re.search(pattern, lines[i]) if index != None: index = index.start() lines[i] = lines[i][0:index] + "static_assert" + lines[i][index+len("MOZ_STATIC_ASSERT"):] for j in range(i + 1, len(lines)): if lines[j].find(" ", index) == index: lines[j] = lines[j][0:index] + lines[j][index+4:] else: break else: break f.seek(0, 0) f.truncate() f.write("".join(lines)) f.close() argc = len(sys.argv) for i in range(1, argc): replaceInPlace(sys.argv[i]) --HG-- extra : rebase_source : 4b4a4047d82f2c205b9fad8d56dfc3f1afc0b045
2013-07-18 17:59:53 +00:00
static_assert(sizeof(nsCString::char_type) == 1,
"size of nsCString::char_type must be 1");
static_assert(sizeof(nsTLiteralString<char>) == sizeof(nsTString<char>),
"nsLiteralCString can masquerade as nsCString, "
"so they must have identical layout");
static_assert(sizeof(nsTLiteralString<char16_t>) == sizeof(nsTString<char16_t>),
"nsTLiteralString can masquerade as nsString, "
"so they must have identical layout");
/**
* A helper class that converts a UTF-16 string to ASCII in a lossy manner
*/
class NS_LossyConvertUTF16toASCII : public nsAutoCString {
public:
explicit NS_LossyConvertUTF16toASCII(const char16ptr_t aString) {
Bug 1402247 - Use encoding_rs for XPCOM string encoding conversions. r=Nika,erahm,froydnj. Correctness improvements: * UTF errors are handled safely per spec instead of dangerously truncating strings. * There are fewer converter implementations. Performance improvements: * The old code did exact buffer length math, which meant doing UTF math twice on each input string (once for length calculation and another time for conversion). Exact length math is more complicated when handling errors properly, which the old code didn't do. The new code does UTF math on the string content only once (when converting) but risks allocating more than once. There are heuristics in place to lower the probability of reallocation in cases where the double math avoidance isn't enough of a saving to absorb an allocation and memcpy. * Previously, in UTF-16 <-> UTF-8 conversions, an ASCII prefix was optimized but a single non-ASCII code point pessimized the rest of the string. The new code tries to get back on the fast ASCII path. * UTF-16 to Latin1 conversion guarantees less about handling of out-of-range input to eliminate an operation from the inner loop on x86/x86_64. * When assigning to a pre-existing string, the new code tries to reuse the old buffer instead of first releasing the old buffer and then allocating a new one. * When reallocating from the new code, the memcpy covers only the data that is part of the logical length of the old string instead of memcpying the whole capacity. (For old callers old excess memcpy behavior is preserved due to bogus callers. See bug 1472113.) * UTF-8 strings in XPConnect that are in the Latin1 range are passed to SpiderMonkey as Latin1. New features: * Conversion between UTF-8 and Latin1 is added in order to enable faster future interop between Rust code (or otherwise UTF-8-using code) and text node and SpiderMonkey code that uses Latin1. MozReview-Commit-ID: JaJuExfILM9
2018-07-06 07:44:43 +00:00
LossyAppendUTF16toASCII(mozilla::MakeStringSpan(aString), *this);
}
NS_LossyConvertUTF16toASCII(const char16ptr_t aString, size_t aLength) {
LossyAppendUTF16toASCII(
Substring(static_cast<const char16_t*>(aString), aLength), *this);
}
explicit NS_LossyConvertUTF16toASCII(const nsAString& aString) {
LossyAppendUTF16toASCII(aString, *this);
}
private:
// NOT TO BE IMPLEMENTED
NS_LossyConvertUTF16toASCII(char) = delete;
};
/**
 * A helper class that converts an ASCII string to UTF-16.
 *
 * The object is itself an nsAutoString; every constructor appends the
 * converted text to *this through AppendASCIItoUTF16.
 */
class NS_ConvertASCIItoUTF16 : public nsAutoString {
 public:
  /**
   * Converts the text that aCString points to.
   * NOTE(review): the pointer goes through mozilla::MakeStringSpan, so it is
   * presumably expected to be zero-terminated -- confirm.
   */
  explicit NS_ConvertASCIItoUTF16(const char* aCString) {
    AppendASCIItoUTF16(mozilla::MakeStringSpan(aCString), *this);
  }

  /**
   * Converts aLength characters starting at aCString.
   */
  NS_ConvertASCIItoUTF16(const char* aCString, size_t aLength) {
    AppendASCIItoUTF16(Substring(aCString, aLength), *this);
  }

  /**
   * Converts the contents of an existing narrow string object.
   */
  explicit NS_ConvertASCIItoUTF16(const nsACString& aCString) {
    AppendASCIItoUTF16(aCString, *this);
  }

  /**
   * Converts the characters covered by the given span.
   */
  explicit NS_ConvertASCIItoUTF16(mozilla::Span<const char> aCString) {
    AppendASCIItoUTF16(aCString, *this);
  }

 private:
  // NOT TO BE IMPLEMENTED
  // Deleted so that constructing from a single char16_t fails to compile.
  NS_ConvertASCIItoUTF16(char16_t) = delete;
};
/**
 * A helper class that converts a UTF-16 string to UTF-8.
 *
 * The object is itself an nsAutoCString; every constructor appends the
 * converted text to *this through AppendUTF16toUTF8.
 */
class NS_ConvertUTF16toUTF8 : public nsAutoCString {
 public:
  /**
   * Converts the UTF-16 text that aString points to.
   * NOTE(review): the pointer goes through mozilla::MakeStringSpan, so it is
   * presumably expected to be zero-terminated -- confirm.
   */
  explicit NS_ConvertUTF16toUTF8(const char16ptr_t aString) {
    AppendUTF16toUTF8(mozilla::MakeStringSpan(aString), *this);
  }

  /**
   * Converts aLength UTF-16 code units starting at aString.
   */
  NS_ConvertUTF16toUTF8(const char16ptr_t aString, size_t aLength) {
    AppendUTF16toUTF8(Substring(static_cast<const char16_t*>(aString), aLength),
                      *this);
  }

  /**
   * Converts the contents of an existing UTF-16 string object.
   */
  explicit NS_ConvertUTF16toUTF8(const nsAString& aString) {
    AppendUTF16toUTF8(aString, *this);
  }

  /**
   * Converts the UTF-16 code units covered by the given span.
   */
  explicit NS_ConvertUTF16toUTF8(mozilla::Span<const char16_t> aString) {
    AppendUTF16toUTF8(aString, *this);
  }

 private:
  // NOT TO BE IMPLEMENTED
  // Deleted so that constructing from a single character fails to compile.
  NS_ConvertUTF16toUTF8(char) = delete;
};
/**
 * A helper class that converts a UTF-8 string to UTF-16.
 *
 * The object is itself an nsAutoString; every constructor appends the
 * converted text to *this through AppendUTF8toUTF16.
 */
class NS_ConvertUTF8toUTF16 : public nsAutoString {
 public:
  /**
   * Converts the UTF-8 text that aCString points to.
   * NOTE(review): the pointer goes through mozilla::MakeStringSpan, so it is
   * presumably expected to be zero-terminated -- confirm.
   */
  explicit NS_ConvertUTF8toUTF16(const char* aCString) {
    AppendUTF8toUTF16(mozilla::MakeStringSpan(aCString), *this);
  }

  /**
   * Converts aLength bytes of UTF-8 starting at aCString.
   */
  NS_ConvertUTF8toUTF16(const char* aCString, size_t aLength) {
    AppendUTF8toUTF16(Substring(aCString, aLength), *this);
  }

  /**
   * Converts the contents of an existing narrow string object.
   */
  explicit NS_ConvertUTF8toUTF16(const nsACString& aCString) {
    AppendUTF8toUTF16(aCString, *this);
  }

  /**
   * Converts the bytes covered by the given span.
   */
  explicit NS_ConvertUTF8toUTF16(mozilla::Span<const char> aCString) {
    AppendUTF8toUTF16(aCString, *this);
  }

 private:
  // NOT TO BE IMPLEMENTED
  // Deleted so that constructing from a single char16_t fails to compile.
  NS_ConvertUTF8toUTF16(char16_t) = delete;
};
/**
 * Converts an integer (signed/unsigned, 32/64bit) to its string
 * representation and returns it as an nsTAutoString<T>.
 *
 * @tparam T      character type of the resulting string
 * @tparam U      integer type of the value to format
 * @param aInt    the value to format
 * @param aRadix  numeric base handed to AppendInt; defaults to 10 (decimal)
 * @return        a string holding what AppendInt produced for aInt
 */
template <typename T, typename U>
nsTAutoString<T> IntToTString(const U aInt, const int aRadix = 10) {
  nsTAutoString<T> formatted;
  formatted.AppendInt(aInt, aRadix);
  return formatted;
}
/**
 * Narrow-character (char) convenience wrapper around IntToTString.
 *
 * @param aInt    the value to format
 * @param aRadix  numeric base forwarded to IntToTString; defaults to 10
 */
template <typename U>
nsAutoCString IntToCString(const U aInt, const int aRadix = 10) {
  return IntToTString<char, U>(aInt, aRadix);
}
/**
 * UTF-16 (char16_t) convenience wrapper around IntToTString.
 *
 * @param aInt    the value to format
 * @param aRadix  numeric base forwarded to IntToTString; defaults to 10
 */
template <typename U>
nsAutoString IntToString(const U aInt, const int aRadix = 10) {
  return IntToTString<char16_t, U>(aInt, aRadix);
}
// MOZ_DBG support
// Streams the raw bytes of a narrow string: exactly Length() characters
// starting at Data() are written, using unformatted output.
inline std::ostream& operator<<(std::ostream& aOut, const nsACString& aString) {
  // write() returns the stream it was called on, so hand that straight back.
  return aOut.write(aString.Data(), aString.Length());
}
inline std::ostream& operator<<(std::ostream& aOut, const nsAString& aString) {
return aOut << NS_ConvertUTF16toUTF8(aString);
}
// the following are included/declared for backwards compatibility
#include "nsDependentString.h"
#include "nsLiteralString.h"
#include "nsPromiseFlatString.h"
#endif // !defined(nsString_h___)