gecko-dev/xpcom/string/nsReadableUtils.h

611 lines
23 KiB
C++

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
// IWYU pragma: private, include "nsString.h"
#ifndef nsReadableUtils_h___
#define nsReadableUtils_h___
/**
* I guess all the routines in this file are all mis-named.
* According to our conventions, they should be |NS_xxx|.
*/
#include "mozilla/Assertions.h"
#include "nsAString.h"
#include "mozilla/TextUtils.h"
#include "nsTArrayForwardDeclare.h"
// From the nsstring crate
extern "C" {
bool nsstring_fallible_append_utf8_impl(nsAString* aThis, const char* aOther,
size_t aOtherLen, size_t aOldLen);
bool nsstring_fallible_append_latin1_impl(nsAString* aThis, const char* aOther,
size_t aOtherLen, size_t aOldLen,
bool aAllowShrinking);
bool nscstring_fallible_append_utf16_to_utf8_impl(nsACString* aThis,
const char16_t*,
size_t aOtherLen,
size_t aOldLen);
bool nscstring_fallible_append_utf16_to_latin1_lossy_impl(nsACString* aThis,
const char16_t*,
size_t aOtherLen,
size_t aOldLen,
bool aAllowShrinking);
bool nscstring_fallible_append_utf8_to_latin1_lossy_check(
nsACString* aThis, const nsACString* aOther, size_t aOldLen);
bool nscstring_fallible_append_latin1_to_utf8_check(nsACString* aThis,
const nsACString* aOther,
size_t aOldLen);
}
inline size_t Distance(const nsReadingIterator<char16_t>& aStart,
const nsReadingIterator<char16_t>& aEnd) {
MOZ_ASSERT(aStart.get() <= aEnd.get());
return static_cast<size_t>(aEnd.get() - aStart.get());
}
inline size_t Distance(const nsReadingIterator<char>& aStart,
const nsReadingIterator<char>& aEnd) {
MOZ_ASSERT(aStart.get() <= aEnd.get());
return static_cast<size_t>(aEnd.get() - aStart.get());
}
// NOTE: Operations that don't need an operand to be an XPCOM string
// are in mozilla/TextUtils.h and mozilla/Utf8.h.
// UTF-8 to UTF-16
// Invalid UTF-8 byte sequences are replaced with the REPLACEMENT CHARACTER.
[[nodiscard]] inline bool CopyUTF8toUTF16(mozilla::Span<const char> aSource,
nsAString& aDest,
const mozilla::fallible_t&) {
return nsstring_fallible_append_utf8_impl(&aDest, aSource.Elements(),
aSource.Length(), 0);
}
inline void CopyUTF8toUTF16(mozilla::Span<const char> aSource,
nsAString& aDest) {
if (MOZ_UNLIKELY(!CopyUTF8toUTF16(aSource, aDest, mozilla::fallible))) {
aDest.AllocFailed(aSource.Length());
}
}
[[nodiscard]] inline bool AppendUTF8toUTF16(mozilla::Span<const char> aSource,
nsAString& aDest,
const mozilla::fallible_t&) {
return nsstring_fallible_append_utf8_impl(&aDest, aSource.Elements(),
aSource.Length(), aDest.Length());
}
inline void AppendUTF8toUTF16(mozilla::Span<const char> aSource,
nsAString& aDest) {
if (MOZ_UNLIKELY(!AppendUTF8toUTF16(aSource, aDest, mozilla::fallible))) {
aDest.AllocFailed(aDest.Length() + aSource.Length());
}
}
// Latin1 to UTF-16
// Interpret each incoming unsigned byte value as a Unicode scalar value (not
// windows-1252!). The function names say "ASCII" instead of "Latin1" for
// legacy reasons.
[[nodiscard]] inline bool CopyASCIItoUTF16(mozilla::Span<const char> aSource,
nsAString& aDest,
const mozilla::fallible_t&) {
return nsstring_fallible_append_latin1_impl(&aDest, aSource.Elements(),
aSource.Length(), 0, true);
}
inline void CopyASCIItoUTF16(mozilla::Span<const char> aSource,
nsAString& aDest) {
if (MOZ_UNLIKELY(!CopyASCIItoUTF16(aSource, aDest, mozilla::fallible))) {
aDest.AllocFailed(aSource.Length());
}
}
[[nodiscard]] inline bool AppendASCIItoUTF16(mozilla::Span<const char> aSource,
nsAString& aDest,
const mozilla::fallible_t&) {
return nsstring_fallible_append_latin1_impl(
&aDest, aSource.Elements(), aSource.Length(), aDest.Length(), false);
}
inline void AppendASCIItoUTF16(mozilla::Span<const char> aSource,
nsAString& aDest) {
if (MOZ_UNLIKELY(!AppendASCIItoUTF16(aSource, aDest, mozilla::fallible))) {
aDest.AllocFailed(aDest.Length() + aSource.Length());
}
}
// UTF-16 to UTF-8
// Unpaired surrogates are replaced with the REPLACEMENT CHARACTER.
[[nodiscard]] inline bool CopyUTF16toUTF8(mozilla::Span<const char16_t> aSource,
nsACString& aDest,
const mozilla::fallible_t&) {
return nscstring_fallible_append_utf16_to_utf8_impl(
&aDest, aSource.Elements(), aSource.Length(), 0);
}
inline void CopyUTF16toUTF8(mozilla::Span<const char16_t> aSource,
nsACString& aDest) {
if (MOZ_UNLIKELY(!CopyUTF16toUTF8(aSource, aDest, mozilla::fallible))) {
aDest.AllocFailed(aSource.Length());
}
}
[[nodiscard]] inline bool AppendUTF16toUTF8(
mozilla::Span<const char16_t> aSource, nsACString& aDest,
const mozilla::fallible_t&) {
return nscstring_fallible_append_utf16_to_utf8_impl(
&aDest, aSource.Elements(), aSource.Length(), aDest.Length());
}
inline void AppendUTF16toUTF8(mozilla::Span<const char16_t> aSource,
nsACString& aDest) {
if (MOZ_UNLIKELY(!AppendUTF16toUTF8(aSource, aDest, mozilla::fallible))) {
aDest.AllocFailed(aDest.Length() + aSource.Length());
}
}
// UTF-16 to Latin1
// If all code points in the input are below U+0100, represents each scalar
// value as an unsigned byte. (This is not windows-1252!) If there are code
// points above U+00FF, memory-safely produces garbage and will likely start
// asserting in future debug builds. The nature of the garbage may differ
// based on CPU architecture and must not be relied upon. The names say
// "ASCII" instead of "Latin1" for legacy reasons.
[[nodiscard]] inline bool LossyCopyUTF16toASCII(
mozilla::Span<const char16_t> aSource, nsACString& aDest,
const mozilla::fallible_t&) {
return nscstring_fallible_append_utf16_to_latin1_lossy_impl(
&aDest, aSource.Elements(), aSource.Length(), 0, true);
}
inline void LossyCopyUTF16toASCII(mozilla::Span<const char16_t> aSource,
nsACString& aDest) {
if (MOZ_UNLIKELY(!LossyCopyUTF16toASCII(aSource, aDest, mozilla::fallible))) {
aDest.AllocFailed(aSource.Length());
}
}
[[nodiscard]] inline bool LossyAppendUTF16toASCII(
mozilla::Span<const char16_t> aSource, nsACString& aDest,
const mozilla::fallible_t&) {
return nscstring_fallible_append_utf16_to_latin1_lossy_impl(
&aDest, aSource.Elements(), aSource.Length(), aDest.Length(), false);
}
inline void LossyAppendUTF16toASCII(mozilla::Span<const char16_t> aSource,
nsACString& aDest) {
if (MOZ_UNLIKELY(
!LossyAppendUTF16toASCII(aSource, aDest, mozilla::fallible))) {
aDest.AllocFailed(aDest.Length() + aSource.Length());
}
}
// Latin1 to UTF-8
// Interpret each incoming unsigned byte value as a Unicode scalar value (not
// windows-1252!).
// If the input is ASCII, the heap-allocated nsStringBuffer is shared if
// possible.
[[nodiscard]] inline bool CopyLatin1toUTF8(const nsACString& aSource,
nsACString& aDest,
const mozilla::fallible_t&) {
return nscstring_fallible_append_latin1_to_utf8_check(&aDest, &aSource, 0);
}
inline void CopyLatin1toUTF8(const nsACString& aSource, nsACString& aDest) {
if (MOZ_UNLIKELY(!CopyLatin1toUTF8(aSource, aDest, mozilla::fallible))) {
aDest.AllocFailed(aSource.Length());
}
}
[[nodiscard]] inline bool AppendLatin1toUTF8(const nsACString& aSource,
nsACString& aDest,
const mozilla::fallible_t&) {
return nscstring_fallible_append_latin1_to_utf8_check(&aDest, &aSource,
aDest.Length());
}
inline void AppendLatin1toUTF8(const nsACString& aSource, nsACString& aDest) {
if (MOZ_UNLIKELY(!AppendLatin1toUTF8(aSource, aDest, mozilla::fallible))) {
aDest.AllocFailed(aDest.Length() + aSource.Length());
}
}
// UTF-8 to Latin1
// If all code points in the input are below U+0100, represents each scalar
// value as an unsigned byte. (This is not windows-1252!) If there are code
// points above U+00FF, memory-safely produces garbage in release builds and
// asserts in debug builds. The nature of the garbage may differ
// based on CPU architecture and must not be relied upon.
// If the input is ASCII, the heap-allocated nsStringBuffer is shared if
// possible.
[[nodiscard]] inline bool LossyCopyUTF8toLatin1(const nsACString& aSource,
nsACString& aDest,
const mozilla::fallible_t&) {
return nscstring_fallible_append_utf8_to_latin1_lossy_check(&aDest, &aSource,
0);
}
inline void LossyCopyUTF8toLatin1(const nsACString& aSource,
nsACString& aDest) {
if (MOZ_UNLIKELY(!LossyCopyUTF8toLatin1(aSource, aDest, mozilla::fallible))) {
aDest.AllocFailed(aSource.Length());
}
}
[[nodiscard]] inline bool LossyAppendUTF8toLatin1(const nsACString& aSource,
nsACString& aDest,
const mozilla::fallible_t&) {
return nscstring_fallible_append_utf8_to_latin1_lossy_check(&aDest, &aSource,
aDest.Length());
}
inline void LossyAppendUTF8toLatin1(const nsACString& aSource,
nsACString& aDest) {
if (MOZ_UNLIKELY(
!LossyAppendUTF8toLatin1(aSource, aDest, mozilla::fallible))) {
aDest.AllocFailed(aDest.Length() + aSource.Length());
}
}
/**
* Returns a new |char| buffer containing a zero-terminated copy of |aSource|.
*
* Infallibly allocates and returns a new |char| buffer which you must
* free with |free|.
* Performs a conversion with LossyConvertUTF16toLatin1() writing into the
* newly-allocated buffer.
*
* The new buffer is zero-terminated, but that may not help you if |aSource|
* contains embedded nulls.
*
* @param aSource a 16-bit wide string
* @return a new |char| buffer you must free with |free|.
*/
char* ToNewCString(const nsAString& aSource);
/* A fallible version of ToNewCString. Returns nullptr on failure. */
char* ToNewCString(const nsAString& aSource,
const mozilla::fallible_t& aFallible);
/**
* Returns a new |char| buffer containing a zero-terminated copy of |aSource|.
*
* Infallibly allocates and returns a new |char| buffer which you must
* free with |free|.
*
* The new buffer is zero-terminated, but that may not help you if |aSource|
* contains embedded nulls.
*
* @param aSource an 8-bit wide string
* @return a new |char| buffer you must free with |free|.
*/
char* ToNewCString(const nsACString& aSource);
/* A fallible version of ToNewCString. Returns nullptr on failure. */
char* ToNewCString(const nsACString& aSource,
const mozilla::fallible_t& aFallible);
/**
* Returns a new |char| buffer containing a zero-terminated copy of |aSource|.
*
* Infallibly allocates and returns a new |char| buffer which you must
* free with |free|.
* Performs an encoding conversion from a UTF-16 string to a UTF-8 string with
* unpaired surrogates replaced with the REPLACEMENT CHARACTER copying
* |aSource| to your new buffer.
*
* The new buffer is zero-terminated, but that may not help you if |aSource|
* contains embedded nulls.
*
* @param aSource a UTF-16 string (made of char16_t's)
* @param aUTF8Count the number of 8-bit units that was returned
* @return a new |char| buffer you must free with |free|.
*/
char* ToNewUTF8String(const nsAString& aSource, uint32_t* aUTF8Count = nullptr);
/* A fallible version of ToNewUTF8String. Returns nullptr on failure. */
char* ToNewUTF8String(const nsAString& aSource, uint32_t* aUTF8Count,
const mozilla::fallible_t& aFallible);
/**
* Returns a new |char16_t| buffer containing a zero-terminated copy
* of |aSource|.
*
* Infallibly allocates and returns a new |char16_t| buffer which you must
* free with |free|.
*
* The new buffer is zero-terminated, but that may not help you if |aSource|
* contains embedded nulls.
*
* @param aSource a UTF-16 string
* @return a new |char16_t| buffer you must free with |free|.
*/
char16_t* ToNewUnicode(const nsAString& aSource);
/* A fallible version of ToNewUnicode. Returns nullptr on failure. */
char16_t* ToNewUnicode(const nsAString& aSource,
const mozilla::fallible_t& aFallible);
/**
* Returns a new |char16_t| buffer containing a zero-terminated copy
* of |aSource|.
*
* Infallibly allocates and returns a new |char16_t| buffer which you must
* free with|free|.
*
* Performs an encoding conversion by 0-padding 8-bit wide characters up to
* 16-bits wide (i.e. Latin1 to UTF-16 conversion) while copying |aSource|
* to your new buffer.
*
* The new buffer is zero-terminated, but that may not help you if |aSource|
* contains embedded nulls.
*
* @param aSource a Latin1 string
* @return a new |char16_t| buffer you must free with |free|.
*/
char16_t* ToNewUnicode(const nsACString& aSource);
/* A fallible version of ToNewUnicode. Returns nullptr on failure. */
char16_t* ToNewUnicode(const nsACString& aSource,
const mozilla::fallible_t& aFallible);
/**
* Returns a new |char16_t| buffer containing a zero-terminated copy
* of |aSource|.
*
* Infallibly allocates and returns a new |char| buffer which you must
* free with |free|. Performs an encoding conversion from UTF-8 to UTF-16
* while copying |aSource| to your new buffer. Malformed byte sequences
* are replaced with the REPLACEMENT CHARACTER.
*
* The new buffer is zero-terminated, but that may not help you if |aSource|
* contains embedded nulls.
*
* @param aSource an 8-bit wide string, UTF-8 encoded
* @param aUTF16Count the number of 16-bit units that was returned
* @return a new |char16_t| buffer you must free with |free|.
* (UTF-16 encoded)
*/
char16_t* UTF8ToNewUnicode(const nsACString& aSource,
uint32_t* aUTF16Count = nullptr);
/* A fallible version of UTF8ToNewUnicode. Returns nullptr on failure. */
char16_t* UTF8ToNewUnicode(const nsACString& aSource, uint32_t* aUTF16Count,
const mozilla::fallible_t& aFallible);
/**
* Copies |aLength| 16-bit code units from the start of |aSource| to the
* |char16_t| buffer |aDest|.
*
* After this operation |aDest| is not null terminated.
*
* @param aSource a UTF-16 string
* @param aSrcOffset start offset in the source string
* @param aDest a |char16_t| buffer
* @param aLength the number of 16-bit code units to copy
* @return pointer to destination buffer - identical to |aDest|
*/
char16_t* CopyUnicodeTo(const nsAString& aSource, uint32_t aSrcOffset,
char16_t* aDest, uint32_t aLength);
/**
* Replaces unpaired surrogates with U+FFFD in the argument.
*
* Copies a shared string buffer or an otherwise read-only
* buffer only if there are unpaired surrogates.
*/
[[nodiscard]] inline bool EnsureUTF16Validity(nsAString& aString) {
size_t upTo = mozilla::Utf16ValidUpTo(aString);
size_t len = aString.Length();
if (upTo == len) {
return true;
}
char16_t* ptr = aString.BeginWriting(mozilla::fallible);
if (!ptr) {
return false;
}
auto span = mozilla::Span(ptr, len);
span[upTo] = 0xFFFD;
mozilla::EnsureUtf16ValiditySpan(span.From(upTo + 1));
return true;
}
void ParseString(const nsACString& aSource, char aDelimiter,
nsTArray<nsCString>& aArray);
namespace mozilla::detail {
constexpr auto kStringJoinAppendDefault =
[](auto& aResult, const auto& aValue) { aResult.Append(aValue); };
} // namespace mozilla::detail
/**
* Join a sequence of items, each optionally transformed to a string, with a
* given separator, appending to a given string.
*
* \tparam CharType char or char16_t
* \tparam InputRange a range usable with range-based for
* \tparam Func optionally, a functor accepting a nsTSubstring<CharType>& and
* an item of InputRange which appends the latter to the former
*/
template <
typename CharType, typename InputRange,
typename Func = const decltype(mozilla::detail::kStringJoinAppendDefault)&>
void StringJoinAppend(
nsTSubstring<CharType>& aOutput,
const nsTLiteralString<CharType>& aSeparator, const InputRange& aInputRange,
Func&& aFunc = mozilla::detail::kStringJoinAppendDefault) {
bool first = true;
for (const auto& item : aInputRange) {
if (first) {
first = false;
} else {
aOutput.Append(aSeparator);
}
aFunc(aOutput, item);
}
}
/**
* Join a sequence of items, each optionally transformed to a string, with a
* given separator, returning a new string.
*
* \tparam CharType char or char16_t
* \tparam InputRange a range usable with range-based for
* \tparam Func optionally, a functor accepting a nsTSubstring<CharType>& and
* an item of InputRange which appends the latter to the former
*/
template <
typename CharType, typename InputRange,
typename Func = const decltype(mozilla::detail::kStringJoinAppendDefault)&>
auto StringJoin(const nsTLiteralString<CharType>& aSeparator,
const InputRange& aInputRange,
Func&& aFunc = mozilla::detail::kStringJoinAppendDefault) {
nsTAutoString<CharType> res;
StringJoinAppend(res, aSeparator, aInputRange, std::forward<Func>(aFunc));
return res;
}
/**
* Converts case in place in the argument string.
*/
void ToUpperCase(nsACString&);
void ToLowerCase(nsACString&);
void ToUpperCase(nsACString&);
void ToLowerCase(nsACString&);
/**
* Converts case from string aSource to aDest.
*/
void ToUpperCase(const nsACString& aSource, nsACString& aDest);
void ToLowerCase(const nsACString& aSource, nsACString& aDest);
/**
* Finds the leftmost occurrence of |aPattern|, if any in the range
* |aSearchStart|..|aSearchEnd|.
*
* Returns |true| if a match was found, and adjusts |aSearchStart| and
* |aSearchEnd| to point to the match. If no match was found, returns |false|
* and makes |aSearchStart == aSearchEnd|.
*
* Currently, this is equivalent to the O(m*n) implementation previously on
* |ns[C]String|.
*
* If we need something faster, then we can implement that later.
*/
bool FindInReadable(const nsAString& aPattern, nsAString::const_iterator&,
nsAString::const_iterator&,
nsStringComparator = nsTDefaultStringComparator);
bool FindInReadable(const nsACString& aPattern, nsACString::const_iterator&,
nsACString::const_iterator&,
nsCStringComparator = nsTDefaultStringComparator);
/* sometimes we don't care about where the string was, just that we
* found it or not */
inline bool FindInReadable(
const nsAString& aPattern, const nsAString& aSource,
nsStringComparator aCompare = nsTDefaultStringComparator) {
nsAString::const_iterator start, end;
aSource.BeginReading(start);
aSource.EndReading(end);
return FindInReadable(aPattern, start, end, aCompare);
}
inline bool FindInReadable(
const nsACString& aPattern, const nsACString& aSource,
nsCStringComparator aCompare = nsTDefaultStringComparator) {
nsACString::const_iterator start, end;
aSource.BeginReading(start);
aSource.EndReading(end);
return FindInReadable(aPattern, start, end, aCompare);
}
bool CaseInsensitiveFindInReadable(const nsACString& aPattern,
nsACString::const_iterator&,
nsACString::const_iterator&);
/**
* Finds the rightmost occurrence of |aPattern|
* Returns |true| if a match was found, and adjusts |aSearchStart| and
* |aSearchEnd| to point to the match. If no match was found, returns |false|
* and makes |aSearchStart == aSearchEnd|.
*/
bool RFindInReadable(const nsAString& aPattern, nsAString::const_iterator&,
nsAString::const_iterator&,
nsStringComparator = nsTDefaultStringComparator);
bool RFindInReadable(const nsACString& aPattern, nsACString::const_iterator&,
nsACString::const_iterator&,
nsCStringComparator = nsTDefaultStringComparator);
/**
* Finds the leftmost occurrence of |aChar|, if any in the range
* |aSearchStart|..|aSearchEnd|.
*
* Returns |true| if a match was found, and adjusts |aSearchStart| to
* point to the match. If no match was found, returns |false| and
* makes |aSearchStart == aSearchEnd|.
*/
bool FindCharInReadable(char16_t aChar, nsAString::const_iterator& aSearchStart,
const nsAString::const_iterator& aSearchEnd);
bool FindCharInReadable(char aChar, nsACString::const_iterator& aSearchStart,
const nsACString::const_iterator& aSearchEnd);
bool StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring);
bool StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring,
nsStringComparator);
bool StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring);
bool StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring,
nsCStringComparator);
bool StringEndsWith(const nsAString& aSource, const nsAString& aSubstring);
bool StringEndsWith(const nsAString& aSource, const nsAString& aSubstring,
nsStringComparator);
bool StringEndsWith(const nsACString& aSource, const nsACString& aSubstring);
bool StringEndsWith(const nsACString& aSource, const nsACString& aSubstring,
nsCStringComparator);
const nsString& EmptyString();
const nsCString& EmptyCString();
const nsString& VoidString();
const nsCString& VoidCString();
/**
* Compare a UTF-8 string to an UTF-16 string.
*
* Returns 0 if the strings are equal, -1 if aUTF8String is less
* than aUTF16Count, and 1 in the reverse case. Errors are replaced
* with U+FFFD and then the U+FFFD is compared as if it had occurred
* in the input. If aErr is not nullptr, *aErr is set to true if
* either string had malformed sequences.
*/
int32_t CompareUTF8toUTF16(const nsACString& aUTF8String,
const nsAString& aUTF16String, bool* aErr = nullptr);
void AppendUCS4ToUTF16(const uint32_t aSource, nsAString& aDest);
#endif // !defined(nsReadableUtils_h___)