/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* vim: set ts=8 sts=2 et sw=2 tw=80: */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ // IWYU pragma: private, include "nsString.h" #ifndef nsReadableUtils_h___ #define nsReadableUtils_h___ /** * I guess all the routines in this file are all mis-named. * According to our conventions, they should be |NS_xxx|. */ #include "mozilla/Assertions.h" #include "nsAString.h" #include "mozilla/Tuple.h" #include "nsTArrayForwardDeclare.h" // Can't include mozilla/Encoding.h here. The implementations are in // the encoding_rs and encoding_glue crates. extern "C" { size_t encoding_utf8_valid_up_to(uint8_t const* buffer, size_t buffer_len); bool encoding_mem_is_ascii(uint8_t const* buffer, size_t buffer_len); bool encoding_mem_is_basic_latin(char16_t const* buffer, size_t buffer_len); bool encoding_mem_is_utf8_latin1(uint8_t const* buffer, size_t buffer_len); bool encoding_mem_is_str_latin1(uint8_t const* buffer, size_t buffer_len); bool encoding_mem_is_utf16_latin1(char16_t const* buffer, size_t buffer_len); size_t encoding_mem_utf16_valid_up_to(char16_t const* buffer, size_t buffer_len); void encoding_mem_ensure_utf16_validity(char16_t* buffer, size_t buffer_len); void encoding_mem_convert_utf16_to_latin1_lossy(const char16_t* src, size_t src_len, char* dst, size_t dst_len); size_t encoding_mem_convert_utf8_to_latin1_lossy(const char* src, size_t src_len, char* dst, size_t dst_len); void encoding_mem_convert_latin1_to_utf16(const char* src, size_t src_len, char16_t* dst, size_t dst_len); size_t encoding_mem_convert_utf16_to_utf8(const char16_t* src, size_t src_len, char* dst, size_t dst_len); void encoding_mem_convert_utf16_to_utf8_partial(const char16_t* src, size_t* src_len, char* dst, size_t* dst_len); size_t encoding_mem_convert_utf8_to_utf16(const char* src, size_t src_len, char16_t* dst, size_t dst_len); } // From the nsstring crate extern "C" { bool nsstring_fallible_append_utf8_impl(nsAString* aThis, const char* aOther, size_t aOtherLen, size_t aOldLen); bool nsstring_fallible_append_latin1_impl(nsAString* aThis, const char* aOther, size_t aOtherLen, size_t aOldLen, bool aAllowShrinking); bool nscstring_fallible_append_utf16_to_utf8_impl(nsACString* aThis, const char16_t*, size_t aOtherLen, size_t aOldLen); bool nscstring_fallible_append_utf16_to_latin1_lossy_impl(nsACString* aThis, const char16_t*, size_t aOtherLen, size_t aOldLen, bool aAllowShrinking); bool nscstring_fallible_append_utf8_to_latin1_lossy_check( nsACString* aThis, const nsACString* aOther, size_t aOldLen); bool nscstring_fallible_append_latin1_to_utf8_check(nsACString* aThis, const nsACString* aOther, size_t aOldLen); } /** * If all the code points in the input are below U+0100, converts to Latin1, * i.e. unsigned byte value is Unicode scalar value; not windows-1252. If * there are code points above U+00FF, produces garbage in a memory-safe way * and will likely start asserting in future debug builds. The nature of the * garbage depends on the CPU architecture and must not be relied upon. * * The length of aDest must be not be less than the length of aSource. */ inline void LossyConvertUTF16toLatin1(mozilla::Span aSource, mozilla::Span aDest) { encoding_mem_convert_utf16_to_latin1_lossy( aSource.Elements(), aSource.Length(), aDest.Elements(), aDest.Length()); } /** * If all the code points in the input are below U+0100, converts to Latin1, * i.e. unsigned byte value is Unicode scalar value; not windows-1252. If * there are code points above U+00FF, asserts in debug builds and produces * garbage in memory-safe way in release builds. The nature of the garbage * may depend on the CPU architecture and must not be relied upon. * * The length of aDest must be not be less than the length of aSource. */ inline size_t LossyConvertUTF8toLatin1(mozilla::Span aSource, mozilla::Span aDest) { return encoding_mem_convert_utf8_to_latin1_lossy( aSource.Elements(), aSource.Length(), aDest.Elements(), aDest.Length()); } /** * Interprets unsigned byte value as Unicode scalar value (i.e. not * windows-1252!). * * The length of aDest must be not be less than the length of aSource. */ inline void ConvertLatin1toUTF16(mozilla::Span aSource, mozilla::Span aDest) { encoding_mem_convert_latin1_to_utf16(aSource.Elements(), aSource.Length(), aDest.Elements(), aDest.Length()); } /** * Lone surrogates are replaced with the REPLACEMENT CHARACTER. * * The length of aDest must be at least the length of aSource times three. * * Returns the number of code units written. */ inline size_t ConvertUTF16toUTF8(mozilla::Span aSource, mozilla::Span aDest) { return encoding_mem_convert_utf16_to_utf8( aSource.Elements(), aSource.Length(), aDest.Elements(), aDest.Length()); } /** * Lone surrogates are replaced with the REPLACEMENT CHARACTER. * * The conversion is guaranteed to be complete if the length of aDest is * at least the length of aSource times three. * * The output is always valid UTF-8 ending on scalar value boundary * even in the case of partial conversion. * * Returns the number of code units read and the number of code * units written. */ inline mozilla::Tuple ConvertUTF16toUTF8Partial( mozilla::Span aSource, mozilla::Span aDest) { size_t srcLen = aSource.Length(); size_t dstLen = aDest.Length(); encoding_mem_convert_utf16_to_utf8_partial(aSource.Elements(), &srcLen, aDest.Elements(), &dstLen); return mozilla::MakeTuple(srcLen, dstLen); } /** * Malformed byte sequences are replaced with the REPLACEMENT CHARACTER. * * The length of aDest must at least one greater than the length of aSource. * * Returns the number of code units written. */ inline size_t ConvertUTF8toUTF16(mozilla::Span aSource, mozilla::Span aDest) { return encoding_mem_convert_utf8_to_utf16( aSource.Elements(), aSource.Length(), aDest.Elements(), aDest.Length()); } inline size_t Distance(const nsReadingIterator& aStart, const nsReadingIterator& aEnd) { MOZ_ASSERT(aStart.get() <= aEnd.get()); return static_cast(aEnd.get() - aStart.get()); } inline size_t Distance(const nsReadingIterator& aStart, const nsReadingIterator& aEnd) { MOZ_ASSERT(aStart.get() <= aEnd.get()); return static_cast(aEnd.get() - aStart.get()); } // UTF-8 to UTF-16 // Invalid UTF-8 byte sequences are replaced with the REPLACEMENT CHARACTER. inline MOZ_MUST_USE bool CopyUTF8toUTF16(mozilla::Span aSource, nsAString& aDest, const mozilla::fallible_t&) { return nsstring_fallible_append_utf8_impl(&aDest, aSource.Elements(), aSource.Length(), 0); } inline void CopyUTF8toUTF16(mozilla::Span aSource, nsAString& aDest) { if (MOZ_UNLIKELY(!CopyUTF8toUTF16(aSource, aDest, mozilla::fallible))) { aDest.AllocFailed(aSource.Length()); } } inline MOZ_MUST_USE bool AppendUTF8toUTF16(mozilla::Span aSource, nsAString& aDest, const mozilla::fallible_t&) { return nsstring_fallible_append_utf8_impl(&aDest, aSource.Elements(), aSource.Length(), aDest.Length()); } inline void AppendUTF8toUTF16(mozilla::Span aSource, nsAString& aDest) { if (MOZ_UNLIKELY(!AppendUTF8toUTF16(aSource, aDest, mozilla::fallible))) { aDest.AllocFailed(aDest.Length() + aSource.Length()); } } // Latin1 to UTF-16 // Interpret each incoming unsigned byte value as a Unicode scalar value (not // windows-1252!). The function names say "ASCII" instead of "Latin1" for // legacy reasons. inline MOZ_MUST_USE bool CopyASCIItoUTF16(mozilla::Span aSource, nsAString& aDest, const mozilla::fallible_t&) { return nsstring_fallible_append_latin1_impl(&aDest, aSource.Elements(), aSource.Length(), 0, true); } inline void CopyASCIItoUTF16(mozilla::Span aSource, nsAString& aDest) { if (MOZ_UNLIKELY(!CopyASCIItoUTF16(aSource, aDest, mozilla::fallible))) { aDest.AllocFailed(aSource.Length()); } } inline MOZ_MUST_USE bool AppendASCIItoUTF16(mozilla::Span aSource, nsAString& aDest, const mozilla::fallible_t&) { return nsstring_fallible_append_latin1_impl( &aDest, aSource.Elements(), aSource.Length(), aDest.Length(), false); } inline void AppendASCIItoUTF16(mozilla::Span aSource, nsAString& aDest) { if (MOZ_UNLIKELY(!AppendASCIItoUTF16(aSource, aDest, mozilla::fallible))) { aDest.AllocFailed(aDest.Length() + aSource.Length()); } } // UTF-16 to UTF-8 // Unpaired surrogates are replaced with the REPLACEMENT CHARACTER. inline MOZ_MUST_USE bool CopyUTF16toUTF8(mozilla::Span aSource, nsACString& aDest, const mozilla::fallible_t&) { return nscstring_fallible_append_utf16_to_utf8_impl( &aDest, aSource.Elements(), aSource.Length(), 0); } inline void CopyUTF16toUTF8(mozilla::Span aSource, nsACString& aDest) { if (MOZ_UNLIKELY(!CopyUTF16toUTF8(aSource, aDest, mozilla::fallible))) { aDest.AllocFailed(aSource.Length()); } } inline MOZ_MUST_USE bool AppendUTF16toUTF8( mozilla::Span aSource, nsACString& aDest, const mozilla::fallible_t&) { return nscstring_fallible_append_utf16_to_utf8_impl( &aDest, aSource.Elements(), aSource.Length(), aDest.Length()); } inline void AppendUTF16toUTF8(mozilla::Span aSource, nsACString& aDest) { if (MOZ_UNLIKELY(!AppendUTF16toUTF8(aSource, aDest, mozilla::fallible))) { aDest.AllocFailed(aDest.Length() + aSource.Length()); } } // UTF-16 to Latin1 // If all code points in the input are below U+0100, represents each scalar // value as an unsigned byte. (This is not windows-1252!) If there are code // points above U+00FF, memory-safely produces garbage and will likely start // asserting in future debug builds. The nature of the garbage may differ // based on CPU architecture and must not be relied upon. The names say // "ASCII" instead of "Latin1" for legacy reasons. inline MOZ_MUST_USE bool LossyCopyUTF16toASCII( mozilla::Span aSource, nsACString& aDest, const mozilla::fallible_t&) { return nscstring_fallible_append_utf16_to_latin1_lossy_impl( &aDest, aSource.Elements(), aSource.Length(), 0, true); } inline void LossyCopyUTF16toASCII(mozilla::Span aSource, nsACString& aDest) { if (MOZ_UNLIKELY(!LossyCopyUTF16toASCII(aSource, aDest, mozilla::fallible))) { aDest.AllocFailed(aSource.Length()); } } inline MOZ_MUST_USE bool LossyAppendUTF16toASCII( mozilla::Span aSource, nsACString& aDest, const mozilla::fallible_t&) { return nscstring_fallible_append_utf16_to_latin1_lossy_impl( &aDest, aSource.Elements(), aSource.Length(), aDest.Length(), false); } inline void LossyAppendUTF16toASCII(mozilla::Span aSource, nsACString& aDest) { if (MOZ_UNLIKELY( !LossyAppendUTF16toASCII(aSource, aDest, mozilla::fallible))) { aDest.AllocFailed(aDest.Length() + aSource.Length()); } } // Latin1 to UTF-8 // Interpret each incoming unsigned byte value as a Unicode scalar value (not // windows-1252!). // If the input is ASCII, the heap-allocated nsStringBuffer is shared if // possible. inline MOZ_MUST_USE bool CopyLatin1toUTF8(const nsACString& aSource, nsACString& aDest, const mozilla::fallible_t&) { return nscstring_fallible_append_latin1_to_utf8_check(&aDest, &aSource, 0); } inline void CopyLatin1toUTF8(const nsACString& aSource, nsACString& aDest) { if (MOZ_UNLIKELY(!CopyLatin1toUTF8(aSource, aDest, mozilla::fallible))) { aDest.AllocFailed(aSource.Length()); } } inline MOZ_MUST_USE bool AppendLatin1toUTF8(const nsACString& aSource, nsACString& aDest, const mozilla::fallible_t&) { return nscstring_fallible_append_latin1_to_utf8_check(&aDest, &aSource, aDest.Length()); } inline void AppendLatin1toUTF8(const nsACString& aSource, nsACString& aDest) { if (MOZ_UNLIKELY(!AppendLatin1toUTF8(aSource, aDest, mozilla::fallible))) { aDest.AllocFailed(aDest.Length() + aSource.Length()); } } // UTF-8 to Latin1 // If all code points in the input are below U+0100, represents each scalar // value as an unsigned byte. (This is not windows-1252!) If there are code // points above U+00FF, memory-safely produces garbage in release builds and // asserts in debug builds. The nature of the garbage may differ // based on CPU architecture and must not be relied upon. // If the input is ASCII, the heap-allocated nsStringBuffer is shared if // possible. inline MOZ_MUST_USE bool LossyCopyUTF8toLatin1(const nsACString& aSource, nsACString& aDest, const mozilla::fallible_t&) { return nscstring_fallible_append_utf8_to_latin1_lossy_check(&aDest, &aSource, 0); } inline void LossyCopyUTF8toLatin1(const nsACString& aSource, nsACString& aDest) { if (MOZ_UNLIKELY(!LossyCopyUTF8toLatin1(aSource, aDest, mozilla::fallible))) { aDest.AllocFailed(aSource.Length()); } } inline MOZ_MUST_USE bool LossyAppendUTF8toLatin1(const nsACString& aSource, nsACString& aDest, const mozilla::fallible_t&) { return nscstring_fallible_append_utf8_to_latin1_lossy_check(&aDest, &aSource, aDest.Length()); } inline void LossyAppendUTF8toLatin1(const nsACString& aSource, nsACString& aDest) { if (MOZ_UNLIKELY( !LossyAppendUTF8toLatin1(aSource, aDest, mozilla::fallible))) { aDest.AllocFailed(aDest.Length() + aSource.Length()); } } /** * Returns a new |char| buffer containing a zero-terminated copy of |aSource|. * * Allocates and returns a new |char| buffer which you must free with |free|. * Performs a conversion with LossyConvertUTF16toLatin1() writing into the * newly-allocated buffer. * * The new buffer is zero-terminated, but that may not help you if |aSource| * contains embedded nulls. * * @param aSource a 16-bit wide string * @return a new |char| buffer you must free with |free|. */ char* ToNewCString(const nsAString& aSource); /** * Returns a new |char| buffer containing a zero-terminated copy of |aSource|. * * Allocates and returns a new |char| buffer which you must free with |free|. * * The new buffer is zero-terminated, but that may not help you if |aSource| * contains embedded nulls. * * @param aSource an 8-bit wide string * @return a new |char| buffer you must free with |free|. */ char* ToNewCString(const nsACString& aSource); /** * Returns a new |char| buffer containing a zero-terminated copy of |aSource|. * * Allocates and returns a new |char| buffer which you must free with * |free|. * Performs an encoding conversion from a UTF-16 string to a UTF-8 string with * unpaired surrogates replaced with the REPLACEMENT CHARACTER copying * |aSource| to your new buffer. * * The new buffer is zero-terminated, but that may not help you if |aSource| * contains embedded nulls. * * @param aSource a UTF-16 string (made of char16_t's) * @param aUTF8Count the number of 8-bit units that was returned * @return a new |char| buffer you must free with |free|. */ char* ToNewUTF8String(const nsAString& aSource, uint32_t* aUTF8Count = nullptr); /** * Returns a new |char16_t| buffer containing a zero-terminated copy of * |aSource|. * * Allocates and returns a new |char16_t| buffer which you must free with * |free|. * * The new buffer is zero-terminated, but that may not help you if |aSource| * contains embedded nulls. * * @param aSource a UTF-16 string * @return a new |char16_t| buffer you must free with |free|. */ char16_t* ToNewUnicode(const nsAString& aSource); /** * Returns a new |char16_t| buffer containing a zero-terminated copy of * |aSource|. * * Allocates and returns a new |char16_t| buffer which you must free with * |free|. * * Performs an encoding conversion by 0-padding 8-bit wide characters up to * 16-bits wide (i.e. Latin1 to UTF-16 conversion) while copying |aSource| * to your new buffer. * * The new buffer is zero-terminated, but that may not help you if |aSource| * contains embedded nulls. * * @param aSource a Latin1 string * @return a new |char16_t| buffer you must free with |free|. */ char16_t* ToNewUnicode(const nsACString& aSource); /** * Returns a new |char16_t| buffer containing a zero-terminated copy * of |aSource|. * * Allocates and returns a new |char| buffer which you must free with * |free|. Performs an encoding conversion from UTF-8 to UTF-16 * while copying |aSource| to your new buffer. Malformed byte sequences * are replaced with the REPLACEMENT CHARACTER. * * The new buffer is zero-terminated, but that may not help you if |aSource| * contains embedded nulls. * * @param aSource an 8-bit wide string, UTF-8 encoded * @param aUTF16Count the number of 16-bit units that was returned * @return a new |char16_t| buffer you must free with |free|. * (UTF-16 encoded) */ char16_t* UTF8ToNewUnicode(const nsACString& aSource, uint32_t* aUTF16Count = nullptr); /** * Copies |aLength| 16-bit code units from the start of |aSource| to the * |char16_t| buffer |aDest|. * * After this operation |aDest| is not null terminated. * * @param aSource a UTF-16 string * @param aSrcOffset start offset in the source string * @param aDest a |char16_t| buffer * @param aLength the number of 16-bit code units to copy * @return pointer to destination buffer - identical to |aDest| */ char16_t* CopyUnicodeTo(const nsAString& aSource, uint32_t aSrcOffset, char16_t* aDest, uint32_t aLength); /** * Returns |true| if |aString| contains only ASCII characters, that is, * characters in the range (0x00, 0x7F). * * @param aString a 16-bit wide string to scan */ inline bool IsASCII(mozilla::Span aString) { size_t length = aString.Length(); const char16_t* ptr = aString.Elements(); // For short strings, calling into Rust is a pessimization, and the SIMD // code won't have a chance to kick in anyway. if (length < 16) { char16_t accu = 0; for (size_t i = 0; i < length; i++) { accu |= ptr[i]; } return accu < 0x80U; } return encoding_mem_is_basic_latin(ptr, length); } /** * Returns |true| if |aString| contains only ASCII characters, that is, * characters in the range (0x00, 0x7F). * * @param aString a 8-bit wide string to scan */ inline bool IsASCII(mozilla::Span aString) { size_t length = aString.Length(); const uint8_t* ptr = reinterpret_cast(aString.Elements()); // For short strings, calling into Rust is a pessimization, and the SIMD // code won't have a chance to kick in anyway. if (length < 16) { uint8_t accu = 0; for (size_t i = 0; i < length; i++) { accu |= ptr[i]; } return accu < 0x80U; } return encoding_mem_is_ascii(ptr, length); } /** * Returns |true| if |aString| contains only Latin1 characters, that is, * characters in the range (U+0000, U+00FF). * * @param aString a potentially-invalid UTF-16 string to scan */ inline bool IsUTF16Latin1(mozilla::Span aString) { size_t length = aString.Length(); const char16_t* ptr = aString.Elements(); // For short strings, calling into Rust is a pessimization, and the SIMD // code won't have a chance to kick in anyway. if (length < 16) { char16_t accu = 0; for (size_t i = 0; i < length; i++) { accu |= ptr[i]; } return accu < 0x100U; } return encoding_mem_is_utf16_latin1(ptr, length); } /** * Returns |true| if |aString| contains only Latin1 characters, that is, * characters in the range (U+0000, U+00FF). * * If you know that the argument is always absolutely guaranteed to be valid * UTF-8, use the faster UnsafeIsValidUTF8Latin1() instead. * * @param aString potentially-invalid UTF-8 string to scan */ inline bool IsUTF8Latin1(mozilla::Span aString) { size_t length = aString.Length(); const uint8_t* ptr = reinterpret_cast(aString.Elements()); // For short strings, calling into Rust is a pessimization, and the SIMD // code won't have a chance to kick in anyway. if (length < 16) { for (size_t i = 0; i < length; i++) { if (ptr[i] >= 0x80U) { ptr += i; length -= i; // This loop can't handle non-ASCII, but the Rust code can, so // upon seeing non-ASCII, break the loop and let the Rust code // handle the rest of the buffer (including the non-ASCII byte). goto end; } } return true; } end: return encoding_mem_is_utf8_latin1(ptr, length); } /** * Returns |true| if |aString| contains only Latin1 characters, that is, * characters in the range (U+0000, U+00FF). * * The argument MUST be valid UTF-8. If you are at all unsure, use IsUTF8Latin1 * instead! * * @param aString known-valid UTF-8 string to scan */ inline bool UnsafeIsValidUTF8Latin1(mozilla::Span aString) { size_t length = aString.Length(); const uint8_t* ptr = reinterpret_cast(aString.Elements()); // For short strings, calling into Rust is a pessimization, and the SIMD // code won't have a chance to kick in anyway. if (length < 16) { for (size_t i = 0; i < length; i++) { if (ptr[i] >= 0x80U) { ptr += i; length -= i; goto end; } } return true; } end: return encoding_mem_is_str_latin1(ptr, length); } /** * Returns |true| if |aString| is a valid UTF-8 string. * * Note that this doesn't check whether the string might look like a valid * string in another encoding, too, e.g. ISO-2022-JP. * * @param aString an 8-bit wide string to scan */ inline bool IsUTF8(mozilla::Span aString) { size_t length = aString.Length(); const uint8_t* ptr = reinterpret_cast(aString.Elements()); // For short strings, calling into Rust is a pessimization, and the SIMD // code won't have a chance to kick in anyway. if (length < 16) { for (size_t i = 0; i < length; i++) { if (ptr[i] >= 0x80U) { ptr += i; length -= i; goto end; } } return true; } end: return length == encoding_utf8_valid_up_to(ptr, length); } /** * Returns the index of the first unpaired surrogate or * the length of the string if there are none. */ inline uint32_t UTF16ValidUpTo(mozilla::Span aString) { return encoding_mem_utf16_valid_up_to(aString.Elements(), aString.Length()); } /** * Replaces unpaired surrogates with U+FFFD in the argument. */ inline void EnsureUTF16ValiditySpan(mozilla::Span aString) { encoding_mem_ensure_utf16_validity(aString.Elements(), aString.Length()); } /** * Replaces unpaired surrogates with U+FFFD in the argument. * * Copies a shared string buffer or an otherwise read-only * buffer only if there are unpaired surrogates. */ inline void EnsureUTF16Validity(nsAString& aString) { uint32_t upTo = UTF16ValidUpTo(aString); uint32_t len = aString.Length(); if (upTo == len) { return; } char16_t* ptr = aString.BeginWriting(); auto span = mozilla::MakeSpan(ptr, len); span[upTo] = 0xFFFD; EnsureUTF16ValiditySpan(span.From(upTo + 1)); } bool ParseString(const nsACString& aAstring, char aDelimiter, nsTArray& aArray); /** * Converts case in place in the argument string. */ void ToUpperCase(nsACString&); void ToLowerCase(nsACString&); void ToUpperCase(nsACString&); void ToLowerCase(nsACString&); /** * Converts case from string aSource to aDest. */ void ToUpperCase(const nsACString& aSource, nsACString& aDest); void ToLowerCase(const nsACString& aSource, nsACString& aDest); /** * Finds the leftmost occurrence of |aPattern|, if any in the range * |aSearchStart|..|aSearchEnd|. * * Returns |true| if a match was found, and adjusts |aSearchStart| and * |aSearchEnd| to point to the match. If no match was found, returns |false| * and makes |aSearchStart == aSearchEnd|. * * Currently, this is equivalent to the O(m*n) implementation previously on * |ns[C]String|. * * If we need something faster, then we can implement that later. */ bool FindInReadable(const nsAString& aPattern, nsAString::const_iterator&, nsAString::const_iterator&, const nsStringComparator& = nsDefaultStringComparator()); bool FindInReadable(const nsACString& aPattern, nsACString::const_iterator&, nsACString::const_iterator&, const nsCStringComparator& = nsDefaultCStringComparator()); /* sometimes we don't care about where the string was, just that we * found it or not */ inline bool FindInReadable( const nsAString& aPattern, const nsAString& aSource, const nsStringComparator& aCompare = nsDefaultStringComparator()) { nsAString::const_iterator start, end; aSource.BeginReading(start); aSource.EndReading(end); return FindInReadable(aPattern, start, end, aCompare); } inline bool FindInReadable( const nsACString& aPattern, const nsACString& aSource, const nsCStringComparator& aCompare = nsDefaultCStringComparator()) { nsACString::const_iterator start, end; aSource.BeginReading(start); aSource.EndReading(end); return FindInReadable(aPattern, start, end, aCompare); } bool CaseInsensitiveFindInReadable(const nsACString& aPattern, nsACString::const_iterator&, nsACString::const_iterator&); /** * Finds the rightmost occurrence of |aPattern| * Returns |true| if a match was found, and adjusts |aSearchStart| and * |aSearchEnd| to point to the match. If no match was found, returns |false| * and makes |aSearchStart == aSearchEnd|. */ bool RFindInReadable(const nsAString& aPattern, nsAString::const_iterator&, nsAString::const_iterator&, const nsStringComparator& = nsDefaultStringComparator()); bool RFindInReadable(const nsACString& aPattern, nsACString::const_iterator&, nsACString::const_iterator&, const nsCStringComparator& = nsDefaultCStringComparator()); /** * Finds the leftmost occurrence of |aChar|, if any in the range * |aSearchStart|..|aSearchEnd|. * * Returns |true| if a match was found, and adjusts |aSearchStart| to * point to the match. If no match was found, returns |false| and * makes |aSearchStart == aSearchEnd|. */ bool FindCharInReadable(char16_t aChar, nsAString::const_iterator& aSearchStart, const nsAString::const_iterator& aSearchEnd); bool FindCharInReadable(char aChar, nsACString::const_iterator& aSearchStart, const nsACString::const_iterator& aSearchEnd); bool StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring); bool StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring, const nsStringComparator& aComparator); bool StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring); bool StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring, const nsCStringComparator& aComparator); bool StringEndsWith(const nsAString& aSource, const nsAString& aSubstring); bool StringEndsWith(const nsAString& aSource, const nsAString& aSubstring, const nsStringComparator& aComparator); bool StringEndsWith(const nsACString& aSource, const nsACString& aSubstring); bool StringEndsWith(const nsACString& aSource, const nsACString& aSubstring, const nsCStringComparator& aComparator); const nsString& EmptyString(); const nsCString& EmptyCString(); const nsString& VoidString(); const nsCString& VoidCString(); /** * Compare a UTF-8 string to an UTF-16 string. * * Returns 0 if the strings are equal, -1 if aUTF8String is less * than aUTF16Count, and 1 in the reverse case. Errors are replaced * with U+FFFD and then the U+FFFD is compared as if it had occurred * in the input. If aErr is not nullptr, *aErr is set to true if * either string had malformed sequences. */ int32_t CompareUTF8toUTF16(const nsACString& aUTF8String, const nsAString& aUTF16String, bool* aErr = nullptr); void AppendUCS4ToUTF16(const uint32_t aSource, nsAString& aDest); #endif // !defined(nsReadableUtils_h___)