/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef nsTStringRepr_h
#define nsTStringRepr_h

#include <limits>
#include <string_view>
#include <type_traits>  // std::enable_if

#include "fmt/format.h"
#include "fmt/xchar.h"
#include "mozilla/Char16.h"
#include "mozilla/CheckedInt.h"
#include "mozilla/fallible.h"
#include "mozilla/StringBuffer.h"
#include "nsStringFlags.h"
#include "nsStringFwd.h"
#include "nsStringIterator.h"
#include "nsCharTraits.h"

template <typename T>
class nsTSubstringTuple;

namespace mozilla {

// This is mainly intended to be used in the context of nsTStrings where
// we want to enable a specific function only for a given character class. In
// order for this technique to work the member function needs to be templated
// on something other than `T`. We keep this in the `mozilla` namespace rather
// than `nsTStringRepr` as it's intentionally not dependent on `T`.
//
// The 'T' at the end of `Char[16]OnlyT` is referring to the `::type` portion
// which will only be defined if the character class is correct. This is similar
// to `std::enable_if_t` which is available in C++14, but not C++11.
//
// `CharType` is generally going to be a shadowed type of `T`.
//
// Example usage of a function that will only be defined if `T` == `char`:
//
// template <typename T>
// class nsTSubstring : public nsTStringRepr<T> {
//   template <typename Q = T, typename EnableForChar = typename CharOnlyT<Q>>
//   int Foo() { return 42; }
// };
//
// Please note that we had to use a separate type `Q` for this to work. You
// will get a semi-decent compiler error if you use `T` directly.
template using CharOnlyT = typename std::enable_if::value>::type; template using Char16OnlyT = typename std::enable_if::value>::type; namespace detail { // nsTStringLengthStorage is a helper class which holds the string's length and // provides getters and setters for converting to and from `size_t`. This is // done to allow the length to be stored in a `uint32_t` using assertions. template class nsTStringLengthStorage { public: // The maximum byte capacity for a `nsTString` must fit within an `int32_t`, // with enough room for a trailing null, as consumers often cast `Length()` // and `Capacity()` to smaller types like `int32_t`. static constexpr size_t kMax = size_t{std::numeric_limits::max()} / sizeof(T) - 1; static_assert( (kMax + 1) * sizeof(T) <= std::numeric_limits::max(), "nsTString's maximum length, including the trailing null, must fit " "within `int32_t`, as callers will cast to `int32_t` occasionally"); static_assert(((CheckedInt{kMax} + 1) * sizeof(T) + sizeof(mozilla::StringBuffer)) .isValid(), "Math required to allocate a mozilla::StringBuffer for a " "maximum-capacity string must not overflow uint32_t"); // Implicit conversion and assignment from `size_t` which assert that the // value is in-range. MOZ_IMPLICIT constexpr nsTStringLengthStorage(size_t aLength) : mLength(static_cast(aLength)) { MOZ_RELEASE_ASSERT(aLength <= kMax, "string is too large"); } constexpr nsTStringLengthStorage& operator=(size_t aLength) { MOZ_RELEASE_ASSERT(aLength <= kMax, "string is too large"); mLength = static_cast(aLength); return *this; } MOZ_IMPLICIT constexpr operator size_t() const { return mLength; } private: uint32_t mLength = 0; }; // nsTStringRepr defines a string's memory layout and some accessor methods. // This class exists so that nsTLiteralString can avoid inheriting // nsTSubstring's destructor. All methods on this class must be const because // literal strings are not writable. 
// // This class is an implementation detail and should not be instantiated // directly, nor used in any way outside of the string code itself. It is // buried in a namespace to discourage its use in function parameters. // If you need to take a parameter, use [const] ns[C]Substring&. // If you need to instantiate a string, use ns[C]String or descendents. // // NAMES: // nsStringRepr for wide characters // nsCStringRepr for narrow characters template class nsTStringRepr { public: typedef mozilla::fallible_t fallible_t; typedef T char_type; typedef nsCharTraits char_traits; typedef typename char_traits::incompatible_char_type incompatible_char_type; typedef nsTStringRepr self_type; typedef self_type base_string_type; typedef nsTSubstring substring_type; typedef nsTSubstringTuple substring_tuple_type; typedef nsReadingIterator const_iterator; typedef char_type* iterator; typedef nsTStringComparator comparator_type; typedef const char_type* const_char_iterator; typedef std::basic_string_view string_view; typedef size_t index_type; typedef size_t size_type; // These are only for internal use within the string classes: typedef StringDataFlags DataFlags; typedef StringClassFlags ClassFlags; typedef nsTStringLengthStorage LengthStorage; // Reading iterators. constexpr const_char_iterator BeginReading() const { return mData; } constexpr const_char_iterator EndReading() const { return mData + mLength; } // Deprecated reading iterators. const_iterator& BeginReading(const_iterator& aIter) const { aIter.mStart = mData; aIter.mEnd = mData + mLength; aIter.mPosition = aIter.mStart; return aIter; } const_iterator& EndReading(const_iterator& aIter) const { aIter.mStart = mData; aIter.mEnd = mData + mLength; aIter.mPosition = aIter.mEnd; return aIter; } const_char_iterator& BeginReading(const_char_iterator& aIter) const { return aIter = mData; } const_char_iterator& EndReading(const_char_iterator& aIter) const { return aIter = mData + mLength; } // Accessors. 
template struct raw_type { typedef const U* type; }; #if defined(MOZ_USE_CHAR16_WRAPPER) template struct raw_type { typedef char16ptr_t type; }; #endif // Returns pointer to string data (not necessarily null-terminated) constexpr typename raw_type::type Data() const { return mData; } constexpr size_type Length() const { return static_cast(mLength); } constexpr string_view View() const { return string_view(Data(), Length()); } constexpr operator string_view() const { return View(); } constexpr DataFlags GetDataFlags() const { return mDataFlags; } constexpr bool IsEmpty() const { return mLength == 0; } constexpr bool IsLiteral() const { return !!(mDataFlags & DataFlags::LITERAL); } constexpr bool IsVoid() const { return !!(mDataFlags & DataFlags::VOIDED); } constexpr bool IsTerminated() const { return !!(mDataFlags & DataFlags::TERMINATED); } constexpr char_type CharAt(index_type aIndex) const { NS_ASSERTION(aIndex < Length(), "index exceeds allowable range"); return mData[aIndex]; } constexpr char_type operator[](index_type aIndex) const { return CharAt(aIndex); } char_type First() const; char_type Last() const; // Equality. bool NS_FASTCALL Equals(const self_type&) const; bool NS_FASTCALL Equals(const self_type&, comparator_type) const; bool NS_FASTCALL Equals(const substring_tuple_type& aTuple) const; bool NS_FASTCALL Equals(const substring_tuple_type& aTuple, comparator_type) const; bool NS_FASTCALL Equals(const char_type* aData) const; bool NS_FASTCALL Equals(const char_type* aData, comparator_type) const; /** * Compare this string and another ASCII-case-insensitively. * * This method is similar to `LowerCaseEqualsASCII` however both strings are * lowercased, meaning that `aString` need not be all lowercase. 
* * @param aString is the string to check * @return boolean */ bool EqualsIgnoreCase(const std::string_view& aString) const; #ifdef __cpp_char8_t template > bool NS_FASTCALL Equals(const char8_t* aData) const { return Equals(reinterpret_cast(aData)); } template > bool NS_FASTCALL Equals(const char8_t* aData, comparator_type aComp) const { return Equals(reinterpret_cast(aData), aComp); } #endif #if defined(MOZ_USE_CHAR16_WRAPPER) template > bool NS_FASTCALL Equals(char16ptr_t aData) const { return Equals(static_cast(aData)); } template > bool NS_FASTCALL Equals(char16ptr_t aData, comparator_type aComp) const { return Equals(static_cast(aData), aComp); } #endif // An efficient comparison with ASCII that can be used even // for wide strings. Call this version when you know the // length of 'data'. bool NS_FASTCALL EqualsASCII(const char* aData, size_type aLen) const; // An efficient comparison with ASCII that can be used even // for wide strings. Call this version when 'data' is // null-terminated. bool NS_FASTCALL EqualsASCII(const char* aData) const; // An efficient comparison with Latin1 characters that can be used even for // wide strings. bool EqualsLatin1(const char* aData, size_type aLength) const; // EqualsLiteral must ONLY be called with an actual literal string, or // a char array *constant* declared without an explicit size and with an // initializer that is a string literal or is otherwise null-terminated. // Use EqualsASCII for other char array variables. // (Although this method may happen to produce expected results for other // char arrays that have bound one greater than the sequence of interest, // such use is discouraged for reasons of readability and maintainability.) // The template trick to acquire the array bound at compile time without // using a macro is due to Corey Kosak, with much thanks. 
template inline bool EqualsLiteral(const char (&aStr)[N]) const { return EqualsASCII(aStr, N - 1); } // EqualsLiteral must ONLY be called with an actual literal string, or // a char array *constant* declared without an explicit size and with an // initializer that is a string literal or is otherwise null-terminated. // Use EqualsASCII for other char array variables. // (Although this method may happen to produce expected results for other // char arrays that have bound one greater than the sequence of interest, // such use is discouraged for reasons of readability and maintainability.) // The template trick to acquire the array bound at compile time without // using a macro is due to Corey Kosak, with much thanks. template >> inline bool EqualsLiteral(const char_type (&aStr)[N]) const { return *this == nsTLiteralString(aStr); } // The LowerCaseEquals methods compare the ASCII-lowercase version of // this string (lowercasing only ASCII uppercase characters) to some // ASCII/Literal string. The ASCII string is *not* lowercased for // you. If you compare to an ASCII or literal string that contains an // uppercase character, it is guaranteed to return false. We will // throw assertions too. bool NS_FASTCALL LowerCaseEqualsASCII(const char* aData, size_type aLen) const; bool NS_FASTCALL LowerCaseEqualsASCII(const char* aData) const; // LowerCaseEqualsLiteral must ONLY be called with an actual literal string, // or a char array *constant* declared without an explicit size and with an // initializer that is a string literal or is otherwise null-terminated. // Use LowerCaseEqualsASCII for other char array variables. // (Although this method may happen to produce expected results for other // char arrays that have bound one greater than the sequence of interest, // such use is discouraged for reasons of readability and maintainability.) 
template bool LowerCaseEqualsLiteral(const char (&aStr)[N]) const { return LowerCaseEqualsASCII(aStr, N - 1); } // Returns true if this string overlaps with the given string fragment. bool IsDependentOn(const char_type* aStart, const char_type* aEnd) const { // If it _isn't_ the case that one fragment starts after the other ends, // or ends before the other starts, then, they conflict: // // !(f2.begin >= f1.aEnd || f2.aEnd <= f1.begin) // // Simplified, that gives us (To avoid relying on Undefined Behavior // from comparing pointers from different allocations (which in // principle gives the optimizer the permission to assume elsewhere // that the pointers are from the same allocation), the comparisons // are done on integers, which merely relies on implementation-defined // behavior of converting pointers to integers. std::less and // std::greater implementations don't actually provide the guarantees // that they should.): return (reinterpret_cast(aStart) < reinterpret_cast(mData + mLength) && reinterpret_cast(aEnd) > reinterpret_cast(mData)); } /** * Search for the given substring within this string. * * @param aString is substring to be sought in this * @param aOffset tells us where in this string to start searching * @return offset in string, or kNotFound */ int32_t Find(const string_view& aString, index_type aOffset = 0) const; // Previously there was an overload of `Find()` which took a bool second // argument. Avoid issues by explicitly preventing that overload. // TODO: Remove this at some point. template && std::is_convertible_v>> int32_t Find(const string_view& aString, I aOffset) const { static_assert(!std::is_same_v, "offset must not be `bool`"); return Find(aString, static_cast(aOffset)); } /** * Search for the given ASCII substring within this string, ignoring case. 
* * @param aString is substring to be sought in this * @param aOffset tells us where in this string to start searching * @return offset in string, or kNotFound */ int32_t LowerCaseFindASCII(const std::string_view& aString, index_type aOffset = 0) const; /** * Scan the string backwards, looking for the given substring. * * @param aString is substring to be sought in this * @return offset in string, or kNotFound */ int32_t RFind(const string_view& aString) const; size_type CountChar(char_type) const; bool Contains(char_type aChar) const { return FindChar(aChar) != kNotFound; } /** * Search for the first instance of a given char within this string * * @param aChar is the character to search for * @param aOffset tells us where in this string to start searching * @return offset in string, or kNotFound */ int32_t FindChar(char_type aChar, index_type aOffset = 0) const; /** * Search for the last instance of a given char within this string * * @param aChar is the character to search for * @param aOffset tells us where in this string to start searching * @return offset in string, or kNotFound */ int32_t RFindChar(char_type aChar, int32_t aOffset = -1) const; /** * This method searches this string for the first character found in * the given string. * * @param aSet contains set of chars to be found * @param aOffset tells us where in this string to start searching * (counting from left) * @return offset in string, or kNotFound */ int32_t FindCharInSet(const string_view& aSet, index_type aOffset = 0) const; /** * This method searches this string for the last character found in * the given string. * * @param aSet contains set of chars to be found * @param aOffset tells us where in this string to start searching * (counting from left) * @return offset in string, or kNotFound */ int32_t RFindCharInSet(const string_view& aSet, int32_t aOffset = -1) const; /** * Perform locale-independent string to double-precision float conversion. * * Leading spaces in the string will be ignored. 
The returned value will be * finite unless aErrorCode is set to a failed status. * * @param aErrorCode will contain error if one occurs * @return double-precision float rep of string value */ double ToDouble(nsresult* aErrorCode) const; /** * Perform locale-independent string to single-precision float conversion. * * Leading spaces in the string will be ignored. The returned value will be * finite unless aErrorCode is set to a failed status. * * @param aErrorCode will contain error if one occurs * @return single-precision float rep of string value */ float ToFloat(nsresult* aErrorCode) const; /** * Similar to above ToDouble and ToFloat but allows trailing characters that * are not converted. */ double ToDoubleAllowTrailingChars(nsresult* aErrorCode) const; float ToFloatAllowTrailingChars(nsresult* aErrorCode) const; protected: nsTStringRepr() = delete; // Never instantiate directly constexpr nsTStringRepr(char_type* aData, size_type aLength, DataFlags aDataFlags, ClassFlags aClassFlags) : mData(aData), mLength(aLength), mDataFlags(aDataFlags), mClassFlags(aClassFlags) {} static constexpr size_type kMaxCapacity = LengthStorage::kMax; /** * Checks if the given capacity is valid for this string type. 
*/ [[nodiscard]] static constexpr bool CheckCapacity(size_type aCapacity) { return aCapacity <= kMaxCapacity; } char_type* mData; LengthStorage mLength; DataFlags mDataFlags; ClassFlags const mClassFlags; }; extern template class nsTStringRepr; extern template class nsTStringRepr; } // namespace detail } // namespace mozilla template int NS_FASTCALL Compare(const mozilla::detail::nsTStringRepr& aLhs, const mozilla::detail::nsTStringRepr& aRhs, nsTStringComparator = nsTDefaultStringComparator); extern template int NS_FASTCALL Compare( const mozilla::detail::nsTStringRepr&, const mozilla::detail::nsTStringRepr&, nsTStringComparator); extern template int NS_FASTCALL Compare(const mozilla::detail::nsTStringRepr&, const mozilla::detail::nsTStringRepr&, nsTStringComparator); template inline constexpr bool operator!=( const mozilla::detail::nsTStringRepr& aLhs, const mozilla::detail::nsTStringRepr& aRhs) { return !aLhs.Equals(aRhs); } template inline constexpr bool operator!=(const mozilla::detail::nsTStringRepr& aLhs, const T* aRhs) { return !aLhs.Equals(aRhs); } template inline bool operator<(const mozilla::detail::nsTStringRepr& aLhs, const mozilla::detail::nsTStringRepr& aRhs) { return Compare(aLhs, aRhs) < 0; } template inline bool operator<=(const mozilla::detail::nsTStringRepr& aLhs, const mozilla::detail::nsTStringRepr& aRhs) { return Compare(aLhs, aRhs) <= 0; } template inline bool operator==(const mozilla::detail::nsTStringRepr& aLhs, const mozilla::detail::nsTStringRepr& aRhs) { return aLhs.Equals(aRhs); } template inline bool operator==(const mozilla::detail::nsTStringRepr& aLhs, const T* aRhs) { return aLhs.Equals(aRhs); } template inline bool operator>=(const mozilla::detail::nsTStringRepr& aLhs, const mozilla::detail::nsTStringRepr& aRhs) { return Compare(aLhs, aRhs) >= 0; } template inline bool operator>(const mozilla::detail::nsTStringRepr& aLhs, const mozilla::detail::nsTStringRepr& aRhs) { return Compare(aLhs, aRhs) > 0; } template struct 
fmt::formatter, Char> : fmt::formatter, Char> { template constexpr auto format(const mozilla::detail::nsTStringRepr& aVal, FormatContext& aCtx) const -> decltype(aCtx.out()) { return formatter, Char>::format( basic_string_view{aVal.BeginReading(), aVal.Length()}, aCtx); } }; #endif