From 128ee32ee21f6ab4bf3ec32af1646e4a698ce6f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Emilio=20Cobos=20=C3=81lvarez?= Date: Tue, 26 Sep 2023 23:14:14 +0000 Subject: [PATCH] Bug 1854446 - Make dynamic atoms store an nsStringBuffer. r=smaug,xpcom-reviewers,nika Performance results pending, but this shouldn't hurt utf-8 atomization at all, and it should also make utf-16 atomization potentially cheaper. So sending for review under the assumption that perf numbers will look good. Differential Revision: https://phabricator.services.mozilla.com/D189021 --- dom/base/nsAttrValue.cpp | 12 +++------- js/xpconnect/src/XPCString.cpp | 19 --------------- js/xpconnect/src/xpcpublic.h | 23 ++++-------------- parser/html/nsHtml5String.cpp | 10 ++------ xpcom/ds/nsAtom.h | 35 ++++++++-------------------- xpcom/ds/nsAtomTable.cpp | 41 ++++++++++++++++++--------------- xpcom/string/nsStringBuffer.cpp | 26 ++++++++++++++++++++- xpcom/string/nsStringBuffer.h | 14 +++++++++++ 8 files changed, 81 insertions(+), 99 deletions(-) diff --git a/dom/base/nsAttrValue.cpp b/dom/base/nsAttrValue.cpp index 9cdedffab6ba..ea7923fa0fd0 100644 --- a/dom/base/nsAttrValue.cpp +++ b/dom/base/nsAttrValue.cpp @@ -2112,17 +2112,11 @@ already_AddRefed nsAttrValue::GetStringBuffer( RefPtr buf = nsStringBuffer::FromString(aValue); if (buf && (buf->StorageSize() / sizeof(char16_t) - 1) == len) { + // We can only reuse the buffer if it's exactly sized, since we rely on + // StorageSize() to get the string length in ToString(). return buf.forget(); } - - buf = nsStringBuffer::Alloc((len + 1) * sizeof(char16_t)); - if (!buf) { - return nullptr; - } - char16_t* data = static_cast(buf->Data()); - CopyUnicodeTo(aValue, 0, data, len); - data[len] = char16_t(0); - return buf.forget(); + return nsStringBuffer::Create(aValue.Data(), aValue.Length()); } size_t nsAttrValue::SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const { diff --git a/js/xpconnect/src/XPCString.cpp b/js/xpconnect/src/XPCString.cpp index 634d7efcbd76..89a7dc8686b5 100644 --- a/js/xpconnect/src/XPCString.cpp +++ b/js/xpconnect/src/XPCString.cpp @@ -32,9 +32,6 @@ const XPCStringConvert::LiteralExternalString const XPCStringConvert::DOMStringExternalString XPCStringConvert::sDOMStringExternalString; -const XPCStringConvert::DynamicAtomExternalString - XPCStringConvert::sDynamicAtomExternalString; - void XPCStringConvert::LiteralExternalString::finalize(char16_t* aChars) const { // Nothing to do. } @@ -64,22 +61,6 @@ size_t XPCStringConvert::DOMStringExternalString::sizeOfBuffer( return buf->SizeOfIncludingThisIfUnshared(aMallocSizeOf); } -void XPCStringConvert::DynamicAtomExternalString::finalize( - char16_t* aChars) const { - nsDynamicAtom* atom = nsDynamicAtom::FromChars(aChars); - // nsDynamicAtom::Release is always-inline and defined in a translation unit - // we can't get to here. So we need to go through nsAtom::Release to call - // it. - static_cast(atom)->Release(); -} - -size_t XPCStringConvert::DynamicAtomExternalString::sizeOfBuffer( - const char16_t* aChars, mozilla::MallocSizeOf aMallocSizeOf) const { - // We return 0 here because NS_AddSizeOfAtoms reports all memory associated - // with atoms in the atom table. - return 0; -} - // convert a readable to a JSString, copying string data // static bool XPCStringConvert::ReadableToJSVal(JSContext* cx, const nsAString& readable, diff --git a/js/xpconnect/src/xpcpublic.h b/js/xpconnect/src/xpcpublic.h index 027f67909d76..29855bfb7e32 100644 --- a/js/xpconnect/src/xpcpublic.h +++ b/js/xpconnect/src/xpcpublic.h @@ -276,21 +276,14 @@ class XPCStringConvert { static inline bool DynamicAtomToJSVal(JSContext* cx, nsDynamicAtom* atom, JS::MutableHandle rval) { - bool sharedAtom; - JSString* str = - JS_NewMaybeExternalString(cx, atom->GetUTF16String(), atom->GetLength(), - &sDynamicAtomExternalString, &sharedAtom); - if (!str) { + bool shared = false; + nsStringBuffer* buf = atom->StringBuffer(); + if (!StringBufferToJSVal(cx, buf, atom->GetLength(), rval, &shared)) { return false; } - if (sharedAtom) { - // We only have non-owning atoms in DOMString for now. - // nsDynamicAtom::AddRef is always-inline and defined in a - // translation unit we can't get to here. So we need to go through - // nsAtom::AddRef to call it. - static_cast(atom)->AddRef(); + if (shared) { + buf->AddRef(); } - rval.setString(str); return true; } @@ -325,14 +318,8 @@ class XPCStringConvert { size_t sizeOfBuffer(const char16_t* aChars, mozilla::MallocSizeOf aMallocSizeOf) const override; }; - struct DynamicAtomExternalString : public JSExternalStringCallbacks { - void finalize(char16_t* aChars) const override; - size_t sizeOfBuffer(const char16_t* aChars, - mozilla::MallocSizeOf aMallocSizeOf) const override; - }; static const LiteralExternalString sLiteralExternalString; static const DOMStringExternalString sDOMStringExternalString; - static const DynamicAtomExternalString sDynamicAtomExternalString; XPCStringConvert() = delete; }; diff --git a/parser/html/nsHtml5String.cpp b/parser/html/nsHtml5String.cpp index 5f30869cbb59..5fa9415f0682 100644 --- a/parser/html/nsHtml5String.cpp +++ b/parser/html/nsHtml5String.cpp @@ -109,9 +109,8 @@ nsHtml5String nsHtml5String::FromBuffer(char16_t* aBuffer, int32_t aLength, // nsStringBuffer and to make sure the allocation strategy matches // nsAttrValue::GetStringBuffer, so that it doesn't need to reallocate and // copy. - RefPtr buffer( - nsStringBuffer::Alloc((aLength + 1) * sizeof(char16_t))); - if (!buffer) { + RefPtr buffer = nsStringBuffer::Create(aBuffer, aLength); + if (MOZ_UNLIKELY(!buffer)) { if (!aTreeBuilder) { MOZ_CRASH("Out of memory."); } @@ -124,12 +123,7 @@ nsHtml5String nsHtml5String::FromBuffer(char16_t* aBuffer, int32_t aLength, char16_t* data = reinterpret_cast(buffer->Data()); data[0] = 0xFFFD; data[1] = 0; - return nsHtml5String(reinterpret_cast(buffer.forget().take()) | - eStringBuffer); } - char16_t* data = reinterpret_cast(buffer->Data()); - memcpy(data, aBuffer, aLength * sizeof(char16_t)); - data[aLength] = 0; return nsHtml5String(reinterpret_cast(buffer.forget().take()) | eStringBuffer); } diff --git a/xpcom/ds/nsAtom.h b/xpcom/ds/nsAtom.h index 1b53b7be2eb3..70cc0446fe0a 100644 --- a/xpcom/ds/nsAtom.h +++ b/xpcom/ds/nsAtom.h @@ -95,17 +95,10 @@ class nsAtom { using HasThreadSafeRefCnt = std::true_type; protected: - // Used by nsStaticAtom. - constexpr nsAtom(uint32_t aLength, uint32_t aHash, bool aIsAsciiLowercase) + constexpr nsAtom(uint32_t aLength, bool aIsStatic, uint32_t aHash, + bool aIsAsciiLowercase) : mLength(aLength), - mIsStatic(true), - mIsAsciiLowercase(aIsAsciiLowercase), - mHash(aHash) {} - - // Used by nsDynamicAtom. - nsAtom(const nsAString& aString, uint32_t aHash, bool aIsAsciiLowercase) - : mLength(aString.Length()), - mIsStatic(false), + mIsStatic(aIsStatic), mIsAsciiLowercase(aIsAsciiLowercase), mHash(aHash) {} @@ -134,7 +127,7 @@ class nsStaticAtom : public nsAtom { // hashes match. constexpr nsStaticAtom(uint32_t aLength, uint32_t aHash, uint32_t aStringOffset, bool aIsAsciiLowercase) - : nsAtom(aLength, aHash, aIsAsciiLowercase), + : nsAtom(aLength, /* aIsStatic = */ true, aHash, aIsAsciiLowercase), mStringOffset(aStringOffset) {} const char16_t* String() const { @@ -184,12 +177,10 @@ class nsDynamicAtom : public nsAtom { return count; } - const char16_t* String() const { - return reinterpret_cast(this + 1); - } + nsStringBuffer* StringBuffer() const { return mStringBuffer; } - static nsDynamicAtom* FromChars(char16_t* chars) { - return reinterpret_cast(chars) - 1; + const char16_t* String() const { + return reinterpret_cast(mStringBuffer->Data()); } private: @@ -202,16 +193,15 @@ class nsDynamicAtom : public nsAtom { // These shouldn't be used directly, even by friend classes. The // Create()/Destroy() methods use them. - nsDynamicAtom(const nsAString& aString, uint32_t aHash, - bool aIsAsciiLowercase); + nsDynamicAtom(already_AddRefed, uint32_t aLength, + uint32_t aHash, bool aIsAsciiLowercase); ~nsDynamicAtom() = default; static nsDynamicAtom* Create(const nsAString& aString, uint32_t aHash); static void Destroy(nsDynamicAtom* aAtom); mozilla::ThreadSafeAutoRefCnt mRefCnt; - - // The atom's chars are stored at the end of the struct. + RefPtr mStringBuffer; }; const nsStaticAtom* nsAtom::AsStatic() const { @@ -277,11 +267,6 @@ class nsAtomString : public nsString { explicit nsAtomString(const nsAtom* aAtom) { aAtom->ToString(*this); } }; -class nsAutoAtomString : public nsAutoString { - public: - explicit nsAutoAtomString(const nsAtom* aAtom) { aAtom->ToString(*this); } -}; - class nsAtomCString : public nsCString { public: explicit nsAtomCString(const nsAtom* aAtom) { aAtom->ToUTF8String(*this); } diff --git a/xpcom/ds/nsAtomTable.cpp b/xpcom/ds/nsAtomTable.cpp index 56618cd46c5a..b89f97d1dad3 100644 --- a/xpcom/ds/nsAtomTable.cpp +++ b/xpcom/ds/nsAtomTable.cpp @@ -65,9 +65,12 @@ enum class GCKind { // replaying. Atomic nsDynamicAtom::gUnusedAtomCount; -nsDynamicAtom::nsDynamicAtom(const nsAString& aString, uint32_t aHash, +nsDynamicAtom::nsDynamicAtom(already_AddRefed aBuffer, + uint32_t aLength, uint32_t aHash, bool aIsAsciiLowercase) - : nsAtom(aString, aHash, aIsAsciiLowercase), mRefCnt(1) {} + : nsAtom(aLength, /* aIsStatic = */ false, aHash, aIsAsciiLowercase), + mRefCnt(1), + mStringBuffer(aBuffer) {} // Returns true if ToLowercaseASCII would return the string unchanged. static bool IsAsciiLowercase(const char16_t* aString, const uint32_t aLength) { @@ -76,34 +79,33 @@ static bool IsAsciiLowercase(const char16_t* aString, const uint32_t aLength) { return false; } } - return true; } nsDynamicAtom* nsDynamicAtom::Create(const nsAString& aString, uint32_t aHash) { // We tack the chars onto the end of the nsDynamicAtom object. - size_t numCharBytes = (aString.Length() + 1) * sizeof(char16_t); - size_t numTotalBytes = sizeof(nsDynamicAtom) + numCharBytes; - - bool isAsciiLower = ::IsAsciiLowercase(aString.Data(), aString.Length()); - - nsDynamicAtom* atom = (nsDynamicAtom*)moz_xmalloc(numTotalBytes); - new (atom) nsDynamicAtom(aString, aHash, isAsciiLower); - memcpy(const_cast(atom->String()), - PromiseFlatString(aString).get(), numCharBytes); - + const bool isAsciiLower = + ::IsAsciiLowercase(aString.Data(), aString.Length()); + RefPtr buffer = nsStringBuffer::FromString(aString); + if (!buffer) { + buffer = nsStringBuffer::Create(aString.Data(), aString.Length()); + if (MOZ_UNLIKELY(!buffer)) { + MOZ_CRASH("Out of memory atomizing"); + } + } else { + MOZ_ASSERT(aString.IsTerminated(), + "String buffers are always null-terminated"); + } + auto* atom = + new nsDynamicAtom(buffer.forget(), aString.Length(), aHash, isAsciiLower); MOZ_ASSERT(atom->String()[atom->GetLength()] == char16_t(0)); MOZ_ASSERT(atom->Equals(aString)); MOZ_ASSERT(atom->mHash == HashString(atom->String(), atom->GetLength())); MOZ_ASSERT(atom->mIsAsciiLowercase == isAsciiLower); - return atom; } -void nsDynamicAtom::Destroy(nsDynamicAtom* aAtom) { - aAtom->~nsDynamicAtom(); - free(aAtom); -} +void nsDynamicAtom::Destroy(nsDynamicAtom* aAtom) { delete aAtom; } void nsAtom::ToString(nsAString& aString) const { // See the comment on |mString|'s declaration. @@ -113,7 +115,7 @@ void nsAtom::ToString(nsAString& aString) const { // which is what's important. aString.AssignLiteral(AsStatic()->String(), mLength); } else { - aString.Assign(AsDynamic()->String(), mLength); + AsDynamic()->StringBuffer()->ToString(mLength, aString); } } @@ -563,6 +565,7 @@ already_AddRefed nsAtomTable::Atomize(const nsACString& aUTF8String) { nsString str; CopyUTF8toUTF16(aUTF8String, str); + MOZ_ASSERT(nsStringBuffer::FromString(str), "Should create a string buffer"); RefPtr atom = dont_AddRef(nsDynamicAtom::Create(str, key.mHash)); he->mAtom = atom; diff --git a/xpcom/string/nsStringBuffer.cpp b/xpcom/string/nsStringBuffer.cpp index f32b51d3a6ef..b5d506333f93 100644 --- a/xpcom/string/nsStringBuffer.cpp +++ b/xpcom/string/nsStringBuffer.cpp @@ -65,7 +65,7 @@ already_AddRefed nsStringBuffer::Alloc(size_t aSize) { sizeof(nsStringBuffer) + aSize > aSize, "mStorageSize will truncate"); - nsStringBuffer* hdr = (nsStringBuffer*)malloc(sizeof(nsStringBuffer) + aSize); + auto* hdr = (nsStringBuffer*)malloc(sizeof(nsStringBuffer) + aSize); if (hdr) { STRING_STAT_INCREMENT(Alloc); @@ -76,6 +76,30 @@ already_AddRefed nsStringBuffer::Alloc(size_t aSize) { return already_AddRefed(hdr); } +template +static already_AddRefed DoCreate(const CharT* aData, + size_t aLength) { + RefPtr buffer = + nsStringBuffer::Alloc((aLength + 1) * sizeof(CharT)); + if (MOZ_UNLIKELY(!buffer)) { + return nullptr; + } + auto* data = reinterpret_cast(buffer->Data()); + memcpy(data, aData, aLength * sizeof(CharT)); + data[aLength] = 0; + return buffer.forget(); +} + +already_AddRefed nsStringBuffer::Create(const char* aData, + size_t aLength) { + return DoCreate(aData, aLength); +} + +already_AddRefed nsStringBuffer::Create(const char16_t* aData, + size_t aLength) { + return DoCreate(aData, aLength); +} + nsStringBuffer* nsStringBuffer::Realloc(nsStringBuffer* aHdr, size_t aSize) { STRING_STAT_INCREMENT(Realloc); diff --git a/xpcom/string/nsStringBuffer.h b/xpcom/string/nsStringBuffer.h index 3c929599322c..43628d666820 100644 --- a/xpcom/string/nsStringBuffer.h +++ b/xpcom/string/nsStringBuffer.h @@ -43,10 +43,24 @@ class nsStringBuffer { * (i.e., it is not required that the null terminator appear in the last * storage unit of the string buffer's data). * + * This guarantees that StorageSize() returns aStorageSize if the returned + * buffer is non-null. Some callers like nsAttrValue rely on it. + * * @return new string buffer or null if out of memory. */ static already_AddRefed Alloc(size_t aStorageSize); + /** + * Returns a string buffer initialized with the given string on it, or null on + * OOM. + * Note that this will allocate extra space for the trailing null byte, which + * this method will add. + */ + static already_AddRefed Create(const char16_t* aData, + size_t aLength); + static already_AddRefed Create(const char* aData, + size_t aLength); + /** * Resizes the given string buffer to the specified storage size. This * method must not be called on a readonly string buffer. Use this API