Bug 1914378 part 3 - Use a StringBuffer for long flattened ropes. r=sfink

This seems to improve the TodoMVC-jQuery subtest on Speedometer 3 by a few percent.

Differential Revision: https://phabricator.services.mozilla.com/D219882
This commit is contained in:
Jan de Mooij 2024-08-27 07:23:47 +00:00
parent 71d31b4f68
commit 72e9c67c58
8 changed files with 246 additions and 27 deletions

View File

@ -37,6 +37,18 @@ inline bool js::Nursery::addStringBuffer(JSLinearString* s) {
return stringBuffers_.emplaceBack(s, s->stringBuffer());
}
inline bool js::Nursery::addExtensibleStringBuffer(
JSLinearString* s, mozilla::StringBuffer* buffer) {
MOZ_ASSERT(IsInsideNursery(s));
MOZ_ASSERT(isEnabled());
return extensibleStringBuffers_.putNew(s, buffer);
}
inline void js::Nursery::removeExtensibleStringBuffer(JSLinearString* s) {
MOZ_ASSERT(gc::IsInsideNursery(s));
extensibleStringBuffers_.remove(s);
}
inline bool js::Nursery::shouldTenure(gc::Cell* cell) {
MOZ_ASSERT(semispaceEnabled());
MOZ_ASSERT(inCollectedRegion(cell));

View File

@ -1917,8 +1917,8 @@ void js::Nursery::sweepStringsWithBuffer() {
MOZ_ASSERT(stringBuffersToReleaseAfterMinorGC_.empty());
stringBuffers_.mutableEraseIf([&](StringAndBuffer& entry) {
auto [str, buffer] = entry;
auto sweep = [&](JSLinearString* str,
mozilla::StringBuffer* buffer) -> JSLinearString* {
MOZ_ASSERT(inCollectedRegion(str));
if (!IsForwarded(str)) {
@ -1928,7 +1928,7 @@ void js::Nursery::sweepStringsWithBuffer() {
// Release on the main thread on OOM.
buffer->Release();
}
return true;
return nullptr;
}
JSLinearString* dst = Forwarded(str);
@ -1939,12 +1939,32 @@ void js::Nursery::sweepStringsWithBuffer() {
// Release on the main thread on OOM.
buffer->Release();
}
return true;
return nullptr;
}
entry.first = dst;
return false;
return dst;
};
stringBuffers_.mutableEraseIf([&](StringAndBuffer& entry) {
if (JSLinearString* dst = sweep(entry.first, entry.second)) {
entry.first = dst;
return false;
}
return true;
});
AutoEnterOOMUnsafeRegion oomUnsafe;
ExtensibleStringBuffers buffers(std::move(extensibleStringBuffers_));
MOZ_ASSERT(extensibleStringBuffers_.empty());
for (ExtensibleStringBuffers::Enum e(buffers); !e.empty(); e.popFront()) {
if (JSLinearString* dst = sweep(e.front().key(), e.front().value())) {
if (!extensibleStringBuffers_.putNew(dst, e.front().value())) {
oomUnsafe.crash("sweepStringsWithBuffer");
}
}
}
}
void js::Nursery::sweep() {

View File

@ -257,6 +257,10 @@ class Nursery {
[[nodiscard]] inline bool addStringBuffer(JSLinearString* s);
[[nodiscard]] inline bool addExtensibleStringBuffer(
JSLinearString* s, mozilla::StringBuffer* buffer);
inline void removeExtensibleStringBuffer(JSLinearString* s);
size_t sizeOfMallocedBuffers(mozilla::MallocSizeOf mallocSizeOf) const;
// Wasm "trailer" (C++-heap-allocated) blocks.
@ -736,6 +740,14 @@ class Nursery {
JS::GCVector<StringAndBuffer, 8, SystemAllocPolicy>;
StringAndBufferVector stringBuffers_;
// Like stringBuffers_, but for extensible strings for flattened ropes. This
// requires a HashMap instead of a Vector because we need to remove the entry
// when transferring the buffer to a new extensible string during flattening.
using ExtensibleStringBuffers =
HashMap<JSLinearString*, mozilla::StringBuffer*,
js::PointerHasher<JSLinearString*>, js::SystemAllocPolicy>;
ExtensibleStringBuffers extensibleStringBuffers_;
// List of StringBuffers to release off-thread.
using StringBufferVector =
Vector<mozilla::StringBuffer*, 8, SystemAllocPolicy>;

View File

@ -0,0 +1,94 @@
// Test that long strings allocated by rope flattening have a string buffer.
gczeal(0);
// Must match sizeof(mozilla::StringBuffer)
const StringBufferSizeInBytes = 8;
function checkRefCount(s, expected) {
// stringRepresentation and the bufferRefCount field aren't available in
// all builds.
if (getBuildConfiguration("debug")) {
var repr = JSON.parse(stringRepresentation(s));
assertEq(repr.bufferRefCount, expected);
}
}
function checkExtensibleCapacity(s, expected) {
if (getBuildConfiguration("debug")) {
var repr = JSON.parse(stringRepresentation(s));
assertEq(repr.capacity, expected);
}
}
function testBasicLatin1() {
var s1 = ensureLinearString("a".repeat(1000));
var s2 = ensureLinearString("b".repeat(1000));
var flattened1 = ensureLinearString(newRope(s1, s2));
checkRefCount(flattened1, 1);
// Test that the string's capacity + StringBuffer + null terminator fills up
// a jemalloc bucket of 2048 bytes.
var expectedCapacity = 2048 - StringBufferSizeInBytes - 1 /* null terminator */;
checkExtensibleCapacity(flattened1, expectedCapacity);
// Move string buffer to flattened2 and turn flattened into a dependent string.
var flattened2 = ensureLinearString(flattened1 + "abcdef");
checkRefCount(flattened1, undefined);
checkRefCount(flattened2, 1);
assertEq(flattened2.length, 2006);
// Share the StringBuffer with another JS string.
var sharedBuffer = newString(flattened2, {shareStringBuffer: true});
checkRefCount(flattened2, 2);
// Because there are now multiple references, we don't use the extensible buffer.
var flattened3 = ensureLinearString(flattened2 + "abcdef");
checkRefCount(flattened2, 2);
checkRefCount(flattened3, 1);
checkExtensibleCapacity(flattened3, expectedCapacity);
}
testBasicLatin1();
function testBasicTwoByte() {
var s1 = ensureLinearString("\u1234".repeat(500));
var s2 = ensureLinearString("\u1256".repeat(500));
var flattened1 = ensureLinearString(newRope(s1, s2));
checkRefCount(flattened1, 1);
// Test that the string's capacity + StringBuffer + null terminator fills up
// a jemalloc bucket of 2048 bytes.
var expectedCapacity = 1024 - StringBufferSizeInBytes / 2 - 1 /* null terminator */;
checkExtensibleCapacity(flattened1, expectedCapacity);
// Move string buffer to flattened2 and turn flattened into a dependent string.
var flattened2 = ensureLinearString(flattened1 + "abcdef");
checkRefCount(flattened1, undefined);
checkRefCount(flattened2, 1);
assertEq(flattened2.length, 1006);
// Share the StringBuffer with another JS string.
var sharedBuffer = newString(flattened2, {shareStringBuffer: true});
checkRefCount(flattened2, 2);
// Because there are now multiple references, we don't use the extensible buffer.
var flattened3 = ensureLinearString(flattened2 + "abcdef");
checkRefCount(flattened2, 2);
checkRefCount(flattened3, 1);
checkExtensibleCapacity(flattened3, expectedCapacity);
}
testBasicTwoByte();
function testBufferTransfer(fromNursery, toNursery) {
var s1 = ensureLinearString("a".repeat(1000));
var s2 = ensureLinearString("b".repeat(1000));
var flattened1 = ensureLinearString(newRope(s1, s2, {nursery: fromNursery}));
checkRefCount(flattened1, 1);
var flattened2 = ensureLinearString(newRope(flattened1, "abcdef", {nursery: toNursery}));
checkRefCount(flattened1, undefined);
checkRefCount(flattened2, 1);
gc();
checkRefCount(flattened2, 1);
}
testBufferTransfer(false, false);
testBufferTransfer(false, true);
testBufferTransfer(true, false);
testBufferTransfer(true, true);

View File

@ -277,6 +277,7 @@ assertEq(byteSize(ext16), s(Nurser
ext16 = copyString(ext16);
rope16a = ext16 + fragment16;
minorgc();
finishBackgroundFree();
assertEq(byteSize(rope16a), s(RN, RN));
rope16a.match(/x/, function() { assertEq(true, false); });
assertEq(byteSize(rope16a), s(XN + 128 * 1024, XN + 128 * 1024));

View File

@ -608,30 +608,84 @@ template <typename CharT>
static MOZ_ALWAYS_INLINE bool AllocCharsForFlatten(Nursery& nursery,
JSString* str, size_t length,
CharT** chars,
size_t* capacity) {
size_t* capacity,
bool* hasStringBuffer) {
/*
* Grow by 12.5% if the buffer is very large. Otherwise, round up to the
* next power of 2. This is similar to what we do with arrays; see
* JSObject::ensureDenseArrayElements.
*/
static const size_t DOUBLING_MAX = 1024 * 1024;
*capacity =
length > DOUBLING_MAX ? length + (length / 8) : RoundUpPow2(length);
auto calcCapacity = [](size_t length, size_t maxCapacity) {
static const size_t DOUBLING_MAX = 1024 * 1024;
if (length > DOUBLING_MAX) {
return std::min<size_t>(maxCapacity, length + (length / 8));
}
size_t capacity = RoundUpPow2(length);
MOZ_ASSERT(capacity <= maxCapacity);
return capacity;
};
static_assert(JSString::MAX_LENGTH * sizeof(CharT) <= UINT32_MAX);
if (length < JSString::MIN_BYTES_FOR_BUFFER / sizeof(CharT)) {
*capacity = calcCapacity(length, JSString::MAX_LENGTH);
MOZ_ASSERT(length <= *capacity);
MOZ_ASSERT(*capacity <= JSString::MAX_LENGTH);
auto buffer = str->zone()->make_pod_arena_array<CharT>(js::StringBufferArena,
*capacity);
auto buffer = str->zone()->make_pod_arena_array<CharT>(
js::StringBufferArena, *capacity);
if (!buffer) {
return false;
}
if (!str->isTenured()) {
if (!nursery.registerMallocedBuffer(buffer.get(),
*capacity * sizeof(CharT))) {
return false;
}
}
*chars = buffer.release();
*hasStringBuffer = false;
return true;
}
using mozilla::StringBuffer;
static_assert(StringBuffer::IsValidLength<CharT>(JSString::MAX_LENGTH),
"JSString length must be valid for StringBuffer");
// Include extra space for the header and the null-terminator before
// calculating the capacity. This ensures we make good use of jemalloc's
// bucket sizes. For example, for a Latin1 string with length 2000 we want to
// get a capacity of 2039 (chars). With the StringBuffer header (8 bytes) and
// the null-terminator this results in an allocation of 2048 bytes.
//
// Note: the null-terminator will not be included in the extensible string's
// capacity field.
static_assert(sizeof(StringBuffer) % sizeof(CharT) == 0);
static constexpr size_t ExtraChars = sizeof(StringBuffer) / sizeof(CharT) + 1;
size_t fullCapacity =
calcCapacity(length + ExtraChars, JSString::MAX_LENGTH + ExtraChars);
*capacity = fullCapacity - ExtraChars;
MOZ_ASSERT(length <= *capacity);
MOZ_ASSERT(*capacity <= JSString::MAX_LENGTH);
RefPtr<StringBuffer> buffer = StringBuffer::Alloc(
(*capacity + 1) * sizeof(CharT), mozilla::Some(js::StringBufferArena));
if (!buffer) {
return false;
}
if (!str->isTenured()) {
if (!nursery.registerMallocedBuffer(buffer.get(),
*capacity * sizeof(CharT))) {
auto* linear = static_cast<JSLinearString*>(str); // True when we're done.
if (!nursery.addExtensibleStringBuffer(linear, buffer)) {
return false;
}
}
*chars = buffer.release();
// Transfer ownership to the caller, where the buffer will be used for the
// extensible string.
// Note: the null-terminator will be stored in flattenInternal.
StringBuffer* buf;
buffer.forget(&buf);
*chars = static_cast<CharT*>(buf->Data());
*hasStringBuffer = true;
return true;
}
@ -873,23 +927,37 @@ static constexpr uint32_t StringFlagsForCharType(uint32_t baseFlags) {
return baseFlags | JSString::LATIN1_CHARS_BIT;
}
static bool UpdateNurseryBuffersOnTransfer(js::Nursery& nursery, JSString* from,
JSString* to, void* buffer,
static bool UpdateNurseryBuffersOnTransfer(js::Nursery& nursery,
JSExtensibleString* from,
JSString* to, void* chars,
size_t size) {
// Update the list of buffers associated with nursery cells when |buffer| is
// moved from string |from| to string |to|, depending on whether those strings
// are in the nursery or not.
if (from->hasStringBuffer()) {
if (!from->isTenured()) {
nursery.removeExtensibleStringBuffer(from);
}
if (!to->isTenured()) {
auto* linear = static_cast<JSLinearString*>(to);
if (!nursery.addExtensibleStringBuffer(linear, from->stringBuffer())) {
return false;
}
}
return true;
}
if (from->isTenured() && !to->isTenured()) {
// Tenured leftmost child is giving its chars buffer to the
// nursery-allocated root node.
if (!nursery.registerMallocedBuffer(buffer, size)) {
if (!nursery.registerMallocedBuffer(chars, size)) {
return false;
}
} else if (!from->isTenured() && to->isTenured()) {
// Leftmost child is giving its nursery-held chars buffer to a
// tenured string.
nursery.removeMallocedBuffer(buffer, size);
nursery.removeMallocedBuffer(chars, size);
}
return true;
@ -902,6 +970,13 @@ static bool CanReuseLeftmostBuffer(JSString* leftmostChild, size_t wholeLength,
}
JSExtensibleString& str = leftmostChild->asExtensible();
// Don't mutate the StringBuffer if there are other references to it, possibly
// on other threads.
if (str.hasStringBuffer() && str.stringBuffer()->IsReadonly()) {
return false;
}
return str.capacity() >= wholeLength &&
str.hasTwoByteChars() == hasTwoByteChars;
}
@ -1020,10 +1095,12 @@ JSLinearString* JSRope::flattenInternal(JSRope* root) {
bool reuseLeftmostBuffer = CanReuseLeftmostBuffer(
leftmostChild, wholeLength, std::is_same_v<CharT, char16_t>);
bool hasStringBuffer = false;
if (reuseLeftmostBuffer) {
JSExtensibleString& left = leftmostChild->asExtensible();
wholeCapacity = left.capacity();
wholeChars = const_cast<CharT*>(left.nonInlineChars<CharT>(nogc));
hasStringBuffer = left.hasStringBuffer();
// Nursery::registerMallocedBuffer is fallible, so attempt it first before
// doing anything irreversible.
@ -1034,7 +1111,7 @@ JSLinearString* JSRope::flattenInternal(JSRope* root) {
} else {
// If we can't reuse the leftmost child's buffer, allocate a new one.
if (!AllocCharsForFlatten(nursery, root, wholeLength, &wholeChars,
&wholeCapacity)) {
&wholeCapacity, &hasStringBuffer)) {
return nullptr;
}
}
@ -1130,9 +1207,13 @@ finish_root:
MOZ_ASSERT(str == root);
MOZ_ASSERT(pos == wholeChars + wholeLength);
root->setLengthAndFlags(wholeLength,
StringFlagsForCharType<CharT>(EXTENSIBLE_FLAGS));
root->setNonInlineChars(wholeChars, /* usesStringBuffer = */ false);
uint32_t flags = StringFlagsForCharType<CharT>(EXTENSIBLE_FLAGS);
if (hasStringBuffer) {
flags |= HAS_STRING_BUFFER_BIT;
wholeChars[wholeLength] = '\0';
}
root->setLengthAndFlags(wholeLength, flags);
root->setNonInlineChars(wholeChars, hasStringBuffer);
root->d.s.u3.capacity = wholeCapacity;
AddCellMemory(root, root->asLinear().allocSize(), MemoryUse::StringContents);

View File

@ -805,8 +805,7 @@ class JSString : public js::gc::CellWithLengthAndFlags {
bool hasStringBuffer() const {
MOZ_ASSERT_IF(flags() & HAS_STRING_BUFFER_BIT,
isLinear() && !isInline() && !isDependent() &&
!isExternal() && !isExtensible());
isLinear() && !isInline() && !isDependent() && !isExternal());
return flags() & HAS_STRING_BUFFER_BIT;
}

View File

@ -95,7 +95,7 @@ class StringBuffer {
* for Alloc. Adds +1 to aLength for the null-terminator.
*/
template <typename CharT>
static bool IsValidLength(size_t aLength) {
static constexpr bool IsValidLength(size_t aLength) {
auto checkedSize =
(CheckedUint32(aLength) + 1) * sizeof(CharT) + sizeof(StringBuffer);
return checkedSize.isValid();