Bug 1820120 - Manage Wasm{Array,Struct}Object OOL-storage-blocks using a thread-private cache. r=jonco,rhunt.

Wasm{Array,Struct}Objects may be freely nursery-allocated, hence enjoying the
benefits of generational GC.  However, those with out-of-line storage in the
C++ heap (called "trailer blocks" in the patch) have those blocks managed by
js_malloc/js_free.  This is expensive and, especially for objects which don't
get tenured, undercuts the benefit gained from generational GC.

This patch adds two new mechanisms to js::Nursery:

* a cache of blocks, js::gc::MallocedBlockCache, which are suitable for use as
  trailers.  Allocation and freeing of trailers are done mostly through this
  cache.  A supporting type, js::PointerAndUint7, has also been added, as
  extra data (a freelist ID) is needed when returning blocks to the cache.

* a more limited version of the existing Nursery::mallocedBuffers mechanism.
  The goal is the same -- to enumerate, at the end of minor GC, the set
  difference between allocated and tenured trailer blocks.  The new version
  differs in that (1) it tracks PointerAndUint7s, not void*s, (2) block
  resizing is not supported, and (3) the difference is computed by comparing
  sorted vectors rather than by querying a hash set, for performance reasons.

An SMDOC explaining the mechanisms in detail has been added to WasmGcObject.cpp.

Differential Revision: https://phabricator.services.mozilla.com/D171551
Julian Seward 2023-03-07 22:05:59 +00:00
parent 9736731687
commit 6dbd528357
11 changed files with 699 additions and 44 deletions


@ -610,7 +610,7 @@ namespace JS {
D(FULL_CELL_PTR_STR_BUFFER, 28) \
D(TOO_MUCH_JIT_CODE, 29) \
D(FULL_CELL_PTR_BIGINT_BUFFER, 30) \
D(UNUSED5, 31) \
D(NURSERY_TRAILERS, 31) \
D(NURSERY_MALLOC_BUFFERS, 32) \
\
/* \


@ -307,14 +307,16 @@ struct CodeSizes {
struct GCSizes {
// |nurseryDecommitted| is marked as NonHeap rather than GCHeapDecommitted
// because we don't consider the nursery to be part of the GC heap.
#define FOR_EACH_SIZE(MACRO) \
MACRO(_, MallocHeap, marker) \
MACRO(_, NonHeap, nurseryCommitted) \
MACRO(_, MallocHeap, nurseryMallocedBuffers) \
MACRO(_, MallocHeap, storeBufferVals) \
MACRO(_, MallocHeap, storeBufferCells) \
MACRO(_, MallocHeap, storeBufferSlots) \
MACRO(_, MallocHeap, storeBufferWholeCells) \
#define FOR_EACH_SIZE(MACRO) \
MACRO(_, MallocHeap, marker) \
MACRO(_, NonHeap, nurseryCommitted) \
MACRO(_, MallocHeap, nurseryMallocedBuffers) \
MACRO(_, MallocHeap, nurseryMallocedBlockCache) \
MACRO(_, MallocHeap, nurseryTrailerBlockSets) \
MACRO(_, MallocHeap, storeBufferVals) \
MACRO(_, MallocHeap, storeBufferCells) \
MACRO(_, MallocHeap, storeBufferSlots) \
MACRO(_, MallocHeap, storeBufferWholeCells) \
MACRO(_, MallocHeap, storeBufferGenerics)
GCSizes() = default;

js/src/ds/PointerAndUint7.h

@ -0,0 +1,125 @@
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
* vim: set ts=8 sw=2 et tw=80:
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef gc_PointerAndUint7_h
#define gc_PointerAndUint7_h
#include "mozilla/Assertions.h"
#include <stdint.h>
namespace js {
// A class that can store an address and a 7-bit unsigned integer in 64 bits,
// even on a 64-bit target.
//
// On 64-bit targets, it assumes that all supported target architectures
// contain at most 57 significant bits in their addresses, and that the valid
// address space is split evenly between addresses increasing from 0--(64)--0
// and addresses decreasing from 1--(64)--1.
//
// The 57-significant-bit constraint comes from Intel's 5-level paging scheme
// as introduced in the Ice Lake processor line, circa late 2019; see
// https://en.wikipedia.org/wiki/Intel_5-level_paging. Prior to that, Intel
// required only 48 significant bits. AArch64 requires 52 significant bits,
// as of the ARMv8.2 LVA (Large Virtual Addressing) extension, and so is less
// constraining than Intel.
//
// In any case, NaN-boxing of pointers in JS::Value gives us a pretty hard
// requirement that we can store pointers in 47 bits. So that constraint will
// break before the 57-bit constraint here breaks. See SMDOC in
// js/public/Value.h.
//
// On 32-bit targets, both components are stored unmodified in the upper and
// lower 32-bit chunks of the value, and there are no constraints on the
// component values.
#ifdef JS_64BIT
// The implementation for 64-bit targets.
class PointerAndUint7 final {
// The representation is: the lowest 57 bits of the pointer are stored in
// the top 57 bits of val_, and the Uint7 is stored in the bottom 7 bits.
// Hence recovering the pointer is 7-bit signed shift right of val_, and
// recovering the UInt7 is an AND with 127. In both cases, that's a single
// machine instruction.
uint64_t val_;
static const uint8_t SHIFT_PTR = 7;
static const uint64_t MASK_UINT7 = (uint64_t(1) << SHIFT_PTR) - 1;
static inline bool isRepresentablePtr(void* ptr) {
// We require that the top 7 bits (bits 63:57) are the same as bit 56.
// That will be the case iff, when we signedly shift `ptr` right by 56
// bits, the value is all zeroes or all ones.
int64_t s = int64_t(ptr);
// s should be bbbb'bbbb'X--(56)--X, for b = 0 or 1, and X can be anything
s >>= (64 - SHIFT_PTR - 1); // 56
// s should be 0--(64)--0 or 1--(64)--1
uint64_t u = uint64_t(s);
// Note, this addition can overflow, intentionally.
u += 1;
// u should be 0--(64)--0 or 0--(63)--01
return u <= uint64_t(1);
}
static inline bool isRepresentableUint7(uint32_t uint7) {
return uint7 <= MASK_UINT7;
}
public:
inline PointerAndUint7() : val_(0) {}
inline PointerAndUint7(void* ptr, uint32_t uint7)
: val_((uint64_t(ptr) << SHIFT_PTR) | (uint64_t(uint7 & MASK_UINT7))) {
MOZ_ASSERT(isRepresentablePtr(ptr));
MOZ_ASSERT(isRepresentableUint7(uint7));
}
inline void* pointer() const { return (void*)(int64_t(val_) >> SHIFT_PTR); }
inline uint32_t uint7() const { return uint32_t(val_ & MASK_UINT7); }
};
static_assert(sizeof(void*) == 8);
// "int64_t really is signed"
static_assert(((int64_t(1) << 63) >> 63) == int64_t(0xFFFFFFFFFFFFFFFFULL));
#else
// The implementation for 32-bit targets.
class PointerAndUint7 final {
// The representation places the pointer in the upper 32 bits of val_ and
// the Uint7 in the lower 32 bits. This is represented using a single
// 64-bit field in the hope of increasing the chance that the class will be
// passed around in a register-pair rather than through memory.
uint64_t val_;
static const uint8_t SHIFT_PTR = 32;
static const uint64_t MASK_UINT7 = (uint64_t(1) << 7) - 1;
static inline bool isRepresentableUint7(uint32_t uint7) {
return uint7 <= MASK_UINT7;
}
public:
inline PointerAndUint7() : val_(0) {}
inline PointerAndUint7(void* ptr, uint32_t uint7)
: val_((uint64_t(uint32_t(ptr)) << SHIFT_PTR) |
(uint64_t(uint7) & MASK_UINT7)) {
MOZ_ASSERT(isRepresentableUint7(uint7));
}
inline void* pointer() const { return (void*)(int32_t(val_ >> SHIFT_PTR)); }
inline uint32_t uint7() const { return uint32_t(val_ & MASK_UINT7); }
};
static_assert(sizeof(void*) == 4);
#endif // JS_64BIT
// We require this for both 32- and 64-bit targets.
static_assert(sizeof(PointerAndUint7) == 8);
} // namespace js
#endif // gc_PointerAndUint7_h
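
As a quick illustration of the encoding (not part of the patch): the pointer
and the 7-bit tag round-trip through a single 64-bit word.  The function name
below is hypothetical; only the header above and mozilla/Assertions.h are
assumed.

#include <stdint.h>

#include "ds/PointerAndUint7.h"
#include "mozilla/Assertions.h"

// Hypothetical round-trip check, not part of the patch.
void pointerAndUint7RoundTrip() {
  int local = 0;
  void* ptr = &local;   // an ordinary user-space address is representable
  uint32_t listID = 5;  // must fit in 7 bits, i.e. be <= 127

  js::PointerAndUint7 packed(ptr, listID);
  static_assert(sizeof(packed) == 8);  // a single word of storage

  MOZ_ASSERT(packed.pointer() == ptr);
  MOZ_ASSERT(packed.uint7() == listID);
}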


@ -0,0 +1,144 @@
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
* vim: set ts=8 sw=2 et tw=80:
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "gc/MallocedBlockCache.h"
#include "mozilla/MemoryChecking.h"
using js::PointerAndUint7;
using js::gc::MallocedBlockCache;
MallocedBlockCache::~MallocedBlockCache() { clear(); }
PointerAndUint7 MallocedBlockCache::alloc(size_t size) {
// Figure out which free list can give us a block of size `size`, after it
// has been rounded up to a multiple of `step`.
//
// Example mapping for STEP = 16 and NUM_LISTS = 8, after rounding up:
// 0 never holds any blocks (denotes "too large")
// 1 holds blocks of size 16
// 2 holds blocks of size 32
// 3 holds blocks of size 48
// 4 holds blocks of size 64
// 5 holds blocks of size 80
// 6 holds blocks of size 96
// 7 holds blocks of size 112
//
// For a request of size n:
//   * if n == 0, fail
//   * else
//       round n up to a multiple of STEP
//       let i = n / STEP
//       if i >= NUM_LISTS
//         alloc direct from js_malloc, and return listID = 0
//       if lists[i] is nonempty, use lists[i] and return listID = i.
//       else
//         let p = js_malloc(n)
//         return p and listID = i.
// We're never expected to handle zero-sized blocks.
MOZ_ASSERT(size > 0);
size = js::RoundUp(size, STEP);
size_t i = size / STEP;
// Too large to cache; go straight to js_malloc.
if (MOZ_UNLIKELY(i >= NUM_LISTS)) {
void* p = js_malloc(size);
// If p is nullptr, that fact is carried into the PointerAndUint7, and the
// caller is expected to check that.
return PointerAndUint7(p, OVERSIZE_BLOCK_LIST_ID);
}
// The case we hope is common. First, see if we can pull a block from the
// relevant list.
MOZ_ASSERT(i >= 1 && i < NUM_LISTS);
// Check that i is the right list
MOZ_ASSERT(i * STEP == size);
if (MOZ_LIKELY(!lists[i].empty())) {
void* block = lists[i].popCopy();
return PointerAndUint7(block, i);
}
// No luck.
void* p = js_malloc(size);
if (MOZ_UNLIKELY(!p)) {
return PointerAndUint7(nullptr, 0); // OOM
}
return PointerAndUint7(p, i);
}
void MallocedBlockCache::free(PointerAndUint7 blockAndListID) {
// This is a whole lot simpler than the ::alloc case, since we are given the
// listId and don't have to compute it (not that we have any way to).
void* block = blockAndListID.pointer();
uint32_t listID = blockAndListID.uint7();
MOZ_ASSERT(block);
MOZ_ASSERT(listID < NUM_LISTS);
if (MOZ_UNLIKELY(listID == OVERSIZE_BLOCK_LIST_ID)) {
// It was too large for recycling; go straight to js_free.
js_free(block);
return;
}
// Put it back on list `listId`, first poisoning it for safety.
memset(block, JS_NOTINUSE_TRAILER_PATTERN, listID * STEP);
MOZ_MAKE_MEM_UNDEFINED(block, listID * STEP);
if (MOZ_UNLIKELY(!lists[listID].append(block))) {
// OOM'd while doing admin. Hand it off to js_free and forget about the
// OOM.
js_free(block);
}
}
void MallocedBlockCache::preen(float percentOfBlocksToDiscard) {
MOZ_ASSERT(percentOfBlocksToDiscard >= 0.0 &&
percentOfBlocksToDiscard <= 100.0);
MOZ_ASSERT(lists[OVERSIZE_BLOCK_LIST_ID].empty());
for (size_t listID = 1; listID < NUM_LISTS; listID++) {
MallocedBlockVector& list = lists[listID];
size_t numToFree =
size_t(float(list.length()) * (percentOfBlocksToDiscard / 100.0));
MOZ_RELEASE_ASSERT(numToFree <= list.length());
while (numToFree > 0) {
void* block = list.popCopy();
MOZ_ASSERT(block);
js_free(block);
numToFree--;
}
}
}
void MallocedBlockCache::clear() {
MOZ_ASSERT(lists[OVERSIZE_BLOCK_LIST_ID].empty());
for (size_t i = 1; i < NUM_LISTS; i++) {
MallocedBlockVector& list = lists[i];
for (size_t j = 0; j < list.length(); j++) {
MOZ_ASSERT(list[j]);
js_free(list[j]);
list[j] = nullptr; // for safety
}
list.clear();
}
}
size_t MallocedBlockCache::sizeOfExcludingThis(
mozilla::MallocSizeOf mallocSizeOf) const {
MOZ_ASSERT(lists[OVERSIZE_BLOCK_LIST_ID].empty());
size_t nBytes = 0;
for (size_t listID = 0; listID < NUM_LISTS; listID++) {
const MallocedBlockVector& list = lists[listID];
nBytes += list.sizeOfExcludingThis(mallocSizeOf);
// The payload size of each block in `list` is the same. Hence, we could
// possibly do better here (measure once and multiply by the length) if we
// believe that the metadata size for each block is also the same.
for (size_t i = 0; i < list.length(); i++) {
MOZ_ASSERT(list[i]);
nBytes += mallocSizeOf(list[i]);
}
}
return nBytes;
}
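
For concreteness, the size-to-list-ID mapping performed by ::alloc can be
modelled in isolation as below.  This is an illustrative stand-alone sketch
assuming the default STEP = 16 and NUM_LISTS = 32 from the header;
`listIDForRequest` is a hypothetical helper, not part of the cache.

#include <cassert>
#include <cstddef>

// Mirrors the rounding and list-ID selection in MallocedBlockCache::alloc,
// assuming the default STEP = 16 and NUM_LISTS = 32.
static size_t listIDForRequest(size_t size) {
  const size_t STEP = 16;
  const size_t NUM_LISTS = 32;
  assert(size > 0);  // zero-sized requests are not supported
  size_t rounded = (size + STEP - 1) / STEP * STEP;  // js::RoundUp(size, STEP)
  size_t i = rounded / STEP;
  return i >= NUM_LISTS ? 0 : i;  // 0 == OVERSIZE_BLOCK_LIST_ID
}

int main() {
  assert(listIDForRequest(1) == 1);     // rounds up to 16
  assert(listIDForRequest(100) == 7);   // rounds up to 112
  assert(listIDForRequest(496) == 31);  // largest cacheable size
  assert(listIDForRequest(497) == 0);   // too large: handled by js_malloc
  return 0;
}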


@ -0,0 +1,91 @@
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
* vim: set ts=8 sw=2 et tw=80:
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef gc_MallocedBlockCache_h
#define gc_MallocedBlockCache_h
#include "ds/PointerAndUint7.h"
#include "js/AllocPolicy.h"
#include "js/Vector.h"
#include "util/Poison.h"
namespace js {
namespace gc {
// MallocedBlockCache implements a lightweight wrapper around js_malloc/js_free.
//
// Blocks are requested by ::alloc and must be returned with ::free, at which
// point the cache may decide to hold on to the block rather than hand it back
// to js_free. Subsequent ::alloc calls may be satisfied from the cached
// blocks rather than calling js_malloc. The mechanism is designed to be much
// cheaper than calling js_malloc/js_free directly. One consequence is that
// there is no locking; it is essential therefore to use each cache only from
// a single thread.
//
// The intended use is for lightweight management of OOL (malloc'd) storage
// associated with WasmStructObject and WasmArrayObject. The mechanism is
// general and potentially has other uses. Blocks of size STEP * NUM_LISTS
// and larger are never cached, though.
//
// Request sizes are rounded up to a multiple of STEP. There are NUM_LISTS-1
// free lists, with a "List ID" indicating the multiple of STEP stored on the
// list. So for example, blocks of size 3 * STEP (after rounding up) are
// stored on the list with ID 3. List ID 0 indicates blocks which are too
// large to live on any freelist. With the default settings, this gives
// separate freelists for blocks of size 16, 32, 48, .. 496. Blocks of size
// zero are not supported, and `lists[0]` will always be empty.
//
// Allocation of a block produces not only the block's address but also its
// List ID. When freeing, both values must be presented, because there is
// otherwise no way for ::free to know the size of the original allocation,
// and hence which freelist it should go on. For this reason, the ::alloc and
// ::free methods produce and take a `PointerAndUint7`, not a `void*`.
//
// Resizing of blocks is not supported.
class MallocedBlockCache {
public:
static const size_t STEP = 16;
static const size_t NUM_LISTS = 32;
// This limitation exists because allocation returns a PointerAndUint7, and
// a List-ID value (viz, 0 .. NUM_LISTS-1) is stored in the Uint7 part.
static_assert(NUM_LISTS <= (1 << 7));
// list[0] must always remain empty. List ID 0 indicates a block which
// cannot participate in the freelist machinery because it is too large.
//
// list[i], for 1 <= i < NUM_LISTS, holds blocks of size i * STEP only.
// All requests are rounded up to a multiple of STEP.
//
// We do not expect to be required to issue or accept blocks of size zero.
static const size_t OVERSIZE_BLOCK_LIST_ID = 0;
using MallocedBlockVector = Vector<void*, 0, SystemAllocPolicy>;
MallocedBlockVector lists[NUM_LISTS];
~MallocedBlockCache();
// Allocation and freeing.
[[nodiscard]] PointerAndUint7 alloc(size_t size);
void free(PointerAndUint7 blockAndListID);
// Allows users to gradually hand blocks back to js_free, so as to avoid
// space leaks in long-running scenarios. The specified percentage of
// blocks in each list is discarded.
void preen(float percentOfBlocksToDiscard);
// Return all blocks in the cache to js_free.
void clear();
size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) const;
};
} // namespace gc
} // namespace js
#endif // gc_MallocedBlockCache_h
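
A minimal single-threaded usage sketch of the cache follows; the function name
is hypothetical and the snippet assumes SpiderMonkey's internal headers, but
the calls themselves (alloc, free, preen, clear) are the ones declared above.

#include "gc/MallocedBlockCache.h"

// Hypothetical usage sketch, not part of the patch.
void mallocedBlockCacheSketch() {
  js::gc::MallocedBlockCache cache;

  // Request 100 bytes; the cache rounds this up to 112 and, on success,
  // tags the result with list ID 7.
  js::PointerAndUint7 block = cache.alloc(100);
  if (!block.pointer()) {
    return;  // OOM is signalled by a null pointer inside the PointerAndUint7
  }

  // ... use block.pointer() as at least 100 bytes of ordinary storage ...

  // The pointer and its list ID must be handed back together; the block is
  // poisoned and parked on freelist 7 (or js_free'd if the append OOMs).
  cache.free(block);

  // Optionally discard 25% of each freelist, or empty the cache entirely.
  cache.preen(25.0);
  cache.clear();
}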


@ -1208,7 +1208,7 @@ void js::Nursery::collect(JS::GCOptions options, JS::GCReason reason) {
// old empty state.
bool wasEmpty = isEmpty();
if (!wasEmpty) {
CollectionResult result = doCollection(reason);
CollectionResult result = doCollection(options, reason);
// Don't include chunk headers when calculating nursery space, since this
// space does not represent data that can be tenured
MOZ_ASSERT(result.tenuredBytes <=
@ -1322,7 +1322,97 @@ void js::Nursery::printDeduplicationData(js::StringStats& prev,
}
}
js::Nursery::CollectionResult js::Nursery::doCollection(JS::GCReason reason) {
void js::Nursery::freeTrailerBlocks(void) {
// This routine frees those blocks denoted by the set
//
// trailersAdded_ (all of it)
// - trailersRemoved_ (entries with index below trailersRemovedUsed_)
//
// For each block, places it back on the nursery's small-malloced-block pool
// by calling mallocedBlockCache_.free.
MOZ_ASSERT(trailersAdded_.length() == trailersRemoved_.length());
MOZ_ASSERT(trailersRemovedUsed_ <= trailersRemoved_.length());
// Sort the removed entries.
std::sort(trailersRemoved_.begin(),
trailersRemoved_.begin() + trailersRemovedUsed_,
[](const void* block1, const void* block2) {
return uintptr_t(block1) < uintptr_t(block2);
});
// Use one of two schemes to enumerate the set subtraction.
if (trailersRemovedUsed_ < 1000) {
// If the number of removed items is relatively small, it isn't worth the
// cost of sorting `trailersAdded_`. Instead, walk through the vector in
// whatever order it is and use binary search to establish whether each
// item is present in trailersRemoved_[0 .. trailersRemovedUsed_ - 1].
const size_t nAdded = trailersAdded_.length();
for (size_t i = 0; i < nAdded; i++) {
const PointerAndUint7 block = trailersAdded_[i];
const void* blockPointer = block.pointer();
if (!std::binary_search(trailersRemoved_.begin(),
trailersRemoved_.begin() + trailersRemovedUsed_,
blockPointer)) {
mallocedBlockCache_.free(block);
}
}
} else {
// The general case, which is algorithmically safer for large inputs.
// Sort the added entries, and then walk through both them and the removed
// entries in lockstep.
std::sort(trailersAdded_.begin(), trailersAdded_.end(),
[](const PointerAndUint7& block1, const PointerAndUint7& block2) {
return uintptr_t(block1.pointer()) <
uintptr_t(block2.pointer());
});
// Enumerate the set subtraction. This is somewhat simplified by the fact
// that all elements of the removed set must also be present in the added
// set (the "inclusion property").
const size_t nAdded = trailersAdded_.length();
const size_t nRemoved = trailersRemovedUsed_;
size_t iAdded;
size_t iRemoved = 0;
for (iAdded = 0; iAdded < nAdded; iAdded++) {
if (iRemoved == nRemoved) {
// We've run out of items to skip, so move on to the next loop.
break;
}
const PointerAndUint7 blockAdded = trailersAdded_[iAdded];
const void* blockRemoved = trailersRemoved_[iRemoved];
if (blockAdded.pointer() < blockRemoved) {
mallocedBlockCache_.free(blockAdded);
continue;
}
// If this doesn't hold
// (that is, if `blockAdded.pointer() > blockRemoved`),
// then the abovementioned inclusion property doesn't hold.
MOZ_RELEASE_ASSERT(blockAdded.pointer() == blockRemoved);
iRemoved++;
}
MOZ_ASSERT(iRemoved == nRemoved);
// We've used up the removed set, so now finish up the remainder of the
// added set.
for (/*keep going*/; iAdded < nAdded; iAdded++) {
const PointerAndUint7 block = trailersAdded_[iAdded];
mallocedBlockCache_.free(block);
}
}
// And empty out both sets, but preserve the underlying storage.
trailersAdded_.clear();
trailersRemoved_.clear();
trailersRemovedUsed_ = 0;
trailerBytes_ = 0;
// Discard blocks from the cache at 0.05% per megabyte of nursery capacity,
// that is, 0.8% of blocks for a 16-megabyte nursery. This allows the cache
// to gradually discard unneeded blocks in long running applications.
mallocedBlockCache_.preen(0.05 * float(capacity() / (1024 * 1024)));
}
js::Nursery::CollectionResult js::Nursery::doCollection(JS::GCOptions options,
JS::GCReason reason) {
JSRuntime* rt = runtime();
AutoGCSession session(gc, JS::HeapState::MinorCollecting);
AutoSetThreadIsPerformingGC performingGC(rt->gcContext());
@ -1374,6 +1464,15 @@ js::Nursery::CollectionResult js::Nursery::doCollection(JS::GCReason reason) {
mallocedBufferBytes = 0;
endProfile(ProfileKey::FreeMallocedBuffers);
// Give trailer blocks associated with non-tenured Wasm{Struct,Array}Objects
// back to our `mallocedBlockCache_`.
startProfile(ProfileKey::FreeTrailerBlocks);
freeTrailerBlocks();
if (options == JS::GCOptions::Shrink || gc::IsOOMReason(reason)) {
mallocedBlockCache_.clear();
}
endProfile(ProfileKey::FreeTrailerBlocks);
startProfile(ProfileKey::ClearNursery);
clear();
endProfile(ProfileKey::ClearNursery);
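
The vector-based set subtraction in Nursery::freeTrailerBlocks above boils
down to the stand-alone sketch below.  It is a simplified model, not the
patch's code: `subtractAndRelease` and `release` are hypothetical names, and
the sketch assumes the same inclusion property, namely that every removed
entry also appears in the added vector.

#include <algorithm>
#include <cstddef>
#include <vector>

// Simplified model of the `added - removed` enumeration: both vectors are
// sorted by address and `removed` is a subset of `added`.  Every block that
// appears only in `added` is passed to `release`, which stands in for
// mallocedBlockCache_.free.
template <typename Release>
void subtractAndRelease(std::vector<void*> added, std::vector<void*> removed,
                        Release release) {
  std::sort(added.begin(), added.end());
  std::sort(removed.begin(), removed.end());
  size_t iRemoved = 0;
  for (void* block : added) {
    if (iRemoved < removed.size() && removed[iRemoved] == block) {
      iRemoved++;  // present in both sets: the owning object was tenured
      continue;
    }
    release(block);  // only in `added`: the owning object died in the nursery
  }
}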


@ -14,6 +14,7 @@
#include "gc/GCParallelTask.h"
#include "gc/Heap.h"
#include "gc/MallocedBlockCache.h"
#include "js/AllocPolicy.h"
#include "js/Class.h"
#include "js/GCAPI.h"
@ -40,6 +41,7 @@
_(Sweep, "sweep") \
_(UpdateJitActivations, "updtIn") \
_(FreeMallocedBuffers, "frSlts") \
_(FreeTrailerBlocks, "frTrBs") \
_(ClearStoreBuffer, "clrSB") \
_(ClearNursery, "clear") \
_(PurgeStringToAtomCache, "pStoA") \
@ -282,6 +284,63 @@ class Nursery {
return total;
}
// Wasm "trailer" (C++-heap-allocated) blocks.
//
// All involved blocks are allocated/deallocated via this nursery's
// `mallocedBlockCache_`. Hence we must store both the block address and
// its freelist ID, wrapped up in a PointerAndUint7.
//
// Trailer blocks registered here are added to `trailersAdded_`. Those that
// are later deregistered, as a result of `obj_moved` calls that indicate
// tenuring, should be added to `trailersRemoved_`.
//
// Unfortunately ::unregisterTrailer cannot be allowed to OOM. To get
// around this we rely on the observation that all deregistered blocks
// should previously have been registered, so the deregistered set can never
// be larger than the registered set. Hence ::registerTrailer effectively
// preallocates space in `trailersRemoved_` so as to ensure that, in the
// worst case, all registered blocks can be handed to ::unregisterTrailer
// without needing to resize `trailersRemoved_` in ::unregisterTrailer.
//
// The downside is that most of the space in `trailersRemoved_` is wasted in
// the case where there are few blocks deregistered. This is unfortunate
// but it's hard to see how to avoid it.
//
// At the end of a minor collection, all blocks in the set `trailersAdded_ -
// trailersRemoved_[0 .. trailersRemovedUsed_ - 1]` are handed back to the
// `mallocedBlockCache_`.
[[nodiscard]] bool registerTrailer(PointerAndUint7 blockAndListID,
size_t nBytes) {
MOZ_ASSERT(trailersAdded_.length() == trailersRemoved_.length());
MOZ_ASSERT(nBytes > 0);
if (MOZ_UNLIKELY(!trailersAdded_.append(blockAndListID))) {
return false;
}
if (MOZ_UNLIKELY(!trailersRemoved_.append(nullptr))) {
return false;
}
// This is a clone of the logic in ::registerMallocedBuffer. It may be
// that some other heuristic is better, once we know more about the
// typical behaviour of wasm-GC applications.
trailerBytes_ += nBytes;
if (MOZ_UNLIKELY(trailerBytes_ > capacity() * 8)) {
requestMinorGC(JS::GCReason::NURSERY_TRAILERS);
}
return true;
}
void unregisterTrailer(void* block) {
MOZ_ASSERT(trailersRemovedUsed_ < trailersRemoved_.length());
trailersRemoved_[trailersRemovedUsed_] = block;
trailersRemovedUsed_++;
}
size_t sizeOfTrailerBlockSets(mozilla::MallocSizeOf mallocSizeOf) const {
return trailersAdded_.sizeOfExcludingThis(mallocSizeOf) +
trailersRemoved_.sizeOfExcludingThis(mallocSizeOf);
}
// The number of bytes from the start position to the end of the nursery.
// pass maxChunkCount(), allocatedChunkCount() or chunkCountLimit()
// to calculate the nursery size, current lazy-allocated size or nursery
@ -380,6 +439,12 @@ class Nursery {
// Round a size in bytes to the nearest valid nursery size.
static size_t roundSize(size_t size);
// The malloc'd block cache.
gc::MallocedBlockCache& mallocedBlockCache() { return mallocedBlockCache_; }
size_t sizeOfMallocedBlockCache(mozilla::MallocSizeOf mallocSizeOf) const {
return mallocedBlockCache_.sizeOfExcludingThis(mallocSizeOf);
}
private:
gc::GCRuntime* const gc;
@ -492,6 +557,15 @@ class Nursery {
BufferSet mallocedBuffers;
size_t mallocedBufferBytes = 0;
// Wasm "trailer" (C++-heap-allocated) blocks. See comments above on
// ::registerTrailer and ::unregisterTrailer.
Vector<PointerAndUint7, 0, SystemAllocPolicy> trailersAdded_;
Vector<void*, 0, SystemAllocPolicy> trailersRemoved_;
size_t trailersRemovedUsed_ = 0;
size_t trailerBytes_ = 0;
void freeTrailerBlocks();
// During a collection most hoisted slot and element buffers indicate their
// new location with a forwarding pointer at the base. This does not work
// for buffers whose length is less than pointer width, or when different
@ -581,6 +655,15 @@ class Nursery {
NurseryDecommitTask decommitTask;
// A cache of small C++-heap allocated blocks associated with this Nursery.
// It exists to provide cheap allocation/deallocation of
// out-of-line storage areas as used by WasmStructObject and
// WasmArrayObject, although the mechanism is general and not specific to
// these object types. Regarding lifetimes, because the cache holds only
// blocks that are not currently in use, it can be flushed at any point with
// no correctness impact, only a performance impact.
gc::MallocedBlockCache mallocedBlockCache_;
#ifdef JS_GC_ZEAL
struct Canary;
Canary* lastCanary_;
@ -633,7 +716,7 @@ class Nursery {
size_t tenuredBytes;
size_t tenuredCells;
};
CollectionResult doCollection(JS::GCReason reason);
CollectionResult doCollection(JS::GCOptions options, JS::GCReason reason);
void traceRoots(gc::AutoGCSession& session, TenuringTracer& mover);
size_t doPretenuring(JSRuntime* rt, JS::GCReason reason,


@ -34,6 +34,7 @@ UNIFIED_SOURCES += [
"GCAPI.cpp",
"GCParallelTask.cpp",
"Heap.cpp",
"MallocedBlockCache.cpp",
"Marking.cpp",
"Memory.cpp",
"Nursery.cpp",


@ -50,18 +50,13 @@ static MOZ_ALWAYS_INLINE void PodSet(T* aDst, const T& aSrc, size_t aNElem) {
/*
* Patterns used by SpiderMonkey to overwrite unused memory. If you are
* accessing an object with one of these pattern, you probably have a dangling
* pointer. These values should be odd, see the comment in IsThingPoisoned.
*
* Note: new patterns should also be added to the array in IsThingPoisoned!
*
* We try to keep our IRC bot, mrgiggles, up to date with these and other
* patterns:
* https://bitbucket.org/sfink/mrgiggles/src/default/plugins/knowledge/__init__.py
* accessing an object with one of these patterns, you probably have a dangling
* pointer. These values should be odd.
*/
const uint8_t JS_FRESH_NURSERY_PATTERN = 0x2F;
const uint8_t JS_SWEPT_NURSERY_PATTERN = 0x2B;
const uint8_t JS_ALLOCATED_NURSERY_PATTERN = 0x2D;
const uint8_t JS_NOTINUSE_TRAILER_PATTERN = 0x43;
const uint8_t JS_FRESH_TENURED_PATTERN = 0x4F;
const uint8_t JS_MOVED_TENURED_PATTERN = 0x49;
const uint8_t JS_SWEPT_TENURED_PATTERN = 0x4B;


@ -335,6 +335,11 @@ void JSRuntime::addSizeOfIncludingThis(mozilla::MallocSizeOf mallocSizeOf,
gc.nursery().sizeOfMallocedBuffers(mallocSizeOf);
gc.storeBuffer().addSizeOfExcludingThis(mallocSizeOf, &rtSizes->gc);
rtSizes->gc.nurseryMallocedBlockCache +=
gc.nursery().sizeOfMallocedBlockCache(mallocSizeOf);
rtSizes->gc.nurseryTrailerBlockSets +=
gc.nursery().sizeOfTrailerBlockSets(mallocSizeOf);
if (isMainRuntime()) {
rtSizes->sharedImmutableStringsCache +=
js::SharedImmutableStringsCache::getSingleton().sizeOfExcludingThis(


@ -47,6 +47,116 @@ using mozilla::PointerRangeSize;
using namespace js;
using namespace wasm;
// [SMDOC] Management of OOL storage areas for Wasm{Array,Struct}Object.
//
// WasmArrayObject always has its payload data stored in a block in the C++ heap,
// which is pointed to from the WasmArrayObject. The same is true for
// WasmStructObject in the case where the fields cannot fit in the object
// itself. These C++ blocks are in some places referred to as "trailer blocks".
//
// The presence of trailer blocks complicates the use of generational GC (that
// is, Nursery allocation) of Wasm{Array,Struct}Object. In particular:
//
// (1) For objects which do not get tenured at minor collection, there must be
// a way to free the associated trailer, but there is no way to visit
// non-tenured blocks during minor collection.
//
// (2) Even if (1) were solved, calling js_malloc/js_free for every object
// creation-death cycle is expensive, possibly around 400 machine
// instructions, and we expressly want to avoid that in a generational GC
// scenario.
//
// The following scheme is therefore employed.
//
// (a) gc::Nursery maintains a pool of available C++-heap-allocated blocks --
// a js::MallocedBlockCache -- and the intention is that trailers are
// allocated from this pool and freed back into it whenever possible.
//
// (b) WasmArrayObject::createArray and WasmStructObject::createStruct always
// request trailer allocation from the nursery's cache (a). If the cache
// cannot honour the request from its free lists, it falls back to
// js_malloc; we hope this happens only infrequently.
//
// (c) The allocated block is returned as a js::PointerAndUint7, a pair that
// holds the trailer block pointer and an auxiliary tag that the
// js::MallocedBlockCache needs to see when the block is freed.
//
// The raw trailer block pointer (a `void*`) is stored in the
// Wasm{Array,Struct}Object OOL data field. These objects are not aware
// of and do not interact with js::PointerAndUint7, and nor does any
// JIT-generated code.
//
// (d) Still in WasmArrayObject::createArray and
// WasmStructObject::createStruct, if the object was allocated in the
// nursery, then the resulting js::PointerAndUint7 is "registered" with
// the nursery by handing it to Nursery::registerTrailer.
//
// (e) When a minor collection happens (Nursery::doCollection), we are
// notified of objects that are moved by calls to the ::obj_moved methods
// in this file. For those objects that have been tenured, the raw
// trailer pointer is "deregistered" with the nursery by handing it to
// Nursery::unregisterTrailer.
//
// (f) Still during minor collection: The nursery now knows both the set of
// trailer blocks added, and those removed because the corresponding
// object has been tenured. The difference between these two sets (that
// is, `added - removed`) is the set of trailer blocks corresponding to
// blocks that didn't get tenured. That set is computed and freed (back
// to the nursery's js::MallocedBlockCache) by
// Nursery::freeTrailerBlocks.
//
// (g) At the end of minor collection, the added and removed sets are made
// empty, and the cycle begins again.
//
// (h) Also at the end of minor collection, a call to
// `mallocedBlockCache_.preen` hands a few blocks in the cache back to
// js_free. This mechanism exists so as to ensure that unused blocks do
// not remain in the cache indefinitely.
//
// (i) For objects that got tenured, we are eventually notified of their death
// by a call to the ::obj_finalize methods below. At that point we hand
// their block pointers to js_free.
//
// (j) When the nursery is eventually destroyed, all blocks in its block cache
// are handed to js_free. Hence, at process exit, provided all nurseries
// are first collected and then their destructors run, no C++ heap blocks
// are leaked.
//
// As a result of this scheme, trailer blocks associated with what we hope is
// the frequent case -- objects that are allocated but never make it out of
// the nursery -- are cycled through the nursery's block cache.
//
// Trailers associated with tenured objects cannot participate though; they are
// always returned to js_free. It would be possible to enable them to
// participate by changing their owning object's OOL data pointer to be a
// js::PointerAndUint7 rather than a raw `void*`, so that then the blocks
// could be released to the cache in the ::obj_finalize methods. This would
// however require changes in the generated code for array element and OOL
// struct element accesses.
//
// Here's a short summary of the trailer block life cycle:
//
// * allocated:
//
// - in WasmArrayObject::createArray / WasmStructObject::createStruct
//
// - by calling the nursery's MallocedBlockCache alloc method
//
// * deallocated:
//
// - for non-tenured objects, in the collector itself,
// in Nursery::doCollection calling Nursery::freeTrailerBlocks,
// releasing to the nursery's block cache
//
// - for tenured objects, in the ::obj_finalize methods, releasing directly
// to js_free
//
// If this seems confusing ("why is it ok to allocate from the cache but
// release to js_free?"), remember that the cache holds blocks previously
// obtained from js_malloc but which are *not* currently in use. Hence it is
// fine to give them back to js_free; that just makes the cache a bit emptier
// but has no effect on correctness.
//=========================================================================
// WasmGcObject
@ -411,10 +521,11 @@ WasmArrayObject* WasmArrayObject::createArray(
// Allocate the outline data before allocating the object so that we can
// infallibly initialize the pointer on the array object after it is
// allocated.
uint8_t* outlineData = nullptr;
Nursery& nursery = cx->nursery();
PointerAndUint7 outlineData(nullptr, 0);
if (outlineBytes.value() > 0) {
outlineData = (uint8_t*)js_malloc(outlineBytes.value());
if (!outlineData) {
outlineData = nursery.mallocedBlockCache().alloc(outlineBytes.value());
if (!outlineData.pointer()) {
ReportOutOfMemory(cx);
return nullptr;
}
@ -428,25 +539,24 @@ WasmArrayObject* WasmArrayObject::createArray(
(WasmArrayObject*)WasmGcObject::create(cx, typeDefData, initialHeap);
if (!arrayObj) {
ReportOutOfMemory(cx);
if (outlineData) {
js_free(outlineData);
if (outlineData.pointer()) {
nursery.mallocedBlockCache().free(outlineData);
}
return nullptr;
}
arrayObj->numElements_ = numElements;
arrayObj->data_ = outlineData;
if (arrayObj->data_) {
arrayObj->data_ = (uint8_t*)outlineData.pointer();
if (outlineData.pointer()) {
if constexpr (ZeroFields) {
memset(arrayObj->data_, 0, outlineBytes.value());
memset(outlineData.pointer(), 0, outlineBytes.value());
}
if (js::gc::IsInsideNursery(arrayObj)) {
// We need to register the OOL area with the nursery, so it will be
// freed after GCing of the nursery if `arrayObj_` doesn't make it into
// the tenured heap.
if (!cx->nursery().registerMallocedBuffer(arrayObj->data_,
outlineBytes.value())) {
js_free(arrayObj->data_);
if (!nursery.registerTrailer(outlineData, outlineBytes.value())) {
nursery.mallocedBlockCache().free(outlineData);
return nullptr;
}
}
@ -506,7 +616,7 @@ size_t WasmArrayObject::obj_moved(JSObject* obj, JSObject* old) {
WasmArrayObject& arrayObj = obj->as<WasmArrayObject>();
if (arrayObj.data_) {
Nursery& nursery = obj->runtimeFromMainThread()->gc.nursery();
nursery.removeMallocedBufferDuringMinorGC(arrayObj.data_);
nursery.unregisterTrailer(arrayObj.data_);
}
}
return 0;
@ -603,10 +713,11 @@ WasmStructObject* WasmStructObject::createStruct(
// Allocate the outline data, if any, before allocating the object so that
// we can infallibly initialize the outline data of structs that require one.
uint8_t* outlineData = nullptr;
Nursery& nursery = cx->nursery();
PointerAndUint7 outlineData(nullptr, 0);
if (outlineBytes > 0) {
outlineData = (uint8_t*)js_malloc(outlineBytes);
if (!outlineData) {
outlineData = nursery.mallocedBlockCache().alloc(outlineBytes);
if (!outlineData.pointer()) {
ReportOutOfMemory(cx);
return nullptr;
}
@ -618,27 +729,26 @@ WasmStructObject* WasmStructObject::createStruct(
(WasmStructObject*)WasmGcObject::create(cx, typeDefData, initialHeap);
if (!structObj) {
ReportOutOfMemory(cx);
if (outlineData) {
js_free(outlineData);
if (outlineData.pointer()) {
nursery.mallocedBlockCache().free(outlineData);
}
return nullptr;
}
// Initialize the outline data field
structObj->outlineData_ = outlineData;
structObj->outlineData_ = (uint8_t*)outlineData.pointer();
if constexpr (ZeroFields) {
memset(&(structObj->inlineData_[0]), 0, inlineBytes);
}
if (outlineBytes > 0) {
if constexpr (ZeroFields) {
memset(structObj->outlineData_, 0, outlineBytes);
memset(outlineData.pointer(), 0, outlineBytes);
}
// See corresponding comment in WasmArrayObject::createArray.
if (js::gc::IsInsideNursery(structObj)) {
if (!cx->nursery().registerMallocedBuffer(structObj->outlineData_,
outlineBytes)) {
js_free(structObj->outlineData_);
// See corresponding comment in WasmArrayObject::createArray.
if (!nursery.registerTrailer(outlineData, outlineBytes)) {
nursery.mallocedBlockCache().free(outlineData);
return nullptr;
}
}
@ -692,7 +802,7 @@ size_t WasmStructObject::obj_moved(JSObject* obj, JSObject* old) {
// structs with OOL data. Hence:
MOZ_ASSERT(structObj.outlineData_);
Nursery& nursery = obj->runtimeFromMainThread()->gc.nursery();
nursery.removeMallocedBufferDuringMinorGC(structObj.outlineData_);
nursery.unregisterTrailer(structObj.outlineData_);
}
return 0;
}