mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-23 04:41:11 +00:00
Bug 1924444 - Use a linked list with LIFO policy for a bin's mNotFullRuns to reduce overhead and foster locality. r=smaug,pbone
The current order of re-using regions from non-full runs relies on the run's position in memory via a RedBlackTree. This leads to O(log(n)) complexity for the tree book-keeping and walking while providing a somewhat arbitrary order when doing GetNonFullBinRun. We can reduce the complexity here by using a DoublyLinkedList for mNonFullRuns. The resulting LIFO order seems to have beneficial effects for common gecko use cases, too. Differential Revision: https://phabricator.services.mozilla.com/D225533
This commit is contained in:
parent
ae13d95188
commit
542d0e564f
@ -305,11 +305,7 @@ struct arena_t;
|
||||
|
||||
// Each element of the chunk map corresponds to one page within the chunk.
|
||||
struct arena_chunk_map_t {
|
||||
// Linkage for run trees. There are two disjoint uses:
|
||||
//
|
||||
// 1) arena_t's tree or available runs.
|
||||
// 2) arena_run_t conceptually uses this linkage for in-use non-full
|
||||
// runs, rather than directly embedding linkage.
|
||||
// Linkage for run trees. Used for arena_t's tree or available runs.
|
||||
RedBlackTreeNode<arena_chunk_map_t> link;
|
||||
|
||||
// Run address (or size) and various flags are stored together. The bit
|
||||
@ -987,15 +983,6 @@ struct ArenaChunkMapLink {
|
||||
}
|
||||
};
|
||||
|
||||
struct ArenaRunTreeTrait : public ArenaChunkMapLink {
|
||||
static inline Order Compare(arena_chunk_map_t* aNode,
|
||||
arena_chunk_map_t* aOther) {
|
||||
MOZ_ASSERT(aNode);
|
||||
MOZ_ASSERT(aOther);
|
||||
return CompareAddr(aNode, aOther);
|
||||
}
|
||||
};
|
||||
|
||||
struct ArenaAvailTreeTrait : public ArenaChunkMapLink {
|
||||
static inline Order Compare(arena_chunk_map_t* aNode,
|
||||
arena_chunk_map_t* aOther) {
|
||||
@ -1051,6 +1038,9 @@ struct arena_run_t {
|
||||
unsigned mNumFree;
|
||||
#endif
|
||||
|
||||
// Used by arena_bin_t::mNonFullRuns.
|
||||
DoublyLinkedListElement<arena_run_t> mRunListElem;
|
||||
|
||||
// Bin this run is associated with.
|
||||
arena_bin_t* mBin;
|
||||
|
||||
@ -1066,17 +1056,28 @@ struct arena_run_t {
|
||||
unsigned mRegionsMask[]; // Dynamically sized.
|
||||
};
|
||||
|
||||
struct arena_bin_t {
|
||||
// Current run being used to service allocations of this bin's size
|
||||
// class.
|
||||
arena_run_t* mCurrentRun;
|
||||
namespace mozilla {
|
||||
|
||||
// Tree of non-full runs. This tree is used when looking for an
|
||||
// existing run when mCurrentRun is no longer usable. We choose the
|
||||
// non-full run that is lowest in memory; this policy tends to keep
|
||||
// objects packed well, and it can also help reduce the number of
|
||||
// almost-empty chunks.
|
||||
RedBlackTree<arena_chunk_map_t, ArenaRunTreeTrait> mNonFullRuns;
|
||||
template <>
|
||||
struct GetDoublyLinkedListElement<arena_run_t> {
|
||||
static DoublyLinkedListElement<arena_run_t>& Get(arena_run_t* aThis) {
|
||||
return aThis->mRunListElem;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace mozilla
|
||||
|
||||
struct arena_bin_t {
|
||||
// We use a LIFO ("last-in-first-out") policy to refill non-full runs.
|
||||
//
|
||||
// This has the following reasons:
|
||||
// 1. It is cheap, as all our non-full-runs' book-keeping is O(1), no
|
||||
// tree-balancing or walking is needed.
|
||||
// 2. It also helps to increase the probability for CPU cache hits for the
|
||||
// book-keeping and the reused slots themselves, as the same memory was
|
||||
// most recently touched during free, especially when used from the same
|
||||
// core (or via the same shared cache, depending on the architecture).
|
||||
DoublyLinkedList<arena_run_t> mNonFullRuns;
|
||||
|
||||
// Bin's size class.
|
||||
size_t mSizeClass;
|
||||
@ -1293,7 +1294,9 @@ struct arena_t {
|
||||
void TrimRunTail(arena_chunk_t* aChunk, arena_run_t* aRun, size_t aOldSize,
|
||||
size_t aNewSize, bool dirty) MOZ_REQUIRES(mLock);
|
||||
|
||||
arena_run_t* GetNonFullBinRun(arena_bin_t* aBin) MOZ_REQUIRES(mLock);
|
||||
arena_run_t* GetNewEmptyBinRun(arena_bin_t* aBin) MOZ_REQUIRES(mLock);
|
||||
|
||||
inline arena_run_t* GetNonFullBinRun(arena_bin_t* aBin) MOZ_REQUIRES(mLock);
|
||||
|
||||
inline uint8_t FindFreeBitInMask(uint32_t aMask, uint32_t& aRng)
|
||||
MOZ_REQUIRES(mLock);
|
||||
@ -3653,32 +3656,16 @@ void arena_t::TrimRunTail(arena_chunk_t* aChunk, arena_run_t* aRun,
|
||||
MOZ_ASSERT(!no_chunk);
|
||||
}
|
||||
|
||||
arena_run_t* arena_t::GetNonFullBinRun(arena_bin_t* aBin) {
|
||||
arena_chunk_map_t* mapelm;
|
||||
arena_run_t* arena_t::GetNewEmptyBinRun(arena_bin_t* aBin) {
|
||||
arena_run_t* run;
|
||||
unsigned i, remainder;
|
||||
|
||||
// Look for a usable run.
|
||||
mapelm = aBin->mNonFullRuns.First();
|
||||
if (mapelm) {
|
||||
// run is guaranteed to have available space.
|
||||
aBin->mNonFullRuns.Remove(mapelm);
|
||||
run = (arena_run_t*)(mapelm->bits & ~gPageSizeMask);
|
||||
return run;
|
||||
}
|
||||
// No existing runs have any space available.
|
||||
|
||||
// Allocate a new run.
|
||||
run = AllocRun(static_cast<size_t>(aBin->mRunSizePages) << gPageSize2Pow,
|
||||
false, false);
|
||||
if (!run) {
|
||||
return nullptr;
|
||||
}
|
||||
// Don't initialize if a race in arena_t::RunAlloc() allowed an existing
|
||||
// run to become usable.
|
||||
if (run == aBin->mCurrentRun) {
|
||||
return run;
|
||||
}
|
||||
|
||||
// Initialize run internals.
|
||||
run->mBin = aBin;
|
||||
@ -3702,10 +3689,27 @@ arena_run_t* arena_t::GetNonFullBinRun(arena_bin_t* aBin) {
|
||||
run->mMagic = ARENA_RUN_MAGIC;
|
||||
#endif
|
||||
|
||||
// Make sure we continue to use this run for subsequent allocations.
|
||||
new (&run->mRunListElem) DoublyLinkedListElement<arena_run_t>();
|
||||
aBin->mNonFullRuns.pushFront(run);
|
||||
|
||||
aBin->mNumRuns++;
|
||||
return run;
|
||||
}
|
||||
|
||||
arena_run_t* arena_t::GetNonFullBinRun(arena_bin_t* aBin) {
|
||||
auto mrf_head = aBin->mNonFullRuns.begin();
|
||||
if (mrf_head) {
|
||||
// Take the head and if we are going to fill it, remove it from our list.
|
||||
arena_run_t* run = &(*mrf_head);
|
||||
if (run->mNumFree == 1) {
|
||||
aBin->mNonFullRuns.remove(run);
|
||||
}
|
||||
return run;
|
||||
}
|
||||
return GetNewEmptyBinRun(aBin);
|
||||
}
|
||||
|
||||
void arena_bin_t::Init(SizeClass aSizeClass) {
|
||||
size_t try_run_size;
|
||||
unsigned try_nregs, try_mask_nelms, try_reg0_offset;
|
||||
@ -3716,8 +3720,6 @@ void arena_bin_t::Init(SizeClass aSizeClass) {
|
||||
|
||||
try_run_size = gPageSize;
|
||||
|
||||
mCurrentRun = nullptr;
|
||||
mNonFullRuns.Init();
|
||||
mSizeClass = aSizeClass.Size();
|
||||
mNumRuns = 0;
|
||||
|
||||
@ -3781,6 +3783,10 @@ void arena_bin_t::Init(SizeClass aSizeClass) {
|
||||
try_reg0_offset);
|
||||
MOZ_ASSERT((try_mask_nelms << (LOG2(sizeof(int)) + 3)) >= try_nregs);
|
||||
|
||||
// Our list management would break if mRunNumRegions == 1 and we should use
|
||||
// a large size class instead, anyways.
|
||||
MOZ_ASSERT(try_nregs > 1);
|
||||
|
||||
// Copy final settings.
|
||||
MOZ_ASSERT((try_run_size >> gPageSize2Pow) <= UINT8_MAX);
|
||||
mRunSizePages = static_cast<uint8_t>(try_run_size >> gPageSize2Pow);
|
||||
@ -3875,10 +3881,7 @@ void* arena_t::MallocSmall(size_t aSize, bool aZero) {
|
||||
MOZ_ASSERT(!mRandomizeSmallAllocations || mPRNG ||
|
||||
(mIsPRNGInitializing && !isInitializingThread));
|
||||
|
||||
run = bin->mCurrentRun;
|
||||
if (MOZ_UNLIKELY(!run || run->mNumFree == 0)) {
|
||||
run = bin->mCurrentRun = GetNonFullBinRun(bin);
|
||||
}
|
||||
run = GetNonFullBinRun(bin);
|
||||
if (MOZ_UNLIKELY(!run)) {
|
||||
return nullptr;
|
||||
}
|
||||
@ -4325,53 +4328,28 @@ arena_chunk_t* arena_t::DallocSmall(arena_chunk_t* aChunk, void* aPtr,
|
||||
arena_chunk_t* dealloc_chunk = nullptr;
|
||||
|
||||
if (run->mNumFree == bin->mRunNumRegions) {
|
||||
// Deallocate run.
|
||||
if (run == bin->mCurrentRun) {
|
||||
bin->mCurrentRun = nullptr;
|
||||
} else if (bin->mRunNumRegions != 1) {
|
||||
size_t run_pageind =
|
||||
(uintptr_t(run) - uintptr_t(aChunk)) >> gPageSize2Pow;
|
||||
arena_chunk_map_t* run_mapelm = &aChunk->map[run_pageind];
|
||||
|
||||
// This block's conditional is necessary because if the
|
||||
// run only contains one region, then it never gets
|
||||
// inserted into the non-full runs tree.
|
||||
MOZ_DIAGNOSTIC_ASSERT(bin->mNonFullRuns.Search(run_mapelm) == run_mapelm);
|
||||
bin->mNonFullRuns.Remove(run_mapelm);
|
||||
}
|
||||
// This run is entirely freed, remove it from our bin.
|
||||
#if defined(MOZ_DIAGNOSTIC_ASSERT_ENABLED)
|
||||
run->mMagic = 0;
|
||||
#endif
|
||||
MOZ_ASSERT(bin->mNonFullRuns.ElementProbablyInList(run));
|
||||
bin->mNonFullRuns.remove(run);
|
||||
dealloc_chunk = DallocRun(run, true);
|
||||
bin->mNumRuns--;
|
||||
} else if (run->mNumFree == 1 && run != bin->mCurrentRun) {
|
||||
// Make sure that bin->mCurrentRun always refers to the lowest
|
||||
// non-full run, if one exists.
|
||||
if (!bin->mCurrentRun) {
|
||||
bin->mCurrentRun = run;
|
||||
} else if (uintptr_t(run) < uintptr_t(bin->mCurrentRun)) {
|
||||
// Switch mCurrentRun.
|
||||
if (bin->mCurrentRun->mNumFree > 0) {
|
||||
arena_chunk_t* runcur_chunk = GetChunkForPtr(bin->mCurrentRun);
|
||||
size_t runcur_pageind =
|
||||
(uintptr_t(bin->mCurrentRun) - uintptr_t(runcur_chunk)) >>
|
||||
gPageSize2Pow;
|
||||
arena_chunk_map_t* runcur_mapelm = &runcur_chunk->map[runcur_pageind];
|
||||
|
||||
// Insert runcur.
|
||||
MOZ_DIAGNOSTIC_ASSERT(!bin->mNonFullRuns.Search(runcur_mapelm));
|
||||
bin->mNonFullRuns.Insert(runcur_mapelm);
|
||||
}
|
||||
bin->mCurrentRun = run;
|
||||
} else {
|
||||
size_t run_pageind =
|
||||
(uintptr_t(run) - uintptr_t(aChunk)) >> gPageSize2Pow;
|
||||
arena_chunk_map_t* run_mapelm = &aChunk->map[run_pageind];
|
||||
|
||||
MOZ_DIAGNOSTIC_ASSERT(bin->mNonFullRuns.Search(run_mapelm) == nullptr);
|
||||
bin->mNonFullRuns.Insert(run_mapelm);
|
||||
}
|
||||
} else if (run->mNumFree == 1) {
|
||||
// This is first slot we freed from this run, start tracking.
|
||||
MOZ_ASSERT(!bin->mNonFullRuns.ElementProbablyInList(run));
|
||||
bin->mNonFullRuns.pushFront(run);
|
||||
}
|
||||
// else we just keep the run in mNonFullRuns where it is.
|
||||
// Note that we could move it to the head of the list here to get a strict
|
||||
// "most-recently-freed" order, but some of our benchmarks seem to be more
|
||||
// sensible to the increased overhead that this brings than to the order
|
||||
// supposedly slightly better for keeping CPU caches warm if we do.
|
||||
// In general we cannot foresee the future, so any order we choose might
|
||||
// perform different for different use cases and needs to be balanced with
|
||||
// the book-keeping overhead via measurements.
|
||||
|
||||
mStats.allocated_small -= size;
|
||||
|
||||
return dealloc_chunk;
|
||||
@ -4674,7 +4652,7 @@ arena_t::~arena_t() {
|
||||
chunk_dealloc(mSpare, kChunkSize, ARENA_CHUNK);
|
||||
}
|
||||
for (i = 0; i < NUM_SMALL_CLASSES; i++) {
|
||||
MOZ_RELEASE_ASSERT(!mBins[i].mNonFullRuns.First(), "Bin is not empty");
|
||||
MOZ_RELEASE_ASSERT(mBins[i].mNonFullRuns.isEmpty(), "Bin is not empty");
|
||||
}
|
||||
#ifdef MOZ_DEBUG
|
||||
{
|
||||
@ -5393,14 +5371,8 @@ inline void MozJemalloc::jemalloc_stats_internal(
|
||||
size_t bin_unused = 0;
|
||||
size_t num_non_full_runs = 0;
|
||||
|
||||
for (auto mapelm : bin->mNonFullRuns.iter()) {
|
||||
arena_run_t* run = (arena_run_t*)(mapelm->bits & ~gPageSizeMask);
|
||||
bin_unused += run->mNumFree * bin->mSizeClass;
|
||||
num_non_full_runs++;
|
||||
}
|
||||
|
||||
if (bin->mCurrentRun) {
|
||||
bin_unused += bin->mCurrentRun->mNumFree * bin->mSizeClass;
|
||||
for (auto run : bin->mNonFullRuns) {
|
||||
bin_unused += run.mNumFree * bin->mSizeClass;
|
||||
num_non_full_runs++;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user