Bug 1824655 - Avoid locking for the DOMArena jemalloc arena r=smaug

Differential Revision: https://phabricator.services.mozilla.com/D173828
Paul Bone 2023-06-09 05:06:49 +00:00
parent ff8485be61
commit 8ea82cb1ff
7 changed files with 190 additions and 33 deletions


@@ -37,6 +37,7 @@ class DOMArena {
DOMArena() {
arena_params_t params;
params.mMaxDirtyIncreaseOverride = 7;
params.mFlags = ARENA_FLAG_THREAD_MAIN_THREAD_ONLY;
mArenaId = moz_create_arena_with_params(&params);
}


@@ -9,12 +9,14 @@
#if defined(XP_WIN)
# include <windows.h>
#elif defined(XP_DARWIN)
# include "mozilla/Assertions.h"
# include <os/lock.h>
#else
# include <pthread.h>
#endif
#if defined(XP_DARWIN)
# include <os/lock.h>
#endif
#include "mozilla/Assertions.h"
#include "mozilla/Attributes.h"
#include "mozilla/ThreadSafety.h"
@@ -177,6 +179,97 @@ typedef Mutex StaticMutex;
#endif
#ifdef XP_WIN
typedef DWORD ThreadId;
inline ThreadId GetThreadId() { return GetCurrentThreadId(); }
#else
typedef pthread_t ThreadId;
inline ThreadId GetThreadId() { return pthread_self(); }
#endif
class MOZ_CAPABILITY("mutex") MaybeMutex : public Mutex {
public:
enum DoLock {
MUST_LOCK,
AVOID_LOCK_UNSAFE,
};
bool Init(DoLock aDoLock) {
mDoLock = aDoLock;
#ifdef MOZ_DEBUG
mThreadId = GetThreadId();
#endif
return Mutex::Init();
}
#ifndef XP_WIN
// Re-initialise after fork(); assumes that mDoLock is already initialised.
void Reinit(pthread_t aForkingThread) {
if (mDoLock == MUST_LOCK) {
Mutex::Init();
return;
}
# ifdef MOZ_DEBUG
// If this is an eluded lock we can only safely re-initialise it if the
// thread that called fork is the one that owns the lock.
if (pthread_equal(mThreadId, aForkingThread)) {
mThreadId = GetThreadId();
Mutex::Init();
} else {
// We can't guarantee that whatever resource this lock protects (probably a
// jemalloc arena) is in a consistent state.
mDeniedAfterFork = true;
}
# endif
}
#endif
inline void Lock() MOZ_CAPABILITY_ACQUIRE() {
if (ShouldLock()) {
Mutex::Lock();
}
}
inline void Unlock() MOZ_CAPABILITY_RELEASE() {
if (ShouldLock()) {
Mutex::Unlock();
}
}
// Return true if we can use this resource from this thread, either because
// we'll use the lock or because this is the only thread that will access the
// protected resource.
#ifdef MOZ_DEBUG
bool SafeOnThisThread() const {
return mDoLock == MUST_LOCK || GetThreadId() == mThreadId;
}
#endif
bool LockIsEnabled() const { return mDoLock == MUST_LOCK; }
private:
bool ShouldLock() {
#ifndef XP_WIN
MOZ_ASSERT(!mDeniedAfterFork);
#endif
if (mDoLock == MUST_LOCK) {
return true;
}
MOZ_ASSERT(GetThreadId() == mThreadId);
return false;
}
DoLock mDoLock;
#ifdef MOZ_DEBUG
ThreadId mThreadId;
# ifndef XP_WIN
bool mDeniedAfterFork = false;
# endif
#endif
};
template <typename T>
struct MOZ_SCOPED_CAPABILITY MOZ_RAII AutoLock {
explicit AutoLock(T& aMutex) MOZ_CAPABILITY_ACQUIRE(aMutex) : mMutex(aMutex) {
@@ -194,4 +287,6 @@ struct MOZ_SCOPED_CAPABILITY MOZ_RAII AutoLock {
using MutexAutoLock = AutoLock<Mutex>;
using MaybeMutexAutoLock = AutoLock<MaybeMutex>;
#endif
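For orientation, a hedged usage sketch (not part of this patch; gResourceLock and TouchResource are illustrative names): a MaybeMutex guarding a main-thread-only resource would be initialised once and then taken via MaybeMutexAutoLock.

// Sketch only: gResourceLock and TouchResource are hypothetical.
static MaybeMutex gResourceLock;

bool InitResource() {
  // AVOID_LOCK_UNSAFE is only valid when every access happens on the thread
  // that performs this initialisation (assumed here to be the main thread).
  return gResourceLock.Init(MaybeMutex::AVOID_LOCK_UNSAFE);
}

void TouchResource() {
  // With MUST_LOCK this takes the underlying mutex; with AVOID_LOCK_UNSAFE it
  // only debug-asserts that we are still on the owning thread.
  MaybeMutexAutoLock lock(gResourceLock);
  // ... mutate the protected state ...
}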


@@ -65,6 +65,7 @@ MALLOC_DECL(malloc_good_size, size_t, size_t)
# if MALLOC_FUNCS & MALLOC_FUNCS_JEMALLOC
// The 2nd argument points to an optional array exactly
// jemalloc_stats_num_bins() long to be filled in (if non-null).
// This must only be called on the main thread.
MALLOC_DECL(jemalloc_stats_internal, void, jemalloc_stats_t*,
jemalloc_bin_stats_t*)
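As a hedged usage sketch (the call site and printed fields are illustrative, not from this patch), consumers reach this through the jemalloc_stats wrapper in mozmemory.h and must already be on the main thread:

#include <cstdio>

#include "mozmemory.h"  // jemalloc_stats_t, jemalloc_stats()

void ReportHeapUsage() {
  // The caller is responsible for only invoking this on the main thread.
  jemalloc_stats_t stats;
  jemalloc_stats(&stats);  // the bin-stats argument defaults to nullptr
  printf("heap allocated: %zu bytes, mapped: %zu bytes\n", stats.allocated,
         stats.mapped);
}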
@@ -94,12 +95,15 @@ MALLOC_DECL(jemalloc_stats_num_bins, size_t)
// less work to do.
//
// If MALLOC_DOUBLE_PURGE is not defined, this function does nothing.
//
// It may only be used from the main thread.
MALLOC_DECL(jemalloc_purge_freed_pages, void)
// Free all unused dirty pages in all arenas. Calling this function will slow
// down subsequent allocations so it is recommended to use it only when
// memory needs to be reclaimed at all costs (see bug 805855). This function
// provides functionality similar to mallctl("arenas.purge") in jemalloc 3.
// It may only be used from the main thread.
MALLOC_DECL(jemalloc_free_dirty_pages, void)
// Opt in or out of a thread local arena (bool argument is whether to opt-in


@@ -1086,8 +1086,10 @@ struct arena_t {
// and it keeps the value it had after the destructor.
arena_id_t mId;
// All operations on this arena require that lock be locked.
Mutex mLock MOZ_UNANNOTATED;
// All operations on this arena require that lock be locked. The MaybeMutex
// class will elude locking if the arena is accessed from a single thread
// only.
MaybeMutex mLock MOZ_UNANNOTATED;
arena_stats_t mStats;
@@ -1274,6 +1276,7 @@ struct ArenaTreeTrait {
class ArenaCollection {
public:
bool Init() {
mMainThreadId = GetThreadId();
mArenas.Init();
mPrivateArenas.Init();
arena_params_t params;
@@ -1332,6 +1335,11 @@ class ArenaCollection {
Mutex mLock MOZ_UNANNOTATED;
bool IsOnMainThread() const { return mMainThreadId == GetThreadId(); }
// After a fork, set the new thread ID in the child.
void SetMainThread() { mMainThreadId = GetThreadId(); }
private:
inline arena_t* GetByIdInternal(arena_id_t aArenaId, bool aIsPrivate);
@@ -1340,6 +1348,7 @@ class ArenaCollection {
Tree mArenas;
Tree mPrivateArenas;
Atomic<int32_t> mDefaultMaxDirtyPageModifier;
ThreadId mMainThreadId;
};
static ArenaCollection gArenas;
@@ -3213,7 +3222,7 @@ void* arena_t::MallocSmall(size_t aSize, bool aZero) {
}
MOZ_ASSERT(!mRandomizeSmallAllocations || mPRNG);
MutexAutoLock lock(mLock);
MaybeMutexAutoLock lock(mLock);
run = bin->mCurrentRun;
if (MOZ_UNLIKELY(!run || run->mNumFree == 0)) {
run = bin->mCurrentRun = GetNonFullBinRun(bin);
@@ -3249,7 +3258,7 @@ void* arena_t::MallocLarge(size_t aSize, bool aZero) {
aSize = PAGE_CEILING(aSize);
{
MutexAutoLock lock(mLock);
MaybeMutexAutoLock lock(mLock);
ret = AllocRun(aSize, true, aZero);
if (!ret) {
return nullptr;
@@ -3287,7 +3296,7 @@ void* arena_t::PallocLarge(size_t aAlignment, size_t aSize, size_t aAllocSize) {
MOZ_ASSERT((aAlignment & gPageSizeMask) == 0);
{
MutexAutoLock lock(mLock);
MaybeMutexAutoLock lock(mLock);
ret = AllocRun(aAllocSize, true, false);
if (!ret) {
return nullptr;
@@ -3743,7 +3752,7 @@ static inline void arena_dalloc(void* aPtr, size_t aOffset, arena_t* aArena) {
arena_chunk_t* chunk_dealloc_delay = nullptr;
{
MutexAutoLock lock(arena->mLock);
MaybeMutexAutoLock lock(arena->mLock);
arena_chunk_map_t* mapelm = &chunk->map[pageind];
MOZ_RELEASE_ASSERT((mapelm->bits & CHUNK_MAP_DECOMMITTED) == 0,
"Freeing in decommitted page.");
@@ -3782,7 +3791,7 @@ void arena_t::RallocShrinkLarge(arena_chunk_t* aChunk, void* aPtr, size_t aSize,
// Shrink the run, and make trailing pages available for other
// allocations.
MutexAutoLock lock(mLock);
MaybeMutexAutoLock lock(mLock);
TrimRunTail(aChunk, (arena_run_t*)aPtr, aOldSize, aSize, true);
mStats.allocated_large -= aOldSize - aSize;
}
@@ -3793,7 +3802,7 @@ bool arena_t::RallocGrowLarge(arena_chunk_t* aChunk, void* aPtr, size_t aSize,
size_t pageind = (uintptr_t(aPtr) - uintptr_t(aChunk)) >> gPageSize2Pow;
size_t npages = aOldSize >> gPageSize2Pow;
MutexAutoLock lock(mLock);
MaybeMutexAutoLock lock(mLock);
MOZ_DIAGNOSTIC_ASSERT(aOldSize ==
(aChunk->map[pageind].bits & ~gPageSizeMask));
@@ -3892,8 +3901,6 @@ void arena_t::operator delete(void* aPtr) {
arena_t::arena_t(arena_params_t* aParams, bool aIsPrivate) {
unsigned i;
MOZ_RELEASE_ASSERT(mLock.Init());
memset(&mLink, 0, sizeof(mLink));
memset(&mStats, 0, sizeof(arena_stats_t));
mId = 0;
@@ -3906,9 +3913,10 @@ arena_t::arena_t(arena_params_t* aParams, bool aIsPrivate) {
mSpare = nullptr;
mRandomizeSmallAllocations = opt_randomize_small;
MaybeMutex::DoLock doLock = MaybeMutex::MUST_LOCK;
if (aParams) {
uint32_t flags = aParams->mFlags & ARENA_FLAG_RANDOMIZE_SMALL_MASK;
switch (flags) {
uint32_t randFlags = aParams->mFlags & ARENA_FLAG_RANDOMIZE_SMALL_MASK;
switch (randFlags) {
case ARENA_FLAG_RANDOMIZE_SMALL_ENABLED:
mRandomizeSmallAllocations = true;
break;
@@ -3920,6 +3928,22 @@ arena_t::arena_t(arena_params_t* aParams, bool aIsPrivate) {
break;
}
uint32_t threadFlags = aParams->mFlags & ARENA_FLAG_THREAD_MASK;
if (threadFlags == ARENA_FLAG_THREAD_MAIN_THREAD_ONLY) {
// At the moment we require that any ARENA_FLAG_THREAD_MAIN_THREAD_ONLY
// arenas are created on, and therefore only ever accessed by, the main
// thread. This is for two reasons:
// * It allows jemalloc_stats to read their statistics (we also require
// that jemalloc_stats is only used on the main thread).
// * Only main-thread or threadsafe arenas can be guaranteed to be in a
// consistent state after a fork() from the main thread. If fork() occurs
// off the main thread then the new child process cannot use these arenas
// (new children should usually exec() or exit() since other data may also
// be inconsistent).
MOZ_ASSERT(gArenas.IsOnMainThread());
doLock = MaybeMutex::AVOID_LOCK_UNSAFE;
}
mMaxDirtyIncreaseOverride = aParams->mMaxDirtyIncreaseOverride;
mMaxDirtyDecreaseOverride = aParams->mMaxDirtyDecreaseOverride;
} else {
@@ -3927,6 +3951,8 @@ arena_t::arena_t(arena_params_t* aParams, bool aIsPrivate) {
mMaxDirtyDecreaseOverride = 0;
}
MOZ_RELEASE_ASSERT(mLock.Init(doLock));
mPRNG = nullptr;
mIsPrivate = aIsPrivate;
@@ -3961,7 +3987,7 @@ arena_t::arena_t(arena_params_t* aParams, bool aIsPrivate) {
arena_t::~arena_t() {
size_t i;
MutexAutoLock lock(mLock);
MaybeMutexAutoLock lock(mLock);
MOZ_RELEASE_ASSERT(!mLink.Left() && !mLink.Right(),
"Arena is still registered");
MOZ_RELEASE_ASSERT(!mStats.allocated_small && !mStats.allocated_large,
@@ -4683,8 +4709,16 @@ inline void MozJemalloc::jemalloc_stats_internal(
}
gArenas.mLock.Lock();
// Stats can only read complete information if it is run on the main thread.
MOZ_ASSERT(gArenas.IsOnMainThread());
// Iterate over arenas.
for (auto arena : gArenas.iter()) {
// If we cannot safely read this arena's stats then the totals reported
// below would be incomplete.
MOZ_ASSERT(arena->mLock.SafeOnThisThread());
size_t arena_mapped, arena_allocated, arena_committed, arena_dirty, j,
arena_unused, arena_headers;
@@ -4692,7 +4726,7 @@ inline void MozJemalloc::jemalloc_stats_internal(
arena_unused = 0;
{
MutexAutoLock lock(arena->mLock);
MaybeMutexAutoLock lock(arena->mLock);
arena_mapped = arena->mStats.mapped;
@@ -4804,7 +4838,7 @@ static void hard_purge_chunk(arena_chunk_t* aChunk) {
// Explicitly remove all of this arena's MADV_FREE'd pages from memory.
void arena_t::HardPurge() {
MutexAutoLock lock(mLock);
MaybeMutexAutoLock lock(mLock);
while (!mChunksMAdvised.isEmpty()) {
arena_chunk_t* chunk = mChunksMAdvised.popFront();
@@ -4816,6 +4850,7 @@ template <>
inline void MozJemalloc::jemalloc_purge_freed_pages() {
if (malloc_initialized) {
MutexAutoLock lock(gArenas.mLock);
MOZ_ASSERT(gArenas.IsOnMainThread());
for (auto arena : gArenas.iter()) {
arena->HardPurge();
}
@@ -4835,8 +4870,9 @@ template <>
inline void MozJemalloc::jemalloc_free_dirty_pages(void) {
if (malloc_initialized) {
MutexAutoLock lock(gArenas.mLock);
MOZ_ASSERT(gArenas.IsOnMainThread());
for (auto arena : gArenas.iter()) {
MutexAutoLock arena_lock(arena->mLock);
MaybeMutexAutoLock arena_lock(arena->mLock);
arena->Purge(1);
}
}
@@ -4902,13 +4938,23 @@ inline void MozJemalloc::moz_set_max_dirty_page_modifier(int32_t aModifier) {
// of malloc during fork(). These functions are only called if the program is
// running in threaded mode, so there is no need to check whether the program
// is threaded here.
//
// Note that the only way to keep the main-thread-only arenas in a consistent
// state for the child is if fork() is called from the main thread only;
// otherwise the child must not use them, e.g. it should call exec(). We
// attempt to prevent the child from accessing these arenas by refusing to
// re-initialise them.
static pthread_t gForkingThread;
FORK_HOOK
void _malloc_prefork(void) MOZ_NO_THREAD_SAFETY_ANALYSIS {
// Acquire all mutexes in a safe order.
gArenas.mLock.Lock();
gForkingThread = pthread_self();
for (auto arena : gArenas.iter()) {
arena->mLock.Lock();
if (arena->mLock.LockIsEnabled()) {
arena->mLock.Lock();
}
}
base_mtx.Lock();
@@ -4924,7 +4970,9 @@ void _malloc_postfork_parent(void) MOZ_NO_THREAD_SAFETY_ANALYSIS {
base_mtx.Unlock();
for (auto arena : gArenas.iter()) {
arena->mLock.Unlock();
if (arena->mLock.LockIsEnabled()) {
arena->mLock.Unlock();
}
}
gArenas.mLock.Unlock();
@@ -4938,9 +4986,10 @@ void _malloc_postfork_child(void) {
base_mtx.Init();
for (auto arena : gArenas.iter()) {
arena->mLock.Init();
arena->mLock.Reinit(gForkingThread);
}
gArenas.SetMainThread();
gArenas.mLock.Init();
}
#endif // XP_WIN
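For context (an assumption, since the registration site is not part of this diff): on POSIX platforms, prepare/parent/child hooks like these are conventionally installed with pthread_atfork, roughly as follows.

#include <pthread.h>

// Sketch only: mozjemalloc installs its hooks during its own initialisation;
// this merely shows the standard POSIX wiring for fork hooks.
static void RegisterMallocForkHooks() {
  pthread_atfork(_malloc_prefork, _malloc_postfork_parent,
                 _malloc_postfork_child);
}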


@@ -63,6 +63,13 @@ typedef size_t arena_id_t;
#define ARENA_FLAG_RANDOMIZE_SMALL_ENABLED 1
#define ARENA_FLAG_RANDOMIZE_SMALL_DISABLED 2
// Arenas are usually protected by a lock (ARENA_FLAG_THREAD_SAFE); however,
// some arenas are only ever accessed by the main thread
// (ARENA_FLAG_THREAD_MAIN_THREAD_ONLY) and their locking can be skipped.
#define ARENA_FLAG_THREAD_MASK 0x4
#define ARENA_FLAG_THREAD_MAIN_THREAD_ONLY 0x4
#define ARENA_FLAG_THREAD_SAFE 0x0
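As a hedged illustration of how these flags are consumed (mirroring the DOMArena change above; the helper name is made up), a main-thread-only arena is requested through arena_params_t::mFlags, optionally combined with flags from the other groups:

#include "mozmemory.h"  // pulls in arena_params_t and moz_create_arena_with_params

// Hypothetical helper: creates a private arena that may only be used from the
// main thread, so its internal lock can be elided.
arena_id_t CreateMainThreadOnlyArena() {
  arena_params_t params;
  params.mFlags =
      ARENA_FLAG_THREAD_MAIN_THREAD_ONLY | ARENA_FLAG_RANDOMIZE_SMALL_ENABLED;
  return moz_create_arena_with_params(&params);
}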
typedef struct arena_params_s {
size_t mMaxDirty;
// Arena specific modifiers which override the value passed to


@@ -50,6 +50,8 @@ static inline size_t _malloc_good_size(size_t size) {
# define MALLOC_FUNCS MALLOC_FUNCS_JEMALLOC
# include "malloc_decls.h"
// jemalloc_stats may only be called on the main thread so that it can safely
// access main-thread-only arenas.
# ifdef __cplusplus
static inline void jemalloc_stats(jemalloc_stats_t* aStats,
jemalloc_bin_stats_t* aBinStats = nullptr) {


@@ -261,6 +261,16 @@ nsresult MemoryTelemetry::GatherReports(
RECORD(PAGE_FAULTS_HARD, PageFaultsHard, UNITS_COUNT_CUMULATIVE);
#endif
#ifdef HAVE_JEMALLOC_STATS
jemalloc_stats_t stats;
jemalloc_stats(&stats);
HandleMemoryReport(Telemetry::MEMORY_HEAP_ALLOCATED,
nsIMemoryReporter::UNITS_BYTES, mgr->HeapAllocated(stats));
HandleMemoryReport(Telemetry::MEMORY_HEAP_OVERHEAD_FRACTION,
nsIMemoryReporter::UNITS_PERCENTAGE,
mgr->HeapOverheadFraction(stats));
#endif
RefPtr<Runnable> completionRunnable;
if (aCompletionCallback) {
completionRunnable = NS_NewRunnableFunction(__func__, aCompletionCallback);
@@ -283,17 +293,6 @@ nsresult MemoryTelemetry::GatherReports(
RECORD(MEMORY_UNIQUE, ResidentUnique, UNITS_BYTES);
#endif
#ifdef HAVE_JEMALLOC_STATS
jemalloc_stats_t stats;
jemalloc_stats(&stats);
HandleMemoryReport(Telemetry::MEMORY_HEAP_ALLOCATED,
nsIMemoryReporter::UNITS_BYTES,
mgr->HeapAllocated(stats));
HandleMemoryReport(Telemetry::MEMORY_HEAP_OVERHEAD_FRACTION,
nsIMemoryReporter::UNITS_PERCENTAGE,
mgr->HeapOverheadFraction(stats));
#endif
if (completionRunnable) {
NS_DispatchToMainThread(completionRunnable.forget(),
NS_DISPATCH_NORMAL);