Bug 1824655 - Avoid locking for the DOMArena jemalloc arena r=smaug

Differential Revision: https://phabricator.services.mozilla.com/D173828
Paul Bone 2023-06-09 05:06:49 +00:00
parent ff8485be61
commit 8ea82cb1ff
7 changed files with 190 additions and 33 deletions


@@ -37,6 +37,7 @@ class DOMArena {
DOMArena() {
arena_params_t params;
params.mMaxDirtyIncreaseOverride = 7;
params.mFlags = ARENA_FLAG_THREAD_MAIN_THREAD_ONLY;
mArenaId = moz_create_arena_with_params(&params);
}


@@ -9,12 +9,14 @@
#if defined(XP_WIN)
# include <windows.h>
#elif defined(XP_DARWIN)
# include "mozilla/Assertions.h"
# include <os/lock.h>
#else
# include <pthread.h>
#endif
#if defined(XP_DARWIN)
# include <os/lock.h>
#endif
#include "mozilla/Assertions.h"
#include "mozilla/Attributes.h"
#include "mozilla/ThreadSafety.h"
@@ -177,6 +179,97 @@ typedef Mutex StaticMutex;
#endif
#ifdef XP_WIN
typedef DWORD ThreadId;
inline ThreadId GetThreadId() { return GetCurrentThreadId(); }
#else
typedef pthread_t ThreadId;
inline ThreadId GetThreadId() { return pthread_self(); }
#endif
class MOZ_CAPABILITY("mutex") MaybeMutex : public Mutex {
public:
enum DoLock {
MUST_LOCK,
AVOID_LOCK_UNSAFE,
};
bool Init(DoLock aDoLock) {
mDoLock = aDoLock;
#ifdef MOZ_DEBUG
mThreadId = GetThreadId();
#endif
return Mutex::Init();
}
#ifndef XP_WIN
// Re-initialise after fork(); assumes that mDoLock is already initialised.
void Reinit(pthread_t aForkingThread) {
if (mDoLock == MUST_LOCK) {
Mutex::Init();
return;
}
# ifdef MOZ_DEBUG
// If this is an eluded lock we can only safely re-initialise it if the
// thread that called fork is the one that owns the lock.
if (pthread_equal(mThreadId, aForkingThread)) {
mThreadId = GetThreadId();
Mutex::Init();
} else {
// We can't guarantee that whatever resource this lock protects (probably a
// jemalloc arena) is in a consistent state.
mDeniedAfterFork = true;
}
# endif
}
#endif
inline void Lock() MOZ_CAPABILITY_ACQUIRE() {
if (ShouldLock()) {
Mutex::Lock();
}
}
inline void Unlock() MOZ_CAPABILITY_RELEASE() {
if (ShouldLock()) {
Mutex::Unlock();
}
}
// Return true if we can use this resource from this thread, either because
// we'll use the lock or because this is the only thread that will access the
// protected resource.
#ifdef MOZ_DEBUG
bool SafeOnThisThread() const {
return mDoLock == MUST_LOCK || GetThreadId() == mThreadId;
}
#endif
bool LockIsEnabled() const { return mDoLock == MUST_LOCK; }
private:
bool ShouldLock() {
#ifndef XP_WIN
MOZ_ASSERT(!mDeniedAfterFork);
#endif
if (mDoLock == MUST_LOCK) {
return true;
}
MOZ_ASSERT(GetThreadId() == mThreadId);
return false;
}
DoLock mDoLock;
#ifdef MOZ_DEBUG
ThreadId mThreadId;
# ifndef XP_WIN
bool mDeniedAfterFork = false;
# endif
#endif
};
template <typename T>
struct MOZ_SCOPED_CAPABILITY MOZ_RAII AutoLock {
explicit AutoLock(T& aMutex) MOZ_CAPABILITY_ACQUIRE(aMutex) : mMutex(aMutex) {
@@ -194,4 +287,6 @@ struct MOZ_SCOPED_CAPABILITY MOZ_RAII AutoLock {
using MutexAutoLock = AutoLock<Mutex>;
using MaybeMutexAutoLock = AutoLock<MaybeMutex>;
#endif
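For orientation, a hedged usage sketch (not part of this patch; gResourceLock and TouchResource are illustrative names): a MaybeMutex guarding a main-thread-only resource would be initialised once and then taken via MaybeMutexAutoLock.

// Sketch only: gResourceLock and TouchResource are hypothetical.
static MaybeMutex gResourceLock;

bool InitResource() {
  // AVOID_LOCK_UNSAFE is only valid when every access happens on the thread
  // that performs this initialisation (assumed here to be the main thread).
  return gResourceLock.Init(MaybeMutex::AVOID_LOCK_UNSAFE);
}

void TouchResource() {
  // With MUST_LOCK this takes the underlying mutex; with AVOID_LOCK_UNSAFE it
  // only debug-asserts that we are still on the owning thread.
  MaybeMutexAutoLock lock(gResourceLock);
  // ... mutate the protected state ...
}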


@@ -65,6 +65,7 @@ MALLOC_DECL(malloc_good_size, size_t, size_t)
# if MALLOC_FUNCS & MALLOC_FUNCS_JEMALLOC
// The 2nd argument points to an optional array exactly
// jemalloc_stats_num_bins() long to be filled in (if non-null).
// This must only be called on the main thread.
MALLOC_DECL(jemalloc_stats_internal, void, jemalloc_stats_t*,
jemalloc_bin_stats_t*)
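As a hedged usage sketch (the call site and printed fields are illustrative, not from this patch), consumers reach this through the jemalloc_stats wrapper in mozmemory.h and must already be on the main thread:

#include <cstdio>

#include "mozmemory.h"  // jemalloc_stats_t, jemalloc_stats()

void ReportHeapUsage() {
  // The caller is responsible for only invoking this on the main thread.
  jemalloc_stats_t stats;
  jemalloc_stats(&stats);  // the bin-stats argument defaults to nullptr
  printf("heap allocated: %zu bytes, mapped: %zu bytes\n", stats.allocated,
         stats.mapped);
}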
@@ -94,12 +95,15 @@ MALLOC_DECL(jemalloc_stats_num_bins, size_t)
// less work to do.
//
// If MALLOC_DOUBLE_PURGE is not defined, this function does nothing.
//
// It may only be used from the main thread.
MALLOC_DECL(jemalloc_purge_freed_pages, void)
// Free all unused dirty pages in all arenas. Calling this function will slow
// down subsequent allocations so it is recommended to use it only when
// memory needs to be reclaimed at all costs (see bug 805855). This function
// provides functionality similar to mallctl("arenas.purge") in jemalloc 3.
// It may only be used from the main thread.
MALLOC_DECL(jemalloc_free_dirty_pages, void)
// Opt in or out of a thread local arena (bool argument is whether to opt-in


@@ -1086,8 +1086,10 @@ struct arena_t {
// and it keeps the value it had after the destructor.
arena_id_t mId;
// All operations on this arena require that lock be locked.
Mutex mLock MOZ_UNANNOTATED;
// All operations on this arena require that lock be locked. The MaybeMutex
// class will elude locking if the arena is accessed from a single thread
// only.
MaybeMutex mLock MOZ_UNANNOTATED;
arena_stats_t mStats;
@@ -1274,6 +1276,7 @@ struct ArenaTreeTrait {
class ArenaCollection {
public:
bool Init() {
mMainThreadId = GetThreadId();
mArenas.Init();
mPrivateArenas.Init();
arena_params_t params;
@@ -1332,6 +1335,11 @@ class ArenaCollection {
Mutex mLock MOZ_UNANNOTATED;
bool IsOnMainThread() const { return mMainThreadId == GetThreadId(); }
// After a fork, set the new thread ID in the child.
void SetMainThread() { mMainThreadId = GetThreadId(); }
private:
inline arena_t* GetByIdInternal(arena_id_t aArenaId, bool aIsPrivate);
@@ -1340,6 +1348,7 @@ class ArenaCollection {
Tree mArenas;
Tree mPrivateArenas;
Atomic<int32_t> mDefaultMaxDirtyPageModifier;
ThreadId mMainThreadId;
};
static ArenaCollection gArenas;
@@ -3213,7 +3222,7 @@ void* arena_t::MallocSmall(size_t aSize, bool aZero) {
}
MOZ_ASSERT(!mRandomizeSmallAllocations || mPRNG);
MutexAutoLock lock(mLock);
MaybeMutexAutoLock lock(mLock);
run = bin->mCurrentRun;
if (MOZ_UNLIKELY(!run || run->mNumFree == 0)) {
run = bin->mCurrentRun = GetNonFullBinRun(bin);
@@ -3249,7 +3258,7 @@ void* arena_t::MallocLarge(size_t aSize, bool aZero) {
aSize = PAGE_CEILING(aSize);
{
MutexAutoLock lock(mLock);
MaybeMutexAutoLock lock(mLock);
ret = AllocRun(aSize, true, aZero);
if (!ret) {
return nullptr;
@@ -3287,7 +3296,7 @@ void* arena_t::PallocLarge(size_t aAlignment, size_t aSize, size_t aAllocSize) {
MOZ_ASSERT((aAlignment & gPageSizeMask) == 0);
{
MutexAutoLock lock(mLock);
MaybeMutexAutoLock lock(mLock);
ret = AllocRun(aAllocSize, true, false);
if (!ret) {
return nullptr;
@@ -3743,7 +3752,7 @@ static inline void arena_dalloc(void* aPtr, size_t aOffset, arena_t* aArena) {
arena_chunk_t* chunk_dealloc_delay = nullptr;
{
MutexAutoLock lock(arena->mLock);
MaybeMutexAutoLock lock(arena->mLock);
arena_chunk_map_t* mapelm = &chunk->map[pageind];
MOZ_RELEASE_ASSERT((mapelm->bits & CHUNK_MAP_DECOMMITTED) == 0,
"Freeing in decommitted page.");
@@ -3782,7 +3791,7 @@ void arena_t::RallocShrinkLarge(arena_chunk_t* aChunk, void* aPtr, size_t aSize,
// Shrink the run, and make trailing pages available for other
// allocations.
MutexAutoLock lock(mLock);
MaybeMutexAutoLock lock(mLock);
TrimRunTail(aChunk, (arena_run_t*)aPtr, aOldSize, aSize, true);
mStats.allocated_large -= aOldSize - aSize;
}
@@ -3793,7 +3802,7 @@ bool arena_t::RallocGrowLarge(arena_chunk_t* aChunk, void* aPtr, size_t aSize,
size_t pageind = (uintptr_t(aPtr) - uintptr_t(aChunk)) >> gPageSize2Pow;
size_t npages = aOldSize >> gPageSize2Pow;
MutexAutoLock lock(mLock);
MaybeMutexAutoLock lock(mLock);
MOZ_DIAGNOSTIC_ASSERT(aOldSize ==
(aChunk->map[pageind].bits & ~gPageSizeMask));
@@ -3892,8 +3901,6 @@ void arena_t::operator delete(void* aPtr) {
arena_t::arena_t(arena_params_t* aParams, bool aIsPrivate) {
unsigned i;
MOZ_RELEASE_ASSERT(mLock.Init());
memset(&mLink, 0, sizeof(mLink));
memset(&mStats, 0, sizeof(arena_stats_t));
mId = 0;
@@ -3906,9 +3913,10 @@ arena_t::arena_t(arena_params_t* aParams, bool aIsPrivate) {
mSpare = nullptr;
mRandomizeSmallAllocations = opt_randomize_small;
MaybeMutex::DoLock doLock = MaybeMutex::MUST_LOCK;
if (aParams) {
uint32_t flags = aParams->mFlags & ARENA_FLAG_RANDOMIZE_SMALL_MASK;
switch (flags) {
uint32_t randFlags = aParams->mFlags & ARENA_FLAG_RANDOMIZE_SMALL_MASK;
switch (randFlags) {
case ARENA_FLAG_RANDOMIZE_SMALL_ENABLED:
mRandomizeSmallAllocations = true;
break;
@@ -3920,6 +3928,22 @@ arena_t::arena_t(arena_params_t* aParams, bool aIsPrivate) {
break;
}
uint32_t threadFlags = aParams->mFlags & ARENA_FLAG_THREAD_MASK;
if (threadFlags == ARENA_FLAG_THREAD_MAIN_THREAD_ONLY) {
// At the moment we require that any ARENA_FLAG_THREAD_MAIN_THREAD_ONLY
// arenas are created on, and therefore only ever accessed by, the main
// thread. This is for two reasons:
// * It allows jemalloc_stats to read their statistics (we also require
// that jemalloc_stats is only used on the main thread).
// * Only main-thread or threadsafe arenas can be guaranteed to be in a
// consistent state after a fork() from the main thread. If fork() occurs
// off the main thread then the new child process cannot use these arenas
// (new children should usually exec() or exit() since other data may also
// be inconsistent).
MOZ_ASSERT(gArenas.IsOnMainThread());
doLock = MaybeMutex::AVOID_LOCK_UNSAFE;
}
mMaxDirtyIncreaseOverride = aParams->mMaxDirtyIncreaseOverride;
mMaxDirtyDecreaseOverride = aParams->mMaxDirtyDecreaseOverride;
} else {
@@ -3927,6 +3951,8 @@ arena_t::arena_t(arena_params_t* aParams, bool aIsPrivate) {
mMaxDirtyDecreaseOverride = 0;
}
MOZ_RELEASE_ASSERT(mLock.Init(doLock));
mPRNG = nullptr;
mIsPrivate = aIsPrivate;
@@ -3961,7 +3987,7 @@ arena_t::arena_t(arena_params_t* aParams, bool aIsPrivate) {
arena_t::~arena_t() {
size_t i;
MutexAutoLock lock(mLock);
MaybeMutexAutoLock lock(mLock);
MOZ_RELEASE_ASSERT(!mLink.Left() && !mLink.Right(),
"Arena is still registered");
MOZ_RELEASE_ASSERT(!mStats.allocated_small && !mStats.allocated_large,
@@ -4683,8 +4709,16 @@ inline void MozJemalloc::jemalloc_stats_internal(
}
gArenas.mLock.Lock();
// Stats can only read complete information if it is run on the main thread.
MOZ_ASSERT(gArenas.IsOnMainThread());
// Iterate over arenas.
for (auto arena : gArenas.iter()) {
// If we cannot safely read this arena's stats then the totals reported
// below would be incomplete.
MOZ_ASSERT(arena->mLock.SafeOnThisThread());
size_t arena_mapped, arena_allocated, arena_committed, arena_dirty, j,
arena_unused, arena_headers;
@@ -4692,7 +4726,7 @@ inline void MozJemalloc::jemalloc_stats_internal(
arena_unused = 0;
{
MutexAutoLock lock(arena->mLock);
MaybeMutexAutoLock lock(arena->mLock);
arena_mapped = arena->mStats.mapped;
@@ -4804,7 +4838,7 @@ static void hard_purge_chunk(arena_chunk_t* aChunk) {
// Explicitly remove all of this arena's MADV_FREE'd pages from memory.
void arena_t::HardPurge() {
MutexAutoLock lock(mLock);
MaybeMutexAutoLock lock(mLock);
while (!mChunksMAdvised.isEmpty()) {
arena_chunk_t* chunk = mChunksMAdvised.popFront();
@@ -4816,6 +4850,7 @@ template <>
inline void MozJemalloc::jemalloc_purge_freed_pages() {
if (malloc_initialized) {
MutexAutoLock lock(gArenas.mLock);
MOZ_ASSERT(gArenas.IsOnMainThread());
for (auto arena : gArenas.iter()) {
arena->HardPurge();
}
@@ -4835,8 +4870,9 @@ template <>
inline void MozJemalloc::jemalloc_free_dirty_pages(void) {
if (malloc_initialized) {
MutexAutoLock lock(gArenas.mLock);
MOZ_ASSERT(gArenas.IsOnMainThread());
for (auto arena : gArenas.iter()) {
MutexAutoLock arena_lock(arena->mLock);
MaybeMutexAutoLock arena_lock(arena->mLock);
arena->Purge(1);
}
}
@@ -4902,13 +4938,23 @@ inline void MozJemalloc::moz_set_max_dirty_page_modifier(int32_t aModifier) {
// of malloc during fork(). These functions are only called if the program is
// running in threaded mode, so there is no need to check whether the program
// is threaded here.
//
// Note that the only way to keep the main-thread-only arenas in a consistent
// state for the child is if fork() is called from the main thread only;
// otherwise the child must not use them, e.g. it should call exec(). We
// attempt to prevent the child from accessing these arenas by refusing to
// re-initialise them.
static pthread_t gForkingThread;
FORK_HOOK
void _malloc_prefork(void) MOZ_NO_THREAD_SAFETY_ANALYSIS {
// Acquire all mutexes in a safe order.
gArenas.mLock.Lock();
gForkingThread = pthread_self();
for (auto arena : gArenas.iter()) {
arena->mLock.Lock();
if (arena->mLock.LockIsEnabled()) {
arena->mLock.Lock();
}
}
base_mtx.Lock();
@@ -4924,7 +4970,9 @@ void _malloc_postfork_parent(void) MOZ_NO_THREAD_SAFETY_ANALYSIS {
base_mtx.Unlock();
for (auto arena : gArenas.iter()) {
arena->mLock.Unlock();
if (arena->mLock.LockIsEnabled()) {
arena->mLock.Unlock();
}
}
gArenas.mLock.Unlock();
@@ -4938,9 +4986,10 @@ void _malloc_postfork_child(void) {
base_mtx.Init();
for (auto arena : gArenas.iter()) {
arena->mLock.Init();
arena->mLock.Reinit(gForkingThread);
}
gArenas.SetMainThread();
gArenas.mLock.Init();
}
#endif // XP_WIN
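For context (an assumption, since the registration site is not part of this diff): on POSIX platforms, prepare/parent/child hooks like these are conventionally installed with pthread_atfork, roughly as follows.

#include <pthread.h>

// Sketch only: mozjemalloc installs its hooks during its own initialisation;
// this merely shows the standard POSIX wiring for fork hooks.
static void RegisterMallocForkHooks() {
  pthread_atfork(_malloc_prefork, _malloc_postfork_parent,
                 _malloc_postfork_child);
}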


@@ -63,6 +63,13 @@ typedef size_t arena_id_t;
#define ARENA_FLAG_RANDOMIZE_SMALL_ENABLED 1
#define ARENA_FLAG_RANDOMIZE_SMALL_DISABLED 2
// Arenas are usually protected by a lock (ARENA_FLAG_THREAD_SAFE); however,
// some arenas are only ever accessed by the main thread
// (ARENA_FLAG_THREAD_MAIN_THREAD_ONLY) and their locking can be skipped.
#define ARENA_FLAG_THREAD_MASK 0x4
#define ARENA_FLAG_THREAD_MAIN_THREAD_ONLY 0x4
#define ARENA_FLAG_THREAD_SAFE 0x0
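As a hedged illustration of how these flags are consumed (mirroring the DOMArena change above; the helper name is made up), a main-thread-only arena is requested through arena_params_t::mFlags, optionally combined with flags from the other groups:

#include "mozmemory.h"  // pulls in arena_params_t and moz_create_arena_with_params

// Hypothetical helper: creates a private arena that may only be used from the
// main thread, so its internal lock can be elided.
arena_id_t CreateMainThreadOnlyArena() {
  arena_params_t params;
  params.mFlags =
      ARENA_FLAG_THREAD_MAIN_THREAD_ONLY | ARENA_FLAG_RANDOMIZE_SMALL_ENABLED;
  return moz_create_arena_with_params(&params);
}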
typedef struct arena_params_s {
size_t mMaxDirty;
// Arena specific modifiers which override the value passed to


@@ -50,6 +50,8 @@ static inline size_t _malloc_good_size(size_t size) {
# define MALLOC_FUNCS MALLOC_FUNCS_JEMALLOC
# include "malloc_decls.h"
// jemalloc_stats may only be called on the main thread so that it can safely
// access main-thread-only arenas.
# ifdef __cplusplus
static inline void jemalloc_stats(jemalloc_stats_t* aStats,
jemalloc_bin_stats_t* aBinStats = nullptr) {


@@ -261,6 +261,16 @@ nsresult MemoryTelemetry::GatherReports(
RECORD(PAGE_FAULTS_HARD, PageFaultsHard, UNITS_COUNT_CUMULATIVE);
#endif
#ifdef HAVE_JEMALLOC_STATS
jemalloc_stats_t stats;
jemalloc_stats(&stats);
HandleMemoryReport(Telemetry::MEMORY_HEAP_ALLOCATED,
nsIMemoryReporter::UNITS_BYTES, mgr->HeapAllocated(stats));
HandleMemoryReport(Telemetry::MEMORY_HEAP_OVERHEAD_FRACTION,
nsIMemoryReporter::UNITS_PERCENTAGE,
mgr->HeapOverheadFraction(stats));
#endif
RefPtr<Runnable> completionRunnable;
if (aCompletionCallback) {
completionRunnable = NS_NewRunnableFunction(__func__, aCompletionCallback);
@@ -283,17 +293,6 @@ nsresult MemoryTelemetry::GatherReports(
RECORD(MEMORY_UNIQUE, ResidentUnique, UNITS_BYTES);
#endif
#ifdef HAVE_JEMALLOC_STATS
jemalloc_stats_t stats;
jemalloc_stats(&stats);
HandleMemoryReport(Telemetry::MEMORY_HEAP_ALLOCATED,
nsIMemoryReporter::UNITS_BYTES,
mgr->HeapAllocated(stats));
HandleMemoryReport(Telemetry::MEMORY_HEAP_OVERHEAD_FRACTION,
nsIMemoryReporter::UNITS_PERCENTAGE,
mgr->HeapOverheadFraction(stats));
#endif
if (completionRunnable) {
NS_DispatchToMainThread(completionRunnable.forget(),
NS_DISPATCH_NORMAL);