Bug 1716959 - On-thread sampling uses a per-thread JS frame buffer that's only allocated when there's also a JSContext - r=canaltinova

MergeStack requires a fairly large buffer to store JS frames, too big to be allocated on the stack without risking a stack overflow.
Until now, there was only one buffer, stored in CorePS, and only accessible while holding the Profiler gPSMutex.

Now each thread that has a JSContext, also has its own JS frame buffer, which is accessible on the thread without needing any lock.
The Profiler's Sampler still uses the CorePS buffer for its periodic sampling, but it won't prevent parallel on-thread sampling anymore.

The appropriate buffer is passed to ExtractJsFrames and then MergeStacks.
MergeStacks accepts a null pointer, which happens on threads where there is no JSContext, and therefore no JS to sample.

Differential Revision: https://phabricator.services.mozilla.com/D122087
This commit is contained in:
Gerald Squelart 2021-08-25 00:55:59 +00:00
parent 911112cf23
commit e8f70d8971
4 changed files with 67 additions and 24 deletions

View File

@ -116,6 +116,7 @@ void ThreadRegistrationLockedRWFromAnyThread::
ClearIsBeingProfiledAndProfiledThreadData(const PSAutoLock&) {
mIsBeingProfiled = false;
mProfiledThreadData = nullptr;
// Check invariants.
MOZ_ASSERT(mIsBeingProfiled == !!mProfiledThreadData);
}
@ -125,13 +126,29 @@ void ThreadRegistrationLockedRWOnThread::SetJSContext(JSContext* aJSContext) {
mJSContext = aJSContext;
// We now have a JSContext, allocate a JsFramesBuffer to allow unlocked
// on-thread sampling.
MOZ_ASSERT(!mJsFrameBuffer);
mJsFrameBuffer = new JsFrame[MAX_JS_FRAMES];
// We give the JS engine a non-owning reference to the ProfilingStack. It's
// important that the JS engine doesn't touch this once the thread dies.
js::SetContextProfilingStack(aJSContext, &ProfilingStackRef());
// Check invariants.
MOZ_ASSERT(!!mJSContext == !!mJsFrameBuffer);
}
void ThreadRegistrationLockedRWOnThread::ClearJSContext() {
mJSContext = nullptr;
if (mJsFrameBuffer) {
delete[] mJsFrameBuffer;
mJsFrameBuffer = nullptr;
}
// Check invariants.
MOZ_ASSERT(!!mJSContext == !!mJsFrameBuffer);
}
void ThreadRegistrationLockedRWOnThread::PollJSSampling() {

View File

@ -337,8 +337,10 @@ typedef const PSAutoLock& PSLockRef;
sInstance->m##name_ = a##name_; \
}
static const size_t MAX_JS_FRAMES = 1024;
using JsFrameBuffer = JS::ProfilingFrameIterator::Frame[MAX_JS_FRAMES];
static constexpr size_t MAX_JS_FRAMES =
mozilla::profiler::ThreadRegistrationData::MAX_JS_FRAMES;
using JsFrame = mozilla::profiler::ThreadRegistrationData::JsFrame;
using JsFrameBuffer = mozilla::profiler::ThreadRegistrationData::JsFrameBuffer;
// All functions in this file can run on multiple threads unless they have an
// NS_IsMainThread() assertion.
@ -1571,6 +1573,10 @@ static uint32_t ExtractJsFrames(
const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData,
const Registers& aRegs, ProfilerStackCollector& aCollector,
JsFrameBuffer aJsFrames, StackWalkControl* aStackWalkControlIfSupported) {
MOZ_ASSERT(aJsFrames,
"ExtractJsFrames should only be called if there is a "
"JsFrameBuffer to fill.");
uint32_t jsFramesCount = 0;
// Only walk jit stack if profiling frame iterator is turned on.
@ -1648,12 +1654,14 @@ static void MergeStacks(
uint32_t aFeatures, bool aIsSynchronous,
const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData,
const Registers& aRegs, const NativeStack& aNativeStack,
ProfilerStackCollector& aCollector, JsFrameBuffer aJsFrames,
ProfilerStackCollector& aCollector, JsFrame* aJsFrames,
uint32_t aJsFramesCount) {
// WARNING: this function runs within the profiler's "critical section".
// WARNING: this function might be called while the profiler is inactive, and
// cannot rely on ActivePS.
MOZ_ASSERT_IF(!aJsFrames, aJsFramesCount == 0);
const ProfilingStack& profilingStack = aThreadData.ProfilingStackCRef();
const js::ProfilingStackFrame* profilingStackFrames = profilingStack.frames;
uint32_t profilingStackFrameCount = profilingStack.stackSize();
@ -2257,8 +2265,8 @@ static void DoNativeBacktrace(
static inline void DoSharedSample(
PSLockRef aLock, bool aIsSynchronous, uint32_t aFeatures,
const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData,
const Registers& aRegs, uint64_t aSamplePos, uint64_t aBufferRangeStart,
ProfileBuffer& aBuffer,
JsFrame* aJsFrames, const Registers& aRegs, uint64_t aSamplePos,
uint64_t aBufferRangeStart, ProfileBuffer& aBuffer,
StackCaptureOptions aCaptureOptions = StackCaptureOptions::Full) {
// WARNING: this function runs within the profiler's "critical section".
@ -2268,7 +2276,6 @@ static inline void DoSharedSample(
MOZ_RELEASE_ASSERT(ActivePS::Exists(aLock));
ProfileBufferCollector collector(aBuffer, aSamplePos, aBufferRangeStart);
JsFrameBuffer& jsFrames = CorePS::JsFrames(aLock);
StackWalkControl* stackWalkControlIfSupported = nullptr;
#if defined(HAVE_NATIVE_UNWIND)
const bool captureNative = ProfilerFeature::HasStackWalk(aFeatures) &&
@ -2281,8 +2288,9 @@ static inline void DoSharedSample(
}
#endif // defined(HAVE_NATIVE_UNWIND)
const uint32_t jsFramesCount =
ExtractJsFrames(aIsSynchronous, aThreadData, aRegs, collector, jsFrames,
stackWalkControlIfSupported);
aJsFrames ? ExtractJsFrames(aIsSynchronous, aThreadData, aRegs, collector,
aJsFrames, stackWalkControlIfSupported)
: 0;
NativeStack nativeStack;
#if defined(HAVE_NATIVE_UNWIND)
if (captureNative) {
@ -2290,12 +2298,12 @@ static inline void DoSharedSample(
stackWalkControlIfSupported);
MergeStacks(aFeatures, aIsSynchronous, aThreadData, aRegs, nativeStack,
collector, jsFrames, jsFramesCount);
collector, aJsFrames, jsFramesCount);
} else
#endif
{
MergeStacks(aFeatures, aIsSynchronous, aThreadData, aRegs, nativeStack,
collector, jsFrames, jsFramesCount);
collector, aJsFrames, jsFramesCount);
// We can't walk the whole native stack, but we can record the top frame.
if (ProfilerFeature::HasLeaf(aFeatures) &&
@ -2325,7 +2333,8 @@ static void DoSyncSample(
aBuffer.AddEntry(ProfileBufferEntry::Time(delta.ToMilliseconds()));
DoSharedSample(aLock, /* aIsSynchronous = */ true, aFeatures, aThreadData,
aRegs, samplePos, bufferRangeStart, aBuffer, aCaptureOptions);
aThreadData.GetJsFrameBuffer(), aRegs, samplePos,
bufferRangeStart, aBuffer, aCaptureOptions);
}
// Writes the components of a periodic sample to ActivePS's ProfileBuffer.
@ -2338,8 +2347,10 @@ static inline void DoPeriodicSample(
ProfileBuffer& aBuffer) {
// WARNING: this function runs within the profiler's "critical section".
JsFrameBuffer& jsFrames = CorePS::JsFrames(aLock);
DoSharedSample(aLock, /* aIsSynchronous = */ false, ActivePS::Features(aLock),
aThreadData, aRegs, aSamplePos, aBufferRangeStart, aBuffer);
aThreadData, jsFrames, aRegs, aSamplePos, aBufferRangeStart,
aBuffer);
}
// END sampling/unwinding code
@ -5657,8 +5668,8 @@ void profiler_clear_js_context() {
static void profiler_suspend_and_sample_thread(
PSLockRef aLock,
const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData,
bool aIsSynchronous, uint32_t aFeatures, ProfilerStackCollector& aCollector,
bool aSampleNative) {
JsFrame* aJsFrames, bool aIsSynchronous, uint32_t aFeatures,
ProfilerStackCollector& aCollector, bool aSampleNative) {
const ThreadRegistrationInfo& info = aThreadData.Info();
if (info.IsMainThread()) {
@ -5671,7 +5682,6 @@ static void profiler_suspend_and_sample_thread(
auto collectStack = [&](const Registers& aRegs, const TimeStamp& aNow) {
// The target thread is now suspended. Collect a native backtrace,
// and call the callback.
JsFrameBuffer& jsFrames = CorePS::JsFrames(aLock);
StackWalkControl* stackWalkControlIfSupported = nullptr;
#if defined(HAVE_FASTINIT_NATIVE_UNWIND)
StackWalkControl stackWalkControl;
@ -5682,8 +5692,10 @@ static void profiler_suspend_and_sample_thread(
}
#endif
const uint32_t jsFramesCount =
ExtractJsFrames(aIsSynchronous, aThreadData, aRegs, aCollector,
jsFrames, stackWalkControlIfSupported);
aJsFrames
? ExtractJsFrames(aIsSynchronous, aThreadData, aRegs, aCollector,
aJsFrames, stackWalkControlIfSupported)
: 0;
#if defined(HAVE_FASTINIT_NATIVE_UNWIND)
if (aSampleNative) {
@ -5701,12 +5713,12 @@ static void profiler_suspend_and_sample_thread(
# endif
MergeStacks(aFeatures, aIsSynchronous, aThreadData, aRegs, nativeStack,
aCollector, jsFrames, jsFramesCount);
aCollector, aJsFrames, jsFramesCount);
} else
#endif
{
MergeStacks(aFeatures, aIsSynchronous, aThreadData, aRegs, nativeStack,
aCollector, jsFrames, jsFramesCount);
aCollector, aJsFrames, jsFramesCount);
if (ProfilerFeature::HasLeaf(aFeatures)) {
aCollector.CollectNativeLeafAddr((void*)aRegs.mPC);
@ -5754,9 +5766,9 @@ void profiler_suspend_and_sample_thread(ProfilerThreadId aThreadId,
// TODO: Remove this lock when on-thread sampling doesn't
// require it anymore.
PSAutoLock lock;
profiler_suspend_and_sample_thread(lock, aThreadData, true,
aFeatures, aCollector,
aSampleNative);
profiler_suspend_and_sample_thread(
lock, aThreadData, aThreadData.GetJsFrameBuffer(), true,
aFeatures, aCollector, aSampleNative);
});
});
} else {
@ -5767,8 +5779,9 @@ void profiler_suspend_and_sample_thread(ProfilerThreadId aThreadId,
aOffThreadRef.WithLockedRWFromAnyThread(
[&](const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread&
aThreadData) {
profiler_suspend_and_sample_thread(lock, aThreadData, false,
aFeatures, aCollector,
JsFrameBuffer& jsFrames = CorePS::JsFrames(lock);
profiler_suspend_and_sample_thread(lock, aThreadData, jsFrames,
false, aFeatures, aCollector,
aSampleNative);
});
});

View File

@ -37,6 +37,7 @@
#ifndef ProfilerThreadRegistrationData_h
#define ProfilerThreadRegistrationData_h
#include "js/ProfilingFrameIterator.h"
#include "js/ProfilingStack.h"
#include "mozilla/Atomics.h"
#include "mozilla/MemoryReporting.h"
@ -69,6 +70,10 @@ class ThreadRegistrationData {
return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
}
static constexpr size_t MAX_JS_FRAMES = 1024;
using JsFrame = JS::ProfilingFrameIterator::Frame;
using JsFrameBuffer = JsFrame[MAX_JS_FRAMES];
// `protected` to allow derived classes to read all data members.
protected:
ThreadRegistrationData(const char* aName, const void* aStackTop);
@ -105,6 +110,10 @@ class ThreadRegistrationData {
// Written from thread, read from thread and suspended thread.
JSContext* mJSContext = nullptr;
// If mJSContext is not null, this points at the start of a JsFrameBuffer to
// be used for on-thread synchronous sampling.
JsFrame* mJsFrameBuffer = nullptr;
// The profiler needs to start and stop JS sampling of JS threads at various
// times. However, the JS engine can only do the required actions on the
// JS thread itself ("on-thread"), not from another thread ("off-thread").
@ -353,6 +362,9 @@ class ThreadRegistrationUnlockedReaderAndAtomicRWOnThread
[[nodiscard]] JSContext* GetJSContext() const { return mJSContext; }
// Not null when JSContext is not null. Points at the start of JsFrameBuffer.
[[nodiscard]] JsFrame* GetJsFrameBuffer() const { return mJsFrameBuffer; }
protected:
ThreadRegistrationUnlockedReaderAndAtomicRWOnThread(const char* aName,
const void* aStackTop)

View File

@ -301,6 +301,7 @@ static void TestConstUnlockedReaderAndAtomicRWOnThread(
aThreadId);
EXPECT_EQ(aData.GetJSContext(), nullptr);
EXPECT_EQ(aData.GetJsFrameBuffer(), nullptr);
};
static void TestUnlockedRWForLockedProfiler(