mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-10-10 03:45:46 +00:00
Bug 1373154 (part 3, attempt 2) - Overhaul TickSample and things around it. r=mstange.
Bug 1357829 added a third kind of sample, in addition to the existing "periodic" and "synchronous" samples. This patch cleans things up around that change. In particular, it cleans up TickSample, which is a mess of semi-related things. The patch does the following. - It removes everything from TickSample except the register values and renames TickSample as Registers. Almost all the removed stuff is available in ThreadInfo anyway, and the patch adds a ThreadInfo argument to various functions. (Doing it this way became possible only recently, because a ThreadInfo wasn't previously available in profiler_get_backtrace().) One non-obvious consequence: in synchronous samples we used to use a value of 0 for the stackTop. Because synchronous samples now use ThreadInfo directly, they are able to use the proper stack top value from ThreadInfo::mStackTop. This will presumably only improve the quality of the stack traces. - It splits Tick() in two and renames the halves DoPeriodicSample() and DoSyncSample(). - It reorders arguments in some functions so that ProfileBuffer (the output) is always last, and inputs are passed in roughly the order they are obtained. - It adds a comment at the top of platform.cpp explaining the three kinds of sample. - It renames a couple of other things. --HG-- extra : rebase_source : 4f1e69c605102354dd56ef7af5ebade201e1d106
This commit is contained in:
parent
61516fe3a0
commit
d529284168
@ -40,7 +40,7 @@ public:
|
||||
|
||||
// Add to the buffer a sample start (ThreadId) entry for aThreadId. Also,
|
||||
// record the resulting generation and index in |aLS| if it's non-null.
|
||||
void addTagThreadId(int aThreadId, LastSample* aLS);
|
||||
void addTagThreadId(int aThreadId, LastSample* aLS = nullptr);
|
||||
|
||||
void StreamSamplesToJSON(SpliceableJSONWriter& aWriter, int aThreadId,
|
||||
double aSinceTime, JSContext* cx,
|
||||
|
@ -358,8 +358,8 @@ private:
|
||||
INACTIVE_REQUESTED = 3,
|
||||
} mJSSampling;
|
||||
|
||||
// When sampling, this holds the generation number and offset in PS::mBuffer
|
||||
// of the most recent sample for this thread.
|
||||
// When sampling, this holds the generation number and offset in
|
||||
// ActivePS::mBuffer of the most recent sample for this thread.
|
||||
ProfileBuffer::LastSample mLastSample;
|
||||
};
|
||||
|
||||
|
@ -76,30 +76,30 @@ Thread::GetCurrentId()
|
||||
}
|
||||
|
||||
static void
|
||||
FillInSample(TickSample& aSample, ucontext_t* aContext)
|
||||
FillInRegs(Registers& aRegs, ucontext_t* aContext)
|
||||
{
|
||||
aSample.mContext = aContext;
|
||||
aRegs.mContext = aContext;
|
||||
mcontext_t& mcontext = aContext->uc_mcontext;
|
||||
|
||||
// Extracting the sample from the context is extremely machine dependent.
|
||||
#if defined(GP_ARCH_x86)
|
||||
aSample.mPC = reinterpret_cast<Address>(mcontext.gregs[REG_EIP]);
|
||||
aSample.mSP = reinterpret_cast<Address>(mcontext.gregs[REG_ESP]);
|
||||
aSample.mFP = reinterpret_cast<Address>(mcontext.gregs[REG_EBP]);
|
||||
aRegs.mPC = reinterpret_cast<Address>(mcontext.gregs[REG_EIP]);
|
||||
aRegs.mSP = reinterpret_cast<Address>(mcontext.gregs[REG_ESP]);
|
||||
aRegs.mFP = reinterpret_cast<Address>(mcontext.gregs[REG_EBP]);
|
||||
#elif defined(GP_ARCH_amd64)
|
||||
aSample.mPC = reinterpret_cast<Address>(mcontext.gregs[REG_RIP]);
|
||||
aSample.mSP = reinterpret_cast<Address>(mcontext.gregs[REG_RSP]);
|
||||
aSample.mFP = reinterpret_cast<Address>(mcontext.gregs[REG_RBP]);
|
||||
aRegs.mPC = reinterpret_cast<Address>(mcontext.gregs[REG_RIP]);
|
||||
aRegs.mSP = reinterpret_cast<Address>(mcontext.gregs[REG_RSP]);
|
||||
aRegs.mFP = reinterpret_cast<Address>(mcontext.gregs[REG_RBP]);
|
||||
#elif defined(GP_ARCH_arm)
|
||||
aSample.mPC = reinterpret_cast<Address>(mcontext.arm_pc);
|
||||
aSample.mSP = reinterpret_cast<Address>(mcontext.arm_sp);
|
||||
aSample.mFP = reinterpret_cast<Address>(mcontext.arm_fp);
|
||||
aSample.mLR = reinterpret_cast<Address>(mcontext.arm_lr);
|
||||
aRegs.mPC = reinterpret_cast<Address>(mcontext.arm_pc);
|
||||
aRegs.mSP = reinterpret_cast<Address>(mcontext.arm_sp);
|
||||
aRegs.mFP = reinterpret_cast<Address>(mcontext.arm_fp);
|
||||
aRegs.mLR = reinterpret_cast<Address>(mcontext.arm_lr);
|
||||
#elif defined(GP_ARCH_aarch64)
|
||||
aSample.mPC = reinterpret_cast<Address>(mcontext.pc);
|
||||
aSample.mSP = reinterpret_cast<Address>(mcontext.sp);
|
||||
aSample.mFP = reinterpret_cast<Address>(mcontext.regs[29]);
|
||||
aSample.mLR = reinterpret_cast<Address>(mcontext.regs[30]);
|
||||
aRegs.mPC = reinterpret_cast<Address>(mcontext.pc);
|
||||
aRegs.mSP = reinterpret_cast<Address>(mcontext.sp);
|
||||
aRegs.mFP = reinterpret_cast<Address>(mcontext.regs[29]);
|
||||
aRegs.mLR = reinterpret_cast<Address>(mcontext.regs[30]);
|
||||
#else
|
||||
# error "bad platform"
|
||||
#endif
|
||||
@ -303,8 +303,8 @@ Sampler::Disable(PSLockRef aLock)
|
||||
template<typename Func>
|
||||
void
|
||||
Sampler::SuspendAndSampleAndResumeThread(PSLockRef aLock,
|
||||
TickSample& aSample,
|
||||
const Func& aDoSample)
|
||||
const ThreadInfo& aThreadInfo,
|
||||
const Func& aProcessRegs)
|
||||
{
|
||||
// Only one sampler thread can be sampling at once. So we expect to have
|
||||
// complete control over |sSigHandlerCoordinator|.
|
||||
@ -313,7 +313,7 @@ Sampler::SuspendAndSampleAndResumeThread(PSLockRef aLock,
|
||||
if (mSamplerTid == -1) {
|
||||
mSamplerTid = gettid();
|
||||
}
|
||||
int sampleeTid = aSample.mThreadId;
|
||||
int sampleeTid = aThreadInfo.ThreadId();
|
||||
MOZ_RELEASE_ASSERT(sampleeTid != mSamplerTid);
|
||||
|
||||
//----------------------------------------------------------------//
|
||||
@ -357,10 +357,10 @@ Sampler::SuspendAndSampleAndResumeThread(PSLockRef aLock,
|
||||
// The samplee thread is now frozen and sSigHandlerCoordinator->mUContext is
|
||||
// valid. We can poke around in it and unwind its stack as we like.
|
||||
|
||||
// Extract the current PC and sp.
|
||||
FillInSample(aSample, &sSigHandlerCoordinator->mUContext);
|
||||
|
||||
aDoSample();
|
||||
// Extract the current register values.
|
||||
Registers regs;
|
||||
FillInRegs(regs, &sSigHandlerCoordinator->mUContext);
|
||||
aProcessRegs(regs);
|
||||
|
||||
//----------------------------------------------------------------//
|
||||
// Resume the target thread.
|
||||
@ -524,13 +524,12 @@ PlatformInit(PSLockRef aLock)
|
||||
#endif
|
||||
|
||||
void
|
||||
TickSample::PopulateContext(ucontext_t* aContext)
|
||||
Registers::SyncPopulate(ucontext_t* aContext)
|
||||
{
|
||||
MOZ_ASSERT(mIsSynchronous);
|
||||
MOZ_ASSERT(aContext);
|
||||
|
||||
if (!getcontext(aContext)) {
|
||||
FillInSample(*this, aContext);
|
||||
FillInRegs(*this, aContext);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -76,10 +76,11 @@ Sampler::Disable(PSLockRef aLock)
|
||||
template<typename Func>
|
||||
void
|
||||
Sampler::SuspendAndSampleAndResumeThread(PSLockRef aLock,
|
||||
TickSample& aSample,
|
||||
const Func& aDoSample)
|
||||
const ThreadInfo& aThreadInfo,
|
||||
const Func& aProcessRegs)
|
||||
{
|
||||
thread_act_t samplee_thread = aSample.mPlatformData->ProfiledThread();
|
||||
thread_act_t samplee_thread =
|
||||
aThreadInfo.GetPlatformData()->ProfiledThread();
|
||||
|
||||
//----------------------------------------------------------------//
|
||||
// Suspend the samplee thread and get its context.
|
||||
@ -115,11 +116,12 @@ Sampler::SuspendAndSampleAndResumeThread(PSLockRef aLock,
|
||||
flavor,
|
||||
reinterpret_cast<natural_t*>(&state),
|
||||
&count) == KERN_SUCCESS) {
|
||||
aSample.mPC = reinterpret_cast<Address>(state.REGISTER_FIELD(ip));
|
||||
aSample.mSP = reinterpret_cast<Address>(state.REGISTER_FIELD(sp));
|
||||
aSample.mFP = reinterpret_cast<Address>(state.REGISTER_FIELD(bp));
|
||||
Registers regs;
|
||||
regs.mPC = reinterpret_cast<Address>(state.REGISTER_FIELD(ip));
|
||||
regs.mSP = reinterpret_cast<Address>(state.REGISTER_FIELD(sp));
|
||||
regs.mFP = reinterpret_cast<Address>(state.REGISTER_FIELD(bp));
|
||||
|
||||
aDoSample();
|
||||
aProcessRegs(regs);
|
||||
}
|
||||
|
||||
#undef REGISTER_FIELD
|
||||
@ -191,10 +193,8 @@ PlatformInit(PSLockRef aLock)
|
||||
}
|
||||
|
||||
void
|
||||
TickSample::PopulateContext()
|
||||
Registers::SyncPopulate()
|
||||
{
|
||||
MOZ_ASSERT(mIsSynchronous);
|
||||
|
||||
asm (
|
||||
// Compute caller's %rsp by adding to %rbp:
|
||||
// 8 bytes for previous %rbp, 8 bytes for return address
|
||||
|
@ -93,10 +93,10 @@ Sampler::Disable(PSLockRef aLock)
|
||||
template<typename Func>
|
||||
void
|
||||
Sampler::SuspendAndSampleAndResumeThread(PSLockRef aLock,
|
||||
TickSample& aSample,
|
||||
const Func& aDoSample)
|
||||
const ThreadInfo& aThreadInfo,
|
||||
const Func& aProcessRegs)
|
||||
{
|
||||
HANDLE profiled_thread = aSample.mPlatformData->ProfiledThread();
|
||||
HANDLE profiled_thread = aThreadInfo.GetPlatformData()->ProfiledThread();
|
||||
if (profiled_thread == nullptr) {
|
||||
return;
|
||||
}
|
||||
@ -138,17 +138,18 @@ Sampler::SuspendAndSampleAndResumeThread(PSLockRef aLock,
|
||||
// what we do here, or risk deadlock. See the corresponding comment in
|
||||
// platform-linux-android.cpp for details.
|
||||
|
||||
Registers regs;
|
||||
#if defined(GP_ARCH_amd64)
|
||||
aSample.mPC = reinterpret_cast<Address>(context.Rip);
|
||||
aSample.mSP = reinterpret_cast<Address>(context.Rsp);
|
||||
aSample.mFP = reinterpret_cast<Address>(context.Rbp);
|
||||
regs.mPC = reinterpret_cast<Address>(context.Rip);
|
||||
regs.mSP = reinterpret_cast<Address>(context.Rsp);
|
||||
regs.mFP = reinterpret_cast<Address>(context.Rbp);
|
||||
#else
|
||||
aSample.mPC = reinterpret_cast<Address>(context.Eip);
|
||||
aSample.mSP = reinterpret_cast<Address>(context.Esp);
|
||||
aSample.mFP = reinterpret_cast<Address>(context.Ebp);
|
||||
regs.mPC = reinterpret_cast<Address>(context.Eip);
|
||||
regs.mSP = reinterpret_cast<Address>(context.Esp);
|
||||
regs.mFP = reinterpret_cast<Address>(context.Ebp);
|
||||
#endif
|
||||
|
||||
aDoSample();
|
||||
aProcessRegs(regs);
|
||||
|
||||
//----------------------------------------------------------------//
|
||||
// Resume the target thread.
|
||||
@ -264,10 +265,8 @@ PlatformInit(PSLockRef aLock)
|
||||
}
|
||||
|
||||
void
|
||||
TickSample::PopulateContext()
|
||||
Registers::SyncPopulate()
|
||||
{
|
||||
MOZ_ASSERT(mIsSynchronous);
|
||||
|
||||
CONTEXT context;
|
||||
RtlCaptureContext(&context);
|
||||
|
||||
|
@ -4,6 +4,28 @@
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
// There are three kinds of samples done by the profiler.
|
||||
//
|
||||
// - A "periodic" sample is the most complex kind. It is done in response to a
|
||||
// timer while the profiler is active. It involves writing a stack trace plus
|
||||
// a variety of other values (memory measurements, responsiveness
|
||||
// measurements, markers, etc.) into the main ProfileBuffer. The sampling is
|
||||
// done from off-thread, and so SuspendAndSampleAndResumeThread() is used to
|
||||
// get the register values.
|
||||
//
|
||||
// - A "synchronous" sample is a simpler kind. It is done in response to an API
|
||||
// call (profiler_get_backtrace()). It involves writing a stack trace and
|
||||
// little else into a temporary ProfileBuffer, and wrapping that up in a
|
||||
// ProfilerBacktrace that can be subsequently used in a marker. The sampling
|
||||
// is done on-thread, and so Registers::SyncPopulate() is used to get the
|
||||
// register values.
|
||||
//
|
||||
// - A "backtrace" sample is the simplest kind. It is done in response to an
|
||||
// API call (profiler_suspend_and_sample_thread()). It involves getting a
|
||||
// stack trace and passing it to a callback function; it does not write to a
|
||||
// ProfileBuffer. The sampling is done from off-thread, and so uses
|
||||
// SuspendAndSampleAndResumeThread() to get the register values.
|
||||
|
||||
#include <algorithm>
|
||||
#include <ostream>
|
||||
#include <fstream>
|
||||
@ -603,103 +625,41 @@ MOZ_THREAD_LOCAL(PseudoStack*) sPseudoStack;
|
||||
static const char* const kMainThreadName = "GeckoMain";
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// BEGIN tick/unwinding code
|
||||
// BEGIN sampling/unwinding code
|
||||
|
||||
// TickSample contains all the information needed by Tick(). Some of it is
|
||||
// pointers to long-lived things, and some of it is sampled just before the
|
||||
// call to Tick().
|
||||
class TickSample {
|
||||
// The registers used for stack unwinding and a few other sampling purposes.
|
||||
class Registers
|
||||
{
|
||||
public:
|
||||
// This constructor is for periodic samples, i.e. those performed in response
|
||||
// to a timer firing. Periodic samples are performed off-thread, i.e. the
|
||||
// SamplerThread samples the thread in question.
|
||||
TickSample(ThreadInfo* aThreadInfo, int64_t aRSSMemory, int64_t aUSSMemory)
|
||||
: mIsSynchronous(false)
|
||||
, mTimeStamp(TimeStamp::Now())
|
||||
, mThreadId(aThreadInfo->ThreadId())
|
||||
, mRacyInfo(aThreadInfo->RacyInfo())
|
||||
, mJSContext(aThreadInfo->mContext)
|
||||
, mStackTop(aThreadInfo->StackTop())
|
||||
, mLastSample(&aThreadInfo->LastSample())
|
||||
, mPlatformData(aThreadInfo->GetPlatformData())
|
||||
, mResponsiveness(aThreadInfo->GetThreadResponsiveness())
|
||||
, mRSSMemory(aRSSMemory) // may be zero
|
||||
, mUSSMemory(aUSSMemory) // may be zero
|
||||
#if defined(GP_OS_linux) || defined(GP_OS_android)
|
||||
, mContext(nullptr)
|
||||
#endif
|
||||
, mPC(nullptr)
|
||||
Registers()
|
||||
: mPC(nullptr)
|
||||
, mSP(nullptr)
|
||||
, mFP(nullptr)
|
||||
, mLR(nullptr)
|
||||
{}
|
||||
|
||||
// This constructor is for synchronous samples, i.e. those performed in
|
||||
// response to an explicit sampling request via the API. Synchronous samples
|
||||
// are performed on-thread, i.e. the thread samples itself.
|
||||
TickSample(NotNull<RacyThreadInfo*> aRacyInfo, JSContext* aJSContext,
|
||||
PlatformData* aPlatformData)
|
||||
: mIsSynchronous(true)
|
||||
, mTimeStamp(TimeStamp::Now())
|
||||
, mThreadId(Thread::GetCurrentId())
|
||||
, mRacyInfo(aRacyInfo)
|
||||
, mJSContext(aJSContext)
|
||||
, mStackTop(nullptr)
|
||||
, mLastSample(nullptr)
|
||||
, mPlatformData(aPlatformData)
|
||||
, mResponsiveness(nullptr)
|
||||
, mRSSMemory(0)
|
||||
, mUSSMemory(0)
|
||||
#if defined(GP_OS_linux) || defined(GP_OS_android)
|
||||
, mContext(nullptr)
|
||||
#endif
|
||||
, mPC(nullptr)
|
||||
, mSP(nullptr)
|
||||
, mFP(nullptr)
|
||||
, mLR(nullptr)
|
||||
{}
|
||||
|
||||
// Fills in mContext, mPC, mSP, mFP, and mLR for a synchronous sample.
|
||||
#if defined(GP_OS_linux) || defined(GP_OS_android)
|
||||
void PopulateContext(ucontext_t* aContext);
|
||||
void SyncPopulate(ucontext_t* aContext);
|
||||
#else
|
||||
void PopulateContext();
|
||||
void SyncPopulate();
|
||||
#endif
|
||||
|
||||
// False for periodic samples, true for synchronous samples.
|
||||
const bool mIsSynchronous;
|
||||
|
||||
const TimeStamp mTimeStamp;
|
||||
|
||||
const int mThreadId;
|
||||
|
||||
const NotNull<RacyThreadInfo*> mRacyInfo;
|
||||
|
||||
JSContext* const mJSContext;
|
||||
|
||||
void* const mStackTop;
|
||||
|
||||
ProfileBuffer::LastSample* const mLastSample; // may be null
|
||||
|
||||
PlatformData* const mPlatformData;
|
||||
|
||||
ThreadResponsiveness* const mResponsiveness; // may be null
|
||||
|
||||
const int64_t mRSSMemory; // may be zero
|
||||
const int64_t mUSSMemory; // may be zero
|
||||
|
||||
// The remaining fields are filled in, after construction, by
|
||||
// SamplerThread::SuspendAndSampleAndResume() for periodic samples, and
|
||||
// PopulateContext() for synchronous samples. They are filled in separately
|
||||
// from the other fields in this class because the code that fills them in is
|
||||
// platform-specific.
|
||||
#if defined(GP_OS_linux) || defined(GP_OS_android)
|
||||
ucontext_t* mContext; // The context from the signal handler.
|
||||
#endif
|
||||
// These fields are filled in by
|
||||
// SamplerThread::SuspendAndSampleAndResumeThread() for periodic and
|
||||
// backtrace samples, and by SyncPopulate() for synchronous samples.
|
||||
Address mPC; // Instruction pointer.
|
||||
Address mSP; // Stack pointer.
|
||||
Address mFP; // Frame pointer.
|
||||
Address mLR; // ARM link register.
|
||||
#if defined(GP_OS_linux) || defined(GP_OS_android)
|
||||
// This contains all the registers, which means it duplicates the four fields
|
||||
// above. This is ok.
|
||||
ucontext_t* mContext; // The context from the signal handler.
|
||||
#endif
|
||||
};
|
||||
|
||||
static void
|
||||
@ -833,15 +793,16 @@ struct AutoWalkJSStack
|
||||
};
|
||||
|
||||
static void
|
||||
MergeStacksIntoProfile(PSLockRef aLock, ProfileBuffer* aBuffer,
|
||||
const TickSample& aSample, NativeStack& aNativeStack)
|
||||
MergeStacksIntoProfile(PSLockRef aLock, bool aIsSynchronous,
|
||||
const ThreadInfo& aThreadInfo, const Registers& aRegs,
|
||||
const NativeStack& aNativeStack, ProfileBuffer* aBuffer)
|
||||
{
|
||||
// WARNING: this function runs within the profiler's "critical section".
|
||||
|
||||
NotNull<RacyThreadInfo*> racyInfo = aSample.mRacyInfo;
|
||||
NotNull<RacyThreadInfo*> racyInfo = aThreadInfo.RacyInfo();
|
||||
js::ProfileEntry* pseudoEntries = racyInfo->entries;
|
||||
uint32_t pseudoCount = racyInfo->stackSize();
|
||||
JSContext* context = aSample.mJSContext;
|
||||
JSContext* context = aThreadInfo.mContext;
|
||||
|
||||
// Make a copy of the JS stack into a JSFrame array. This is necessary since,
|
||||
// like the native stack, the JS stack is iterated youngest-to-oldest and we
|
||||
@ -852,7 +813,7 @@ MergeStacksIntoProfile(PSLockRef aLock, ProfileBuffer* aBuffer,
|
||||
// sampled JIT entries inside the JS engine. See note below concerning 'J'
|
||||
// entries.
|
||||
uint32_t startBufferGen;
|
||||
startBufferGen = aSample.mIsSynchronous
|
||||
startBufferGen = aIsSynchronous
|
||||
? UINT32_MAX
|
||||
: aBuffer->mGeneration;
|
||||
uint32_t jsCount = 0;
|
||||
@ -865,16 +826,16 @@ MergeStacksIntoProfile(PSLockRef aLock, ProfileBuffer* aBuffer,
|
||||
|
||||
if (autoWalkJSStack.walkAllowed) {
|
||||
JS::ProfilingFrameIterator::RegisterState registerState;
|
||||
registerState.pc = aSample.mPC;
|
||||
registerState.sp = aSample.mSP;
|
||||
registerState.lr = aSample.mLR;
|
||||
registerState.fp = aSample.mFP;
|
||||
registerState.pc = aRegs.mPC;
|
||||
registerState.sp = aRegs.mSP;
|
||||
registerState.lr = aRegs.mLR;
|
||||
registerState.fp = aRegs.mFP;
|
||||
|
||||
JS::ProfilingFrameIterator jsIter(context, registerState,
|
||||
startBufferGen);
|
||||
for (; jsCount < maxFrames && !jsIter.done(); ++jsIter) {
|
||||
// See note below regarding 'J' entries.
|
||||
if (aSample.mIsSynchronous || jsIter.isWasm()) {
|
||||
if (aIsSynchronous || jsIter.isWasm()) {
|
||||
uint32_t extracted =
|
||||
jsIter.extractStack(jsFrames, jsCount, maxFrames);
|
||||
jsCount += extracted;
|
||||
@ -993,7 +954,7 @@ MergeStacksIntoProfile(PSLockRef aLock, ProfileBuffer* aBuffer,
|
||||
// JIT code. This means that if we inserted such OptInfoAddr entries into
|
||||
// the buffer, nsRefreshDriver would now be holding on to a backtrace
|
||||
// with stale JIT code return addresses.
|
||||
if (aSample.mIsSynchronous ||
|
||||
if (aIsSynchronous ||
|
||||
jsFrame.kind == JS::ProfilingFrameIterator::Frame_Wasm) {
|
||||
AddDynamicCodeLocationTag(aBuffer, jsFrame.label);
|
||||
} else {
|
||||
@ -1023,7 +984,7 @@ MergeStacksIntoProfile(PSLockRef aLock, ProfileBuffer* aBuffer,
|
||||
//
|
||||
// Do not do this for synchronous samples, which use their own
|
||||
// ProfileBuffers instead of the global one in ActivePS.
|
||||
if (!aSample.mIsSynchronous && context) {
|
||||
if (!aIsSynchronous && context) {
|
||||
MOZ_ASSERT(aBuffer->mGeneration >= startBufferGen);
|
||||
uint32_t lapCount = aBuffer->mGeneration - startBufferGen;
|
||||
JS::UpdateJSContextProfilerSampleBufferGen(context, aBuffer->mGeneration,
|
||||
@ -1047,8 +1008,8 @@ StackWalkCallback(uint32_t aFrameNumber, void* aPC, void* aSP, void* aClosure)
|
||||
}
|
||||
|
||||
static void
|
||||
DoNativeBacktrace(PSLockRef aLock, NativeStack& aNativeStack,
|
||||
const TickSample& aSample)
|
||||
DoNativeBacktrace(PSLockRef aLock, const ThreadInfo& aThreadInfo,
|
||||
const Registers& aRegs, NativeStack& aNativeStack)
|
||||
{
|
||||
// WARNING: this function runs within the profiler's "critical section".
|
||||
|
||||
@ -1056,21 +1017,21 @@ DoNativeBacktrace(PSLockRef aLock, NativeStack& aNativeStack,
|
||||
// the FramePointerStackWalk() and MozStackWalk() calls below will use 1..N.
|
||||
// This is a bit weird but it doesn't matter because StackWalkCallback()
|
||||
// doesn't use the frame number argument.
|
||||
StackWalkCallback(/* frameNum */ 0, aSample.mPC, aSample.mSP, &aNativeStack);
|
||||
StackWalkCallback(/* frameNum */ 0, aRegs.mPC, aRegs.mSP, &aNativeStack);
|
||||
|
||||
uint32_t maxFrames = uint32_t(MAX_NATIVE_FRAMES - aNativeStack.mCount);
|
||||
|
||||
#if defined(GP_OS_darwin) || (defined(GP_PLAT_x86_windows))
|
||||
void* stackEnd = aSample.mStackTop;
|
||||
if (aSample.mFP >= aSample.mSP && aSample.mFP <= stackEnd) {
|
||||
void* stackEnd = aThreadInfo.StackTop();
|
||||
if (aRegs.mFP >= aRegs.mSP && aRegs.mFP <= stackEnd) {
|
||||
FramePointerStackWalk(StackWalkCallback, /* skipFrames */ 0, maxFrames,
|
||||
&aNativeStack, reinterpret_cast<void**>(aSample.mFP),
|
||||
&aNativeStack, reinterpret_cast<void**>(aRegs.mFP),
|
||||
stackEnd);
|
||||
}
|
||||
#else
|
||||
// Win64 always omits frame pointers so for it we use the slower
|
||||
// MozStackWalk().
|
||||
uintptr_t thread = GetThreadHandle(aSample.mPlatformData);
|
||||
uintptr_t thread = GetThreadHandle(aThreadInfo.GetPlatformData());
|
||||
MOZ_ASSERT(thread);
|
||||
MozStackWalk(StackWalkCallback, /* skipFrames */ 0, maxFrames, &aNativeStack,
|
||||
thread, /* platformData */ nullptr);
|
||||
@ -1080,14 +1041,14 @@ DoNativeBacktrace(PSLockRef aLock, NativeStack& aNativeStack,
|
||||
|
||||
#ifdef USE_EHABI_STACKWALK
|
||||
static void
|
||||
DoNativeBacktrace(PSLockRef aLock, NativeStack& aNativeStack,
|
||||
const TickSample& aSample)
|
||||
DoNativeBacktrace(PSLockRef aLock, const ThreadInfo& aThreadInfo,
|
||||
const Registers& aRegs, NativeStack& aNativeStack)
|
||||
{
|
||||
// WARNING: this function runs within the profiler's "critical section".
|
||||
|
||||
const mcontext_t* mcontext = &aSample.mContext->uc_mcontext;
|
||||
const mcontext_t* mcontext = &aRegs.mContext->uc_mcontext;
|
||||
mcontext_t savedContext;
|
||||
NotNull<RacyThreadInfo*> racyInfo = aSample.mRacyInfo;
|
||||
NotNull<RacyThreadInfo*> racyInfo = aThreadInfo.RacyInfo();
|
||||
|
||||
// The pseudostack contains an "EnterJIT" frame whenever we enter
|
||||
// JIT code with profiling enabled; the stack pointer value points
|
||||
@ -1131,7 +1092,7 @@ DoNativeBacktrace(PSLockRef aLock, NativeStack& aNativeStack,
|
||||
// Now unwind whatever's left (starting from either the last EnterJIT frame
|
||||
// or, if no EnterJIT was found, the original registers).
|
||||
aNativeStack.mCount +=
|
||||
EHABIStackWalk(*mcontext, aSample.mStackTop,
|
||||
EHABIStackWalk(*mcontext, aThreadInfo.StackTop(),
|
||||
aNativeStack.mSPs + aNativeStack.mCount,
|
||||
aNativeStack.mPCs + aNativeStack.mCount,
|
||||
MAX_NATIVE_FRAMES - aNativeStack.mCount);
|
||||
@ -1159,12 +1120,12 @@ ASAN_memcpy(void* aDst, const void* aSrc, size_t aLen)
|
||||
#endif
|
||||
|
||||
static void
|
||||
DoNativeBacktrace(PSLockRef aLock, NativeStack& aNativeStack,
|
||||
const TickSample& aSample)
|
||||
DoNativeBacktrace(PSLockRef aLock, const ThreadInfo& aThreadInfo,
|
||||
const Registers& aRegs, NativeStack& aNativeStack)
|
||||
{
|
||||
// WARNING: this function runs within the profiler's "critical section".
|
||||
|
||||
const mcontext_t* mc = &aSample.mContext->uc_mcontext;
|
||||
const mcontext_t* mc = &aRegs.mContext->uc_mcontext;
|
||||
|
||||
lul::UnwindRegs startRegs;
|
||||
memset(&startRegs, 0, sizeof(startRegs));
|
||||
@ -1234,7 +1195,7 @@ DoNativeBacktrace(PSLockRef aLock, NativeStack& aNativeStack,
|
||||
#else
|
||||
# error "Unknown plat"
|
||||
#endif
|
||||
uintptr_t end = reinterpret_cast<uintptr_t>(aSample.mStackTop);
|
||||
uintptr_t end = reinterpret_cast<uintptr_t>(aThreadInfo.StackTop());
|
||||
uintptr_t ws = sizeof(void*);
|
||||
start &= ~(ws-1);
|
||||
end &= ~(ws-1);
|
||||
@ -1288,66 +1249,93 @@ DoNativeBacktrace(PSLockRef aLock, NativeStack& aNativeStack,
|
||||
|
||||
#endif
|
||||
|
||||
void
|
||||
Tick(PSLockRef aLock, const TickSample& aSample, ProfileBuffer* aBuffer)
|
||||
// Writes some components shared by periodic and synchronous samples to
|
||||
// the given ProfileBuffer. (This should only be called from DoSyncSample()
|
||||
// and DoPeriodicSample().)
|
||||
static inline void
|
||||
DoSharedSample(PSLockRef aLock, bool aIsSynchronous,
|
||||
ThreadInfo& aThreadInfo, const TimeStamp& aNow,
|
||||
const Registers& aRegs, ProfileBuffer::LastSample* aLS,
|
||||
ProfileBuffer* aBuffer)
|
||||
{
|
||||
// WARNING: this function runs within the profiler's "critical section".
|
||||
|
||||
MOZ_RELEASE_ASSERT(ActivePS::Exists(aLock));
|
||||
|
||||
aBuffer->addTagThreadId(aSample.mThreadId, aSample.mLastSample);
|
||||
aBuffer->addTagThreadId(aThreadInfo.ThreadId(), aLS);
|
||||
|
||||
TimeDuration delta = aSample.mTimeStamp - CorePS::ProcessStartTime();
|
||||
TimeDuration delta = aNow - CorePS::ProcessStartTime();
|
||||
aBuffer->addTag(ProfileBufferEntry::Time(delta.ToMilliseconds()));
|
||||
|
||||
NativeStack nativeStack;
|
||||
#if defined(HAVE_NATIVE_UNWIND)
|
||||
if (ActivePS::FeatureStackWalk(aLock)) {
|
||||
DoNativeBacktrace(aLock, nativeStack, aSample);
|
||||
DoNativeBacktrace(aLock, aThreadInfo, aRegs, nativeStack);
|
||||
|
||||
MergeStacksIntoProfile(aLock, aBuffer, aSample, nativeStack);
|
||||
MergeStacksIntoProfile(aLock, aIsSynchronous, aThreadInfo, aRegs,
|
||||
nativeStack, aBuffer);
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
MergeStacksIntoProfile(aLock, aBuffer, aSample, nativeStack);
|
||||
MergeStacksIntoProfile(aLock, aIsSynchronous, aThreadInfo, aRegs,
|
||||
nativeStack, aBuffer);
|
||||
|
||||
if (ActivePS::FeatureLeaf(aLock)) {
|
||||
aBuffer->addTag(ProfileBufferEntry::NativeLeafAddr((void*)aSample.mPC));
|
||||
aBuffer->addTag(ProfileBufferEntry::NativeLeafAddr((void*)aRegs.mPC));
|
||||
}
|
||||
}
|
||||
|
||||
// Don't process the PseudoStack's markers if we're synchronously sampling
|
||||
// the current thread.
|
||||
if (!aSample.mIsSynchronous) {
|
||||
ProfilerMarkerLinkedList* pendingMarkersList =
|
||||
aSample.mRacyInfo->GetPendingMarkers();
|
||||
while (pendingMarkersList && pendingMarkersList->peek()) {
|
||||
ProfilerMarker* marker = pendingMarkersList->popHead();
|
||||
aBuffer->addStoredMarker(marker);
|
||||
aBuffer->addTag(ProfileBufferEntry::Marker(marker));
|
||||
}
|
||||
}
|
||||
|
||||
if (aSample.mResponsiveness && aSample.mResponsiveness->HasData()) {
|
||||
TimeDuration delta =
|
||||
aSample.mResponsiveness->GetUnresponsiveDuration(aSample.mTimeStamp);
|
||||
aBuffer->addTag(ProfileBufferEntry::Responsiveness(delta.ToMilliseconds()));
|
||||
}
|
||||
|
||||
// rssMemory is equal to 0 when we are not recording.
|
||||
if (aSample.mRSSMemory != 0) {
|
||||
double rssMemory = static_cast<double>(aSample.mRSSMemory);
|
||||
aBuffer->addTag(ProfileBufferEntry::ResidentMemory(rssMemory));
|
||||
}
|
||||
|
||||
// ussMemory is equal to 0 when we are not recording.
|
||||
if (aSample.mUSSMemory != 0) {
|
||||
double ussMemory = static_cast<double>(aSample.mUSSMemory);
|
||||
aBuffer->addTag(ProfileBufferEntry::UnsharedMemory(ussMemory));
|
||||
}
|
||||
}
|
||||
|
||||
// END tick/unwinding code
|
||||
// Writes the components of a synchronous sample to the given ProfileBuffer.
|
||||
static void
|
||||
DoSyncSample(PSLockRef aLock, ThreadInfo& aThreadInfo, const TimeStamp& aNow,
|
||||
const Registers& aRegs, ProfileBuffer* aBuffer)
|
||||
{
|
||||
// WARNING: this function runs within the profiler's "critical section".
|
||||
|
||||
DoSharedSample(aLock, /* isSynchronous = */ true, aThreadInfo, aNow, aRegs,
|
||||
/* lastSample = */ nullptr, aBuffer);
|
||||
}
|
||||
|
||||
// Writes the components of a periodic sample to ActivePS's ProfileBuffer.
|
||||
static void
|
||||
DoPeriodicSample(PSLockRef aLock, ThreadInfo& aThreadInfo,
|
||||
const TimeStamp& aNow, const Registers& aRegs,
|
||||
int64_t aRSSMemory, int64_t aUSSMemory)
|
||||
{
|
||||
// WARNING: this function runs within the profiler's "critical section".
|
||||
|
||||
ProfileBuffer* buffer = ActivePS::Buffer(aLock);
|
||||
|
||||
DoSharedSample(aLock, /* isSynchronous = */ false, aThreadInfo, aNow, aRegs,
|
||||
&aThreadInfo.LastSample(), buffer);
|
||||
|
||||
ProfilerMarkerLinkedList* pendingMarkersList =
|
||||
aThreadInfo.RacyInfo()->GetPendingMarkers();
|
||||
while (pendingMarkersList && pendingMarkersList->peek()) {
|
||||
ProfilerMarker* marker = pendingMarkersList->popHead();
|
||||
buffer->addStoredMarker(marker);
|
||||
buffer->addTag(ProfileBufferEntry::Marker(marker));
|
||||
}
|
||||
|
||||
ThreadResponsiveness* resp = aThreadInfo.GetThreadResponsiveness();
|
||||
if (resp && resp->HasData()) {
|
||||
TimeDuration delta = resp->GetUnresponsiveDuration(aNow);
|
||||
buffer->addTag(ProfileBufferEntry::Responsiveness(delta.ToMilliseconds()));
|
||||
}
|
||||
|
||||
if (aRSSMemory != 0) {
|
||||
double rssMemory = static_cast<double>(aRSSMemory);
|
||||
buffer->addTag(ProfileBufferEntry::ResidentMemory(rssMemory));
|
||||
}
|
||||
|
||||
if (aUSSMemory != 0) {
|
||||
double ussMemory = static_cast<double>(aUSSMemory);
|
||||
buffer->addTag(ProfileBufferEntry::UnsharedMemory(ussMemory));
|
||||
}
|
||||
}
|
||||
|
||||
// END sampling/unwinding code
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
@ -1748,14 +1736,14 @@ public:
|
||||
void Disable(PSLockRef aLock);
|
||||
|
||||
// This method suspends and resumes the samplee thread. It calls the passed-in
|
||||
// function like object aDoSample while the samplee thread is suspended, after
|
||||
// filling in register values in aSample.
|
||||
// function-like object aProcessRegs (passing it a populated |const
|
||||
// Registers&| arg) while the samplee thread is suspended.
|
||||
//
|
||||
// Func must be a function-like object of type `void(const Registers&)`.
|
||||
template<typename Func>
|
||||
void SuspendAndSampleAndResumeThread(PSLockRef aLock,
|
||||
TickSample& aSample,
|
||||
const Func& aDoSample);
|
||||
const ThreadInfo& aThreadInfo,
|
||||
const Func& aProcessRegs);
|
||||
|
||||
private:
|
||||
#if defined(GP_OS_linux) || defined(GP_OS_android)
|
||||
@ -1901,11 +1889,11 @@ SamplerThread::Run()
|
||||
#endif
|
||||
}
|
||||
|
||||
TickSample sample(info, rssMemory, ussMemory);
|
||||
|
||||
SuspendAndSampleAndResumeThread(lock, sample, [&] {
|
||||
Tick(lock, sample, ActivePS::Buffer(lock));
|
||||
});
|
||||
TimeStamp now = TimeStamp::Now();
|
||||
SuspendAndSampleAndResumeThread(lock, *info,
|
||||
[&](const Registers& aRegs) {
|
||||
DoPeriodicSample(lock, *info, now, aRegs, rssMemory, ussMemory);
|
||||
});
|
||||
}
|
||||
|
||||
#if defined(USE_LUL_STACKWALK)
|
||||
@ -2843,20 +2831,22 @@ profiler_get_backtrace()
|
||||
|
||||
Thread::tid_t tid = Thread::GetCurrentId();
|
||||
|
||||
ProfileBuffer* buffer = new ProfileBuffer(PROFILER_GET_BACKTRACE_ENTRIES);
|
||||
TimeStamp now = TimeStamp::Now();
|
||||
|
||||
TickSample sample(info->RacyInfo(), info->mContext, info->GetPlatformData());
|
||||
Registers regs;
|
||||
|
||||
#if defined(HAVE_NATIVE_UNWIND)
|
||||
#if defined(GP_OS_linux) || defined(GP_OS_android)
|
||||
ucontext_t context;
|
||||
sample.PopulateContext(&context);
|
||||
regs.SyncPopulate(&context);
|
||||
#else
|
||||
sample.PopulateContext();
|
||||
regs.SyncPopulate();
|
||||
#endif
|
||||
#endif
|
||||
|
||||
Tick(lock, sample, buffer);
|
||||
ProfileBuffer* buffer = new ProfileBuffer(PROFILER_GET_BACKTRACE_ENTRIES);
|
||||
|
||||
DoSyncSample(lock, *info, now, regs, buffer);
|
||||
|
||||
return UniqueProfilerBacktrace(
|
||||
new ProfilerBacktrace("SyncProfile", tid, buffer));
|
||||
@ -3069,9 +3059,10 @@ profiler_current_thread_id()
|
||||
// is paused. Doing stuff in this function like allocating which may try to
|
||||
// claim locks is a surefire way to deadlock.
|
||||
void
|
||||
profiler_suspend_and_sample_thread(int aThreadId,
|
||||
const std::function<void(void**, size_t)>& aCallback,
|
||||
bool aSampleNative /* = true */)
|
||||
profiler_suspend_and_sample_thread(
|
||||
int aThreadId,
|
||||
const std::function<void(void**, size_t)>& aCallback,
|
||||
bool aSampleNative /* = true */)
|
||||
{
|
||||
// Allocate the space for the native stack
|
||||
NativeStack nativeStack;
|
||||
@ -3086,17 +3077,17 @@ profiler_suspend_and_sample_thread(int aThreadId,
|
||||
if (info->ThreadId() == aThreadId) {
|
||||
// Suspend, sample, and then resume the target thread.
|
||||
Sampler sampler(lock);
|
||||
TickSample sample(info, 0, 0);
|
||||
sampler.SuspendAndSampleAndResumeThread(lock, sample, [&] {
|
||||
// The target thread is now suspended, collect a native backtrace, and
|
||||
// call the callback.
|
||||
sampler.SuspendAndSampleAndResumeThread(lock, *info,
|
||||
[&](const Registers& aRegs) {
|
||||
// The target thread is now suspended. Collect a native backtrace, and
|
||||
// call the callback.
|
||||
#if defined(HAVE_NATIVE_UNWIND)
|
||||
if (aSampleNative) {
|
||||
DoNativeBacktrace(lock, nativeStack, sample);
|
||||
}
|
||||
if (aSampleNative) {
|
||||
DoNativeBacktrace(lock, *info, aRegs, nativeStack);
|
||||
}
|
||||
#endif
|
||||
aCallback(nativeStack.mPCs, nativeStack.mCount);
|
||||
});
|
||||
aCallback(nativeStack.mPCs, nativeStack.mCount);
|
||||
});
|
||||
|
||||
// NOTE: Make sure to disable the sampler before it is destroyed, in case
|
||||
// the profiler is running at the same time.
|
||||
|
@ -10,11 +10,7 @@
|
||||
// generic cross-platform way without requiring custom tools or kernel support.
|
||||
//
|
||||
// Samples are collected to form a timeline with optional timeline events
|
||||
// (markers) used for filtering. Both "periodic" (in response to a timer) and
|
||||
// "synchronous" (in response to an explicit sampling request via the API)
|
||||
// samples are supported.
|
||||
//
|
||||
// The profiler collects samples that include native stacks and
|
||||
// (markers) used for filtering. The samples include both native stacks and
|
||||
// platform-independent "pseudostacks".
|
||||
|
||||
#ifndef GeckoProfiler_h
|
||||
|
Loading…
Reference in New Issue
Block a user