Bug 1373154 (part 3, attempt 2) - Overhaul TickSample and things around it. r=mstange.

Bug 1357829 added a third kind of sample, in addition to the existing
"periodic" and "synchronous" samples. This patch cleans things up around that
change. In particular, it cleans up TickSample, which is a mess of semi-related
things.

The patch does the following (a sketch of the reshaped pieces follows the list).

- It removes everything from TickSample except the register values and renames
  TickSample as Registers. Almost all the removed stuff is available in
  ThreadInfo anyway, and the patch adds a ThreadInfo argument to various
  functions. (Doing it this way wasn't possible until recently, when a
  ThreadInfo became available in profiler_get_backtrace().)

  One non-obvious consequence: in synchronous samples we used to use a value of
  0 for the stackTop. Because synchronous samples now use ThreadInfo directly,
  they are able to use the proper stack top value from ThreadInfo::mStackTop.
  This will presumably only improve the quality of the stack traces.

- It splits Tick() in two and renames the halves DoPeriodicSample() and
  DoSyncSample().

- It reorders arguments in some functions so that ProfileBuffer (the output) is
  always last, and inputs are passed in roughly the order they are obtained.

- It adds a comment at the top of platform.cpp explaining the three kinds of
  sample.

- It renames a couple of other things.

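For reference, a rough sketch of the reshaped pieces (simplified from the
patch below; constructors and bodies elided):

  // The registers used for stack unwinding and a few other sampling purposes.
  class Registers
  {
  public:
    // Fills in the fields below for a synchronous (on-thread) sample.
  #if defined(GP_OS_linux) || defined(GP_OS_android)
    void SyncPopulate(ucontext_t* aContext);
  #else
    void SyncPopulate();
  #endif

    Address mPC;  // Instruction pointer.
    Address mSP;  // Stack pointer.
    Address mFP;  // Frame pointer.
    Address mLR;  // ARM link register.
  };

  // The two halves of the old Tick(), with inputs ordered roughly as they are
  // obtained and the ProfileBuffer output last. (DoPeriodicSample() fetches
  // ActivePS's buffer internally.)
  static void DoSyncSample(PSLockRef aLock, ThreadInfo& aThreadInfo,
                           const TimeStamp& aNow, const Registers& aRegs,
                           ProfileBuffer* aBuffer);
  static void DoPeriodicSample(PSLockRef aLock, ThreadInfo& aThreadInfo,
                               const TimeStamp& aNow, const Registers& aRegs,
                               int64_t aRSSMemory, int64_t aUSSMemory);
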
--HG--
extra : rebase_source : 4f1e69c605102354dd56ef7af5ebade201e1d106
Nicholas Nethercote 2017-06-19 09:38:15 +10:00
parent 61516fe3a0
commit d529284168
7 changed files with 213 additions and 228 deletions


@@ -40,7 +40,7 @@ public:
// Add to the buffer a sample start (ThreadId) entry for aThreadId. Also,
// record the resulting generation and index in |aLS| if it's non-null.
void addTagThreadId(int aThreadId, LastSample* aLS);
void addTagThreadId(int aThreadId, LastSample* aLS = nullptr);
void StreamSamplesToJSON(SpliceableJSONWriter& aWriter, int aThreadId,
double aSinceTime, JSContext* cx,


@@ -358,8 +358,8 @@ private:
INACTIVE_REQUESTED = 3,
} mJSSampling;
// When sampling, this holds the generation number and offset in PS::mBuffer
// of the most recent sample for this thread.
// When sampling, this holds the generation number and offset in
// ActivePS::mBuffer of the most recent sample for this thread.
ProfileBuffer::LastSample mLastSample;
};


@@ -76,30 +76,30 @@ Thread::GetCurrentId()
}
static void
FillInSample(TickSample& aSample, ucontext_t* aContext)
FillInRegs(Registers& aRegs, ucontext_t* aContext)
{
aSample.mContext = aContext;
aRegs.mContext = aContext;
mcontext_t& mcontext = aContext->uc_mcontext;
// Extracting the sample from the context is extremely machine dependent.
#if defined(GP_ARCH_x86)
aSample.mPC = reinterpret_cast<Address>(mcontext.gregs[REG_EIP]);
aSample.mSP = reinterpret_cast<Address>(mcontext.gregs[REG_ESP]);
aSample.mFP = reinterpret_cast<Address>(mcontext.gregs[REG_EBP]);
aRegs.mPC = reinterpret_cast<Address>(mcontext.gregs[REG_EIP]);
aRegs.mSP = reinterpret_cast<Address>(mcontext.gregs[REG_ESP]);
aRegs.mFP = reinterpret_cast<Address>(mcontext.gregs[REG_EBP]);
#elif defined(GP_ARCH_amd64)
aSample.mPC = reinterpret_cast<Address>(mcontext.gregs[REG_RIP]);
aSample.mSP = reinterpret_cast<Address>(mcontext.gregs[REG_RSP]);
aSample.mFP = reinterpret_cast<Address>(mcontext.gregs[REG_RBP]);
aRegs.mPC = reinterpret_cast<Address>(mcontext.gregs[REG_RIP]);
aRegs.mSP = reinterpret_cast<Address>(mcontext.gregs[REG_RSP]);
aRegs.mFP = reinterpret_cast<Address>(mcontext.gregs[REG_RBP]);
#elif defined(GP_ARCH_arm)
aSample.mPC = reinterpret_cast<Address>(mcontext.arm_pc);
aSample.mSP = reinterpret_cast<Address>(mcontext.arm_sp);
aSample.mFP = reinterpret_cast<Address>(mcontext.arm_fp);
aSample.mLR = reinterpret_cast<Address>(mcontext.arm_lr);
aRegs.mPC = reinterpret_cast<Address>(mcontext.arm_pc);
aRegs.mSP = reinterpret_cast<Address>(mcontext.arm_sp);
aRegs.mFP = reinterpret_cast<Address>(mcontext.arm_fp);
aRegs.mLR = reinterpret_cast<Address>(mcontext.arm_lr);
#elif defined(GP_ARCH_aarch64)
aSample.mPC = reinterpret_cast<Address>(mcontext.pc);
aSample.mSP = reinterpret_cast<Address>(mcontext.sp);
aSample.mFP = reinterpret_cast<Address>(mcontext.regs[29]);
aSample.mLR = reinterpret_cast<Address>(mcontext.regs[30]);
aRegs.mPC = reinterpret_cast<Address>(mcontext.pc);
aRegs.mSP = reinterpret_cast<Address>(mcontext.sp);
aRegs.mFP = reinterpret_cast<Address>(mcontext.regs[29]);
aRegs.mLR = reinterpret_cast<Address>(mcontext.regs[30]);
#else
# error "bad platform"
#endif
@@ -303,8 +303,8 @@ Sampler::Disable(PSLockRef aLock)
template<typename Func>
void
Sampler::SuspendAndSampleAndResumeThread(PSLockRef aLock,
TickSample& aSample,
const Func& aDoSample)
const ThreadInfo& aThreadInfo,
const Func& aProcessRegs)
{
// Only one sampler thread can be sampling at once. So we expect to have
// complete control over |sSigHandlerCoordinator|.
@@ -313,7 +313,7 @@ Sampler::SuspendAndSampleAndResumeThread(PSLockRef aLock,
if (mSamplerTid == -1) {
mSamplerTid = gettid();
}
int sampleeTid = aSample.mThreadId;
int sampleeTid = aThreadInfo.ThreadId();
MOZ_RELEASE_ASSERT(sampleeTid != mSamplerTid);
//----------------------------------------------------------------//
@@ -357,10 +357,10 @@ Sampler::SuspendAndSampleAndResumeThread(PSLockRef aLock,
// The samplee thread is now frozen and sSigHandlerCoordinator->mUContext is
// valid. We can poke around in it and unwind its stack as we like.
// Extract the current PC and sp.
FillInSample(aSample, &sSigHandlerCoordinator->mUContext);
aDoSample();
// Extract the current register values.
Registers regs;
FillInRegs(regs, &sSigHandlerCoordinator->mUContext);
aProcessRegs(regs);
//----------------------------------------------------------------//
// Resume the target thread.
@@ -524,13 +524,12 @@ PlatformInit(PSLockRef aLock)
#endif
void
TickSample::PopulateContext(ucontext_t* aContext)
Registers::SyncPopulate(ucontext_t* aContext)
{
MOZ_ASSERT(mIsSynchronous);
MOZ_ASSERT(aContext);
if (!getcontext(aContext)) {
FillInSample(*this, aContext);
FillInRegs(*this, aContext);
}
}

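The sampler-side protocol above, in brief: SuspendAndSampleAndResumeThread()
now builds the Registers itself and hands it to the caller's function-like
object, instead of mutating a caller-supplied TickSample. A minimal sketch,
matching the Linux code above (the macOS and Windows implementations below
follow the same shape):

  // Once the samplee thread is frozen:
  Registers regs;
  FillInRegs(regs, &sSigHandlerCoordinator->mUContext);  // platform-specific
  aProcessRegs(regs);  // e.g. a lambda that calls DoPeriodicSample()
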

@@ -76,10 +76,11 @@ Sampler::Disable(PSLockRef aLock)
template<typename Func>
void
Sampler::SuspendAndSampleAndResumeThread(PSLockRef aLock,
TickSample& aSample,
const Func& aDoSample)
const ThreadInfo& aThreadInfo,
const Func& aProcessRegs)
{
thread_act_t samplee_thread = aSample.mPlatformData->ProfiledThread();
thread_act_t samplee_thread =
aThreadInfo.GetPlatformData()->ProfiledThread();
//----------------------------------------------------------------//
// Suspend the samplee thread and get its context.
@@ -115,11 +116,12 @@ Sampler::SuspendAndSampleAndResumeThread(PSLockRef aLock,
flavor,
reinterpret_cast<natural_t*>(&state),
&count) == KERN_SUCCESS) {
aSample.mPC = reinterpret_cast<Address>(state.REGISTER_FIELD(ip));
aSample.mSP = reinterpret_cast<Address>(state.REGISTER_FIELD(sp));
aSample.mFP = reinterpret_cast<Address>(state.REGISTER_FIELD(bp));
Registers regs;
regs.mPC = reinterpret_cast<Address>(state.REGISTER_FIELD(ip));
regs.mSP = reinterpret_cast<Address>(state.REGISTER_FIELD(sp));
regs.mFP = reinterpret_cast<Address>(state.REGISTER_FIELD(bp));
aDoSample();
aProcessRegs(regs);
}
#undef REGISTER_FIELD
@@ -191,10 +193,8 @@ PlatformInit(PSLockRef aLock)
}
void
TickSample::PopulateContext()
Registers::SyncPopulate()
{
MOZ_ASSERT(mIsSynchronous);
asm (
// Compute caller's %rsp by adding to %rbp:
// 8 bytes for previous %rbp, 8 bytes for return address

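Spelling out the frame-pointer arithmetic that the asm comment above refers to
(standard x86-64 frame layout, assuming frame pointers are present; a sketch,
not the patch's code):

  // After 'push %rbp; mov %rsp,%rbp' in the callee:
  caller_rsp  = rbp + 16;            // skip saved %rbp (8) + return address (8)
  caller_rbp  = *(void**)rbp;        // saved frame pointer
  return_addr = *(void**)(rbp + 8);  // address the callee returns to
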

@@ -93,10 +93,10 @@ Sampler::Disable(PSLockRef aLock)
template<typename Func>
void
Sampler::SuspendAndSampleAndResumeThread(PSLockRef aLock,
TickSample& aSample,
const Func& aDoSample)
const ThreadInfo& aThreadInfo,
const Func& aProcessRegs)
{
HANDLE profiled_thread = aSample.mPlatformData->ProfiledThread();
HANDLE profiled_thread = aThreadInfo.GetPlatformData()->ProfiledThread();
if (profiled_thread == nullptr) {
return;
}
@@ -138,17 +138,18 @@ Sampler::SuspendAndSampleAndResumeThread(PSLockRef aLock,
// what we do here, or risk deadlock. See the corresponding comment in
// platform-linux-android.cpp for details.
Registers regs;
#if defined(GP_ARCH_amd64)
aSample.mPC = reinterpret_cast<Address>(context.Rip);
aSample.mSP = reinterpret_cast<Address>(context.Rsp);
aSample.mFP = reinterpret_cast<Address>(context.Rbp);
regs.mPC = reinterpret_cast<Address>(context.Rip);
regs.mSP = reinterpret_cast<Address>(context.Rsp);
regs.mFP = reinterpret_cast<Address>(context.Rbp);
#else
aSample.mPC = reinterpret_cast<Address>(context.Eip);
aSample.mSP = reinterpret_cast<Address>(context.Esp);
aSample.mFP = reinterpret_cast<Address>(context.Ebp);
regs.mPC = reinterpret_cast<Address>(context.Eip);
regs.mSP = reinterpret_cast<Address>(context.Esp);
regs.mFP = reinterpret_cast<Address>(context.Ebp);
#endif
aDoSample();
aProcessRegs(regs);
//----------------------------------------------------------------//
// Resume the target thread.
@@ -264,10 +265,8 @@ PlatformInit(PSLockRef aLock)
}
void
TickSample::PopulateContext()
Registers::SyncPopulate()
{
MOZ_ASSERT(mIsSynchronous);
CONTEXT context;
RtlCaptureContext(&context);

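On all three platforms the on-thread caller of SyncPopulate() ends up looking
roughly like this (simplified from profiler_get_backtrace() in platform.cpp
below; error handling elided):

  Registers regs;
#if defined(GP_OS_linux) || defined(GP_OS_android)
  // On Linux/Android the registers are taken from a ucontext_t captured on
  // the current thread via getcontext().
  ucontext_t context;
  regs.SyncPopulate(&context);
#else
  regs.SyncPopulate();
#endif
  DoSyncSample(lock, *info, TimeStamp::Now(), regs, buffer);
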

@@ -4,6 +4,28 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
// There are three kinds of samples done by the profiler.
//
// - A "periodic" sample is the most complex kind. It is done in response to a
// timer while the profiler is active. It involves writing a stack trace plus
// a variety of other values (memory measurements, responsiveness
// measurements, markers, etc.) into the main ProfileBuffer. The sampling is
// done from off-thread, and so SuspendAndSampleAndResumeThread() is used to
// get the register values.
//
// - A "synchronous" sample is a simpler kind. It is done in response to an API
// call (profiler_get_backtrace()). It involves writing a stack trace and
// little else into a temporary ProfileBuffer, and wrapping that up in a
// ProfilerBacktrace that can be subsequently used in a marker. The sampling
// is done on-thread, and so Registers::SyncPopulate() is used to get the
// register values.
//
// - A "backtrace" sample is the simplest kind. It is done in response to an
// API call (profiler_suspend_and_sample_thread()). It involves getting a
// stack trace and passing it to a callback function; it does not write to a
// ProfileBuffer. The sampling is done from off-thread, and so uses
// SuspendAndSampleAndResumeThread() to get the register values.
#include <algorithm>
#include <ostream>
#include <fstream>
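As a rough illustration of how the latter two kinds are triggered (the two
calls use the public API as it appears later in this patch; the surrounding
names are hypothetical):

  // Synchronous sample: capture the current thread's stack, e.g. for later
  // attachment to a marker.
  UniqueProfilerBacktrace bt = profiler_get_backtrace();

  // Backtrace sample: suspend another thread and hand its PCs to a callback.
  // The callback must not allocate or take locks while the target thread is
  // suspended (see profiler_suspend_and_sample_thread() below).
  profiler_suspend_and_sample_thread(
    targetThreadId,  // hypothetical thread id
    [](void** aPCs, size_t aCount) {
      // Inspect the aCount program counter values in aPCs.
    },
    /* aSampleNative = */ true);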
@@ -603,103 +625,41 @@ MOZ_THREAD_LOCAL(PseudoStack*) sPseudoStack;
static const char* const kMainThreadName = "GeckoMain";
////////////////////////////////////////////////////////////////////////
// BEGIN tick/unwinding code
// BEGIN sampling/unwinding code
// TickSample contains all the information needed by Tick(). Some of it is
// pointers to long-lived things, and some of it is sampled just before the
// call to Tick().
class TickSample {
// The registers used for stack unwinding and a few other sampling purposes.
class Registers
{
public:
// This constructor is for periodic samples, i.e. those performed in response
// to a timer firing. Periodic samples are performed off-thread, i.e. the
// SamplerThread samples the thread in question.
TickSample(ThreadInfo* aThreadInfo, int64_t aRSSMemory, int64_t aUSSMemory)
: mIsSynchronous(false)
, mTimeStamp(TimeStamp::Now())
, mThreadId(aThreadInfo->ThreadId())
, mRacyInfo(aThreadInfo->RacyInfo())
, mJSContext(aThreadInfo->mContext)
, mStackTop(aThreadInfo->StackTop())
, mLastSample(&aThreadInfo->LastSample())
, mPlatformData(aThreadInfo->GetPlatformData())
, mResponsiveness(aThreadInfo->GetThreadResponsiveness())
, mRSSMemory(aRSSMemory) // may be zero
, mUSSMemory(aUSSMemory) // may be zero
#if defined(GP_OS_linux) || defined(GP_OS_android)
, mContext(nullptr)
#endif
, mPC(nullptr)
Registers()
: mPC(nullptr)
, mSP(nullptr)
, mFP(nullptr)
, mLR(nullptr)
{}
// This constructor is for synchronous samples, i.e. those performed in
// response to an explicit sampling request via the API. Synchronous samples
// are performed on-thread, i.e. the thread samples itself.
TickSample(NotNull<RacyThreadInfo*> aRacyInfo, JSContext* aJSContext,
PlatformData* aPlatformData)
: mIsSynchronous(true)
, mTimeStamp(TimeStamp::Now())
, mThreadId(Thread::GetCurrentId())
, mRacyInfo(aRacyInfo)
, mJSContext(aJSContext)
, mStackTop(nullptr)
, mLastSample(nullptr)
, mPlatformData(aPlatformData)
, mResponsiveness(nullptr)
, mRSSMemory(0)
, mUSSMemory(0)
#if defined(GP_OS_linux) || defined(GP_OS_android)
, mContext(nullptr)
#endif
, mPC(nullptr)
, mSP(nullptr)
, mFP(nullptr)
, mLR(nullptr)
{}
// Fills in mContext, mPC, mSP, mFP, and mLR for a synchronous sample.
#if defined(GP_OS_linux) || defined(GP_OS_android)
void PopulateContext(ucontext_t* aContext);
void SyncPopulate(ucontext_t* aContext);
#else
void PopulateContext();
void SyncPopulate();
#endif
// False for periodic samples, true for synchronous samples.
const bool mIsSynchronous;
const TimeStamp mTimeStamp;
const int mThreadId;
const NotNull<RacyThreadInfo*> mRacyInfo;
JSContext* const mJSContext;
void* const mStackTop;
ProfileBuffer::LastSample* const mLastSample; // may be null
PlatformData* const mPlatformData;
ThreadResponsiveness* const mResponsiveness; // may be null
const int64_t mRSSMemory; // may be zero
const int64_t mUSSMemory; // may be zero
// The remaining fields are filled in, after construction, by
// SamplerThread::SuspendAndSampleAndResume() for periodic samples, and
// PopulateContext() for synchronous samples. They are filled in separately
// from the other fields in this class because the code that fills them in is
// platform-specific.
#if defined(GP_OS_linux) || defined(GP_OS_android)
ucontext_t* mContext; // The context from the signal handler.
#endif
// These fields are filled in by
// SamplerThread::SuspendAndSampleAndResumeThread() for periodic and
// backtrace samples, and by SyncPopulate() for synchronous samples.
Address mPC; // Instruction pointer.
Address mSP; // Stack pointer.
Address mFP; // Frame pointer.
Address mLR; // ARM link register.
#if defined(GP_OS_linux) || defined(GP_OS_android)
// This contains all the registers, which means it duplicates the four fields
// above. This is ok.
ucontext_t* mContext; // The context from the signal handler.
#endif
};
static void
@@ -833,15 +793,16 @@ struct AutoWalkJSStack
};
static void
MergeStacksIntoProfile(PSLockRef aLock, ProfileBuffer* aBuffer,
const TickSample& aSample, NativeStack& aNativeStack)
MergeStacksIntoProfile(PSLockRef aLock, bool aIsSynchronous,
const ThreadInfo& aThreadInfo, const Registers& aRegs,
const NativeStack& aNativeStack, ProfileBuffer* aBuffer)
{
// WARNING: this function runs within the profiler's "critical section".
NotNull<RacyThreadInfo*> racyInfo = aSample.mRacyInfo;
NotNull<RacyThreadInfo*> racyInfo = aThreadInfo.RacyInfo();
js::ProfileEntry* pseudoEntries = racyInfo->entries;
uint32_t pseudoCount = racyInfo->stackSize();
JSContext* context = aSample.mJSContext;
JSContext* context = aThreadInfo.mContext;
// Make a copy of the JS stack into a JSFrame array. This is necessary since,
// like the native stack, the JS stack is iterated youngest-to-oldest and we
@@ -852,7 +813,7 @@ MergeStacksIntoProfile(PSLockRef aLock, ProfileBuffer* aBuffer,
// sampled JIT entries inside the JS engine. See note below concerning 'J'
// entries.
uint32_t startBufferGen;
startBufferGen = aSample.mIsSynchronous
startBufferGen = aIsSynchronous
? UINT32_MAX
: aBuffer->mGeneration;
uint32_t jsCount = 0;
@@ -865,16 +826,16 @@ MergeStacksIntoProfile(PSLockRef aLock, ProfileBuffer* aBuffer,
if (autoWalkJSStack.walkAllowed) {
JS::ProfilingFrameIterator::RegisterState registerState;
registerState.pc = aSample.mPC;
registerState.sp = aSample.mSP;
registerState.lr = aSample.mLR;
registerState.fp = aSample.mFP;
registerState.pc = aRegs.mPC;
registerState.sp = aRegs.mSP;
registerState.lr = aRegs.mLR;
registerState.fp = aRegs.mFP;
JS::ProfilingFrameIterator jsIter(context, registerState,
startBufferGen);
for (; jsCount < maxFrames && !jsIter.done(); ++jsIter) {
// See note below regarding 'J' entries.
if (aSample.mIsSynchronous || jsIter.isWasm()) {
if (aIsSynchronous || jsIter.isWasm()) {
uint32_t extracted =
jsIter.extractStack(jsFrames, jsCount, maxFrames);
jsCount += extracted;
@@ -993,7 +954,7 @@ MergeStacksIntoProfile(PSLockRef aLock, ProfileBuffer* aBuffer,
// JIT code. This means that if we inserted such OptInfoAddr entries into
// the buffer, nsRefreshDriver would now be holding on to a backtrace
// with stale JIT code return addresses.
if (aSample.mIsSynchronous ||
if (aIsSynchronous ||
jsFrame.kind == JS::ProfilingFrameIterator::Frame_Wasm) {
AddDynamicCodeLocationTag(aBuffer, jsFrame.label);
} else {
@@ -1023,7 +984,7 @@ MergeStacksIntoProfile(PSLockRef aLock, ProfileBuffer* aBuffer,
//
// Do not do this for synchronous samples, which use their own
// ProfileBuffers instead of the global one in CorePS.
if (!aSample.mIsSynchronous && context) {
if (!aIsSynchronous && context) {
MOZ_ASSERT(aBuffer->mGeneration >= startBufferGen);
uint32_t lapCount = aBuffer->mGeneration - startBufferGen;
JS::UpdateJSContextProfilerSampleBufferGen(context, aBuffer->mGeneration,
@@ -1047,8 +1008,8 @@ StackWalkCallback(uint32_t aFrameNumber, void* aPC, void* aSP, void* aClosure)
}
static void
DoNativeBacktrace(PSLockRef aLock, NativeStack& aNativeStack,
const TickSample& aSample)
DoNativeBacktrace(PSLockRef aLock, const ThreadInfo& aThreadInfo,
const Registers& aRegs, NativeStack& aNativeStack)
{
// WARNING: this function runs within the profiler's "critical section".
@@ -1056,21 +1017,21 @@ DoNativeBacktrace(PSLockRef aLock, NativeStack& aNativeStack,
// the FramePointerStackWalk() and MozStackWalk() calls below will use 1..N.
// This is a bit weird but it doesn't matter because StackWalkCallback()
// doesn't use the frame number argument.
StackWalkCallback(/* frameNum */ 0, aSample.mPC, aSample.mSP, &aNativeStack);
StackWalkCallback(/* frameNum */ 0, aRegs.mPC, aRegs.mSP, &aNativeStack);
uint32_t maxFrames = uint32_t(MAX_NATIVE_FRAMES - aNativeStack.mCount);
#if defined(GP_OS_darwin) || (defined(GP_PLAT_x86_windows))
void* stackEnd = aSample.mStackTop;
if (aSample.mFP >= aSample.mSP && aSample.mFP <= stackEnd) {
void* stackEnd = aThreadInfo.StackTop();
if (aRegs.mFP >= aRegs.mSP && aRegs.mFP <= stackEnd) {
FramePointerStackWalk(StackWalkCallback, /* skipFrames */ 0, maxFrames,
&aNativeStack, reinterpret_cast<void**>(aSample.mFP),
&aNativeStack, reinterpret_cast<void**>(aRegs.mFP),
stackEnd);
}
#else
// Win64 always omits frame pointers so for it we use the slower
// MozStackWalk().
uintptr_t thread = GetThreadHandle(aSample.mPlatformData);
uintptr_t thread = GetThreadHandle(aThreadInfo.GetPlatformData());
MOZ_ASSERT(thread);
MozStackWalk(StackWalkCallback, /* skipFrames */ 0, maxFrames, &aNativeStack,
thread, /* platformData */ nullptr);
@@ -1080,14 +1041,14 @@ DoNativeBacktrace(PSLockRef aLock, NativeStack& aNativeStack,
#ifdef USE_EHABI_STACKWALK
static void
DoNativeBacktrace(PSLockRef aLock, NativeStack& aNativeStack,
const TickSample& aSample)
DoNativeBacktrace(PSLockRef aLock, const ThreadInfo& aThreadInfo,
const Registers& aRegs, NativeStack& aNativeStack)
{
// WARNING: this function runs within the profiler's "critical section".
const mcontext_t* mcontext = &aSample.mContext->uc_mcontext;
const mcontext_t* mcontext = &aRegs.mContext->uc_mcontext;
mcontext_t savedContext;
NotNull<RacyThreadInfo*> racyInfo = aSample.mRacyInfo;
NotNull<RacyThreadInfo*> racyInfo = aThreadInfo.RacyInfo();
// The pseudostack contains an "EnterJIT" frame whenever we enter
// JIT code with profiling enabled; the stack pointer value points
@@ -1131,7 +1092,7 @@ DoNativeBacktrace(PSLockRef aLock, NativeStack& aNativeStack,
// Now unwind whatever's left (starting from either the last EnterJIT frame
// or, if no EnterJIT was found, the original registers).
aNativeStack.mCount +=
EHABIStackWalk(*mcontext, aSample.mStackTop,
EHABIStackWalk(*mcontext, aThreadInfo.StackTop(),
aNativeStack.mSPs + aNativeStack.mCount,
aNativeStack.mPCs + aNativeStack.mCount,
MAX_NATIVE_FRAMES - aNativeStack.mCount);
@@ -1159,12 +1120,12 @@ ASAN_memcpy(void* aDst, const void* aSrc, size_t aLen)
#endif
static void
DoNativeBacktrace(PSLockRef aLock, NativeStack& aNativeStack,
const TickSample& aSample)
DoNativeBacktrace(PSLockRef aLock, const ThreadInfo& aThreadInfo,
const Registers& aRegs, NativeStack& aNativeStack)
{
// WARNING: this function runs within the profiler's "critical section".
const mcontext_t* mc = &aSample.mContext->uc_mcontext;
const mcontext_t* mc = &aRegs.mContext->uc_mcontext;
lul::UnwindRegs startRegs;
memset(&startRegs, 0, sizeof(startRegs));
@@ -1234,7 +1195,7 @@ DoNativeBacktrace(PSLockRef aLock, NativeStack& aNativeStack,
#else
# error "Unknown plat"
#endif
uintptr_t end = reinterpret_cast<uintptr_t>(aSample.mStackTop);
uintptr_t end = reinterpret_cast<uintptr_t>(aThreadInfo.StackTop());
uintptr_t ws = sizeof(void*);
start &= ~(ws-1);
end &= ~(ws-1);
@@ -1288,66 +1249,93 @@ DoNativeBacktrace(PSLockRef aLock, NativeStack& aNativeStack,
#endif
void
Tick(PSLockRef aLock, const TickSample& aSample, ProfileBuffer* aBuffer)
// Writes some components shared by periodic and synchronous samples to the
// given ProfileBuffer. (This should only be called from DoSyncSample()
// and DoPeriodicSample().)
static inline void
DoSharedSample(PSLockRef aLock, bool aIsSynchronous,
ThreadInfo& aThreadInfo, const TimeStamp& aNow,
const Registers& aRegs, ProfileBuffer::LastSample* aLS,
ProfileBuffer* aBuffer)
{
// WARNING: this function runs within the profiler's "critical section".
MOZ_RELEASE_ASSERT(ActivePS::Exists(aLock));
aBuffer->addTagThreadId(aSample.mThreadId, aSample.mLastSample);
aBuffer->addTagThreadId(aThreadInfo.ThreadId(), aLS);
TimeDuration delta = aSample.mTimeStamp - CorePS::ProcessStartTime();
TimeDuration delta = aNow - CorePS::ProcessStartTime();
aBuffer->addTag(ProfileBufferEntry::Time(delta.ToMilliseconds()));
NativeStack nativeStack;
#if defined(HAVE_NATIVE_UNWIND)
if (ActivePS::FeatureStackWalk(aLock)) {
DoNativeBacktrace(aLock, nativeStack, aSample);
DoNativeBacktrace(aLock, aThreadInfo, aRegs, nativeStack);
MergeStacksIntoProfile(aLock, aBuffer, aSample, nativeStack);
MergeStacksIntoProfile(aLock, aIsSynchronous, aThreadInfo, aRegs,
nativeStack, aBuffer);
} else
#endif
{
MergeStacksIntoProfile(aLock, aBuffer, aSample, nativeStack);
MergeStacksIntoProfile(aLock, aIsSynchronous, aThreadInfo, aRegs,
nativeStack, aBuffer);
if (ActivePS::FeatureLeaf(aLock)) {
aBuffer->addTag(ProfileBufferEntry::NativeLeafAddr((void*)aSample.mPC));
aBuffer->addTag(ProfileBufferEntry::NativeLeafAddr((void*)aRegs.mPC));
}
}
// Don't process the PseudoStack's markers if we're synchronously sampling
// the current thread.
if (!aSample.mIsSynchronous) {
ProfilerMarkerLinkedList* pendingMarkersList =
aSample.mRacyInfo->GetPendingMarkers();
while (pendingMarkersList && pendingMarkersList->peek()) {
ProfilerMarker* marker = pendingMarkersList->popHead();
aBuffer->addStoredMarker(marker);
aBuffer->addTag(ProfileBufferEntry::Marker(marker));
}
}
if (aSample.mResponsiveness && aSample.mResponsiveness->HasData()) {
TimeDuration delta =
aSample.mResponsiveness->GetUnresponsiveDuration(aSample.mTimeStamp);
aBuffer->addTag(ProfileBufferEntry::Responsiveness(delta.ToMilliseconds()));
}
// rssMemory is equal to 0 when we are not recording.
if (aSample.mRSSMemory != 0) {
double rssMemory = static_cast<double>(aSample.mRSSMemory);
aBuffer->addTag(ProfileBufferEntry::ResidentMemory(rssMemory));
}
// ussMemory is equal to 0 when we are not recording.
if (aSample.mUSSMemory != 0) {
double ussMemory = static_cast<double>(aSample.mUSSMemory);
aBuffer->addTag(ProfileBufferEntry::UnsharedMemory(ussMemory));
}
}
// END tick/unwinding code
// Writes the components of a synchronous sample to the given ProfileBuffer.
static void
DoSyncSample(PSLockRef aLock, ThreadInfo& aThreadInfo, const TimeStamp& aNow,
const Registers& aRegs, ProfileBuffer* aBuffer)
{
// WARNING: this function runs within the profiler's "critical section".
DoSharedSample(aLock, /* isSynchronous = */ true, aThreadInfo, aNow, aRegs,
/* lastSample = */ nullptr, aBuffer);
}
// Writes the components of a periodic sample to ActivePS's ProfileBuffer.
static void
DoPeriodicSample(PSLockRef aLock, ThreadInfo& aThreadInfo,
const TimeStamp& aNow, const Registers& aRegs,
int64_t aRSSMemory, int64_t aUSSMemory)
{
// WARNING: this function runs within the profiler's "critical section".
ProfileBuffer* buffer = ActivePS::Buffer(aLock);
DoSharedSample(aLock, /* isSynchronous = */ false, aThreadInfo, aNow, aRegs,
&aThreadInfo.LastSample(), buffer);
ProfilerMarkerLinkedList* pendingMarkersList =
aThreadInfo.RacyInfo()->GetPendingMarkers();
while (pendingMarkersList && pendingMarkersList->peek()) {
ProfilerMarker* marker = pendingMarkersList->popHead();
buffer->addStoredMarker(marker);
buffer->addTag(ProfileBufferEntry::Marker(marker));
}
ThreadResponsiveness* resp = aThreadInfo.GetThreadResponsiveness();
if (resp && resp->HasData()) {
TimeDuration delta = resp->GetUnresponsiveDuration(aNow);
buffer->addTag(ProfileBufferEntry::Responsiveness(delta.ToMilliseconds()));
}
if (aRSSMemory != 0) {
double rssMemory = static_cast<double>(aRSSMemory);
buffer->addTag(ProfileBufferEntry::ResidentMemory(rssMemory));
}
if (aUSSMemory != 0) {
double ussMemory = static_cast<double>(aUSSMemory);
buffer->addTag(ProfileBufferEntry::UnsharedMemory(ussMemory));
}
}
// END sampling/unwinding code
////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////
@@ -1748,14 +1736,14 @@ public:
void Disable(PSLockRef aLock);
// This method suspends and resumes the samplee thread. It calls the passed-in
// function like object aDoSample while the samplee thread is suspended, after
// filling in register values in aSample.
// function-like object aProcessRegs (passing it a populated |const
// Registers&| arg) while the samplee thread is suspended.
//
// Func must be a function-like object of type `void(const Registers&)`.
template<typename Func>
void SuspendAndSampleAndResumeThread(PSLockRef aLock,
TickSample& aSample,
const Func& aDoSample);
const ThreadInfo& aThreadInfo,
const Func& aProcessRegs);
private:
#if defined(GP_OS_linux) || defined(GP_OS_android)
@@ -1901,11 +1889,11 @@ SamplerThread::Run()
#endif
}
TickSample sample(info, rssMemory, ussMemory);
SuspendAndSampleAndResumeThread(lock, sample, [&] {
Tick(lock, sample, ActivePS::Buffer(lock));
});
TimeStamp now = TimeStamp::Now();
SuspendAndSampleAndResumeThread(lock, *info,
[&](const Registers& aRegs) {
DoPeriodicSample(lock, *info, now, aRegs, rssMemory, ussMemory);
});
}
#if defined(USE_LUL_STACKWALK)
@@ -2843,20 +2831,22 @@ profiler_get_backtrace()
Thread::tid_t tid = Thread::GetCurrentId();
ProfileBuffer* buffer = new ProfileBuffer(PROFILER_GET_BACKTRACE_ENTRIES);
TimeStamp now = TimeStamp::Now();
TickSample sample(info->RacyInfo(), info->mContext, info->GetPlatformData());
Registers regs;
#if defined(HAVE_NATIVE_UNWIND)
#if defined(GP_OS_linux) || defined(GP_OS_android)
ucontext_t context;
sample.PopulateContext(&context);
regs.SyncPopulate(&context);
#else
sample.PopulateContext();
regs.SyncPopulate();
#endif
#endif
Tick(lock, sample, buffer);
ProfileBuffer* buffer = new ProfileBuffer(PROFILER_GET_BACKTRACE_ENTRIES);
DoSyncSample(lock, *info, now, regs, buffer);
return UniqueProfilerBacktrace(
new ProfilerBacktrace("SyncProfile", tid, buffer));
@@ -3069,9 +3059,10 @@ profiler_current_thread_id()
// is paused. Doing stuff in this function like allocating which may try to
// claim locks is a surefire way to deadlock.
void
profiler_suspend_and_sample_thread(int aThreadId,
const std::function<void(void**, size_t)>& aCallback,
bool aSampleNative /* = true */)
profiler_suspend_and_sample_thread(
int aThreadId,
const std::function<void(void**, size_t)>& aCallback,
bool aSampleNative /* = true */)
{
// Allocate the space for the native stack
NativeStack nativeStack;
@@ -3086,17 +3077,17 @@ profiler_suspend_and_sample_thread(int aThreadId,
if (info->ThreadId() == aThreadId) {
// Suspend, sample, and then resume the target thread.
Sampler sampler(lock);
TickSample sample(info, 0, 0);
sampler.SuspendAndSampleAndResumeThread(lock, sample, [&] {
// The target thread is now suspended, collect a native backtrace, and
// call the callback.
sampler.SuspendAndSampleAndResumeThread(lock, *info,
[&](const Registers& aRegs) {
// The target thread is now suspended. Collect a native backtrace, and
// call the callback.
#if defined(HAVE_NATIVE_UNWIND)
if (aSampleNative) {
DoNativeBacktrace(lock, nativeStack, sample);
}
if (aSampleNative) {
DoNativeBacktrace(lock, *info, aRegs, nativeStack);
}
#endif
aCallback(nativeStack.mPCs, nativeStack.mCount);
});
aCallback(nativeStack.mPCs, nativeStack.mCount);
});
// NOTE: Make sure to disable the sampler before it is destroyed, in case
// the profiler is running at the same time.


@@ -10,11 +10,7 @@
// generic cross-platform way without requiring custom tools or kernel support.
//
// Samples are collected to form a timeline with optional timeline event
// (markers) used for filtering. Both "periodic" (in response to a timer) and
// "synchronous" (in response to an explicit sampling request via the API)
// samples are supported.
//
// The profiler collects samples that include native stacks and
// (markers) used for filtering. The samples include both native stacks and
// platform-independent "pseudostacks".
#ifndef GeckoProfiler_h