mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-10-19 08:15:31 +00:00
Bug 1357777 - LUL on x86_64-linux: recover frames by following the frame pointer chain. r=froydnj,jandem.
On x86_64-Linux, LUL can currently only unwind frames for which CFI unwind data is available. This causes a noticeable number of junk samples in the profiler, characterised by failures at transition points between JIT and native frame sequences.

This patch allows LUL to try recovering the previous frame using frame pointer chasing in the case where CFI isn't present. This allows LUL to unwind through or jump over interleaved JIT frames, because, respectively:

* The baseline JIT produces frame-pointerised code.
* If the profiler is enabled, IonMonkey doesn't produce frame-pointerised code, but also doesn't change the frame pointer register value. It can use the frame pointer if profiling is disabled, but that's irrelevant here.

The patch also adds counts of FP-recovered frames to LUL's statistics printing, to make it possible to assess how often this feature is used.

--HG-- extra : rebase_source : eadc54393788693b0e3f8d5129d48aaaad143a0b
This commit is contained in:
parent
1ba9dbe74f
commit
62e5c659b4
@ -1033,10 +1033,10 @@ DoNativeBacktrace(PS::LockRef aLock, ProfileBuffer* aBuffer,
|
||||
uintptr_t frameSPs[MAX_NATIVE_FRAMES];
|
||||
size_t framesAvail = mozilla::ArrayLength(framePCs);
|
||||
size_t framesUsed = 0;
|
||||
size_t scannedFramesAcquired = 0;
|
||||
size_t scannedFramesAcquired = 0, framePointerFramesAcquired = 0;
|
||||
lul::LUL* lul = gPS->LUL(aLock);
|
||||
lul->Unwind(&framePCs[0], &frameSPs[0],
|
||||
&framesUsed, &scannedFramesAcquired,
|
||||
&framesUsed, &framePointerFramesAcquired, &scannedFramesAcquired,
|
||||
framesAvail, scannedFramesAllowed,
|
||||
&startRegs, &stackImg);
|
||||
|
||||
@ -1052,7 +1052,9 @@ DoNativeBacktrace(PS::LockRef aLock, ProfileBuffer* aBuffer,
|
||||
// Update stats in the LUL stats object. Unfortunately this requires
|
||||
// four global memory operations.
|
||||
lul->mStats.mContext += 1;
|
||||
lul->mStats.mCFI += framesUsed - 1 - scannedFramesAcquired;
|
||||
lul->mStats.mCFI += framesUsed - 1 - framePointerFramesAcquired -
|
||||
scannedFramesAcquired;
|
||||
lul->mStats.mFP += framePointerFramesAcquired;
|
||||
lul->mStats.mScanned += scannedFramesAcquired;
|
||||
}
|
||||
|
||||
|
@ -896,13 +896,14 @@ LUL::MaybeShowStats()
|
||||
if (n_new >= 5000) {
|
||||
uint32_t n_new_Context = mStats.mContext - mStatsPrevious.mContext;
|
||||
uint32_t n_new_CFI = mStats.mCFI - mStatsPrevious.mCFI;
|
||||
uint32_t n_new_FP = mStats.mFP - mStatsPrevious.mFP;
|
||||
uint32_t n_new_Scanned = mStats.mScanned - mStatsPrevious.mScanned;
|
||||
mStatsPrevious = mStats;
|
||||
char buf[200];
|
||||
SprintfLiteral(buf,
|
||||
"LUL frame stats: TOTAL %5u"
|
||||
" CTX %4u CFI %4u SCAN %4u",
|
||||
n_new, n_new_Context, n_new_CFI, n_new_Scanned);
|
||||
" CTX %4u CFI %4u FP %4u SCAN %4u",
|
||||
n_new, n_new_Context, n_new_CFI, n_new_FP, n_new_Scanned);
|
||||
buf[sizeof(buf)-1] = 0;
|
||||
mLog(buf);
|
||||
}
|
||||
@ -1346,6 +1347,7 @@ void
|
||||
LUL::Unwind(/*OUT*/uintptr_t* aFramePCs,
|
||||
/*OUT*/uintptr_t* aFrameSPs,
|
||||
/*OUT*/size_t* aFramesUsed,
|
||||
/*OUT*/size_t* aFramePointerFramesAcquired,
|
||||
/*OUT*/size_t* aScannedFramesAcquired,
|
||||
size_t aFramesAvail,
|
||||
size_t aScannedFramesAllowed,
|
||||
@ -1545,8 +1547,62 @@ LUL::Unwind(/*OUT*/uintptr_t* aFramePCs,
|
||||
|
||||
} else {
|
||||
|
||||
// There's no RuleSet for the specified address, so see if
|
||||
// it's possible to get anywhere by stack-scanning.
|
||||
// There's no RuleSet for the specified address. On amd64_linux, see if
|
||||
// it's possible to recover the caller's frame by using the frame pointer.
|
||||
// This would probably work for the 32-bit case too, but hasn't been
|
||||
// tested for that case.
|
||||
|
||||
#if defined(GP_PLAT_amd64_linux)
|
||||
// We seek to compute (new_IP, new_SP, new_BP) from (old_BP, stack image),
|
||||
// and assume the following layout:
|
||||
//
|
||||
// <--- new_SP
|
||||
// +----------+
|
||||
// | new_IP | (return address)
|
||||
// +----------+
|
||||
// | new_BP | <--- old_BP
|
||||
// +----------+
|
||||
// | .... |
|
||||
// | .... |
|
||||
// | .... |
|
||||
// +----------+ <---- old_SP (arbitrary, but must be <= old_BP)
|
||||
|
||||
const size_t wordSzB = sizeof(uintptr_t);
|
||||
TaggedUWord old_xsp = regs.xsp;
|
||||
|
||||
// points at new_BP ?
|
||||
TaggedUWord old_xbp = regs.xbp;
|
||||
// points at new_IP ?
|
||||
TaggedUWord old_xbp_plus1 = regs.xbp + TaggedUWord(1 * wordSzB);
|
||||
// is the new_SP ?
|
||||
TaggedUWord old_xbp_plus2 = regs.xbp + TaggedUWord(2 * wordSzB);
|
||||
|
||||
if (old_xbp.Valid() && old_xbp.IsAligned() &&
|
||||
old_xsp.Valid() && old_xsp.IsAligned() &&
|
||||
old_xsp.Value() <= old_xbp.Value()) {
|
||||
// We don't need to do any range, alignment or validity checks for
|
||||
// addresses passed to DerefTUW, since that performs them itself, and
|
||||
// returns an invalid value on failure. Any such value will poison
|
||||
// subsequent uses, and we do a final check for validity before putting
|
||||
// the computed values into |regs|.
|
||||
TaggedUWord new_xbp = DerefTUW(old_xbp, aStackImg);
|
||||
if (new_xbp.Valid() && new_xbp.IsAligned() &&
|
||||
old_xbp.Value() < new_xbp.Value()) {
|
||||
TaggedUWord new_xip = DerefTUW(old_xbp_plus1, aStackImg);
|
||||
TaggedUWord new_xsp = old_xbp_plus2;
|
||||
if (new_xbp.Valid() && new_xip.Valid() && new_xsp.Valid()) {
|
||||
regs.xbp = new_xbp;
|
||||
regs.xip = new_xip;
|
||||
regs.xsp = new_xsp;
|
||||
(*aFramePointerFramesAcquired)++;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// As a last-ditch resort, see if it's possible to get anywhere by
|
||||
// stack-scanning.
|
||||
|
||||
// Use stack scanning frugally.
|
||||
if (n_scanned_frames++ >= aScannedFramesAllowed) {
|
||||
@ -1768,9 +1824,10 @@ bool GetAndCheckStackTrace(LUL* aLUL, const char* dstring)
|
||||
size_t framesAvail = mozilla::ArrayLength(framePCs);
|
||||
size_t framesUsed = 0;
|
||||
size_t scannedFramesAllowed = 0;
|
||||
size_t scannedFramesAcquired = 0;
|
||||
size_t scannedFramesAcquired = 0, framePointerFramesAcquired = 0;
|
||||
aLUL->Unwind( &framePCs[0], &frameSPs[0],
|
||||
&framesUsed, &scannedFramesAcquired,
|
||||
&framesUsed,
|
||||
&framePointerFramesAcquired, &scannedFramesAcquired,
|
||||
framesAvail, scannedFramesAllowed,
|
||||
&startRegs, stackImg );
|
||||
|
||||
|
@ -183,6 +183,7 @@ public:
|
||||
LULStats()
|
||||
: mContext(0)
|
||||
, mCFI(0)
|
||||
, mFP(0)
|
||||
, mScanned(0)
|
||||
{}
|
||||
|
||||
@ -190,6 +191,7 @@ public:
|
||||
explicit LULStats(const LULStats<S>& aOther)
|
||||
: mContext(aOther.mContext)
|
||||
, mCFI(aOther.mCFI)
|
||||
, mFP(aOther.mFP)
|
||||
, mScanned(aOther.mScanned)
|
||||
{}
|
||||
|
||||
@ -198,6 +200,7 @@ public:
|
||||
{
|
||||
mContext = aOther.mContext;
|
||||
mCFI = aOther.mCFI;
|
||||
mFP = aOther.mFP;
|
||||
mScanned = aOther.mScanned;
|
||||
return *this;
|
||||
}
|
||||
@ -205,11 +208,13 @@ public:
|
||||
template <typename S>
|
||||
uint32_t operator-(const LULStats<S>& aOther) {
|
||||
return (mContext - aOther.mContext) +
|
||||
(mCFI - aOther.mCFI) + (mScanned - aOther.mScanned);
|
||||
(mCFI - aOther.mCFI) + (mFP - aOther.mFP) +
|
||||
(mScanned - aOther.mScanned);
|
||||
}
|
||||
|
||||
T mContext; // Number of context frames
|
||||
T mCFI; // Number of CFI/EXIDX frames
|
||||
T mFP; // Number of frame-pointer recovered frames
|
||||
T mScanned; // Number of scanned frames
|
||||
};
|
||||
|
||||
@ -338,6 +343,7 @@ public:
|
||||
void Unwind(/*OUT*/uintptr_t* aFramePCs,
|
||||
/*OUT*/uintptr_t* aFrameSPs,
|
||||
/*OUT*/size_t* aFramesUsed,
|
||||
/*OUT*/size_t* aFramePointerFramesAcquired,
|
||||
/*OUT*/size_t* aScannedFramesAcquired,
|
||||
size_t aFramesAvail,
|
||||
size_t aScannedFramesAllowed,
|
||||
|
Loading…
Reference in New Issue
Block a user