mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-02-10 11:23:52 +00:00
[llvm-profgen] Filter out invalid LBR ranges.
The profiler can sometimes give us a LBR trace that implicates bogus code ranges. For example, 0xc5acb56/0xc66c6c0 0xc628195/0xf31fbb0 0xc611261/0xc628130 0xc5c1a21/0xc6111c0 0x1f7edfd3/0xc5c3a50 0xc5c154f/0x1f7edec0 0xe8eed07/0xc5c11e0 , note that the first two pairs are supposed to form a linear execution range, in this case, it is [0xf31fbb0, 0xc5acb56] , which doesn't make sense. Such bogus ranges should be ruled out to avoid generating a bad profile. I'm fixing this for both CS and non-CS cases. Reviewed By: wenlei Differential Revision: https://reviews.llvm.org/D123271
This commit is contained in:
parent
208f93c1fd
commit
8a0406dcc8
10
llvm/test/tools/llvm-profgen/Inputs/invalid-range.perfscript
Normal file
10
llvm/test/tools/llvm-profgen/Inputs/invalid-range.perfscript
Normal file
@ -0,0 +1,10 @@
|
||||
PERF_RECORD_MMAP2 1243676/1243676: [0x201000(0x1000) @ 0 00:1d 224517108 1044165]: r-xp /home/noinline-cs-pseudoprobe.perfbin
|
||||
|
||||
20179e
|
||||
2017f9
|
||||
7f83e84e7793
|
||||
5541f689495641d7
|
||||
0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017d8/0x2017e3/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017cf/0x20179e/P/-/-/0
|
||||
|
||||
|
||||
// The consecutive pairs 0x2017bf/0x201760 and 0x2017d8/0x2017e3 form an invalid execution range [0x2017e3, 0x2017bf], should be ignored to avoid bogus instruction ranges.
|
68
llvm/test/tools/llvm-profgen/invalid-range.test
Normal file
68
llvm/test/tools/llvm-profgen/invalid-range.test
Normal file
@ -0,0 +1,68 @@
|
||||
; In the perfscript input, the consecutive branch pairs 0x2017bf/0x201760 and 0x2017d8/0x2017e3 form an invalid execution range [0x2017e3, 0x2017bf].
|
||||
; We are testing only the invalid range is dropped to avoid bogus instruction ranges. All other ranges and all branch samples should be kept.
|
||||
;
|
||||
; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/invalid-range.perfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin --output=%t1 --skip-symbolization --ignore-stack-samples --use-offset=0
|
||||
; RUN: FileCheck %s --input-file %t1 --check-prefix=NOCS
|
||||
|
||||
; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/invalid-range.perfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin --output=%t2 --skip-symbolization --use-offset=0
|
||||
; RUN: FileCheck %s --input-file %t2 --check-prefix=CS
|
||||
|
||||
|
||||
; NOCS: 4
|
||||
; NOCS-NEXT: 201760-20177f:2
|
||||
; NOCS-NEXT: 20179e-2017bf:1
|
||||
; NOCS-NEXT: 2017c4-2017cf:1
|
||||
; NOCS-NEXT: 2017c4-2017d8:1
|
||||
; NOCS-NEXT: 4
|
||||
; NOCS-NEXT: 20177f->2017c4:2
|
||||
; NOCS-NEXT: 2017bf->201760:2
|
||||
; NOCS-NEXT: 2017cf->20179e:2
|
||||
; NOCS-NEXT: 2017d8->2017e3:1
|
||||
|
||||
|
||||
; CS: []
|
||||
; CS-NEXT: 3
|
||||
; CS-NEXT: 201760-20177f:1
|
||||
; CS-NEXT: 20179e-2017bf:1
|
||||
; CS-NEXT: 2017c4-2017d8:1
|
||||
; CS-NEXT: 4
|
||||
; CS-NEXT: 20177f->2017c4:1
|
||||
; CS-NEXT: 2017bf->201760:1
|
||||
; CS-NEXT: 2017cf->20179e:1
|
||||
; CS-NEXT: 2017d8->2017e3:1
|
||||
; CS-NEXT: [0x7f4]
|
||||
; CS-NEXT: 1
|
||||
; CS-NEXT: 2017c4-2017cf:1
|
||||
; CS-NEXT: 2
|
||||
; CS-NEXT: 2017bf->201760:1
|
||||
; CS-NEXT: 2017cf->20179e:1
|
||||
; CS-NEXT: [0x7f4 @ 0x7bf]
|
||||
; CS-NEXT: 1
|
||||
; CS-NEXT: 201760-20177f:1
|
||||
; CS-NEXT: 1
|
||||
; CS-NEXT: 20177f->2017c4:1
|
||||
|
||||
; clang -O3 -fexperimental-new-pass-manager -fuse-ld=lld -fpseudo-probe-for-profiling
|
||||
; -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Xclang -mdisable-tail-calls
|
||||
; -g test.c -o a.out
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
int bar(int x, int y) {
|
||||
if (x % 3) {
|
||||
return x - y;
|
||||
}
|
||||
return x + y;
|
||||
}
|
||||
|
||||
void foo() {
|
||||
int s, i = 0;
|
||||
while (i++ < 4000 * 4000)
|
||||
if (i % 91) s = bar(i, s); else s += 30;
|
||||
printf("sum is %d\n", s);
|
||||
}
|
||||
|
||||
int main() {
|
||||
foo();
|
||||
return 0;
|
||||
}
|
@ -100,6 +100,11 @@ void VirtualUnwinder::unwindLinear(UnwindState &State, uint64_t Repeat) {
|
||||
InstructionPointer &IP = State.InstPtr;
|
||||
uint64_t Target = State.getCurrentLBRTarget();
|
||||
uint64_t End = IP.Address;
|
||||
if (Target > End) {
|
||||
// Skip unwinding the rest of LBR trace when a bogus range is seen.
|
||||
State.setInvalid();
|
||||
return;
|
||||
}
|
||||
if (Binary->usePseudoProbes()) {
|
||||
// We don't need to top frame probe since it should be extracted
|
||||
// from the range.
|
||||
@ -303,19 +308,28 @@ bool VirtualUnwinder::unwind(const PerfSample *Sample, uint64_t Repeat) {
|
||||
// extra frame when processing the return paired with this call.
|
||||
unwindCall(State);
|
||||
} else if (isReturnState(State)) {
|
||||
// Unwind returns - check whether the IP is indeed at a return instruction
|
||||
// Unwind returns - check whether the IP is indeed at a return
|
||||
// instruction
|
||||
unwindReturn(State);
|
||||
} else {
|
||||
} else if (isValidState(State)) {
|
||||
// Unwind branches
|
||||
// For regular intra function branches, we only need to record branch with
|
||||
// context. For an artificial branch cross function boundaries, we got an
|
||||
// issue with returning to external code. Take the two LBR enties for
|
||||
// example: [foo:8(RETURN), ext:1] [ext:3(CALL), bar:1] After perf reader,
|
||||
// we only get[foo:8(RETURN), bar:1], unwinder will be confused like foo
|
||||
// return to bar. Here we detect and treat this case as BRANCH instead of
|
||||
// RETURN which only update the source address.
|
||||
// For regular intra function branches, we only need to record branch
|
||||
// with context. For an artificial branch cross function boundaries, we
|
||||
// got an issue with returning to external code. Take the two LBR enties
|
||||
// for example: [foo:8(RETURN), ext:1] [ext:3(CALL), bar:1] After perf
|
||||
// reader, we only get[foo:8(RETURN), bar:1], unwinder will be confused
|
||||
// like foo return to bar. Here we detect and treat this case as BRANCH
|
||||
// instead of RETURN which only update the source address.
|
||||
unwindBranch(State);
|
||||
} else {
|
||||
// Skip unwinding the rest of LBR trace. Reset the stack and update the
|
||||
// state so that the rest of the trace can still be processed as if they
|
||||
// do not have stack samples.
|
||||
State.clearCallStack();
|
||||
State.InstPtr.update(State.getCurrentLBRSource());
|
||||
State.pushFrame(State.InstPtr.Address);
|
||||
}
|
||||
|
||||
State.advanceLBR();
|
||||
// Record `branch` with calling context after unwinding.
|
||||
recordBranchCount(Branch, State, Repeat);
|
||||
@ -720,7 +734,9 @@ void HybridPerfReader::parseSample(TraceStream &TraceIt, uint64_t Count) {
|
||||
// ... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries
|
||||
//
|
||||
std::shared_ptr<PerfSample> Sample = std::make_shared<PerfSample>();
|
||||
|
||||
#ifndef NDEBUG
|
||||
Sample->Linenum = TraceIt.getLineNumber();
|
||||
#endif
|
||||
// Parsing call stack and populate into PerfSample.CallStack
|
||||
if (!extractCallstack(TraceIt, Sample->CallStack)) {
|
||||
// Skip the next LBR line matched current call stack
|
||||
@ -915,8 +931,10 @@ void PerfScriptReader::computeCounterFromLBR(const PerfSample *Sample,
|
||||
// If this not the first LBR, update the range count between TO of current
|
||||
// LBR and FROM of next LBR.
|
||||
uint64_t StartOffset = TargetOffset;
|
||||
if (EndOffeset != 0)
|
||||
Counter.recordRangeCount(StartOffset, EndOffeset, Repeat);
|
||||
if (EndOffeset != 0) {
|
||||
if (StartOffset <= EndOffeset)
|
||||
Counter.recordRangeCount(StartOffset, EndOffeset, Repeat);
|
||||
}
|
||||
EndOffeset = SourceOffset;
|
||||
}
|
||||
}
|
||||
@ -1161,41 +1179,55 @@ void PerfScriptReader::warnInvalidRange() {
|
||||
const char *RangeCrossFuncMsg =
|
||||
"Fall through range should not cross function boundaries, likely due to "
|
||||
"profile and binary mismatch.";
|
||||
const char *BogusRangeMsg = "Range start is after range end.";
|
||||
|
||||
uint64_t TotalRangeNum = 0;
|
||||
uint64_t InstNotBoundary = 0;
|
||||
uint64_t UnmatchedRange = 0;
|
||||
uint64_t RangeCrossFunc = 0;
|
||||
uint64_t BogusRange = 0;
|
||||
|
||||
for (auto &I : Ranges) {
|
||||
uint64_t StartOffset = I.first.first;
|
||||
uint64_t EndOffset = I.first.second;
|
||||
TotalRangeNum += I.second;
|
||||
|
||||
if (!Binary->offsetIsCode(StartOffset) ||
|
||||
!Binary->offsetIsTransfer(EndOffset)) {
|
||||
InstNotBoundary++;
|
||||
InstNotBoundary += I.second;
|
||||
WarnInvalidRange(StartOffset, EndOffset, EndNotBoundaryMsg);
|
||||
}
|
||||
|
||||
auto *FRange = Binary->findFuncRangeForOffset(StartOffset);
|
||||
if (!FRange) {
|
||||
UnmatchedRange++;
|
||||
UnmatchedRange += I.second;
|
||||
WarnInvalidRange(StartOffset, EndOffset, DanglingRangeMsg);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (EndOffset >= FRange->EndOffset) {
|
||||
RangeCrossFunc++;
|
||||
RangeCrossFunc += I.second;
|
||||
WarnInvalidRange(StartOffset, EndOffset, RangeCrossFuncMsg);
|
||||
}
|
||||
|
||||
if (StartOffset > EndOffset) {
|
||||
BogusRange += I.second;
|
||||
WarnInvalidRange(StartOffset, EndOffset, BogusRangeMsg);
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t TotalRangeNum = Ranges.size();
|
||||
emitWarningSummary(InstNotBoundary, TotalRangeNum,
|
||||
"of profiled ranges are not on instruction boundary.");
|
||||
emitWarningSummary(UnmatchedRange, TotalRangeNum,
|
||||
"of profiled ranges do not belong to any functions.");
|
||||
emitWarningSummary(RangeCrossFunc, TotalRangeNum,
|
||||
"of profiled ranges do cross function boundaries.");
|
||||
emitWarningSummary(
|
||||
InstNotBoundary, TotalRangeNum,
|
||||
"of samples are from ranges that are not on instruction boundary.");
|
||||
emitWarningSummary(
|
||||
UnmatchedRange, TotalRangeNum,
|
||||
"of samples are from ranges that do not belong to any functions.");
|
||||
emitWarningSummary(
|
||||
RangeCrossFunc, TotalRangeNum,
|
||||
"of samples are from ranges that do cross function boundaries.");
|
||||
emitWarningSummary(
|
||||
BogusRange, TotalRangeNum,
|
||||
"of samples are from ranges that have range start after range end.");
|
||||
}
|
||||
|
||||
void PerfScriptReader::parsePerfTraces() {
|
||||
|
@ -197,7 +197,10 @@ struct PerfSample {
|
||||
}
|
||||
|
||||
#ifndef NDEBUG
|
||||
uint64_t Linenum = 0;
|
||||
|
||||
void print() const {
|
||||
dbgs() << "Line " << Linenum << "\n";
|
||||
dbgs() << "LBR stack\n";
|
||||
printLBRStack(LBRStack);
|
||||
dbgs() << "Call stack\n";
|
||||
@ -255,6 +258,9 @@ struct UnwindState {
|
||||
const SmallVector<LBREntry, 16> &LBRStack;
|
||||
// Used to iterate the address range
|
||||
InstructionPointer InstPtr;
|
||||
// Indicate whether unwinding is currently in a bad state which requires to
|
||||
// skip all subsequent unwinding.
|
||||
bool Invalid = false;
|
||||
UnwindState(const PerfSample *Sample, const ProfiledBinary *Binary)
|
||||
: Binary(Binary), LBRStack(Sample->LBRStack),
|
||||
InstPtr(Binary, Sample->CallStack.front()) {
|
||||
@ -284,6 +290,7 @@ struct UnwindState {
|
||||
"IP should align with context leaf");
|
||||
}
|
||||
|
||||
void setInvalid() { Invalid = true; }
|
||||
bool hasNextLBR() const { return LBRIndex < LBRStack.size(); }
|
||||
uint64_t getCurrentLBRSource() const { return LBRStack[LBRIndex].Source; }
|
||||
uint64_t getCurrentLBRTarget() const { return LBRStack[LBRIndex].Target; }
|
||||
@ -476,10 +483,14 @@ private:
|
||||
bool isCallState(UnwindState &State) const {
|
||||
// The tail call frame is always missing here in stack sample, we will
|
||||
// use a specific tail call tracker to infer it.
|
||||
return Binary->addressIsCall(State.getCurrentLBRSource());
|
||||
return isValidState(State) &&
|
||||
Binary->addressIsCall(State.getCurrentLBRSource());
|
||||
}
|
||||
|
||||
bool isReturnState(UnwindState &State) const {
|
||||
if (!isValidState(State))
|
||||
return false;
|
||||
|
||||
// Simply check addressIsReturn, as ret is always reliable, both for
|
||||
// regular call and tail call.
|
||||
if (!Binary->addressIsReturn(State.getCurrentLBRSource()))
|
||||
@ -497,6 +508,8 @@ private:
|
||||
return (CallAddr != 0);
|
||||
}
|
||||
|
||||
bool isValidState(UnwindState &State) const { return !State.Invalid; }
|
||||
|
||||
void unwindCall(UnwindState &State);
|
||||
void unwindLinear(UnwindState &State, uint64_t Repeat);
|
||||
void unwindReturn(UnwindState &State);
|
||||
|
Loading…
x
Reference in New Issue
Block a user