mirror of
https://github.com/RPCS3/llvm.git
synced 2025-04-09 00:51:41 +00:00
Codegen: MachineBlockPlacement Improve probability layout.
The following pattern was being laid out poorly:

      A
     / \
    B   C
   / \ / \
  D   E   ? (Doesn't matter)

Where A->B is far more likely than A->C, and prob(B->D) = prob(B->E). The current algorithm gives: A,B,C,E (D goes on worklist). It does this even if C has a frequency count of 0. This patch adjusts the layout calculation so that if freq(B->E) >> freq(C->E) then we go ahead and lay out E rather than C. Fallthrough half the time is better than fallthrough never, or fallthrough very rarely. The resulting layout is: A,B,E (C and D are in a worklist). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@277187 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
02e59638f8
commit
9f1f15e084
@ -631,18 +631,46 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
|
|||||||
// BB->Succ. This is equivalent to looking the CFG backward with backward
|
// BB->Succ. This is equivalent to looking the CFG backward with backward
|
||||||
// edge: Prob(Succ->BB) needs to be >= HotProb in order to be selected (without
|
// edge: Prob(Succ->BB) needs to be >= HotProb in order to be selected (without
|
||||||
// profile data).
|
// profile data).
|
||||||
|
// --------------------------------------------------------------------------
|
||||||
|
// Case 3: forked diamond
|
||||||
|
// S
|
||||||
|
// / \
|
||||||
|
// / \
|
||||||
|
// BB Pred
|
||||||
|
// | \ / |
|
||||||
|
// | \ / |
|
||||||
|
// | X |
|
||||||
|
// | / \ |
|
||||||
|
// | / \ |
|
||||||
|
// S1 S2
|
||||||
|
//
|
||||||
|
// The current block is BB and edge BB->S1 is now being evaluated.
|
||||||
|
// As above S->BB was already selected because
|
||||||
|
// prob(S->BB) > prob(S->Pred). Assume that prob(BB->S1) >= prob(BB->S2).
|
||||||
|
//
|
||||||
|
// topo-order:
|
||||||
|
//
|
||||||
|
// S-------| ---S
|
||||||
|
// | | | |
|
||||||
|
// ---BB | | BB
|
||||||
|
// | | | |
|
||||||
|
// | Pred----| | S1----
|
||||||
|
// | | | |
|
||||||
|
// --(S1 or S2) ---Pred--
|
||||||
|
//
|
||||||
|
// topo-cost = freq(S->Pred) + freq(BB->S1) + freq(BB->S2)
|
||||||
|
// + min(freq(Pred->S1), freq(Pred->S2))
|
||||||
|
// Non-topo-order cost:
|
||||||
|
// In the worst case, S2 will not get laid out after Pred.
|
||||||
|
// non-topo-cost = 2 * freq(S->Pred) + freq(BB->S2).
|
||||||
|
// To be conservative, we can assume that min(freq(Pred->S1), freq(Pred->S2))
|
||||||
|
// is 0. Then the non topo layout is better when
|
||||||
|
// freq(S->Pred) < freq(BB->S1).
|
||||||
|
// This is exactly what is checked below.
|
||||||
|
// Note there are other shapes that apply (Pred may not be a single block),
|
||||||
|
// but they all fit this general pattern.
|
||||||
BranchProbability HotProb = getLayoutSuccessorProbThreshold(BB);
|
BranchProbability HotProb = getLayoutSuccessorProbThreshold(BB);
|
||||||
|
|
||||||
// Forward checking. For case 2, SuccProb will be 1.
|
|
||||||
if (SuccProb < HotProb) {
|
|
||||||
DEBUG(dbgs() << " Not a candidate: " << getBlockName(Succ) << " "
|
|
||||||
<< "Respecting topological ordering because "
|
|
||||||
<< "probability is less than prob treshold: "
|
|
||||||
<< SuccProb << "\n");
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Make sure that a hot successor doesn't have a globally more
|
// Make sure that a hot successor doesn't have a globally more
|
||||||
// important predecessor.
|
// important predecessor.
|
||||||
BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb;
|
BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb;
|
||||||
@ -653,11 +681,11 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
|
|||||||
(BlockFilter && !BlockFilter->count(Pred)) ||
|
(BlockFilter && !BlockFilter->count(Pred)) ||
|
||||||
BlockToChain[Pred] == &Chain)
|
BlockToChain[Pred] == &Chain)
|
||||||
continue;
|
continue;
|
||||||
// Do backward checking. For case 1, it is actually redundant check. For
|
// Do backward checking.
|
||||||
// case 2 above, we need a backward checking to filter out edges that are
|
// For all cases above, we need a backward checking to filter out edges that
|
||||||
// not 'strongly' biased. With profile data available, the check is mostly
|
// are not 'strongly' biased. With profile data available, the check is
|
||||||
// redundant too (when threshold prob is set at 50%) unless S has more than
|
// mostly redundant for case 2 (when threshold prob is set at 50%) unless S
|
||||||
// two successors.
|
// has more than two successors.
|
||||||
// BB Pred
|
// BB Pred
|
||||||
// \ /
|
// \ /
|
||||||
// Succ
|
// Succ
|
||||||
@ -666,6 +694,8 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
|
|||||||
// i.e. freq(BB->Succ) > freq(BB->Succ) * HotProb + freq(Pred->Succ) *
|
// i.e. freq(BB->Succ) > freq(BB->Succ) * HotProb + freq(Pred->Succ) *
|
||||||
// HotProb
|
// HotProb
|
||||||
// i.e. freq(BB->Succ) * (1 - HotProb) > freq(Pred->Succ) * HotProb
|
// i.e. freq(BB->Succ) * (1 - HotProb) > freq(Pred->Succ) * HotProb
|
||||||
|
// Case 1 is covered too, because the first equation reduces to:
|
||||||
|
// prob(BB->Succ) > HotProb. (freq(Succ) = freq(BB) for a triangle)
|
||||||
BlockFrequency PredEdgeFreq =
|
BlockFrequency PredEdgeFreq =
|
||||||
MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, Succ);
|
MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, Succ);
|
||||||
if (PredEdgeFreq * HotProb >= CandidateEdgeFreq * HotProb.getCompl()) {
|
if (PredEdgeFreq * HotProb >= CandidateEdgeFreq * HotProb.getCompl()) {
|
||||||
|
@ -1283,6 +1283,174 @@ exit:
|
|||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
declare void @a()
|
||||||
|
declare void @b()
|
||||||
|
|
||||||
|
define void @test_forked_hot_diamond(i32* %a) {
|
||||||
|
; Test that a hot-branch with probability > 80% followed by a 50/50 branch
|
||||||
|
; will not place the cold predecessor if the probability for the fallthrough
|
||||||
|
; remains above 80%
|
||||||
|
; CHECK-LABEL: test_forked_hot_diamond
|
||||||
|
; CHECK: %entry
|
||||||
|
; CHECK: %then
|
||||||
|
; CHECK: %fork1
|
||||||
|
; CHECK: %else
|
||||||
|
; CHECK: %fork2
|
||||||
|
; CHECK: %exit
|
||||||
|
entry:
|
||||||
|
%gep1 = getelementptr i32, i32* %a, i32 1
|
||||||
|
%val1 = load i32, i32* %gep1
|
||||||
|
%cond1 = icmp ugt i32 %val1, 1
|
||||||
|
br i1 %cond1, label %then, label %else, !prof !5
|
||||||
|
|
||||||
|
then:
|
||||||
|
call void @hot_function()
|
||||||
|
%gep2 = getelementptr i32, i32* %a, i32 2
|
||||||
|
%val2 = load i32, i32* %gep2
|
||||||
|
%cond2 = icmp ugt i32 %val2, 2
|
||||||
|
br i1 %cond2, label %fork1, label %fork2, !prof !8
|
||||||
|
|
||||||
|
else:
|
||||||
|
call void @cold_function()
|
||||||
|
%gep3 = getelementptr i32, i32* %a, i32 3
|
||||||
|
%val3 = load i32, i32* %gep3
|
||||||
|
%cond3 = icmp ugt i32 %val3, 3
|
||||||
|
br i1 %cond3, label %fork1, label %fork2, !prof !8
|
||||||
|
|
||||||
|
fork1:
|
||||||
|
call void @a()
|
||||||
|
br label %exit
|
||||||
|
|
||||||
|
fork2:
|
||||||
|
call void @b()
|
||||||
|
br label %exit
|
||||||
|
|
||||||
|
exit:
|
||||||
|
call void @hot_function()
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @test_forked_hot_diamond_gets_cold(i32* %a) {
|
||||||
|
; Test that a hot-branch with probability > 80% followed by a 50/50 branch
|
||||||
|
; will place the cold predecessor if the probability for the fallthrough
|
||||||
|
; falls below 80%
|
||||||
|
; The probability for both branches is 85%. For then2 vs else1
|
||||||
|
; this results in a compounded probability of 83%.
|
||||||
|
; Neither then2->fork1 nor then2->fork2 has a large enough relative
|
||||||
|
; probability to break the CFG.
|
||||||
|
; Relative probs:
|
||||||
|
; then2 -> fork1 vs else1 -> fork1 = 71%
|
||||||
|
; then2 -> fork2 vs else2 -> fork2 = 74%
|
||||||
|
; CHECK-LABEL: test_forked_hot_diamond_gets_cold
|
||||||
|
; CHECK: %entry
|
||||||
|
; CHECK: %then1
|
||||||
|
; CHECK: %then2
|
||||||
|
; CHECK: %else1
|
||||||
|
; CHECK: %fork1
|
||||||
|
; CHECK: %else2
|
||||||
|
; CHECK: %fork2
|
||||||
|
; CHECK: %exit
|
||||||
|
entry:
|
||||||
|
%gep1 = getelementptr i32, i32* %a, i32 1
|
||||||
|
%val1 = load i32, i32* %gep1
|
||||||
|
%cond1 = icmp ugt i32 %val1, 1
|
||||||
|
br i1 %cond1, label %then1, label %else1, !prof !9
|
||||||
|
|
||||||
|
then1:
|
||||||
|
call void @hot_function()
|
||||||
|
%gep2 = getelementptr i32, i32* %a, i32 2
|
||||||
|
%val2 = load i32, i32* %gep2
|
||||||
|
%cond2 = icmp ugt i32 %val2, 2
|
||||||
|
br i1 %cond2, label %then2, label %else2, !prof !9
|
||||||
|
|
||||||
|
else1:
|
||||||
|
call void @cold_function()
|
||||||
|
br label %fork1
|
||||||
|
|
||||||
|
then2:
|
||||||
|
call void @hot_function()
|
||||||
|
%gep3 = getelementptr i32, i32* %a, i32 3
|
||||||
|
%val3 = load i32, i32* %gep2
|
||||||
|
%cond3 = icmp ugt i32 %val2, 3
|
||||||
|
br i1 %cond3, label %fork1, label %fork2, !prof !8
|
||||||
|
|
||||||
|
else2:
|
||||||
|
call void @cold_function()
|
||||||
|
br label %fork2
|
||||||
|
|
||||||
|
fork1:
|
||||||
|
call void @a()
|
||||||
|
br label %exit
|
||||||
|
|
||||||
|
fork2:
|
||||||
|
call void @b()
|
||||||
|
br label %exit
|
||||||
|
|
||||||
|
exit:
|
||||||
|
call void @hot_function()
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @test_forked_hot_diamond_stays_hot(i32* %a) {
|
||||||
|
; Test that a hot-branch with probability > 88.88% (1:8) followed by a 50/50
|
||||||
|
; branch will not place the cold predecessor as the probability for the
|
||||||
|
; fallthrough stays above 80%
|
||||||
|
; (1:8) followed by (1:1) is still (1:4)
|
||||||
|
; Here we use 90% probability because two in a row
|
||||||
|
; have an 89% probability vs the original branch.
|
||||||
|
; CHECK-LABEL: test_forked_hot_diamond_stays_hot
|
||||||
|
; CHECK: %entry
|
||||||
|
; CHECK: %then1
|
||||||
|
; CHECK: %then2
|
||||||
|
; CHECK: %fork1
|
||||||
|
; CHECK: %else1
|
||||||
|
; CHECK: %else2
|
||||||
|
; CHECK: %fork2
|
||||||
|
; CHECK: %exit
|
||||||
|
entry:
|
||||||
|
%gep1 = getelementptr i32, i32* %a, i32 1
|
||||||
|
%val1 = load i32, i32* %gep1
|
||||||
|
%cond1 = icmp ugt i32 %val1, 1
|
||||||
|
br i1 %cond1, label %then1, label %else1, !prof !10
|
||||||
|
|
||||||
|
then1:
|
||||||
|
call void @hot_function()
|
||||||
|
%gep2 = getelementptr i32, i32* %a, i32 2
|
||||||
|
%val2 = load i32, i32* %gep2
|
||||||
|
%cond2 = icmp ugt i32 %val2, 2
|
||||||
|
br i1 %cond2, label %then2, label %else2, !prof !10
|
||||||
|
|
||||||
|
else1:
|
||||||
|
call void @cold_function()
|
||||||
|
br label %fork1
|
||||||
|
|
||||||
|
then2:
|
||||||
|
call void @hot_function()
|
||||||
|
%gep3 = getelementptr i32, i32* %a, i32 3
|
||||||
|
%val3 = load i32, i32* %gep2
|
||||||
|
%cond3 = icmp ugt i32 %val2, 3
|
||||||
|
br i1 %cond3, label %fork1, label %fork2, !prof !8
|
||||||
|
|
||||||
|
else2:
|
||||||
|
call void @cold_function()
|
||||||
|
br label %fork2
|
||||||
|
|
||||||
|
fork1:
|
||||||
|
call void @a()
|
||||||
|
br label %exit
|
||||||
|
|
||||||
|
fork2:
|
||||||
|
call void @b()
|
||||||
|
br label %exit
|
||||||
|
|
||||||
|
exit:
|
||||||
|
call void @hot_function()
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
!5 = !{!"branch_weights", i32 84, i32 16}
|
!5 = !{!"branch_weights", i32 84, i32 16}
|
||||||
!6 = !{!"function_entry_count", i32 10}
|
!6 = !{!"function_entry_count", i32 10}
|
||||||
!7 = !{!"branch_weights", i32 60, i32 40}
|
!7 = !{!"branch_weights", i32 60, i32 40}
|
||||||
|
!8 = !{!"branch_weights", i32 5001, i32 4999}
|
||||||
|
!9 = !{!"branch_weights", i32 85, i32 15}
|
||||||
|
!10 = !{!"branch_weights", i32 90, i32 10}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user