Remove return heuristics from the static branch probabilities, and

introduce no-return or unreachable heuristics.

The return heuristics from the Ball and Larus paper don't work well in
practice as they pessimize early return paths. The only good hitrate
return heuristics are those for:
 - NULL return
 - Constant return
 - negative integer return

Only the last of these three can possibly require significant code for
the returning block, and even the last is fairly rare and usually also
a constant. As a consequence, even for the cold return paths, there is
little code on that return path, and so little code density to be gained
by sinking it. The places where sinking these blocks is valuable (inner
loops) will already be weighted appropriately as the edge is a loop-exit
branch.

All of this aside, early returns are nearly as common as all three of
these return categories, and should actually be predicted as taken!
Rather than muddy the waters of the static predictions, just remain
silent on returns and let the CFG itself dictate any layout or other
issues.

However, the return heuristic was flagging one very important case:
unreachable. Unfortunately it still gave a 1/4 chance of the
branch-to-unreachable occurring. It also didn't do a rigorous job of
finding those blocks which post-dominate an unreachable block.

This patch builds a more powerful analysis that should flag all branches
to blocks known to then reach unreachable. It also has better worst-case
runtime complexity by not looping through successors for each block. The
previous code would perform an N^2 walk in the event of a single entry
block branching to N successors with a switch where each successor falls
through to the next and they finally fall through to a return.

Test case added for noreturn heuristics. Also doxygen comments improved
along the way.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142793 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chandler Carruth 2011-10-24 12:01:08 +00:00
parent aa337b7cd5
commit de1c9bb450
3 changed files with 163 additions and 77 deletions

View File

@ -17,6 +17,7 @@
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/BranchProbability.h"
namespace llvm {
@ -109,11 +110,14 @@ private:
/// \brief Track the last function we run over for printing.
Function *LastF;
/// \brief Track the set of blocks that always lead to an unreachable-terminated block.
SmallPtrSet<BasicBlock *, 16> PostDominatedByUnreachable;
/// \brief Get sum of the block successors' weights.
uint32_t getSumForBlock(const BasicBlock *BB) const;
bool calcUnreachableHeuristics(BasicBlock *BB);
bool calcMetadataWeights(BasicBlock *BB);
bool calcReturnHeuristics(BasicBlock *BB);
bool calcPointerHeuristics(BasicBlock *BB);
bool calcLoopBranchHeuristics(BasicBlock *BB);
bool calcZeroHeuristics(BasicBlock *BB);

View File

@ -18,6 +18,7 @@
#include "llvm/Metadata.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
@ -54,8 +55,18 @@ char BranchProbabilityInfo::ID = 0;
// Taken / not-taken weights for the loop branch heuristic (presumably consumed
// by calcLoopBranchHeuristics, whose body is not shown here -- confirm).
static const uint32_t LBH_TAKEN_WEIGHT = 124;
static const uint32_t LBH_NONTAKEN_WEIGHT = 4;
// Weights for the return heuristic: calcReturnHeuristics divides
// RH_TAKEN_WEIGHT among the successors that do not lead to a return and
// RH_NONTAKEN_WEIGHT among those that do.
static const uint32_t RH_TAKEN_WEIGHT = 24;
static const uint32_t RH_NONTAKEN_WEIGHT = 8;
/// \brief Unreachable-terminating branch taken weight.
///
/// This is the weight for a branch being taken to a block that terminates
/// (eventually) in unreachable. These are predicted as unlikely as possible.
static const uint32_t UR_TAKEN_WEIGHT = 1;
/// \brief Unreachable-terminating branch not-taken weight.
///
/// This is the weight for a branch not being taken toward a block that
/// terminates (eventually) in unreachable. Such a branch is essentially never
/// taken.
static const uint32_t UR_NONTAKEN_WEIGHT = 1023;
// Taken / not-taken weights for the pointer heuristic (presumably consumed by
// calcPointerHeuristics, whose body is not shown here -- confirm).
static const uint32_t PH_TAKEN_WEIGHT = 20;
static const uint32_t PH_NONTAKEN_WEIGHT = 12;
@ -73,38 +84,62 @@ static const uint32_t NORMAL_WEIGHT = 16;
// Minimum weight of an edge. Please note, that weight is NEVER 0.
static const uint32_t MIN_WEIGHT = 1;
// Return TRUE if BB leads directly to a Return Instruction, i.e. following
// only single-successor edges from BB ends at a return or at any other
// terminator that has no successors.
static bool isReturningBlock(BasicBlock *BB) {
  SmallPtrSet<BasicBlock *, 8> Seen;
  for (;;) {
    TerminatorInst *Term = BB->getTerminator();
    const unsigned SuccCount = Term->getNumSuccessors();
    // A return ends the walk successfully. A terminator without successors
    // (such as unreachable) is treated the same way as a return.
    if (isa<ReturnInst>(Term) || SuccCount == 0)
      return true;
    // A block that can go more than one way does not lead *directly* anywhere.
    if (SuccCount > 1)
      return false;
    // Exactly one successor: step onto it, giving up if that closes a cycle.
    Seen.insert(BB);
    BB = Term->getSuccessor(0);
    if (Seen.count(BB))
      return false;
  }
}
// Return UINT32_MAX divided (integer division) by the number of successors of
// BB's terminator. The terminator must have at least one successor; with none
// the division below is by zero.
static uint32_t getMaxWeightFor(BasicBlock *BB) {
  const unsigned SuccCount = BB->getTerminator()->getNumSuccessors();
  return UINT32_MAX / SuccCount;
}
/// \brief Calculate edge weights for successors that lead to unreachable.
///
/// Predict that a successor which necessarily leads to an
/// unreachable-terminated block is extremely unlikely to be taken.
///
/// As a side effect, \p BB is recorded in PostDominatedByUnreachable when it
/// ends in an unreachable instruction, or when every one of its successors is
/// already recorded there. Successors therefore need to have been processed
/// before \p BB for the result to be complete.
///
/// \returns true if edge weights were assigned for \p BB, false otherwise.
bool BranchProbabilityInfo::calcUnreachableHeuristics(BasicBlock *BB) {
  TerminatorInst *TI = BB->getTerminator();
  if (TI->getNumSuccessors() == 0) {
    if (isa<UnreachableInst>(TI))
      PostDominatedByUnreachable.insert(BB);
    return false;
  }

  // Partition the (unique) successor blocks by whether they are already known
  // to lead to unreachable.
  SmallPtrSet<BasicBlock *, 4> UnreachableEdges;
  SmallPtrSet<BasicBlock *, 4> ReachableEdges;
  for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
    if (PostDominatedByUnreachable.count(*I))
      UnreachableEdges.insert(*I);
    else
      ReachableEdges.insert(*I);
  }

  // If all successors are in the set of blocks post-dominated by unreachable,
  // this block is too. The sets above hold each successor block only once, so
  // their sizes must not be compared against the terminator's edge count: a
  // switch with several cases targeting the same block would never match.
  // There is at least one successor here, so "no reachable successor" is the
  // exact condition.
  if (ReachableEdges.empty())
    PostDominatedByUnreachable.insert(BB);

  // Skip probabilities if this block has a single successor or if all were
  // reachable.
  if (TI->getNumSuccessors() == 1 || UnreachableEdges.empty())
    return false;

  // Share the tiny "taken" weight among the unreachable-bound successors,
  // never dropping below the minimum legal edge weight.
  uint32_t UnreachableWeight =
    std::max<uint32_t>(UR_TAKEN_WEIGHT / UnreachableEdges.size(), MIN_WEIGHT);
  for (SmallPtrSet<BasicBlock *, 4>::iterator I = UnreachableEdges.begin(),
                                              E = UnreachableEdges.end();
       I != E; ++I)
    setEdgeWeight(BB, *I, UnreachableWeight);

  if (ReachableEdges.empty())
    return true;

  // Share the large "not taken" weight among the remaining successors, but
  // keep each of them at least at the normal weight.
  uint32_t ReachableWeight =
    std::max<uint32_t>(UR_NONTAKEN_WEIGHT / ReachableEdges.size(),
                       NORMAL_WEIGHT);
  for (SmallPtrSet<BasicBlock *, 4>::iterator I = ReachableEdges.begin(),
                                              E = ReachableEdges.end();
       I != E; ++I)
    setEdgeWeight(BB, *I, ReachableWeight);

  return true;
}
// Propagate existing explicit probabilities from either profile data or
// 'expect' intrinsic processing.
bool BranchProbabilityInfo::calcMetadataWeights(BasicBlock *BB) {
@ -143,46 +178,6 @@ bool BranchProbabilityInfo::calcMetadataWeights(BasicBlock *BB) {
return true;
}
// Calculate Edge Weights using "Return Heuristics": a successor for which
// isReturningBlock() holds is predicted as not taken. Returns true when at
// least one such successor was found.
bool BranchProbabilityInfo::calcReturnHeuristics(BasicBlock *BB){
  typedef SmallPtrSet<BasicBlock *, 4> EdgeSet;

  // Nothing to predict for a block with exactly one way out.
  if (BB->getTerminator()->getNumSuccessors() == 1)
    return false;

  // Split the successors into those that return and those that do not.
  EdgeSet Returning;
  EdgeSet Staying;
  for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) {
    BasicBlock *Target = *SI;
    if (isReturningBlock(Target))
      Returning.insert(Target);
    else
      Staying.insert(Target);
  }

  // Non-returning successors share RH_TAKEN_WEIGHT, floored at NORMAL_WEIGHT.
  const uint32_t StayCount = Staying.size();
  if (StayCount != 0) {
    const uint32_t Share = RH_TAKEN_WEIGHT / StayCount;
    const uint32_t Weight = Share < NORMAL_WEIGHT ? NORMAL_WEIGHT : Share;
    for (EdgeSet::iterator It = Staying.begin(), End = Staying.end();
         It != End; ++It)
      setEdgeWeight(BB, *It, Weight);
  }

  // Returning successors share RH_NONTAKEN_WEIGHT, floored at MIN_WEIGHT.
  const uint32_t RetCount = Returning.size();
  if (RetCount != 0) {
    const uint32_t Share = RH_NONTAKEN_WEIGHT / RetCount;
    const uint32_t Weight = Share < MIN_WEIGHT ? MIN_WEIGHT : Share;
    for (EdgeSet::iterator It = Returning.begin(), End = Returning.end();
         It != End; ++It)
      setEdgeWeight(BB, *It, Weight);
  }

  return RetCount > 0;
}
// Calculate Edge Weights using "Pointer Heuristics". Predict a comparison
// between two pointers or a pointer and NULL will fail.
bool BranchProbabilityInfo::calcPointerHeuristics(BasicBlock *BB) {
@ -390,20 +385,28 @@ void BranchProbabilityInfo::getAnalysisUsage(AnalysisUsage &AU) const {
/// Run the branch probability heuristics over the blocks of \p F.
///
/// Each visited block is offered to the calc* heuristics in turn; the first
/// one that returns true ends processing of that block. Always returns false.
bool BranchProbabilityInfo::runOnFunction(Function &F) {
LastF = &F; // Store the last function we ran on for printing.
LI = &getAnalysis<LoopInfo>();
assert(PostDominatedByUnreachable.empty());
for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
if (calcMetadataWeights(I))
// Walk the basic blocks in post-order so that we can build up state about
// the successors of a block iteratively.
for (po_iterator<BasicBlock *> I = po_begin(&F.getEntryBlock()),
E = po_end(&F.getEntryBlock());
I != E; ++I) {
DEBUG(dbgs() << "Computing probabilities for " << I->getName() << "\n");
if (calcUnreachableHeuristics(*I))
continue;
if (calcLoopBranchHeuristics(I))
if (calcMetadataWeights(*I))
continue;
if (calcReturnHeuristics(I))
if (calcLoopBranchHeuristics(*I))
continue;
if (calcPointerHeuristics(I))
if (calcPointerHeuristics(*I))
continue;
if (calcZeroHeuristics(I))
if (calcZeroHeuristics(*I))
continue;
calcFloatingPointHeuristics(I);
calcFloatingPointHeuristics(*I);
}
// The set is per-function scratch state; empty it so the assert at the top
// holds on the next run.
PostDominatedByUnreachable.clear();
return false;
}

View File

@ -0,0 +1,79 @@
; Test the static branch probability heuristics for no-return functions.
; RUN: opt < %s -analyze -branch-prob | FileCheck %s
declare void @abort() noreturn
; A two-way branch where one successor calls a noreturn function and ends in
; unreachable: that edge is expected at 1 / 1024 and the other at 1023 / 1024.
define i32 @test1(i32 %a, i32 %b) {
; CHECK: Printing analysis {{.*}} for function 'test1'
entry:
%cond = icmp eq i32 %a, 42
br i1 %cond, label %exit, label %abort
; CHECK: edge entry -> exit probability is 1023 / 1024
; CHECK: edge entry -> abort probability is 1 / 1024
abort:
call void @abort() noreturn
unreachable
exit:
ret i32 %b
}
; A switch whose four case blocks fall through one another into a block that
; ends in unreachable: every case edge is expected at 1 / 1027 and the default
; edge to the returning block at 1023 / 1027.
define i32 @test2(i32 %a, i32 %b) {
; CHECK: Printing analysis {{.*}} for function 'test2'
entry:
switch i32 %a, label %exit [i32 1, label %case_a
i32 2, label %case_b
i32 3, label %case_c
i32 4, label %case_d]
; CHECK: edge entry -> exit probability is 1023 / 1027
; CHECK: edge entry -> case_a probability is 1 / 1027
; CHECK: edge entry -> case_b probability is 1 / 1027
; CHECK: edge entry -> case_c probability is 1 / 1027
; CHECK: edge entry -> case_d probability is 1 / 1027
case_a:
br label %case_b
case_b:
br label %case_c
case_c:
br label %case_d
case_d:
call void @abort() noreturn
unreachable
exit:
ret i32 %b
}
; Both successors of dom end up in the unreachable-terminated abort block, so
; the two edges out of dom are expected to split evenly while the edge
; entry -> dom is itself expected at 1 / 1024.
define i32 @test3(i32 %a, i32 %b) {
; CHECK: Printing analysis {{.*}} for function 'test3'
; Make sure we unify across multiple conditional branches.
entry:
%cond1 = icmp eq i32 %a, 42
br i1 %cond1, label %exit, label %dom
; CHECK: edge entry -> exit probability is 1023 / 1024
; CHECK: edge entry -> dom probability is 1 / 1024
dom:
%cond2 = icmp ult i32 %a, 42
br i1 %cond2, label %idom1, label %idom2
; CHECK: edge dom -> idom1 probability is 1 / 2
; CHECK: edge dom -> idom2 probability is 1 / 2
idom1:
br label %abort
idom2:
br label %abort
abort:
call void @abort() noreturn
unreachable
exit:
ret i32 %b
}