mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 01:03:52 +00:00
Add a new function attribute 'cold' to functions.
Other than recognizing the attribute, the patch does little else. It changes the branch probability analyzer so that edges into blocks postdominated by a cold function are given low weight. Added analysis and code generation tests. Added documentation for the new attribute. llvm-svn: 182638
This commit is contained in:
parent
fcb0899e18
commit
d1f091f169
@ -812,6 +812,11 @@ example:
|
||||
This attribute indicates that the inliner should attempt to inline
|
||||
this function into callers whenever possible, ignoring any active
|
||||
inlining size threshold for this caller.
|
||||
``cold``
|
||||
This attribute indicates that this function is rarely called. When
|
||||
computing edge weights, basic blocks post-dominated by a cold
|
||||
function call are also considered to be cold; and, thus, given low
|
||||
weight.
|
||||
``nonlazybind``
|
||||
This attribute suppresses lazy symbol binding for the function. This
|
||||
may make calls to the function faster, at the cost of extra program
|
||||
|
@ -166,6 +166,7 @@ typedef enum {
|
||||
and the path forward agreed upon.
|
||||
LLVMAddressSafety = 1ULL << 32,
|
||||
LLVMStackProtectStrongAttribute = 1ULL<<33
|
||||
LLVMCold = 1ULL << 34
|
||||
*/
|
||||
} LLVMAttribute;
|
||||
|
||||
|
@ -131,11 +131,15 @@ private:
|
||||
/// \brief Track the set of blocks directly succeeded by a returning block.
|
||||
SmallPtrSet<BasicBlock *, 16> PostDominatedByUnreachable;
|
||||
|
||||
/// \brief Track the set of blocks that always lead to a cold call.
|
||||
SmallPtrSet<BasicBlock *, 16> PostDominatedByColdCall;
|
||||
|
||||
/// \brief Get sum of the block successors' weights.
|
||||
uint32_t getSumForBlock(const BasicBlock *BB) const;
|
||||
|
||||
bool calcUnreachableHeuristics(BasicBlock *BB);
|
||||
bool calcMetadataWeights(BasicBlock *BB);
|
||||
bool calcColdCallHeuristics(BasicBlock *BB);
|
||||
bool calcPointerHeuristics(BasicBlock *BB);
|
||||
bool calcLoopBranchHeuristics(BasicBlock *BB);
|
||||
bool calcZeroHeuristics(BasicBlock *BB);
|
||||
|
@ -68,6 +68,7 @@ public:
|
||||
///< 0 means unaligned (different from align(1))
|
||||
AlwaysInline, ///< inline=always
|
||||
ByVal, ///< Pass structure by value
|
||||
Cold, ///< Marks function as being in a cold path.
|
||||
InlineHint, ///< Source said inlining was desirable
|
||||
InReg, ///< Force argument to be passed in register
|
||||
MinSize, ///< Function must be optimized for size first
|
||||
|
@ -69,6 +69,20 @@ static const uint32_t UR_TAKEN_WEIGHT = 1;
|
||||
/// easily subsume it.
|
||||
static const uint32_t UR_NONTAKEN_WEIGHT = 1024*1024 - 1;
|
||||
|
||||
/// \brief Weight for a branch taken going into a cold block.
|
||||
///
|
||||
/// This is the weight for a branch taken toward a block marked
|
||||
/// cold. A block is marked cold if it's postdominated by a
|
||||
/// block containing a call to a cold function. Cold functions
|
||||
/// are those marked with attribute 'cold'.
|
||||
static const uint32_t CC_TAKEN_WEIGHT = 4;
|
||||
|
||||
/// \brief Weight for a branch not-taken into a cold block.
|
||||
///
|
||||
/// This is the weight for a branch not taken toward a block marked
|
||||
/// cold.
|
||||
static const uint32_t CC_NONTAKEN_WEIGHT = 64;
|
||||
|
||||
static const uint32_t PH_TAKEN_WEIGHT = 20;
|
||||
static const uint32_t PH_NONTAKEN_WEIGHT = 12;
|
||||
|
||||
@ -193,6 +207,69 @@ bool BranchProbabilityInfo::calcMetadataWeights(BasicBlock *BB) {
|
||||
return true;
|
||||
}
|
||||
|
||||
/// \brief Calculate edge weights for edges leading to cold blocks.
|
||||
///
|
||||
/// A cold block is one post-dominated by a block with a call to a
|
||||
/// cold function. Those edges are unlikely to be taken, so we give
|
||||
/// them relatively low weight.
|
||||
///
|
||||
/// Return true if we could compute the weights for cold edges.
|
||||
/// Return false, otherwise.
|
||||
bool BranchProbabilityInfo::calcColdCallHeuristics(BasicBlock *BB) {
|
||||
TerminatorInst *TI = BB->getTerminator();
|
||||
if (TI->getNumSuccessors() == 0)
|
||||
return false;
|
||||
|
||||
// Determine which successors are post-dominated by a cold block.
|
||||
SmallVector<unsigned, 4> ColdEdges;
|
||||
ColdEdges.reserve(TI->getNumSuccessors());
|
||||
SmallVector<unsigned, 4> NormalEdges;
|
||||
NormalEdges.reserve(TI->getNumSuccessors());
|
||||
for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I)
|
||||
if (PostDominatedByColdCall.count(*I))
|
||||
ColdEdges.push_back(I.getSuccessorIndex());
|
||||
else
|
||||
NormalEdges.push_back(I.getSuccessorIndex());
|
||||
|
||||
// If all successors are in the set of blocks post-dominated by cold calls,
|
||||
// this block is in the set post-dominated by cold calls.
|
||||
if (ColdEdges.size() == TI->getNumSuccessors())
|
||||
PostDominatedByColdCall.insert(BB);
|
||||
else {
|
||||
// Otherwise, if the block itself contains a cold function, add it to the
|
||||
// set of blocks postdominated by a cold call.
|
||||
assert(!PostDominatedByColdCall.count(BB));
|
||||
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
|
||||
if (CallInst *CI = dyn_cast<CallInst>(I))
|
||||
if (CI->hasFnAttr(Attribute::Cold)) {
|
||||
PostDominatedByColdCall.insert(BB);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Skip probabilities if this block has a single successor.
|
||||
if (TI->getNumSuccessors() == 1 || ColdEdges.empty())
|
||||
return false;
|
||||
|
||||
uint32_t ColdWeight =
|
||||
std::max(CC_TAKEN_WEIGHT / (unsigned) ColdEdges.size(), MIN_WEIGHT);
|
||||
for (SmallVector<unsigned, 4>::iterator I = ColdEdges.begin(),
|
||||
E = ColdEdges.end();
|
||||
I != E; ++I)
|
||||
setEdgeWeight(BB, *I, ColdWeight);
|
||||
|
||||
if (NormalEdges.empty())
|
||||
return true;
|
||||
uint32_t NormalWeight = std::max(
|
||||
CC_NONTAKEN_WEIGHT / (unsigned) NormalEdges.size(), NORMAL_WEIGHT);
|
||||
for (SmallVector<unsigned, 4>::iterator I = NormalEdges.begin(),
|
||||
E = NormalEdges.end();
|
||||
I != E; ++I)
|
||||
setEdgeWeight(BB, *I, NormalWeight);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Calculate Edge Weights using "Pointer Heuristics". Predict a comparsion
|
||||
// between two pointer or pointer and NULL will fail.
|
||||
bool BranchProbabilityInfo::calcPointerHeuristics(BasicBlock *BB) {
|
||||
@ -397,6 +474,7 @@ bool BranchProbabilityInfo::runOnFunction(Function &F) {
|
||||
LastF = &F; // Store the last function we ran on for printing.
|
||||
LI = &getAnalysis<LoopInfo>();
|
||||
assert(PostDominatedByUnreachable.empty());
|
||||
assert(PostDominatedByColdCall.empty());
|
||||
|
||||
// Walk the basic blocks in post-order so that we can build up state about
|
||||
// the successors of a block iteratively.
|
||||
@ -408,6 +486,8 @@ bool BranchProbabilityInfo::runOnFunction(Function &F) {
|
||||
continue;
|
||||
if (calcMetadataWeights(*I))
|
||||
continue;
|
||||
if (calcColdCallHeuristics(*I))
|
||||
continue;
|
||||
if (calcLoopBranchHeuristics(*I))
|
||||
continue;
|
||||
if (calcPointerHeuristics(*I))
|
||||
@ -420,6 +500,7 @@ bool BranchProbabilityInfo::runOnFunction(Function &F) {
|
||||
}
|
||||
|
||||
PostDominatedByUnreachable.clear();
|
||||
PostDominatedByColdCall.clear();
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -564,6 +564,7 @@ lltok::Kind LLLexer::LexIdentifier() {
|
||||
|
||||
KEYWORD(alwaysinline);
|
||||
KEYWORD(byval);
|
||||
KEYWORD(cold);
|
||||
KEYWORD(inlinehint);
|
||||
KEYWORD(inreg);
|
||||
KEYWORD(minsize);
|
||||
|
@ -909,6 +909,7 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
|
||||
continue;
|
||||
}
|
||||
case lltok::kw_alwaysinline: B.addAttribute(Attribute::AlwaysInline); break;
|
||||
case lltok::kw_cold: B.addAttribute(Attribute::Cold); break;
|
||||
case lltok::kw_inlinehint: B.addAttribute(Attribute::InlineHint); break;
|
||||
case lltok::kw_minsize: B.addAttribute(Attribute::MinSize); break;
|
||||
case lltok::kw_naked: B.addAttribute(Attribute::Naked); break;
|
||||
@ -1222,6 +1223,7 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) {
|
||||
|
||||
case lltok::kw_alignstack:
|
||||
case lltok::kw_alwaysinline:
|
||||
case lltok::kw_cold:
|
||||
case lltok::kw_inlinehint:
|
||||
case lltok::kw_minsize:
|
||||
case lltok::kw_naked:
|
||||
|
@ -96,6 +96,7 @@ namespace lltok {
|
||||
kw_alwaysinline,
|
||||
kw_sanitize_address,
|
||||
kw_byval,
|
||||
kw_cold,
|
||||
kw_inlinehint,
|
||||
kw_inreg,
|
||||
kw_minsize,
|
||||
|
@ -217,6 +217,8 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
|
||||
return "uwtable";
|
||||
if (hasAttribute(Attribute::ZExt))
|
||||
return "zeroext";
|
||||
if (hasAttribute(Attribute::Cold))
|
||||
return "cold";
|
||||
|
||||
// FIXME: These should be output like this:
|
||||
//
|
||||
@ -396,6 +398,7 @@ uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) {
|
||||
case Attribute::SanitizeMemory: return 1ULL << 37;
|
||||
case Attribute::NoBuiltin: return 1ULL << 38;
|
||||
case Attribute::Returned: return 1ULL << 39;
|
||||
case Attribute::Cold: return 1ULL << 40;
|
||||
}
|
||||
llvm_unreachable("Unsupported attribute type");
|
||||
}
|
||||
|
@ -692,7 +692,8 @@ void Verifier::VerifyAttributeTypes(AttributeSet Attrs, unsigned Idx,
|
||||
I->getKindAsEnum() == Attribute::SanitizeMemory ||
|
||||
I->getKindAsEnum() == Attribute::MinSize ||
|
||||
I->getKindAsEnum() == Attribute::NoDuplicate ||
|
||||
I->getKindAsEnum() == Attribute::NoBuiltin) {
|
||||
I->getKindAsEnum() == Attribute::NoBuiltin ||
|
||||
I->getKindAsEnum() == Attribute::Cold) {
|
||||
if (!isFunction)
|
||||
CheckFailed("Attribute '" + I->getKindAsString() +
|
||||
"' only applies to functions!", V);
|
||||
|
@ -115,3 +115,61 @@ return:
|
||||
}
|
||||
|
||||
!2 = metadata !{metadata !"branch_weights", i32 7, i32 6, i32 4, i32 4, i32 64}
|
||||
|
||||
declare void @coldfunc() cold
|
||||
|
||||
define i32 @test5(i32 %a, i32 %b, i1 %flag) {
|
||||
; CHECK: Printing analysis {{.*}} for function 'test5'
|
||||
entry:
|
||||
br i1 %flag, label %then, label %else
|
||||
; CHECK: edge entry -> then probability is 4 / 68
|
||||
; CHECK: edge entry -> else probability is 64 / 68
|
||||
|
||||
then:
|
||||
call void @coldfunc()
|
||||
br label %exit
|
||||
; CHECK: edge then -> exit probability is 16 / 16 = 100%
|
||||
|
||||
else:
|
||||
br label %exit
|
||||
; CHECK: edge else -> exit probability is 16 / 16 = 100%
|
||||
|
||||
exit:
|
||||
%result = phi i32 [ %a, %then ], [ %b, %else ]
|
||||
ret i32 %result
|
||||
}
|
||||
|
||||
declare i32 @regular_function(i32 %i)
|
||||
|
||||
define i32 @test_cold_call_sites(i32* %a) {
|
||||
; Test that edges to blocks post-dominated by cold call sites
|
||||
; are marked as not expected to be taken.
|
||||
; TODO(dnovillo) The calls to regular_function should not be merged, but
|
||||
; they are currently being merged. Convert this into a code generation test
|
||||
; after that is fixed.
|
||||
|
||||
; CHECK: Printing analysis {{.*}} for function 'test_cold_call_sites'
|
||||
; CHECK: edge entry -> then probability is 4 / 68 = 5.88235%
|
||||
; CHECK: edge entry -> else probability is 64 / 68 = 94.1176% [HOT edge]
|
||||
|
||||
entry:
|
||||
%gep1 = getelementptr i32* %a, i32 1
|
||||
%val1 = load i32* %gep1
|
||||
%cond1 = icmp ugt i32 %val1, 1
|
||||
br i1 %cond1, label %then, label %else
|
||||
|
||||
then:
|
||||
; This function is not declared cold, but this call site is.
|
||||
%val4 = call i32 @regular_function(i32 %val1) cold
|
||||
br label %exit
|
||||
|
||||
else:
|
||||
%gep2 = getelementptr i32* %a, i32 2
|
||||
%val2 = load i32* %gep2
|
||||
%val3 = call i32 @regular_function(i32 %val2)
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
%ret = phi i32 [ %val4, %then ], [ %val3, %else ]
|
||||
ret i32 %ret
|
||||
}
|
||||
|
@ -1089,3 +1089,35 @@ while.end:
|
||||
store double %rra.0, double* %arrayidx34, align 8
|
||||
br label %for.cond
|
||||
}
|
||||
|
||||
declare void @cold_function() cold
|
||||
|
||||
define i32 @test_cold_calls(i32* %a) {
|
||||
; Test that edges to blocks post-dominated by cold calls are
|
||||
; marked as not expected to be taken. They should be laid out
|
||||
; at the bottom.
|
||||
; CHECK: test_cold_calls:
|
||||
; CHECK: %entry
|
||||
; CHECK: %else
|
||||
; CHECK: %exit
|
||||
; CHECK: %then
|
||||
|
||||
entry:
|
||||
%gep1 = getelementptr i32* %a, i32 1
|
||||
%val1 = load i32* %gep1
|
||||
%cond1 = icmp ugt i32 %val1, 1
|
||||
br i1 %cond1, label %then, label %else
|
||||
|
||||
then:
|
||||
call void @cold_function()
|
||||
br label %exit
|
||||
|
||||
else:
|
||||
%gep2 = getelementptr i32* %a, i32 2
|
||||
%val2 = load i32* %gep2
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
%ret = phi i32 [ %val1, %then ], [ %val2, %else ]
|
||||
ret i32 %ret
|
||||
}
|
||||
|
9
test/Feature/cold.ll
Normal file
9
test/Feature/cold.ll
Normal file
@ -0,0 +1,9 @@
|
||||
; RUN: llvm-as < %s | llvm-dis | FileCheck %s
|
||||
|
||||
; CHECK: @fun() #0
|
||||
define void @fun() #0 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: attributes #0 = { cold }
|
||||
attributes #0 = { cold }
|
Loading…
x
Reference in New Issue
Block a user