mirror of
https://github.com/RPCSX/llvm.git
synced 2025-02-25 07:31:32 +00:00
Use ProfileSummaryInfo in inline cost analysis.
Instead of directly using MaxFunctionCount and function entry count to determine callee hotness, use the isHotFunction/isColdFunction methods provided by ProfileSummaryInfo.

Differential revision: http://reviews.llvm.org/D21045

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@272321 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
8f579ce1a6
commit
f8bdcad7be
@ -23,6 +23,7 @@ class AssumptionCacheTracker;
|
||||
class CallSite;
|
||||
class DataLayout;
|
||||
class Function;
|
||||
class ProfileSummaryInfo;
|
||||
class TargetTransformInfo;
|
||||
|
||||
namespace InlineConstants {
|
||||
@ -111,7 +112,7 @@ public:
|
||||
/// inlining the callsite. It is an expensive, heavyweight call.
|
||||
InlineCost getInlineCost(CallSite CS, int DefaultThreshold,
|
||||
TargetTransformInfo &CalleeTTI,
|
||||
AssumptionCacheTracker *ACT);
|
||||
AssumptionCacheTracker *ACT, ProfileSummaryInfo *PSI);
|
||||
|
||||
/// \brief Get an InlineCost with the callee explicitly specified.
|
||||
/// This allows you to calculate the cost of inlining a function via a
|
||||
@ -120,7 +121,7 @@ InlineCost getInlineCost(CallSite CS, int DefaultThreshold,
|
||||
//
|
||||
InlineCost getInlineCost(CallSite CS, Function *Callee, int DefaultThreshold,
|
||||
TargetTransformInfo &CalleeTTI,
|
||||
AssumptionCacheTracker *ACT);
|
||||
AssumptionCacheTracker *ACT, ProfileSummaryInfo *PSI);
|
||||
|
||||
int computeThresholdFromOptLevels(unsigned OptLevel, unsigned SizeOptLevel);
|
||||
|
||||
|
@ -24,6 +24,7 @@ class AssumptionCacheTracker;
|
||||
class CallSite;
|
||||
class DataLayout;
|
||||
class InlineCost;
|
||||
class ProfileSummaryInfo;
|
||||
template <class PtrType, unsigned SmallSize> class SmallPtrSet;
|
||||
|
||||
/// Inliner - This class contains all of the helper code which is used to
|
||||
@ -85,6 +86,7 @@ private:
|
||||
|
||||
protected:
|
||||
AssumptionCacheTracker *ACT;
|
||||
ProfileSummaryInfo *PSI;
|
||||
};
|
||||
|
||||
} // End llvm namespace
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include "llvm/Analysis/CodeMetrics.h"
|
||||
#include "llvm/Analysis/ConstantFolding.h"
|
||||
#include "llvm/Analysis/InstructionSimplify.h"
|
||||
#include "llvm/Analysis/ProfileSummaryInfo.h"
|
||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||
#include "llvm/IR/CallSite.h"
|
||||
#include "llvm/IR/CallingConv.h"
|
||||
@ -77,6 +78,9 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
|
||||
/// The cache of @llvm.assume intrinsics.
|
||||
AssumptionCacheTracker *ACT;
|
||||
|
||||
/// Profile summary information.
|
||||
ProfileSummaryInfo *PSI;
|
||||
|
||||
// The called function.
|
||||
Function &F;
|
||||
|
||||
@ -200,17 +204,19 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
|
||||
|
||||
public:
|
||||
CallAnalyzer(const TargetTransformInfo &TTI, AssumptionCacheTracker *ACT,
|
||||
Function &Callee, int Threshold, CallSite CSArg)
|
||||
: TTI(TTI), ACT(ACT), F(Callee), CandidateCS(CSArg), Threshold(Threshold),
|
||||
Cost(0), IsCallerRecursive(false), IsRecursiveCall(false),
|
||||
ExposesReturnsTwice(false), HasDynamicAlloca(false),
|
||||
ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
|
||||
HasFrameEscape(false), AllocatedSize(0), NumInstructions(0),
|
||||
NumVectorInstructions(0), FiftyPercentVectorBonus(0),
|
||||
TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0),
|
||||
NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0),
|
||||
NumConstantPtrDiffs(0), NumInstructionsSimplified(0),
|
||||
SROACostSavings(0), SROACostSavingsLost(0) {}
|
||||
ProfileSummaryInfo *PSI, Function &Callee, int Threshold,
|
||||
CallSite CSArg)
|
||||
: TTI(TTI), ACT(ACT), PSI(PSI), F(Callee), CandidateCS(CSArg),
|
||||
Threshold(Threshold), Cost(0), IsCallerRecursive(false),
|
||||
IsRecursiveCall(false), ExposesReturnsTwice(false),
|
||||
HasDynamicAlloca(false), ContainsNoDuplicateCall(false),
|
||||
HasReturn(false), HasIndirectBr(false), HasFrameEscape(false),
|
||||
AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0),
|
||||
FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0),
|
||||
NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
|
||||
NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
|
||||
NumInstructionsSimplified(0), SROACostSavings(0),
|
||||
SROACostSavingsLost(0) {}
|
||||
|
||||
bool analyzeCall(CallSite CS);
|
||||
|
||||
@ -626,35 +632,15 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
|
||||
Threshold = OptSizeThreshold;
|
||||
}
|
||||
|
||||
// If profile information is available, use that to adjust threshold of hot
|
||||
// and cold functions.
|
||||
// FIXME: The heuristics used below for determining hotness and coldness are
|
||||
// based on preliminary SPEC tuning and may not be optimal. Replace this with
|
||||
// a well-tuned heuristic based on *callsite* hotness and not callee hotness.
|
||||
uint64_t FunctionCount = 0, MaxFunctionCount = 0;
|
||||
bool HasPGOCounts = false;
|
||||
if (Callee.getEntryCount() && Callee.getParent()->getMaximumFunctionCount()) {
|
||||
HasPGOCounts = true;
|
||||
FunctionCount = Callee.getEntryCount().getValue();
|
||||
MaxFunctionCount = Callee.getParent()->getMaximumFunctionCount().getValue();
|
||||
}
|
||||
|
||||
// Listen to the inlinehint attribute or profile based hotness information
|
||||
// when it would increase the threshold and the caller does not need to
|
||||
// minimize its size.
|
||||
bool InlineHint =
|
||||
Callee.hasFnAttribute(Attribute::InlineHint) ||
|
||||
(HasPGOCounts &&
|
||||
FunctionCount >= (uint64_t)(0.3 * (double)MaxFunctionCount));
|
||||
bool InlineHint = Callee.hasFnAttribute(Attribute::InlineHint) ||
|
||||
PSI->isHotFunction(&Callee);
|
||||
if (InlineHint && HintThreshold > Threshold && !Caller->optForMinSize())
|
||||
Threshold = HintThreshold;
|
||||
|
||||
// Listen to the cold attribute or profile based coldness information
|
||||
// when it would decrease the threshold.
|
||||
bool ColdCallee =
|
||||
Callee.hasFnAttribute(Attribute::Cold) ||
|
||||
(HasPGOCounts &&
|
||||
FunctionCount <= (uint64_t)(0.01 * (double)MaxFunctionCount));
|
||||
bool ColdCallee = PSI->isColdFunction(&Callee);
|
||||
// Command line argument for DefaultInlineThreshold will override the default
|
||||
// ColdThreshold. If we have -inline-threshold but no -inlinecold-threshold,
|
||||
// do not use the default cold threshold even if it is smaller.
|
||||
@ -963,7 +949,8 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
|
||||
// during devirtualization and so we want to give it a hefty bonus for
|
||||
// inlining, but cap that bonus in the event that inlining wouldn't pan
|
||||
// out. Pretend to inline the function, with a custom threshold.
|
||||
CallAnalyzer CA(TTI, ACT, *F, InlineConstants::IndirectCallThreshold, CS);
|
||||
CallAnalyzer CA(TTI, ACT, PSI, *F, InlineConstants::IndirectCallThreshold,
|
||||
CS);
|
||||
if (CA.analyzeCall(CS)) {
|
||||
// We were able to inline the indirect call! Subtract the cost from the
|
||||
// threshold to get the bonus we want to apply, but don't go below zero.
|
||||
@ -1451,9 +1438,10 @@ static bool functionsHaveCompatibleAttributes(Function *Caller,
|
||||
|
||||
InlineCost llvm::getInlineCost(CallSite CS, int DefaultThreshold,
|
||||
TargetTransformInfo &CalleeTTI,
|
||||
AssumptionCacheTracker *ACT) {
|
||||
AssumptionCacheTracker *ACT,
|
||||
ProfileSummaryInfo *PSI) {
|
||||
return getInlineCost(CS, CS.getCalledFunction(), DefaultThreshold, CalleeTTI,
|
||||
ACT);
|
||||
ACT, PSI);
|
||||
}
|
||||
|
||||
int llvm::computeThresholdFromOptLevels(unsigned OptLevel,
|
||||
@ -1472,7 +1460,8 @@ int llvm::getDefaultInlineThreshold() { return DefaultInlineThreshold; }
|
||||
InlineCost llvm::getInlineCost(CallSite CS, Function *Callee,
|
||||
int DefaultThreshold,
|
||||
TargetTransformInfo &CalleeTTI,
|
||||
AssumptionCacheTracker *ACT) {
|
||||
AssumptionCacheTracker *ACT,
|
||||
ProfileSummaryInfo *PSI) {
|
||||
|
||||
// Cannot inline indirect calls.
|
||||
if (!Callee)
|
||||
@ -1506,7 +1495,7 @@ InlineCost llvm::getInlineCost(CallSite CS, Function *Callee,
|
||||
DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
|
||||
<< "...\n");
|
||||
|
||||
CallAnalyzer CA(CalleeTTI, ACT, *Callee, DefaultThreshold, CS);
|
||||
CallAnalyzer CA(CalleeTTI, ACT, PSI, *Callee, DefaultThreshold, CS);
|
||||
bool ShouldInline = CA.analyzeCall(CS);
|
||||
|
||||
DEBUG(CA.dump());
|
||||
|
@ -17,6 +17,7 @@
|
||||
#include "llvm/Analysis/AssumptionCache.h"
|
||||
#include "llvm/Analysis/CallGraph.h"
|
||||
#include "llvm/Analysis/InlineCost.h"
|
||||
#include "llvm/Analysis/ProfileSummaryInfo.h"
|
||||
#include "llvm/Analysis/TargetLibraryInfo.h"
|
||||
#include "llvm/IR/CallSite.h"
|
||||
#include "llvm/IR/CallingConv.h"
|
||||
@ -65,6 +66,7 @@ INITIALIZE_PASS_BEGIN(AlwaysInliner, "always-inline",
|
||||
"Inliner for always_inline functions", false, false)
|
||||
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
|
||||
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
|
||||
INITIALIZE_PASS_END(AlwaysInliner, "always-inline",
|
||||
"Inliner for always_inline functions", false, false)
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include "llvm/Analysis/AssumptionCache.h"
|
||||
#include "llvm/Analysis/CallGraph.h"
|
||||
#include "llvm/Analysis/InlineCost.h"
|
||||
#include "llvm/Analysis/ProfileSummaryInfo.h"
|
||||
#include "llvm/Analysis/TargetLibraryInfo.h"
|
||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||
#include "llvm/IR/CallSite.h"
|
||||
@ -60,7 +61,7 @@ public:
|
||||
InlineCost getInlineCost(CallSite CS) override {
|
||||
Function *Callee = CS.getCalledFunction();
|
||||
TargetTransformInfo &TTI = TTIWP->getTTI(*Callee);
|
||||
return llvm::getInlineCost(CS, DefaultThreshold, TTI, ACT);
|
||||
return llvm::getInlineCost(CS, DefaultThreshold, TTI, ACT, PSI);
|
||||
}
|
||||
|
||||
bool runOnSCC(CallGraphSCC &SCC) override;
|
||||
@ -77,6 +78,7 @@ INITIALIZE_PASS_BEGIN(SimpleInliner, "inline",
|
||||
"Function Integration/Inlining", false, false)
|
||||
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
|
||||
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
|
||||
INITIALIZE_PASS_END(SimpleInliner, "inline",
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include "llvm/Analysis/BasicAliasAnalysis.h"
|
||||
#include "llvm/Analysis/CallGraph.h"
|
||||
#include "llvm/Analysis/InlineCost.h"
|
||||
#include "llvm/Analysis/ProfileSummaryInfo.h"
|
||||
#include "llvm/Analysis/TargetLibraryInfo.h"
|
||||
#include "llvm/IR/CallSite.h"
|
||||
#include "llvm/IR/DataLayout.h"
|
||||
@ -56,6 +57,7 @@ Inliner::Inliner(char &ID, bool InsertLifetime)
|
||||
/// always explicitly call the implementation here.
|
||||
void Inliner::getAnalysisUsage(AnalysisUsage &AU) const {
|
||||
AU.addRequired<AssumptionCacheTracker>();
|
||||
AU.addRequired<ProfileSummaryInfoWrapperPass>();
|
||||
AU.addRequired<TargetLibraryInfoWrapperPass>();
|
||||
getAAResultsAnalysisUsage(AU);
|
||||
CallGraphSCCPass::getAnalysisUsage(AU);
|
||||
@ -374,6 +376,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
|
||||
bool Inliner::inlineCalls(CallGraphSCC &SCC) {
|
||||
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
|
||||
ACT = &getAnalysis<AssumptionCacheTracker>();
|
||||
PSI = getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(CG.getModule());
|
||||
auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
|
||||
|
||||
SmallPtrSet<Function*, 8> SCCFunctions;
|
||||
|
@ -5,7 +5,7 @@
|
||||
; A callee with identical body does get inlined because cost fits within the
|
||||
; inline-threshold
|
||||
|
||||
define i32 @callee1(i32 %x) !prof !1 {
|
||||
define i32 @callee1(i32 %x) !prof !21 {
|
||||
%x1 = add i32 %x, 1
|
||||
%x2 = add i32 %x1, 1
|
||||
%x3 = add i32 %x2, 1
|
||||
@ -13,7 +13,7 @@ define i32 @callee1(i32 %x) !prof !1 {
|
||||
ret i32 %x3
|
||||
}
|
||||
|
||||
define i32 @callee2(i32 %x) !prof !2 {
|
||||
define i32 @callee2(i32 %x) !prof !22 {
|
||||
; CHECK-LABEL: @callee2(
|
||||
%x1 = add i32 %x, 1
|
||||
%x2 = add i32 %x1, 1
|
||||
@ -22,7 +22,7 @@ define i32 @callee2(i32 %x) !prof !2 {
|
||||
ret i32 %x3
|
||||
}
|
||||
|
||||
define i32 @caller2(i32 %y1) !prof !2 {
|
||||
define i32 @caller2(i32 %y1) !prof !22 {
|
||||
; CHECK-LABEL: @caller2(
|
||||
; CHECK: call i32 @callee2
|
||||
; CHECK-NOT: call i32 @callee1
|
||||
@ -32,8 +32,21 @@ define i32 @caller2(i32 %y1) !prof !2 {
|
||||
ret i32 %y3
|
||||
}
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"MaxFunctionCount", i32 1000}
|
||||
!1 = !{!"function_entry_count", i64 100}
|
||||
!2 = !{!"function_entry_count", i64 1}
|
||||
!llvm.module.flags = !{!1}
|
||||
!21 = !{!"function_entry_count", i64 100}
|
||||
!22 = !{!"function_entry_count", i64 1}
|
||||
|
||||
!1 = !{i32 1, !"ProfileSummary", !2}
|
||||
!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
|
||||
!3 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!4 = !{!"TotalCount", i64 10000}
|
||||
!5 = !{!"MaxCount", i64 1000}
|
||||
!6 = !{!"MaxInternalCount", i64 1}
|
||||
!7 = !{!"MaxFunctionCount", i64 1000}
|
||||
!8 = !{!"NumCounts", i64 3}
|
||||
!9 = !{!"NumFunctions", i64 3}
|
||||
!10 = !{!"DetailedSummary", !11}
|
||||
!11 = !{!12, !13, !14}
|
||||
!12 = !{i32 10000, i64 100, i32 1}
|
||||
!13 = !{i32 999000, i64 100, i32 1}
|
||||
!14 = !{i32 999999, i64 1, i32 2}
|
||||
|
@ -5,7 +5,7 @@
|
||||
; A cold callee with identical body does not get inlined because cost exceeds the
|
||||
; inline-threshold
|
||||
|
||||
define i32 @callee1(i32 %x) !prof !1 {
|
||||
define i32 @callee1(i32 %x) !prof !21 {
|
||||
%x1 = add i32 %x, 1
|
||||
%x2 = add i32 %x1, 1
|
||||
%x3 = add i32 %x2, 1
|
||||
@ -13,7 +13,7 @@ define i32 @callee1(i32 %x) !prof !1 {
|
||||
ret i32 %x3
|
||||
}
|
||||
|
||||
define i32 @callee2(i32 %x) !prof !2 {
|
||||
define i32 @callee2(i32 %x) !prof !22 {
|
||||
; CHECK-LABEL: @callee2(
|
||||
%x1 = add i32 %x, 1
|
||||
%x2 = add i32 %x1, 1
|
||||
@ -22,7 +22,7 @@ define i32 @callee2(i32 %x) !prof !2 {
|
||||
ret i32 %x3
|
||||
}
|
||||
|
||||
define i32 @caller2(i32 %y1) !prof !2 {
|
||||
define i32 @caller2(i32 %y1) !prof !22 {
|
||||
; CHECK-LABEL: @caller2(
|
||||
; CHECK: call i32 @callee2
|
||||
; CHECK-NOT: call i32 @callee1
|
||||
@ -32,8 +32,21 @@ define i32 @caller2(i32 %y1) !prof !2 {
|
||||
ret i32 %y3
|
||||
}
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"MaxFunctionCount", i32 10}
|
||||
!1 = !{!"function_entry_count", i64 10}
|
||||
!2 = !{!"function_entry_count", i64 1}
|
||||
!llvm.module.flags = !{!1}
|
||||
!21 = !{!"function_entry_count", i64 300}
|
||||
!22 = !{!"function_entry_count", i64 1}
|
||||
|
||||
!1 = !{i32 1, !"ProfileSummary", !2}
|
||||
!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
|
||||
!3 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!4 = !{!"TotalCount", i64 10000}
|
||||
!5 = !{!"MaxCount", i64 1000}
|
||||
!6 = !{!"MaxInternalCount", i64 1}
|
||||
!7 = !{!"MaxFunctionCount", i64 1000}
|
||||
!8 = !{!"NumCounts", i64 3}
|
||||
!9 = !{!"NumFunctions", i64 3}
|
||||
!10 = !{!"DetailedSummary", !11}
|
||||
!11 = !{!12, !13, !14}
|
||||
!12 = !{i32 10000, i64 100, i32 1}
|
||||
!13 = !{i32 999000, i64 100, i32 1}
|
||||
!14 = !{i32 999999, i64 1, i32 2}
|
||||
|
Loading…
x
Reference in New Issue
Block a user