mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-27 13:40:30 +00:00
[PartialInlining] Minor cost anaysis tuning
Also added a test option and 2 cost analysis related tests. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@304599 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
ffc893deb7
commit
118cef0ca3
@ -68,6 +68,10 @@ static cl::opt<int>
|
||||
cl::desc("Relative frequency of outline region to "
|
||||
"the entry block"));
|
||||
|
||||
static cl::opt<unsigned> ExtraOutliningPenalty(
|
||||
"partial-inlining-extra-penalty", cl::init(0), cl::Hidden,
|
||||
cl::desc("A debug option to add additional penalty to the computed one."));
|
||||
|
||||
namespace {
|
||||
|
||||
struct FunctionOutliningInfo {
|
||||
@ -83,7 +87,7 @@ struct FunctionOutliningInfo {
|
||||
SmallVector<BasicBlock *, 4> Entries;
|
||||
// The return block that is not included in the outlined region.
|
||||
BasicBlock *ReturnBlock;
|
||||
// The dominating block of the region ot be outlined.
|
||||
// The dominating block of the region to be outlined.
|
||||
BasicBlock *NonReturnBlock;
|
||||
// The set of blocks in Entries that that are predecessors to ReturnBlock
|
||||
SmallVector<BasicBlock *, 4> ReturnBlockPreds;
|
||||
@ -407,11 +411,23 @@ BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq(
|
||||
if (hasProfileData(F, OI))
|
||||
return OutlineRegionRelFreq;
|
||||
|
||||
// When profile data is not available, we need to be very
|
||||
// conservative in estimating the overall savings. We need to make sure
|
||||
// the outline region relative frequency is not below the threshold
|
||||
// specified by the option.
|
||||
OutlineRegionRelFreq = std::max(OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));
|
||||
// When profile data is not available, we need to be conservative in
|
||||
// estimating the overall savings. Static branch prediction can usually
|
||||
// guess the branch direction right (taken/non-taken), but the guessed
|
||||
// branch probability is usually not biased enough. In case when the
|
||||
// outlined region is predicted to be likely, its probability needs
|
||||
// to be made higher (more biased) to not under-estimate the cost of
|
||||
// function outlining. On the other hand, if the outlined region
|
||||
// is predicted to be less likely, the predicted probablity is usually
|
||||
// higher than the actual. For instance, the actual probability of the
|
||||
// less likely target is only 5%, but the guessed probablity can be
|
||||
// 40%. In the latter case, there is no need for further adjustement.
|
||||
// FIXME: add an option for this.
|
||||
if (OutlineRegionRelFreq < BranchProbability(45, 100))
|
||||
return OutlineRegionRelFreq;
|
||||
|
||||
OutlineRegionRelFreq = std::max(
|
||||
OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));
|
||||
|
||||
return OutlineRegionRelFreq;
|
||||
}
|
||||
@ -496,6 +512,26 @@ int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) {
|
||||
if (isa<DbgInfoIntrinsic>(I))
|
||||
continue;
|
||||
|
||||
switch (I->getOpcode()) {
|
||||
case Instruction::BitCast:
|
||||
case Instruction::PtrToInt:
|
||||
case Instruction::IntToPtr:
|
||||
case Instruction::Alloca:
|
||||
continue;
|
||||
case Instruction::GetElementPtr:
|
||||
if (cast<GetElementPtrInst>(I)->hasAllZeroIndices())
|
||||
continue;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(I);
|
||||
if (IntrInst) {
|
||||
if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start ||
|
||||
IntrInst->getIntrinsicID() == Intrinsic::lifetime_end)
|
||||
continue;
|
||||
}
|
||||
|
||||
if (CallInst *CI = dyn_cast<CallInst>(I)) {
|
||||
InlineCost += getCallsiteCost(CallSite(CI), DL);
|
||||
continue;
|
||||
@ -519,7 +555,13 @@ std::tuple<int, int, int> PartialInlinerImpl::computeOutliningCosts(
|
||||
Function *F, const FunctionOutliningInfo *OI, Function *OutlinedFunction,
|
||||
BasicBlock *OutliningCallBB) {
|
||||
// First compute the cost of the outlined region 'OI' in the original
|
||||
// function 'F':
|
||||
// function 'F'.
|
||||
// FIXME: The code extractor (outliner) can now do code sinking/hoisting
|
||||
// to reduce outlining cost. The hoisted/sunk code currently do not
|
||||
// incur any runtime cost so it is still OK to compare the outlined
|
||||
// function cost with the outlined region in the original function.
|
||||
// If this ever changes, we will need to introduce new extractor api
|
||||
// to pass the information.
|
||||
int OutlinedRegionCost = 0;
|
||||
for (BasicBlock &BB : *F) {
|
||||
if (&BB != OI->ReturnBlock &&
|
||||
@ -539,11 +581,16 @@ std::tuple<int, int, int> PartialInlinerImpl::computeOutliningCosts(
|
||||
for (BasicBlock &BB : *OutlinedFunction) {
|
||||
OutlinedFunctionCost += computeBBInlineCost(&BB);
|
||||
}
|
||||
// The code extractor introduces a new root and exit stub blocks with
|
||||
// additional unconditional branches. Those branches will be eliminated
|
||||
// later with bb layout. The cost should be adjusted accordingly:
|
||||
OutlinedFunctionCost -= 2 * InlineConstants::InstrCost;
|
||||
|
||||
assert(OutlinedFunctionCost >= OutlinedRegionCost &&
|
||||
"Outlined function cost should be no less than the outlined region");
|
||||
int OutliningRuntimeOverhead =
|
||||
OutliningFuncCallCost + (OutlinedFunctionCost - OutlinedRegionCost);
|
||||
int OutliningRuntimeOverhead = OutliningFuncCallCost +
|
||||
(OutlinedFunctionCost - OutlinedRegionCost) +
|
||||
ExtraOutliningPenalty;
|
||||
|
||||
return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead,
|
||||
OutlinedRegionCost);
|
||||
|
64
test/Transforms/CodeExtractor/cost.ll
Normal file
64
test/Transforms/CodeExtractor/cost.ll
Normal file
@ -0,0 +1,64 @@
|
||||
; RUN: opt -S < %s -partial-inliner -partial-inlining-extra-penalty=10 | FileCheck %s
|
||||
; RUN: opt -S < %s -passes=partial-inliner -partial-inlining-extra-penalty=10 | FileCheck %s
|
||||
define i32 @outline_region_notlikely(i32* %arg) local_unnamed_addr {
|
||||
bb:
|
||||
; ptr != null is predicted to be true
|
||||
%tmp = icmp ne i32* %arg, null
|
||||
br i1 %tmp, label %bb8, label %bb1
|
||||
|
||||
; bb1 is not likely
|
||||
bb1: ; preds = %bb
|
||||
%tmp2 = tail call i32 @foo(i32* nonnull %arg)
|
||||
%tmp3 = tail call i32 @foo(i32* nonnull %arg)
|
||||
%tmp4 = tail call i32 @foo(i32* nonnull %arg)
|
||||
%tmp5 = tail call i32 @foo(i32* nonnull %arg)
|
||||
%tmp6 = tail call i32 @foo(i32* nonnull %arg)
|
||||
%tmp7 = tail call i32 @foo(i32* nonnull %arg)
|
||||
br label %bb8
|
||||
|
||||
bb8: ; preds = %bb1, %bb
|
||||
%tmp9 = phi i32 [ 0, %bb1 ], [ 1, %bb ]
|
||||
ret i32 %tmp9
|
||||
}
|
||||
|
||||
define i32 @outline_region_likely(i32* %arg) local_unnamed_addr {
|
||||
bb:
|
||||
; ptr == null is predicted to be false
|
||||
%tmp = icmp eq i32* %arg, null
|
||||
br i1 %tmp, label %bb8, label %bb1
|
||||
|
||||
; bb1 is likely
|
||||
bb1: ; preds = %bb
|
||||
%tmp2 = tail call i32 @foo(i32* nonnull %arg)
|
||||
%tmp3 = tail call i32 @foo(i32* nonnull %arg)
|
||||
%tmp4 = tail call i32 @foo(i32* nonnull %arg)
|
||||
%tmp5 = tail call i32 @foo(i32* nonnull %arg)
|
||||
%tmp6 = tail call i32 @foo(i32* nonnull %arg)
|
||||
%tmp7 = tail call i32 @foo(i32* nonnull %arg)
|
||||
br label %bb8
|
||||
|
||||
bb8: ; preds = %bb1, %bb
|
||||
%tmp9 = phi i32 [ 0, %bb1 ], [ 1, %bb ]
|
||||
ret i32 %tmp9
|
||||
}
|
||||
|
||||
declare i32 @foo(i32* %arg)
|
||||
|
||||
define i32 @dummy_caller(i32* %arg) local_unnamed_addr {
|
||||
; CHECK-LABEL: @dummy_caller
|
||||
%tmp = call i32 @outline_region_notlikely(i32* %arg)
|
||||
; CHECK: call void @outline_region_notlikely.2_bb1
|
||||
%tmp2 = tail call i32 @outline_region_likely(i32* %arg)
|
||||
; CHECK: %tmp2 = tail call i32 @outline_region_likely(i32* %arg)
|
||||
ret i32 %tmp
|
||||
|
||||
}
|
||||
|
||||
; CHECK-LABEL: define internal void @outline_region_notlikely.2_bb1(i32* %arg) {
|
||||
; CHECK-NEXT: newFuncRoot:
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!llvm.ident = !{!1}
|
||||
|
||||
!0 = !{i32 1, !"wchar_size", i32 4}
|
||||
!1 = !{!"clang version 5.0.0 (trunk 304489)"}
|
41
test/Transforms/CodeExtractor/cost_meta.ll
Normal file
41
test/Transforms/CodeExtractor/cost_meta.ll
Normal file
@ -0,0 +1,41 @@
|
||||
; RUN: opt -S < %s -partial-inliner -partial-inlining-extra-penalty=2000 | FileCheck %s
|
||||
; RUN: opt -S < %s -passes=partial-inliner -partial-inlining-extra-penalty=2000 | FileCheck %s
|
||||
define i32 @outline_region_notlikely(i32* %arg) local_unnamed_addr {
|
||||
bb:
|
||||
; ptr != null is predicted to be true
|
||||
%tmp = icmp ne i32* %arg, null
|
||||
br i1 %tmp, label %bb8, label %bb1, !prof !2
|
||||
|
||||
; bb1 is not likely
|
||||
bb1: ; preds = %bb
|
||||
%tmp2 = tail call i32 @foo(i32* nonnull %arg)
|
||||
%tmp3 = tail call i32 @foo(i32* nonnull %arg)
|
||||
%tmp4 = tail call i32 @foo(i32* nonnull %arg)
|
||||
%tmp5 = tail call i32 @foo(i32* nonnull %arg)
|
||||
%tmp6 = tail call i32 @foo(i32* nonnull %arg)
|
||||
%tmp7 = tail call i32 @foo(i32* nonnull %arg)
|
||||
br label %bb8
|
||||
|
||||
bb8: ; preds = %bb1, %bb
|
||||
%tmp9 = phi i32 [ 0, %bb1 ], [ 1, %bb ]
|
||||
ret i32 %tmp9
|
||||
}
|
||||
|
||||
define i32 @dummy_caller(i32* %arg) local_unnamed_addr {
|
||||
; CHECK-LABEL: @dummy_caller
|
||||
%tmp = call i32 @outline_region_notlikely(i32* %arg)
|
||||
ret i32 %tmp
|
||||
}
|
||||
|
||||
|
||||
; CHECK-LABEL: define internal void @outline_region_notlikely.1_bb1(i32* %arg) {
|
||||
; CHECK-NEXT: newFuncRoot:
|
||||
|
||||
declare i32 @foo(i32 * %arg)
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!llvm.ident = !{!1}
|
||||
|
||||
!0 = !{i32 1, !"wchar_size", i32 4}
|
||||
!1 = !{!"clang version 5.0.0 (trunk 304489)"}
|
||||
!2 = !{!"branch_weights", i32 2000, i32 1}
|
Loading…
Reference in New Issue
Block a user