[PartialInlining] Minor cost analysis tuning

Also added a test option and 2 cost analysis related tests.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@304599 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Xinliang David Li 2017-06-02 22:08:04 +00:00
parent ffc893deb7
commit 118cef0ca3
3 changed files with 161 additions and 9 deletions

View File

@ -68,6 +68,10 @@ static cl::opt<int>
cl::desc("Relative frequency of outline region to "
"the entry block"));
// Hidden debug option (defaults to 0). Its value is added on top of the
// outlining runtime overhead computed in computeOutliningCosts, which lets
// tests steer the partial inlining cost decision.
static cl::opt<unsigned> ExtraOutliningPenalty(
"partial-inlining-extra-penalty", cl::init(0), cl::Hidden,
cl::desc("A debug option to add additional penalty to the computed one."));
namespace {
struct FunctionOutliningInfo {
@ -83,7 +87,7 @@ struct FunctionOutliningInfo {
SmallVector<BasicBlock *, 4> Entries;
// The return block that is not included in the outlined region.
BasicBlock *ReturnBlock;
// The dominating block of the region ot be outlined.
// The dominating block of the region to be outlined.
BasicBlock *NonReturnBlock;
// The set of blocks in Entries that are predecessors to ReturnBlock
SmallVector<BasicBlock *, 4> ReturnBlockPreds;
@ -407,11 +411,23 @@ BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq(
if (hasProfileData(F, OI))
return OutlineRegionRelFreq;
// When profile data is not available, we need to be very
// conservative in estimating the overall savings. We need to make sure
// the outline region relative frequency is not below the threshold
// specified by the option.
OutlineRegionRelFreq = std::max(OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));
// When profile data is not available, we need to be conservative in
// estimating the overall savings. Static branch prediction can usually
// guess the branch direction right (taken/non-taken), but the guessed
// branch probability is usually not biased enough. In case when the
// outlined region is predicted to be likely, its probability needs
// to be made higher (more biased) to not under-estimate the cost of
// function outlining. On the other hand, if the outlined region
// is predicted to be less likely, the predicted probability is usually
// higher than the actual. For instance, the actual probability of the
// less likely target is only 5%, but the guessed probability can be
// 40%. In the latter case, there is no need for further adjustment.
// FIXME: add an option for this.
if (OutlineRegionRelFreq < BranchProbability(45, 100))
return OutlineRegionRelFreq;
OutlineRegionRelFreq = std::max(
OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));
return OutlineRegionRelFreq;
}
@ -496,6 +512,26 @@ int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) {
if (isa<DbgInfoIntrinsic>(I))
continue;
switch (I->getOpcode()) {
case Instruction::BitCast:
case Instruction::PtrToInt:
case Instruction::IntToPtr:
case Instruction::Alloca:
continue;
case Instruction::GetElementPtr:
if (cast<GetElementPtrInst>(I)->hasAllZeroIndices())
continue;
default:
break;
}
IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(I);
if (IntrInst) {
if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start ||
IntrInst->getIntrinsicID() == Intrinsic::lifetime_end)
continue;
}
if (CallInst *CI = dyn_cast<CallInst>(I)) {
InlineCost += getCallsiteCost(CallSite(CI), DL);
continue;
@ -519,7 +555,13 @@ std::tuple<int, int, int> PartialInlinerImpl::computeOutliningCosts(
Function *F, const FunctionOutliningInfo *OI, Function *OutlinedFunction,
BasicBlock *OutliningCallBB) {
// First compute the cost of the outlined region 'OI' in the original
// function 'F':
// function 'F'.
// FIXME: The code extractor (outliner) can now do code sinking/hoisting
// to reduce outlining cost. The hoisted/sunk code currently does not
// incur any runtime cost so it is still OK to compare the outlined
// function cost with the outlined region in the original function.
// If this ever changes, we will need to introduce new extractor api
// to pass the information.
int OutlinedRegionCost = 0;
for (BasicBlock &BB : *F) {
if (&BB != OI->ReturnBlock &&
@ -539,11 +581,16 @@ std::tuple<int, int, int> PartialInlinerImpl::computeOutliningCosts(
for (BasicBlock &BB : *OutlinedFunction) {
OutlinedFunctionCost += computeBBInlineCost(&BB);
}
// The code extractor introduces a new root and exit stub blocks with
// additional unconditional branches. Those branches will be eliminated
// later with bb layout. The cost should be adjusted accordingly:
OutlinedFunctionCost -= 2 * InlineConstants::InstrCost;
assert(OutlinedFunctionCost >= OutlinedRegionCost &&
"Outlined function cost should be no less than the outlined region");
int OutliningRuntimeOverhead =
OutliningFuncCallCost + (OutlinedFunctionCost - OutlinedRegionCost);
int OutliningRuntimeOverhead = OutliningFuncCallCost +
(OutlinedFunctionCost - OutlinedRegionCost) +
ExtraOutliningPenalty;
return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead,
OutlinedRegionCost);

View File

@ -0,0 +1,64 @@
; RUN: opt -S < %s -partial-inliner -partial-inlining-extra-penalty=10 | FileCheck %s
; RUN: opt -S < %s -passes=partial-inliner -partial-inlining-extra-penalty=10 | FileCheck %s
; Test input with no profile metadata on the branch. Returns 1 when %arg is
; non-null (the entry block branches straight to bb8); otherwise it runs the
; six calls to @foo in bb1 and returns 0. bb1 is the region this test expects
; to be outlined.
define i32 @outline_region_notlikely(i32* %arg) local_unnamed_addr {
bb:
; ptr != null is predicted to be true
%tmp = icmp ne i32* %arg, null
br i1 %tmp, label %bb8, label %bb1
; bb1 is not likely
bb1: ; preds = %bb
%tmp2 = tail call i32 @foo(i32* nonnull %arg)
%tmp3 = tail call i32 @foo(i32* nonnull %arg)
%tmp4 = tail call i32 @foo(i32* nonnull %arg)
%tmp5 = tail call i32 @foo(i32* nonnull %arg)
%tmp6 = tail call i32 @foo(i32* nonnull %arg)
%tmp7 = tail call i32 @foo(i32* nonnull %arg)
br label %bb8
bb8: ; preds = %bb1, %bb
; The phi yields 0 when control arrives from bb1 and 1 when it arrives
; directly from the entry block.
%tmp9 = phi i32 [ 0, %bb1 ], [ 1, %bb ]
ret i32 %tmp9
}
; Mirror image of @outline_region_notlikely: the compare is inverted (eq
; instead of ne), so the entry block branches to bb8 and returns 1 only when
; %arg is null; otherwise bb1 runs the six calls to @foo and returns 0. The
; expectations in @dummy_caller require the call to this function to stay
; unchanged.
define i32 @outline_region_likely(i32* %arg) local_unnamed_addr {
bb:
; ptr == null is predicted to be false
%tmp = icmp eq i32* %arg, null
br i1 %tmp, label %bb8, label %bb1
; bb1 is likely
bb1: ; preds = %bb
%tmp2 = tail call i32 @foo(i32* nonnull %arg)
%tmp3 = tail call i32 @foo(i32* nonnull %arg)
%tmp4 = tail call i32 @foo(i32* nonnull %arg)
%tmp5 = tail call i32 @foo(i32* nonnull %arg)
%tmp6 = tail call i32 @foo(i32* nonnull %arg)
%tmp7 = tail call i32 @foo(i32* nonnull %arg)
br label %bb8
bb8: ; preds = %bb1, %bb
; The phi yields 0 when control arrives from bb1 and 1 when it arrives
; directly from the entry block.
%tmp9 = phi i32 [ 0, %bb1 ], [ 1, %bb ]
ret i32 %tmp9
}
declare i32 @foo(i32* %arg)
; Caller that provides one call site for each test function. The FileCheck
; expectations interleaved below require a call to the outlined bb1 function
; of @outline_region_notlikely to appear here, while the call to
; @outline_region_likely must still be present as written. Only the result of
; the first call is returned; %tmp2 is unused.
define i32 @dummy_caller(i32* %arg) local_unnamed_addr {
; CHECK-LABEL: @dummy_caller
%tmp = call i32 @outline_region_notlikely(i32* %arg)
; CHECK: call void @outline_region_notlikely.2_bb1
%tmp2 = tail call i32 @outline_region_likely(i32* %arg)
; CHECK: %tmp2 = tail call i32 @outline_region_likely(i32* %arg)
ret i32 %tmp
}
; CHECK-LABEL: define internal void @outline_region_notlikely.2_bb1(i32* %arg) {
; CHECK-NEXT: newFuncRoot:
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 5.0.0 (trunk 304489)"}

View File

@ -0,0 +1,41 @@
; RUN: opt -S < %s -partial-inliner -partial-inlining-extra-penalty=2000 | FileCheck %s
; RUN: opt -S < %s -passes=partial-inliner -partial-inlining-extra-penalty=2000 | FileCheck %s
; Same shape as the no-profile variant of this test function, but the entry
; branch carries !prof !2 (branch_weights 2000 and 1). Returns 1 when %arg is
; non-null (branching straight to bb8); otherwise it runs the six calls to
; @foo in bb1 and returns 0. The RUN lines add an extra penalty of 2000, and
; the test still expects bb1 to be outlined.
define i32 @outline_region_notlikely(i32* %arg) local_unnamed_addr {
bb:
; ptr != null is predicted to be true
%tmp = icmp ne i32* %arg, null
br i1 %tmp, label %bb8, label %bb1, !prof !2
; bb1 is not likely
bb1: ; preds = %bb
%tmp2 = tail call i32 @foo(i32* nonnull %arg)
%tmp3 = tail call i32 @foo(i32* nonnull %arg)
%tmp4 = tail call i32 @foo(i32* nonnull %arg)
%tmp5 = tail call i32 @foo(i32* nonnull %arg)
%tmp6 = tail call i32 @foo(i32* nonnull %arg)
%tmp7 = tail call i32 @foo(i32* nonnull %arg)
br label %bb8
bb8: ; preds = %bb1, %bb
; The phi yields 0 when control arrives from bb1 and 1 when it arrives
; directly from the entry block.
%tmp9 = phi i32 [ 0, %bb1 ], [ 1, %bb ]
ret i32 %tmp9
}
; Single call site for @outline_region_notlikely; the call result is returned
; unchanged. The expectations that follow this function require the outlined
; bb1 function to be defined in the output.
define i32 @dummy_caller(i32* %arg) local_unnamed_addr {
; CHECK-LABEL: @dummy_caller
%tmp = call i32 @outline_region_notlikely(i32* %arg)
ret i32 %tmp
}
; CHECK-LABEL: define internal void @outline_region_notlikely.1_bb1(i32* %arg) {
; CHECK-NEXT: newFuncRoot:
declare i32 @foo(i32 * %arg)
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 5.0.0 (trunk 304489)"}
!2 = !{!"branch_weights", i32 2000, i32 1}