[PartialInlining] Reduce outlining overhead by removing unneeded live-out(s)

Differential Revision: http://reviews.llvm.org/D33694


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@304375 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Xinliang David Li 2017-06-01 00:12:41 +00:00
parent b9583a3a17
commit 60b50fa9e6
3 changed files with 146 additions and 0 deletions

View File

@ -652,12 +652,21 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) {
// only split block when necessary:
PHINode *FirstPhi = getFirstPHI(PreReturn);
unsigned NumPredsFromEntries = OI->ReturnBlockPreds.size();
// Returns the one value carried by every incoming edge of PN, or nullptr as
// soon as two incoming values are found to differ. Incoming value 0 is read
// unconditionally and used as the reference for the comparison.
auto IsTrivialPhi = [](PHINode *PN) -> Value * {
  Value *Reference = PN->getIncomingValue(0);
  for (Value *Incoming : PN->incoming_values()) {
    if (Incoming != Reference)
      return nullptr;
  }
  return Reference;
};
if (FirstPhi && FirstPhi->getNumIncomingValues() > NumPredsFromEntries + 1) {
NewReturnBlock = NewReturnBlock->splitBasicBlock(
NewReturnBlock->getFirstNonPHI()->getIterator());
BasicBlock::iterator I = PreReturn->begin();
Instruction *Ins = &NewReturnBlock->front();
SmallVector<Instruction *, 4> DeadPhis;
while (I != PreReturn->end()) {
PHINode *OldPhi = dyn_cast<PHINode>(I);
if (!OldPhi)
@ -674,8 +683,22 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) {
RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(NewE), NewE);
OldPhi->removeIncomingValue(NewE);
}
// After incoming values splitting, the old phi may become trivial.
// Keeping the trivial phi can introduce a definition inside the outlined
// region which is live-out, causing unnecessary overhead (load, store,
// argument passing, etc.).
if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
OldPhi->replaceAllUsesWith(OldPhiVal);
DeadPhis.push_back(OldPhi);
}
++I;
}
for (auto *DP : DeadPhis)
DP->eraseFromParent();
for (auto E : OI->ReturnBlockPreds) {
BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
NewE->getTerminator()->replaceUsesOfWith(PreReturn, NewReturnBlock);

View File

@ -0,0 +1,61 @@
; RUN: opt -S -partial-inliner -max-num-inline-blocks=2 -skip-partial-inlining-cost-analysis < %s | FileCheck %s
; RUN: opt -S -passes=partial-inliner -max-num-inline-blocks=2 -skip-partial-inlining-cost-analysis < %s | FileCheck %s
; Callee that @dummy_caller invokes. The entry block %bb jumps straight to the
; return block %bb6 when %arg is negative; otherwise control flows through
; %bb2 (and possibly %bb5) before reaching %bb6.
define i32 @test(i32 %arg) local_unnamed_addr #0 {
bb:
; %tmp is defined in the entry block, before the early-return test on %arg.
%tmp = tail call i32 (...) @bar() #1
%tmp1 = icmp slt i32 %arg, 0
br i1 %tmp1, label %bb6, label %bb2
bb2: ; preds = %bb
; Five calls to @foo, then a second @bar call whose result is compared to 10.
tail call void (...) @foo() #1
tail call void (...) @foo() #1
tail call void (...) @foo() #1
tail call void (...) @foo() #1
tail call void (...) @foo() #1
%tmp3 = tail call i32 (...) @bar() #1
%tmp4 = icmp eq i32 %tmp3, 10
br i1 %tmp4, label %bb6, label %bb5
bb5: ; preds = %bb2
tail call void (...) @foo() #1
tail call void (...) @foo() #1
tail call void (...) @foo() #1
tail call void (...) @foo() #1
br label %bb6
bb6: ; preds = %bb5, %bb2, %bb
; Both non-entry predecessors (%bb5 and %bb2) supply the same value, %tmp;
; only the entry edge from %bb supplies a different one (0).
%tmp7 = phi i32 [ %tmp, %bb5 ], [ 0, %bb ], [ %tmp, %bb2 ]
ret i32 %tmp7
}
declare i32 @bar(...) local_unnamed_addr #1
declare void @foo(...) local_unnamed_addr #1
; Function Attrs: nounwind uwtable
define i32 @dummy_caller(i32 %arg) local_unnamed_addr #0 {
; Caller of @test. The directives below expect the call-replacement block to
; invoke the outlined function @test.1_bb2 with no arguments and to contain no
; load between that call and the following branch.
; The final directive previously lacked its colon, so FileCheck skipped it and
; the negative load match was not bounded by the branch.
; CHECK-LABEL: @dummy_caller
; CHECK: codeRepl.i:
; CHECK: call void @test.1_bb2()
; CHECK-NOT: load
; CHECK: br
bb:
%tmp = tail call i32 @test(i32 %arg)
ret i32 %tmp
}
; CHECK-LABEL: define internal void @test.1_bb2()
; CHECK: .exitStub:
; CHECK-NOT: store i32 %tmp7, i32* %tmp7.out
; CHECK: ret
attributes #0 = { nounwind uwtable }
attributes #1 = { nounwind uwtable }
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 5.0.0 (trunk 303574)"}

View File

@ -0,0 +1,62 @@
; RUN: opt -S -partial-inliner -max-num-inline-blocks=2 -skip-partial-inlining-cost-analysis < %s | FileCheck %s
; RUN: opt -S -passes=partial-inliner -max-num-inline-blocks=2 -skip-partial-inlining-cost-analysis < %s | FileCheck %s
; Callee that @dummy_caller invokes. The entry block %bb jumps straight to the
; return block %bb6 when %arg is negative; otherwise control flows through
; %bb2 (and possibly %bb5) before reaching %bb6.
define i32 @test(i32 %arg) local_unnamed_addr #0 {
bb:
; The result of this @bar call is not used by the return-block phi below.
%tmp = tail call i32 (...) @bar() #1
%tmp1 = icmp slt i32 %arg, 0
br i1 %tmp1, label %bb6, label %bb2
bb2: ; preds = %bb
; Five calls to @foo, then a second @bar call whose result is compared to 10.
tail call void (...) @foo() #1
tail call void (...) @foo() #1
tail call void (...) @foo() #1
tail call void (...) @foo() #1
tail call void (...) @foo() #1
%tmp3 = tail call i32 (...) @bar() #1
%tmp4 = icmp eq i32 %tmp3, 10
br i1 %tmp4, label %bb6, label %bb5
bb5: ; preds = %bb2
tail call void (...) @foo() #1
tail call void (...) @foo() #1
tail call void (...) @foo() #1
tail call void (...) @foo() #1
br label %bb6
bb6: ; preds = %bb5, %bb2, %bb
; Both non-entry predecessors (%bb5 and %bb2) supply the same constant, 1;
; only the entry edge from %bb supplies a different one (0).
%tmp7 = phi i32 [ 1, %bb5 ], [ 0, %bb ], [ 1, %bb2 ]
ret i32 %tmp7
}
; Function Attrs: nounwind uwtable
declare i32 @bar(...) local_unnamed_addr #0
; Function Attrs: nounwind uwtable
declare void @foo(...) local_unnamed_addr #0
; Function Attrs: nounwind uwtable
define i32 @dummy_caller(i32 %arg) local_unnamed_addr #0 {
; Caller of @test. The directives below expect the call-replacement block to
; invoke the outlined function @test.1_bb2 with no arguments and to contain no
; load between that call and the following branch.
; The final directive previously lacked its colon, so FileCheck skipped it and
; the negative load match was not bounded by the branch.
; CHECK-LABEL: @dummy_caller
; CHECK: codeRepl.i:
; CHECK: call void @test.1_bb2()
; CHECK-NOT: load
; CHECK: br
bb:
%tmp = tail call i32 @test(i32 %arg)
ret i32 %tmp
}
; CHECK-LABEL: define internal void @test.1_bb2()
; CHECK: .exitStub:
; CHECK-NOT: store i32 %tmp7, i32* %tmp7.out
; CHECK: ret
attributes #0 = { nounwind uwtable }
attributes #1 = { nounwind }
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 5.0.0 (trunk 303574)"}