From 60b50fa9e6112b9682db91317b758f4b0cc3a21a Mon Sep 17 00:00:00 2001 From: Xinliang David Li Date: Thu, 1 Jun 2017 00:12:41 +0000 Subject: [PATCH] [PartialInlining] Reduce outlining overhead by removing unneeded live-out(s) Differential Revision: http://reviews.llvm.org/D33694 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@304375 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/PartialInlining.cpp | 23 +++++++ .../CodeExtractor/PartialInlineLiveAcross.ll | 61 ++++++++++++++++++ .../CodeExtractor/PartialInlineNoLiveOut.ll | 62 +++++++++++++++++++ 3 files changed, 146 insertions(+) create mode 100644 test/Transforms/CodeExtractor/PartialInlineLiveAcross.ll create mode 100644 test/Transforms/CodeExtractor/PartialInlineNoLiveOut.ll diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index 4c417f1c55e..bc0967448cd 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -652,12 +652,21 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) { // only split block when necessary: PHINode *FirstPhi = getFirstPHI(PreReturn); unsigned NumPredsFromEntries = OI->ReturnBlockPreds.size(); + auto IsTrivialPhi = [](PHINode *PN) -> Value * { + Value *CommonValue = PN->getIncomingValue(0); + if (all_of(PN->incoming_values(), + [&](Value *V) { return V == CommonValue; })) + return CommonValue; + return nullptr; + }; + if (FirstPhi && FirstPhi->getNumIncomingValues() > NumPredsFromEntries + 1) { NewReturnBlock = NewReturnBlock->splitBasicBlock( NewReturnBlock->getFirstNonPHI()->getIterator()); BasicBlock::iterator I = PreReturn->begin(); Instruction *Ins = &NewReturnBlock->front(); + SmallVector DeadPhis; while (I != PreReturn->end()) { PHINode *OldPhi = dyn_cast(I); if (!OldPhi) @@ -674,8 +683,22 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) { RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(NewE), NewE); OldPhi->removeIncomingValue(NewE); } + + // 
After incoming values splitting, the old phi may become trivial. + // Keeping the trivial phi can introduce definition inside the outline + // region which is live-out, causing unnecessary overhead (load, store + // arg passing etc). + if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) { + OldPhi->replaceAllUsesWith(OldPhiVal); + DeadPhis.push_back(OldPhi); + } + ++I; } + + for (auto *DP : DeadPhis) + DP->eraseFromParent(); + for (auto E : OI->ReturnBlockPreds) { BasicBlock *NewE = cast(VMap[E]); NewE->getTerminator()->replaceUsesOfWith(PreReturn, NewReturnBlock); diff --git a/test/Transforms/CodeExtractor/PartialInlineLiveAcross.ll b/test/Transforms/CodeExtractor/PartialInlineLiveAcross.ll new file mode 100644 index 00000000000..e8a4d1281a2 --- /dev/null +++ b/test/Transforms/CodeExtractor/PartialInlineLiveAcross.ll @@ -0,0 +1,61 @@ +; RUN: opt -S -partial-inliner -max-num-inline-blocks=2 -skip-partial-inlining-cost-analysis < %s | FileCheck %s +; RUN: opt -S -passes=partial-inliner -max-num-inline-blocks=2 -skip-partial-inlining-cost-analysis < %s | FileCheck %s +define i32 @test(i32 %arg) local_unnamed_addr #0 { +bb: + %tmp = tail call i32 (...) @bar() #1 + %tmp1 = icmp slt i32 %arg, 0 + br i1 %tmp1, label %bb6, label %bb2 + +bb2: ; preds = %bb + tail call void (...) @foo() #1 + tail call void (...) @foo() #1 + tail call void (...) @foo() #1 + tail call void (...) @foo() #1 + tail call void (...) @foo() #1 + %tmp3 = tail call i32 (...) @bar() #1 + %tmp4 = icmp eq i32 %tmp3, 10 + br i1 %tmp4, label %bb6, label %bb5 + +bb5: ; preds = %bb2 + tail call void (...) @foo() #1 + tail call void (...) @foo() #1 + tail call void (...) @foo() #1 + tail call void (...) @foo() #1 + br label %bb6 + +bb6: ; preds = %bb5, %bb2, %bb + %tmp7 = phi i32 [ %tmp, %bb5 ], [ 0, %bb ], [ %tmp, %bb2 ] + ret i32 %tmp7 +} + +declare i32 @bar(...) local_unnamed_addr #1 + +declare void @foo(...) 
local_unnamed_addr #1 + +; Function Attrs: nounwind uwtable +define i32 @dummy_caller(i32 %arg) local_unnamed_addr #0 { +; CHECK-LABEL: @dummy_caller +; CHECK: codeRepl.i: +; CHECK: call void @test.1_bb2() +; CHECK-NOT: load +; CHECK: br + +bb: + %tmp = tail call i32 @test(i32 %arg) + ret i32 %tmp +} + +; CHECK-LABEL: define internal void @test.1_bb2() +; CHECK: .exitStub: +; CHECK-NOT: store i32 %tmp7, i32* %tmp7.out +; CHECK: ret + + +attributes #0 = { nounwind uwtable } +attributes #1 = { nounwind uwtable } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 5.0.0 (trunk 303574)"} diff --git a/test/Transforms/CodeExtractor/PartialInlineNoLiveOut.ll b/test/Transforms/CodeExtractor/PartialInlineNoLiveOut.ll new file mode 100644 index 00000000000..a48ff4b1b8f --- /dev/null +++ b/test/Transforms/CodeExtractor/PartialInlineNoLiveOut.ll @@ -0,0 +1,62 @@ +; RUN: opt -S -partial-inliner -max-num-inline-blocks=2 -skip-partial-inlining-cost-analysis < %s | FileCheck %s +; RUN: opt -S -passes=partial-inliner -max-num-inline-blocks=2 -skip-partial-inlining-cost-analysis < %s | FileCheck %s + +define i32 @test(i32 %arg) local_unnamed_addr #0 { +bb: + %tmp = tail call i32 (...) @bar() #1 + %tmp1 = icmp slt i32 %arg, 0 + br i1 %tmp1, label %bb6, label %bb2 + +bb2: ; preds = %bb + tail call void (...) @foo() #1 + tail call void (...) @foo() #1 + tail call void (...) @foo() #1 + tail call void (...) @foo() #1 + tail call void (...) @foo() #1 + %tmp3 = tail call i32 (...) @bar() #1 + %tmp4 = icmp eq i32 %tmp3, 10 + br i1 %tmp4, label %bb6, label %bb5 + +bb5: ; preds = %bb2 + tail call void (...) @foo() #1 + tail call void (...) @foo() #1 + tail call void (...) @foo() #1 + tail call void (...) @foo() #1 + br label %bb6 + +bb6: ; preds = %bb5, %bb2, %bb + %tmp7 = phi i32 [ 1, %bb5 ], [ 0, %bb ], [ 1, %bb2 ] + ret i32 %tmp7 +} + +; Function Attrs: nounwind uwtable +declare i32 @bar(...) 
local_unnamed_addr #0 + +; Function Attrs: nounwind uwtable +declare void @foo(...) local_unnamed_addr #0 + +; Function Attrs: nounwind uwtable +define i32 @dummy_caller(i32 %arg) local_unnamed_addr #0 { +; CHECK-LABEL: @dummy_caller +; CHECK: codeRepl.i: +; CHECK: call void @test.1_bb2() +; CHECK-NOT: load +; CHECK: br +bb: + %tmp = tail call i32 @test(i32 %arg) + ret i32 %tmp +} + +; CHECK-LABEL: define internal void @test.1_bb2() +; CHECK: .exitStub: +; CHECK-NOT: store i32 %tmp7, i32* %tmp7.out +; CHECK: ret + +attributes #0 = { nounwind uwtable } +attributes #1 = { nounwind } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 5.0.0 (trunk 303574)"}