From d2d01d1984970400e63d36502e4ec94cec189cb3 Mon Sep 17 00:00:00 2001 From: Hyojin Sung Date: Tue, 29 Mar 2016 04:08:57 +0000 Subject: [PATCH] [SimlifyCFG] Prevent passes from destroying canonical loop structure, especially for nested loops When eliminating or merging almost empty basic blocks, the existence of non-trivial PHI nodes is currently used to recognize potential loops of which the block is the header and keep the block. However, the current algorithm fails if the loops' exit condition is evaluated only with volatile values hence no PHI nodes in the header. Especially when such a loop is an outer loop of a nested loop, the loop is collapsed into a single loop which prevent later optimizations from being applied (e.g., transforming nested loops into simplified forms and loop vectorization). The patch augments the existing PHI node-based check by adding a pre-test if the BB actually belongs to a set of loop headers and not eliminating it if yes. llvm-svn: 264697 --- include/llvm/Transforms/Utils/Local.h | 16 ++++++---- lib/Transforms/Scalar/JumpThreading.cpp | 5 ++- lib/Transforms/Scalar/SimplifyCFGPass.cpp | 10 +++++- lib/Transforms/Utils/SimplifyCFG.cpp | 24 +++++++++++--- .../LoopUnswitch/2015-06-17-Metadata.ll | 10 +++--- test/Transforms/LoopUnswitch/infinite-loop.ll | 6 ++-- .../SimplifyCFG/2008-05-16-PHIBlockMerge.ll | 32 +++++++++---------- .../SimplifyCFG/EqualPHIEdgeBlockMerge.ll | 32 +++++++++---------- .../SimplifyCFG/X86/switch_to_lookup_table.ll | 4 +-- 9 files changed, 84 insertions(+), 55 deletions(-) diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h index df8082ec749..dd47843e2b7 100644 --- a/include/llvm/Transforms/Utils/Local.h +++ b/include/llvm/Transforms/Utils/Local.h @@ -21,6 +21,7 @@ #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Operator.h" +#include "llvm/ADT/SmallPtrSet.h" namespace llvm { @@ -124,13 +125,16 @@ bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB); /// values, but instcombine orders them so it usually won't matter. bool EliminateDuplicatePHINodes(BasicBlock *BB); -/// This function is used to do simplification of a CFG. For example, it -/// adjusts branches to branches to eliminate the extra hop, it eliminates -/// unreachable basic blocks, and does other "peephole" optimization of the CFG. -/// It returns true if a modification was made, possibly deleting the basic -/// block that was pointed to. +/// This function is used to do simplification of a CFG. For +/// example, it adjusts branches to branches to eliminate the extra hop, it +/// eliminates unreachable basic blocks, and does other "peephole" optimization +/// of the CFG. It returns true if a modification was made, possibly deleting +/// the basic block that was pointed to. LoopHeaders is an optional input +/// parameter, providing the set of loop header that SimplifyCFG should not +/// eliminate. bool SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, - unsigned BonusInstThreshold, AssumptionCache *AC = nullptr); + unsigned BonusInstThreshold, AssumptionCache *AC = nullptr, + SmallPtrSetImpl *LoopHeaders = nullptr); /// This function is used to flatten a CFG. For example, it uses parallel-and /// and parallel-or mode to collapse if-conditions and merge if-regions with diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index c79e756f572..2def0b54c6d 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -245,10 +245,13 @@ bool JumpThreading::runOnFunction(Function &F) { // Can't thread an unconditional jump, but if the block is "almost // empty", we can replace uses of it with uses of the successor and make // this dead. + // We should not eliminate the loop header either, because eliminating + // a loop header might later prevent LoopSimplify from transforming nested + // loops into simplified form. if (BI && BI->isUnconditional() && BB != &BB->getParent()->getEntryBlock() && // If the terminator is the only non-phi instruction, try to nuke it. - BB->getFirstNonPHIOrDbg()->isTerminator()) { + BB->getFirstNonPHIOrDbg()->isTerminator() && !LoopHeaders.count(BB)) { // Since TryToSimplifyUncondBranchFromEmptyBlock may delete the // block, we have to make sure it isn't in the LoopHeaders set. We // reinsert afterward if needed. diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index 687d388701e..30fbe047190 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -28,6 +28,7 @@ #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/CFG.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" @@ -131,12 +132,19 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI, unsigned BonusInstThreshold) { bool Changed = false; bool LocalChange = true; + + SmallVector, 32> Edges; + FindFunctionBackedges(F, Edges); + SmallPtrSet LoopHeaders; + for (unsigned i = 0, e = Edges.size(); i != e; ++i) + LoopHeaders.insert(const_cast(Edges[i].second)); + while (LocalChange) { LocalChange = false; // Loop over all of the basic blocks and remove them if they are unneeded. for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) { - if (SimplifyCFG(&*BBIt++, TTI, BonusInstThreshold, AC)) { + if (SimplifyCFG(&*BBIt++, TTI, BonusInstThreshold, AC, &LoopHeaders)) { LocalChange = true; ++NumSimpl; } diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 75d42617a6a..f750509a5e8 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -135,6 +135,7 @@ class SimplifyCFGOpt { const DataLayout &DL; unsigned BonusInstThreshold; AssumptionCache *AC; + SmallPtrSetImpl *LoopHeaders; Value *isValueEqualityComparison(TerminatorInst *TI); BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI, std::vector &Cases); @@ -157,8 +158,10 @@ class SimplifyCFGOpt { public: SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout &DL, - unsigned BonusInstThreshold, AssumptionCache *AC) - : TTI(TTI), DL(DL), BonusInstThreshold(BonusInstThreshold), AC(AC) {} + unsigned BonusInstThreshold, AssumptionCache *AC, + SmallPtrSetImpl *LoopHeaders) + : TTI(TTI), DL(DL), BonusInstThreshold(BonusInstThreshold), AC(AC), + LoopHeaders(LoopHeaders) {} bool run(BasicBlock *BB); }; } @@ -3362,6 +3365,7 @@ bool SimplifyCFGOpt::SimplifySingleResume(ResumeInst *RI) { // The landingpad is now unreachable. Zap it. BB->eraseFromParent(); + if (LoopHeaders) LoopHeaders->erase(BB); return true; } @@ -3560,9 +3564,11 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { } // If we eliminated all predecessors of the block, delete the block now. - if (pred_empty(BB)) + if (pred_empty(BB)) { // We know there are no successors, so just nuke the block. BB->eraseFromParent(); + if (LoopHeaders) LoopHeaders->erase(BB); + } return true; } @@ -3719,6 +3725,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { BB != &BB->getParent()->getEntryBlock()) { // We know there are no successors, so just nuke the block. BB->eraseFromParent(); + if (LoopHeaders) LoopHeaders->erase(BB); return true; } @@ -5062,8 +5069,14 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ return true; // If the Terminator is the only non-phi instruction, simplify the block. + // if LoopHeader is provided, check if the block is a loop header + // (This is for early invocations before loop simplify and vectorization + // to keep canonical loop forms for nested loops. + // These blocks can be eliminated when the pass is invoked later + // in the back-end.) BasicBlock::iterator I = BB->getFirstNonPHIOrDbg()->getIterator(); if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() && + (!LoopHeaders || (LoopHeaders && !LoopHeaders->count(BB))) && TryToSimplifyUncondBranchFromEmptyBlock(BB)) return true; @@ -5343,7 +5356,8 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { /// of the CFG. It returns true if a modification was made. /// bool llvm::SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, - unsigned BonusInstThreshold, AssumptionCache *AC) { + unsigned BonusInstThreshold, AssumptionCache *AC, + SmallPtrSetImpl *LoopHeaders) { return SimplifyCFGOpt(TTI, BB->getModule()->getDataLayout(), - BonusInstThreshold, AC).run(BB); + BonusInstThreshold, AC, LoopHeaders).run(BB); } diff --git a/test/Transforms/LoopUnswitch/2015-06-17-Metadata.ll b/test/Transforms/LoopUnswitch/2015-06-17-Metadata.ll index d536da1e8b6..a215be9d487 100644 --- a/test/Transforms/LoopUnswitch/2015-06-17-Metadata.ll +++ b/test/Transforms/LoopUnswitch/2015-06-17-Metadata.ll @@ -16,23 +16,23 @@ for.body: ; preds = %for.inc, %for.body. %cmp1 = icmp eq i32 %a, 12345 br i1 %cmp1, label %if.then, label %if.else, !prof !0 ; CHECK: %cmp1 = icmp eq i32 %a, 12345 -; CHECK-NEXT: br i1 %cmp1, label %if.then.us, label %if.else, !prof !0 +; CHECK-NEXT: br i1 %cmp1, label %for.body.us, label %for.body, !prof !0 if.then: ; preds = %for.body -; CHECK: if.then.us: +; CHECK: for.body.us: ; CHECK: add nsw i32 %{{.*}}, 123 ; CHECK: %exitcond.us = icmp eq i32 %inc.us, %b -; CHECK: br i1 %exitcond.us, label %for.cond.cleanup, label %if.then.us +; CHECK: br i1 %exitcond.us, label %for.cond.cleanup, label %for.body.us %add = add nsw i32 %add.i, 123 br label %for.inc if.else: ; preds = %for.body %mul = mul nsw i32 %mul.i, %b br label %for.inc -; CHECK: if.else: +; CHECK: for.body: ; CHECK: %mul = mul nsw i32 %mul.i, %b ; CHECK: %inc = add nuw nsw i32 %inc.i, 1 ; CHECK: %exitcond = icmp eq i32 %inc, %b -; CHECK: br i1 %exitcond, label %for.cond.cleanup, label %if.else +; CHECK: br i1 %exitcond, label %for.cond.cleanup, label %for.body for.inc: ; preds = %if.then, %if.else %mul.p = phi i32 [ %b, %if.then ], [ %mul, %if.else ] %add.p = phi i32 [ %add, %if.then ], [ %a, %if.else ] diff --git a/test/Transforms/LoopUnswitch/infinite-loop.ll b/test/Transforms/LoopUnswitch/infinite-loop.ll index 3d1c895edec..0aef9092a1f 100644 --- a/test/Transforms/LoopUnswitch/infinite-loop.ll +++ b/test/Transforms/LoopUnswitch/infinite-loop.ll @@ -16,10 +16,10 @@ ; CHECK-NEXT: br i1 %a, label %entry.split, label %abort0.split ; CHECK: entry.split: -; CHECK-NEXT: br i1 %b, label %cond.end, label %abort1.split +; CHECK-NEXT: br i1 %b, label %for.body, label %abort1.split -; CHECK: cond.end: -; CHECK-NEXT: br label %cond.end +; CHECK: for.body: +; CHECK-NEXT: br label %for.body ; CHECK: abort0.split: ; CHECK-NEXT: call void @end0() [[NOR_NUW:#[0-9]+]] diff --git a/test/Transforms/SimplifyCFG/2008-05-16-PHIBlockMerge.ll b/test/Transforms/SimplifyCFG/2008-05-16-PHIBlockMerge.ll index 13ccad6a1ee..21e9bc7b7f4 100644 --- a/test/Transforms/SimplifyCFG/2008-05-16-PHIBlockMerge.ll +++ b/test/Transforms/SimplifyCFG/2008-05-16-PHIBlockMerge.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -simplifycfg -S > %t ; RUN: not grep "^BB.tomerge" %t -; RUN: grep "^BB.nomerge" %t | count 2 +; RUN: grep "^BB.nomerge" %t | count 4 ; ModuleID = '' declare i1 @foo() @@ -54,24 +54,24 @@ Exit: ; preds = %Succ ret void } -; This function can be merged +; This function can't be merged (for keeping canonical loop structures) define void @c() { entry: - br label %BB.tomerge + br label %BB.nomerge -BB.tomerge: ; preds = %Common, %entry +BB.nomerge: ; preds = %Common, %entry br label %Succ Succ: ; preds = %Common, %BB.tomerge, %Pre-Exit ; This phi has identical values for Common and (through BB) Common, ; blocks can't be merged - %b = phi i32 [ 1, %BB.tomerge ], [ 1, %Common ], [ 2, %Pre-Exit ] + %b = phi i32 [ 1, %BB.nomerge ], [ 1, %Common ], [ 2, %Pre-Exit ] %conde = call i1 @foo( ) ; [#uses=1] br i1 %conde, label %Common, label %Pre-Exit Common: ; preds = %Succ %cond = call i1 @foo( ) ; [#uses=1] - br i1 %cond, label %BB.tomerge, label %Succ + br i1 %cond, label %BB.nomerge, label %Succ Pre-Exit: ; preds = %Succ ; This adds a backedge, so the %b phi node gets a third branch and is @@ -83,25 +83,25 @@ Exit: ; preds = %Pre-Exit ret void } -; This function can be merged +; This function can't be merged (for keeping canonical loop structures) define void @d() { entry: - br label %BB.tomerge + br label %BB.nomerge -BB.tomerge: ; preds = %Common, %entry +BB.nomerge: ; preds = %Common, %entry ; This phi has a matching value (0) with below phi (0), so blocks ; can be merged. %a = phi i32 [ 1, %entry ], [ 0, %Common ] ; [#uses=1] br label %Succ Succ: ; preds = %Common, %BB.tomerge - %b = phi i32 [ %a, %BB.tomerge ], [ 0, %Common ] ; [#uses=0] + %b = phi i32 [ %a, %BB.nomerge ], [ 0, %Common ] ; [#uses=0] %conde = call i1 @foo( ) ; [#uses=1] br i1 %conde, label %Common, label %Exit Common: ; preds = %Succ %cond = call i1 @foo( ) ; [#uses=1] - br i1 %cond, label %BB.tomerge, label %Succ + br i1 %cond, label %BB.nomerge, label %Succ Exit: ; preds = %Succ ret void @@ -110,21 +110,21 @@ Exit: ; preds = %Succ ; This function can be merged define void @e() { entry: - br label %BB.tomerge + br label %Succ -BB.tomerge: ; preds = %Use, %entry +Succ: ; preds = %Use, %entry ; This phi is used somewhere else than Succ, but this should not prevent ; merging this block %a = phi i32 [ 1, %entry ], [ 0, %Use ] ; [#uses=1] - br label %Succ + br label %BB.tomerge -Succ: ; preds = %BB.tomerge +BB.tomerge: ; preds = %BB.tomerge %conde = call i1 @foo( ) ; [#uses=1] br i1 %conde, label %Use, label %Exit Use: ; preds = %Succ %cond = call i1 @bar( i32 %a ) ; [#uses=1] - br i1 %cond, label %BB.tomerge, label %Exit + br i1 %cond, label %Succ, label %Exit Exit: ; preds = %Use, %Succ ret void diff --git a/test/Transforms/SimplifyCFG/EqualPHIEdgeBlockMerge.ll b/test/Transforms/SimplifyCFG/EqualPHIEdgeBlockMerge.ll index b07ef970a20..6e8593755c7 100644 --- a/test/Transforms/SimplifyCFG/EqualPHIEdgeBlockMerge.ll +++ b/test/Transforms/SimplifyCFG/EqualPHIEdgeBlockMerge.ll @@ -5,7 +5,7 @@ ; RUN: not grep X: %t ; RUN: not grep 'switch i32[^U]+%U' %t ; RUN: not grep "^BB.tomerge" %t -; RUN: grep "^BB.nomerge" %t | count 2 +; RUN: grep "^BB.nomerge" %t | count 4 ; ; ModuleID = '' @@ -179,24 +179,24 @@ Exit: ; preds = %Succ ret void } -; This function can be merged +; This function can't be merged (for keeping canonical loop structures) define void @c() { entry: - br label %BB.tomerge + br label %BB.nomerge -BB.tomerge: ; preds = %Common, %entry +BB.nomerge: ; preds = %Common, %entry br label %Succ Succ: ; preds = %Common, %BB.tomerge, %Pre-Exit ; This phi has identical values for Common and (through BB) Common, ; blocks can't be merged - %b = phi i32 [ 1, %BB.tomerge ], [ 1, %Common ], [ 2, %Pre-Exit ] + %b = phi i32 [ 1, %BB.nomerge ], [ 1, %Common ], [ 2, %Pre-Exit ] %conde = call i1 @foo( ) ; [#uses=1] br i1 %conde, label %Common, label %Pre-Exit Common: ; preds = %Succ %cond = call i1 @foo( ) ; [#uses=1] - br i1 %cond, label %BB.tomerge, label %Succ + br i1 %cond, label %BB.nomerge, label %Succ Pre-Exit: ; preds = %Succ ; This adds a backedge, so the %b phi node gets a third branch and is @@ -208,25 +208,25 @@ Exit: ; preds = %Pre-Exit ret void } -; This function can be merged +; This function can't be merged (for keeping canonical loop structures) define void @d() { entry: - br label %BB.tomerge + br label %BB.nomerge -BB.tomerge: ; preds = %Common, %entry +BB.nomerge: ; preds = %Common, %entry ; This phi has a matching value (0) with below phi (0), so blocks ; can be merged. %a = phi i32 [ 1, %entry ], [ 0, %Common ] ; [#uses=1] br label %Succ Succ: ; preds = %Common, %BB.tomerge - %b = phi i32 [ %a, %BB.tomerge ], [ 0, %Common ] ; [#uses=0] + %b = phi i32 [ %a, %BB.nomerge ], [ 0, %Common ] ; [#uses=0] %conde = call i1 @foo( ) ; [#uses=1] br i1 %conde, label %Common, label %Exit Common: ; preds = %Succ %cond = call i1 @foo( ) ; [#uses=1] - br i1 %cond, label %BB.tomerge, label %Succ + br i1 %cond, label %BB.nomerge, label %Succ Exit: ; preds = %Succ ret void @@ -235,21 +235,21 @@ Exit: ; preds = %Succ ; This function can be merged define void @e() { entry: - br label %BB.tomerge + br label %Succ -BB.tomerge: ; preds = %Use, %entry +Succ: ; preds = %Use, %entry ; This phi is used somewhere else than Succ, but this should not prevent ; merging this block %a = phi i32 [ 1, %entry ], [ 0, %Use ] ; [#uses=1] - br label %Succ + br label %BB.tomerge -Succ: ; preds = %BB.tomerge +BB.tomerge: ; preds = %Succ %conde = call i1 @foo( ) ; [#uses=1] br i1 %conde, label %Use, label %Exit Use: ; preds = %Succ %cond = call i1 @bar( i32 %a ) ; [#uses=1] - br i1 %cond, label %BB.tomerge, label %Exit + br i1 %cond, label %Succ, label %Exit Exit: ; preds = %Use, %Succ ret void diff --git a/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll b/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll index 6953cf9c8b3..bae8c1dc5a4 100644 --- a/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll +++ b/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll @@ -1306,8 +1306,8 @@ l6: ; Speculation depth must be limited to avoid a zero-cost instruction cycle. ; CHECK-LABEL: @PR26308( -; CHECK: cleanup4: -; CHECK-NEXT: br label %cleanup4 +; CHECK: while.body: +; CHECK-NEXT: br label %while.body define i32 @PR26308(i1 %B, i64 %load) { entry: