[SimlifyCFG] Prevent passes from destroying canonical loop structure, especially for nested loops

When eliminating or merging almost empty basic blocks, the existence of non-trivial PHI nodes
is currently used to recognize potential loops of which the block is the header and keep the block.
However, the current algorithm fails if the loops' exit condition is evaluated only with volatile
values hence no PHI nodes in the header. Especially when such a loop is an outer loop of a nested
loop, the loop is collapsed into a single loop which prevent later optimizations from being
applied (e.g., transforming nested loops into simplified forms and loop vectorization).
    
The patch augments the existing PHI node-based check by adding a pre-test if the BB actually
belongs to a set of loop headers and not eliminating it if yes.

llvm-svn: 264697
This commit is contained in:
Hyojin Sung 2016-03-29 04:08:57 +00:00
parent 7e5bfbd9da
commit d2d01d1984
9 changed files with 84 additions and 55 deletions

View File

@ -21,6 +21,7 @@
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Operator.h"
#include "llvm/ADT/SmallPtrSet.h"
namespace llvm {
@ -124,13 +125,16 @@ bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB);
/// values, but instcombine orders them so it usually won't matter.
bool EliminateDuplicatePHINodes(BasicBlock *BB);
/// This function is used to do simplification of a CFG. For example, it
/// adjusts branches to branches to eliminate the extra hop, it eliminates
/// unreachable basic blocks, and does other "peephole" optimization of the CFG.
/// It returns true if a modification was made, possibly deleting the basic
/// block that was pointed to.
/// This function is used to do simplification of a CFG. For
/// example, it adjusts branches to branches to eliminate the extra hop, it
/// eliminates unreachable basic blocks, and does other "peephole" optimization
/// of the CFG. It returns true if a modification was made, possibly deleting
/// the basic block that was pointed to. LoopHeaders is an optional input
/// parameter, providing the set of loop header that SimplifyCFG should not
/// eliminate.
bool SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
unsigned BonusInstThreshold, AssumptionCache *AC = nullptr);
unsigned BonusInstThreshold, AssumptionCache *AC = nullptr,
SmallPtrSetImpl<BasicBlock *> *LoopHeaders = nullptr);
/// This function is used to flatten a CFG. For example, it uses parallel-and
/// and parallel-or mode to collapse if-conditions and merge if-regions with

View File

@ -245,10 +245,13 @@ bool JumpThreading::runOnFunction(Function &F) {
// Can't thread an unconditional jump, but if the block is "almost
// empty", we can replace uses of it with uses of the successor and make
// this dead.
// We should not eliminate the loop header either, because eliminating
// a loop header might later prevent LoopSimplify from transforming nested
// loops into simplified form.
if (BI && BI->isUnconditional() &&
BB != &BB->getParent()->getEntryBlock() &&
// If the terminator is the only non-phi instruction, try to nuke it.
BB->getFirstNonPHIOrDbg()->isTerminator()) {
BB->getFirstNonPHIOrDbg()->isTerminator() && !LoopHeaders.count(BB)) {
// Since TryToSimplifyUncondBranchFromEmptyBlock may delete the
// block, we have to make sure it isn't in the LoopHeaders set. We
// reinsert afterward if needed.

View File

@ -28,6 +28,7 @@
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
@ -131,12 +132,19 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
unsigned BonusInstThreshold) {
bool Changed = false;
bool LocalChange = true;
SmallVector<std::pair<const BasicBlock *, const BasicBlock *>, 32> Edges;
FindFunctionBackedges(F, Edges);
SmallPtrSet<BasicBlock *, 16> LoopHeaders;
for (unsigned i = 0, e = Edges.size(); i != e; ++i)
LoopHeaders.insert(const_cast<BasicBlock *>(Edges[i].second));
while (LocalChange) {
LocalChange = false;
// Loop over all of the basic blocks and remove them if they are unneeded.
for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) {
if (SimplifyCFG(&*BBIt++, TTI, BonusInstThreshold, AC)) {
if (SimplifyCFG(&*BBIt++, TTI, BonusInstThreshold, AC, &LoopHeaders)) {
LocalChange = true;
++NumSimpl;
}

View File

@ -135,6 +135,7 @@ class SimplifyCFGOpt {
const DataLayout &DL;
unsigned BonusInstThreshold;
AssumptionCache *AC;
SmallPtrSetImpl<BasicBlock *> *LoopHeaders;
Value *isValueEqualityComparison(TerminatorInst *TI);
BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI,
std::vector<ValueEqualityComparisonCase> &Cases);
@ -157,8 +158,10 @@ class SimplifyCFGOpt {
public:
SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout &DL,
unsigned BonusInstThreshold, AssumptionCache *AC)
: TTI(TTI), DL(DL), BonusInstThreshold(BonusInstThreshold), AC(AC) {}
unsigned BonusInstThreshold, AssumptionCache *AC,
SmallPtrSetImpl<BasicBlock *> *LoopHeaders)
: TTI(TTI), DL(DL), BonusInstThreshold(BonusInstThreshold), AC(AC),
LoopHeaders(LoopHeaders) {}
bool run(BasicBlock *BB);
};
}
@ -3362,6 +3365,7 @@ bool SimplifyCFGOpt::SimplifySingleResume(ResumeInst *RI) {
// The landingpad is now unreachable. Zap it.
BB->eraseFromParent();
if (LoopHeaders) LoopHeaders->erase(BB);
return true;
}
@ -3560,9 +3564,11 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
}
// If we eliminated all predecessors of the block, delete the block now.
if (pred_empty(BB))
if (pred_empty(BB)) {
// We know there are no successors, so just nuke the block.
BB->eraseFromParent();
if (LoopHeaders) LoopHeaders->erase(BB);
}
return true;
}
@ -3719,6 +3725,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
BB != &BB->getParent()->getEntryBlock()) {
// We know there are no successors, so just nuke the block.
BB->eraseFromParent();
if (LoopHeaders) LoopHeaders->erase(BB);
return true;
}
@ -5062,8 +5069,14 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
return true;
// If the Terminator is the only non-phi instruction, simplify the block.
// if LoopHeader is provided, check if the block is a loop header
// (This is for early invocations before loop simplify and vectorization
// to keep canonical loop forms for nested loops.
// These blocks can be eliminated when the pass is invoked later
// in the back-end.)
BasicBlock::iterator I = BB->getFirstNonPHIOrDbg()->getIterator();
if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
(!LoopHeaders || (LoopHeaders && !LoopHeaders->count(BB))) &&
TryToSimplifyUncondBranchFromEmptyBlock(BB))
return true;
@ -5343,7 +5356,8 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
/// of the CFG. It returns true if a modification was made.
///
bool llvm::SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
unsigned BonusInstThreshold, AssumptionCache *AC) {
unsigned BonusInstThreshold, AssumptionCache *AC,
SmallPtrSetImpl<BasicBlock *> *LoopHeaders) {
return SimplifyCFGOpt(TTI, BB->getModule()->getDataLayout(),
BonusInstThreshold, AC).run(BB);
BonusInstThreshold, AC, LoopHeaders).run(BB);
}

View File

@ -16,23 +16,23 @@ for.body: ; preds = %for.inc, %for.body.
%cmp1 = icmp eq i32 %a, 12345
br i1 %cmp1, label %if.then, label %if.else, !prof !0
; CHECK: %cmp1 = icmp eq i32 %a, 12345
; CHECK-NEXT: br i1 %cmp1, label %if.then.us, label %if.else, !prof !0
; CHECK-NEXT: br i1 %cmp1, label %for.body.us, label %for.body, !prof !0
if.then: ; preds = %for.body
; CHECK: if.then.us:
; CHECK: for.body.us:
; CHECK: add nsw i32 %{{.*}}, 123
; CHECK: %exitcond.us = icmp eq i32 %inc.us, %b
; CHECK: br i1 %exitcond.us, label %for.cond.cleanup, label %if.then.us
; CHECK: br i1 %exitcond.us, label %for.cond.cleanup, label %for.body.us
%add = add nsw i32 %add.i, 123
br label %for.inc
if.else: ; preds = %for.body
%mul = mul nsw i32 %mul.i, %b
br label %for.inc
; CHECK: if.else:
; CHECK: for.body:
; CHECK: %mul = mul nsw i32 %mul.i, %b
; CHECK: %inc = add nuw nsw i32 %inc.i, 1
; CHECK: %exitcond = icmp eq i32 %inc, %b
; CHECK: br i1 %exitcond, label %for.cond.cleanup, label %if.else
; CHECK: br i1 %exitcond, label %for.cond.cleanup, label %for.body
for.inc: ; preds = %if.then, %if.else
%mul.p = phi i32 [ %b, %if.then ], [ %mul, %if.else ]
%add.p = phi i32 [ %add, %if.then ], [ %a, %if.else ]

View File

@ -16,10 +16,10 @@
; CHECK-NEXT: br i1 %a, label %entry.split, label %abort0.split
; CHECK: entry.split:
; CHECK-NEXT: br i1 %b, label %cond.end, label %abort1.split
; CHECK-NEXT: br i1 %b, label %for.body, label %abort1.split
; CHECK: cond.end:
; CHECK-NEXT: br label %cond.end
; CHECK: for.body:
; CHECK-NEXT: br label %for.body
; CHECK: abort0.split:
; CHECK-NEXT: call void @end0() [[NOR_NUW:#[0-9]+]]

View File

@ -1,6 +1,6 @@
; RUN: opt < %s -simplifycfg -S > %t
; RUN: not grep "^BB.tomerge" %t
; RUN: grep "^BB.nomerge" %t | count 2
; RUN: grep "^BB.nomerge" %t | count 4
; ModuleID = '<stdin>'
declare i1 @foo()
@ -54,24 +54,24 @@ Exit: ; preds = %Succ
ret void
}
; This function can be merged
; This function can't be merged (for keeping canonical loop structures)
define void @c() {
entry:
br label %BB.tomerge
br label %BB.nomerge
BB.tomerge: ; preds = %Common, %entry
BB.nomerge: ; preds = %Common, %entry
br label %Succ
Succ: ; preds = %Common, %BB.tomerge, %Pre-Exit
; This phi has identical values for Common and (through BB) Common,
; blocks can't be merged
%b = phi i32 [ 1, %BB.tomerge ], [ 1, %Common ], [ 2, %Pre-Exit ]
%b = phi i32 [ 1, %BB.nomerge ], [ 1, %Common ], [ 2, %Pre-Exit ]
%conde = call i1 @foo( ) ; <i1> [#uses=1]
br i1 %conde, label %Common, label %Pre-Exit
Common: ; preds = %Succ
%cond = call i1 @foo( ) ; <i1> [#uses=1]
br i1 %cond, label %BB.tomerge, label %Succ
br i1 %cond, label %BB.nomerge, label %Succ
Pre-Exit: ; preds = %Succ
; This adds a backedge, so the %b phi node gets a third branch and is
@ -83,25 +83,25 @@ Exit: ; preds = %Pre-Exit
ret void
}
; This function can be merged
; This function can't be merged (for keeping canonical loop structures)
define void @d() {
entry:
br label %BB.tomerge
br label %BB.nomerge
BB.tomerge: ; preds = %Common, %entry
BB.nomerge: ; preds = %Common, %entry
; This phi has a matching value (0) with below phi (0), so blocks
; can be merged.
%a = phi i32 [ 1, %entry ], [ 0, %Common ] ; <i32> [#uses=1]
br label %Succ
Succ: ; preds = %Common, %BB.tomerge
%b = phi i32 [ %a, %BB.tomerge ], [ 0, %Common ] ; <i32> [#uses=0]
%b = phi i32 [ %a, %BB.nomerge ], [ 0, %Common ] ; <i32> [#uses=0]
%conde = call i1 @foo( ) ; <i1> [#uses=1]
br i1 %conde, label %Common, label %Exit
Common: ; preds = %Succ
%cond = call i1 @foo( ) ; <i1> [#uses=1]
br i1 %cond, label %BB.tomerge, label %Succ
br i1 %cond, label %BB.nomerge, label %Succ
Exit: ; preds = %Succ
ret void
@ -110,21 +110,21 @@ Exit: ; preds = %Succ
; This function can be merged
define void @e() {
entry:
br label %BB.tomerge
br label %Succ
BB.tomerge: ; preds = %Use, %entry
Succ: ; preds = %Use, %entry
; This phi is used somewhere else than Succ, but this should not prevent
; merging this block
%a = phi i32 [ 1, %entry ], [ 0, %Use ] ; <i32> [#uses=1]
br label %Succ
br label %BB.tomerge
Succ: ; preds = %BB.tomerge
BB.tomerge: ; preds = %BB.tomerge
%conde = call i1 @foo( ) ; <i1> [#uses=1]
br i1 %conde, label %Use, label %Exit
Use: ; preds = %Succ
%cond = call i1 @bar( i32 %a ) ; <i1> [#uses=1]
br i1 %cond, label %BB.tomerge, label %Exit
br i1 %cond, label %Succ, label %Exit
Exit: ; preds = %Use, %Succ
ret void

View File

@ -5,7 +5,7 @@
; RUN: not grep X: %t
; RUN: not grep 'switch i32[^U]+%U' %t
; RUN: not grep "^BB.tomerge" %t
; RUN: grep "^BB.nomerge" %t | count 2
; RUN: grep "^BB.nomerge" %t | count 4
;
; ModuleID = '<stdin>'
@ -179,24 +179,24 @@ Exit: ; preds = %Succ
ret void
}
; This function can be merged
; This function can't be merged (for keeping canonical loop structures)
define void @c() {
entry:
br label %BB.tomerge
br label %BB.nomerge
BB.tomerge: ; preds = %Common, %entry
BB.nomerge: ; preds = %Common, %entry
br label %Succ
Succ: ; preds = %Common, %BB.tomerge, %Pre-Exit
; This phi has identical values for Common and (through BB) Common,
; blocks can't be merged
%b = phi i32 [ 1, %BB.tomerge ], [ 1, %Common ], [ 2, %Pre-Exit ]
%b = phi i32 [ 1, %BB.nomerge ], [ 1, %Common ], [ 2, %Pre-Exit ]
%conde = call i1 @foo( ) ; <i1> [#uses=1]
br i1 %conde, label %Common, label %Pre-Exit
Common: ; preds = %Succ
%cond = call i1 @foo( ) ; <i1> [#uses=1]
br i1 %cond, label %BB.tomerge, label %Succ
br i1 %cond, label %BB.nomerge, label %Succ
Pre-Exit: ; preds = %Succ
; This adds a backedge, so the %b phi node gets a third branch and is
@ -208,25 +208,25 @@ Exit: ; preds = %Pre-Exit
ret void
}
; This function can be merged
; This function can't be merged (for keeping canonical loop structures)
define void @d() {
entry:
br label %BB.tomerge
br label %BB.nomerge
BB.tomerge: ; preds = %Common, %entry
BB.nomerge: ; preds = %Common, %entry
; This phi has a matching value (0) with below phi (0), so blocks
; can be merged.
%a = phi i32 [ 1, %entry ], [ 0, %Common ] ; <i32> [#uses=1]
br label %Succ
Succ: ; preds = %Common, %BB.tomerge
%b = phi i32 [ %a, %BB.tomerge ], [ 0, %Common ] ; <i32> [#uses=0]
%b = phi i32 [ %a, %BB.nomerge ], [ 0, %Common ] ; <i32> [#uses=0]
%conde = call i1 @foo( ) ; <i1> [#uses=1]
br i1 %conde, label %Common, label %Exit
Common: ; preds = %Succ
%cond = call i1 @foo( ) ; <i1> [#uses=1]
br i1 %cond, label %BB.tomerge, label %Succ
br i1 %cond, label %BB.nomerge, label %Succ
Exit: ; preds = %Succ
ret void
@ -235,21 +235,21 @@ Exit: ; preds = %Succ
; This function can be merged
define void @e() {
entry:
br label %BB.tomerge
br label %Succ
BB.tomerge: ; preds = %Use, %entry
Succ: ; preds = %Use, %entry
; This phi is used somewhere else than Succ, but this should not prevent
; merging this block
%a = phi i32 [ 1, %entry ], [ 0, %Use ] ; <i32> [#uses=1]
br label %Succ
br label %BB.tomerge
Succ: ; preds = %BB.tomerge
BB.tomerge: ; preds = %Succ
%conde = call i1 @foo( ) ; <i1> [#uses=1]
br i1 %conde, label %Use, label %Exit
Use: ; preds = %Succ
%cond = call i1 @bar( i32 %a ) ; <i1> [#uses=1]
br i1 %cond, label %BB.tomerge, label %Exit
br i1 %cond, label %Succ, label %Exit
Exit: ; preds = %Use, %Succ
ret void

View File

@ -1306,8 +1306,8 @@ l6:
; Speculation depth must be limited to avoid a zero-cost instruction cycle.
; CHECK-LABEL: @PR26308(
; CHECK: cleanup4:
; CHECK-NEXT: br label %cleanup4
; CHECK: while.body:
; CHECK-NEXT: br label %while.body
define i32 @PR26308(i1 %B, i64 %load) {
entry: