mirror of
https://github.com/RPCSX/llvm.git
synced 2025-02-10 22:43:46 +00:00
Fix the LoopUnroller to handle nontrivial loops and partial unrolling.
These are not individual bug fixes. I had to rewrite a good chunk of the unroller to make it sane. I think it was getting lucky on trivial completely unrolled loops with no early exits. I included some fairly simple unit tests for partial unrolling. I didn't do much stress testing, so it may not be perfect, but should be usable now. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@137190 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
33e57515b1
commit
b1eede1281
@ -134,6 +134,11 @@ public:
|
||||
block_iterator block_begin() const { return Blocks.begin(); }
|
||||
block_iterator block_end() const { return Blocks.end(); }
|
||||
|
||||
/// getNumBlocks - Get the number of blocks in this loop.
|
||||
unsigned getNumBlocks() const {
|
||||
return std::distance(block_begin(), block_end());
|
||||
}
|
||||
|
||||
/// isLoopExiting - True if terminator in the block can branch to another
|
||||
/// block that is outside of the current loop.
|
||||
///
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include "llvm/BasicBlock.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/Analysis/InstructionSimplify.h"
|
||||
#include "llvm/Analysis/LoopIterator.h"
|
||||
#include "llvm/Analysis/LoopPass.h"
|
||||
#include "llvm/Analysis/ScalarEvolution.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
@ -225,11 +226,25 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
|
||||
Headers.push_back(Header);
|
||||
Latches.push_back(LatchBlock);
|
||||
|
||||
// The current on-the-fly SSA update requires blocks to be processed in
|
||||
// reverse postorder so that LastValueMap contains the correct value at each
|
||||
// exit.
|
||||
LoopBlocksDFS DFS(L);
|
||||
{
|
||||
// Traverse the loop blocks using depth-first search to record RPO numbers
|
||||
// for each block in the DFS result.
|
||||
LoopBlocksTraversal Traversal(DFS, LI);
|
||||
for (LoopBlocksTraversal::POTIterator POI = Traversal.begin(),
|
||||
POE = Traversal.end(); POI != POE; ++POI);
|
||||
}
|
||||
// Stash the DFS iterators before adding blocks to the loop.
|
||||
LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO();
|
||||
LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO();
|
||||
|
||||
for (unsigned It = 1; It != Count; ++It) {
|
||||
std::vector<BasicBlock*> NewBlocks;
|
||||
|
||||
for (std::vector<BasicBlock*>::iterator BB = LoopBlocks.begin(),
|
||||
E = LoopBlocks.end(); BB != E; ++BB) {
|
||||
for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
|
||||
ValueToValueMapTy VMap;
|
||||
BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
|
||||
Header->getParent()->getBasicBlockList().push_back(New);
|
||||
@ -255,35 +270,27 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
|
||||
|
||||
L->addBasicBlockToLoop(New, LI->getBase());
|
||||
|
||||
// Add phi entries for newly created values to all exit blocks except
|
||||
// the successor of the latch block. The successor of the exit block will
|
||||
// be updated specially after unrolling all the way.
|
||||
if (*BB != LatchBlock)
|
||||
for (succ_iterator SI = succ_begin(*BB), SE = succ_end(*BB); SI != SE;
|
||||
++SI)
|
||||
if (!L->contains(*SI))
|
||||
for (BasicBlock::iterator BBI = (*SI)->begin();
|
||||
PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) {
|
||||
Value *Incoming = phi->getIncomingValueForBlock(*BB);
|
||||
phi->addIncoming(Incoming, New);
|
||||
}
|
||||
|
||||
// Add phi entries for newly created values to all exit blocks.
|
||||
for (succ_iterator SI = succ_begin(*BB), SE = succ_end(*BB);
|
||||
SI != SE; ++SI) {
|
||||
if (L->contains(*SI))
|
||||
continue;
|
||||
for (BasicBlock::iterator BBI = (*SI)->begin();
|
||||
PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) {
|
||||
Value *Incoming = phi->getIncomingValueForBlock(*BB);
|
||||
ValueToValueMapTy::iterator It = LastValueMap.find(Incoming);
|
||||
if (It != LastValueMap.end())
|
||||
Incoming = It->second;
|
||||
phi->addIncoming(Incoming, New);
|
||||
}
|
||||
}
|
||||
// Keep track of new headers and latches as we create them, so that
|
||||
// we can insert the proper branches later.
|
||||
if (*BB == Header)
|
||||
Headers.push_back(New);
|
||||
if (*BB == LatchBlock) {
|
||||
if (*BB == LatchBlock)
|
||||
Latches.push_back(New);
|
||||
|
||||
// Also, clear out the new latch's back edge so that it doesn't look
|
||||
// like a new loop, so that it's amenable to being merged with adjacent
|
||||
// blocks later on.
|
||||
TerminatorInst *Term = New->getTerminator();
|
||||
assert(L->contains(Term->getSuccessor(!ContinueOnTrue)));
|
||||
assert(Term->getSuccessor(ContinueOnTrue) == LoopExit);
|
||||
Term->setSuccessor(!ContinueOnTrue, NULL);
|
||||
}
|
||||
|
||||
NewBlocks.push_back(New);
|
||||
}
|
||||
|
||||
@ -294,36 +301,24 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
|
||||
::RemapInstruction(I, LastValueMap);
|
||||
}
|
||||
|
||||
// The latch block exits the loop. If there are any PHI nodes in the
|
||||
// successor blocks, update them to use the appropriate values computed as the
|
||||
// last iteration of the loop.
|
||||
if (Count != 1) {
|
||||
BasicBlock *LastIterationBB = cast<BasicBlock>(LastValueMap[LatchBlock]);
|
||||
for (succ_iterator SI = succ_begin(LatchBlock), SE = succ_end(LatchBlock);
|
||||
SI != SE; ++SI) {
|
||||
for (BasicBlock::iterator BBI = (*SI)->begin();
|
||||
PHINode *PN = dyn_cast<PHINode>(BBI); ++BBI) {
|
||||
Value *InVal = PN->removeIncomingValue(LatchBlock, false);
|
||||
// If this value was defined in the loop, take the value defined by the
|
||||
// last iteration of the loop.
|
||||
if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
|
||||
if (L->contains(InValI))
|
||||
InVal = LastValueMap[InVal];
|
||||
}
|
||||
PN->addIncoming(InVal, LastIterationBB);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Now, if we're doing complete unrolling, loop over the PHI nodes in the
|
||||
// original block, setting them to their incoming values.
|
||||
if (CompletelyUnroll) {
|
||||
BasicBlock *Preheader = L->getLoopPreheader();
|
||||
for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
|
||||
PHINode *PN = OrigPHINode[i];
|
||||
// Loop over the PHI nodes in the original block, setting incoming values.
|
||||
for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
|
||||
PHINode *PN = OrigPHINode[i];
|
||||
if (CompletelyUnroll) {
|
||||
PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
|
||||
Header->getInstList().erase(PN);
|
||||
}
|
||||
else if (Count > 1) {
|
||||
Value *InVal = PN->removeIncomingValue(LatchBlock, false);
|
||||
// If this value was defined in the loop, take the value defined by the
|
||||
// last iteration of the loop.
|
||||
if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
|
||||
if (L->contains(InValI))
|
||||
InVal = LastValueMap[InVal];
|
||||
}
|
||||
assert(Latches.back() == LastValueMap[LatchBlock] && "bad last latch");
|
||||
PN->addIncoming(InVal, Latches.back());
|
||||
}
|
||||
}
|
||||
|
||||
// Now that all the basic blocks for the unrolled iterations are in place,
|
||||
@ -355,6 +350,19 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
|
||||
// iteration.
|
||||
Term->setSuccessor(!ContinueOnTrue, Dest);
|
||||
} else {
|
||||
// Remove phi operands at this loop exit
|
||||
if (Dest != LoopExit) {
|
||||
BasicBlock *BB = Latches[i];
|
||||
for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
|
||||
SI != SE; ++SI) {
|
||||
if (*SI == Headers[i])
|
||||
continue;
|
||||
for (BasicBlock::iterator BBI = (*SI)->begin();
|
||||
PHINode *Phi = dyn_cast<PHINode>(BBI); ++BBI) {
|
||||
Phi->removeIncomingValue(BB, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Replace the conditional branch with an unconditional one.
|
||||
BranchInst::Create(Dest, Term);
|
||||
Term->eraseFromParent();
|
||||
|
100
test/Transforms/LoopUnroll/2011-08-08-PhiUpdate.ll
Normal file
100
test/Transforms/LoopUnroll/2011-08-08-PhiUpdate.ll
Normal file
@ -0,0 +1,100 @@
|
||||
; RUN: opt < %s -loop-unroll -S -unroll-count=4 | FileCheck %s
|
||||
; Test phi update after partial unroll.
|
||||
|
||||
declare i1 @check() nounwind
|
||||
|
||||
; CHECK: @test
|
||||
; CHECK: if.else:
|
||||
; CHECK: if.then.loopexit
|
||||
; CHECK: %sub5.lcssa = phi i32 [ %sub{{.*}}, %if.else{{.*}} ], [ %sub{{.*}}, %if.else{{.*}} ], [ %sub{{.*}}, %if.else{{.*}} ], [ %sub{{.*}}, %if.else{{.*}} ]
|
||||
; CHECK: if.else.3
|
||||
define void @test1(i32 %i, i32 %j) nounwind uwtable ssp {
|
||||
entry:
|
||||
%cond1 = call zeroext i1 @check()
|
||||
br i1 %cond1, label %if.then, label %if.else.lr.ph
|
||||
|
||||
if.else.lr.ph: ; preds = %entry
|
||||
br label %if.else
|
||||
|
||||
if.else: ; preds = %if.else, %if.else.lr.ph
|
||||
%sub = phi i32 [ %i, %if.else.lr.ph ], [ %sub5, %if.else ]
|
||||
%sub5 = sub i32 %sub, %j
|
||||
%cond2 = call zeroext i1 @check()
|
||||
br i1 %cond2, label %if.then, label %if.else
|
||||
|
||||
if.then: ; preds = %if.else, %entry
|
||||
%i.tr = phi i32 [ %i, %entry ], [ %sub5, %if.else ]
|
||||
ret void
|
||||
|
||||
}
|
||||
|
||||
; PR7318: assertion failure after doing a simple loop unroll
|
||||
;
|
||||
; CHECK: @test2
|
||||
; CHECK: bb1.bb2_crit_edge:
|
||||
; CHECK: %.lcssa = phi i32 [ %{{[2468]}}, %bb1{{.*}} ], [ %{{[2468]}}, %bb1{{.*}} ], [ %{{[2468]}}, %bb1{{.*}} ], [ %{{[2468]}}, %bb1{{.*}} ]
|
||||
; CHECK: bb1.3:
|
||||
define i32 @test2(i32* nocapture %p, i32 %n) nounwind readonly {
|
||||
entry:
|
||||
%0 = icmp sgt i32 %n, 0 ; <i1> [#uses=1]
|
||||
br i1 %0, label %bb.nph, label %bb2
|
||||
|
||||
bb.nph: ; preds = %entry
|
||||
%tmp = zext i32 %n to i64 ; <i64> [#uses=1]
|
||||
br label %bb
|
||||
|
||||
bb: ; preds = %bb.nph, %bb1
|
||||
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %bb1 ] ; <i64> [#uses=2]
|
||||
%s.01 = phi i32 [ 0, %bb.nph ], [ %2, %bb1 ] ; <i32> [#uses=1]
|
||||
%scevgep = getelementptr i32* %p, i64 %indvar ; <i32*> [#uses=1]
|
||||
%1 = load i32* %scevgep, align 1 ; <i32> [#uses=1]
|
||||
%2 = add nsw i32 %1, %s.01 ; <i32> [#uses=2]
|
||||
br label %bb1
|
||||
|
||||
bb1: ; preds = %bb
|
||||
%indvar.next = add i64 %indvar, 1 ; <i64> [#uses=2]
|
||||
%exitcond = icmp ne i64 %indvar.next, %tmp ; <i1> [#uses=1]
|
||||
br i1 %exitcond, label %bb, label %bb1.bb2_crit_edge
|
||||
|
||||
bb1.bb2_crit_edge: ; preds = %bb1
|
||||
%.lcssa = phi i32 [ %2, %bb1 ] ; <i32> [#uses=1]
|
||||
br label %bb2
|
||||
|
||||
bb2: ; preds = %bb1.bb2_crit_edge, %entry
|
||||
%s.0.lcssa = phi i32 [ %.lcssa, %bb1.bb2_crit_edge ], [ 0, %entry ] ; <i32> [#uses=1]
|
||||
ret i32 %s.0.lcssa
|
||||
}
|
||||
|
||||
; Check phi update for loop with an early-exit.
|
||||
;
|
||||
; CHECK: @test3
|
||||
; CHECK: return.loopexit:
|
||||
; CHECK: %tmp7.i.lcssa = phi i32 [ %tmp7.i{{.*}}, %land.lhs.true{{.*}} ], [ %tmp7.i{{.*}}, %land.lhs.true{{.*}} ], [ %tmp7.i{{.*}}, %land.lhs.true{{.*}} ], [ %tmp7.i{{.*}}, %land.lhs.true{{.*}} ]
|
||||
; CHECK: exit.3:
|
||||
define i32 @test3() nounwind uwtable ssp align 2 {
|
||||
entry:
|
||||
br i1 undef, label %return, label %if.end
|
||||
|
||||
if.end: ; preds = %entry
|
||||
br label %do.body
|
||||
|
||||
do.body: ; preds = %do.cond, %if.end
|
||||
br i1 undef, label %exit, label %do.cond
|
||||
|
||||
exit: ; preds = %do.body
|
||||
%tmp7.i = load i32* undef, align 8
|
||||
br i1 undef, label %do.cond, label %land.lhs.true
|
||||
|
||||
land.lhs.true: ; preds = %exit
|
||||
br i1 undef, label %return, label %do.cond
|
||||
|
||||
do.cond: ; preds = %land.lhs.true, %exit, %do.body
|
||||
br i1 undef, label %do.end, label %do.body
|
||||
|
||||
do.end: ; preds = %do.cond
|
||||
br label %return
|
||||
|
||||
return: ; preds = %do.end, %land.lhs.true, %entry
|
||||
%retval.0 = phi i32 [ 0, %do.end ], [ 0, %entry ], [ %tmp7.i, %land.lhs.true ]
|
||||
ret i32 %retval.0
|
||||
}
|
62
test/Transforms/LoopUnroll/2011-08-09-PhiUpdate.ll
Normal file
62
test/Transforms/LoopUnroll/2011-08-09-PhiUpdate.ll
Normal file
@ -0,0 +1,62 @@
|
||||
; RUN: opt -S < %s -instcombine -inline -jump-threading -loop-unroll -unroll-count=4 | FileCheck %s
|
||||
;
|
||||
; This is a test case that required a number of setup passes because
|
||||
; it depends on block order.
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
|
||||
target triple = "x86_64-apple-macosx10.6.8"
|
||||
|
||||
declare i1 @check() nounwind
|
||||
declare i32 @getval() nounwind
|
||||
|
||||
; Check that the loop exit merges values from all the iterations. This
|
||||
; could be a tad fragile, but it's a good test.
|
||||
;
|
||||
; CHECK: @foo
|
||||
; CHECK: return:
|
||||
; CHECK: %retval.0 = phi i32 [ %tmp7.i, %land.lhs.true ], [ 0, %do.cond ], [ %tmp7.i.1, %land.lhs.true.1 ], [ 0, %do.cond.1 ], [ %tmp7.i.2, %land.lhs.true.2 ], [ 0, %do.cond.2 ], [ %tmp7.i.3, %land.lhs.true.3 ], [ 0, %do.cond.3 ]
|
||||
; CHECK-NOT: @bar
|
||||
; CHECK: bar.exit.3
|
||||
define i32 @foo() uwtable ssp align 2 {
|
||||
entry:
|
||||
br i1 undef, label %return, label %if.end
|
||||
|
||||
if.end: ; preds = %entry
|
||||
%call2 = call i32 @getval()
|
||||
br label %do.body
|
||||
|
||||
do.body: ; preds = %do.cond, %if.end
|
||||
%call6 = call i32 @bar()
|
||||
%cmp = icmp ne i32 %call6, 0
|
||||
br i1 %cmp, label %land.lhs.true, label %do.cond
|
||||
|
||||
land.lhs.true: ; preds = %do.body
|
||||
%call10 = call i32 @getval()
|
||||
%cmp11 = icmp eq i32 0, %call10
|
||||
br i1 %cmp11, label %return, label %do.cond
|
||||
|
||||
do.cond: ; preds = %land.lhs.true, %do.body
|
||||
%cmp18 = icmp sle i32 0, %call2
|
||||
br i1 %cmp18, label %do.body, label %return
|
||||
|
||||
return: ; preds = %do.cond, %land.lhs.true, %entry
|
||||
%retval.0 = phi i32 [ 0, %entry ], [ %call6, %land.lhs.true ], [ 0, %do.cond ]
|
||||
ret i32 %retval.0
|
||||
}
|
||||
|
||||
define linkonce_odr i32 @bar() nounwind uwtable ssp align 2 {
|
||||
entry:
|
||||
br i1 undef, label %land.lhs.true, label %cond.end
|
||||
|
||||
land.lhs.true: ; preds = %entry
|
||||
%cmp4 = call zeroext i1 @check()
|
||||
br i1 %cmp4, label %cond.true, label %cond.end
|
||||
|
||||
cond.true: ; preds = %land.lhs.true
|
||||
%tmp7 = call i32 @getval()
|
||||
br label %cond.end
|
||||
|
||||
cond.end: ; preds = %cond.true, %land.lhs.true, %entry
|
||||
%cond = phi i32 [ %tmp7, %cond.true ], [ 0, %land.lhs.true ], [ 0, %entry ]
|
||||
ret i32 %cond
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user