mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-28 14:10:41 +00:00
The patch fixes PR27392.
Summary: It is incorrect to compare TripCount (which is BECount + 1) with extraiters (or Count) to decide whether to enter the unrolled loop, because TripCount can overflow (it wraps to 0 when BECount is the maximum unsigned integer). Comparing BECount with (Count - 1), by contrast, is overflow-safe and therefore correct. Reviewer: hfinkel Differential Revision: http://reviews.llvm.org/D19256 From: Evgeny Stupachenko <evstupac@gmail.com> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@267662 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
ee1148650b
commit
a27504ef06
@ -246,7 +246,7 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
|
|||||||
|
|
||||||
Instruction *InsertPt = NewExit->getTerminator();
|
Instruction *InsertPt = NewExit->getTerminator();
|
||||||
IRBuilder<> B(InsertPt);
|
IRBuilder<> B(InsertPt);
|
||||||
Value *BrLoopExit = B.CreateIsNotNull(ModVal);
|
Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod");
|
||||||
assert(Exit && "Loop must have a single exit block only");
|
assert(Exit && "Loop must have a single exit block only");
|
||||||
// Split the exit to maintain loop canonicalization guarantees
|
// Split the exit to maintain loop canonicalization guarantees
|
||||||
SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
|
SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
|
||||||
@ -416,7 +416,7 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter,
|
|||||||
///
|
///
|
||||||
/// ***Epilog case***
|
/// ***Epilog case***
|
||||||
/// extraiters = tripcount % loopfactor
|
/// extraiters = tripcount % loopfactor
|
||||||
/// if (extraiters == tripcount) jump LoopExit:
|
/// if (tripcount < loopfactor) jump LoopExit:
|
||||||
/// unroll_iters = tripcount - extraiters
|
/// unroll_iters = tripcount - extraiters
|
||||||
/// Loop: LoopBody; (executes unroll_iter times);
|
/// Loop: LoopBody; (executes unroll_iter times);
|
||||||
/// unroll_iter -= 1
|
/// unroll_iter -= 1
|
||||||
@ -575,14 +575,15 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
|
|||||||
ConstantInt::get(BECount->getType(), Count),
|
ConstantInt::get(BECount->getType(), Count),
|
||||||
"xtraiter");
|
"xtraiter");
|
||||||
}
|
}
|
||||||
Value *CmpOperand =
|
Value *BranchVal =
|
||||||
UseEpilogRemainder ? TripCount :
|
UseEpilogRemainder ? B.CreateICmpULT(BECount,
|
||||||
ConstantInt::get(TripCount->getType(), 0);
|
ConstantInt::get(BECount->getType(),
|
||||||
Value *BranchVal = B.CreateICmpNE(ModVal, CmpOperand, "lcmp.mod");
|
Count - 1)) :
|
||||||
BasicBlock *FirstLoop = UseEpilogRemainder ? NewPreHeader : PrologPreHeader;
|
B.CreateIsNotNull(ModVal, "lcmp.mod");
|
||||||
BasicBlock *SecondLoop = UseEpilogRemainder ? NewExit : PrologExit;
|
BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader;
|
||||||
|
BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit;
|
||||||
// Branch to either remainder (extra iterations) loop or unrolling loop.
|
// Branch to either remainder (extra iterations) loop or unrolling loop.
|
||||||
B.CreateCondBr(BranchVal, FirstLoop, SecondLoop);
|
B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop);
|
||||||
PreHeaderBR->eraseFromParent();
|
PreHeaderBR->eraseFromParent();
|
||||||
Function *F = Header->getParent();
|
Function *F = Header->getParent();
|
||||||
// Get an ordered list of blocks in the loop to help with the ordering of the
|
// Get an ordered list of blocks in the loop to help with the ordering of the
|
||||||
|
@ -4,16 +4,14 @@
|
|||||||
; Tests for unrolling loops with run-time trip counts
|
; Tests for unrolling loops with run-time trip counts
|
||||||
|
|
||||||
; EPILOG: %xtraiter = and i32 %n
|
; EPILOG: %xtraiter = and i32 %n
|
||||||
; EPILOG: %lcmp.mod = icmp ne i32 %xtraiter, %n
|
; EPILOG: for.body:
|
||||||
; EPILOG: br i1 %lcmp.mod, label %for.body.preheader.new, label %for.end.loopexit.unr-lcssa
|
; EPILOG: %lcmp.mod = icmp ne i32 %xtraiter, 0
|
||||||
|
; EPILOG: br i1 %lcmp.mod, label %for.body.epil.preheader, label %for.end.loopexit
|
||||||
|
; EPILOG: for.body.epil:
|
||||||
|
|
||||||
; PROLOG: %xtraiter = and i32 %n
|
; PROLOG: %xtraiter = and i32 %n
|
||||||
; PROLOG: %lcmp.mod = icmp ne i32 %xtraiter, 0
|
; PROLOG: %lcmp.mod = icmp ne i32 %xtraiter, 0
|
||||||
; PROLOG: br i1 %lcmp.mod, label %for.body.prol.preheader, label %for.body.prol.loopexit
|
; PROLOG: br i1 %lcmp.mod, label %for.body.prol.preheader, label %for.body.prol.loopexit
|
||||||
|
|
||||||
; EPILOG: for.body:
|
|
||||||
; EPILOG: for.body.epil:
|
|
||||||
|
|
||||||
; PROLOG: for.body.prol:
|
; PROLOG: for.body.prol:
|
||||||
; PROLOG: for.body:
|
; PROLOG: for.body:
|
||||||
|
|
||||||
|
@ -6,8 +6,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
|
|||||||
; Tests for unrolling loops with run-time trip counts
|
; Tests for unrolling loops with run-time trip counts
|
||||||
|
|
||||||
; EPILOG: %xtraiter = and i32 %n
|
; EPILOG: %xtraiter = and i32 %n
|
||||||
; EPILOG: %lcmp.mod = icmp ne i32 %xtraiter, %n
|
; EPILOG: %lcmp.mod = icmp ne i32 %xtraiter, 0
|
||||||
; EPILOG: br i1 %lcmp.mod, label %for.body.preheader.new, label %for.end.loopexit.unr-lcssa
|
; EPILOG: br i1 %lcmp.mod, label %for.body.epil.preheader, label %for.end.loopexit
|
||||||
|
|
||||||
; PROLOG: %xtraiter = and i32 %n
|
; PROLOG: %xtraiter = and i32 %n
|
||||||
; PROLOG: %lcmp.mod = icmp ne i32 %xtraiter, 0
|
; PROLOG: %lcmp.mod = icmp ne i32 %xtraiter, 0
|
||||||
|
@ -5,7 +5,7 @@
|
|||||||
|
|
||||||
|
|
||||||
; EPILOG: for.body.preheader:
|
; EPILOG: for.body.preheader:
|
||||||
; EPILOG: br i1 %lcmp.mod, label %for.body.preheader.new, label %for.end.loopexit.unr-lcssa, !dbg [[PH_LOC:![0-9]+]]
|
; EPILOG: br i1 %1, label %for.end.loopexit.unr-lcssa, label %for.body.preheader.new, !dbg [[PH_LOC:![0-9]+]]
|
||||||
; EPILOG: for.body:
|
; EPILOG: for.body:
|
||||||
; EPILOG: br i1 %niter.ncmp.1, label %for.end.loopexit.unr-lcssa.loopexit, label %for.body, !dbg [[BODY_LOC:![0-9]+]]
|
; EPILOG: br i1 %niter.ncmp.1, label %for.end.loopexit.unr-lcssa.loopexit, label %for.body, !dbg [[BODY_LOC:![0-9]+]]
|
||||||
; EPILOG-NOT: br i1 %niter.ncmp.2, label %for.end.loopexit{{.*}}, label %for.body
|
; EPILOG-NOT: br i1 %niter.ncmp.2, label %for.end.loopexit{{.*}}, label %for.body
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
; RUN: opt < %s -S -unroll-runtime -unroll-count=2 -loop-unroll | FileCheck %s
|
; RUN: opt < %s -S -unroll-runtime -unroll-count=2 -loop-unroll | FileCheck %s -check-prefix=EPILOG
|
||||||
|
; RUN: opt < %s -S -unroll-runtime -unroll-count=2 -loop-unroll -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
|
||||||
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
|
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
|
||||||
|
|
||||||
; This test case documents how runtime loop unrolling handles the case
|
; This test case documents how runtime loop unrolling handles the case
|
||||||
@ -9,17 +10,28 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
|
|||||||
; is divisible by 2. The prologue then branches to the unrolled loop
|
; is divisible by 2. The prologue then branches to the unrolled loop
|
||||||
; and executes the 2^32 iterations there, in groups of 2.
|
; and executes the 2^32 iterations there, in groups of 2.
|
||||||
|
|
||||||
|
; EPILOG: entry:
|
||||||
|
|
||||||
; CHECK: entry:
|
; EPILOG-NEXT: %0 = add i32 %N, 1
|
||||||
; CHECK-NEXT: %0 = add i32 %N, 1
|
; EPILOG-NEXT: %xtraiter = and i32 %0, 1
|
||||||
; CHECK-NEXT: %xtraiter = and i32 %0, 1
|
; EPILOG-NEXT: %1 = icmp ult i32 %N, 1
|
||||||
; CHECK-NEXT: %lcmp.mod = icmp ne i32 %xtraiter, %0
|
; EPILOG-NEXT: br i1 %1, label %while.end.unr-lcssa, label %entry.new
|
||||||
; CHECK-NEXT: br i1 %lcmp.mod, label %entry.new, label %while.end.unr-lcssa
|
; EPILOG: while.body:
|
||||||
|
|
||||||
; CHECK: while.body.epil:
|
; EPILOG: %lcmp.mod = icmp ne i32 %xtraiter, 0
|
||||||
; CHECK: br label %while.end.epilog-lcssa
|
; EPILOG-NEXT: br i1 %lcmp.mod, label %while.body.epil.preheader, label %while.end
|
||||||
|
; EPILOG: while.body.epil:
|
||||||
|
|
||||||
; CHECK: while.end.epilog-lcssa:
|
; PROLOG: entry:
|
||||||
|
; PROLOG-NEXT: %0 = add i32 %N, 1
|
||||||
|
; PROLOG-NEXT: %xtraiter = and i32 %0, 1
|
||||||
|
; PROLOG-NEXT: %lcmp.mod = icmp ne i32 %xtraiter, 0
|
||||||
|
; PROLOG-NEXT: br i1 %lcmp.mod, label %while.body.prol.preheader, label %while.body.prol.loopexit
|
||||||
|
; PROLOG: while.body.prol:
|
||||||
|
|
||||||
|
; PROLOG: %1 = icmp ult i32 %N, 1
|
||||||
|
; PROLOG-NEXT: br i1 %1, label %while.end, label %entry.new
|
||||||
|
; PROLOG: while.body:
|
||||||
|
|
||||||
; Function Attrs: nounwind readnone ssp uwtable
|
; Function Attrs: nounwind readnone ssp uwtable
|
||||||
define i32 @foo(i32 %N) {
|
define i32 @foo(i32 %N) {
|
||||||
|
Loading…
Reference in New Issue
Block a user