[IRCE] Handle loops with step different from 1/-1

This patch generalizes IRCE to handle IV steps that are not equal to 1 or -1.

Differential Revision: https://reviews.llvm.org/D35539


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@310032 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Max Kazantsev 2017-08-04 07:01:04 +00:00
parent 6069fa8531
commit 9a16b8eafb
2 changed files with 570 additions and 65 deletions

View File

@ -457,6 +457,7 @@ struct LoopStructure {
Value *IndVarNext;
Value *IndVarStart;
Value *IndVarStep;
Value *LoopExitAt;
bool IndVarIncreasing;
bool IsSignedPredicate;
@ -464,8 +465,8 @@ struct LoopStructure {
LoopStructure()
: Tag(""), Header(nullptr), Latch(nullptr), LatchBr(nullptr),
LatchExit(nullptr), LatchBrExitIdx(-1), IndVarNext(nullptr),
IndVarStart(nullptr), LoopExitAt(nullptr), IndVarIncreasing(false),
IsSignedPredicate(true) {}
IndVarStart(nullptr), IndVarStep(nullptr), LoopExitAt(nullptr),
IndVarIncreasing(false), IsSignedPredicate(true) {}
template <typename M> LoopStructure map(M Map) const {
LoopStructure Result;
@ -477,6 +478,7 @@ struct LoopStructure {
Result.LatchBrExitIdx = LatchBrExitIdx;
Result.IndVarNext = Map(IndVarNext);
Result.IndVarStart = Map(IndVarStart);
Result.IndVarStep = Map(IndVarStep);
Result.LoopExitAt = Map(LoopExitAt);
Result.IndVarIncreasing = IndVarIncreasing;
Result.IsSignedPredicate = IsSignedPredicate;
@ -660,6 +662,21 @@ static bool CanBeMax(ScalarEvolution &SE, const SCEV *S, bool Signed) {
SE.getUnsignedRange(S).contains(Max);
}
static bool SumCanReachMax(ScalarEvolution &SE, const SCEV *S1, const SCEV *S2,
bool Signed) {
// S1 < INT_MAX - S2 ===> S1 + S2 < INT_MAX.
assert(SE.isKnownNonNegative(S2) &&
"We expected the 2nd arg to be non-negative!");
const SCEV *Max = SE.getConstant(
Signed ? APInt::getSignedMaxValue(
cast<IntegerType>(S1->getType())->getBitWidth())
: APInt::getMaxValue(
cast<IntegerType>(S1->getType())->getBitWidth()));
const SCEV *CapForS1 = SE.getMinusSCEV(Max, S2);
return !SE.isKnownPredicate(Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
S1, CapForS1);
}
static bool CanBeMin(ScalarEvolution &SE, const SCEV *S, bool Signed) {
APInt Min = Signed ?
APInt::getSignedMinValue(cast<IntegerType>(S->getType())->getBitWidth()) :
@ -668,6 +685,21 @@ static bool CanBeMin(ScalarEvolution &SE, const SCEV *S, bool Signed) {
SE.getUnsignedRange(S).contains(Min);
}
static bool SumCanReachMin(ScalarEvolution &SE, const SCEV *S1, const SCEV *S2,
bool Signed) {
// S1 > INT_MIN - S2 ===> S1 + S2 > INT_MIN.
assert(SE.isKnownNonPositive(S2) &&
"We expected the 2nd arg to be non-positive!");
const SCEV *Max = SE.getConstant(
Signed ? APInt::getSignedMinValue(
cast<IntegerType>(S1->getType())->getBitWidth())
: APInt::getMinValue(
cast<IntegerType>(S1->getType())->getBitWidth()));
const SCEV *CapForS1 = SE.getMinusSCEV(Max, S2);
return !SE.isKnownPredicate(Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT,
S1, CapForS1);
}
Optional<LoopStructure>
LoopStructure::parseLoopStructure(ScalarEvolution &SE,
BranchProbabilityInfo &BPI,
@ -772,7 +804,11 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE,
return AR->getNoWrapFlags(SCEV::FlagNSW) != SCEV::FlagAnyWrap;
};
auto IsInductionVar = [&](const SCEVAddRecExpr *AR, bool &IsIncreasing) {
// Here we check whether the suggested AddRec is an induction variable that
// can be handled (i.e. with known constant step), and if yes, calculate its
// step and identify whether it is increasing or decreasing.
auto IsInductionVar = [&](const SCEVAddRecExpr *AR, bool &IsIncreasing,
ConstantInt *&StepCI) {
if (!AR->isAffine())
return false;
@ -784,12 +820,10 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE,
if (const SCEVConstant *StepExpr =
dyn_cast<SCEVConstant>(AR->getStepRecurrence(SE))) {
ConstantInt *StepCI = StepExpr->getValue();
StepCI = StepExpr->getValue();
assert(!StepCI->isZero() && "Zero step?");
if (StepCI->isOne() || StepCI->isMinusOne()) {
IsIncreasing = StepCI->isOne();
return true;
}
IsIncreasing = !StepCI->isNegative();
return true;
}
return false;
@ -801,7 +835,8 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE,
const SCEVAddRecExpr *IndVarNext = cast<SCEVAddRecExpr>(LeftSCEV);
bool IsIncreasing = false;
bool IsSignedPredicate = true;
if (!IsInductionVar(IndVarNext, IsIncreasing)) {
ConstantInt *StepCI;
if (!IsInductionVar(IndVarNext, IsIncreasing, StepCI)) {
FailureReason = "LHS in icmp not induction variable";
return None;
}
@ -809,34 +844,37 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE,
const SCEV *StartNext = IndVarNext->getStart();
const SCEV *Addend = SE.getNegativeSCEV(IndVarNext->getStepRecurrence(SE));
const SCEV *IndVarStart = SE.getAddExpr(StartNext, Addend);
const SCEV *Step = SE.getSCEV(StepCI);
ConstantInt *One = ConstantInt::get(IndVarTy, 1);
if (IsIncreasing) {
bool DecreasedRightValueByOne = false;
// Try to turn eq/ne predicates to those we can work with.
if (Pred == ICmpInst::ICMP_NE && LatchBrExitIdx == 1)
// while (++i != len) { while (++i < len) {
// ... ---> ...
// } }
// If both parts are known non-negative, it is profitable to use unsigned
// comparison in increasing loop. This allows us to make the comparison
// check against "RightSCEV + 1" more optimistic.
if (SE.isKnownNonNegative(IndVarStart) &&
SE.isKnownNonNegative(RightSCEV))
Pred = ICmpInst::ICMP_ULT;
else
Pred = ICmpInst::ICMP_SLT;
else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0 &&
!CanBeMin(SE, RightSCEV, /* IsSignedPredicate */ true)) {
// while (true) { while (true) {
// if (++i == len) ---> if (++i > len - 1)
// break; break;
// ... ...
// } }
// TODO: Insert ICMP_UGT if both are non-negative?
Pred = ICmpInst::ICMP_SGT;
RightSCEV = SE.getMinusSCEV(RightSCEV, SE.getOne(RightSCEV->getType()));
DecreasedRightValueByOne = true;
if (StepCI->isOne()) {
// Try to turn eq/ne predicates to those we can work with.
if (Pred == ICmpInst::ICMP_NE && LatchBrExitIdx == 1)
// while (++i != len) { while (++i < len) {
// ... ---> ...
// } }
// If both parts are known non-negative, it is profitable to use
// unsigned comparison in increasing loop. This allows us to make the
// comparison check against "RightSCEV + 1" more optimistic.
if (SE.isKnownNonNegative(IndVarStart) &&
SE.isKnownNonNegative(RightSCEV))
Pred = ICmpInst::ICMP_ULT;
else
Pred = ICmpInst::ICMP_SLT;
else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0 &&
!CanBeMin(SE, RightSCEV, /* IsSignedPredicate */ true)) {
// while (true) { while (true) {
// if (++i == len) ---> if (++i > len - 1)
// break; break;
// ... ...
// } }
// TODO: Insert ICMP_UGT if both are non-negative?
Pred = ICmpInst::ICMP_SGT;
RightSCEV = SE.getMinusSCEV(RightSCEV, SE.getOne(RightSCEV->getType()));
DecreasedRightValueByOne = true;
}
}
bool LTPred = (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_ULT);
@ -857,7 +895,9 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE,
IsSignedPredicate ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT;
if (LatchBrExitIdx == 0) {
if (CanBeMax(SE, RightSCEV, IsSignedPredicate)) {
const SCEV *StepMinusOne = SE.getMinusSCEV(Step,
SE.getOne(Step->getType()));
if (SumCanReachMax(SE, RightSCEV, StepMinusOne, IsSignedPredicate)) {
// TODO: this restriction is easily removable -- we just have to
// remember that the icmp was an slt and not an sle.
FailureReason = "limit may overflow when coercing le to lt";
@ -866,7 +906,7 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE,
if (!SE.isLoopEntryGuardedByCond(
&L, BoundPred, IndVarStart,
SE.getAddExpr(RightSCEV, SE.getOne(RightSCEV->getType())))) {
SE.getAddExpr(RightSCEV, Step))) {
FailureReason = "Induction variable start not bounded by upper limit";
return None;
}
@ -887,26 +927,28 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE,
}
} else {
bool IncreasedRightValueByOne = false;
// Try to turn eq/ne predicates to those we can work with.
if (Pred == ICmpInst::ICMP_NE && LatchBrExitIdx == 1)
// while (--i != len) { while (--i > len) {
// ... ---> ...
// } }
// We intentionally don't turn the predicate into UGT even if we know that
// both operands are non-negative, because it will only pessimize our
// check against "RightSCEV - 1".
Pred = ICmpInst::ICMP_SGT;
else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0 &&
!CanBeMax(SE, RightSCEV, /* IsSignedPredicate */ true)) {
// while (true) { while (true) {
// if (--i == len) ---> if (--i < len + 1)
// break; break;
// ... ...
// } }
// TODO: Insert ICMP_ULT if both are non-negative?
Pred = ICmpInst::ICMP_SLT;
RightSCEV = SE.getAddExpr(RightSCEV, SE.getOne(RightSCEV->getType()));
IncreasedRightValueByOne = true;
if (StepCI->isMinusOne()) {
// Try to turn eq/ne predicates to those we can work with.
if (Pred == ICmpInst::ICMP_NE && LatchBrExitIdx == 1)
// while (--i != len) { while (--i > len) {
// ... ---> ...
// } }
// We intentionally don't turn the predicate into UGT even if we know
// that both operands are non-negative, because it will only pessimize
// our check against "RightSCEV - 1".
Pred = ICmpInst::ICMP_SGT;
else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0 &&
!CanBeMax(SE, RightSCEV, /* IsSignedPredicate */ true)) {
// while (true) { while (true) {
// if (--i == len) ---> if (--i < len + 1)
// break; break;
// ... ...
// } }
// TODO: Insert ICMP_ULT if both are non-negative?
Pred = ICmpInst::ICMP_SLT;
RightSCEV = SE.getAddExpr(RightSCEV, SE.getOne(RightSCEV->getType()));
IncreasedRightValueByOne = true;
}
}
bool LTPred = (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_ULT);
@ -928,7 +970,8 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE,
IsSignedPredicate ? CmpInst::ICMP_SGT : CmpInst::ICMP_UGT;
if (LatchBrExitIdx == 0) {
if (CanBeMin(SE, RightSCEV, IsSignedPredicate)) {
const SCEV *StepPlusOne = SE.getAddExpr(Step, SE.getOne(Step->getType()));
if (SumCanReachMin(SE, RightSCEV, StepPlusOne, IsSignedPredicate)) {
// TODO: this restriction is easily removable -- we just have to
// remember that the icmp was an sgt and not an sge.
FailureReason = "limit may overflow when coercing ge to gt";
@ -979,6 +1022,7 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE,
Result.LatchExit = LatchExit;
Result.LatchBrExitIdx = LatchBrExitIdx;
Result.IndVarStart = IndVarStartV;
Result.IndVarStep = StepCI;
Result.IndVarNext = LeftValue;
Result.IndVarIncreasing = IsIncreasing;
Result.LoopExitAt = RightValue;
@ -1529,8 +1573,6 @@ InductiveRangeCheck::computeSafeIterationSpace(
// this inductive range check is a range check on the "C + D * I" ("C" is
// getOffset() and "D" is getScale()). We rewrite the value being range
// checked to "M + N * IndVar" where "N" = "D * B^(-1)" and "M" = "C - NA".
// Currently we support this only for "B" = "D" = { 1 or -1 }, but the code
// can be generalized as needed.
//
// The actual inequalities we solve are of the form
//
@ -1567,11 +1609,9 @@ InductiveRangeCheck::computeSafeIterationSpace(
return None;
ConstantInt *ConstD = D->getValue();
if (!(ConstD->isMinusOne() || ConstD->isOne()))
return None;
assert(!ConstD->isZero() && "Recurrence with zero step?");
const SCEV *M = SE.getMinusSCEV(C, A);
const SCEV *Begin = SE.getNegativeSCEV(M);
const SCEV *UpperLimit = nullptr;
@ -1659,11 +1699,8 @@ bool InductiveRangeCheckElimination::runOnLoop(Loop *L, LPPassManager &LPM) {
return false;
}
LoopStructure LS = MaybeLoopStructure.getValue();
bool Increasing = LS.IndVarIncreasing;
const SCEV *MinusOne =
SE.getConstant(LS.IndVarNext->getType(), Increasing ? -1 : 1, true);
const SCEVAddRecExpr *IndVar =
cast<SCEVAddRecExpr>(SE.getAddExpr(SE.getSCEV(LS.IndVarNext), MinusOne));
cast<SCEVAddRecExpr>(SE.getMinusSCEV(SE.getSCEV(LS.IndVarNext), SE.getSCEV(LS.IndVarStep)));
Optional<InductiveRangeCheck::Range> SafeIterRange;
Instruction *ExprInsertPt = Preheader->getTerminator();

View File

@ -0,0 +1,468 @@
; RUN: opt -verify-loop-info -irce-print-changed-loops -irce -S < %s 2>&1 | FileCheck %s
; CHECK: irce: in function test_01: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting>
; CHECK: irce: in function test_02: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting>
; CHECK: irce: in function test_03: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting>
; CHECK-NOT: irce: in function test_04: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting>
; CHECK: irce: in function test_05: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting>
; CHECK: irce: in function test_06: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting>
; CHECK-NOT: irce: in function test_07: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting>
; CHECK: irce: in function test_08: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting>
; IV = 0; IV <s 100; IV += 7; 0 <= Len <= 50. IRCE is allowed.
define void @test_01(i32* %arr, i32* %a_len_ptr) {
; CHECK: @test_01(
; CHECK: entry:
; CHECK-NEXT: %exit.mainloop.at = load i32, i32* %a_len_ptr
; CHECK-NEXT: [[COND1:%[^ ]+]] = icmp slt i32 0, %exit.mainloop.at
; CHECK-NEXT: br i1 [[COND1]], label %loop.preheader, label %main.pseudo.exit
; CHECK: loop.preheader:
; CHECK-NEXT: br label %loop
; CHECK: loop:
; CHECK-NEXT: %idx = phi i32 [ %idx.next, %in.bounds ], [ 0, %loop.preheader ]
; CHECK-NEXT: %idx.next = add i32 %idx, 7
; CHECK-NEXT: %abc = icmp slt i32 %idx, %exit.mainloop.at
; CHECK-NEXT: br i1 true, label %in.bounds, label %out.of.bounds.loopexit1
; CHECK: in.bounds:
; CHECK-NEXT: %addr = getelementptr i32, i32* %arr, i32 %idx
; CHECK-NEXT: store i32 0, i32* %addr
; CHECK-NEXT: %next = icmp slt i32 %idx.next, 100
; CHECK-NEXT: [[COND2:%[^ ]+]] = icmp slt i32 %idx.next, %exit.mainloop.at
; CHECK-NEXT: br i1 [[COND2]], label %loop, label %main.exit.selector
; CHECK: main.exit.selector:
; CHECK-NEXT: %idx.next.lcssa = phi i32 [ %idx.next, %in.bounds ]
; CHECK-NEXT: [[COND3:%[^ ]+]] = icmp slt i32 %idx.next.lcssa, 100
; CHECK-NEXT: br i1 [[COND3]], label %main.pseudo.exit, label %exit
; CHECK: main.pseudo.exit:
; CHECK-NEXT: %idx.copy = phi i32 [ 0, %entry ], [ %idx.next.lcssa, %main.exit.selector ]
; CHECK-NEXT: %indvar.end = phi i32 [ 0, %entry ], [ %idx.next.lcssa, %main.exit.selector ]
; CHECK-NEXT: br label %postloop
; CHECK: postloop:
; CHECK-NEXT: br label %loop.postloop
; CHECK: loop.postloop:
; CHECK-NEXT: %idx.postloop = phi i32 [ %idx.copy, %postloop ], [ %idx.next.postloop, %in.bounds.postloop ]
; CHECK-NEXT: %idx.next.postloop = add i32 %idx.postloop, 7
; CHECK-NEXT: %abc.postloop = icmp slt i32 %idx.postloop, %exit.mainloop.at
; CHECK-NEXT: br i1 %abc.postloop, label %in.bounds.postloop, label %out.of.bounds.loopexit
; CHECK: in.bounds.postloop:
; CHECK-NEXT: %addr.postloop = getelementptr i32, i32* %arr, i32 %idx.postloop
; CHECK-NEXT: store i32 0, i32* %addr.postloop
; CHECK-NEXT: %next.postloop = icmp slt i32 %idx.next.postloop, 100
; CHECK-NEXT: br i1 %next.postloop, label %loop.postloop, label %exit.loopexit
entry:
%len = load i32, i32* %a_len_ptr, !range !0
br label %loop
loop:
%idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ]
%idx.next = add i32 %idx, 7
%abc = icmp slt i32 %idx, %len
br i1 %abc, label %in.bounds, label %out.of.bounds
in.bounds:
%addr = getelementptr i32, i32* %arr, i32 %idx
store i32 0, i32* %addr
%next = icmp slt i32 %idx.next, 100
br i1 %next, label %loop, label %exit
out.of.bounds:
ret void
exit:
ret void
}
; IV = 0; IV <s MAX_INT - 7; IV += 7; 0 <= Len <= 50. IRCE is allowed.
define void @test_02(i32* %arr, i32* %a_len_ptr) {
; CHECK: @test_02(
; CHECK: entry:
; CHECK-NEXT: %exit.mainloop.at = load i32, i32* %a_len_ptr
; CHECK-NEXT: [[COND1:%[^ ]+]] = icmp slt i32 0, %exit.mainloop.at
; CHECK-NEXT: br i1 [[COND1]], label %loop.preheader, label %main.pseudo.exit
; CHECK: loop.preheader:
; CHECK-NEXT: br label %loop
; CHECK: loop:
; CHECK-NEXT: %idx = phi i32 [ %idx.next, %in.bounds ], [ 0, %loop.preheader ]
; CHECK-NEXT: %idx.next = add i32 %idx, 7
; CHECK-NEXT: %abc = icmp slt i32 %idx, %exit.mainloop.at
; CHECK-NEXT: br i1 true, label %in.bounds, label %out.of.bounds.loopexit1
; CHECK: in.bounds:
; CHECK-NEXT: %addr = getelementptr i32, i32* %arr, i32 %idx
; CHECK-NEXT: store i32 0, i32* %addr
; CHECK-NEXT: %next = icmp slt i32 %idx.next, 2147483640
; CHECK-NEXT: [[COND2:%[^ ]+]] = icmp slt i32 %idx.next, %exit.mainloop.at
; CHECK-NEXT: br i1 [[COND2]], label %loop, label %main.exit.selector
; CHECK: main.exit.selector:
; CHECK-NEXT: %idx.next.lcssa = phi i32 [ %idx.next, %in.bounds ]
; CHECK-NEXT: [[COND3:%[^ ]+]] = icmp slt i32 %idx.next.lcssa, 2147483640
; CHECK-NEXT: br i1 [[COND3]], label %main.pseudo.exit, label %exit
; CHECK: main.pseudo.exit:
; CHECK-NEXT: %idx.copy = phi i32 [ 0, %entry ], [ %idx.next.lcssa, %main.exit.selector ]
; CHECK-NEXT: %indvar.end = phi i32 [ 0, %entry ], [ %idx.next.lcssa, %main.exit.selector ]
; CHECK-NEXT: br label %postloop
; CHECK: postloop:
; CHECK-NEXT: br label %loop.postloop
; CHECK: loop.postloop:
; CHECK-NEXT: %idx.postloop = phi i32 [ %idx.copy, %postloop ], [ %idx.next.postloop, %in.bounds.postloop ]
; CHECK-NEXT: %idx.next.postloop = add i32 %idx.postloop, 7
; CHECK-NEXT: %abc.postloop = icmp slt i32 %idx.postloop, %exit.mainloop.at
; CHECK-NEXT: br i1 %abc.postloop, label %in.bounds.postloop, label %out.of.bounds.loopexit
; CHECK: in.bounds.postloop:
; CHECK-NEXT: %addr.postloop = getelementptr i32, i32* %arr, i32 %idx.postloop
; CHECK-NEXT: store i32 0, i32* %addr.postloop
; CHECK-NEXT: %next.postloop = icmp slt i32 %idx.next.postloop, 2147483640
; CHECK-NEXT: br i1 %next.postloop, label %loop.postloop, label %exit.loopexit
entry:
%len = load i32, i32* %a_len_ptr, !range !0
br label %loop
loop:
%idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ]
%idx.next = add i32 %idx, 7
%abc = icmp slt i32 %idx, %len
br i1 %abc, label %in.bounds, label %out.of.bounds
in.bounds:
%addr = getelementptr i32, i32* %arr, i32 %idx
store i32 0, i32* %addr
%next = icmp slt i32 %idx.next, 2147483640
br i1 %next, label %loop, label %exit
out.of.bounds:
ret void
exit:
ret void
}
; IV = 0; IV <s MAX_INT; IV += 7; 0 <= Len <= MAX_INT - 7. This is the greatest
; value of Len for which IRCE is allowed.
define void @test_03(i32* %arr, i32* %a_len_ptr) {
; CHECK: @test_03(
; CHECK: entry:
; CHECK-NEXT: %exit.mainloop.at = load i32, i32* %a_len_ptr
; CHECK-NEXT: [[COND1:%[^ ]+]] = icmp slt i32 0, %exit.mainloop.at
; CHECK-NEXT: br i1 [[COND1]], label %loop.preheader, label %main.pseudo.exit
; CHECK: loop.preheader:
; CHECK-NEXT: br label %loop
; CHECK: loop:
; CHECK-NEXT: %idx = phi i32 [ %idx.next, %in.bounds ], [ 0, %loop.preheader ]
; CHECK-NEXT: %idx.next = add i32 %idx, 7
; CHECK-NEXT: %abc = icmp slt i32 %idx, %exit.mainloop.at
; CHECK-NEXT: br i1 true, label %in.bounds, label %out.of.bounds.loopexit1
; CHECK: in.bounds:
; CHECK-NEXT: %addr = getelementptr i32, i32* %arr, i32 %idx
; CHECK-NEXT: store i32 0, i32* %addr
; CHECK-NEXT: %next = icmp slt i32 %idx.next, 2147483647
; CHECK-NEXT: [[COND2:%[^ ]+]] = icmp slt i32 %idx.next, %exit.mainloop.at
; CHECK-NEXT: br i1 [[COND2]], label %loop, label %main.exit.selector
; CHECK: main.exit.selector:
; CHECK-NEXT: %idx.next.lcssa = phi i32 [ %idx.next, %in.bounds ]
; CHECK-NEXT: [[COND3:%[^ ]+]] = icmp slt i32 %idx.next.lcssa, 2147483647
; CHECK-NEXT: br i1 [[COND3]], label %main.pseudo.exit, label %exit
; CHECK: main.pseudo.exit:
; CHECK-NEXT: %idx.copy = phi i32 [ 0, %entry ], [ %idx.next.lcssa, %main.exit.selector ]
; CHECK-NEXT: %indvar.end = phi i32 [ 0, %entry ], [ %idx.next.lcssa, %main.exit.selector ]
; CHECK-NEXT: br label %postloop
; CHECK: postloop:
; CHECK-NEXT: br label %loop.postloop
; CHECK: loop.postloop:
; CHECK-NEXT: %idx.postloop = phi i32 [ %idx.copy, %postloop ], [ %idx.next.postloop, %in.bounds.postloop ]
; CHECK-NEXT: %idx.next.postloop = add i32 %idx.postloop, 7
; CHECK-NEXT: %abc.postloop = icmp slt i32 %idx.postloop, %exit.mainloop.at
; CHECK-NEXT: br i1 %abc.postloop, label %in.bounds.postloop, label %out.of.bounds.loopexit
; CHECK: in.bounds.postloop:
; CHECK-NEXT: %addr.postloop = getelementptr i32, i32* %arr, i32 %idx.postloop
; CHECK-NEXT: store i32 0, i32* %addr.postloop
; CHECK-NEXT: %next.postloop = icmp slt i32 %idx.next.postloop, 2147483647
; CHECK-NEXT: br i1 %next.postloop, label %loop.postloop, label %exit.loopexit
entry:
%len = load i32, i32* %a_len_ptr, !range !1
br label %loop
loop:
%idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ]
%idx.next = add i32 %idx, 7
%abc = icmp slt i32 %idx, %len
br i1 %abc, label %in.bounds, label %out.of.bounds
in.bounds:
%addr = getelementptr i32, i32* %arr, i32 %idx
store i32 0, i32* %addr
%next = icmp slt i32 %idx.next, 2147483647
br i1 %next, label %loop, label %exit
out.of.bounds:
ret void
exit:
ret void
}
; IV = 0; IV <s MAX_INT; IV += 7; 0 <= Len <= MAX_INT - 6. IRCE is not allowed,
; because we cannot guarantee that IV + 7 will not exceed MAX_INT.
; Negative test.
define void @test_04(i32* %arr, i32* %a_len_ptr) {
; CHECK: @test_04(
entry:
%len = load i32, i32* %a_len_ptr, !range !2
br label %loop
loop:
%idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ]
%idx.next = add i32 %idx, 7
%abc = icmp slt i32 %idx, %len
br i1 %abc, label %in.bounds, label %out.of.bounds
in.bounds:
%addr = getelementptr i32, i32* %arr, i32 %idx
store i32 0, i32* %addr
%next = icmp slt i32 %idx.next, 2147483647
br i1 %next, label %loop, label %exit
out.of.bounds:
ret void
exit:
ret void
}
; IV = 100; IV >s -1; IV -= 7; 0 <= Len <= 50. IRCE is allowed.
define void @test_05(i32* %arr, i32* %a_len_ptr) {
; CHECK: @test_05(
; CHECK: entry:
; CHECK-NEXT: %len = load i32, i32* %a_len_ptr
; CHECK-NEXT: %exit.preloop.at = add i32 %len, -1
; CHECK-NEXT: [[COND1:%[^ ]+]] = icmp sgt i32 100, %exit.preloop.at
; CHECK-NEXT: br i1 [[COND1]], label %loop.preloop.preheader, label %preloop.pseudo.exit
; CHECK: loop.preloop.preheader:
; CHECK-NEXT: br label %loop.preloop
; CHECK: mainloop:
; CHECK-NEXT: br label %loop
; CHECK: loop:
; CHECK-NEXT: %idx = phi i32 [ %idx.preloop.copy, %mainloop ], [ %idx.next, %in.bounds ]
; CHECK-NEXT: %idx.next = add i32 %idx, -7
; CHECK-NEXT: %abc = icmp slt i32 %idx, %len
; CHECK-NEXT: br i1 true, label %in.bounds, label %out.of.bounds.loopexit1
; CHECK: in.bounds:
; CHECK-NEXT: %addr = getelementptr i32, i32* %arr, i32 %idx
; CHECK-NEXT: store i32 0, i32* %addr
; CHECK-NEXT: %next = icmp sgt i32 %idx.next, -1
; CHECK-NEXT: br i1 %next, label %loop, label %exit.loopexit
; CHECK: loop.preloop:
; CHECK-NEXT: %idx.preloop = phi i32 [ %idx.next.preloop, %in.bounds.preloop ], [ 100, %loop.preloop.preheader ]
; CHECK-NEXT: %idx.next.preloop = add i32 %idx.preloop, -7
; CHECK-NEXT: %abc.preloop = icmp slt i32 %idx.preloop, %len
; CHECK-NEXT: br i1 %abc.preloop, label %in.bounds.preloop, label %out.of.bounds.loopexit
; CHECK: in.bounds.preloop:
; CHECK-NEXT: %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop
; CHECK-NEXT: store i32 0, i32* %addr.preloop
; CHECK-NEXT: %next.preloop = icmp sgt i32 %idx.next.preloop, -1
; CHECK-NEXT: [[COND2:%[^ ]+]] = icmp sgt i32 %idx.next.preloop, %exit.preloop.at
; CHECK-NEXT: br i1 [[COND2]], label %loop.preloop, label %preloop.exit.selector
; CHECK: preloop.exit.selector:
; CHECK-NEXT: %idx.next.preloop.lcssa = phi i32 [ %idx.next.preloop, %in.bounds.preloop ]
; CHECK-NEXT: [[COND3:%[^ ]+]] = icmp sgt i32 %idx.next.preloop.lcssa, -1
; CHECK-NEXT: br i1 [[COND3]], label %preloop.pseudo.exit, label %exit
; CHECK: preloop.pseudo.exit:
; CHECK-NEXT: %idx.preloop.copy = phi i32 [ 100, %entry ], [ %idx.next.preloop.lcssa, %preloop.exit.selector ]
; CHECK-NEXT: %indvar.end = phi i32 [ 100, %entry ], [ %idx.next.preloop.lcssa, %preloop.exit.selector ]
; CHECK-NEXT: br label %mainloop
entry:
%len = load i32, i32* %a_len_ptr, !range !0
br label %loop
loop:
%idx = phi i32 [ 100, %entry ], [ %idx.next, %in.bounds ]
%idx.next = add i32 %idx, -7
%abc = icmp slt i32 %idx, %len
br i1 %abc, label %in.bounds, label %out.of.bounds
in.bounds:
%addr = getelementptr i32, i32* %arr, i32 %idx
store i32 0, i32* %addr
%next = icmp sgt i32 %idx.next, -1
br i1 %next, label %loop, label %exit
out.of.bounds:
ret void
exit:
ret void
}
; IV = MAX_INT - 7; IV >u 6; IV -= 7; 10 <= Len <= 50. IRCE is allowed.
define void @test_06(i32* %arr, i32* %a_len_ptr) {
; CHECK: @test_06(
; CHECK: entry:
; CHECK-NEXT: %len = load i32, i32* %a_len_ptr
; CHECK-NEXT: %exit.preloop.at = add i32 %len, -1
; CHECK-NEXT: [[COND1:%[^ ]+]] = icmp ugt i32 2147483640, %exit.preloop.at
; CHECK-NEXT: br i1 [[COND1]], label %loop.preloop.preheader, label %preloop.pseudo.exit
; CHECK: loop.preloop.preheader:
; CHECK-NEXT: br label %loop.preloop
; CHECK: mainloop:
; CHECK-NEXT: br label %loop
; CHECK: loop:
; CHECK-NEXT: %idx = phi i32 [ %idx.preloop.copy, %mainloop ], [ %idx.next, %in.bounds ]
; CHECK-NEXT: %idx.next = add i32 %idx, -7
; CHECK-NEXT: %abc = icmp slt i32 %idx, %len
; CHECK-NEXT: br i1 true, label %in.bounds, label %out.of.bounds.loopexit1
; CHECK: in.bounds:
; CHECK-NEXT: %addr = getelementptr i32, i32* %arr, i32 %idx
; CHECK-NEXT: store i32 0, i32* %addr
; CHECK-NEXT: %next = icmp ugt i32 %idx.next, 6
; CHECK-NEXT: br i1 %next, label %loop, label %exit.loopexit
; CHECK: loop.preloop:
; CHECK-NEXT: %idx.preloop = phi i32 [ %idx.next.preloop, %in.bounds.preloop ], [ 2147483640, %loop.preloop.preheader ]
; CHECK-NEXT: %idx.next.preloop = add i32 %idx.preloop, -7
; CHECK-NEXT: %abc.preloop = icmp slt i32 %idx.preloop, %len
; CHECK-NEXT: br i1 %abc.preloop, label %in.bounds.preloop, label %out.of.bounds.loopexit
; CHECK: in.bounds.preloop:
; CHECK-NEXT: %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop
; CHECK-NEXT: store i32 0, i32* %addr.preloop
; CHECK-NEXT: %next.preloop = icmp ugt i32 %idx.next.preloop, 6
; CHECK-NEXT: [[COND2:%[^ ]+]] = icmp ugt i32 %idx.next.preloop, %exit.preloop.at
; CHECK-NEXT: br i1 [[COND2]], label %loop.preloop, label %preloop.exit.selector
; CHECK: preloop.exit.selector:
; CHECK-NEXT: %idx.next.preloop.lcssa = phi i32 [ %idx.next.preloop, %in.bounds.preloop ]
; CHECK-NEXT: [[COND3:%[^ ]+]] = icmp ugt i32 %idx.next.preloop.lcssa, 6
; CHECK-NEXT: br i1 [[COND3]], label %preloop.pseudo.exit, label %exit
; CHECK: preloop.pseudo.exit:
; CHECK-NEXT: %idx.preloop.copy = phi i32 [ 2147483640, %entry ], [ %idx.next.preloop.lcssa, %preloop.exit.selector ]
; CHECK-NEXT: %indvar.end = phi i32 [ 2147483640, %entry ], [ %idx.next.preloop.lcssa, %preloop.exit.selector ]
; CHECK-NEXT: br label %mainloop
entry:
%len = load i32, i32* %a_len_ptr, !range !3
br label %loop
loop:
%idx = phi i32 [ 2147483640, %entry ], [ %idx.next, %in.bounds ]
%idx.next = add i32 %idx, -7
%abc = icmp slt i32 %idx, %len
br i1 %abc, label %in.bounds, label %out.of.bounds
in.bounds:
%addr = getelementptr i32, i32* %arr, i32 %idx
store i32 0, i32* %addr
%next = icmp ugt i32 %idx.next, 6
br i1 %next, label %loop, label %exit
out.of.bounds:
ret void
exit:
ret void
}
; IV = MAX_INT - 7; IV >u 5; IV -= 7; 10 <= Len <= 50. IRCE is not allowed,
; because we can cross the 0 border.
define void @test_07(i32* %arr, i32* %a_len_ptr) {
; CHECK: @test_07(
entry:
%len = load i32, i32* %a_len_ptr, !range !3
br label %loop
loop:
%idx = phi i32 [ 2147483640, %entry ], [ %idx.next, %in.bounds ]
%idx.next = add i32 %idx, -7
%abc = icmp slt i32 %idx, %len
br i1 %abc, label %in.bounds, label %out.of.bounds
in.bounds:
%addr = getelementptr i32, i32* %arr, i32 %idx
store i32 0, i32* %addr
%next = icmp ugt i32 %idx.next, 5
br i1 %next, label %loop, label %exit
out.of.bounds:
ret void
exit:
ret void
}
; IV = MAX_INT; IV >u 6; IV -= 7; 10 <= Len <= 50. IRCE is allowed.
define void @test_08(i32* %arr, i32* %a_len_ptr) {
; CHECK: @test_08(
; CHECK: entry:
; CHECK-NEXT: %len = load i32, i32* %a_len_ptr
; CHECK-NEXT: %exit.preloop.at = add i32 %len, -1
; CHECK-NEXT: [[COND1:%[^ ]+]] = icmp ugt i32 2147483647, %exit.preloop.at
; CHECK-NEXT: br i1 [[COND1]], label %loop.preloop.preheader, label %preloop.pseudo.exit
; CHECK: loop.preloop.preheader:
; CHECK-NEXT: br label %loop.preloop
; CHECK: mainloop:
; CHECK-NEXT: br label %loop
; CHECK: loop:
; CHECK-NEXT: %idx = phi i32 [ %idx.preloop.copy, %mainloop ], [ %idx.next, %in.bounds ]
; CHECK-NEXT: %idx.next = add i32 %idx, -7
; CHECK-NEXT: %abc = icmp slt i32 %idx, %len
; CHECK-NEXT: br i1 true, label %in.bounds, label %out.of.bounds.loopexit1
; CHECK: in.bounds:
; CHECK-NEXT: %addr = getelementptr i32, i32* %arr, i32 %idx
; CHECK-NEXT: store i32 0, i32* %addr
; CHECK-NEXT: %next = icmp ugt i32 %idx.next, 6
; CHECK-NEXT: br i1 %next, label %loop, label %exit.loopexit
; CHECK: loop.preloop:
; CHECK-NEXT: %idx.preloop = phi i32 [ %idx.next.preloop, %in.bounds.preloop ], [ 2147483647, %loop.preloop.preheader ]
; CHECK-NEXT: %idx.next.preloop = add i32 %idx.preloop, -7
; CHECK-NEXT: %abc.preloop = icmp slt i32 %idx.preloop, %len
; CHECK-NEXT: br i1 %abc.preloop, label %in.bounds.preloop, label %out.of.bounds.loopexit
; CHECK: in.bounds.preloop:
; CHECK-NEXT: %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop
; CHECK-NEXT: store i32 0, i32* %addr.preloop
; CHECK-NEXT: %next.preloop = icmp ugt i32 %idx.next.preloop, 6
; CHECK-NEXT: [[COND2:%[^ ]+]] = icmp ugt i32 %idx.next.preloop, %exit.preloop.at
; CHECK-NEXT: br i1 [[COND2]], label %loop.preloop, label %preloop.exit.selector
; CHECK: preloop.exit.selector:
; CHECK-NEXT: %idx.next.preloop.lcssa = phi i32 [ %idx.next.preloop, %in.bounds.preloop ]
; CHECK-NEXT: [[COND3:%[^ ]+]] = icmp ugt i32 %idx.next.preloop.lcssa, 6
; CHECK-NEXT: br i1 [[COND3]], label %preloop.pseudo.exit, label %exit
; CHECK: preloop.pseudo.exit:
; CHECK-NEXT: %idx.preloop.copy = phi i32 [ 2147483647, %entry ], [ %idx.next.preloop.lcssa, %preloop.exit.selector ]
; CHECK-NEXT: %indvar.end = phi i32 [ 2147483647, %entry ], [ %idx.next.preloop.lcssa, %preloop.exit.selector ]
; CHECK-NEXT: br label %mainloop
entry:
%len = load i32, i32* %a_len_ptr, !range !3
br label %loop
loop:
%idx = phi i32 [ 2147483647, %entry ], [ %idx.next, %in.bounds ]
%idx.next = add i32 %idx, -7
%abc = icmp slt i32 %idx, %len
br i1 %abc, label %in.bounds, label %out.of.bounds
in.bounds:
%addr = getelementptr i32, i32* %arr, i32 %idx
store i32 0, i32* %addr
%next = icmp ugt i32 %idx.next, 6
br i1 %next, label %loop, label %exit
out.of.bounds:
ret void
exit:
ret void
}
!0 = !{i32 0, i32 50}
!1 = !{i32 0, i32 2147483640}
!2 = !{i32 0, i32 2147483641}
!3 = !{i32 10, i32 50}