[IRCE] Identify loops with latch comparison against current IV value

The current implementation of parseLoopStructure interprets the latch comparison as a
comparison against `iv.next`. If the actual comparison is made against the current value of `iv`,
the loop may be rejected, because this misinterpretation leads to an incorrect evaluation
of the latch start value.

This patch teaches IRCE to recognize this kind of loop and to perform the optimization
for it. We now use the `IndVarBase` variable, which can be either the next or the current value of the
induction variable (previously we used `IndVarNext`, which was always the value on the next iteration).

Differential Revision: https://reviews.llvm.org/D36215

llvm-svn: 312221
This commit is contained in:
Max Kazantsev 2017-08-31 07:04:20 +00:00
parent e14f6c502a
commit a43a398580
2 changed files with 231 additions and 11 deletions

View File

@ -450,10 +450,20 @@ struct LoopStructure {
// equivalent to:
//
// intN_ty inc = IndVarIncreasing ? 1 : -1;
// pred_ty predicate = IndVarIncreasing ? ICMP_SLT : ICMP_SGT;
// pred_ty predicate = IndVarIncreasing
// ? IsSignedPredicate ? ICMP_SLT : ICMP_ULT
// : IsSignedPredicate ? ICMP_SGT : ICMP_UGT;
//
// for (intN_ty iv = IndVarStart; predicate(iv, LoopExitAt); iv = IndVarBase)
//
// for (intN_ty iv = IndVarStart; predicate(IndVarBase, LoopExitAt);
// iv = IndVarNext)
// ... body ...
//
// Here IndVarBase is either the current or the next value of the induction
// variable. In the former case, IsIndVarNext = false and IndVarBase points to
// the Phi node of the induction variable. Otherwise, IsIndVarNext = true and
// IndVarBase points to the IV increment instruction.
//
Value *IndVarBase;
Value *IndVarStart;
@ -461,12 +471,13 @@ struct LoopStructure {
Value *LoopExitAt;
bool IndVarIncreasing;
bool IsSignedPredicate;
bool IsIndVarNext;
LoopStructure()
: Tag(""), Header(nullptr), Latch(nullptr), LatchBr(nullptr),
LatchExit(nullptr), LatchBrExitIdx(-1), IndVarBase(nullptr),
IndVarStart(nullptr), IndVarStep(nullptr), LoopExitAt(nullptr),
IndVarIncreasing(false), IsSignedPredicate(true) {}
IndVarIncreasing(false), IsSignedPredicate(true), IsIndVarNext(false) {}
template <typename M> LoopStructure map(M Map) const {
LoopStructure Result;
@ -482,6 +493,7 @@ struct LoopStructure {
Result.LoopExitAt = Map(LoopExitAt);
Result.IndVarIncreasing = IndVarIncreasing;
Result.IsSignedPredicate = IsSignedPredicate;
Result.IsIndVarNext = IsIndVarNext;
return Result;
}
@ -829,21 +841,42 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE,
return false;
};
// `ICI` is interpreted as taking the backedge if the *next* value of the
// induction variable satisfies some constraint.
// `ICI` can either be a comparison against IV or a comparison of IV.next.
// Depending on the interpretation, we calculate the start value differently.
// The pair {IndVarBase; IsIndVarNext} semantically designates whether the latch
// comparison happens against the IV before or after its value is
// incremented. The two valid combinations for them are:
//
// 1) { phi [ iv.start, preheader ], [ iv.next, latch ]; false },
// 2) { iv.next; true }.
//
// The latch comparison happens against IndVarBase which can be either current
// or next value of the induction variable.
const SCEVAddRecExpr *IndVarBase = cast<SCEVAddRecExpr>(LeftSCEV);
bool IsIncreasing = false;
bool IsSignedPredicate = true;
bool IsIndVarNext = false;
ConstantInt *StepCI;
if (!IsInductionVar(IndVarBase, IsIncreasing, StepCI)) {
FailureReason = "LHS in icmp not induction variable";
return None;
}
const SCEV *StartNext = IndVarBase->getStart();
const SCEV *Addend = SE.getNegativeSCEV(IndVarBase->getStepRecurrence(SE));
const SCEV *IndVarStart = SE.getAddExpr(StartNext, Addend);
const SCEV *IndVarStart = nullptr;
// TODO: Currently we only handle comparison against IV, but we can extend
// this analysis to be able to deal with comparison against sext(iv) and such.
if (isa<PHINode>(LeftValue) &&
cast<PHINode>(LeftValue)->getParent() == Header)
// The comparison is made against current IV value.
IndVarStart = IndVarBase->getStart();
else {
// Assume that the comparison is made against next IV value.
const SCEV *StartNext = IndVarBase->getStart();
const SCEV *Addend = SE.getNegativeSCEV(IndVarBase->getStepRecurrence(SE));
IndVarStart = SE.getAddExpr(StartNext, Addend);
IsIndVarNext = true;
}
const SCEV *Step = SE.getSCEV(StepCI);
ConstantInt *One = ConstantInt::get(IndVarTy, 1);
@ -1027,6 +1060,7 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE,
Result.IndVarIncreasing = IsIncreasing;
Result.LoopExitAt = RightValue;
Result.IsSignedPredicate = IsSignedPredicate;
Result.IsIndVarNext = IsIndVarNext;
FailureReason = nullptr;
@ -1316,8 +1350,9 @@ LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
BranchToContinuation);
NewPHI->addIncoming(PN->getIncomingValueForBlock(Preheader), Preheader);
NewPHI->addIncoming(PN->getIncomingValueForBlock(LS.Latch),
RRI.ExitSelector);
auto *FixupValue =
LS.IsIndVarNext ? PN->getIncomingValueForBlock(LS.Latch) : PN;
NewPHI->addIncoming(FixupValue, RRI.ExitSelector);
RRI.PHIValuesAtPseudoExit.push_back(NewPHI);
}
@ -1700,7 +1735,10 @@ bool InductiveRangeCheckElimination::runOnLoop(Loop *L, LPPassManager &LPM) {
}
LoopStructure LS = MaybeLoopStructure.getValue();
const SCEVAddRecExpr *IndVar =
cast<SCEVAddRecExpr>(SE.getMinusSCEV(SE.getSCEV(LS.IndVarBase), SE.getSCEV(LS.IndVarStep)));
cast<SCEVAddRecExpr>(SE.getSCEV(LS.IndVarBase));
if (LS.IsIndVarNext)
IndVar = cast<SCEVAddRecExpr>(SE.getMinusSCEV(IndVar,
SE.getSCEV(LS.IndVarStep)));
Optional<InductiveRangeCheck::Range> SafeIterRange;
Instruction *ExprInsertPt = Preheader->getTerminator();

View File

@ -0,0 +1,182 @@
; RUN: opt -verify-loop-info -irce-print-changed-loops -irce -S < %s 2>&1 | FileCheck %s
; Check that IRCE is able to deal with loops where the latch comparison is
; done against current value of the IV, not the IV.next.
; CHECK: irce: in function test_01: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting>
; CHECK: irce: in function test_02: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting>
; CHECK-NOT: irce: in function test_03: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting>
; CHECK-NOT: irce: in function test_04: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting>
; SLT condition for increasing loop from 0 to 100.
define void @test_01(i32* %arr, i32* %a_len_ptr) #0 {
; Signed variant: both the range check and the latch exit test compare the
; current IV value %idx (not %idx.next) using slt.
; The directives below describe the expected output: the range check in the
; main loop is folded to `br i1 true`, the latch gets an extra comparison
; against %exit.mainloop.at, no pre-loop is emitted, and a post-loop is.
; CHECK: test_01
; CHECK: entry:
; CHECK-NEXT: %exit.mainloop.at = load i32, i32* %a_len_ptr, !range !0
; CHECK-NEXT: [[COND2:%[^ ]+]] = icmp slt i32 0, %exit.mainloop.at
; CHECK-NEXT: br i1 [[COND2]], label %loop.preheader, label %main.pseudo.exit
; CHECK: loop:
; CHECK-NEXT: %idx = phi i32 [ %idx.next, %in.bounds ], [ 0, %loop.preheader ]
; CHECK-NEXT: %idx.next = add nuw nsw i32 %idx, 1
; CHECK-NEXT: %abc = icmp slt i32 %idx, %exit.mainloop.at
; CHECK-NEXT: br i1 true, label %in.bounds, label %out.of.bounds.loopexit1
; CHECK: in.bounds:
; CHECK-NEXT: %addr = getelementptr i32, i32* %arr, i32 %idx
; CHECK-NEXT: store i32 0, i32* %addr
; CHECK-NEXT: %next = icmp slt i32 %idx, 100
; CHECK-NEXT: [[COND3:%[^ ]+]] = icmp slt i32 %idx, %exit.mainloop.at
; CHECK-NEXT: br i1 [[COND3]], label %loop, label %main.exit.selector
; CHECK: main.exit.selector:
; CHECK-NEXT: %idx.lcssa = phi i32 [ %idx, %in.bounds ]
; CHECK-NEXT: [[COND4:%[^ ]+]] = icmp slt i32 %idx.lcssa, 100
; CHECK-NEXT: br i1 [[COND4]], label %main.pseudo.exit, label %exit
; CHECK-NOT: loop.preloop:
; CHECK: loop.postloop:
; CHECK-NEXT: %idx.postloop = phi i32 [ %idx.copy, %postloop ], [ %idx.next.postloop, %in.bounds.postloop ]
; CHECK-NEXT: %idx.next.postloop = add nuw nsw i32 %idx.postloop, 1
; CHECK-NEXT: %abc.postloop = icmp slt i32 %idx.postloop, %exit.mainloop.at
; CHECK-NEXT: br i1 %abc.postloop, label %in.bounds.postloop, label %out.of.bounds.loopexit
entry:
; The length is loaded once, outside the loop, with !range !0 metadata.
%len = load i32, i32* %a_len_ptr, !range !0
br label %loop
loop:
; Loop header: %idx starts at 0 and is advanced by 1 via %idx.next.
%idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ]
%idx.next = add nsw nuw i32 %idx, 1
; Range check of the current IV value against the loaded length.
%abc = icmp slt i32 %idx, %len
br i1 %abc, label %in.bounds, label %out.of.bounds
in.bounds:
%addr = getelementptr i32, i32* %arr, i32 %idx
store i32 0, i32* %addr
; Latch exit test: uses %idx (the current IV value), not %idx.next.
%next = icmp slt i32 %idx, 100
br i1 %next, label %loop, label %exit
out.of.bounds:
ret void
exit:
ret void
}
; ULT condition for increasing loop from 0 to 100.
define void @test_02(i32* %arr, i32* %a_len_ptr) #0 {
; Unsigned variant: both the range check and the latch exit test compare the
; current IV value %idx (not %idx.next) using ult.
; The directives below describe the expected output: the range check in the
; main loop is folded to `br i1 true`, the latch gets an extra comparison
; against %exit.mainloop.at, no pre-loop is emitted, and a post-loop is.
; CHECK: test_02
; CHECK: entry:
; CHECK-NEXT: %exit.mainloop.at = load i32, i32* %a_len_ptr, !range !0
; CHECK-NEXT: [[COND2:%[^ ]+]] = icmp ult i32 0, %exit.mainloop.at
; CHECK-NEXT: br i1 [[COND2]], label %loop.preheader, label %main.pseudo.exit
; CHECK: loop:
; CHECK-NEXT: %idx = phi i32 [ %idx.next, %in.bounds ], [ 0, %loop.preheader ]
; CHECK-NEXT: %idx.next = add nuw nsw i32 %idx, 1
; CHECK-NEXT: %abc = icmp ult i32 %idx, %exit.mainloop.at
; CHECK-NEXT: br i1 true, label %in.bounds, label %out.of.bounds.loopexit1
; CHECK: in.bounds:
; CHECK-NEXT: %addr = getelementptr i32, i32* %arr, i32 %idx
; CHECK-NEXT: store i32 0, i32* %addr
; CHECK-NEXT: %next = icmp ult i32 %idx, 100
; CHECK-NEXT: [[COND3:%[^ ]+]] = icmp ult i32 %idx, %exit.mainloop.at
; CHECK-NEXT: br i1 [[COND3]], label %loop, label %main.exit.selector
; CHECK: main.exit.selector:
; CHECK-NEXT: %idx.lcssa = phi i32 [ %idx, %in.bounds ]
; CHECK-NEXT: [[COND4:%[^ ]+]] = icmp ult i32 %idx.lcssa, 100
; CHECK-NEXT: br i1 [[COND4]], label %main.pseudo.exit, label %exit
; CHECK-NOT: loop.preloop:
; CHECK: loop.postloop:
; CHECK-NEXT: %idx.postloop = phi i32 [ %idx.copy, %postloop ], [ %idx.next.postloop, %in.bounds.postloop ]
; CHECK-NEXT: %idx.next.postloop = add nuw nsw i32 %idx.postloop, 1
; CHECK-NEXT: %abc.postloop = icmp ult i32 %idx.postloop, %exit.mainloop.at
; CHECK-NEXT: br i1 %abc.postloop, label %in.bounds.postloop, label %out.of.bounds.loopexit
entry:
; The length is loaded once, outside the loop, with !range !0 metadata.
%len = load i32, i32* %a_len_ptr, !range !0
br label %loop
loop:
; Loop header: %idx starts at 0 and is advanced by 1 via %idx.next.
%idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ]
%idx.next = add nsw nuw i32 %idx, 1
; Range check of the current IV value against the loaded length.
%abc = icmp ult i32 %idx, %len
br i1 %abc, label %in.bounds, label %out.of.bounds
in.bounds:
%addr = getelementptr i32, i32* %arr, i32 %idx
store i32 0, i32* %addr
; Latch exit test: uses %idx (the current IV value), not %idx.next.
%next = icmp ult i32 %idx, 100
br i1 %next, label %loop, label %exit
out.of.bounds:
ret void
exit:
ret void
}
; Same as test_01, but comparison happens against IV extended to a wider type.
; This test ensures that IRCE rejects it and does not falsely assume that it was
; a comparison against iv.next.
; TODO: We can actually extend the recognition to cover this case.
define void @test_03(i32* %arr, i64* %a_len_ptr) #0 {
; Signed variant with an i64 length: the range check compares the
; sign-extended IV, while the latch exit test still compares the i32 %idx.
; CHECK: test_03
entry:
; The i64 length is loaded once, outside the loop, with !range !1 metadata.
%len = load i64, i64* %a_len_ptr, !range !1
br label %loop
loop:
%idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ]
%idx.next = add nsw nuw i32 %idx, 1
; The IV is widened to i64 before being compared against the length.
%idx.ext = sext i32 %idx to i64
%abc = icmp slt i64 %idx.ext, %len
br i1 %abc, label %in.bounds, label %out.of.bounds
in.bounds:
%addr = getelementptr i32, i32* %arr, i32 %idx
store i32 0, i32* %addr
; Latch exit test on the current (non-extended) IV value.
%next = icmp slt i32 %idx, 100
br i1 %next, label %loop, label %exit
out.of.bounds:
ret void
exit:
ret void
}
; Same as test_02, but comparison happens against IV extended to a wider type.
; This test ensures that IRCE rejects it and does not falsely assume that it was
; a comparison against iv.next.
; TODO: We can actually extend the recognition to cover this case.
define void @test_04(i32* %arr, i64* %a_len_ptr) #0 {
; Unsigned variant with an i64 length: the range check compares the
; sign-extended IV using ult, while the latch exit test still compares the
; i32 %idx.
; CHECK: test_04
entry:
; The i64 length is loaded once, outside the loop, with !range !1 metadata.
%len = load i64, i64* %a_len_ptr, !range !1
br label %loop
loop:
%idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ]
%idx.next = add nsw nuw i32 %idx, 1
; The IV is widened to i64 (with sext) before the unsigned comparison.
%idx.ext = sext i32 %idx to i64
%abc = icmp ult i64 %idx.ext, %len
br i1 %abc, label %in.bounds, label %out.of.bounds
in.bounds:
%addr = getelementptr i32, i32* %arr, i32 %idx
store i32 0, i32* %addr
; Latch exit test on the current (non-extended) IV value.
%next = icmp ult i32 %idx, 100
br i1 %next, label %loop, label %exit
out.of.bounds:
ret void
exit:
ret void
}
!0 = !{i32 0, i32 50}
!1 = !{i64 0, i64 50}