mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-19 08:24:12 +00:00
[SCEV] Teach SCEV to find maxBECount when loop endbound is variant
Summary: This patch teaches SCEV to calculate the maxBECount when the end bound of the loop can vary. Note that we cannot calculate the exactBECount. This will only be done when both conditions are satisfied: 1. the loop termination condition is strictly LT. 2. the IV is proven to not overflow. This provides more information to users of SCEV and can be used to improve identification of finite loops. Reviewers: sanjoy, mkazantsev, silviu.baranga, atrick Reviewed by: mkazantsev Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D38825 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@315683 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
138989f016
commit
7f1eb15289
@ -1741,6 +1741,12 @@ private:
|
||||
const SCEV *computeBECount(const SCEV *Delta, const SCEV *Stride,
|
||||
bool Equality);
|
||||
|
||||
// Compute the maximum backedge count based on the range of values
|
||||
// permitted by Start, End, and Stride.
|
||||
const SCEV *computeMaxBECount(const SCEV *Start, const SCEV *Stride,
|
||||
const SCEV *End, unsigned BitWidth,
|
||||
bool IsSigned);
|
||||
|
||||
/// Verify if an linear IV with positive stride can overflow when in a
|
||||
/// less-than comparison, knowing the invariant term of the comparison,
|
||||
/// the stride and the knowledge of NSW/NUW flags on the recurrence.
|
||||
|
@ -9689,14 +9689,54 @@ const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step,
|
||||
return getUDivExpr(Delta, Step);
|
||||
}
|
||||
|
||||
const SCEV *ScalarEvolution::computeMaxBECount(const SCEV *Start,
|
||||
const SCEV *Stride,
|
||||
const SCEV *End,
|
||||
unsigned BitWidth,
|
||||
bool IsSigned) {
|
||||
|
||||
assert(!isKnownNonPositive(Stride) &&
|
||||
"Stride is expected strictly positive!");
|
||||
// Calculate the maximum backedge count based on the range of values
|
||||
// permitted by Start, End, and Stride.
|
||||
const SCEV *MaxBECount;
|
||||
APInt MinStart =
|
||||
IsSigned ? getSignedRangeMin(Start) : getUnsignedRangeMin(Start);
|
||||
|
||||
APInt StrideForMaxBECount;
|
||||
|
||||
bool PositiveStride = isKnownPositive(Stride);
|
||||
if (PositiveStride)
|
||||
StrideForMaxBECount =
|
||||
IsSigned ? getSignedRangeMin(Stride) : getUnsignedRangeMin(Stride);
|
||||
else
|
||||
// Using a stride of 1 is safe when computing max backedge taken count for
|
||||
// a loop with unknown stride.
|
||||
StrideForMaxBECount = APInt(BitWidth, 1, IsSigned);
|
||||
|
||||
APInt MaxValue = IsSigned ? APInt::getSignedMaxValue(BitWidth)
|
||||
: APInt::getMaxValue(BitWidth);
|
||||
APInt Limit = MaxValue - (StrideForMaxBECount - 1);
|
||||
|
||||
// Although End can be a MAX expression we estimate MaxEnd considering only
|
||||
// the case End = RHS of the loop termination condition. This is safe because
|
||||
// in the other case (End - Start) is zero, leading to a zero maximum backedge
|
||||
// taken count.
|
||||
APInt MaxEnd = IsSigned ? APIntOps::smin(getSignedRangeMax(End), Limit)
|
||||
: APIntOps::umin(getUnsignedRangeMax(End), Limit);
|
||||
|
||||
MaxBECount = computeBECount(getConstant(MaxEnd - MinStart) /* Delta */,
|
||||
getConstant(StrideForMaxBECount) /* Step */,
|
||||
false /* Equality */);
|
||||
|
||||
return MaxBECount;
|
||||
}
|
||||
|
||||
ScalarEvolution::ExitLimit
|
||||
ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
|
||||
const Loop *L, bool IsSigned,
|
||||
bool ControlsExit, bool AllowPredicates) {
|
||||
SmallPtrSet<const SCEVPredicate *, 4> Predicates;
|
||||
// We handle only IV < Invariant
|
||||
if (!isLoopInvariant(RHS, L))
|
||||
return getCouldNotCompute();
|
||||
|
||||
const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
|
||||
bool PredicatedIV = false;
|
||||
@ -9779,6 +9819,17 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
|
||||
: ICmpInst::ICMP_ULT;
|
||||
const SCEV *Start = IV->getStart();
|
||||
const SCEV *End = RHS;
|
||||
// When the RHS is not invariant, we do not know the end bound of the loop and
|
||||
// cannot calculate the ExactBECount needed by ExitLimit. However, we can
|
||||
// calculate the MaxBECount, given the start, stride and max value for the end
|
||||
// bound of the loop (RHS), and the fact that IV does not overflow (which is
|
||||
// checked above).
|
||||
if (!isLoopInvariant(RHS, L)) {
|
||||
const SCEV *MaxBECount = computeMaxBECount(
|
||||
Start, Stride, RHS, getTypeSizeInBits(LHS->getType()), IsSigned);
|
||||
return ExitLimit(getCouldNotCompute() /* ExactNotTaken */, MaxBECount,
|
||||
false /*MaxOrZero*/, Predicates);
|
||||
}
|
||||
// If the backedge is taken at least once, then it will be taken
|
||||
// (End-Start)/Stride times (rounded up to a multiple of Stride), where Start
|
||||
// is the LHS value of the less-than comparison the first time it is evaluated
|
||||
@ -9811,37 +9862,8 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
|
||||
MaxBECount = BECountIfBackedgeTaken;
|
||||
MaxOrZero = true;
|
||||
} else {
|
||||
// Calculate the maximum backedge count based on the range of values
|
||||
// permitted by Start, End, and Stride.
|
||||
APInt MinStart = IsSigned ? getSignedRangeMin(Start)
|
||||
: getUnsignedRangeMin(Start);
|
||||
|
||||
unsigned BitWidth = getTypeSizeInBits(LHS->getType());
|
||||
|
||||
APInt StrideForMaxBECount;
|
||||
|
||||
if (PositiveStride)
|
||||
StrideForMaxBECount =
|
||||
IsSigned ? getSignedRangeMin(Stride)
|
||||
: getUnsignedRangeMin(Stride);
|
||||
else
|
||||
// Using a stride of 1 is safe when computing max backedge taken count for
|
||||
// a loop with unknown stride.
|
||||
StrideForMaxBECount = APInt(BitWidth, 1, IsSigned);
|
||||
|
||||
APInt Limit =
|
||||
IsSigned ? APInt::getSignedMaxValue(BitWidth) - (StrideForMaxBECount - 1)
|
||||
: APInt::getMaxValue(BitWidth) - (StrideForMaxBECount - 1);
|
||||
|
||||
// Although End can be a MAX expression we estimate MaxEnd considering only
|
||||
// the case End = RHS. This is safe because in the other case (End - Start)
|
||||
// is zero, leading to a zero maximum backedge taken count.
|
||||
APInt MaxEnd =
|
||||
IsSigned ? APIntOps::smin(getSignedRangeMax(RHS), Limit)
|
||||
: APIntOps::umin(getUnsignedRangeMax(RHS), Limit);
|
||||
|
||||
MaxBECount = computeBECount(getConstant(MaxEnd - MinStart),
|
||||
getConstant(StrideForMaxBECount), false);
|
||||
MaxBECount = computeMaxBECount(Start, Stride, RHS,
|
||||
getTypeSizeInBits(LHS->getType()), IsSigned);
|
||||
}
|
||||
|
||||
if (isa<SCEVCouldNotCompute>(MaxBECount) &&
|
||||
|
@ -288,3 +288,146 @@ loop.exit:
|
||||
exit:
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
; The end bound of the loop can change between iterations, so the exact trip
|
||||
; count is unknown, but SCEV can calculate the max trip count.
|
||||
define void @changing_end_bound(i32* %n_addr, i32* %addr) {
|
||||
; CHECK-LABEL: Determining loop execution counts for: @changing_end_bound
|
||||
; CHECK: Loop %loop: Unpredictable backedge-taken count.
|
||||
; CHECK: Loop %loop: max backedge-taken count is 2147483646
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
|
||||
%acc = phi i32 [ 0, %entry ], [ %acc.next, %loop ]
|
||||
%val = load atomic i32, i32* %addr unordered, align 4
|
||||
fence acquire
|
||||
%acc.next = add i32 %acc, %val
|
||||
%iv.next = add nsw i32 %iv, 1
|
||||
%n = load atomic i32, i32* %n_addr unordered, align 4
|
||||
%cmp = icmp slt i32 %iv.next, %n
|
||||
br i1 %cmp, label %loop, label %loop.exit
|
||||
|
||||
loop.exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Similar test as above, but unknown start value.
|
||||
; Also, there's no nsw on the iv.next, but SCEV knows
|
||||
; the termination condition is LT, so the IV cannot wrap.
|
||||
define void @changing_end_bound2(i32 %start, i32* %n_addr, i32* %addr) {
|
||||
; CHECK-LABEL: Determining loop execution counts for: @changing_end_bound2
|
||||
; CHECK: Loop %loop: Unpredictable backedge-taken count.
|
||||
; CHECK: Loop %loop: max backedge-taken count is -1
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%iv = phi i32 [ %start, %entry ], [ %iv.next, %loop ]
|
||||
%acc = phi i32 [ 0, %entry ], [ %acc.next, %loop ]
|
||||
%val = load atomic i32, i32* %addr unordered, align 4
|
||||
fence acquire
|
||||
%acc.next = add i32 %acc, %val
|
||||
%iv.next = add i32 %iv, 1
|
||||
%n = load atomic i32, i32* %n_addr unordered, align 4
|
||||
%cmp = icmp slt i32 %iv.next, %n
|
||||
br i1 %cmp, label %loop, label %loop.exit
|
||||
|
||||
loop.exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; changing end bound and greater than one stride
|
||||
define void @changing_end_bound3(i32 %start, i32* %n_addr, i32* %addr) {
|
||||
; CHECK-LABEL: Determining loop execution counts for: @changing_end_bound3
|
||||
; CHECK: Loop %loop: Unpredictable backedge-taken count.
|
||||
; CHECK: Loop %loop: max backedge-taken count is 1073741823
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%iv = phi i32 [ %start, %entry ], [ %iv.next, %loop ]
|
||||
%acc = phi i32 [ 0, %entry ], [ %acc.next, %loop ]
|
||||
%val = load atomic i32, i32* %addr unordered, align 4
|
||||
fence acquire
|
||||
%acc.next = add i32 %acc, %val
|
||||
%iv.next = add nsw i32 %iv, 4
|
||||
%n = load atomic i32, i32* %n_addr unordered, align 4
|
||||
%cmp = icmp slt i32 %iv.next, %n
|
||||
br i1 %cmp, label %loop, label %loop.exit
|
||||
|
||||
loop.exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; same as above test, but the IV can wrap around.
|
||||
; so the max backedge taken count is unpredictable.
|
||||
define void @changing_end_bound4(i32 %start, i32* %n_addr, i32* %addr) {
|
||||
; CHECK-LABEL: Determining loop execution counts for: @changing_end_bound4
|
||||
; CHECK: Loop %loop: Unpredictable backedge-taken count.
|
||||
; CHECK: Loop %loop: Unpredictable max backedge-taken count.
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%iv = phi i32 [ %start, %entry ], [ %iv.next, %loop ]
|
||||
%acc = phi i32 [ 0, %entry ], [ %acc.next, %loop ]
|
||||
%val = load atomic i32, i32* %addr unordered, align 4
|
||||
fence acquire
|
||||
%acc.next = add i32 %acc, %val
|
||||
%iv.next = add i32 %iv, 4
|
||||
%n = load atomic i32, i32* %n_addr unordered, align 4
|
||||
%cmp = icmp slt i32 %iv.next, %n
|
||||
br i1 %cmp, label %loop, label %loop.exit
|
||||
|
||||
loop.exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; unknown stride. Since it's not knownPositive, we do not estimate the max
|
||||
; backedge taken count.
|
||||
define void @changing_end_bound5(i32 %stride, i32 %start, i32* %n_addr, i32* %addr) {
|
||||
; CHECK-LABEL: Determining loop execution counts for: @changing_end_bound5
|
||||
; CHECK: Loop %loop: Unpredictable backedge-taken count.
|
||||
; CHECK: Loop %loop: Unpredictable max backedge-taken count.
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%iv = phi i32 [ %start, %entry ], [ %iv.next, %loop ]
|
||||
%acc = phi i32 [ 0, %entry ], [ %acc.next, %loop ]
|
||||
%val = load atomic i32, i32* %addr unordered, align 4
|
||||
fence acquire
|
||||
%acc.next = add i32 %acc, %val
|
||||
%iv.next = add nsw i32 %iv, %stride
|
||||
%n = load atomic i32, i32* %n_addr unordered, align 4
|
||||
%cmp = icmp slt i32 %iv.next, %n
|
||||
br i1 %cmp, label %loop, label %loop.exit
|
||||
|
||||
loop.exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; negative stride value
|
||||
define void @changing_end_bound6(i32 %start, i32* %n_addr, i32* %addr) {
|
||||
; CHECK-LABEL: Determining loop execution counts for: @changing_end_bound6
|
||||
; CHECK: Loop %loop: Unpredictable backedge-taken count.
|
||||
; CHECK: Loop %loop: Unpredictable max backedge-taken count.
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%iv = phi i32 [ %start, %entry ], [ %iv.next, %loop ]
|
||||
%acc = phi i32 [ 0, %entry ], [ %acc.next, %loop ]
|
||||
%val = load atomic i32, i32* %addr unordered, align 4
|
||||
fence acquire
|
||||
%acc.next = add i32 %acc, %val
|
||||
%iv.next = add nsw i32 %iv, -1
|
||||
%n = load atomic i32, i32* %n_addr unordered, align 4
|
||||
%cmp = icmp slt i32 %iv.next, %n
|
||||
br i1 %cmp, label %loop, label %loop.exit
|
||||
|
||||
loop.exit:
|
||||
ret void
|
||||
}
|
||||
|
@ -13,7 +13,7 @@ target datalayout = "n8:16:32:64"
|
||||
; CHECK: %[[PHI:.*]] = phi i32 [ 0, %entry ], [ %{{.*}}, %if.then5 ], [ %[[PHI]], %if.end ]
|
||||
; CHECK-LABEL: Determining loop execution counts for: @test
|
||||
; CHECK: Loop %for.body18: Unpredictable backedge-taken count.
|
||||
; CHECK: Loop %for.body18: Unpredictable max backedge-taken count.
|
||||
; CHECK: Loop %for.body18: max backedge-taken count is 2147483646
|
||||
; CHECK: Loop %for.body18: Unpredictable predicated backedge-taken count.
|
||||
; CHECK: Loop %for.cond: <multiple exits> Unpredictable backedge-taken count.
|
||||
; CHECK: Loop %for.cond: Unpredictable max backedge-taken count.
|
||||
@ -25,7 +25,7 @@ target datalayout = "n8:16:32:64"
|
||||
; CHECK: phi i32 [ %{{.*}}, %if.then5 ], [ 0, %entry ]
|
||||
; CHECK-LABEL: Determining loop execution counts for: @test
|
||||
; CHECK: Loop %for.body18: Unpredictable backedge-taken count.
|
||||
; CHECK: Loop %for.body18: Unpredictable max backedge-taken count.
|
||||
; CHECK: Loop %for.body18: max backedge-taken count is 2147483646
|
||||
; CHECK: Loop %for.body18: Unpredictable predicated backedge-taken count.
|
||||
; CHECK: Loop %for.cond: <multiple exits> Unpredictable backedge-taken count.
|
||||
; CHECK: Loop %for.cond: max backedge-taken count is -2147483647
|
||||
|
Loading…
x
Reference in New Issue
Block a user