mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-10 06:03:52 +00:00
[LoopPeeling] Get rid of Phis that become invariant after N steps
This patch is a generalization of the improvement introduced in rL296898. Previously, we were able to peel one iteration of a loop to get rid of a Phi that becomes an invariant on the 2nd iteration. In the more general case, if a Phi becomes invariant after N iterations, we can peel N times and turn it into an invariant. In order to do this, for every Phi in the loop's header we define the Invariant Depth value, which is calculated as follows: Given %x = phi <Inputs from above the loop>, ..., [%y, %back.edge]. If %y is a loop invariant, then Depth(%x) = 1. If %y is a Phi from the loop header, Depth(%x) = Depth(%y) + 1. Otherwise, Depth(%x) is infinite. Notice that if we peel a loop, all Phis with Depth = 1 become invariants, and all other Phis with finite depth decrease their depth by 1. Thus, peeling the first N iterations allows us to turn all Phis with Depth <= N into invariants. Reviewers: reames, apilipenko, mkuper, skatkov, anna, sanjoy Reviewed By: sanjoy Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D31613 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@300446 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
38eae0ca5e
commit
63c0d1d702
@ -46,6 +46,11 @@ static cl::opt<unsigned> UnrollForcePeelCount(
|
||||
"unroll-force-peel-count", cl::init(0), cl::Hidden,
|
||||
cl::desc("Force a peel count regardless of profiling information."));
|
||||
|
||||
// Designates that a Phi is estimated to become invariant after an "infinite"
|
||||
// number of loop iterations (i.e. it may become an invariant only if the loop
|
||||
// is fully unrolled).
// Used as a sentinel: calculateIterationsToInvariance returns this value for a
// Phi that does not lie on a finite chain ending in a loop invariant.
|
||||
static const unsigned InfiniteIterationsToInvariance = UINT_MAX;
|
||||
|
||||
// Check whether we are capable of peeling this loop.
|
||||
static bool canPeel(Loop *L) {
|
||||
// Make sure the loop is in simplified form
|
||||
@ -66,10 +71,62 @@ static bool canPeel(Loop *L) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// This function calculates the number of iterations after which the given Phi
|
||||
// becomes an invariant. The pre-calculated values are memorized in the map. The
|
||||
// function (shortcut is I) is calculated according to the following definition:
|
||||
// Given %x = phi <Inputs from above the loop>, ..., [%y, %back.edge].
|
||||
// If %y is a loop invariant, then I(%x) = 1.
|
||||
// If %y is a Phi from the loop header, I(%x) = I(%y) + 1.
|
||||
// Otherwise, I(%x) is infinite.
|
||||
// TODO: Actually if %y is an expression that depends only on Phi %z and some
|
||||
// loop invariants, we can estimate I(%x) = I(%z) + 1. The example
|
||||
// looks like:
|
||||
// %x = phi(0, %a), <-- becomes invariant starting from 3rd iteration.
|
||||
// %y = phi(0, 5),
|
||||
// %a = %y + 1.
|
||||
static unsigned calculateIterationsToInvariance(
|
||||
PHINode *Phi, Loop *L, BasicBlock *BackEdge,
|
||||
SmallDenseMap<PHINode *, unsigned> &IterationsToInvariance) {
|
||||
assert(Phi->getParent() == L->getHeader() &&
|
||||
"Non-loop Phi should not be checked for turning into invariant.");
|
||||
assert(BackEdge == L->getLoopLatch() && "Wrong latch?");
|
||||
// If we already know the answer, take it from the map.
|
||||
auto I = IterationsToInvariance.find(Phi);
|
||||
if (I != IterationsToInvariance.end())
|
||||
return I->second;
|
||||
|
||||
// Otherwise we need to analyze the input from the back edge.
|
||||
Value *Input = Phi->getIncomingValueForBlock(BackEdge);
|
||||
// Place infinity to map to avoid infinite recursion for cycled Phis. Such
|
||||
// cycles can never stop on an invariant.
|
||||
IterationsToInvariance[Phi] = InfiniteIterationsToInvariance;
|
||||
unsigned ToInvariance = InfiniteIterationsToInvariance;
|
||||
|
||||
if (L->isLoopInvariant(Input))
|
||||
ToInvariance = 1u;
|
||||
else if (PHINode *IncPhi = dyn_cast<PHINode>(Input)) {
|
||||
// Only consider Phis in header block.
|
||||
if (IncPhi->getParent() != L->getHeader())
|
||||
return InfiniteIterationsToInvariance;
|
||||
// If the input becomes an invariant after X iterations, then our Phi
|
||||
// becomes an invariant after X + 1 iterations.
|
||||
unsigned InputToInvariance = calculateIterationsToInvariance(
|
||||
IncPhi, L, BackEdge, IterationsToInvariance);
|
||||
if (InputToInvariance != InfiniteIterationsToInvariance)
|
||||
ToInvariance = InputToInvariance + 1u;
|
||||
}
|
||||
|
||||
// If we found that this Phi lies in an invariant chain, update the map.
|
||||
if (ToInvariance != InfiniteIterationsToInvariance)
|
||||
IterationsToInvariance[Phi] = ToInvariance;
|
||||
return ToInvariance;
|
||||
}
|
||||
|
||||
// Return the number of iterations we want to peel off.
|
||||
void llvm::computePeelCount(Loop *L, unsigned LoopSize,
|
||||
TargetTransformInfo::UnrollingPreferences &UP,
|
||||
unsigned &TripCount) {
|
||||
assert(LoopSize > 0 && "Zero loop size is not allowed!");
|
||||
UP.PeelCount = 0;
|
||||
if (!canPeel(L))
|
||||
return;
|
||||
@ -78,31 +135,37 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
|
||||
if (!L->empty())
|
||||
return;
|
||||
|
||||
// Try to find a Phi node that has the same loop invariant as an input from
|
||||
// its only back edge. If there is such Phi, peeling 1 iteration from the
|
||||
// loop is profitable, because starting from 2nd iteration we will have an
|
||||
// invariant instead of this Phi.
|
||||
// Here we try to get rid of Phis which become invariants after 1, 2, ..., N
|
||||
// iterations of the loop. For this we compute the number of iterations after
|
||||
// which every Phi is guaranteed to become an invariant, and try to peel the
|
||||
// maximum number of iterations among these values, thus turning all those
|
||||
// Phis into invariants.
|
||||
// First, check that we can peel at least one iteration.
|
||||
if (2 * LoopSize <= UP.Threshold && UnrollPeelMaxCount > 0) {
|
||||
// Store the pre-calculated values here.
|
||||
SmallDenseMap<PHINode *, unsigned> IterationsToInvariance;
|
||||
// Now go through all Phis to calculate the number of iterations they
|
||||
// need to become invariants.
|
||||
unsigned DesiredPeelCount = 0;
|
||||
BasicBlock *BackEdge = L->getLoopLatch();
|
||||
assert(BackEdge && "Loop is not in simplified form?");
|
||||
BasicBlock *Header = L->getHeader();
|
||||
// Iterate over Phis to find one with invariant input on back edge.
|
||||
bool FoundCandidate = false;
|
||||
PHINode *Phi;
|
||||
for (auto BI = Header->begin(); isa<PHINode>(&*BI); ++BI) {
|
||||
Phi = cast<PHINode>(&*BI);
|
||||
Value *Input = Phi->getIncomingValueForBlock(BackEdge);
|
||||
if (L->isLoopInvariant(Input)) {
|
||||
FoundCandidate = true;
|
||||
break;
|
||||
}
|
||||
for (auto BI = L->getHeader()->begin(); isa<PHINode>(&*BI); ++BI) {
|
||||
PHINode *Phi = cast<PHINode>(&*BI);
|
||||
unsigned ToInvariance = calculateIterationsToInvariance(
|
||||
Phi, L, BackEdge, IterationsToInvariance);
|
||||
if (ToInvariance != InfiniteIterationsToInvariance)
|
||||
DesiredPeelCount = std::max(DesiredPeelCount, ToInvariance);
|
||||
}
|
||||
if (FoundCandidate) {
|
||||
DEBUG(dbgs() << "Peel one iteration to get rid of " << *Phi
|
||||
<< " because starting from 2nd iteration it is always"
|
||||
<< " an invariant\n");
|
||||
UP.PeelCount = 1;
|
||||
if (DesiredPeelCount > 0) {
|
||||
// Pay respect to limitations implied by loop size and the max peel count.
|
||||
unsigned MaxPeelCount = UnrollPeelMaxCount;
|
||||
MaxPeelCount = std::min(MaxPeelCount, UP.Threshold / LoopSize - 1);
|
||||
DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount);
|
||||
// Consider max peel count limitation.
|
||||
assert(DesiredPeelCount > 0 && "Wrong loop size estimation?");
|
||||
DEBUG(dbgs() << "Peel " << DesiredPeelCount << " iteration(s) to turn"
|
||||
<< " some Phis into invariants.\n");
|
||||
UP.PeelCount = DesiredPeelCount;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: opt < %s -S -loop-unroll -unroll-threshold=8 | FileCheck %s
|
||||
; RUN: opt < %s -S -loop-unroll -unroll-threshold=30 | FileCheck %s
|
||||
|
||||
define i32 @invariant_backedge_1(i32 %a, i32 %b) {
|
||||
; CHECK-LABEL: @invariant_backedge_1
|
||||
@ -25,10 +25,112 @@ exit:
|
||||
ret i32 %sum
|
||||
}
|
||||
|
||||
; Peeling should fail due to method size.
|
||||
define i32 @invariant_backedge_2(i32 %a, i32 %b) {
|
||||
; This loop should be peeled twice because it has a Phi which becomes invariant
|
||||
; starting from 3rd iteration.
|
||||
; CHECK-LABEL: @invariant_backedge_2
|
||||
; CHECK-NOT: loop.peel:
|
||||
; CHECK: loop.peel{{.*}}:
|
||||
; CHECK: loop.peel{{.*}}:
|
||||
; CHECK: %i = phi
|
||||
; CHECK: %sum = phi
|
||||
; CHECK-NOT: %half.inv = phi
|
||||
; CHECK-NOT: %plus = phi
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%i = phi i32 [ 0, %entry ], [ %inc, %loop ]
|
||||
%sum = phi i32 [ 0, %entry ], [ %incsum, %loop ]
|
||||
%half.inv = phi i32 [ %a, %entry ], [ %b, %loop ]
|
||||
%plus = phi i32 [ %a, %entry ], [ %half.inv, %loop ]
|
||||
|
||||
%incsum = add i32 %sum, %plus
|
||||
%inc = add i32 %i, 1
|
||||
%cmp = icmp slt i32 %i, 1000
|
||||
|
||||
br i1 %cmp, label %loop, label %exit
|
||||
|
||||
exit:
|
||||
ret i32 %sum
|
||||
}
|
||||
|
||||
define i32 @invariant_backedge_3(i32 %a, i32 %b) {
|
||||
; This loop should be peeled thrice because it has a Phi which becomes invariant
|
||||
; starting from the 4th iteration.
; The chain is %half.inv (back-edge input %b is loop invariant, so it is
; invariant after 1 iteration) -> %half.inv.2 (after 2) -> %plus (after 3).
; Peeling three iterations therefore removes all three Phis from the loop,
; while %i and %sum remain.
|
||||
; CHECK-LABEL: @invariant_backedge_3
|
||||
; CHECK: loop.peel{{.*}}:
|
||||
; CHECK: loop.peel{{.*}}:
|
||||
; CHECK: loop.peel{{.*}}:
|
||||
; CHECK: %i = phi
|
||||
; CHECK: %sum = phi
|
||||
; CHECK-NOT: %half.inv = phi
|
||||
; CHECK-NOT: %half.inv.2 = phi
|
||||
; CHECK-NOT: %plus = phi
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%i = phi i32 [ 0, %entry ], [ %inc, %loop ]
|
||||
%sum = phi i32 [ 0, %entry ], [ %incsum, %loop ]
|
||||
%half.inv = phi i32 [ %a, %entry ], [ %b, %loop ]
|
||||
%half.inv.2 = phi i32 [ %a, %entry ], [ %half.inv, %loop ]
|
||||
%plus = phi i32 [ %a, %entry ], [ %half.inv.2, %loop ]
|
||||
|
||||
%incsum = add i32 %sum, %plus
|
||||
%inc = add i32 %i, 1
|
||||
%cmp = icmp slt i32 %i, 1000
|
||||
|
||||
br i1 %cmp, label %loop, label %exit
|
||||
|
||||
exit:
|
||||
ret i32 %sum
|
||||
}
|
||||
|
||||
define i32 @invariant_backedge_limited_by_size(i32 %a, i32 %b) {
|
||||
; This loop should normally be peeled thrice because it has a Phi which becomes
|
||||
; invariant starting from the 4th iteration, but the size of the loop only
|
||||
; allows us to peel twice because we are restricted to 30 instructions in the
|
||||
; resulting code. Thus, the %plus Phi node should stay in the loop even though
|
||||
; its backedge input is an invariant.
|
||||
; CHECK-LABEL: @invariant_backedge_limited_by_size
|
||||
; CHECK: loop.peel{{.*}}:
|
||||
; CHECK: loop.peel{{.*}}:
|
||||
; CHECK: %i = phi
|
||||
; CHECK: %sum = phi
|
||||
; CHECK: %plus = phi i32 [ %a, {{.*}} ], [ %b, %loop ]
|
||||
; CHECK-NOT: %half.inv = phi
|
||||
; CHECK-NOT: %half.inv.2 = phi
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%i = phi i32 [ 0, %entry ], [ %inc, %loop ]
|
||||
%sum = phi i32 [ 0, %entry ], [ %incsum, %loop ]
|
||||
%half.inv = phi i32 [ %a, %entry ], [ %b, %loop ]
|
||||
%half.inv.2 = phi i32 [ %a, %entry ], [ %half.inv, %loop ]
|
||||
%plus = phi i32 [ %a, %entry ], [ %half.inv.2, %loop ]
|
||||
|
||||
%incsum = add i32 %sum, %plus
|
||||
%inc = add i32 %i, 1
|
||||
%cmp = icmp slt i32 %i, 1000
|
||||
|
||||
; The results of %incsum2 .. %incsum7 are not used anywhere in this function;
; presumably they exist only to enlarge the loop body so that the size limit
; caps the peel count at two.
%incsum2 = add i32 %incsum, %plus
|
||||
%incsum3 = add i32 %incsum, %plus
|
||||
%incsum4 = add i32 %incsum, %plus
|
||||
%incsum5 = add i32 %incsum, %plus
|
||||
%incsum6 = add i32 %incsum, %plus
|
||||
%incsum7 = add i32 %incsum, %plus
|
||||
|
||||
br i1 %cmp, label %loop, label %exit
|
||||
|
||||
exit:
|
||||
ret i32 %sum
|
||||
}
|
||||
|
||||
; Peeling should fail due to method size.
|
||||
define i32 @invariant_backedge_negative(i32 %a, i32 %b) {
|
||||
; CHECK-LABEL: @invariant_backedge_negative
|
||||
; CHECK-NOT: loop.peel{{.*}}:
|
||||
; CHECK: loop:
|
||||
; CHECK: %i = phi
|
||||
; CHECK: %sum = phi
|
||||
@ -43,6 +145,47 @@ loop:
|
||||
|
||||
%incsum = add i32 %sum, %plus
|
||||
%incsum2 = add i32 %incsum, %plus
|
||||
%incsum3 = add i32 %incsum, %plus
|
||||
%incsum4 = add i32 %incsum, %plus
|
||||
%incsum5 = add i32 %incsum, %plus
|
||||
%incsum6 = add i32 %incsum, %plus
|
||||
%incsum7 = add i32 %incsum, %plus
|
||||
%incsum8 = add i32 %incsum, %plus
|
||||
%incsum9 = add i32 %incsum, %plus
|
||||
%incsum10 = add i32 %incsum, %plus
|
||||
%incsum11 = add i32 %incsum, %plus
|
||||
%incsum12 = add i32 %incsum, %plus
|
||||
%incsum13 = add i32 %incsum, %plus
|
||||
%incsum14 = add i32 %incsum, %plus
|
||||
%incsum15 = add i32 %incsum, %plus
|
||||
%inc = add i32 %i, 1
|
||||
%cmp = icmp slt i32 %i, 1000
|
||||
|
||||
br i1 %cmp, label %loop, label %exit
|
||||
|
||||
exit:
|
||||
ret i32 %sum
|
||||
}
|
||||
|
||||
define i32 @cycled_phis(i32 %a, i32 %b) {
|
||||
; Make sure that we do not crash working with cycled Phis and don't peel it.
|
||||
; TODO: Actually this loop should be partially unrolled with factor 2.
|
||||
; CHECK-LABEL: @cycled_phis
|
||||
; CHECK-NOT: loop.peel{{.*}}:
|
||||
; CHECK: loop:
|
||||
; CHECK: %i = phi
|
||||
; CHECK: %phi.a = phi
|
||||
; CHECK: %phi.b = phi
|
||||
; CHECK: %sum = phi
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%i = phi i32 [ 0, %entry ], [ %inc, %loop ]
|
||||
%phi.a = phi i32 [ %a, %entry ], [ %phi.b, %loop ]
|
||||
%phi.b = phi i32 [ %b, %entry ], [ %phi.a, %loop ]
|
||||
%sum = phi i32 [ 0, %entry], [ %incsum, %loop ]
|
||||
%incsum = add i32 %sum, %phi.a
|
||||
%inc = add i32 %i, 1
|
||||
%cmp = icmp slt i32 %i, 1000
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user