diff --git a/lib/Transforms/Utils/LoopUnrollPeel.cpp b/lib/Transforms/Utils/LoopUnrollPeel.cpp
index a5f2765e1a3..5c21490793e 100644
--- a/lib/Transforms/Utils/LoopUnrollPeel.cpp
+++ b/lib/Transforms/Utils/LoopUnrollPeel.cpp
@@ -46,6 +46,11 @@ static cl::opt<unsigned> UnrollForcePeelCount(
     "unroll-force-peel-count", cl::init(0), cl::Hidden,
     cl::desc("Force a peel count regardless of profiling information."));
 
+// Designates that a Phi is estimated to become invariant after an "infinite"
+// number of loop iterations (i.e. it may only become an invariant if the loop
+// is fully unrolled).
+static const unsigned InfiniteIterationsToInvariance = UINT_MAX;
+
 // Check whether we are capable of peeling this loop.
 static bool canPeel(Loop *L) {
   // Make sure the loop is in simplified form
@@ -66,10 +71,62 @@ static bool canPeel(Loop *L) {
   return true;
 }
 
+// This function calculates the number of iterations after which the given Phi
+// becomes an invariant. The pre-calculated values are memoized in the map. The
+// function (shortcut is I) is calculated according to the following definition:
+// Given %x = phi <Inputs from above the loop>, ..., [%y, %back.edge].
+//   If %y is a loop invariant, then I(%x) = 1.
+//   If %y is a Phi from the loop header, I(%x) = I(%y) + 1.
+//   Otherwise, I(%x) is infinite.
+// TODO: Actually if %y is an expression that depends only on Phi %z and some
+//       loop invariants, we can estimate I(%x) = I(%z) + 1. The example
+//       looks like:
+//         %x = phi(0, %a),  <-- becomes invariant starting from 3rd iteration.
+//         %y = phi(0, 5),
+//         %a = %y + 1.
+static unsigned calculateIterationsToInvariance(
+    PHINode *Phi, Loop *L, BasicBlock *BackEdge,
+    SmallDenseMap<PHINode *, unsigned> &IterationsToInvariance) {
+  assert(Phi->getParent() == L->getHeader() &&
+         "Non-loop Phi should not be checked for turning into invariant.");
+  assert(BackEdge == L->getLoopLatch() && "Wrong latch?");
+  // If we already know the answer, take it from the map.
+  auto I = IterationsToInvariance.find(Phi);
+  if (I != IterationsToInvariance.end())
+    return I->second;
+
+  // Otherwise we need to analyze the input from the back edge.
+  Value *Input = Phi->getIncomingValueForBlock(BackEdge);
+  // Place infinity to map to avoid infinite recursion for cycled Phis. Such
+  // cycles can never stop on an invariant.
+  IterationsToInvariance[Phi] = InfiniteIterationsToInvariance;
+  unsigned ToInvariance = InfiniteIterationsToInvariance;
+
+  if (L->isLoopInvariant(Input))
+    ToInvariance = 1u;
+  else if (PHINode *IncPhi = dyn_cast<PHINode>(Input)) {
+    // Only consider Phis in header block.
+    if (IncPhi->getParent() != L->getHeader())
+      return InfiniteIterationsToInvariance;
+    // If the input becomes an invariant after X iterations, then our Phi
+    // becomes an invariant after X + 1 iterations.
+    unsigned InputToInvariance = calculateIterationsToInvariance(
+        IncPhi, L, BackEdge, IterationsToInvariance);
+    if (InputToInvariance != InfiniteIterationsToInvariance)
+      ToInvariance = InputToInvariance + 1u;
+  }
+
+  // If we found that this Phi lies in an invariant chain, update the map.
+  if (ToInvariance != InfiniteIterationsToInvariance)
+    IterationsToInvariance[Phi] = ToInvariance;
+  return ToInvariance;
+}
+
 // Return the number of iterations we want to peel off.
 void llvm::computePeelCount(Loop *L, unsigned LoopSize,
                             TargetTransformInfo::UnrollingPreferences &UP,
                             unsigned &TripCount) {
+  assert(LoopSize > 0 && "Zero loop size is not allowed!");
   UP.PeelCount = 0;
   if (!canPeel(L))
     return;
@@ -78,31 +135,37 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
   if (!L->empty())
     return;
 
-  // Try to find a Phi node that has the same loop invariant as an input from
-  // its only back edge. If there is such Phi, peeling 1 iteration from the
-  // loop is profitable, because starting from 2nd iteration we will have an
-  // invariant instead of this Phi.
+  // Here we try to get rid of Phis which become invariants after 1, 2, ..., N
+  // iterations of the loop. For this we compute the number of iterations after
+  // which every Phi is guaranteed to become an invariant, and try to peel the
+  // maximum number of iterations among these values, thus turning all those
+  // Phis into invariants.
   // First, check that we can peel at least one iteration.
   if (2 * LoopSize <= UP.Threshold && UnrollPeelMaxCount > 0) {
+    // Store the pre-calculated values here.
+    SmallDenseMap<PHINode *, unsigned> IterationsToInvariance;
+    // Now go through all Phis to calculate the number of iterations they
+    // need to become invariants.
+    unsigned DesiredPeelCount = 0;
     BasicBlock *BackEdge = L->getLoopLatch();
     assert(BackEdge && "Loop is not in simplified form?");
-    BasicBlock *Header = L->getHeader();
-    // Iterate over Phis to find one with invariant input on back edge.
-    bool FoundCandidate = false;
-    PHINode *Phi;
-    for (auto BI = Header->begin(); isa<PHINode>(&*BI); ++BI) {
-      Phi = cast<PHINode>(&*BI);
-      Value *Input = Phi->getIncomingValueForBlock(BackEdge);
-      if (L->isLoopInvariant(Input)) {
-        FoundCandidate = true;
-        break;
-      }
+    for (auto BI = L->getHeader()->begin(); isa<PHINode>(&*BI); ++BI) {
+      PHINode *Phi = cast<PHINode>(&*BI);
+      unsigned ToInvariance = calculateIterationsToInvariance(
+          Phi, L, BackEdge, IterationsToInvariance);
+      if (ToInvariance != InfiniteIterationsToInvariance)
+        DesiredPeelCount = std::max(DesiredPeelCount, ToInvariance);
     }
-    if (FoundCandidate) {
-      DEBUG(dbgs() << "Peel one iteration to get rid of " << *Phi
-                   << " because starting from 2nd iteration it is always"
-                   << " an invariant\n");
-      UP.PeelCount = 1;
+    if (DesiredPeelCount > 0) {
+      // Pay respect to limitations implied by loop size and the max peel count.
+      unsigned MaxPeelCount = UnrollPeelMaxCount;
+      MaxPeelCount = std::min(MaxPeelCount, UP.Threshold / LoopSize - 1);
+      DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount);
+      // Both limits are at least 1 here, so the clamped count stays positive.
+      assert(DesiredPeelCount > 0 && "Wrong loop size estimation?");
+      DEBUG(dbgs() << "Peel " << DesiredPeelCount << " iteration(s) to turn"
+                   << " some Phis into invariants.\n");
+      UP.PeelCount = DesiredPeelCount;
       return;
     }
   }
diff --git a/test/Transforms/LoopUnroll/peel-loop-not-forced.ll b/test/Transforms/LoopUnroll/peel-loop-not-forced.ll
index c3cbbf1ca0c..8691481acc1 100644
--- a/test/Transforms/LoopUnroll/peel-loop-not-forced.ll
+++ b/test/Transforms/LoopUnroll/peel-loop-not-forced.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -S -loop-unroll -unroll-threshold=8 | FileCheck %s
+; RUN: opt < %s -S -loop-unroll -unroll-threshold=30 | FileCheck %s
 
 define i32 @invariant_backedge_1(i32 %a, i32 %b) {
 ; CHECK-LABEL: @invariant_backedge_1
@@ -25,10 +25,112 @@ exit:
   ret i32 %sum
 }
 
-; Peeling should fail due to method size.
 define i32 @invariant_backedge_2(i32 %a, i32 %b) {
+; This loop should be peeled twice because it has a Phi which becomes invariant
+; starting from 3rd iteration.
 ; CHECK-LABEL: @invariant_backedge_2
-; CHECK-NOT: loop.peel:
+; CHECK: loop.peel{{.*}}:
+; CHECK: loop.peel{{.*}}:
+; CHECK: %i = phi
+; CHECK: %sum = phi
+; CHECK-NOT: %half.inv = phi
+; CHECK-NOT: %plus = phi
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %inc, %loop ]
+  %sum = phi i32 [ 0, %entry ], [ %incsum, %loop ]
+  %half.inv = phi i32 [ %a, %entry ], [ %b, %loop ]
+  %plus = phi i32 [ %a, %entry ], [ %half.inv, %loop ]
+
+  %incsum = add i32 %sum, %plus
+  %inc = add i32 %i, 1
+  %cmp = icmp slt i32 %i, 1000
+
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret i32 %sum
+}
+
+define i32 @invariant_backedge_3(i32 %a, i32 %b) {
+; This loop should be peeled thrice because it has a Phi which becomes invariant
+; starting from 4th iteration.
+; CHECK-LABEL: @invariant_backedge_3
+; CHECK: loop.peel{{.*}}:
+; CHECK: loop.peel{{.*}}:
+; CHECK: loop.peel{{.*}}:
+; CHECK: %i = phi
+; CHECK: %sum = phi
+; CHECK-NOT: %half.inv = phi
+; CHECK-NOT: %half.inv.2 = phi
+; CHECK-NOT: %plus = phi
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %inc, %loop ]
+  %sum = phi i32 [ 0, %entry ], [ %incsum, %loop ]
+  %half.inv = phi i32 [ %a, %entry ], [ %b, %loop ]
+  %half.inv.2 = phi i32 [ %a, %entry ], [ %half.inv, %loop ]
+  %plus = phi i32 [ %a, %entry ], [ %half.inv.2, %loop ]
+
+  %incsum = add i32 %sum, %plus
+  %inc = add i32 %i, 1
+  %cmp = icmp slt i32 %i, 1000
+
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret i32 %sum
+}
+
+define i32 @invariant_backedge_limited_by_size(i32 %a, i32 %b) {
+; This loop should normally be peeled thrice because it has a Phi which becomes
+; invariant starting from 4th iteration, but the size of the loop only allows
+; us to peel twice because we are restricted to 30 instructions in resulting
+; code. Thus, %plus Phi node should stay in loop even though its backedge
+; input is an invariant.
+; CHECK-LABEL: @invariant_backedge_limited_by_size
+; CHECK: loop.peel{{.*}}:
+; CHECK: loop.peel{{.*}}:
+; CHECK: %i = phi
+; CHECK: %sum = phi
+; CHECK: %plus = phi i32 [ %a, {{.*}} ], [ %b, %loop ]
+; CHECK-NOT: %half.inv = phi
+; CHECK-NOT: %half.inv.2 = phi
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %inc, %loop ]
+  %sum = phi i32 [ 0, %entry ], [ %incsum, %loop ]
+  %half.inv = phi i32 [ %a, %entry ], [ %b, %loop ]
+  %half.inv.2 = phi i32 [ %a, %entry ], [ %half.inv, %loop ]
+  %plus = phi i32 [ %a, %entry ], [ %half.inv.2, %loop ]
+
+  %incsum = add i32 %sum, %plus
+  %inc = add i32 %i, 1
+  %cmp = icmp slt i32 %i, 1000
+
+  %incsum2 = add i32 %incsum, %plus
+  %incsum3 = add i32 %incsum, %plus
+  %incsum4 = add i32 %incsum, %plus
+  %incsum5 = add i32 %incsum, %plus
+  %incsum6 = add i32 %incsum, %plus
+  %incsum7 = add i32 %incsum, %plus
+
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret i32 %sum
+}
+
+; Peeling should fail due to method size.
+define i32 @invariant_backedge_negative(i32 %a, i32 %b) {
+; CHECK-LABEL: @invariant_backedge_negative
+; CHECK-NOT: loop.peel{{.*}}:
 ; CHECK: loop:
 ; CHECK: %i = phi
 ; CHECK: %sum = phi
@@ -43,6 +145,47 @@ loop:
 
   %incsum = add i32 %sum, %plus
   %incsum2 = add i32 %incsum, %plus
+  %incsum3 = add i32 %incsum, %plus
+  %incsum4 = add i32 %incsum, %plus
+  %incsum5 = add i32 %incsum, %plus
+  %incsum6 = add i32 %incsum, %plus
+  %incsum7 = add i32 %incsum, %plus
+  %incsum8 = add i32 %incsum, %plus
+  %incsum9 = add i32 %incsum, %plus
+  %incsum10 = add i32 %incsum, %plus
+  %incsum11 = add i32 %incsum, %plus
+  %incsum12 = add i32 %incsum, %plus
+  %incsum13 = add i32 %incsum, %plus
+  %incsum14 = add i32 %incsum, %plus
+  %incsum15 = add i32 %incsum, %plus
+  %inc = add i32 %i, 1
+  %cmp = icmp slt i32 %i, 1000
+
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret i32 %sum
+}
+
+define i32 @cycled_phis(i32 %a, i32 %b) {
+; Make sure that we do not crash working with cycled Phis and don't peel it.
+; TODO: Actually this loop should be partially unrolled with factor 2.
+; CHECK-LABEL: @cycled_phis
+; CHECK-NOT: loop.peel{{.*}}:
+; CHECK: loop:
+; CHECK: %i = phi
+; CHECK: %phi.a = phi
+; CHECK: %phi.b = phi
+; CHECK: %sum = phi
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %inc, %loop ]
+  %phi.a = phi i32 [ %a, %entry ], [ %phi.b, %loop ]
+  %phi.b = phi i32 [ %b, %entry ], [ %phi.a, %loop ]
+  %sum = phi i32 [ 0, %entry], [ %incsum, %loop ]
+  %incsum = add i32 %sum, %phi.a
   %inc = add i32 %i, 1
   %cmp = icmp slt i32 %i, 1000
 