mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-27 13:40:30 +00:00
[LV] Enable vectorization of loops where the IV has an external use
Vectorizing loops with "escaping" IVs has been disabled since r190790, due to PR17179. This re-enables it, with support for external use of both "post-increment" (last iteration) and "pre-increment" (second-to-last iteration) IVs. Differential Revision: http://reviews.llvm.org/D21048 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@272715 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
8291779372
commit
4d190088e9
@ -355,6 +355,12 @@ protected:
|
||||
|
||||
/// Create an empty loop, based on the loop ranges of the old loop.
|
||||
void createEmptyLoop();
|
||||
|
||||
/// Set up the values of the IVs correctly when exiting the vector loop.
|
||||
void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II,
|
||||
Value *CountRoundDown, Value *EndValue,
|
||||
BasicBlock *MiddleBlock);
|
||||
|
||||
/// Create a new induction variable inside L.
|
||||
PHINode *createInductionVariable(Loop *L, Value *Start, Value *End,
|
||||
Value *Step, Instruction *DL);
|
||||
@ -1433,13 +1439,11 @@ private:
|
||||
/// invariant.
|
||||
void collectStridedAccess(Value *LoadOrStoreInst);
|
||||
|
||||
/// \brief Returns true if we can vectorize using this PHI node as an
|
||||
/// induction.
|
||||
///
|
||||
/// Updates the vectorization state by adding \p Phi to the inductions list.
|
||||
/// This can set \p Phi as the main induction of the loop if \p Phi is a
|
||||
/// better choice for the main induction than the existing one.
|
||||
bool addInductionPhi(PHINode *Phi, InductionDescriptor ID);
|
||||
void addInductionPhi(PHINode *Phi, InductionDescriptor ID,
|
||||
SmallPtrSetImpl<Value *> &AllowedExit);
|
||||
|
||||
/// Report an analysis message to assist the user in diagnosing loops that are
|
||||
/// not vectorized. These are handled as LoopAccessReport rather than
|
||||
@ -1493,7 +1497,7 @@ private:
|
||||
/// Holds the widest induction type encountered.
|
||||
Type *WidestIndTy;
|
||||
|
||||
/// Allowed outside users. This holds the reduction
|
||||
/// Allowed outside users. This holds the induction and reduction
|
||||
/// vars which can be accessed from outside the loop.
|
||||
SmallPtrSet<Value *, 4> AllowedExit;
|
||||
/// This set holds the variables which are known to be uniform after
|
||||
@ -3219,6 +3223,9 @@ void InnerLoopVectorizer::createEmptyLoop() {
|
||||
// or the value at the end of the vectorized loop.
|
||||
BCResumeVal->addIncoming(EndValue, MiddleBlock);
|
||||
|
||||
// Fix up external users of the induction variable.
|
||||
fixupIVUsers(OrigPhi, II, CountRoundDown, EndValue, MiddleBlock);
|
||||
|
||||
// Fix the scalar body counter (PHI node).
|
||||
unsigned BlockIdx = OrigPhi->getBasicBlockIndex(ScalarPH);
|
||||
|
||||
@ -3258,6 +3265,59 @@ void InnerLoopVectorizer::createEmptyLoop() {
|
||||
Hints.setAlreadyVectorized();
|
||||
}
|
||||
|
||||
// Fix up external users of the induction variable. At this point, we are
|
||||
// in LCSSA form, with all external PHIs that use the IV having one input value,
|
||||
// coming from the remainder loop. We need those PHIs to also have a correct
|
||||
// value for the IV when arriving directly from the middle block.
|
||||
void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
|
||||
const InductionDescriptor &II,
|
||||
Value *CountRoundDown, Value *EndValue,
|
||||
BasicBlock *MiddleBlock) {
|
||||
// There are two kinds of external IV usages - those that use the value
|
||||
// computed in the last iteration (the PHI) and those that use the penultimate
|
||||
// value (the value that feeds into the phi from the loop latch).
|
||||
// We allow both, but they, obviously, have different values.
|
||||
|
||||
// We only expect at most one of each kind of user. This is because LCSSA will
|
||||
// canonicalize the users to a single PHI node per exit block, and we
|
||||
// currently only vectorize loops with a single exit.
|
||||
assert(OrigLoop->getExitBlock() && "Expected a single exit block");
|
||||
|
||||
// An external user of the last iteration's value should see the value that
|
||||
// the remainder loop uses to initialize its own IV.
|
||||
Value *PostInc = OrigPhi->getIncomingValueForBlock(OrigLoop->getLoopLatch());
|
||||
for (User *U : PostInc->users()) {
|
||||
Instruction *UI = cast<Instruction>(U);
|
||||
if (!OrigLoop->contains(UI)) {
|
||||
assert(isa<PHINode>(UI) && "Expected LCSSA form");
|
||||
cast<PHINode>(UI)->addIncoming(EndValue, MiddleBlock);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// An external user of the penultimate value need to see EndValue - Step.
|
||||
// The simplest way to get this is to recompute it from the constituent SCEVs,
|
||||
// that is Start + (Step * (CRD - 1)).
|
||||
for (User *U : OrigPhi->users()) {
|
||||
Instruction *UI = cast<Instruction>(U);
|
||||
if (!OrigLoop->contains(UI)) {
|
||||
assert(isa<PHINode>(UI) && "Expected LCSSA form");
|
||||
const DataLayout &DL =
|
||||
OrigLoop->getHeader()->getModule()->getDataLayout();
|
||||
|
||||
IRBuilder<> B(MiddleBlock->getTerminator());
|
||||
Value *CountMinusOne = B.CreateSub(
|
||||
CountRoundDown, ConstantInt::get(CountRoundDown->getType(), 1));
|
||||
Value *CMO = B.CreateSExtOrTrunc(CountMinusOne, II.getStep()->getType(),
|
||||
"cast.cmo");
|
||||
Value *Escape = II.transform(B, CMO, PSE.getSE(), DL);
|
||||
Escape->setName("ind.escape");
|
||||
cast<PHINode>(UI)->addIncoming(Escape, MiddleBlock);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
struct CSEDenseMapInfo {
|
||||
static bool canHandle(Instruction *I) {
|
||||
@ -4639,10 +4699,10 @@ static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
|
||||
/// \brief Check that the instruction has outside loop users and is not an
|
||||
/// identified reduction variable.
|
||||
static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
|
||||
SmallPtrSetImpl<Value *> &Reductions) {
|
||||
// Reduction instructions are allowed to have exit users. All other
|
||||
// instructions must not have external users.
|
||||
if (!Reductions.count(Inst))
|
||||
SmallPtrSetImpl<Value *> &AllowedExit) {
|
||||
// Reduction and Induction instructions are allowed to have exit users. All
|
||||
// other instructions must not have external users.
|
||||
if (!AllowedExit.count(Inst))
|
||||
// Check that all of the users of the loop are inside the BB.
|
||||
for (User *U : Inst->users()) {
|
||||
Instruction *UI = cast<Instruction>(U);
|
||||
@ -4655,8 +4715,9 @@ static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
|
||||
return false;
|
||||
}
|
||||
|
||||
bool LoopVectorizationLegality::addInductionPhi(PHINode *Phi,
|
||||
InductionDescriptor ID) {
|
||||
void LoopVectorizationLegality::addInductionPhi(
|
||||
PHINode *Phi, InductionDescriptor ID,
|
||||
SmallPtrSetImpl<Value *> &AllowedExit) {
|
||||
Inductions[Phi] = ID;
|
||||
Type *PhiTy = Phi->getType();
|
||||
const DataLayout &DL = Phi->getModule()->getDataLayout();
|
||||
@ -4682,18 +4743,13 @@ bool LoopVectorizationLegality::addInductionPhi(PHINode *Phi,
|
||||
Induction = Phi;
|
||||
}
|
||||
|
||||
// Both the PHI node itself, and the "post-increment" value feeding
|
||||
// back into the PHI node may have external users.
|
||||
AllowedExit.insert(Phi);
|
||||
AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
|
||||
|
||||
DEBUG(dbgs() << "LV: Found an induction variable.\n");
|
||||
|
||||
// Until we explicitly handle the case of an induction variable with
|
||||
// an outside loop user we have to give up vectorizing this loop.
|
||||
if (hasOutsideLoopUser(TheLoop, Phi, AllowedExit)) {
|
||||
emitAnalysis(VectorizationReport(Phi) <<
|
||||
"use of induction value outside of the "
|
||||
"loop is not handled by vectorizer");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
return;
|
||||
}
|
||||
|
||||
bool LoopVectorizationLegality::canVectorizeInstrs() {
|
||||
@ -4757,8 +4813,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
|
||||
|
||||
InductionDescriptor ID;
|
||||
if (InductionDescriptor::isInductionPHI(Phi, PSE, ID)) {
|
||||
if (!addInductionPhi(Phi, ID))
|
||||
return false;
|
||||
addInductionPhi(Phi, ID, AllowedExit);
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -4770,8 +4825,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
|
||||
// As a last resort, coerce the PHI to an AddRec expression
|
||||
// and re-try classifying it as an induction PHI.
|
||||
if (InductionDescriptor::isInductionPHI(Phi, PSE, ID, true)) {
|
||||
if (!addInductionPhi(Phi, ID))
|
||||
return false;
|
||||
addInductionPhi(Phi, ID, AllowedExit);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
84
test/Transforms/LoopVectorize/iv_outside_user.ll
Normal file
84
test/Transforms/LoopVectorize/iv_outside_user.ll
Normal file
@ -0,0 +1,84 @@
|
||||
; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 < %s | FileCheck %s
|
||||
|
||||
; CHECK-LABEL: @postinc
|
||||
; CHECK-LABEL: scalar.ph:
|
||||
; CHECK: %bc.resume.val = phi i32 [ %n.vec, %middle.block ], [ 0, %entry ]
|
||||
; CHECK-LABEL: for.end:
|
||||
; CHECK: %[[RET:.*]] = phi i32 [ {{.*}}, %for.body ], [ %n.vec, %middle.block ]
|
||||
; CHECK: ret i32 %[[RET]]
|
||||
define i32 @postinc(i32 %k) {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%inc.phi = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
||||
%inc = add nsw i32 %inc.phi, 1
|
||||
%cmp = icmp eq i32 %inc, %k
|
||||
br i1 %cmp, label %for.end, label %for.body
|
||||
|
||||
for.end:
|
||||
ret i32 %inc
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @preinc
|
||||
; CHECK-LABEL: middle.block:
|
||||
; CHECK: %3 = sub i32 %n.vec, 1
|
||||
; CHECK: %ind.escape = add i32 0, %3
|
||||
; CHECK-LABEL: scalar.ph:
|
||||
; CHECK: %bc.resume.val = phi i32 [ %n.vec, %middle.block ], [ 0, %entry ]
|
||||
; CHECK-LABEL: for.end:
|
||||
; CHECK: %[[RET:.*]] = phi i32 [ {{.*}}, %for.body ], [ %ind.escape, %middle.block ]
|
||||
; CHECK: ret i32 %[[RET]]
|
||||
define i32 @preinc(i32 %k) {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%inc.phi = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
||||
%inc = add nsw i32 %inc.phi, 1
|
||||
%cmp = icmp eq i32 %inc, %k
|
||||
br i1 %cmp, label %for.end, label %for.body
|
||||
|
||||
for.end:
|
||||
ret i32 %inc.phi
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @constpre
|
||||
; CHECK-LABEL: for.end:
|
||||
; CHECK: %[[RET:.*]] = phi i32 [ {{.*}}, %for.body ], [ 2, %middle.block ]
|
||||
; CHECK: ret i32 %[[RET]]
|
||||
define i32 @constpre() {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%inc.phi = phi i32 [ 32, %entry ], [ %inc, %for.body ]
|
||||
%inc = sub nsw i32 %inc.phi, 2
|
||||
%cmp = icmp eq i32 %inc, 0
|
||||
br i1 %cmp, label %for.end, label %for.body
|
||||
|
||||
for.end:
|
||||
ret i32 %inc.phi
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @geppre
|
||||
; CHECK-LABEL: middle.block:
|
||||
; CHECK: %ind.escape = getelementptr i32, i32* %ptr, i64 124
|
||||
; CHECK-LABEL: for.end:
|
||||
; CHECK: %[[RET:.*]] = phi i32* [ {{.*}}, %for.body ], [ %ind.escape, %middle.block ]
|
||||
; CHECK: ret i32* %[[RET]]
|
||||
define i32* @geppre(i32* %ptr) {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%inc.phi = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
||||
%ptr.phi = phi i32* [ %ptr, %entry ], [ %inc.ptr, %for.body ]
|
||||
%inc = add nsw i32 %inc.phi, 1
|
||||
%inc.ptr = getelementptr i32, i32* %ptr.phi, i32 4
|
||||
%cmp = icmp eq i32 %inc, 32
|
||||
br i1 %cmp, label %for.end, label %for.body
|
||||
|
||||
for.end:
|
||||
ret i32* %ptr.phi
|
||||
}
|
@ -1,7 +1,6 @@
|
||||
; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 < %s 2>&1 | FileCheck %s
|
||||
|
||||
; CHECK: remark: {{.*}}: loop not vectorized: value could not be identified as an induction or reduction variable
|
||||
; CHECK: remark: {{.*}}: loop not vectorized: use of induction value outside of the loop is not handled by vectorizer
|
||||
|
||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
|
||||
|
||||
@ -41,34 +40,3 @@ f1.exit.loopexit:
|
||||
%.lcssa = phi i32 [ %tmp17, %bb16 ]
|
||||
ret i32 %.lcssa
|
||||
}
|
||||
|
||||
; Don't vectorize this loop. Its phi node (induction variable) has an outside
|
||||
; loop user. We currently don't handle this case.
|
||||
; PR17179
|
||||
|
||||
; CHECK-LABEL: @test2(
|
||||
; CHECK-NOT: <2 x
|
||||
|
||||
@x1 = common global i32 0, align 4
|
||||
@x2 = common global i32 0, align 4
|
||||
@x0 = common global i32 0, align 4
|
||||
|
||||
define i32 @test2() {
|
||||
entry:
|
||||
store i32 0, i32* @x1, align 4
|
||||
%0 = load i32, i32* @x0, align 4
|
||||
br label %for.cond1.preheader
|
||||
|
||||
for.cond1.preheader:
|
||||
%inc7 = phi i32 [ 0, %entry ], [ %inc, %for.cond1.preheader ]
|
||||
%inc = add nsw i32 %inc7, 1
|
||||
%cmp = icmp eq i32 %inc, 52
|
||||
br i1 %cmp, label %for.end5, label %for.cond1.preheader
|
||||
|
||||
for.end5:
|
||||
%inc7.lcssa = phi i32 [ %inc7, %for.cond1.preheader ]
|
||||
%xor = xor i32 %inc7.lcssa, %0
|
||||
store i32 52, i32* @x1, align 4
|
||||
store i32 1, i32* @x2, align 4
|
||||
ret i32 %xor
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user