mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-15 07:39:31 +00:00
Recommit [LV] Enable vectorization of loops where the IV has an external use
r272715 broke libcxx because it did not correctly handle cases where the last iteration of one IV is the second-to-last iteration of another. Original commit message: Vectorizing loops with "escaping" IVs has been disabled since r190790, due to PR17179. This re-enables it, with support for external use of both "post-increment" (last iteration) and "pre-increment" (second-to-last iteration) IVs. llvm-svn: 272742
This commit is contained in:
parent
0d147cb4a2
commit
f25364903d
@ -355,6 +355,12 @@ protected:
|
||||
|
||||
/// Create an empty loop, based on the loop ranges of the old loop.
|
||||
void createEmptyLoop();
|
||||
|
||||
/// Set up the values of the IVs correctly when exiting the vector loop.
|
||||
void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II,
|
||||
Value *CountRoundDown, Value *EndValue,
|
||||
BasicBlock *MiddleBlock);
|
||||
|
||||
/// Create a new induction variable inside L.
|
||||
PHINode *createInductionVariable(Loop *L, Value *Start, Value *End,
|
||||
Value *Step, Instruction *DL);
|
||||
@ -1433,13 +1439,11 @@ private:
|
||||
/// invariant.
|
||||
void collectStridedAccess(Value *LoadOrStoreInst);
|
||||
|
||||
/// \brief Returns true if we can vectorize using this PHI node as an
|
||||
/// induction.
|
||||
///
|
||||
/// Updates the vectorization state by adding \p Phi to the inductions list.
|
||||
/// This can set \p Phi as the main induction of the loop if \p Phi is a
|
||||
/// better choice for the main induction than the existing one.
|
||||
bool addInductionPhi(PHINode *Phi, InductionDescriptor ID);
|
||||
void addInductionPhi(PHINode *Phi, InductionDescriptor ID,
|
||||
SmallPtrSetImpl<Value *> &AllowedExit);
|
||||
|
||||
/// Report an analysis message to assist the user in diagnosing loops that are
|
||||
/// not vectorized. These are handled as LoopAccessReport rather than
|
||||
@ -1493,7 +1497,7 @@ private:
|
||||
/// Holds the widest induction type encountered.
|
||||
Type *WidestIndTy;
|
||||
|
||||
/// Allowed outside users. This holds the reduction
|
||||
/// Allowed outside users. This holds the induction and reduction
|
||||
/// vars which can be accessed from outside the loop.
|
||||
SmallPtrSet<Value *, 4> AllowedExit;
|
||||
/// This set holds the variables which are known to be uniform after
|
||||
@ -3219,6 +3223,9 @@ void InnerLoopVectorizer::createEmptyLoop() {
|
||||
// or the value at the end of the vectorized loop.
|
||||
BCResumeVal->addIncoming(EndValue, MiddleBlock);
|
||||
|
||||
// Fix up external users of the induction variable.
|
||||
fixupIVUsers(OrigPhi, II, CountRoundDown, EndValue, MiddleBlock);
|
||||
|
||||
// Fix the scalar body counter (PHI node).
|
||||
unsigned BlockIdx = OrigPhi->getBasicBlockIndex(ScalarPH);
|
||||
|
||||
@ -3258,6 +3265,71 @@ void InnerLoopVectorizer::createEmptyLoop() {
|
||||
Hints.setAlreadyVectorized();
|
||||
}
|
||||
|
||||
// Fix up external users of the induction variable. At this point, we are
|
||||
// in LCSSA form, with all external PHIs that use the IV having one input value,
|
||||
// coming from the remainder loop. We need those PHIs to also have a correct
|
||||
// value for the IV when arriving directly from the middle block.
|
||||
void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
|
||||
const InductionDescriptor &II,
|
||||
Value *CountRoundDown, Value *EndValue,
|
||||
BasicBlock *MiddleBlock) {
|
||||
// There are two kinds of external IV usages - those that use the value
|
||||
// computed in the last iteration (the value that feeds into the PHI from the
|
||||
// loop latch) and those that use the penultimate value (the PHI itself).
|
||||
// We allow both, but they, obviously, have different values.
|
||||
|
||||
// We only expect at most one of each kind of user. This is because LCSSA will
|
||||
// canonicalize the users to a single PHI node per exit block, and we
|
||||
// currently only vectorize loops with a single exit.
|
||||
assert(OrigLoop->getExitBlock() && "Expected a single exit block");
|
||||
|
||||
// An external user of the last iteration's value should see the value that
|
||||
// the remainder loop uses to initialize its own IV.
|
||||
Value *PostInc = OrigPhi->getIncomingValueForBlock(OrigLoop->getLoopLatch());
|
||||
for (User *U : PostInc->users()) {
|
||||
Instruction *UI = cast<Instruction>(U);
|
||||
if (!OrigLoop->contains(UI)) {
|
||||
assert(isa<PHINode>(UI) && "Expected LCSSA form");
|
||||
// One corner case we have to handle is two IVs "chasing" each other,
|
||||
// that is %IV2 = phi [...], [ %IV1, %latch ]
|
||||
// In this case, if IV1 has an external use, we need to avoid adding both
|
||||
// "last value of IV1" and "penultimate value of IV2". Since we don't know
|
||||
// which IV will be handled first, check we haven't handled this user yet.
|
||||
PHINode *User = cast<PHINode>(UI);
|
||||
if (User->getBasicBlockIndex(MiddleBlock) == -1)
|
||||
User->addIncoming(EndValue, MiddleBlock);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// An external user of the penultimate value needs to see EndValue - Step.
|
||||
// The simplest way to get this is to recompute it from the constituent SCEVs,
|
||||
// that is Start + (Step * (CRD - 1)).
|
||||
for (User *U : OrigPhi->users()) {
|
||||
Instruction *UI = cast<Instruction>(U);
|
||||
if (!OrigLoop->contains(UI)) {
|
||||
const DataLayout &DL =
|
||||
OrigLoop->getHeader()->getModule()->getDataLayout();
|
||||
|
||||
assert(isa<PHINode>(UI) && "Expected LCSSA form");
|
||||
PHINode *User = cast<PHINode>(UI);
|
||||
// As above, check we haven't already handled this user.
|
||||
if (User->getBasicBlockIndex(MiddleBlock) != -1)
|
||||
break;
|
||||
|
||||
IRBuilder<> B(MiddleBlock->getTerminator());
|
||||
Value *CountMinusOne = B.CreateSub(
|
||||
CountRoundDown, ConstantInt::get(CountRoundDown->getType(), 1));
|
||||
Value *CMO = B.CreateSExtOrTrunc(CountMinusOne, II.getStep()->getType(),
|
||||
"cast.cmo");
|
||||
Value *Escape = II.transform(B, CMO, PSE.getSE(), DL);
|
||||
Escape->setName("ind.escape");
|
||||
User->addIncoming(Escape, MiddleBlock);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
struct CSEDenseMapInfo {
|
||||
static bool canHandle(Instruction *I) {
|
||||
@ -4639,10 +4711,10 @@ static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
|
||||
/// \brief Check that the instruction has outside loop users and is not an
|
||||
/// identified reduction variable.
|
||||
static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
|
||||
SmallPtrSetImpl<Value *> &Reductions) {
|
||||
// Reduction instructions are allowed to have exit users. All other
|
||||
// instructions must not have external users.
|
||||
if (!Reductions.count(Inst))
|
||||
SmallPtrSetImpl<Value *> &AllowedExit) {
|
||||
// Reduction and Induction instructions are allowed to have exit users. All
|
||||
// other instructions must not have external users.
|
||||
if (!AllowedExit.count(Inst))
|
||||
// Check that all of the users of the loop are inside the BB.
|
||||
for (User *U : Inst->users()) {
|
||||
Instruction *UI = cast<Instruction>(U);
|
||||
@ -4655,8 +4727,9 @@ static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
|
||||
return false;
|
||||
}
|
||||
|
||||
bool LoopVectorizationLegality::addInductionPhi(PHINode *Phi,
|
||||
InductionDescriptor ID) {
|
||||
void LoopVectorizationLegality::addInductionPhi(
|
||||
PHINode *Phi, InductionDescriptor ID,
|
||||
SmallPtrSetImpl<Value *> &AllowedExit) {
|
||||
Inductions[Phi] = ID;
|
||||
Type *PhiTy = Phi->getType();
|
||||
const DataLayout &DL = Phi->getModule()->getDataLayout();
|
||||
@ -4682,18 +4755,13 @@ bool LoopVectorizationLegality::addInductionPhi(PHINode *Phi,
|
||||
Induction = Phi;
|
||||
}
|
||||
|
||||
// Both the PHI node itself, and the "post-increment" value feeding
|
||||
// back into the PHI node may have external users.
|
||||
AllowedExit.insert(Phi);
|
||||
AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
|
||||
|
||||
DEBUG(dbgs() << "LV: Found an induction variable.\n");
|
||||
|
||||
// Until we explicitly handle the case of an induction variable with
|
||||
// an outside loop user we have to give up vectorizing this loop.
|
||||
if (hasOutsideLoopUser(TheLoop, Phi, AllowedExit)) {
|
||||
emitAnalysis(VectorizationReport(Phi) <<
|
||||
"use of induction value outside of the "
|
||||
"loop is not handled by vectorizer");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
return;
|
||||
}
|
||||
|
||||
bool LoopVectorizationLegality::canVectorizeInstrs() {
|
||||
@ -4757,8 +4825,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
|
||||
|
||||
InductionDescriptor ID;
|
||||
if (InductionDescriptor::isInductionPHI(Phi, PSE, ID)) {
|
||||
if (!addInductionPhi(Phi, ID))
|
||||
return false;
|
||||
addInductionPhi(Phi, ID, AllowedExit);
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -4770,8 +4837,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
|
||||
// As a last resort, coerce the PHI to an AddRec expression
|
||||
// and re-try classifying it as an induction PHI.
|
||||
if (InductionDescriptor::isInductionPHI(Phi, PSE, ID, true)) {
|
||||
if (!addInductionPhi(Phi, ID))
|
||||
return false;
|
||||
addInductionPhi(Phi, ID, AllowedExit);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
110
test/Transforms/LoopVectorize/iv_outside_user.ll
Normal file
110
test/Transforms/LoopVectorize/iv_outside_user.ll
Normal file
@ -0,0 +1,110 @@
|
||||
; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 < %s | FileCheck %s
|
||||
|
||||
; CHECK-LABEL: @postinc
|
||||
; CHECK-LABEL: scalar.ph:
|
||||
; CHECK: %bc.resume.val = phi i32 [ %n.vec, %middle.block ], [ 0, %entry ]
|
||||
; CHECK-LABEL: for.end:
|
||||
; CHECK: %[[RET:.*]] = phi i32 [ {{.*}}, %for.body ], [ %n.vec, %middle.block ]
|
||||
; CHECK: ret i32 %[[RET]]
|
||||
define i32 @postinc(i32 %k) {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%inc.phi = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
||||
%inc = add nsw i32 %inc.phi, 1
|
||||
%cmp = icmp eq i32 %inc, %k
|
||||
br i1 %cmp, label %for.end, label %for.body
|
||||
|
||||
for.end:
|
||||
ret i32 %inc
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @preinc
|
||||
; CHECK-LABEL: middle.block:
|
||||
; CHECK: %3 = sub i32 %n.vec, 1
|
||||
; CHECK: %ind.escape = add i32 0, %3
|
||||
; CHECK-LABEL: scalar.ph:
|
||||
; CHECK: %bc.resume.val = phi i32 [ %n.vec, %middle.block ], [ 0, %entry ]
|
||||
; CHECK-LABEL: for.end:
|
||||
; CHECK: %[[RET:.*]] = phi i32 [ {{.*}}, %for.body ], [ %ind.escape, %middle.block ]
|
||||
; CHECK: ret i32 %[[RET]]
|
||||
define i32 @preinc(i32 %k) {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%inc.phi = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
||||
%inc = add nsw i32 %inc.phi, 1
|
||||
%cmp = icmp eq i32 %inc, %k
|
||||
br i1 %cmp, label %for.end, label %for.body
|
||||
|
||||
for.end:
|
||||
ret i32 %inc.phi
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @constpre
|
||||
; CHECK-LABEL: for.end:
|
||||
; CHECK: %[[RET:.*]] = phi i32 [ {{.*}}, %for.body ], [ 2, %middle.block ]
|
||||
; CHECK: ret i32 %[[RET]]
|
||||
define i32 @constpre() {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%inc.phi = phi i32 [ 32, %entry ], [ %inc, %for.body ]
|
||||
%inc = sub nsw i32 %inc.phi, 2
|
||||
%cmp = icmp eq i32 %inc, 0
|
||||
br i1 %cmp, label %for.end, label %for.body
|
||||
|
||||
for.end:
|
||||
ret i32 %inc.phi
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @geppre
|
||||
; CHECK-LABEL: middle.block:
|
||||
; CHECK: %ind.escape = getelementptr i32, i32* %ptr, i64 124
|
||||
; CHECK-LABEL: for.end:
|
||||
; CHECK: %[[RET:.*]] = phi i32* [ {{.*}}, %for.body ], [ %ind.escape, %middle.block ]
|
||||
; CHECK: ret i32* %[[RET]]
|
||||
define i32* @geppre(i32* %ptr) {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%inc.phi = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
||||
%ptr.phi = phi i32* [ %ptr, %entry ], [ %inc.ptr, %for.body ]
|
||||
%inc = add nsw i32 %inc.phi, 1
|
||||
%inc.ptr = getelementptr i32, i32* %ptr.phi, i32 4
|
||||
%cmp = icmp eq i32 %inc, 32
|
||||
br i1 %cmp, label %for.end, label %for.body
|
||||
|
||||
for.end:
|
||||
ret i32* %ptr.phi
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @both
|
||||
; CHECK-LABEL: middle.block:
|
||||
; CHECK: %[[END:.*]] = sub i64 %n.vec, 1
|
||||
; CHECK: %ind.escape = getelementptr i32, i32* %base, i64 %[[END]]
|
||||
; CHECK-LABEL: for.end:
|
||||
; CHECK: %[[RET:.*]] = phi i32* [ %inc.lag1, %for.body ], [ %ind.escape, %middle.block ]
|
||||
; CHECK: ret i32* %[[RET]]
|
||||
|
||||
define i32* @both(i32 %k) {
|
||||
entry:
|
||||
%base = getelementptr inbounds i32, i32* undef, i64 1
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%inc.phi = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
||||
%inc.lag1 = phi i32* [ %base, %entry ], [ %tmp, %for.body]
|
||||
%inc.lag2 = phi i32* [ undef, %entry ], [ %inc.lag1, %for.body]
|
||||
%tmp = getelementptr inbounds i32, i32* %inc.lag1, i64 1
|
||||
%inc = add nsw i32 %inc.phi, 1
|
||||
%cmp = icmp eq i32 %inc, %k
|
||||
br i1 %cmp, label %for.end, label %for.body
|
||||
|
||||
for.end:
|
||||
ret i32* %inc.lag1
|
||||
}
|
@ -1,7 +1,6 @@
|
||||
; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 < %s 2>&1 | FileCheck %s
|
||||
|
||||
; CHECK: remark: {{.*}}: loop not vectorized: value could not be identified as an induction or reduction variable
|
||||
; CHECK: remark: {{.*}}: loop not vectorized: use of induction value outside of the loop is not handled by vectorizer
|
||||
|
||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
|
||||
|
||||
@ -41,34 +40,3 @@ f1.exit.loopexit:
|
||||
%.lcssa = phi i32 [ %tmp17, %bb16 ]
|
||||
ret i32 %.lcssa
|
||||
}
|
||||
|
||||
; Don't vectorize this loop. Its phi node (induction variable) has an outside
|
||||
; loop user. We currently don't handle this case.
|
||||
; PR17179
|
||||
|
||||
; CHECK-LABEL: @test2(
|
||||
; CHECK-NOT: <2 x
|
||||
|
||||
@x1 = common global i32 0, align 4
|
||||
@x2 = common global i32 0, align 4
|
||||
@x0 = common global i32 0, align 4
|
||||
|
||||
define i32 @test2() {
|
||||
entry:
|
||||
store i32 0, i32* @x1, align 4
|
||||
%0 = load i32, i32* @x0, align 4
|
||||
br label %for.cond1.preheader
|
||||
|
||||
for.cond1.preheader:
|
||||
%inc7 = phi i32 [ 0, %entry ], [ %inc, %for.cond1.preheader ]
|
||||
%inc = add nsw i32 %inc7, 1
|
||||
%cmp = icmp eq i32 %inc, 52
|
||||
br i1 %cmp, label %for.end5, label %for.cond1.preheader
|
||||
|
||||
for.end5:
|
||||
%inc7.lcssa = phi i32 [ %inc7, %for.cond1.preheader ]
|
||||
%xor = xor i32 %inc7.lcssa, %0
|
||||
store i32 52, i32* @x1, align 4
|
||||
store i32 1, i32* @x2, align 4
|
||||
ret i32 %xor
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user