mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-26 13:10:34 +00:00
Add support for reverse induction variables. For example:
while (i--) sum+=A[i]; git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169752 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
0a2e27d964
commit
f0d19bd129
@ -175,9 +175,9 @@ private:
|
|||||||
/// element.
|
/// element.
|
||||||
Value *getBroadcastInstrs(Value *V);
|
Value *getBroadcastInstrs(Value *V);
|
||||||
|
|
||||||
/// This is a helper function used by getBroadcastInstrs. It adds 0, 1, 2 ..
|
/// This function adds 0, 1, 2 ... to each vector element, starting at zero.
|
||||||
/// for each element in the vector. Starting from zero.
|
/// If Negate is set then negative numbers are added e.g. (0, -1, -2, ...).
|
||||||
Value *getConsecutiveVector(Value* Val);
|
Value *getConsecutiveVector(Value* Val, bool Negate = false);
|
||||||
|
|
||||||
/// When we go over instructions in the basic block we rely on previous
|
/// When we go over instructions in the basic block we rely on previous
|
||||||
/// values within the current basic block or on loop invariant values.
|
/// values within the current basic block or on loop invariant values.
|
||||||
@ -252,7 +252,7 @@ public:
|
|||||||
DominatorTree *Dt):
|
DominatorTree *Dt):
|
||||||
TheLoop(Lp), SE(Se), DL(Dl), DT(Dt), Induction(0) { }
|
TheLoop(Lp), SE(Se), DL(Dl), DT(Dt), Induction(0) { }
|
||||||
|
|
||||||
/// This represents the kinds of reductions that we support.
|
/// This enum represents the kinds of reductions that we support.
|
||||||
enum ReductionKind {
|
enum ReductionKind {
|
||||||
NoReduction, /// Not a reduction.
|
NoReduction, /// Not a reduction.
|
||||||
IntegerAdd, /// Sum of numbers.
|
IntegerAdd, /// Sum of numbers.
|
||||||
@ -262,6 +262,14 @@ public:
|
|||||||
IntegerXor /// Bitwise or logical XOR of numbers.
|
IntegerXor /// Bitwise or logical XOR of numbers.
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// This enum represents the kinds of inductions that we support.
|
||||||
|
enum InductionKind {
|
||||||
|
NoInduction, /// Not an induction variable.
|
||||||
|
IntInduction, /// Integer induction variable. Step = 1.
|
||||||
|
ReverseIntInduction, /// Reverse int induction variable. Step = -1.
|
||||||
|
PtrInduction /// Pointer induction variable. Step = sizeof(elem).
|
||||||
|
};
|
||||||
|
|
||||||
/// This POD struct holds information about reduction variables.
|
/// This POD struct holds information about reduction variables.
|
||||||
struct ReductionDescriptor {
|
struct ReductionDescriptor {
|
||||||
// Default C'tor
|
// Default C'tor
|
||||||
@ -316,13 +324,25 @@ public:
|
|||||||
SmallVector<const SCEV*, 2> Ends;
|
SmallVector<const SCEV*, 2> Ends;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// A POD for saving information about induction variables.
|
||||||
|
struct InductionInfo {
|
||||||
|
/// Ctors.
|
||||||
|
InductionInfo(Value *Start, InductionKind K):
|
||||||
|
StartValue(Start), IK(K) {};
|
||||||
|
InductionInfo(): StartValue(0), IK(NoInduction) {};
|
||||||
|
/// Start value.
|
||||||
|
Value *StartValue;
|
||||||
|
/// Induction kind.
|
||||||
|
InductionKind IK;
|
||||||
|
};
|
||||||
|
|
||||||
/// ReductionList contains the reduction descriptors for all
|
/// ReductionList contains the reduction descriptors for all
|
||||||
/// of the reductions that were found in the loop.
|
/// of the reductions that were found in the loop.
|
||||||
typedef DenseMap<PHINode*, ReductionDescriptor> ReductionList;
|
typedef DenseMap<PHINode*, ReductionDescriptor> ReductionList;
|
||||||
|
|
||||||
/// InductionList saves induction variables and maps them to the initial
|
/// InductionList saves induction variables and maps them to the
|
||||||
/// value entring the loop.
|
/// induction descriptor.
|
||||||
typedef DenseMap<PHINode*, Value*> InductionList;
|
typedef DenseMap<PHINode*, InductionInfo> InductionList;
|
||||||
|
|
||||||
/// Returns true if it is legal to vectorize this loop.
|
/// Returns true if it is legal to vectorize this loop.
|
||||||
/// This does not mean that it is profitable to vectorize this
|
/// This does not mean that it is profitable to vectorize this
|
||||||
@ -385,8 +405,9 @@ private:
|
|||||||
/// Returns true if the instruction I can be a reduction variable of type
|
/// Returns true if the instruction I can be a reduction variable of type
|
||||||
/// 'Kind'.
|
/// 'Kind'.
|
||||||
bool isReductionInstr(Instruction *I, ReductionKind Kind);
|
bool isReductionInstr(Instruction *I, ReductionKind Kind);
|
||||||
/// Returns True, if 'Phi' is an induction variable.
|
/// Returns the induction kind of Phi. This function may return NoInduction
|
||||||
bool isInductionVariable(PHINode *Phi);
|
/// if the PHI is not an induction variable.
|
||||||
|
InductionKind isInductionVariable(PHINode *Phi);
|
||||||
/// Return true if can compute the address bounds of Ptr within the loop.
|
/// Return true if can compute the address bounds of Ptr within the loop.
|
||||||
bool hasComputableBounds(Value *Ptr);
|
bool hasComputableBounds(Value *Ptr);
|
||||||
|
|
||||||
@ -558,7 +579,9 @@ Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) {
|
|||||||
Instruction *Loc = Builder.GetInsertPoint();
|
Instruction *Loc = Builder.GetInsertPoint();
|
||||||
|
|
||||||
// We need to place the broadcast of invariant variables outside the loop.
|
// We need to place the broadcast of invariant variables outside the loop.
|
||||||
bool Invariant = (OrigLoop->isLoopInvariant(V) && V != Induction);
|
Instruction *Instr = dyn_cast<Instruction>(V);
|
||||||
|
bool NewInstr = (Instr && Instr->getParent() == LoopVectorBody);
|
||||||
|
bool Invariant = OrigLoop->isLoopInvariant(V) && !NewInstr;
|
||||||
|
|
||||||
// Place the code for broadcasting invariant variables in the new preheader.
|
// Place the code for broadcasting invariant variables in the new preheader.
|
||||||
if (Invariant)
|
if (Invariant)
|
||||||
@ -580,19 +603,19 @@ Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) {
|
|||||||
return Shuf;
|
return Shuf;
|
||||||
}
|
}
|
||||||
|
|
||||||
Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val) {
|
Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, bool Negate) {
|
||||||
assert(Val->getType()->isVectorTy() && "Must be a vector");
|
assert(Val->getType()->isVectorTy() && "Must be a vector");
|
||||||
assert(Val->getType()->getScalarType()->isIntegerTy() &&
|
assert(Val->getType()->getScalarType()->isIntegerTy() &&
|
||||||
"Elem must be an integer");
|
"Elem must be an integer");
|
||||||
// Create the types.
|
// Create the types.
|
||||||
Type *ITy = Val->getType()->getScalarType();
|
Type *ITy = Val->getType()->getScalarType();
|
||||||
VectorType *Ty = cast<VectorType>(Val->getType());
|
VectorType *Ty = cast<VectorType>(Val->getType());
|
||||||
unsigned VLen = Ty->getNumElements();
|
int VLen = Ty->getNumElements();
|
||||||
SmallVector<Constant*, 8> Indices;
|
SmallVector<Constant*, 8> Indices;
|
||||||
|
|
||||||
// Create a vector of consecutive numbers from zero to VF.
|
// Create a vector of consecutive numbers from zero to VF.
|
||||||
for (unsigned i = 0; i < VLen; ++i)
|
for (int i = 0; i < VLen; ++i)
|
||||||
Indices.push_back(ConstantInt::get(ITy, i));
|
Indices.push_back(ConstantInt::get(ITy, Negate ? (-i): i ));
|
||||||
|
|
||||||
// Add the consecutive indices to the vector value.
|
// Add the consecutive indices to the vector value.
|
||||||
Constant *Cv = ConstantVector::get(Indices);
|
Constant *Cv = ConstantVector::get(Indices);
|
||||||
@ -603,10 +626,13 @@ Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val) {
|
|||||||
bool LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
|
bool LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
|
||||||
assert(Ptr->getType()->isPointerTy() && "Unexpected non ptr");
|
assert(Ptr->getType()->isPointerTy() && "Unexpected non ptr");
|
||||||
|
|
||||||
// If this pointer is an induction variable, return it.
|
// If this value is a pointer induction variable we know it is consecutive.
|
||||||
PHINode *Phi = dyn_cast_or_null<PHINode>(Ptr);
|
PHINode *Phi = dyn_cast_or_null<PHINode>(Ptr);
|
||||||
if (Phi && getInductionVars()->count(Phi))
|
if (Phi && Inductions.count(Phi)) {
|
||||||
return true;
|
InductionInfo II = Inductions[Phi];
|
||||||
|
if (PtrInduction == II.IK)
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
GetElementPtrInst *Gep = dyn_cast_or_null<GetElementPtrInst>(Ptr);
|
GetElementPtrInst *Gep = dyn_cast_or_null<GetElementPtrInst>(Ptr);
|
||||||
if (!Gep)
|
if (!Gep)
|
||||||
@ -730,7 +756,7 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr) {
|
|||||||
|
|
||||||
Value*
|
Value*
|
||||||
InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal,
|
InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal,
|
||||||
Instruction *Loc) {
|
Instruction *Loc) {
|
||||||
LoopVectorizationLegality::RuntimePointerCheck *PtrRtCheck =
|
LoopVectorizationLegality::RuntimePointerCheck *PtrRtCheck =
|
||||||
Legal->getRuntimePointerCheck();
|
Legal->getRuntimePointerCheck();
|
||||||
|
|
||||||
@ -745,7 +771,7 @@ InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal,
|
|||||||
SCEVExpander Exp(*SE, "induction");
|
SCEVExpander Exp(*SE, "induction");
|
||||||
|
|
||||||
// Use this type for pointer arithmetic.
|
// Use this type for pointer arithmetic.
|
||||||
Type* PtrArithTy = PtrRtCheck->Pointers[0]->getType();
|
Type* PtrArithTy = Type::getInt8PtrTy(Loc->getContext(), 0);
|
||||||
|
|
||||||
for (unsigned i = 0; i < NumPointers; ++i) {
|
for (unsigned i = 0; i < NumPointers; ++i) {
|
||||||
Value *Ptr = PtrRtCheck->Pointers[i];
|
Value *Ptr = PtrRtCheck->Pointers[i];
|
||||||
@ -759,8 +785,7 @@ InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal,
|
|||||||
} else {
|
} else {
|
||||||
DEBUG(dbgs() << "LV: Adding RT check for range:" << *Ptr <<"\n");
|
DEBUG(dbgs() << "LV: Adding RT check for range:" << *Ptr <<"\n");
|
||||||
|
|
||||||
Value *Start = Exp.expandCodeFor(PtrRtCheck->Starts[i],
|
Value *Start = Exp.expandCodeFor(PtrRtCheck->Starts[i], PtrArithTy, Loc);
|
||||||
PtrArithTy, Loc);
|
|
||||||
Value *End = Exp.expandCodeFor(PtrRtCheck->Ends[i], PtrArithTy, Loc);
|
Value *End = Exp.expandCodeFor(PtrRtCheck->Ends[i], PtrArithTy, Loc);
|
||||||
Starts.push_back(Start);
|
Starts.push_back(Start);
|
||||||
Ends.push_back(End);
|
Ends.push_back(End);
|
||||||
@ -769,10 +794,16 @@ InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal,
|
|||||||
|
|
||||||
for (unsigned i = 0; i < NumPointers; ++i) {
|
for (unsigned i = 0; i < NumPointers; ++i) {
|
||||||
for (unsigned j = i+1; j < NumPointers; ++j) {
|
for (unsigned j = i+1; j < NumPointers; ++j) {
|
||||||
|
Instruction::CastOps Op = Instruction::BitCast;
|
||||||
|
Value *Start0 = CastInst::Create(Op, Starts[i], PtrArithTy, "bc", Loc);
|
||||||
|
Value *Start1 = CastInst::Create(Op, Starts[j], PtrArithTy, "bc", Loc);
|
||||||
|
Value *End0 = CastInst::Create(Op, Ends[i], PtrArithTy, "bc", Loc);
|
||||||
|
Value *End1 = CastInst::Create(Op, Ends[j], PtrArithTy, "bc", Loc);
|
||||||
|
|
||||||
Value *Cmp0 = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULE,
|
Value *Cmp0 = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULE,
|
||||||
Starts[i], Ends[j], "bound0", Loc);
|
Start0, End1, "bound0", Loc);
|
||||||
Value *Cmp1 = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULE,
|
Value *Cmp1 = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULE,
|
||||||
Starts[j], Ends[i], "bound1", Loc);
|
Start1, End0, "bound1", Loc);
|
||||||
Value *IsConflict = BinaryOperator::Create(Instruction::And, Cmp0, Cmp1,
|
Value *IsConflict = BinaryOperator::Create(Instruction::And, Cmp0, Cmp1,
|
||||||
"found.conflict", Loc);
|
"found.conflict", Loc);
|
||||||
if (MemoryRuntimeCheck)
|
if (MemoryRuntimeCheck)
|
||||||
@ -936,27 +967,54 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
|
|||||||
LoopVectorizationLegality::InductionList *List = Legal->getInductionVars();
|
LoopVectorizationLegality::InductionList *List = Legal->getInductionVars();
|
||||||
for (I = List->begin(), E = List->end(); I != E; ++I) {
|
for (I = List->begin(), E = List->end(); I != E; ++I) {
|
||||||
PHINode *OrigPhi = I->first;
|
PHINode *OrigPhi = I->first;
|
||||||
|
LoopVectorizationLegality::InductionInfo II = I->second;
|
||||||
PHINode *ResumeVal = PHINode::Create(OrigPhi->getType(), 2, "resume.val",
|
PHINode *ResumeVal = PHINode::Create(OrigPhi->getType(), 2, "resume.val",
|
||||||
MiddleBlock->getTerminator());
|
MiddleBlock->getTerminator());
|
||||||
Value *EndValue = 0;
|
Value *EndValue = 0;
|
||||||
if (OrigPhi->getType()->isIntegerTy()) {
|
switch (II.IK) {
|
||||||
|
case LoopVectorizationLegality::NoInduction:
|
||||||
|
llvm_unreachable("Unknown induction");
|
||||||
|
case LoopVectorizationLegality::IntInduction: {
|
||||||
// Handle the integer induction counter:
|
// Handle the integer induction counter:
|
||||||
|
assert(OrigPhi->getType()->isIntegerTy() && "Invalid type");
|
||||||
assert(OrigPhi == OldInduction && "Unknown integer PHI");
|
assert(OrigPhi == OldInduction && "Unknown integer PHI");
|
||||||
// We know what the end value is.
|
// We know what the end value is.
|
||||||
EndValue = IdxEndRoundDown;
|
EndValue = IdxEndRoundDown;
|
||||||
// We also know which PHI node holds it.
|
// We also know which PHI node holds it.
|
||||||
ResumeIndex = ResumeVal;
|
ResumeIndex = ResumeVal;
|
||||||
} else {
|
break;
|
||||||
|
}
|
||||||
|
case LoopVectorizationLegality::ReverseIntInduction: {
|
||||||
|
// Convert the CountRoundDown variable to the PHI size.
|
||||||
|
unsigned CRDSize = CountRoundDown->getType()->getScalarSizeInBits();
|
||||||
|
unsigned IISize = II.StartValue->getType()->getScalarSizeInBits();
|
||||||
|
Value *CRD = CountRoundDown;
|
||||||
|
if (CRDSize > IISize)
|
||||||
|
CRD = CastInst::Create(Instruction::Trunc, CountRoundDown,
|
||||||
|
II.StartValue->getType(),
|
||||||
|
"tr.crd", BypassBlock->getTerminator());
|
||||||
|
else if (CRDSize < IISize)
|
||||||
|
CRD = CastInst::Create(Instruction::SExt, CountRoundDown,
|
||||||
|
II.StartValue->getType(),
|
||||||
|
"sext.crd", BypassBlock->getTerminator());
|
||||||
|
// Handle reverse integer induction counter:
|
||||||
|
EndValue = BinaryOperator::CreateSub(II.StartValue, CRD, "rev.ind.end",
|
||||||
|
BypassBlock->getTerminator());
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case LoopVectorizationLegality::PtrInduction: {
|
||||||
// For pointer induction variables, calculate the offset using
|
// For pointer induction variables, calculate the offset using
|
||||||
// the end index.
|
// the end index.
|
||||||
EndValue = GetElementPtrInst::Create(I->second, CountRoundDown,
|
EndValue = GetElementPtrInst::Create(II.StartValue, CountRoundDown,
|
||||||
"ptr.ind.end",
|
"ptr.ind.end",
|
||||||
BypassBlock->getTerminator());
|
BypassBlock->getTerminator());
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
}// end of case
|
||||||
|
|
||||||
// The new PHI merges the original incoming value, in case of a bypass,
|
// The new PHI merges the original incoming value, in case of a bypass,
|
||||||
// or the value at the end of the vectorized loop.
|
// or the value at the end of the vectorized loop.
|
||||||
ResumeVal->addIncoming(I->second, BypassBlock);
|
ResumeVal->addIncoming(II.StartValue, BypassBlock);
|
||||||
ResumeVal->addIncoming(EndValue, VecBody);
|
ResumeVal->addIncoming(EndValue, VecBody);
|
||||||
|
|
||||||
// Fix the scalar body counter (PHI node).
|
// Fix the scalar body counter (PHI node).
|
||||||
@ -1188,19 +1246,19 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
|
|||||||
Builder.CreateExtractElement(NewPhi, Builder.getInt32(i));
|
Builder.CreateExtractElement(NewPhi, Builder.getInt32(i));
|
||||||
switch (RdxDesc.Kind) {
|
switch (RdxDesc.Kind) {
|
||||||
case LoopVectorizationLegality::IntegerAdd:
|
case LoopVectorizationLegality::IntegerAdd:
|
||||||
Scalar0 = Builder.CreateAdd(Scalar0, Scalar1);
|
Scalar0 = Builder.CreateAdd(Scalar0, Scalar1, "add.rdx");
|
||||||
break;
|
break;
|
||||||
case LoopVectorizationLegality::IntegerMult:
|
case LoopVectorizationLegality::IntegerMult:
|
||||||
Scalar0 = Builder.CreateMul(Scalar0, Scalar1);
|
Scalar0 = Builder.CreateMul(Scalar0, Scalar1, "mul.rdx");
|
||||||
break;
|
break;
|
||||||
case LoopVectorizationLegality::IntegerOr:
|
case LoopVectorizationLegality::IntegerOr:
|
||||||
Scalar0 = Builder.CreateOr(Scalar0, Scalar1);
|
Scalar0 = Builder.CreateOr(Scalar0, Scalar1, "or.rdx");
|
||||||
break;
|
break;
|
||||||
case LoopVectorizationLegality::IntegerAnd:
|
case LoopVectorizationLegality::IntegerAnd:
|
||||||
Scalar0 = Builder.CreateAnd(Scalar0, Scalar1);
|
Scalar0 = Builder.CreateAnd(Scalar0, Scalar1, "and.rdx");
|
||||||
break;
|
break;
|
||||||
case LoopVectorizationLegality::IntegerXor:
|
case LoopVectorizationLegality::IntegerXor:
|
||||||
Scalar0 = Builder.CreateXor(Scalar0, Scalar1);
|
Scalar0 = Builder.CreateXor(Scalar0, Scalar1, "xor.rdx");
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
llvm_unreachable("Unknown reduction operation");
|
llvm_unreachable("Unknown reduction operation");
|
||||||
@ -1282,7 +1340,7 @@ Value *InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) {
|
|||||||
|
|
||||||
void
|
void
|
||||||
InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
|
InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
|
||||||
BasicBlock *BB, PhiVector *PV) {
|
BasicBlock *BB, PhiVector *PV) {
|
||||||
Constant *Zero =
|
Constant *Zero =
|
||||||
ConstantInt::get(IntegerType::getInt32Ty(BB->getContext()), 0);
|
ConstantInt::get(IntegerType::getInt32Ty(BB->getContext()), 0);
|
||||||
|
|
||||||
@ -1329,45 +1387,77 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
|
|||||||
assert(Legal->getInductionVars()->count(P) &&
|
assert(Legal->getInductionVars()->count(P) &&
|
||||||
"Not an induction variable");
|
"Not an induction variable");
|
||||||
|
|
||||||
if (P->getType()->isIntegerTy()) {
|
LoopVectorizationLegality::InductionInfo II =
|
||||||
|
Legal->getInductionVars()->lookup(P);
|
||||||
|
|
||||||
|
switch (II.IK) {
|
||||||
|
case LoopVectorizationLegality::NoInduction:
|
||||||
|
llvm_unreachable("Unknown induction");
|
||||||
|
case LoopVectorizationLegality::IntInduction: {
|
||||||
assert(P == OldInduction && "Unexpected PHI");
|
assert(P == OldInduction && "Unexpected PHI");
|
||||||
Value *Broadcasted = getBroadcastInstrs(Induction);
|
Value *Broadcasted = getBroadcastInstrs(Induction);
|
||||||
// After broadcasting the induction variable we need to make the
|
// After broadcasting the induction variable we need to make the
|
||||||
// vector consecutive by adding 0, 1, 2 ...
|
// vector consecutive by adding 0, 1, 2 ...
|
||||||
Value *ConsecutiveInduction = getConsecutiveVector(Broadcasted);
|
Value *ConsecutiveInduction = getConsecutiveVector(Broadcasted);
|
||||||
|
|
||||||
WidenMap[OldInduction] = ConsecutiveInduction;
|
WidenMap[OldInduction] = ConsecutiveInduction;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
case LoopVectorizationLegality::ReverseIntInduction:
|
||||||
|
case LoopVectorizationLegality::PtrInduction:
|
||||||
|
// Handle reverse integer and pointer inductions.
|
||||||
|
Value *StartIdx = 0;
|
||||||
|
// If we have a single integer induction variable then use it.
|
||||||
|
// Otherwise, start counting at zero.
|
||||||
|
if (OldInduction) {
|
||||||
|
LoopVectorizationLegality::InductionInfo OldII =
|
||||||
|
Legal->getInductionVars()->lookup(OldInduction);
|
||||||
|
StartIdx = OldII.StartValue;
|
||||||
|
} else {
|
||||||
|
StartIdx = ConstantInt::get(Induction->getType(), 0);
|
||||||
|
}
|
||||||
|
// This is the normalized GEP that starts counting at zero.
|
||||||
|
Value *NormalizedIdx = Builder.CreateSub(Induction, StartIdx,
|
||||||
|
"normalized.idx");
|
||||||
|
|
||||||
// Handle pointer inductions.
|
// Handle the reverse integer induction variable case.
|
||||||
assert(P->getType()->isPointerTy() && "Unexpected type.");
|
if (LoopVectorizationLegality::ReverseIntInduction == II.IK) {
|
||||||
Value *StartIdx = OldInduction ?
|
IntegerType *DstTy = cast<IntegerType>(II.StartValue->getType());
|
||||||
Legal->getInductionVars()->lookup(OldInduction) :
|
Value *CNI = Builder.CreateSExtOrTrunc(NormalizedIdx, DstTy,
|
||||||
ConstantInt::get(Induction->getType(), 0);
|
"resize.norm.idx");
|
||||||
|
Value *ReverseInd = Builder.CreateSub(II.StartValue, CNI,
|
||||||
|
"reverse.idx");
|
||||||
|
|
||||||
// This is the pointer value coming into the loop.
|
// This is a new value so do not hoist it out.
|
||||||
Value *StartPtr = Legal->getInductionVars()->lookup(P);
|
Value *Broadcasted = getBroadcastInstrs(ReverseInd);
|
||||||
|
// After broadcasting the induction variable we need to make the
|
||||||
|
// vector consecutive by adding ... -3, -2, -1, 0.
|
||||||
|
Value *ConsecutiveInduction = getConsecutiveVector(Broadcasted,
|
||||||
|
true);
|
||||||
|
WidenMap[it] = ConsecutiveInduction;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
// This is the normalized GEP that starts counting at zero.
|
// Handle the pointer induction variable case.
|
||||||
Value *NormalizedIdx = Builder.CreateSub(Induction, StartIdx,
|
assert(P->getType()->isPointerTy() && "Unexpected type.");
|
||||||
"normalized.idx");
|
|
||||||
|
|
||||||
// This is the vector of results. Notice that we don't generate vector
|
// This is the vector of results. Notice that we don't generate vector
|
||||||
// geps because scalar geps result in better code.
|
// geps because scalar geps result in better code.
|
||||||
Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF));
|
Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF));
|
||||||
for (unsigned int i = 0; i < VF; ++i) {
|
for (unsigned int i = 0; i < VF; ++i) {
|
||||||
Constant *Idx = ConstantInt::get(Induction->getType(), i);
|
Constant *Idx = ConstantInt::get(Induction->getType(), i);
|
||||||
Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, "gep.idx");
|
Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, "gep.idx");
|
||||||
Value *SclrGep = Builder.CreateGEP(StartPtr, GlobalIdx, "next.gep");
|
Value *SclrGep = Builder.CreateGEP(II.StartValue, GlobalIdx, "next.gep");
|
||||||
VecVal = Builder.CreateInsertElement(VecVal, SclrGep,
|
VecVal = Builder.CreateInsertElement(VecVal, SclrGep,
|
||||||
Builder.getInt32(i),
|
Builder.getInt32(i),
|
||||||
"insert.gep");
|
"insert.gep");
|
||||||
|
}
|
||||||
|
|
||||||
|
WidenMap[it] = VecVal;
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
WidenMap[it] = VecVal;
|
}// End of PHI.
|
||||||
continue;
|
|
||||||
}
|
|
||||||
case Instruction::Add:
|
case Instruction::Add:
|
||||||
case Instruction::FAdd:
|
case Instruction::FAdd:
|
||||||
case Instruction::Sub:
|
case Instruction::Sub:
|
||||||
@ -1561,7 +1651,6 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
|
|||||||
}// end of for_each instr.
|
}// end of for_each instr.
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void InnerLoopVectorizer::updateAnalysis() {
|
void InnerLoopVectorizer::updateAnalysis() {
|
||||||
// Forget the original basic block.
|
// Forget the original basic block.
|
||||||
SE->forgetLoop(OrigLoop);
|
SE->forgetLoop(OrigLoop);
|
||||||
@ -1580,7 +1669,6 @@ void InnerLoopVectorizer::updateAnalysis() {
|
|||||||
DEBUG(DT->verifyAnalysis());
|
DEBUG(DT->verifyAnalysis());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
|
bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
|
||||||
if (!EnableIfConversion)
|
if (!EnableIfConversion)
|
||||||
return false;
|
return false;
|
||||||
@ -1694,35 +1782,39 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check that this PHI type is allowed.
|
||||||
|
if (!Phi->getType()->isIntegerTy() &&
|
||||||
|
!Phi->getType()->isPointerTy()) {
|
||||||
|
DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
// If this PHINode is not in the header block, then we know that we
|
// If this PHINode is not in the header block, then we know that we
|
||||||
// can convert it to select during if-conversion.
|
// can convert it to select during if-conversion. No need to check if
|
||||||
|
// the PHIs in this block are induction or reduction variables.
|
||||||
if (*bb != Header)
|
if (*bb != Header)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
// This is the value coming from the preheader.
|
// This is the value coming from the preheader.
|
||||||
Value *StartValue = Phi->getIncomingValueForBlock(PreHeader);
|
Value *StartValue = Phi->getIncomingValueForBlock(PreHeader);
|
||||||
|
// Check if this is an induction variable.
|
||||||
|
InductionKind IK = isInductionVariable(Phi);
|
||||||
|
|
||||||
// We only look at integer and pointer phi nodes.
|
if (NoInduction != IK) {
|
||||||
if (Phi->getType()->isPointerTy() && isInductionVariable(Phi)) {
|
// Int inductions are special because we only allow one IV.
|
||||||
DEBUG(dbgs() << "LV: Found a pointer induction variable.\n");
|
if (IK == IntInduction) {
|
||||||
Inductions[Phi] = StartValue;
|
if (Induction) {
|
||||||
continue;
|
DEBUG(dbgs() << "LV: Found too many inductions."<< *Phi <<"\n");
|
||||||
} else if (!Phi->getType()->isIntegerTy()) {
|
return false;
|
||||||
DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n");
|
}
|
||||||
return false;
|
Induction = Phi;
|
||||||
}
|
|
||||||
|
|
||||||
// Handle integer PHIs:
|
|
||||||
if (isInductionVariable(Phi)) {
|
|
||||||
if (Induction) {
|
|
||||||
DEBUG(dbgs() << "LV: Found too many inductions."<< *Phi <<"\n");
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
DEBUG(dbgs() << "LV: Found the induction PHI."<< *Phi <<"\n");
|
|
||||||
Induction = Phi;
|
DEBUG(dbgs() << "LV: Found an induction variable.\n");
|
||||||
Inductions[Phi] = StartValue;
|
Inductions[Phi] = InductionInfo(StartValue, IK);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (AddReductionVar(Phi, IntegerAdd)) {
|
if (AddReductionVar(Phi, IntegerAdd)) {
|
||||||
DEBUG(dbgs() << "LV: Found an ADD reduction PHI."<< *Phi <<"\n");
|
DEBUG(dbgs() << "LV: Found an ADD reduction PHI."<< *Phi <<"\n");
|
||||||
continue;
|
continue;
|
||||||
@ -2119,32 +2211,42 @@ LoopVectorizationLegality::isReductionInstr(Instruction *I,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool LoopVectorizationLegality::isInductionVariable(PHINode *Phi) {
|
LoopVectorizationLegality::InductionKind
|
||||||
|
LoopVectorizationLegality::isInductionVariable(PHINode *Phi) {
|
||||||
Type *PhiTy = Phi->getType();
|
Type *PhiTy = Phi->getType();
|
||||||
// We only handle integer and pointer inductions variables.
|
// We only handle integer and pointer inductions variables.
|
||||||
if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
|
if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
|
||||||
return false;
|
return NoInduction;
|
||||||
|
|
||||||
// Check that the PHI is consecutive and starts at zero.
|
// Check that the PHI is consecutive and starts at zero.
|
||||||
const SCEV *PhiScev = SE->getSCEV(Phi);
|
const SCEV *PhiScev = SE->getSCEV(Phi);
|
||||||
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
|
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
|
||||||
if (!AR) {
|
if (!AR) {
|
||||||
DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
|
DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
|
||||||
return false;
|
return NoInduction;
|
||||||
}
|
}
|
||||||
const SCEV *Step = AR->getStepRecurrence(*SE);
|
const SCEV *Step = AR->getStepRecurrence(*SE);
|
||||||
|
|
||||||
// Integer inductions need to have a stride of one.
|
// Integer inductions need to have a stride of one.
|
||||||
if (PhiTy->isIntegerTy())
|
if (PhiTy->isIntegerTy()) {
|
||||||
return Step->isOne();
|
if (Step->isOne())
|
||||||
|
return IntInduction;
|
||||||
|
if (Step->isAllOnesValue())
|
||||||
|
return ReverseIntInduction;
|
||||||
|
return NoInduction;
|
||||||
|
}
|
||||||
|
|
||||||
// Calculate the pointer stride and check if it is consecutive.
|
// Calculate the pointer stride and check if it is consecutive.
|
||||||
const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
|
const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
|
||||||
if (!C) return false;
|
if (!C)
|
||||||
|
return NoInduction;
|
||||||
|
|
||||||
assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
|
assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
|
||||||
uint64_t Size = DL->getTypeAllocSize(PhiTy->getPointerElementType());
|
uint64_t Size = DL->getTypeAllocSize(PhiTy->getPointerElementType());
|
||||||
return (C->getValue()->equalsInt(Size));
|
if (C->getValue()->equalsInt(Size))
|
||||||
|
return PtrInduction;
|
||||||
|
|
||||||
|
return NoInduction;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
|
bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
|
||||||
@ -2252,7 +2354,6 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
|
|||||||
Type *RetTy = I->getType();
|
Type *RetTy = I->getType();
|
||||||
Type *VectorTy = ToVectorTy(RetTy, VF);
|
Type *VectorTy = ToVectorTy(RetTy, VF);
|
||||||
|
|
||||||
|
|
||||||
// TODO: We need to estimate the cost of intrinsic calls.
|
// TODO: We need to estimate the cost of intrinsic calls.
|
||||||
switch (I->getOpcode()) {
|
switch (I->getOpcode()) {
|
||||||
case Instruction::GetElementPtr:
|
case Instruction::GetElementPtr:
|
||||||
|
@ -89,9 +89,8 @@ define void @example2(i32 %n, i32 %x) nounwind uwtable ssp {
|
|||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
; We can't vectorize this loop because it has non constant loop bounds.
|
|
||||||
;CHECK: @example3
|
;CHECK: @example3
|
||||||
;CHECK-NOT: <4 x i32>
|
;CHECK: <4 x i32>
|
||||||
;CHECK: ret void
|
;CHECK: ret void
|
||||||
define void @example3(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture %q) nounwind uwtable ssp {
|
define void @example3(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture %q) nounwind uwtable ssp {
|
||||||
%1 = icmp eq i32 %n, 0
|
%1 = icmp eq i32 %n, 0
|
||||||
@ -537,9 +536,8 @@ define void @example14(i32** nocapture %in, i32** nocapture %coeff, i32* nocaptu
|
|||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
; Can't vectorize because the src and dst pointers are not disjoint.
|
|
||||||
;CHECK: @example21
|
;CHECK: @example21
|
||||||
;CHECK-NOT: <4 x i32>
|
;CHECK: <4 x i32>
|
||||||
;CHECK: ret i32
|
;CHECK: ret i32
|
||||||
define i32 @example21(i32* nocapture %b, i32 %n) nounwind uwtable readonly ssp {
|
define i32 @example21(i32* nocapture %b, i32 %n) nounwind uwtable readonly ssp {
|
||||||
%1 = icmp sgt i32 %n, 0
|
%1 = icmp sgt i32 %n, 0
|
||||||
|
Loading…
Reference in New Issue
Block a user