mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 19:39:57 +00:00
[SVE] Make ElementCount members private
This patch changes ElementCount so that the Min and Scalable members are now private and can only be accessed via the get functions getKnownMinValue() and isScalable(). In addition, I've added some other member functions for more commonly used operations. Hopefully this makes the class more useful and will reduce the need for calling getKnownMinValue(). Differential Revision: https://reviews.llvm.org/D86065
This commit is contained in:
parent
c0f8e4de72
commit
56b8c35591
@ -130,8 +130,8 @@ public:
|
||||
unsigned Factor);
|
||||
IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI,
|
||||
ElementCount Factor)
|
||||
: IntrinsicCostAttributes(Id, CI, Factor.Min) {
|
||||
assert(!Factor.Scalable);
|
||||
: IntrinsicCostAttributes(Id, CI, Factor.getKnownMinValue()) {
|
||||
assert(!Factor.isScalable());
|
||||
}
|
||||
|
||||
IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI,
|
||||
|
@ -115,7 +115,7 @@ struct VFShape {
|
||||
Parameters.push_back(
|
||||
VFParameter({CI.arg_size(), VFParamKind::GlobalPredicate}));
|
||||
|
||||
return {EC.Min, EC.Scalable, Parameters};
|
||||
return {EC.getKnownMinValue(), EC.isScalable(), Parameters};
|
||||
}
|
||||
/// Sanity check on the Parameters in the VFShape.
|
||||
bool hasValidParameterList() const;
|
||||
|
@ -304,7 +304,7 @@ namespace llvm {
|
||||
|
||||
/// Given a vector type, return the minimum number of elements it contains.
|
||||
unsigned getVectorMinNumElements() const {
|
||||
return getVectorElementCount().Min;
|
||||
return getVectorElementCount().getKnownMinValue();
|
||||
}
|
||||
|
||||
/// Return the size of the specified value type in bits.
|
||||
@ -383,7 +383,7 @@ namespace llvm {
|
||||
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const {
|
||||
EVT EltVT = getVectorElementType();
|
||||
auto EltCnt = getVectorElementCount();
|
||||
assert(!(EltCnt.Min & 1) && "Splitting vector, but not in half!");
|
||||
assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
|
||||
return EVT::getVectorVT(Context, EltVT, EltCnt / 2);
|
||||
}
|
||||
|
||||
@ -398,7 +398,8 @@ namespace llvm {
|
||||
EVT getPow2VectorType(LLVMContext &Context) const {
|
||||
if (!isPow2VectorType()) {
|
||||
ElementCount NElts = getVectorElementCount();
|
||||
NElts.Min = 1 << Log2_32_Ceil(NElts.Min);
|
||||
unsigned NewMinCount = 1 << Log2_32_Ceil(NElts.getKnownMinValue());
|
||||
NElts = ElementCount::get(NewMinCount, NElts.isScalable());
|
||||
return EVT::getVectorVT(Context, getVectorElementType(), NElts);
|
||||
}
|
||||
else {
|
||||
|
@ -696,9 +696,9 @@ inline TypeSize DataLayout::getTypeSizeInBits(Type *Ty) const {
|
||||
case Type::ScalableVectorTyID: {
|
||||
VectorType *VTy = cast<VectorType>(Ty);
|
||||
auto EltCnt = VTy->getElementCount();
|
||||
uint64_t MinBits = EltCnt.Min *
|
||||
getTypeSizeInBits(VTy->getElementType()).getFixedSize();
|
||||
return TypeSize(MinBits, EltCnt.Scalable);
|
||||
uint64_t MinBits = EltCnt.getKnownMinValue() *
|
||||
getTypeSizeInBits(VTy->getElementType()).getFixedSize();
|
||||
return TypeSize(MinBits, EltCnt.isScalable());
|
||||
}
|
||||
default:
|
||||
llvm_unreachable("DataLayout::getTypeSizeInBits(): Unsupported type");
|
||||
|
@ -426,16 +426,16 @@ public:
|
||||
unsigned getNumElements() const {
|
||||
ElementCount EC = getElementCount();
|
||||
#ifdef STRICT_FIXED_SIZE_VECTORS
|
||||
assert(!EC.Scalable &&
|
||||
assert(!EC.isScalable() &&
|
||||
"Request for fixed number of elements from scalable vector");
|
||||
return EC.Min;
|
||||
return EC.getKnownMinValue();
|
||||
#else
|
||||
if (EC.Scalable)
|
||||
if (EC.isScalable())
|
||||
WithColor::warning()
|
||||
<< "The code that requested the fixed number of elements has made "
|
||||
"the assumption that this vector is not scalable. This assumption "
|
||||
"was not correct, and this may lead to broken code\n";
|
||||
return EC.Min;
|
||||
return EC.getKnownMinValue();
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -512,8 +512,8 @@ public:
|
||||
/// input type and the same element type.
|
||||
static VectorType *getHalfElementsVectorType(VectorType *VTy) {
|
||||
auto EltCnt = VTy->getElementCount();
|
||||
assert ((EltCnt.Min & 1) == 0 &&
|
||||
"Cannot halve vector with odd number of elements.");
|
||||
assert(EltCnt.isKnownEven() &&
|
||||
"Cannot halve vector with odd number of elements.");
|
||||
return VectorType::get(VTy->getElementType(), EltCnt/2);
|
||||
}
|
||||
|
||||
@ -521,7 +521,8 @@ public:
|
||||
/// input type and the same element type.
|
||||
static VectorType *getDoubleElementsVectorType(VectorType *VTy) {
|
||||
auto EltCnt = VTy->getElementCount();
|
||||
assert((EltCnt.Min * 2ull) <= UINT_MAX && "Too many elements in vector");
|
||||
assert((EltCnt.getKnownMinValue() * 2ull) <= UINT_MAX &&
|
||||
"Too many elements in vector");
|
||||
return VectorType::get(VTy->getElementType(), EltCnt * 2);
|
||||
}
|
||||
|
||||
|
@ -2046,8 +2046,9 @@ public:
|
||||
/// Examples: shufflevector <4 x n> A, <4 x n> B, <1,2,3>
|
||||
/// shufflevector <4 x n> A, <4 x n> B, <1,2,3,4,5>
|
||||
bool changesLength() const {
|
||||
unsigned NumSourceElts =
|
||||
cast<VectorType>(Op<0>()->getType())->getElementCount().Min;
|
||||
unsigned NumSourceElts = cast<VectorType>(Op<0>()->getType())
|
||||
->getElementCount()
|
||||
.getKnownMinValue();
|
||||
unsigned NumMaskElts = ShuffleMask.size();
|
||||
return NumSourceElts != NumMaskElts;
|
||||
}
|
||||
|
@ -424,7 +424,7 @@ namespace llvm {
|
||||
MVT getHalfNumVectorElementsVT() const {
|
||||
MVT EltVT = getVectorElementType();
|
||||
auto EltCnt = getVectorElementCount();
|
||||
assert(!(EltCnt.Min & 1) && "Splitting vector, but not in half!");
|
||||
assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
|
||||
return getVectorVT(EltVT, EltCnt / 2);
|
||||
}
|
||||
|
||||
@ -742,7 +742,7 @@ namespace llvm {
|
||||
|
||||
/// Given a vector type, return the minimum number of elements it contains.
|
||||
unsigned getVectorMinNumElements() const {
|
||||
return getVectorElementCount().Min;
|
||||
return getVectorElementCount().getKnownMinValue();
|
||||
}
|
||||
|
||||
/// Returns the size of the specified MVT in bits.
|
||||
@ -1207,9 +1207,9 @@ namespace llvm {
|
||||
}
|
||||
|
||||
static MVT getVectorVT(MVT VT, ElementCount EC) {
|
||||
if (EC.Scalable)
|
||||
return getScalableVectorVT(VT, EC.Min);
|
||||
return getVectorVT(VT, EC.Min);
|
||||
if (EC.isScalable())
|
||||
return getScalableVectorVT(VT, EC.getKnownMinValue());
|
||||
return getVectorVT(VT, EC.getKnownMinValue());
|
||||
}
|
||||
|
||||
/// Return the value type corresponding to the specified type. This returns
|
||||
|
@ -27,6 +27,10 @@ template <typename T> struct DenseMapInfo;
|
||||
|
||||
class ElementCount {
|
||||
private:
|
||||
unsigned Min; // Minimum number of vector elements.
|
||||
bool Scalable; // If true, NumElements is a multiple of 'Min' determined
|
||||
// at runtime rather than compile time.
|
||||
|
||||
/// Prevent code from using initializer-list constructors like
|
||||
/// ElementCount EC = {<unsigned>, <bool>}. The static `get*`
|
||||
/// methods below are preferred, as users should always make a
|
||||
@ -35,10 +39,6 @@ private:
|
||||
ElementCount(unsigned Min, bool Scalable) : Min(Min), Scalable(Scalable) {}
|
||||
|
||||
public:
|
||||
unsigned Min; // Minimum number of vector elements.
|
||||
bool Scalable; // If true, NumElements is a multiple of 'Min' determined
|
||||
// at runtime rather than compile time.
|
||||
|
||||
ElementCount() = default;
|
||||
|
||||
ElementCount operator*(unsigned RHS) {
|
||||
@ -58,6 +58,16 @@ public:
|
||||
bool operator==(unsigned RHS) const { return Min == RHS && !Scalable; }
|
||||
bool operator!=(unsigned RHS) const { return !(*this == RHS); }
|
||||
|
||||
ElementCount &operator*=(unsigned RHS) {
|
||||
Min *= RHS;
|
||||
return *this;
|
||||
}
|
||||
|
||||
ElementCount &operator/=(unsigned RHS) {
|
||||
Min /= RHS;
|
||||
return *this;
|
||||
}
|
||||
|
||||
ElementCount NextPowerOf2() const {
|
||||
return {(unsigned)llvm::NextPowerOf2(Min), Scalable};
|
||||
}
|
||||
@ -81,11 +91,21 @@ public:
|
||||
///
|
||||
///@{ No elements.
|
||||
bool isZero() const { return Min == 0; }
|
||||
/// At least one element.
|
||||
bool isNonZero() const { return Min != 0; }
|
||||
/// A return value of true indicates we know at compile time that the number
|
||||
/// of elements (vscale * Min) is definitely even. However, returning false
|
||||
/// does not guarantee that the total number of elements is odd.
|
||||
bool isKnownEven() const { return (Min & 0x1) == 0; }
|
||||
/// Exactly one element.
|
||||
bool isScalar() const { return !Scalable && Min == 1; }
|
||||
/// One or more elements.
|
||||
bool isVector() const { return (Scalable && Min != 0) || Min > 1; }
|
||||
///@}
|
||||
|
||||
unsigned getKnownMinValue() const { return Min; }
|
||||
|
||||
bool isScalable() const { return Scalable; }
|
||||
};
|
||||
|
||||
/// Stream operator function for `ElementCount`.
|
||||
@ -322,10 +342,11 @@ template <> struct DenseMapInfo<ElementCount> {
|
||||
return ElementCount::getFixed(~0U - 1);
|
||||
}
|
||||
static unsigned getHashValue(const ElementCount& EltCnt) {
|
||||
if (EltCnt.Scalable)
|
||||
return (EltCnt.Min * 37U) - 1U;
|
||||
unsigned HashVal = EltCnt.getKnownMinValue() * 37U;
|
||||
if (EltCnt.isScalable())
|
||||
return (HashVal - 1U);
|
||||
|
||||
return EltCnt.Min * 37U;
|
||||
return HashVal;
|
||||
}
|
||||
|
||||
static bool isEqual(const ElementCount& LHS, const ElementCount& RHS) {
|
||||
|
@ -4550,7 +4550,7 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1,
|
||||
unsigned MaskNumElts = Mask.size();
|
||||
ElementCount InVecEltCount = InVecTy->getElementCount();
|
||||
|
||||
bool Scalable = InVecEltCount.Scalable;
|
||||
bool Scalable = InVecEltCount.isScalable();
|
||||
|
||||
SmallVector<int, 32> Indices;
|
||||
Indices.assign(Mask.begin(), Mask.end());
|
||||
@ -4559,7 +4559,7 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1,
|
||||
// replace that input vector with undef.
|
||||
if (!Scalable) {
|
||||
bool MaskSelects0 = false, MaskSelects1 = false;
|
||||
unsigned InVecNumElts = InVecEltCount.Min;
|
||||
unsigned InVecNumElts = InVecEltCount.getKnownMinValue();
|
||||
for (unsigned i = 0; i != MaskNumElts; ++i) {
|
||||
if (Indices[i] == -1)
|
||||
continue;
|
||||
@ -4588,7 +4588,8 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1,
|
||||
// is not known at compile time for scalable vectors
|
||||
if (!Scalable && Op0Const && !Op1Const) {
|
||||
std::swap(Op0, Op1);
|
||||
ShuffleVectorInst::commuteShuffleMask(Indices, InVecEltCount.Min);
|
||||
ShuffleVectorInst::commuteShuffleMask(Indices,
|
||||
InVecEltCount.getKnownMinValue());
|
||||
}
|
||||
|
||||
// A splat of an inserted scalar constant becomes a vector constant:
|
||||
|
@ -442,7 +442,7 @@ Optional<VFInfo> VFABI::tryDemangleForVFABI(StringRef MangledName,
|
||||
if (!F)
|
||||
return None;
|
||||
const ElementCount EC = getECFromSignature(F->getFunctionType());
|
||||
VF = EC.Min;
|
||||
VF = EC.getKnownMinValue();
|
||||
}
|
||||
|
||||
// Sanity checks.
|
||||
|
@ -4808,7 +4808,8 @@ static bool canCreateUndefOrPoison(const Operator *Op, bool PoisonOnly) {
|
||||
auto *VTy = cast<VectorType>(Op->getOperand(0)->getType());
|
||||
unsigned IdxOp = Op->getOpcode() == Instruction::InsertElement ? 2 : 1;
|
||||
auto *Idx = dyn_cast<ConstantInt>(Op->getOperand(IdxOp));
|
||||
if (!Idx || Idx->getZExtValue() >= VTy->getElementCount().Min)
|
||||
if (!Idx ||
|
||||
Idx->getZExtValue() >= VTy->getElementCount().getKnownMinValue())
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
@ -970,7 +970,7 @@ void ModuleBitcodeWriter::writeTypeTable() {
|
||||
// VECTOR [numelts, eltty] or
|
||||
// [numelts, eltty, scalable]
|
||||
Code = bitc::TYPE_CODE_VECTOR;
|
||||
TypeVals.push_back(VT->getElementCount().Min);
|
||||
TypeVals.push_back(VT->getElementCount().getKnownMinValue());
|
||||
TypeVals.push_back(VE.getTypeID(VT->getElementType()));
|
||||
if (isa<ScalableVectorType>(VT))
|
||||
TypeVals.push_back(true);
|
||||
|
@ -6957,10 +6957,10 @@ class VectorPromoteHelper {
|
||||
if (UseSplat)
|
||||
return ConstantVector::getSplat(EC, Val);
|
||||
|
||||
if (!EC.Scalable) {
|
||||
if (!EC.isScalable()) {
|
||||
SmallVector<Constant *, 4> ConstVec;
|
||||
UndefValue *UndefVal = UndefValue::get(Val->getType());
|
||||
for (unsigned Idx = 0; Idx != EC.Min; ++Idx) {
|
||||
for (unsigned Idx = 0; Idx != EC.getKnownMinValue(); ++Idx) {
|
||||
if (Idx == ExtractIdx)
|
||||
ConstVec.push_back(Val);
|
||||
else
|
||||
|
@ -18994,7 +18994,7 @@ static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
|
||||
// check the other type in the cast to make sure this is really legal.
|
||||
EVT VT = N->getValueType(0);
|
||||
EVT SrcEltVT = SrcVT.getVectorElementType();
|
||||
unsigned NumElts = SrcVT.getVectorElementCount().Min * N->getNumOperands();
|
||||
ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
|
||||
EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
|
||||
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
||||
switch (CastOpcode) {
|
||||
|
@ -428,10 +428,10 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
|
||||
// vector widening case (e.g. <2 x float> -> <4 x float>). Extract the
|
||||
// elements we want.
|
||||
if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) {
|
||||
assert((PartEVT.getVectorElementCount().Min >
|
||||
ValueVT.getVectorElementCount().Min) &&
|
||||
(PartEVT.getVectorElementCount().Scalable ==
|
||||
ValueVT.getVectorElementCount().Scalable) &&
|
||||
assert((PartEVT.getVectorElementCount().getKnownMinValue() >
|
||||
ValueVT.getVectorElementCount().getKnownMinValue()) &&
|
||||
(PartEVT.getVectorElementCount().isScalable() ==
|
||||
ValueVT.getVectorElementCount().isScalable()) &&
|
||||
"Cannot narrow, it would be a lossy transformation");
|
||||
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
|
||||
DAG.getVectorIdxConstant(0, DL));
|
||||
@ -3751,7 +3751,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
|
||||
if (IsVectorGEP && !N.getValueType().isVector()) {
|
||||
LLVMContext &Context = *DAG.getContext();
|
||||
EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorElementCount);
|
||||
if (VectorElementCount.Scalable)
|
||||
if (VectorElementCount.isScalable())
|
||||
N = DAG.getSplatVector(VT, dl, N);
|
||||
else
|
||||
N = DAG.getSplatBuildVector(VT, dl, N);
|
||||
@ -3824,7 +3824,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
|
||||
if (!IdxN.getValueType().isVector() && IsVectorGEP) {
|
||||
EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(),
|
||||
VectorElementCount);
|
||||
if (VectorElementCount.Scalable)
|
||||
if (VectorElementCount.isScalable())
|
||||
IdxN = DAG.getSplatVector(VT, dl, IdxN);
|
||||
else
|
||||
IdxN = DAG.getSplatBuildVector(VT, dl, IdxN);
|
||||
|
@ -964,23 +964,24 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
|
||||
|
||||
// Scalable vectors cannot be scalarized, so splitting or widening is
|
||||
// required.
|
||||
if (VT.isScalableVector() && !isPowerOf2_32(EC.Min))
|
||||
if (VT.isScalableVector() && !isPowerOf2_32(EC.getKnownMinValue()))
|
||||
llvm_unreachable(
|
||||
"Splitting or widening of non-power-of-2 MVTs is not implemented.");
|
||||
|
||||
// FIXME: We don't support non-power-of-2-sized vectors for now.
|
||||
// Ideally we could break down into LHS/RHS like LegalizeDAG does.
|
||||
if (!isPowerOf2_32(EC.Min)) {
|
||||
if (!isPowerOf2_32(EC.getKnownMinValue())) {
|
||||
// Split EC to unit size (scalable property is preserved).
|
||||
NumVectorRegs = EC.Min;
|
||||
EC = EC / NumVectorRegs;
|
||||
NumVectorRegs = EC.getKnownMinValue();
|
||||
EC = ElementCount::getFixed(1);
|
||||
}
|
||||
|
||||
// Divide the input until we get to a supported size. This will
|
||||
// always end up with an EC that represent a scalar or a scalable
|
||||
// scalar.
|
||||
while (EC.Min > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, EC))) {
|
||||
EC.Min >>= 1;
|
||||
while (EC.getKnownMinValue() > 1 &&
|
||||
!TLI->isTypeLegal(MVT::getVectorVT(EltTy, EC))) {
|
||||
EC /= 2;
|
||||
NumVectorRegs <<= 1;
|
||||
}
|
||||
|
||||
@ -1315,13 +1316,15 @@ void TargetLoweringBase::computeRegisterProperties(
|
||||
}
|
||||
|
||||
case TypeWidenVector:
|
||||
if (isPowerOf2_32(EC.Min)) {
|
||||
if (isPowerOf2_32(EC.getKnownMinValue())) {
|
||||
// Try to widen the vector.
|
||||
for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
|
||||
MVT SVT = (MVT::SimpleValueType) nVT;
|
||||
if (SVT.getVectorElementType() == EltVT &&
|
||||
SVT.isScalableVector() == IsScalable &&
|
||||
SVT.getVectorElementCount().Min > EC.Min && isTypeLegal(SVT)) {
|
||||
SVT.getVectorElementCount().getKnownMinValue() >
|
||||
EC.getKnownMinValue() &&
|
||||
isTypeLegal(SVT)) {
|
||||
TransformToType[i] = SVT;
|
||||
RegisterTypeForVT[i] = SVT;
|
||||
NumRegistersForVT[i] = 1;
|
||||
@ -1365,10 +1368,10 @@ void TargetLoweringBase::computeRegisterProperties(
|
||||
ValueTypeActions.setTypeAction(VT, TypeScalarizeVector);
|
||||
else if (PreferredAction == TypeSplitVector)
|
||||
ValueTypeActions.setTypeAction(VT, TypeSplitVector);
|
||||
else if (EC.Min > 1)
|
||||
else if (EC.getKnownMinValue() > 1)
|
||||
ValueTypeActions.setTypeAction(VT, TypeSplitVector);
|
||||
else
|
||||
ValueTypeActions.setTypeAction(VT, EC.Scalable
|
||||
ValueTypeActions.setTypeAction(VT, EC.isScalable()
|
||||
? TypeScalarizeScalableVector
|
||||
: TypeScalarizeVector);
|
||||
} else {
|
||||
@ -1426,7 +1429,8 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT
|
||||
// This handles things like <2 x float> -> <4 x float> and
|
||||
// <4 x i1> -> <4 x i32>.
|
||||
LegalizeTypeAction TA = getTypeAction(Context, VT);
|
||||
if (EltCnt.Min != 1 && (TA == TypeWidenVector || TA == TypePromoteInteger)) {
|
||||
if (EltCnt.getKnownMinValue() != 1 &&
|
||||
(TA == TypeWidenVector || TA == TypePromoteInteger)) {
|
||||
EVT RegisterEVT = getTypeToTransformTo(Context, VT);
|
||||
if (isTypeLegal(RegisterEVT)) {
|
||||
IntermediateVT = RegisterEVT;
|
||||
@ -1443,7 +1447,7 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT
|
||||
|
||||
// Scalable vectors cannot be scalarized, so handle the legalisation of the
|
||||
// types like done elsewhere in SelectionDAG.
|
||||
if (VT.isScalableVector() && !isPowerOf2_32(EltCnt.Min)) {
|
||||
if (VT.isScalableVector() && !isPowerOf2_32(EltCnt.getKnownMinValue())) {
|
||||
LegalizeKind LK;
|
||||
EVT PartVT = VT;
|
||||
do {
|
||||
@ -1452,15 +1456,15 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT
|
||||
PartVT = LK.second;
|
||||
} while (LK.first != TypeLegal);
|
||||
|
||||
NumIntermediates =
|
||||
VT.getVectorElementCount().Min / PartVT.getVectorElementCount().Min;
|
||||
NumIntermediates = VT.getVectorElementCount().getKnownMinValue() /
|
||||
PartVT.getVectorElementCount().getKnownMinValue();
|
||||
|
||||
// FIXME: This code needs to be extended to handle more complex vector
|
||||
// breakdowns, like nxv7i64 -> nxv8i64 -> 4 x nxv2i64. Currently the only
|
||||
// supported cases are vectors that are broken down into equal parts
|
||||
// such as nxv6i64 -> 3 x nxv2i64.
|
||||
assert(NumIntermediates * PartVT.getVectorElementCount().Min ==
|
||||
VT.getVectorElementCount().Min &&
|
||||
assert((PartVT.getVectorElementCount() * NumIntermediates) ==
|
||||
VT.getVectorElementCount() &&
|
||||
"Expected an integer multiple of PartVT");
|
||||
IntermediateVT = PartVT;
|
||||
RegisterVT = getRegisterType(Context, IntermediateVT);
|
||||
@ -1469,16 +1473,16 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT
|
||||
|
||||
// FIXME: We don't support non-power-of-2-sized vectors for now. Ideally
|
||||
// we could break down into LHS/RHS like LegalizeDAG does.
|
||||
if (!isPowerOf2_32(EltCnt.Min)) {
|
||||
NumVectorRegs = EltCnt.Min;
|
||||
EltCnt.Min = 1;
|
||||
if (!isPowerOf2_32(EltCnt.getKnownMinValue())) {
|
||||
NumVectorRegs = EltCnt.getKnownMinValue();
|
||||
EltCnt = ElementCount::getFixed(1);
|
||||
}
|
||||
|
||||
// Divide the input until we get to a supported size. This will always
|
||||
// end with a scalar if the target doesn't support vectors.
|
||||
while (EltCnt.Min > 1 &&
|
||||
while (EltCnt.getKnownMinValue() > 1 &&
|
||||
!isTypeLegal(EVT::getVectorVT(Context, EltTy, EltCnt))) {
|
||||
EltCnt.Min >>= 1;
|
||||
EltCnt /= 2;
|
||||
NumVectorRegs <<= 1;
|
||||
}
|
||||
|
||||
|
@ -122,13 +122,13 @@ EVT EVT::getExtendedVectorElementType() const {
|
||||
unsigned EVT::getExtendedVectorNumElements() const {
|
||||
assert(isExtended() && "Type is not extended!");
|
||||
ElementCount EC = cast<VectorType>(LLVMTy)->getElementCount();
|
||||
if (EC.Scalable) {
|
||||
if (EC.isScalable()) {
|
||||
WithColor::warning()
|
||||
<< "The code that requested the fixed number of elements has made the "
|
||||
"assumption that this vector is not scalable. This assumption was "
|
||||
"not correct, and this may lead to broken code\n";
|
||||
}
|
||||
return EC.Min;
|
||||
return EC.getKnownMinValue();
|
||||
}
|
||||
|
||||
ElementCount EVT::getExtendedVectorElementCount() const {
|
||||
@ -150,9 +150,9 @@ std::string EVT::getEVTString() const {
|
||||
switch (V.SimpleTy) {
|
||||
default:
|
||||
if (isVector())
|
||||
return (isScalableVector() ? "nxv" : "v")
|
||||
+ utostr(getVectorElementCount().Min)
|
||||
+ getVectorElementType().getEVTString();
|
||||
return (isScalableVector() ? "nxv" : "v") +
|
||||
utostr(getVectorElementCount().getKnownMinValue()) +
|
||||
getVectorElementType().getEVTString();
|
||||
if (isInteger())
|
||||
return "i" + utostr(getSizeInBits());
|
||||
if (isFloatingPoint())
|
||||
|
@ -656,9 +656,9 @@ void TypePrinting::print(Type *Ty, raw_ostream &OS) {
|
||||
VectorType *PTy = cast<VectorType>(Ty);
|
||||
ElementCount EC = PTy->getElementCount();
|
||||
OS << "<";
|
||||
if (EC.Scalable)
|
||||
if (EC.isScalable())
|
||||
OS << "vscale x ";
|
||||
OS << EC.Min << " x ";
|
||||
OS << EC.getKnownMinValue() << " x ";
|
||||
print(PTy->getElementType(), OS);
|
||||
OS << '>';
|
||||
return;
|
||||
|
@ -931,7 +931,7 @@ Constant *llvm::ConstantFoldShuffleVectorInstruction(Constant *V1, Constant *V2,
|
||||
// If the mask is all zeros this is a splat, no need to go through all
|
||||
// elements.
|
||||
if (all_of(Mask, [](int Elt) { return Elt == 0; }) &&
|
||||
!MaskEltCount.Scalable) {
|
||||
!MaskEltCount.isScalable()) {
|
||||
Type *Ty = IntegerType::get(V1->getContext(), 32);
|
||||
Constant *Elt =
|
||||
ConstantExpr::getExtractElement(V1, ConstantInt::get(Ty, 0));
|
||||
@ -942,7 +942,7 @@ Constant *llvm::ConstantFoldShuffleVectorInstruction(Constant *V1, Constant *V2,
|
||||
if (isa<ScalableVectorType>(V1VTy))
|
||||
return nullptr;
|
||||
|
||||
unsigned SrcNumElts = V1VTy->getElementCount().Min;
|
||||
unsigned SrcNumElts = V1VTy->getElementCount().getKnownMinValue();
|
||||
|
||||
// Loop over the shuffle mask, evaluating each element.
|
||||
SmallVector<Constant*, 32> Result;
|
||||
@ -2056,11 +2056,12 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
|
||||
SmallVector<Constant*, 4> ResElts;
|
||||
Type *Ty = IntegerType::get(C1->getContext(), 32);
|
||||
// Compare the elements, producing an i1 result or constant expr.
|
||||
for (unsigned i = 0, e = C1VTy->getElementCount().Min; i != e; ++i) {
|
||||
for (unsigned I = 0, E = C1VTy->getElementCount().getKnownMinValue();
|
||||
I != E; ++I) {
|
||||
Constant *C1E =
|
||||
ConstantExpr::getExtractElement(C1, ConstantInt::get(Ty, i));
|
||||
ConstantExpr::getExtractElement(C1, ConstantInt::get(Ty, I));
|
||||
Constant *C2E =
|
||||
ConstantExpr::getExtractElement(C2, ConstantInt::get(Ty, i));
|
||||
ConstantExpr::getExtractElement(C2, ConstantInt::get(Ty, I));
|
||||
|
||||
ResElts.push_back(ConstantExpr::getCompare(pred, C1E, C2E));
|
||||
}
|
||||
|
@ -1300,14 +1300,14 @@ Constant *ConstantVector::getImpl(ArrayRef<Constant*> V) {
|
||||
}
|
||||
|
||||
Constant *ConstantVector::getSplat(ElementCount EC, Constant *V) {
|
||||
if (!EC.Scalable) {
|
||||
if (!EC.isScalable()) {
|
||||
// If this splat is compatible with ConstantDataVector, use it instead of
|
||||
// ConstantVector.
|
||||
if ((isa<ConstantFP>(V) || isa<ConstantInt>(V)) &&
|
||||
ConstantDataSequential::isElementTypeCompatible(V->getType()))
|
||||
return ConstantDataVector::getSplat(EC.Min, V);
|
||||
return ConstantDataVector::getSplat(EC.getKnownMinValue(), V);
|
||||
|
||||
SmallVector<Constant *, 32> Elts(EC.Min, V);
|
||||
SmallVector<Constant *, 32> Elts(EC.getKnownMinValue(), V);
|
||||
return get(Elts);
|
||||
}
|
||||
|
||||
@ -1324,7 +1324,7 @@ Constant *ConstantVector::getSplat(ElementCount EC, Constant *V) {
|
||||
Constant *UndefV = UndefValue::get(VTy);
|
||||
V = ConstantExpr::getInsertElement(UndefV, V, ConstantInt::get(I32Ty, 0));
|
||||
// Build shuffle mask to perform the splat.
|
||||
SmallVector<int, 8> Zeros(EC.Min, 0);
|
||||
SmallVector<int, 8> Zeros(EC.getKnownMinValue(), 0);
|
||||
// Splat.
|
||||
return ConstantExpr::getShuffleVector(V, UndefV, Zeros);
|
||||
}
|
||||
@ -2264,7 +2264,7 @@ Constant *ConstantExpr::getGetElementPtr(Type *Ty, Constant *C,
|
||||
if (VectorType *VecTy = dyn_cast<VectorType>(Idx->getType()))
|
||||
EltCount = VecTy->getElementCount();
|
||||
|
||||
if (EltCount.Min != 0)
|
||||
if (EltCount.isNonZero())
|
||||
ReqTy = VectorType::get(ReqTy, EltCount);
|
||||
|
||||
if (OnlyIfReducedTy == ReqTy)
|
||||
@ -2284,7 +2284,7 @@ Constant *ConstantExpr::getGetElementPtr(Type *Ty, Constant *C,
|
||||
|
||||
if (GTI.isStruct() && Idx->getType()->isVectorTy()) {
|
||||
Idx = Idx->getSplatValue();
|
||||
} else if (GTI.isSequential() && EltCount.Min != 0 &&
|
||||
} else if (GTI.isSequential() && EltCount.isNonZero() &&
|
||||
!Idx->getType()->isVectorTy()) {
|
||||
Idx = ConstantVector::getSplat(EltCount, Idx);
|
||||
}
|
||||
|
@ -781,7 +781,7 @@ unsigned LLVMGetPointerAddressSpace(LLVMTypeRef PointerTy) {
|
||||
}
|
||||
|
||||
unsigned LLVMGetVectorSize(LLVMTypeRef VectorTy) {
|
||||
return unwrap<VectorType>(VectorTy)->getElementCount().Min;
|
||||
return unwrap<VectorType>(VectorTy)->getElementCount().getKnownMinValue();
|
||||
}
|
||||
|
||||
/*--.. Operations on other types ...........................................--*/
|
||||
|
@ -630,7 +630,7 @@ Align DataLayout::getAlignmentInfo(AlignTypeEnum AlignType, uint32_t BitWidth,
|
||||
// We're only calculating a natural alignment, so it doesn't have to be
|
||||
// based on the full size for scalable vectors. Using the minimum element
|
||||
// count should be enough here.
|
||||
Alignment *= cast<VectorType>(Ty)->getElementCount().Min;
|
||||
Alignment *= cast<VectorType>(Ty)->getElementCount().getKnownMinValue();
|
||||
Alignment = PowerOf2Ceil(Alignment);
|
||||
return Align(Alignment);
|
||||
}
|
||||
|
@ -714,9 +714,10 @@ static std::string getMangledTypeStr(Type* Ty) {
|
||||
Result += "f";
|
||||
} else if (VectorType* VTy = dyn_cast<VectorType>(Ty)) {
|
||||
ElementCount EC = VTy->getElementCount();
|
||||
if (EC.Scalable)
|
||||
if (EC.isScalable())
|
||||
Result += "nx";
|
||||
Result += "v" + utostr(EC.Min) + getMangledTypeStr(VTy->getElementType());
|
||||
Result += "v" + utostr(EC.getKnownMinValue()) +
|
||||
getMangledTypeStr(VTy->getElementType());
|
||||
} else if (Ty) {
|
||||
switch (Ty->getTypeID()) {
|
||||
default: llvm_unreachable("Unhandled type");
|
||||
|
@ -1003,7 +1003,7 @@ Value *IRBuilderBase::CreateVectorSplat(unsigned NumElts, Value *V,
|
||||
|
||||
Value *IRBuilderBase::CreateVectorSplat(ElementCount EC, Value *V,
|
||||
const Twine &Name) {
|
||||
assert(EC.Min > 0 && "Cannot splat to an empty vector!");
|
||||
assert(EC.isNonZero() && "Cannot splat to an empty vector!");
|
||||
|
||||
// First insert it into an undef vector so we can shuffle it.
|
||||
Type *I32Ty = getInt32Ty();
|
||||
|
@ -1967,7 +1967,8 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2,
|
||||
return false;
|
||||
|
||||
// Make sure the mask elements make sense.
|
||||
int V1Size = cast<VectorType>(V1->getType())->getElementCount().Min;
|
||||
int V1Size =
|
||||
cast<VectorType>(V1->getType())->getElementCount().getKnownMinValue();
|
||||
for (int Elem : Mask)
|
||||
if (Elem != UndefMaskElem && Elem >= V1Size * 2)
|
||||
return false;
|
||||
@ -2026,22 +2027,22 @@ void ShuffleVectorInst::getShuffleMask(const Constant *Mask,
|
||||
ElementCount EC = cast<VectorType>(Mask->getType())->getElementCount();
|
||||
|
||||
if (isa<ConstantAggregateZero>(Mask)) {
|
||||
Result.resize(EC.Min, 0);
|
||||
Result.resize(EC.getKnownMinValue(), 0);
|
||||
return;
|
||||
}
|
||||
|
||||
Result.reserve(EC.Min);
|
||||
Result.reserve(EC.getKnownMinValue());
|
||||
|
||||
if (EC.Scalable) {
|
||||
if (EC.isScalable()) {
|
||||
assert((isa<ConstantAggregateZero>(Mask) || isa<UndefValue>(Mask)) &&
|
||||
"Scalable vector shuffle mask must be undef or zeroinitializer");
|
||||
int MaskVal = isa<UndefValue>(Mask) ? -1 : 0;
|
||||
for (unsigned I = 0; I < EC.Min; ++I)
|
||||
for (unsigned I = 0; I < EC.getKnownMinValue(); ++I)
|
||||
Result.emplace_back(MaskVal);
|
||||
return;
|
||||
}
|
||||
|
||||
unsigned NumElts = EC.Min;
|
||||
unsigned NumElts = EC.getKnownMinValue();
|
||||
|
||||
if (auto *CDS = dyn_cast<ConstantDataSequential>(Mask)) {
|
||||
for (unsigned i = 0; i != NumElts; ++i)
|
||||
|
@ -280,8 +280,8 @@ bool VPIntrinsic::canIgnoreVectorLengthParam() const {
|
||||
// the operation. This function returns true when this is detected statically
|
||||
// in the IR.
|
||||
|
||||
// Check whether "W == vscale * EC.Min"
|
||||
if (EC.Scalable) {
|
||||
// Check whether "W == vscale * EC.getKnownMinValue()"
|
||||
if (EC.isScalable()) {
|
||||
// Undig the DL
|
||||
auto ParMod = this->getModule();
|
||||
if (!ParMod)
|
||||
@ -291,8 +291,8 @@ bool VPIntrinsic::canIgnoreVectorLengthParam() const {
|
||||
// Compare vscale patterns
|
||||
uint64_t VScaleFactor;
|
||||
if (match(VLParam, m_c_Mul(m_ConstantInt(VScaleFactor), m_VScale(DL))))
|
||||
return VScaleFactor >= EC.Min;
|
||||
return (EC.Min == 1) && match(VLParam, m_VScale(DL));
|
||||
return VScaleFactor >= EC.getKnownMinValue();
|
||||
return (EC.getKnownMinValue() == 1) && match(VLParam, m_VScale(DL));
|
||||
}
|
||||
|
||||
// standard SIMD operation
|
||||
@ -301,7 +301,7 @@ bool VPIntrinsic::canIgnoreVectorLengthParam() const {
|
||||
return false;
|
||||
|
||||
uint64_t VLNum = VLConst->getZExtValue();
|
||||
if (VLNum >= EC.Min)
|
||||
if (VLNum >= EC.getKnownMinValue())
|
||||
return true;
|
||||
|
||||
return false;
|
||||
|
@ -128,7 +128,7 @@ TypeSize Type::getPrimitiveSizeInBits() const {
|
||||
ElementCount EC = VTy->getElementCount();
|
||||
TypeSize ETS = VTy->getElementType()->getPrimitiveSizeInBits();
|
||||
assert(!ETS.isScalable() && "Vector type should have fixed-width elements");
|
||||
return {ETS.getFixedSize() * EC.Min, EC.Scalable};
|
||||
return {ETS.getFixedSize() * EC.getKnownMinValue(), EC.isScalable()};
|
||||
}
|
||||
default: return TypeSize::Fixed(0);
|
||||
}
|
||||
@ -598,10 +598,10 @@ VectorType::VectorType(Type *ElType, unsigned EQ, Type::TypeID TID)
|
||||
}
|
||||
|
||||
VectorType *VectorType::get(Type *ElementType, ElementCount EC) {
|
||||
if (EC.Scalable)
|
||||
return ScalableVectorType::get(ElementType, EC.Min);
|
||||
if (EC.isScalable())
|
||||
return ScalableVectorType::get(ElementType, EC.getKnownMinValue());
|
||||
else
|
||||
return FixedVectorType::get(ElementType, EC.Min);
|
||||
return FixedVectorType::get(ElementType, EC.getKnownMinValue());
|
||||
}
|
||||
|
||||
bool VectorType::isValidElementType(Type *ElemTy) {
|
||||
|
@ -4827,7 +4827,8 @@ static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
|
||||
return EVT();
|
||||
|
||||
ElementCount EC = PredVT.getVectorElementCount();
|
||||
EVT ScalarVT = EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.Min);
|
||||
EVT ScalarVT =
|
||||
EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
|
||||
EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);
|
||||
|
||||
return MemVT;
|
||||
|
@ -3532,8 +3532,9 @@ SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
|
||||
// 256 bit non-temporal stores can be lowered to STNP. Do this as part of
|
||||
// the custom lowering, as there are no un-paired non-temporal stores and
|
||||
// legalization will break up 256 bit inputs.
|
||||
ElementCount EC = MemVT.getVectorElementCount();
|
||||
if (StoreNode->isNonTemporal() && MemVT.getSizeInBits() == 256u &&
|
||||
MemVT.getVectorElementCount().Min % 2u == 0 &&
|
||||
EC.isKnownEven() &&
|
||||
((MemVT.getScalarSizeInBits() == 8u ||
|
||||
MemVT.getScalarSizeInBits() == 16u ||
|
||||
MemVT.getScalarSizeInBits() == 32u ||
|
||||
@ -3542,11 +3543,11 @@ SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
|
||||
DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
|
||||
MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
|
||||
StoreNode->getValue(), DAG.getConstant(0, Dl, MVT::i64));
|
||||
SDValue Hi = DAG.getNode(
|
||||
ISD::EXTRACT_SUBVECTOR, Dl,
|
||||
MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
|
||||
StoreNode->getValue(),
|
||||
DAG.getConstant(MemVT.getVectorElementCount().Min / 2, Dl, MVT::i64));
|
||||
SDValue Hi =
|
||||
DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
|
||||
MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
|
||||
StoreNode->getValue(),
|
||||
DAG.getConstant(EC.getKnownMinValue() / 2, Dl, MVT::i64));
|
||||
SDValue Result = DAG.getMemIntrinsicNode(
|
||||
AArch64ISD::STNP, Dl, DAG.getVTList(MVT::Other),
|
||||
{StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
|
||||
@ -10370,7 +10371,7 @@ SDValue AArch64TargetLowering::LowerSVEStructLoad(unsigned Intrinsic,
|
||||
{Intrinsic::aarch64_sve_ld4, {4, AArch64ISD::SVE_LD4_MERGE_ZERO}}};
|
||||
|
||||
std::tie(N, Opcode) = IntrinsicMap[Intrinsic];
|
||||
assert(VT.getVectorElementCount().Min % N == 0 &&
|
||||
assert(VT.getVectorElementCount().getKnownMinValue() % N == 0 &&
|
||||
"invalid tuple vector type!");
|
||||
|
||||
EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
|
||||
@ -14443,7 +14444,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
|
||||
|
||||
uint64_t IdxConst = cast<ConstantSDNode>(Idx)->getZExtValue();
|
||||
EVT ResVT = N->getValueType(0);
|
||||
uint64_t NumLanes = ResVT.getVectorElementCount().Min;
|
||||
uint64_t NumLanes = ResVT.getVectorElementCount().getKnownMinValue();
|
||||
SDValue ExtIdx = DAG.getVectorIdxConstant(IdxConst * NumLanes, DL);
|
||||
SDValue Val =
|
||||
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, Src1, ExtIdx);
|
||||
@ -14457,10 +14458,11 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
|
||||
SDValue Vec = N->getOperand(4);
|
||||
|
||||
EVT TupleVT = Tuple.getValueType();
|
||||
uint64_t TupleLanes = TupleVT.getVectorElementCount().Min;
|
||||
uint64_t TupleLanes = TupleVT.getVectorElementCount().getKnownMinValue();
|
||||
|
||||
uint64_t IdxConst = cast<ConstantSDNode>(Idx)->getZExtValue();
|
||||
uint64_t NumLanes = Vec.getValueType().getVectorElementCount().Min;
|
||||
uint64_t NumLanes =
|
||||
Vec.getValueType().getVectorElementCount().getKnownMinValue();
|
||||
|
||||
if ((TupleLanes % NumLanes) != 0)
|
||||
report_fatal_error("invalid tuple vector!");
|
||||
@ -14696,7 +14698,7 @@ void AArch64TargetLowering::ReplaceExtractSubVectorResults(
|
||||
|
||||
ElementCount ResEC = VT.getVectorElementCount();
|
||||
|
||||
if (InVT.getVectorElementCount().Min != (ResEC.Min * 2))
|
||||
if (InVT.getVectorElementCount() != (ResEC * 2))
|
||||
return;
|
||||
|
||||
auto *CIndex = dyn_cast<ConstantSDNode>(N->getOperand(1));
|
||||
@ -14704,7 +14706,7 @@ void AArch64TargetLowering::ReplaceExtractSubVectorResults(
|
||||
return;
|
||||
|
||||
unsigned Index = CIndex->getZExtValue();
|
||||
if ((Index != 0) && (Index != ResEC.Min))
|
||||
if ((Index != 0) && (Index != ResEC.getKnownMinValue()))
|
||||
return;
|
||||
|
||||
unsigned Opcode = (Index == 0) ? AArch64ISD::UUNPKLO : AArch64ISD::UUNPKHI;
|
||||
|
@ -340,17 +340,17 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
|
||||
auto *IndexC = dyn_cast<ConstantInt>(Index);
|
||||
if (IndexC) {
|
||||
ElementCount EC = EI.getVectorOperandType()->getElementCount();
|
||||
unsigned NumElts = EC.Min;
|
||||
unsigned NumElts = EC.getKnownMinValue();
|
||||
|
||||
// InstSimplify should handle cases where the index is invalid.
|
||||
// For fixed-length vector, it's invalid to extract out-of-range element.
|
||||
if (!EC.Scalable && IndexC->getValue().uge(NumElts))
|
||||
if (!EC.isScalable() && IndexC->getValue().uge(NumElts))
|
||||
return nullptr;
|
||||
|
||||
// This instruction only demands the single element from the input vector.
|
||||
// Skip for scalable type, the number of elements is unknown at
|
||||
// compile-time.
|
||||
if (!EC.Scalable && NumElts != 1) {
|
||||
if (!EC.isScalable() && NumElts != 1) {
|
||||
// If the input vector has a single use, simplify it based on this use
|
||||
// property.
|
||||
if (SrcVec->hasOneUse()) {
|
||||
|
@ -488,12 +488,13 @@ int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const {
|
||||
case Type::ScalableVectorTyID: {
|
||||
auto *STyL = cast<VectorType>(TyL);
|
||||
auto *STyR = cast<VectorType>(TyR);
|
||||
if (STyL->getElementCount().Scalable != STyR->getElementCount().Scalable)
|
||||
return cmpNumbers(STyL->getElementCount().Scalable,
|
||||
STyR->getElementCount().Scalable);
|
||||
if (STyL->getElementCount().Min != STyR->getElementCount().Min)
|
||||
return cmpNumbers(STyL->getElementCount().Min,
|
||||
STyR->getElementCount().Min);
|
||||
if (STyL->getElementCount().isScalable() !=
|
||||
STyR->getElementCount().isScalable())
|
||||
return cmpNumbers(STyL->getElementCount().isScalable(),
|
||||
STyR->getElementCount().isScalable());
|
||||
if (STyL->getElementCount() != STyR->getElementCount())
|
||||
return cmpNumbers(STyL->getElementCount().getKnownMinValue(),
|
||||
STyR->getElementCount().getKnownMinValue());
|
||||
return cmpTypes(STyL->getElementType(), STyR->getElementType());
|
||||
}
|
||||
}
|
||||
|
@ -342,7 +342,7 @@ static Type *getMemInstValueType(Value *I) {
|
||||
/// type is irregular if its allocated size doesn't equal the store size of an
|
||||
/// element of the corresponding vector type at the given vectorization factor.
|
||||
static bool hasIrregularType(Type *Ty, const DataLayout &DL, ElementCount VF) {
|
||||
assert(!VF.Scalable && "scalable vectors not yet supported.");
|
||||
assert(!VF.isScalable() && "scalable vectors not yet supported.");
|
||||
// Determine if an array of VF elements of type Ty is "bitcast compatible"
|
||||
// with a <VF x Ty> vector.
|
||||
if (VF.isVector()) {
|
||||
@ -899,8 +899,9 @@ void InnerLoopVectorizer::setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr)
|
||||
const DILocation *DIL = Inst->getDebugLoc();
|
||||
if (DIL && Inst->getFunction()->isDebugInfoForProfiling() &&
|
||||
!isa<DbgInfoIntrinsic>(Inst)) {
|
||||
assert(!VF.Scalable && "scalable vectors not yet supported.");
|
||||
auto NewDIL = DIL->cloneByMultiplyingDuplicationFactor(UF * VF.Min);
|
||||
assert(!VF.isScalable() && "scalable vectors not yet supported.");
|
||||
auto NewDIL =
|
||||
DIL->cloneByMultiplyingDuplicationFactor(UF * VF.getKnownMinValue());
|
||||
if (NewDIL)
|
||||
B.SetCurrentDebugLocation(NewDIL.getValue());
|
||||
else
|
||||
@ -1216,7 +1217,7 @@ public:
|
||||
/// width \p VF. Return CM_Unknown if this instruction did not pass
|
||||
/// through the cost modeling.
|
||||
InstWidening getWideningDecision(Instruction *I, ElementCount VF) {
|
||||
assert(!VF.Scalable && "scalable vectors not yet supported.");
|
||||
assert(!VF.isScalable() && "scalable vectors not yet supported.");
|
||||
assert(VF.isVector() && "Expected VF >=2");
|
||||
|
||||
// Cost model is not run in the VPlan-native path - return conservative
|
||||
@ -1837,7 +1838,8 @@ void InnerLoopVectorizer::createVectorIntOrFpInductionPHI(
|
||||
|
||||
// Multiply the vectorization factor by the step using integer or
|
||||
// floating-point arithmetic as appropriate.
|
||||
Value *ConstVF = getSignedIntOrFpConstant(Step->getType(), VF.Min);
|
||||
Value *ConstVF =
|
||||
getSignedIntOrFpConstant(Step->getType(), VF.getKnownMinValue());
|
||||
Value *Mul = addFastMathFlag(Builder.CreateBinOp(MulOp, Step, ConstVF));
|
||||
|
||||
// Create a vector splat to use in the induction update.
|
||||
@ -1845,7 +1847,7 @@ void InnerLoopVectorizer::createVectorIntOrFpInductionPHI(
|
||||
// FIXME: If the step is non-constant, we create the vector splat with
|
||||
// IRBuilder. IRBuilder can constant-fold the multiply, but it doesn't
|
||||
// handle a constant vector splat.
|
||||
assert(!VF.Scalable && "scalable vectors not yet supported.");
|
||||
assert(!VF.isScalable() && "scalable vectors not yet supported.");
|
||||
Value *SplatVF = isa<Constant>(Mul)
|
||||
? ConstantVector::getSplat(VF, cast<Constant>(Mul))
|
||||
: Builder.CreateVectorSplat(VF, Mul);
|
||||
@ -1982,9 +1984,10 @@ void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, TruncInst *Trunc) {
|
||||
auto CreateSplatIV = [&](Value *ScalarIV, Value *Step) {
|
||||
Value *Broadcasted = getBroadcastInstrs(ScalarIV);
|
||||
for (unsigned Part = 0; Part < UF; ++Part) {
|
||||
assert(!VF.Scalable && "scalable vectors not yet supported.");
|
||||
Value *EntryPart = getStepVector(Broadcasted, VF.Min * Part, Step,
|
||||
ID.getInductionOpcode());
|
||||
assert(!VF.isScalable() && "scalable vectors not yet supported.");
|
||||
Value *EntryPart =
|
||||
getStepVector(Broadcasted, VF.getKnownMinValue() * Part, Step,
|
||||
ID.getInductionOpcode());
|
||||
VectorLoopValueMap.setVectorValue(EntryVal, Part, EntryPart);
|
||||
if (Trunc)
|
||||
addMetadata(EntryPart, Trunc);
|
||||
@ -2093,7 +2096,7 @@ void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
|
||||
const InductionDescriptor &ID) {
|
||||
// We shouldn't have to build scalar steps if we aren't vectorizing.
|
||||
assert(VF.isVector() && "VF should be greater than one");
|
||||
assert(!VF.Scalable &&
|
||||
assert(!VF.isScalable() &&
|
||||
"the code below assumes a fixed number of elements at compile time");
|
||||
// Get the value type and ensure it and the step have the same integer type.
|
||||
Type *ScalarIVTy = ScalarIV->getType()->getScalarType();
|
||||
@ -2118,12 +2121,12 @@ void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
|
||||
unsigned Lanes =
|
||||
Cost->isUniformAfterVectorization(cast<Instruction>(EntryVal), VF)
|
||||
? 1
|
||||
: VF.Min;
|
||||
: VF.getKnownMinValue();
|
||||
// Compute the scalar steps and save the results in VectorLoopValueMap.
|
||||
for (unsigned Part = 0; Part < UF; ++Part) {
|
||||
for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
|
||||
auto *StartIdx =
|
||||
getSignedIntOrFpConstant(ScalarIVTy, VF.Min * Part + Lane);
|
||||
auto *StartIdx = getSignedIntOrFpConstant(
|
||||
ScalarIVTy, VF.getKnownMinValue() * Part + Lane);
|
||||
auto *Mul = addFastMathFlag(Builder.CreateBinOp(MulOp, StartIdx, Step));
|
||||
auto *Add = addFastMathFlag(Builder.CreateBinOp(AddOp, ScalarIV, Mul));
|
||||
VectorLoopValueMap.setScalarValue(EntryVal, {Part, Lane}, Add);
|
||||
@ -2166,9 +2169,10 @@ Value *InnerLoopVectorizer::getOrCreateVectorValue(Value *V, unsigned Part) {
|
||||
// is known to be uniform after vectorization, this corresponds to lane zero
|
||||
// of the Part unroll iteration. Otherwise, the last instruction is the one
|
||||
// we created for the last vector lane of the Part unroll iteration.
|
||||
assert(!VF.Scalable && "scalable vectors not yet supported.");
|
||||
unsigned LastLane =
|
||||
Cost->isUniformAfterVectorization(I, VF) ? 0 : VF.Min - 1;
|
||||
assert(!VF.isScalable() && "scalable vectors not yet supported.");
|
||||
unsigned LastLane = Cost->isUniformAfterVectorization(I, VF)
|
||||
? 0
|
||||
: VF.getKnownMinValue() - 1;
|
||||
auto *LastInst = cast<Instruction>(
|
||||
VectorLoopValueMap.getScalarValue(V, {Part, LastLane}));
|
||||
|
||||
@ -2190,10 +2194,10 @@ Value *InnerLoopVectorizer::getOrCreateVectorValue(Value *V, unsigned Part) {
|
||||
VectorLoopValueMap.setVectorValue(V, Part, VectorValue);
|
||||
} else {
|
||||
// Initialize packing with insertelements to start from undef.
|
||||
assert(!VF.Scalable && "VF is assumed to be non scalable.");
|
||||
assert(!VF.isScalable() && "VF is assumed to be non scalable.");
|
||||
Value *Undef = UndefValue::get(VectorType::get(V->getType(), VF));
|
||||
VectorLoopValueMap.setVectorValue(V, Part, Undef);
|
||||
for (unsigned Lane = 0; Lane < VF.Min; ++Lane)
|
||||
for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane)
|
||||
packScalarIntoVectorValue(V, {Part, Lane});
|
||||
VectorValue = VectorLoopValueMap.getVectorValue(V, Part);
|
||||
}
|
||||
@ -2257,10 +2261,10 @@ void InnerLoopVectorizer::packScalarIntoVectorValue(
|
||||
|
||||
Value *InnerLoopVectorizer::reverseVector(Value *Vec) {
|
||||
assert(Vec->getType()->isVectorTy() && "Invalid type");
|
||||
assert(!VF.Scalable && "Cannot reverse scalable vectors");
|
||||
assert(!VF.isScalable() && "Cannot reverse scalable vectors");
|
||||
SmallVector<int, 8> ShuffleMask;
|
||||
for (unsigned i = 0; i < VF.Min; ++i)
|
||||
ShuffleMask.push_back(VF.Min - i - 1);
|
||||
for (unsigned i = 0; i < VF.getKnownMinValue(); ++i)
|
||||
ShuffleMask.push_back(VF.getKnownMinValue() - i - 1);
|
||||
|
||||
return Builder.CreateShuffleVector(Vec, UndefValue::get(Vec->getType()),
|
||||
ShuffleMask, "reverse");
|
||||
@ -2314,7 +2318,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
|
||||
// Prepare for the vector type of the interleaved load/store.
|
||||
Type *ScalarTy = getMemInstValueType(Instr);
|
||||
unsigned InterleaveFactor = Group->getFactor();
|
||||
assert(!VF.Scalable && "scalable vectors not yet supported.");
|
||||
assert(!VF.isScalable() && "scalable vectors not yet supported.");
|
||||
auto *VecTy = VectorType::get(ScalarTy, VF * InterleaveFactor);
|
||||
|
||||
// Prepare for the new pointers.
|
||||
@ -2331,10 +2335,10 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
|
||||
// pointer operand of the interleaved access is supposed to be uniform. For
|
||||
// uniform instructions, we're only required to generate a value for the
|
||||
// first vector lane in each unroll iteration.
|
||||
assert(!VF.Scalable &&
|
||||
assert(!VF.isScalable() &&
|
||||
"scalable vector reverse operation is not implemented");
|
||||
if (Group->isReverse())
|
||||
Index += (VF.Min - 1) * Group->getFactor();
|
||||
Index += (VF.getKnownMinValue() - 1) * Group->getFactor();
|
||||
|
||||
for (unsigned Part = 0; Part < UF; Part++) {
|
||||
Value *AddrPart = State.get(Addr, {Part, 0});
|
||||
@ -2369,8 +2373,8 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
|
||||
|
||||
Value *MaskForGaps = nullptr;
|
||||
if (Group->requiresScalarEpilogue() && !Cost->isScalarEpilogueAllowed()) {
|
||||
assert(!VF.Scalable && "scalable vectors not yet supported.");
|
||||
MaskForGaps = createBitMaskForGaps(Builder, VF.Min, *Group);
|
||||
assert(!VF.isScalable() && "scalable vectors not yet supported.");
|
||||
MaskForGaps = createBitMaskForGaps(Builder, VF.getKnownMinValue(), *Group);
|
||||
assert(MaskForGaps && "Mask for Gaps is required but it is null");
|
||||
}
|
||||
|
||||
@ -2387,10 +2391,10 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
|
||||
if (BlockInMask) {
|
||||
Value *BlockInMaskPart = State.get(BlockInMask, Part);
|
||||
auto *Undefs = UndefValue::get(BlockInMaskPart->getType());
|
||||
assert(!VF.Scalable && "scalable vectors not yet supported.");
|
||||
assert(!VF.isScalable() && "scalable vectors not yet supported.");
|
||||
Value *ShuffledMask = Builder.CreateShuffleVector(
|
||||
BlockInMaskPart, Undefs,
|
||||
createReplicatedMask(InterleaveFactor, VF.Min),
|
||||
createReplicatedMask(InterleaveFactor, VF.getKnownMinValue()),
|
||||
"interleaved.mask");
|
||||
GroupMask = MaskForGaps
|
||||
? Builder.CreateBinOp(Instruction::And, ShuffledMask,
|
||||
@ -2417,15 +2421,16 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
|
||||
if (!Member)
|
||||
continue;
|
||||
|
||||
assert(!VF.Scalable && "scalable vectors not yet supported.");
|
||||
auto StrideMask = createStrideMask(I, InterleaveFactor, VF.Min);
|
||||
assert(!VF.isScalable() && "scalable vectors not yet supported.");
|
||||
auto StrideMask =
|
||||
createStrideMask(I, InterleaveFactor, VF.getKnownMinValue());
|
||||
for (unsigned Part = 0; Part < UF; Part++) {
|
||||
Value *StridedVec = Builder.CreateShuffleVector(
|
||||
NewLoads[Part], UndefVec, StrideMask, "strided.vec");
|
||||
|
||||
// If this member has different type, cast the result type.
|
||||
if (Member->getType() != ScalarTy) {
|
||||
assert(!VF.Scalable && "VF is assumed to be non scalable.");
|
||||
assert(!VF.isScalable() && "VF is assumed to be non scalable.");
|
||||
VectorType *OtherVTy = VectorType::get(Member->getType(), VF);
|
||||
StridedVec = createBitOrPointerCast(StridedVec, OtherVTy, DL);
|
||||
}
|
||||
@ -2440,7 +2445,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
|
||||
}
|
||||
|
||||
// The sub vector type for current instruction.
|
||||
assert(!VF.Scalable && "VF is assumed to be non scalable.");
|
||||
assert(!VF.isScalable() && "VF is assumed to be non scalable.");
|
||||
auto *SubVT = VectorType::get(ScalarTy, VF);
|
||||
|
||||
// Vectorize the interleaved store group.
|
||||
@ -2469,9 +2474,10 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
|
||||
Value *WideVec = concatenateVectors(Builder, StoredVecs);
|
||||
|
||||
// Interleave the elements in the wide vector.
|
||||
assert(!VF.Scalable && "scalable vectors not yet supported.");
|
||||
assert(!VF.isScalable() && "scalable vectors not yet supported.");
|
||||
Value *IVec = Builder.CreateShuffleVector(
|
||||
WideVec, UndefVec, createInterleaveMask(VF.Min, InterleaveFactor),
|
||||
WideVec, UndefVec,
|
||||
createInterleaveMask(VF.getKnownMinValue(), InterleaveFactor),
|
||||
"interleaved.vec");
|
||||
|
||||
Instruction *NewStoreInstr;
|
||||
@ -2480,7 +2486,8 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
|
||||
auto *Undefs = UndefValue::get(BlockInMaskPart->getType());
|
||||
Value *ShuffledMask = Builder.CreateShuffleVector(
|
||||
BlockInMaskPart, Undefs,
|
||||
createReplicatedMask(InterleaveFactor, VF.Min), "interleaved.mask");
|
||||
createReplicatedMask(InterleaveFactor, VF.getKnownMinValue()),
|
||||
"interleaved.mask");
|
||||
NewStoreInstr = Builder.CreateMaskedStore(
|
||||
IVec, AddrParts[Part], Group->getAlign(), ShuffledMask);
|
||||
}
|
||||
@ -2514,7 +2521,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
|
||||
|
||||
Type *ScalarDataTy = getMemInstValueType(Instr);
|
||||
|
||||
assert(!VF.Scalable && "scalable vectors not yet supported.");
|
||||
assert(!VF.isScalable() && "scalable vectors not yet supported.");
|
||||
auto *DataTy = VectorType::get(ScalarDataTy, VF);
|
||||
const Align Alignment = getLoadStoreAlignment(Instr);
|
||||
|
||||
@ -2550,16 +2557,16 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
|
||||
// If the address is consecutive but reversed, then the
|
||||
// wide store needs to start at the last vector element.
|
||||
PartPtr = cast<GetElementPtrInst>(Builder.CreateGEP(
|
||||
ScalarDataTy, Ptr, Builder.getInt32(-Part * VF.Min)));
|
||||
ScalarDataTy, Ptr, Builder.getInt32(-Part * VF.getKnownMinValue())));
|
||||
PartPtr->setIsInBounds(InBounds);
|
||||
PartPtr = cast<GetElementPtrInst>(Builder.CreateGEP(
|
||||
ScalarDataTy, PartPtr, Builder.getInt32(1 - VF.Min)));
|
||||
ScalarDataTy, PartPtr, Builder.getInt32(1 - VF.getKnownMinValue())));
|
||||
PartPtr->setIsInBounds(InBounds);
|
||||
if (isMaskRequired) // Reverse of a null all-one mask is a null mask.
|
||||
BlockInMaskParts[Part] = reverseVector(BlockInMaskParts[Part]);
|
||||
} else {
|
||||
PartPtr = cast<GetElementPtrInst>(Builder.CreateGEP(
|
||||
ScalarDataTy, Ptr, Builder.getInt32(Part * VF.Min)));
|
||||
ScalarDataTy, Ptr, Builder.getInt32(Part * VF.getKnownMinValue())));
|
||||
PartPtr->setIsInBounds(InBounds);
|
||||
}
|
||||
|
||||
@ -2756,8 +2763,8 @@ Value *InnerLoopVectorizer::getOrCreateVectorTripCount(Loop *L) {
|
||||
|
||||
Type *Ty = TC->getType();
|
||||
// This is where we can make the step a runtime constant.
|
||||
assert(!VF.Scalable && "scalable vectorization is not supported yet");
|
||||
Constant *Step = ConstantInt::get(Ty, VF.Min * UF);
|
||||
assert(!VF.isScalable() && "scalable vectorization is not supported yet");
|
||||
Constant *Step = ConstantInt::get(Ty, VF.getKnownMinValue() * UF);
|
||||
|
||||
// If the tail is to be folded by masking, round the number of iterations N
|
||||
// up to a multiple of Step instead of rounding down. This is done by first
|
||||
@ -2766,10 +2773,10 @@ Value *InnerLoopVectorizer::getOrCreateVectorTripCount(Loop *L) {
|
||||
// that it starts at zero and its Step is a power of two; the loop will then
|
||||
// exit, with the last early-exit vector comparison also producing all-true.
|
||||
if (Cost->foldTailByMasking()) {
|
||||
assert(isPowerOf2_32(VF.Min * UF) &&
|
||||
assert(isPowerOf2_32(VF.getKnownMinValue() * UF) &&
|
||||
"VF*UF must be a power of 2 when folding tail by masking");
|
||||
TC = Builder.CreateAdd(TC, ConstantInt::get(Ty, VF.Min * UF - 1),
|
||||
"n.rnd.up");
|
||||
TC = Builder.CreateAdd(
|
||||
TC, ConstantInt::get(Ty, VF.getKnownMinValue() * UF - 1), "n.rnd.up");
|
||||
}
|
||||
|
||||
// Now we need to generate the expression for the part of the loop that the
|
||||
@ -2846,9 +2853,10 @@ void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L,
|
||||
// If tail is to be folded, vector loop takes care of all iterations.
|
||||
Value *CheckMinIters = Builder.getFalse();
|
||||
if (!Cost->foldTailByMasking()) {
|
||||
assert(!VF.Scalable && "scalable vectors not yet supported.");
|
||||
assert(!VF.isScalable() && "scalable vectors not yet supported.");
|
||||
CheckMinIters = Builder.CreateICmp(
|
||||
P, Count, ConstantInt::get(Count->getType(), VF.Min * UF),
|
||||
P, Count,
|
||||
ConstantInt::get(Count->getType(), VF.getKnownMinValue() * UF),
|
||||
"min.iters.check");
|
||||
}
|
||||
// Create new preheader for vector loop.
|
||||
@ -3303,8 +3311,8 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
|
||||
Value *StartIdx = ConstantInt::get(IdxTy, 0);
|
||||
// The loop step is equal to the vectorization factor (num of SIMD elements)
|
||||
// times the unroll factor (num of SIMD instructions).
|
||||
assert(!VF.Scalable && "scalable vectors not yet supported.");
|
||||
Constant *Step = ConstantInt::get(IdxTy, VF.Min * UF);
|
||||
assert(!VF.isScalable() && "scalable vectors not yet supported.");
|
||||
Constant *Step = ConstantInt::get(IdxTy, VF.getKnownMinValue() * UF);
|
||||
Value *CountRoundDown = getOrCreateVectorTripCount(Lp);
|
||||
Induction =
|
||||
createInductionVariable(Lp, StartIdx, CountRoundDown, Step,
|
||||
@ -3438,7 +3446,7 @@ static void cse(BasicBlock *BB) {
|
||||
unsigned LoopVectorizationCostModel::getVectorCallCost(CallInst *CI,
|
||||
ElementCount VF,
|
||||
bool &NeedToScalarize) {
|
||||
assert(!VF.Scalable && "scalable vectors not yet supported.");
|
||||
assert(!VF.isScalable() && "scalable vectors not yet supported.");
|
||||
Function *F = CI->getCalledFunction();
|
||||
Type *ScalarRetTy = CI->getType();
|
||||
SmallVector<Type *, 4> Tys, ScalarTys;
|
||||
@ -3463,7 +3471,7 @@ unsigned LoopVectorizationCostModel::getVectorCallCost(CallInst *CI,
|
||||
// packing the return values to a vector.
|
||||
unsigned ScalarizationCost = getScalarizationOverhead(CI, VF);
|
||||
|
||||
unsigned Cost = ScalarCallCost * VF.Min + ScalarizationCost;
|
||||
unsigned Cost = ScalarCallCost * VF.getKnownMinValue() + ScalarizationCost;
|
||||
|
||||
// If we can't emit a vector call for this function, then the currently found
|
||||
// cost is the cost we need to return.
|
||||
@ -3684,11 +3692,11 @@ void InnerLoopVectorizer::fixVectorizedLoop() {
|
||||
// profile is not inherently precise anyway. Note also possible bypass of
|
||||
// vector code caused by legality checks is ignored, assigning all the weight
|
||||
// to the vector loop, optimistically.
|
||||
assert(!VF.Scalable &&
|
||||
assert(!VF.isScalable() &&
|
||||
"cannot use scalable ElementCount to determine unroll factor");
|
||||
setProfileInfoAfterUnrolling(LI->getLoopFor(LoopScalarBody),
|
||||
LI->getLoopFor(LoopVectorBody),
|
||||
LI->getLoopFor(LoopScalarBody), VF.Min * UF);
|
||||
setProfileInfoAfterUnrolling(
|
||||
LI->getLoopFor(LoopScalarBody), LI->getLoopFor(LoopVectorBody),
|
||||
LI->getLoopFor(LoopScalarBody), VF.getKnownMinValue() * UF);
|
||||
}
|
||||
|
||||
void InnerLoopVectorizer::fixCrossIterationPHIs() {
|
||||
@ -3769,10 +3777,10 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) {
|
||||
auto *VectorInit = ScalarInit;
|
||||
if (VF.isVector()) {
|
||||
Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator());
|
||||
assert(!VF.Scalable && "VF is assumed to be non scalable.");
|
||||
assert(!VF.isScalable() && "VF is assumed to be non scalable.");
|
||||
VectorInit = Builder.CreateInsertElement(
|
||||
UndefValue::get(VectorType::get(VectorInit->getType(), VF)), VectorInit,
|
||||
Builder.getInt32(VF.Min - 1), "vector.recur.init");
|
||||
Builder.getInt32(VF.getKnownMinValue() - 1), "vector.recur.init");
|
||||
}
|
||||
|
||||
// We constructed a temporary phi node in the first phase of vectorization.
|
||||
@ -3813,11 +3821,11 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) {
|
||||
|
||||
// We will construct a vector for the recurrence by combining the values for
|
||||
// the current and previous iterations. This is the required shuffle mask.
|
||||
assert(!VF.Scalable);
|
||||
SmallVector<int, 8> ShuffleMask(VF.Min);
|
||||
ShuffleMask[0] = VF.Min - 1;
|
||||
for (unsigned I = 1; I < VF.Min; ++I)
|
||||
ShuffleMask[I] = I + VF.Min - 1;
|
||||
assert(!VF.isScalable());
|
||||
SmallVector<int, 8> ShuffleMask(VF.getKnownMinValue());
|
||||
ShuffleMask[0] = VF.getKnownMinValue() - 1;
|
||||
for (unsigned I = 1; I < VF.getKnownMinValue(); ++I)
|
||||
ShuffleMask[I] = I + VF.getKnownMinValue() - 1;
|
||||
|
||||
// The vector from which to take the initial value for the current iteration
|
||||
// (actual or unrolled). Initially, this is the vector phi node.
|
||||
@ -3846,7 +3854,8 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) {
|
||||
if (VF.isVector()) {
|
||||
Builder.SetInsertPoint(LoopMiddleBlock->getTerminator());
|
||||
ExtractForScalar = Builder.CreateExtractElement(
|
||||
ExtractForScalar, Builder.getInt32(VF.Min - 1), "vector.recur.extract");
|
||||
ExtractForScalar, Builder.getInt32(VF.getKnownMinValue() - 1),
|
||||
"vector.recur.extract");
|
||||
}
|
||||
// Extract the second last element in the middle block if the
|
||||
// Phi is used outside the loop. We need to extract the phi itself
|
||||
@ -3856,7 +3865,8 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) {
|
||||
Value *ExtractForPhiUsedOutsideLoop = nullptr;
|
||||
if (VF.isVector())
|
||||
ExtractForPhiUsedOutsideLoop = Builder.CreateExtractElement(
|
||||
Incoming, Builder.getInt32(VF.Min - 2), "vector.recur.extract.for.phi");
|
||||
Incoming, Builder.getInt32(VF.getKnownMinValue() - 2),
|
||||
"vector.recur.extract.for.phi");
|
||||
// When loop is unrolled without vectorizing, initialize
|
||||
// ExtractForPhiUsedOutsideLoop with the value just prior to unrolled value of
|
||||
// `Incoming`. This is analogous to the vectorized case above: extracting the
|
||||
@ -4013,7 +4023,7 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
|
||||
// entire expression in the smaller type.
|
||||
if (VF.isVector() && Phi->getType() != RdxDesc.getRecurrenceType()) {
|
||||
assert(!IsInLoopReductionPhi && "Unexpected truncated inloop reduction!");
|
||||
assert(!VF.Scalable && "scalable vectors not yet supported.");
|
||||
assert(!VF.isScalable() && "scalable vectors not yet supported.");
|
||||
Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), VF);
|
||||
Builder.SetInsertPoint(
|
||||
LI->getLoopFor(LoopVectorBody)->getLoopLatch()->getTerminator());
|
||||
@ -4145,7 +4155,7 @@ void InnerLoopVectorizer::clearReductionWrapFlags(
|
||||
}
|
||||
|
||||
void InnerLoopVectorizer::fixLCSSAPHIs() {
|
||||
assert(!VF.Scalable && "the code below assumes fixed width vectors");
|
||||
assert(!VF.isScalable() && "the code below assumes fixed width vectors");
|
||||
for (PHINode &LCSSAPhi : LoopExitBlock->phis()) {
|
||||
if (LCSSAPhi.getNumIncomingValues() == 1) {
|
||||
auto *IncomingValue = LCSSAPhi.getIncomingValue(0);
|
||||
@ -4155,7 +4165,7 @@ void InnerLoopVectorizer::fixLCSSAPHIs() {
|
||||
LastLane = Cost->isUniformAfterVectorization(
|
||||
cast<Instruction>(IncomingValue), VF)
|
||||
? 0
|
||||
: VF.Min - 1;
|
||||
: VF.getKnownMinValue() - 1;
|
||||
// Can be a loop invariant incoming value or the last scalar value to be
|
||||
// extracted from the vectorized loop.
|
||||
Builder.SetInsertPoint(LoopMiddleBlock->getTerminator());
|
||||
@ -4338,7 +4348,7 @@ void InnerLoopVectorizer::widenGEP(GetElementPtrInst *GEP, VPUser &Operands,
|
||||
|
||||
void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF,
|
||||
ElementCount VF) {
|
||||
assert(!VF.Scalable && "scalable vectors not yet supported.");
|
||||
assert(!VF.isScalable() && "scalable vectors not yet supported.");
|
||||
PHINode *P = cast<PHINode>(PN);
|
||||
if (EnableVPlanNativePath) {
|
||||
// Currently we enter here in the VPlan-native path for non-induction
|
||||
@ -4403,11 +4413,12 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF,
|
||||
// Determine the number of scalars we need to generate for each unroll
|
||||
// iteration. If the instruction is uniform, we only need to generate the
|
||||
// first lane. Otherwise, we generate all VF values.
|
||||
unsigned Lanes = Cost->isUniformAfterVectorization(P, VF) ? 1 : VF.Min;
|
||||
unsigned Lanes =
|
||||
Cost->isUniformAfterVectorization(P, VF) ? 1 : VF.getKnownMinValue();
|
||||
for (unsigned Part = 0; Part < UF; ++Part) {
|
||||
for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
|
||||
Constant *Idx =
|
||||
ConstantInt::get(PtrInd->getType(), Lane + Part * VF.Min);
|
||||
Constant *Idx = ConstantInt::get(PtrInd->getType(),
|
||||
Lane + Part * VF.getKnownMinValue());
|
||||
Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx);
|
||||
Value *SclrGep =
|
||||
emitTransformedIndex(Builder, GlobalIdx, PSE.getSE(), DL, II);
|
||||
@ -4437,8 +4448,9 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF,
|
||||
Exp.expandCodeFor(ScalarStep, PhiType, InductionLoc);
|
||||
Value *InductionGEP = GetElementPtrInst::Create(
|
||||
ScStValueType->getPointerElementType(), NewPointerPhi,
|
||||
Builder.CreateMul(ScalarStepValue,
|
||||
ConstantInt::get(PhiType, VF.Min * UF)),
|
||||
Builder.CreateMul(
|
||||
ScalarStepValue,
|
||||
ConstantInt::get(PhiType, VF.getKnownMinValue() * UF)),
|
||||
"ptr.ind", InductionLoc);
|
||||
NewPointerPhi->addIncoming(InductionGEP, LoopLatch);
|
||||
|
||||
@ -4448,15 +4460,17 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF,
|
||||
for (unsigned Part = 0; Part < UF; ++Part) {
|
||||
SmallVector<Constant *, 8> Indices;
|
||||
// Create a vector of consecutive numbers from zero to VF.
|
||||
for (unsigned i = 0; i < VF.Min; ++i)
|
||||
Indices.push_back(ConstantInt::get(PhiType, i + Part * VF.Min));
|
||||
for (unsigned i = 0; i < VF.getKnownMinValue(); ++i)
|
||||
Indices.push_back(
|
||||
ConstantInt::get(PhiType, i + Part * VF.getKnownMinValue()));
|
||||
Constant *StartOffset = ConstantVector::get(Indices);
|
||||
|
||||
Value *GEP = Builder.CreateGEP(
|
||||
ScStValueType->getPointerElementType(), NewPointerPhi,
|
||||
Builder.CreateMul(StartOffset,
|
||||
Builder.CreateVectorSplat(VF.Min, ScalarStepValue),
|
||||
"vector.gep"));
|
||||
Builder.CreateMul(
|
||||
StartOffset,
|
||||
Builder.CreateVectorSplat(VF.getKnownMinValue(), ScalarStepValue),
|
||||
"vector.gep"));
|
||||
VectorLoopValueMap.setVectorValue(P, Part, GEP);
|
||||
}
|
||||
}
|
||||
@ -4483,7 +4497,7 @@ static bool mayDivideByZero(Instruction &I) {
|
||||
|
||||
void InnerLoopVectorizer::widenInstruction(Instruction &I, VPUser &User,
|
||||
VPTransformState &State) {
|
||||
assert(!VF.Scalable && "scalable vectors not yet supported.");
|
||||
assert(!VF.isScalable() && "scalable vectors not yet supported.");
|
||||
switch (I.getOpcode()) {
|
||||
case Instruction::Call:
|
||||
case Instruction::Br:
|
||||
@ -4571,7 +4585,7 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I, VPUser &User,
|
||||
setDebugLocFromInst(Builder, CI);
|
||||
|
||||
/// Vectorize casts.
|
||||
assert(!VF.Scalable && "VF is assumed to be non scalable.");
|
||||
assert(!VF.isScalable() && "VF is assumed to be non scalable.");
|
||||
Type *DestTy =
|
||||
(VF.isScalar()) ? CI->getType() : VectorType::get(CI->getType(), VF);
|
||||
|
||||
@ -4601,7 +4615,7 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPUser &ArgOperands,
|
||||
|
||||
SmallVector<Type *, 4> Tys;
|
||||
for (Value *ArgOperand : CI->arg_operands())
|
||||
Tys.push_back(ToVectorTy(ArgOperand->getType(), VF.Min));
|
||||
Tys.push_back(ToVectorTy(ArgOperand->getType(), VF.getKnownMinValue()));
|
||||
|
||||
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
|
||||
|
||||
@ -4633,7 +4647,7 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPUser &ArgOperands,
|
||||
// Use vector version of the intrinsic.
|
||||
Type *TysForDecl[] = {CI->getType()};
|
||||
if (VF.isVector()) {
|
||||
assert(!VF.Scalable && "VF is assumed to be non scalable.");
|
||||
assert(!VF.isScalable() && "VF is assumed to be non scalable.");
|
||||
TysForDecl[0] = VectorType::get(CI->getType()->getScalarType(), VF);
|
||||
}
|
||||
VectorF = Intrinsic::getDeclaration(M, ID, TysForDecl);
|
||||
@ -4872,7 +4886,7 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
|
||||
|
||||
bool LoopVectorizationCostModel::isScalarWithPredication(Instruction *I,
|
||||
ElementCount VF) {
|
||||
assert(!VF.Scalable && "scalable vectors not yet supported.");
|
||||
assert(!VF.isScalable() && "scalable vectors not yet supported.");
|
||||
if (!blockNeedsPredication(I->getParent()))
|
||||
return false;
|
||||
switch(I->getOpcode()) {
|
||||
@ -5357,7 +5371,7 @@ LoopVectorizationCostModel::computeFeasibleMaxVF(unsigned ConstTripCount) {
|
||||
Selected = false;
|
||||
}
|
||||
if (Selected) {
|
||||
MaxVF = VFs[i].Min;
|
||||
MaxVF = VFs[i].getKnownMinValue();
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -5558,8 +5572,9 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
|
||||
}
|
||||
|
||||
// Clamp the interleave ranges to reasonable counts.
|
||||
assert(!VF.Scalable && "scalable vectors not yet supported.");
|
||||
unsigned MaxInterleaveCount = TTI.getMaxInterleaveFactor(VF.Min);
|
||||
assert(!VF.isScalable() && "scalable vectors not yet supported.");
|
||||
unsigned MaxInterleaveCount =
|
||||
TTI.getMaxInterleaveFactor(VF.getKnownMinValue());
|
||||
|
||||
// Check if the user has overridden the max.
|
||||
if (VF == 1) {
|
||||
@ -5573,7 +5588,8 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
|
||||
// If trip count is known or estimated compile time constant, limit the
|
||||
// interleave count to be less than the trip count divided by VF.
|
||||
if (BestKnownTC) {
|
||||
MaxInterleaveCount = std::min(*BestKnownTC / VF.Min, MaxInterleaveCount);
|
||||
MaxInterleaveCount =
|
||||
std::min(*BestKnownTC / VF.getKnownMinValue(), MaxInterleaveCount);
|
||||
}
|
||||
|
||||
// If we did not calculate the cost for VF (because the user selected the VF)
|
||||
@ -5745,8 +5761,9 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<ElementCount> VFs) {
|
||||
if (Ty->isTokenTy())
|
||||
return 0U;
|
||||
unsigned TypeSize = DL.getTypeSizeInBits(Ty->getScalarType());
|
||||
assert(!VF.Scalable && "scalable vectors not yet supported.");
|
||||
return std::max<unsigned>(1, VF.Min * TypeSize / WidestRegister);
|
||||
assert(!VF.isScalable() && "scalable vectors not yet supported.");
|
||||
return std::max<unsigned>(1, VF.getKnownMinValue() * TypeSize /
|
||||
WidestRegister);
|
||||
};
|
||||
|
||||
for (unsigned int i = 0, s = IdxToInstr.size(); i < s; ++i) {
|
||||
@ -5973,19 +5990,20 @@ int LoopVectorizationCostModel::computePredInstDiscount(
|
||||
// the instruction as if it wasn't if-converted and instead remained in the
|
||||
// predicated block. We will scale this cost by block probability after
|
||||
// computing the scalarization overhead.
|
||||
assert(!VF.Scalable && "scalable vectors not yet supported.");
|
||||
assert(!VF.isScalable() && "scalable vectors not yet supported.");
|
||||
unsigned ScalarCost =
|
||||
VF.Min * getInstructionCost(I, ElementCount::getFixed(1)).first;
|
||||
VF.getKnownMinValue() *
|
||||
getInstructionCost(I, ElementCount::getFixed(1)).first;
|
||||
|
||||
// Compute the scalarization overhead of needed insertelement instructions
|
||||
// and phi nodes.
|
||||
if (isScalarWithPredication(I) && !I->getType()->isVoidTy()) {
|
||||
ScalarCost += TTI.getScalarizationOverhead(
|
||||
cast<VectorType>(ToVectorTy(I->getType(), VF)),
|
||||
APInt::getAllOnesValue(VF.Min), true, false);
|
||||
assert(!VF.Scalable && "scalable vectors not yet supported.");
|
||||
APInt::getAllOnesValue(VF.getKnownMinValue()), true, false);
|
||||
assert(!VF.isScalable() && "scalable vectors not yet supported.");
|
||||
ScalarCost +=
|
||||
VF.Min *
|
||||
VF.getKnownMinValue() *
|
||||
TTI.getCFInstrCost(Instruction::PHI, TTI::TCK_RecipThroughput);
|
||||
}
|
||||
|
||||
@ -6000,10 +6018,10 @@ int LoopVectorizationCostModel::computePredInstDiscount(
|
||||
if (canBeScalarized(J))
|
||||
Worklist.push_back(J);
|
||||
else if (needsExtract(J, VF)) {
|
||||
assert(!VF.Scalable && "scalable vectors not yet supported.");
|
||||
assert(!VF.isScalable() && "scalable vectors not yet supported.");
|
||||
ScalarCost += TTI.getScalarizationOverhead(
|
||||
cast<VectorType>(ToVectorTy(J->getType(), VF)),
|
||||
APInt::getAllOnesValue(VF.Min), false, true);
|
||||
APInt::getAllOnesValue(VF.getKnownMinValue()), false, true);
|
||||
}
|
||||
}
|
||||
|
||||
@ -6021,7 +6039,7 @@ int LoopVectorizationCostModel::computePredInstDiscount(
|
||||
|
||||
LoopVectorizationCostModel::VectorizationCostTy
|
||||
LoopVectorizationCostModel::expectedCost(ElementCount VF) {
|
||||
assert(!VF.Scalable && "scalable vectors not yet supported.");
|
||||
assert(!VF.isScalable() && "scalable vectors not yet supported.");
|
||||
VectorizationCostTy Cost;
|
||||
|
||||
// For each block.
|
||||
@ -6104,7 +6122,7 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
|
||||
ElementCount VF) {
|
||||
assert(VF.isVector() &&
|
||||
"Scalarization cost of instruction implies vectorization.");
|
||||
assert(!VF.Scalable && "scalable vectors not yet supported.");
|
||||
assert(!VF.isScalable() && "scalable vectors not yet supported.");
|
||||
Type *ValTy = getMemInstValueType(I);
|
||||
auto SE = PSE.getSE();
|
||||
|
||||
@ -6117,12 +6135,13 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
|
||||
const SCEV *PtrSCEV = getAddressAccessSCEV(Ptr, Legal, PSE, TheLoop);
|
||||
|
||||
// Get the cost of the scalar memory instruction and address computation.
|
||||
unsigned Cost = VF.Min * TTI.getAddressComputationCost(PtrTy, SE, PtrSCEV);
|
||||
unsigned Cost =
|
||||
VF.getKnownMinValue() * TTI.getAddressComputationCost(PtrTy, SE, PtrSCEV);
|
||||
|
||||
// Don't pass *I here, since it is scalar but will actually be part of a
|
||||
// vectorized loop where the user of it is a vectorized instruction.
|
||||
const Align Alignment = getLoadStoreAlignment(I);
|
||||
Cost += VF.Min *
|
||||
Cost += VF.getKnownMinValue() *
|
||||
TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(), Alignment,
|
||||
AS, TTI::TCK_RecipThroughput);
|
||||
|
||||
@ -6190,9 +6209,10 @@ unsigned LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I,
|
||||
return TTI.getAddressComputationCost(ValTy) +
|
||||
TTI.getMemoryOpCost(Instruction::Store, ValTy, Alignment, AS,
|
||||
CostKind) +
|
||||
(isLoopInvariantStoreValue ? 0 : TTI.getVectorInstrCost(
|
||||
Instruction::ExtractElement,
|
||||
VectorTy, VF.Min - 1));
|
||||
(isLoopInvariantStoreValue
|
||||
? 0
|
||||
: TTI.getVectorInstrCost(Instruction::ExtractElement, VectorTy,
|
||||
VF.getKnownMinValue() - 1));
|
||||
}
|
||||
|
||||
unsigned LoopVectorizationCostModel::getGatherScatterCost(Instruction *I,
|
||||
@ -6218,7 +6238,7 @@ unsigned LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
|
||||
assert(Group && "Fail to get an interleaved access group.");
|
||||
|
||||
unsigned InterleaveFactor = Group->getFactor();
|
||||
assert(!VF.Scalable && "scalable vectors not yet supported.");
|
||||
assert(!VF.isScalable() && "scalable vectors not yet supported.");
|
||||
auto *WideVecTy = VectorType::get(ValTy, VF * InterleaveFactor);
|
||||
|
||||
// Holds the indices of existing members in an interleaved load group.
|
||||
@ -6266,7 +6286,7 @@ unsigned LoopVectorizationCostModel::getMemoryInstructionCost(Instruction *I,
|
||||
LoopVectorizationCostModel::VectorizationCostTy
|
||||
LoopVectorizationCostModel::getInstructionCost(Instruction *I,
|
||||
ElementCount VF) {
|
||||
assert(!VF.Scalable &&
|
||||
assert(!VF.isScalable() &&
|
||||
"the cost model is not yet implemented for scalable vectorization");
|
||||
// If we know that this instruction will remain uniform, check the cost of
|
||||
// the scalar version.
|
||||
@ -6282,22 +6302,24 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
|
||||
auto InstSet = ForcedScalar->second;
|
||||
if (InstSet.count(I))
|
||||
return VectorizationCostTy(
|
||||
(getInstructionCost(I, ElementCount::getFixed(1)).first * VF.Min),
|
||||
(getInstructionCost(I, ElementCount::getFixed(1)).first *
|
||||
VF.getKnownMinValue()),
|
||||
false);
|
||||
}
|
||||
|
||||
Type *VectorTy;
|
||||
unsigned C = getInstructionCost(I, VF, VectorTy);
|
||||
|
||||
bool TypeNotScalarized = VF.isVector() && VectorTy->isVectorTy() &&
|
||||
TTI.getNumberOfParts(VectorTy) < VF.Min;
|
||||
bool TypeNotScalarized =
|
||||
VF.isVector() && VectorTy->isVectorTy() &&
|
||||
TTI.getNumberOfParts(VectorTy) < VF.getKnownMinValue();
|
||||
return VectorizationCostTy(C, TypeNotScalarized);
|
||||
}
|
||||
|
||||
unsigned LoopVectorizationCostModel::getScalarizationOverhead(Instruction *I,
|
||||
ElementCount VF) {
|
||||
|
||||
assert(!VF.Scalable &&
|
||||
assert(!VF.isScalable() &&
|
||||
"cannot compute scalarization overhead for scalable vectorization");
|
||||
if (VF.isScalar())
|
||||
return 0;
|
||||
@ -6307,7 +6329,8 @@ unsigned LoopVectorizationCostModel::getScalarizationOverhead(Instruction *I,
|
||||
if (!RetTy->isVoidTy() &&
|
||||
(!isa<LoadInst>(I) || !TTI.supportsEfficientVectorElementLoadStore()))
|
||||
Cost += TTI.getScalarizationOverhead(
|
||||
cast<VectorType>(RetTy), APInt::getAllOnesValue(VF.Min), true, false);
|
||||
cast<VectorType>(RetTy), APInt::getAllOnesValue(VF.getKnownMinValue()),
|
||||
true, false);
|
||||
|
||||
// Some targets keep addresses scalar.
|
||||
if (isa<LoadInst>(I) && !TTI.prefersVectorizedAddressing())
|
||||
@ -6323,13 +6346,12 @@ unsigned LoopVectorizationCostModel::getScalarizationOverhead(Instruction *I,
|
||||
|
||||
// Skip operands that do not require extraction/scalarization and do not incur
|
||||
// any overhead.
|
||||
return Cost +
|
||||
TTI.getOperandsScalarizationOverhead(filterExtractingOperands(Ops, VF),
|
||||
VF.Min);
|
||||
return Cost + TTI.getOperandsScalarizationOverhead(
|
||||
filterExtractingOperands(Ops, VF), VF.getKnownMinValue());
|
||||
}
|
||||
|
||||
void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
|
||||
assert(!VF.Scalable && "scalable vectors not yet supported.");
|
||||
assert(!VF.isScalable() && "scalable vectors not yet supported.");
|
||||
if (VF.isScalar())
|
||||
return;
|
||||
NumPredStores = 0;
|
||||
@ -6466,14 +6488,15 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
|
||||
// Scalarize a widened load of address.
|
||||
setWideningDecision(
|
||||
I, VF, CM_Scalarize,
|
||||
(VF.Min * getMemoryInstructionCost(I, ElementCount::getFixed(1))));
|
||||
(VF.getKnownMinValue() *
|
||||
getMemoryInstructionCost(I, ElementCount::getFixed(1))));
|
||||
else if (auto Group = getInterleavedAccessGroup(I)) {
|
||||
// Scalarize an interleave group of address loads.
|
||||
for (unsigned I = 0; I < Group->getFactor(); ++I) {
|
||||
if (Instruction *Member = Group->getMember(I))
|
||||
setWideningDecision(
|
||||
Member, VF, CM_Scalarize,
|
||||
(VF.Min *
|
||||
(VF.getKnownMinValue() *
|
||||
getMemoryInstructionCost(Member, ElementCount::getFixed(1))));
|
||||
}
|
||||
}
|
||||
@ -6515,12 +6538,14 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
|
||||
|
||||
if (ScalarPredicatedBB) {
|
||||
// Return cost for branches around scalarized and predicated blocks.
|
||||
assert(!VF.Scalable && "scalable vectors not yet supported.");
|
||||
assert(!VF.isScalable() && "scalable vectors not yet supported.");
|
||||
auto *Vec_i1Ty =
|
||||
VectorType::get(IntegerType::getInt1Ty(RetTy->getContext()), VF);
|
||||
return (TTI.getScalarizationOverhead(
|
||||
Vec_i1Ty, APInt::getAllOnesValue(VF.Min), false, true) +
|
||||
(TTI.getCFInstrCost(Instruction::Br, CostKind) * VF.Min));
|
||||
Vec_i1Ty, APInt::getAllOnesValue(VF.getKnownMinValue()),
|
||||
false, true) +
|
||||
(TTI.getCFInstrCost(Instruction::Br, CostKind) *
|
||||
VF.getKnownMinValue()));
|
||||
} else if (I->getParent() == TheLoop->getLoopLatch() || VF.isScalar())
|
||||
// The back-edge branch will remain, as will all scalar branches.
|
||||
return TTI.getCFInstrCost(Instruction::Br, CostKind);
|
||||
@ -6537,9 +6562,9 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
|
||||
// First-order recurrences are replaced by vector shuffles inside the loop.
|
||||
// NOTE: Don't use ToVectorTy as SK_ExtractSubvector expects a vector type.
|
||||
if (VF.isVector() && Legal->isFirstOrderRecurrence(Phi))
|
||||
return TTI.getShuffleCost(TargetTransformInfo::SK_ExtractSubvector,
|
||||
cast<VectorType>(VectorTy), VF.Min - 1,
|
||||
FixedVectorType::get(RetTy, 1));
|
||||
return TTI.getShuffleCost(
|
||||
TargetTransformInfo::SK_ExtractSubvector, cast<VectorType>(VectorTy),
|
||||
VF.getKnownMinValue() - 1, FixedVectorType::get(RetTy, 1));
|
||||
|
||||
// Phi nodes in non-header blocks (not inductions, reductions, etc.) are
|
||||
// converted into select instructions. We require N - 1 selects per phi
|
||||
@ -6568,11 +6593,12 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
|
||||
// that we will create. This cost is likely to be zero. The phi node
|
||||
// cost, if any, should be scaled by the block probability because it
|
||||
// models a copy at the end of each predicated block.
|
||||
Cost += VF.Min * TTI.getCFInstrCost(Instruction::PHI, CostKind);
|
||||
Cost += VF.getKnownMinValue() *
|
||||
TTI.getCFInstrCost(Instruction::PHI, CostKind);
|
||||
|
||||
// The cost of the non-predicated instruction.
|
||||
Cost +=
|
||||
VF.Min * TTI.getArithmeticInstrCost(I->getOpcode(), RetTy, CostKind);
|
||||
Cost += VF.getKnownMinValue() *
|
||||
TTI.getArithmeticInstrCost(I->getOpcode(), RetTy, CostKind);
|
||||
|
||||
// The cost of insertelement and extractelement instructions needed for
|
||||
// scalarization.
|
||||
@ -6611,15 +6637,15 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
|
||||
Op2VK = TargetTransformInfo::OK_UniformValue;
|
||||
|
||||
SmallVector<const Value *, 4> Operands(I->operand_values());
|
||||
unsigned N = isScalarAfterVectorization(I, VF) ? VF.Min : 1;
|
||||
unsigned N = isScalarAfterVectorization(I, VF) ? VF.getKnownMinValue() : 1;
|
||||
return N * TTI.getArithmeticInstrCost(
|
||||
I->getOpcode(), VectorTy, CostKind,
|
||||
TargetTransformInfo::OK_AnyValue,
|
||||
Op2VK, TargetTransformInfo::OP_None, Op2VP, Operands, I);
|
||||
}
|
||||
case Instruction::FNeg: {
|
||||
assert(!VF.Scalable && "VF is assumed to be non scalable.");
|
||||
unsigned N = isScalarAfterVectorization(I, VF) ? VF.Min : 1;
|
||||
assert(!VF.isScalable() && "VF is assumed to be non scalable.");
|
||||
unsigned N = isScalarAfterVectorization(I, VF) ? VF.getKnownMinValue() : 1;
|
||||
return N * TTI.getArithmeticInstrCost(
|
||||
I->getOpcode(), VectorTy, CostKind,
|
||||
TargetTransformInfo::OK_AnyValue,
|
||||
@ -6633,7 +6659,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
|
||||
bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop));
|
||||
Type *CondTy = SI->getCondition()->getType();
|
||||
if (!ScalarCond) {
|
||||
assert(!VF.Scalable && "VF is assumed to be non scalable.");
|
||||
assert(!VF.isScalable() && "VF is assumed to be non scalable.");
|
||||
CondTy = VectorType::get(CondTy, VF);
|
||||
}
|
||||
return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy,
|
||||
@ -6745,8 +6771,8 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
|
||||
}
|
||||
}
|
||||
|
||||
assert(!VF.Scalable && "VF is assumed to be non scalable");
|
||||
unsigned N = isScalarAfterVectorization(I, VF) ? VF.Min : 1;
|
||||
assert(!VF.isScalable() && "VF is assumed to be non scalable");
|
||||
unsigned N = isScalarAfterVectorization(I, VF) ? VF.getKnownMinValue() : 1;
|
||||
return N *
|
||||
TTI.getCastInstrCost(Opcode, VectorTy, SrcVecTy, CCH, CostKind, I);
|
||||
}
|
||||
@ -6761,9 +6787,8 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
|
||||
default:
|
||||
// The cost of executing VF copies of the scalar instruction. This opcode
|
||||
// is unknown. Assume that it is the same as 'mul'.
|
||||
return VF.Min *
|
||||
TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy,
|
||||
CostKind) +
|
||||
return VF.getKnownMinValue() * TTI.getArithmeticInstrCost(
|
||||
Instruction::Mul, VectorTy, CostKind) +
|
||||
getScalarizationOverhead(I, VF);
|
||||
} // end of switch.
|
||||
}
|
||||
@ -6870,7 +6895,7 @@ static unsigned determineVPlanVF(const unsigned WidestVectorRegBits,
|
||||
|
||||
VectorizationFactor
|
||||
LoopVectorizationPlanner::planInVPlanNativePath(ElementCount UserVF) {
|
||||
assert(!UserVF.Scalable && "scalable vectors not yet supported");
|
||||
assert(!UserVF.isScalable() && "scalable vectors not yet supported");
|
||||
ElementCount VF = UserVF;
|
||||
// Outer loop handling: They may require CFG and instruction level
|
||||
// transformations before even evaluating whether vectorization is profitable.
|
||||
@ -6892,10 +6917,11 @@ LoopVectorizationPlanner::planInVPlanNativePath(ElementCount UserVF) {
|
||||
}
|
||||
}
|
||||
assert(EnableVPlanNativePath && "VPlan-native path is not enabled.");
|
||||
assert(isPowerOf2_32(VF.Min) && "VF needs to be a power of two");
|
||||
assert(isPowerOf2_32(VF.getKnownMinValue()) &&
|
||||
"VF needs to be a power of two");
|
||||
LLVM_DEBUG(dbgs() << "LV: Using " << (!UserVF.isZero() ? "user " : "")
|
||||
<< "VF " << VF << " to build VPlans.\n");
|
||||
buildVPlans(VF.Min, VF.Min);
|
||||
buildVPlans(VF.getKnownMinValue(), VF.getKnownMinValue());
|
||||
|
||||
// For VPlan build stress testing, we bail out after VPlan construction.
|
||||
if (VPlanBuildStressTest)
|
||||
@ -6912,9 +6938,10 @@ LoopVectorizationPlanner::planInVPlanNativePath(ElementCount UserVF) {
|
||||
|
||||
Optional<VectorizationFactor>
|
||||
LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
|
||||
assert(!UserVF.Scalable && "scalable vectorization not yet handled");
|
||||
assert(!UserVF.isScalable() && "scalable vectorization not yet handled");
|
||||
assert(OrigLoop->empty() && "Inner loop expected.");
|
||||
Optional<unsigned> MaybeMaxVF = CM.computeMaxVF(UserVF.Min, UserIC);
|
||||
Optional<unsigned> MaybeMaxVF =
|
||||
CM.computeMaxVF(UserVF.getKnownMinValue(), UserIC);
|
||||
if (!MaybeMaxVF) // Cases that should not to be vectorized nor interleaved.
|
||||
return None;
|
||||
|
||||
@ -6934,12 +6961,14 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
|
||||
|
||||
if (!UserVF.isZero()) {
|
||||
LLVM_DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n");
|
||||
assert(isPowerOf2_32(UserVF.Min) && "VF needs to be a power of two");
|
||||
assert(isPowerOf2_32(UserVF.getKnownMinValue()) &&
|
||||
"VF needs to be a power of two");
|
||||
// Collect the instructions (and their associated costs) that will be more
|
||||
// profitable to scalarize.
|
||||
CM.selectUserVectorizationFactor(UserVF);
|
||||
CM.collectInLoopReductions();
|
||||
buildVPlansWithVPRecipes(UserVF.Min, UserVF.Min);
|
||||
buildVPlansWithVPRecipes(UserVF.getKnownMinValue(),
|
||||
UserVF.getKnownMinValue());
|
||||
LLVM_DEBUG(printPlans(dbgs()));
|
||||
return {{UserVF, 0}};
|
||||
}
|
||||
@ -7228,7 +7257,7 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range,
|
||||
"Must be called with either a load or store");
|
||||
|
||||
auto willWiden = [&](ElementCount VF) -> bool {
|
||||
assert(!VF.Scalable && "unexpected scalable ElementCount");
|
||||
assert(!VF.isScalable() && "unexpected scalable ElementCount");
|
||||
if (VF.isScalar())
|
||||
return false;
|
||||
LoopVectorizationCostModel::InstWidening Decision =
|
||||
@ -7762,7 +7791,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
|
||||
ElementCount VF = ElementCount::getFixed(Range.Start);
|
||||
Plan->addVF(VF);
|
||||
RSO << "Initial VPlan for VF={" << VF;
|
||||
for (VF.Min *= 2; VF.Min < Range.End; VF.Min *= 2) {
|
||||
for (VF *= 2; VF.getKnownMinValue() < Range.End; VF *= 2) {
|
||||
Plan->addVF(VF);
|
||||
RSO << "," << VF;
|
||||
}
|
||||
@ -7986,7 +8015,7 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
|
||||
if (AlsoPack && State.VF.isVector()) {
|
||||
// If we're constructing lane 0, initialize to start from undef.
|
||||
if (State.Instance->Lane == 0) {
|
||||
assert(!State.VF.Scalable && "VF is assumed to be non scalable.");
|
||||
assert(!State.VF.isScalable() && "VF is assumed to be non scalable.");
|
||||
Value *Undef =
|
||||
UndefValue::get(VectorType::get(Ingredient->getType(), State.VF));
|
||||
State.ValueMap.setVectorValue(Ingredient, State.Instance->Part, Undef);
|
||||
@ -7999,7 +8028,7 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
|
||||
// Generate scalar instances for all VF lanes of all UF parts, unless the
|
||||
// instruction is uniform inwhich case generate only the first lane for each
|
||||
// of the UF parts.
|
||||
unsigned EndLane = IsUniform ? 1 : State.VF.Min;
|
||||
unsigned EndLane = IsUniform ? 1 : State.VF.getKnownMinValue();
|
||||
for (unsigned Part = 0; Part < State.UF; ++Part)
|
||||
for (unsigned Lane = 0; Lane < EndLane; ++Lane)
|
||||
State.ILV->scalarizeInstruction(Ingredient, User, {Part, Lane},
|
||||
|
@ -300,8 +300,9 @@ void VPRegionBlock::execute(VPTransformState *State) {
|
||||
|
||||
for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part) {
|
||||
State->Instance->Part = Part;
|
||||
assert(!State->VF.Scalable && "VF is assumed to be non scalable.");
|
||||
for (unsigned Lane = 0, VF = State->VF.Min; Lane < VF; ++Lane) {
|
||||
assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
|
||||
for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF;
|
||||
++Lane) {
|
||||
State->Instance->Lane = Lane;
|
||||
// Visit the VPBlocks connected to \p this, starting from it.
|
||||
for (VPBlockBase *Block : RPOT) {
|
||||
@ -388,7 +389,7 @@ void VPInstruction::generateInstruction(VPTransformState &State,
|
||||
Value *ScalarTC = State.TripCount;
|
||||
|
||||
auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
|
||||
auto *PredTy = FixedVectorType::get(Int1Ty, State.VF.Min);
|
||||
auto *PredTy = FixedVectorType::get(Int1Ty, State.VF.getKnownMinValue());
|
||||
Instruction *Call = Builder.CreateIntrinsic(
|
||||
Intrinsic::get_active_lane_mask, {PredTy, ScalarTC->getType()},
|
||||
{VIVElem0, ScalarTC}, nullptr, "active.lane.mask");
|
||||
@ -840,14 +841,16 @@ void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
|
||||
Type *STy = CanonicalIV->getType();
|
||||
IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
|
||||
ElementCount VF = State.VF;
|
||||
assert(!VF.Scalable && "the code following assumes non scalables ECs");
|
||||
Value *VStart = VF.isScalar() ? CanonicalIV
|
||||
: Builder.CreateVectorSplat(VF.Min, CanonicalIV,
|
||||
"broadcast");
|
||||
assert(!VF.isScalable() && "the code following assumes non scalables ECs");
|
||||
Value *VStart = VF.isScalar()
|
||||
? CanonicalIV
|
||||
: Builder.CreateVectorSplat(VF.getKnownMinValue(),
|
||||
CanonicalIV, "broadcast");
|
||||
for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
|
||||
SmallVector<Constant *, 8> Indices;
|
||||
for (unsigned Lane = 0; Lane < VF.Min; ++Lane)
|
||||
Indices.push_back(ConstantInt::get(STy, Part * VF.Min + Lane));
|
||||
for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane)
|
||||
Indices.push_back(
|
||||
ConstantInt::get(STy, Part * VF.getKnownMinValue() + Lane));
|
||||
// If VF == 1, there is only one iteration in the loop above, thus the
|
||||
// element pushed back into Indices is ConstantInt::get(STy, Part)
|
||||
Constant *VStep = VF == 1 ? Indices.back() : ConstantVector::get(Indices);
|
||||
|
@ -151,14 +151,15 @@ public:
|
||||
/// \return True if the map has a scalar entry for \p Key and \p Instance.
|
||||
bool hasScalarValue(Value *Key, const VPIteration &Instance) const {
|
||||
assert(Instance.Part < UF && "Queried Scalar Part is too large.");
|
||||
assert(Instance.Lane < VF.Min && "Queried Scalar Lane is too large.");
|
||||
assert(!VF.Scalable && "VF is assumed to be non scalable.");
|
||||
assert(Instance.Lane < VF.getKnownMinValue() &&
|
||||
"Queried Scalar Lane is too large.");
|
||||
assert(!VF.isScalable() && "VF is assumed to be non scalable.");
|
||||
|
||||
if (!hasAnyScalarValue(Key))
|
||||
return false;
|
||||
const ScalarParts &Entry = ScalarMapStorage.find(Key)->second;
|
||||
assert(Entry.size() == UF && "ScalarParts has wrong dimensions.");
|
||||
assert(Entry[Instance.Part].size() == VF.Min &&
|
||||
assert(Entry[Instance.Part].size() == VF.getKnownMinValue() &&
|
||||
"ScalarParts has wrong dimensions.");
|
||||
return Entry[Instance.Part][Instance.Lane] != nullptr;
|
||||
}
|
||||
@ -197,7 +198,7 @@ public:
|
||||
// TODO: Consider storing uniform values only per-part, as they occupy
|
||||
// lane 0 only, keeping the other VF-1 redundant entries null.
|
||||
for (unsigned Part = 0; Part < UF; ++Part)
|
||||
Entry[Part].resize(VF.Min, nullptr);
|
||||
Entry[Part].resize(VF.getKnownMinValue(), nullptr);
|
||||
ScalarMapStorage[Key] = Entry;
|
||||
}
|
||||
ScalarMapStorage[Key][Instance.Part][Instance.Lane] = Scalar;
|
||||
|
@ -71,8 +71,8 @@ TEST(ScalableVectorMVTsTest, HelperFuncs) {
|
||||
|
||||
// Check fields inside llvm::ElementCount
|
||||
EltCnt = Vnx4i32.getVectorElementCount();
|
||||
EXPECT_EQ(EltCnt.Min, 4U);
|
||||
ASSERT_TRUE(EltCnt.Scalable);
|
||||
EXPECT_EQ(EltCnt.getKnownMinValue(), 4U);
|
||||
ASSERT_TRUE(EltCnt.isScalable());
|
||||
|
||||
// Check that fixed-length vector types aren't scalable.
|
||||
EVT V8i32 = EVT::getVectorVT(Ctx, MVT::i32, 8);
|
||||
@ -82,8 +82,8 @@ TEST(ScalableVectorMVTsTest, HelperFuncs) {
|
||||
|
||||
// Check that llvm::ElementCount works for fixed-length types.
|
||||
EltCnt = V8i32.getVectorElementCount();
|
||||
EXPECT_EQ(EltCnt.Min, 8U);
|
||||
ASSERT_FALSE(EltCnt.Scalable);
|
||||
EXPECT_EQ(EltCnt.getKnownMinValue(), 8U);
|
||||
ASSERT_FALSE(EltCnt.isScalable());
|
||||
}
|
||||
|
||||
TEST(ScalableVectorMVTsTest, IRToVTTranslation) {
|
||||
|
@ -119,8 +119,8 @@ TEST(VectorTypesTest, FixedLength) {
|
||||
EXPECT_EQ(ConvTy->getElementType()->getScalarSizeInBits(), 64U);
|
||||
|
||||
EltCnt = V8Int64Ty->getElementCount();
|
||||
EXPECT_EQ(EltCnt.Min, 8U);
|
||||
ASSERT_FALSE(EltCnt.Scalable);
|
||||
EXPECT_EQ(EltCnt.getKnownMinValue(), 8U);
|
||||
ASSERT_FALSE(EltCnt.isScalable());
|
||||
}
|
||||
|
||||
TEST(VectorTypesTest, Scalable) {
|
||||
@ -215,8 +215,8 @@ TEST(VectorTypesTest, Scalable) {
|
||||
EXPECT_EQ(ConvTy->getElementType()->getScalarSizeInBits(), 64U);
|
||||
|
||||
EltCnt = ScV8Int64Ty->getElementCount();
|
||||
EXPECT_EQ(EltCnt.Min, 8U);
|
||||
ASSERT_TRUE(EltCnt.Scalable);
|
||||
EXPECT_EQ(EltCnt.getKnownMinValue(), 8U);
|
||||
ASSERT_TRUE(EltCnt.isScalable());
|
||||
}
|
||||
|
||||
TEST(VectorTypesTest, BaseVectorType) {
|
||||
@ -250,7 +250,7 @@ TEST(VectorTypesTest, BaseVectorType) {
|
||||
// test I == J
|
||||
VectorType *VI = VTys[I];
|
||||
ElementCount ECI = VI->getElementCount();
|
||||
EXPECT_EQ(isa<ScalableVectorType>(VI), ECI.Scalable);
|
||||
EXPECT_EQ(isa<ScalableVectorType>(VI), ECI.isScalable());
|
||||
|
||||
for (size_t J = I + 1, JEnd = VTys.size(); J < JEnd; ++J) {
|
||||
// test I < J
|
||||
|
Loading…
Reference in New Issue
Block a user