[SVE] Remove calls to VectorType::getNumElements from Transforms/Vectorize

Reviewed By: spatel Differential Revision: https://reviews.llvm.org/D82056
2024-12-14 19:49:36 +00:00 · 2020-08-27 11:19:46 -07:00 · 2020-08-27 11:19:46 -07:00 · 5e63083435
commit 5e63083435
parent a1bdf8f545
4 changed files with 35 additions and 32 deletions
--- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@ -1027,8 +1027,8 @@ bool Vectorizer::vectorizeStoreChain(
  unsigned EltSzInBytes = Sz / 8;
  unsigned SzInBytes = EltSzInBytes * ChainSize;

-  VectorType *VecTy;
-  VectorType *VecStoreTy = dyn_cast<VectorType>(StoreTy);
+  FixedVectorType *VecTy;
+  auto *VecStoreTy = dyn_cast<FixedVectorType>(StoreTy);
  if (VecStoreTy)
    VecTy = FixedVectorType::get(StoreTy->getScalarType(),
                                 Chain.size() * VecStoreTy->getNumElements());
@ -1180,7 +1180,7 @@ bool Vectorizer::vectorizeLoadChain(
  unsigned EltSzInBytes = Sz / 8;
  unsigned SzInBytes = EltSzInBytes * ChainSize;
  VectorType *VecTy;
-  VectorType *VecLoadTy = dyn_cast<VectorType>(LoadTy);
+  auto *VecLoadTy = dyn_cast<FixedVectorType>(LoadTy);
  if (VecLoadTy)
    VecTy = FixedVectorType::get(LoadTy->getScalarType(),
                                 Chain.size() * VecLoadTy->getNumElements());
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@ -2035,7 +2035,7 @@ void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, TruncInst *Trunc) {
 Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx, Value *Step,
                                          Instruction::BinaryOps BinOp) {
  // Create and check the types.
-  auto *ValVTy = cast<VectorType>(Val->getType());
+  auto *ValVTy = cast<FixedVectorType>(Val->getType());
  int VLen = ValVTy->getNumElements();

  Type *STy = Val->getType()->getScalarType();
@ -2799,19 +2799,18 @@ Value *InnerLoopVectorizer::getOrCreateVectorTripCount(Loop *L) {
 Value *InnerLoopVectorizer::createBitOrPointerCast(Value *V, VectorType *DstVTy,
                                                   const DataLayout &DL) {
  // Verify that V is a vector type with same number of elements as DstVTy.
-  assert(isa<FixedVectorType>(DstVTy) &&
-         "Vector type is assumed to be fixed width.");
-  unsigned VF = DstVTy->getNumElements();
-  VectorType *SrcVecTy = cast<VectorType>(V->getType());
+  auto *DstFVTy = cast<FixedVectorType>(DstVTy);
+  unsigned VF = DstFVTy->getNumElements();
+  auto *SrcVecTy = cast<FixedVectorType>(V->getType());
  assert((VF == SrcVecTy->getNumElements()) && "Vector dimensions do not match");
  Type *SrcElemTy = SrcVecTy->getElementType();
-  Type *DstElemTy = DstVTy->getElementType();
+  Type *DstElemTy = DstFVTy->getElementType();
  assert((DL.getTypeSizeInBits(SrcElemTy) == DL.getTypeSizeInBits(DstElemTy)) &&
         "Vector elements must have same size");

  // Do a direct cast if element types are castable.
  if (CastInst::isBitOrNoopPointerCastable(SrcElemTy, DstElemTy, DL)) {
-    return Builder.CreateBitOrPointerCast(V, DstVTy);
+    return Builder.CreateBitOrPointerCast(V, DstFVTy);
  }
  // V cannot be directly casted to desired vector type.
  // May happen when V is a floating point vector but DstVTy is a vector of
@ -2825,7 +2824,7 @@ Value *InnerLoopVectorizer::createBitOrPointerCast(Value *V, VectorType *DstVTy,
      IntegerType::getIntNTy(V->getContext(), DL.getTypeSizeInBits(SrcElemTy));
  auto *VecIntTy = FixedVectorType::get(IntTy, VF);
  Value *CastVal = Builder.CreateBitOrPointerCast(V, VecIntTy);
-  return Builder.CreateBitOrPointerCast(CastVal, DstVTy);
+  return Builder.CreateBitOrPointerCast(CastVal, DstFVTy);
 }

 void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L,
@ -3526,7 +3525,8 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() {
      Type *ScalarTruncatedTy =
          IntegerType::get(OriginalTy->getContext(), KV.second);
      auto *TruncatedTy = FixedVectorType::get(
-          ScalarTruncatedTy, cast<VectorType>(OriginalTy)->getNumElements());
+          ScalarTruncatedTy,
+          cast<FixedVectorType>(OriginalTy)->getNumElements());
      if (TruncatedTy == OriginalTy)
        continue;

@ -3576,13 +3576,13 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() {
          break;
        }
      } else if (auto *SI = dyn_cast<ShuffleVectorInst>(I)) {
-        auto Elements0 =
-            cast<VectorType>(SI->getOperand(0)->getType())->getNumElements();
+        auto Elements0 = cast<FixedVectorType>(SI->getOperand(0)->getType())
+                             ->getNumElements();
        auto *O0 = B.CreateZExtOrTrunc(
            SI->getOperand(0),
            FixedVectorType::get(ScalarTruncatedTy, Elements0));
-        auto Elements1 =
-            cast<VectorType>(SI->getOperand(1)->getType())->getNumElements();
+        auto Elements1 = cast<FixedVectorType>(SI->getOperand(1)->getType())
+                             ->getNumElements();
        auto *O1 = B.CreateZExtOrTrunc(
            SI->getOperand(1),
            FixedVectorType::get(ScalarTruncatedTy, Elements1));
@ -3592,16 +3592,16 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() {
        // Don't do anything with the operands, just extend the result.
        continue;
      } else if (auto *IE = dyn_cast<InsertElementInst>(I)) {
-        auto Elements =
-            cast<VectorType>(IE->getOperand(0)->getType())->getNumElements();
+        auto Elements = cast<FixedVectorType>(IE->getOperand(0)->getType())
+                            ->getNumElements();
        auto *O0 = B.CreateZExtOrTrunc(
            IE->getOperand(0),
            FixedVectorType::get(ScalarTruncatedTy, Elements));
        auto *O1 = B.CreateZExtOrTrunc(IE->getOperand(1), ScalarTruncatedTy);
        NewI = B.CreateInsertElement(O0, O1, IE->getOperand(2));
      } else if (auto *EE = dyn_cast<ExtractElementInst>(I)) {
-        auto Elements =
-            cast<VectorType>(EE->getOperand(0)->getType())->getNumElements();
+        auto Elements = cast<FixedVectorType>(EE->getOperand(0)->getType())
+                            ->getNumElements();
        auto *O0 = B.CreateZExtOrTrunc(
            EE->getOperand(0),
            FixedVectorType::get(ScalarTruncatedTy, Elements));
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@ -286,7 +286,8 @@ static bool isCommutative(Instruction *I) {
 static Optional<TargetTransformInfo::ShuffleKind>
 isShuffle(ArrayRef<Value *> VL) {
  auto *EI0 = cast<ExtractElementInst>(VL[0]);
-  unsigned Size = EI0->getVectorOperandType()->getNumElements();
+  unsigned Size =
+      cast<FixedVectorType>(EI0->getVectorOperandType())->getNumElements();
  Value *Vec1 = nullptr;
  Value *Vec2 = nullptr;
  enum ShuffleMode { Unknown, Select, Permute };
@ -295,7 +296,7 @@ isShuffle(ArrayRef<Value *> VL) {
    auto *EI = cast<ExtractElementInst>(VL[I]);
    auto *Vec = EI->getVectorOperand();
    // All vector operands must have the same number of vector elements.
-    if (cast<VectorType>(Vec->getType())->getNumElements() != Size)
+    if (cast<FixedVectorType>(Vec->getType())->getNumElements() != Size)
      return None;
    auto *Idx = dyn_cast<ConstantInt>(EI->getIndexOperand());
    if (!Idx)
@ -1411,7 +1412,7 @@ private:

  /// \returns the scalarization cost for this type. Scalarization in this
  /// context means the creation of vectors from a group of scalars.
-  int getGatherCost(VectorType *Ty,
+  int getGatherCost(FixedVectorType *Ty,
                    const DenseSet<unsigned> &ShuffledIndices) const;

  /// \returns the scalarization cost for this list of values. Assuming that
@ -1424,7 +1425,7 @@ private:
  void setInsertPointAfterBundle(TreeEntry *E);

  /// \returns a vector from a collection of scalars in \p VL.
-  Value *Gather(ArrayRef<Value *> VL, VectorType *Ty);
+  Value *Gather(ArrayRef<Value *> VL, FixedVectorType *Ty);

  /// \returns whether the VectorizableTree is fully vectorizable and will
  /// be beneficial even the tree height is tiny.
@ -3166,7 +3167,7 @@ unsigned BoUpSLP::canMapToVector(Type *T, const DataLayout &DL) const {
      N *= AT->getNumElements();
      EltTy = AT->getElementType();
    } else {
-      auto *VT = cast<VectorType>(EltTy);
+      auto *VT = cast<FixedVectorType>(EltTy);
      N *= VT->getNumElements();
      EltTy = VT->getElementType();
    }
@ -3204,7 +3205,7 @@ bool BoUpSLP::canReuseExtract(ArrayRef<Value *> VL, Value *OpValue,
    if (!LI || !LI->isSimple() || !LI->hasNUses(VL.size()))
      return false;
  } else {
-    NElts = cast<VectorType>(Vec->getType())->getNumElements();
+    NElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
  }

  if (NElts != VL.size())
@ -3255,8 +3256,8 @@ bool BoUpSLP::areAllUsersVectorized(Instruction *I) const {
 }

 static std::pair<unsigned, unsigned>
-getVectorCallCosts(CallInst *CI, VectorType *VecTy, TargetTransformInfo *TTI,
-                   TargetLibraryInfo *TLI) {
+getVectorCallCosts(CallInst *CI, FixedVectorType *VecTy,
+                   TargetTransformInfo *TTI, TargetLibraryInfo *TLI) {
  Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);

  // Calculate the cost of the scalar and vector calls.
@ -3928,7 +3929,7 @@ int BoUpSLP::getTreeCost() {
  return Cost;
 }

-int BoUpSLP::getGatherCost(VectorType *Ty,
+int BoUpSLP::getGatherCost(FixedVectorType *Ty,
                           const DenseSet<unsigned> &ShuffledIndices) const {
  unsigned NumElts = Ty->getNumElements();
  APInt DemandedElts = APInt::getNullValue(NumElts);
@ -4041,7 +4042,7 @@ void BoUpSLP::setInsertPointAfterBundle(TreeEntry *E) {
  Builder.SetCurrentDebugLocation(Front->getDebugLoc());
 }

-Value *BoUpSLP::Gather(ArrayRef<Value *> VL, VectorType *Ty) {
+Value *BoUpSLP::Gather(ArrayRef<Value *> VL, FixedVectorType *Ty) {
  Value *Vec = UndefValue::get(Ty);
  // Generate the 'InsertElement' instruction.
  for (unsigned i = 0; i < Ty->getNumElements(); ++i) {
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@ -437,8 +437,10 @@ bool VectorCombine::foldBitcastShuf(Instruction &I) {
      TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, SrcTy))
    return false;

-  unsigned DestNumElts = DestTy->getNumElements();
-  unsigned SrcNumElts = SrcTy->getNumElements();
+  // FIXME: it should be possible to implement the computation of the widened
+  // shuffle mask in terms of ElementCount to work with scalable shuffles.
+  unsigned DestNumElts = cast<FixedVectorType>(DestTy)->getNumElements();
+  unsigned SrcNumElts = cast<FixedVectorType>(SrcTy)->getNumElements();
  SmallVector<int, 16> NewMask;
  if (SrcNumElts <= DestNumElts) {
    // The bitcast is from wide to narrow/equal elements. The shuffle mask can