Pass shufflevector indices as int instead of unsigned.

No functionality change intended.
2025-05-13 09:26:43 +00:00 · 2020-04-15 15:52:26 +02:00 · 2020-04-15 15:52:26 +02:00 · 316b49d373
commit 316b49d373
parent cb1ee34e9d
3 changed files with 57 additions and 59 deletions
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@ -11747,7 +11747,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
    // Splat the 8-bits of immediate 4 times to help the loop wrap around.
    Imm = (Imm & 0xff) * 0x01010101;

-    uint32_t Indices[16];
+    int Indices[16];
    for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
      for (unsigned i = 0; i != NumLaneElts; ++i) {
        unsigned Index = Imm % NumLaneElts;
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@ -912,7 +912,7 @@ static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
-    uint32_t Idxs[64];
+    int Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
@ -946,7 +946,7 @@ static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
-    uint32_t Idxs[64];
+    int Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
@ -972,7 +972,7 @@ static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
  // If we have less than 8 elements, then the starting mask was an i8 and
  // we need to extract down to the right number of elements.
  if (NumElts < 8) {
-    uint32_t Indices[4];
+    int Indices[4];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    Mask = Builder.CreateShuffleVector(Mask, Mask,
@ -1041,7 +1041,7 @@ static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
    Op0 = llvm::Constant::getNullValue(Op0->getType());
  }

-  uint32_t Indices[64];
+  int Indices[64];
  // 256-bit palignr operates on 128-bit lanes so we need to handle that
  for (unsigned l = 0; l < NumElts; l += 16) {
    for (unsigned i = 0; i != 16; ++i) {
@ -1352,7 +1352,7 @@ static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
  }

  if (NumElts < 8) {
-    uint32_t Indices[8];
+    int Indices[8];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    for (unsigned i = NumElts; i != 8; ++i)
@ -1878,7 +1878,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
      unsigned NumElts = CI->getType()->getScalarSizeInBits();
      Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
      Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
-      uint32_t Indices[64];
+      int Indices[64];
      for (unsigned i = 0; i != NumElts; ++i)
        Indices[i] = i;

@ -2127,8 +2127,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
      unsigned NumDstElts = DstTy->getNumElements();
      if (NumDstElts < SrcTy->getNumElements()) {
        assert(NumDstElts == 2 && "Unexpected vector size");
-        uint32_t ShuffleMask[2] = { 0, 1 };
-        Rep = Builder.CreateShuffleVector(Rep, Rep, ShuffleMask);
+        Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
      }

      bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
@ -2159,8 +2158,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
      unsigned NumDstElts = DstTy->getNumElements();
      if (NumDstElts != SrcTy->getNumElements()) {
        assert(NumDstElts == 4 && "Unexpected vector size");
-        uint32_t ShuffleMask[4] = {0, 1, 2, 3};
-        Rep = Builder.CreateShuffleVector(Rep, Rep, ShuffleMask);
+        Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
      }
      Rep = Builder.CreateBitCast(
          Rep, VectorType::get(Type::getHalfTy(C), NumDstElts));
@ -2310,7 +2308,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
      unsigned NumDstElts = DstTy->getNumElements();

      // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
-      SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
+      SmallVector<int, 8> ShuffleMask(NumDstElts);
      for (unsigned i = 0; i != NumDstElts; ++i)
        ShuffleMask[i] = i;

@ -2356,7 +2354,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
      unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
      unsigned ControlBitsMask = NumLanes - 1;
      unsigned NumControlBits = NumLanes / 2;
-      SmallVector<uint32_t, 8> ShuffleMask(0);
+      SmallVector<int, 8> ShuffleMask(0);

      for (unsigned l = 0; l != NumLanes; ++l) {
        unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
@ -2376,7 +2374,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
          cast<VectorType>(CI->getArgOperand(0)->getType())->getNumElements();
      unsigned NumDstElts = cast<VectorType>(CI->getType())->getNumElements();

-      SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
+      SmallVector<int, 8> ShuffleMask(NumDstElts);
      for (unsigned i = 0; i != NumDstElts; ++i)
        ShuffleMask[i] = i % NumSrcElts;

@ -2466,7 +2464,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
      VectorType *VecTy = cast<VectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();

-      SmallVector<uint32_t, 16> Idxs(NumElts);
+      SmallVector<int, 16> Idxs(NumElts);
      for (unsigned i = 0; i != NumElts; ++i)
        Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;

@ -2486,7 +2484,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {

      // Extend the second operand into a vector the size of the destination.
      Value *UndefV = UndefValue::get(Op1->getType());
-      SmallVector<uint32_t, 8> Idxs(DstNumElts);
+      SmallVector<int, 8> Idxs(DstNumElts);
      for (unsigned i = 0; i != SrcNumElts; ++i)
        Idxs[i] = i;
      for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
@ -2529,7 +2527,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
      Imm = Imm % Scale;

      // Get indexes for the subvector of the input vector.
-      SmallVector<uint32_t, 8> Idxs(DstNumElts);
+      SmallVector<int, 8> Idxs(DstNumElts);
      for (unsigned i = 0; i != DstNumElts; ++i) {
        Idxs[i] = i + (Imm * DstNumElts);
      }
@ -2548,7 +2546,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
      VectorType *VecTy = cast<VectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();

-      SmallVector<uint32_t, 8> Idxs(NumElts);
+      SmallVector<int, 8> Idxs(NumElts);
      for (unsigned i = 0; i != NumElts; ++i)
        Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);

@ -2571,7 +2569,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {

      unsigned NumElts = cast<VectorType>(CI->getType())->getNumElements();
      unsigned HalfSize = NumElts / 2;
-      SmallVector<uint32_t, 8> ShuffleMask(NumElts);
+      SmallVector<int, 8> ShuffleMask(NumElts);

      // Determine which operand(s) are actually in use for this instruction.
      Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
@ -2605,7 +2603,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
      unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
      unsigned IdxMask = ((1 << IdxSize) - 1);

-      SmallVector<uint32_t, 8> Idxs(NumElts);
+      SmallVector<int, 8> Idxs(NumElts);
      // Lookup the bits for this element, wrapping around the immediate every
      // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
      // to offset by the first index of each group.
@ -2623,7 +2621,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      unsigned NumElts = cast<VectorType>(CI->getType())->getNumElements();

-      SmallVector<uint32_t, 16> Idxs(NumElts);
+      SmallVector<int, 16> Idxs(NumElts);
      for (unsigned l = 0; l != NumElts; l += 8) {
        for (unsigned i = 0; i != 4; ++i)
          Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
@ -2642,7 +2640,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      unsigned NumElts = cast<VectorType>(CI->getType())->getNumElements();

-      SmallVector<uint32_t, 16> Idxs(NumElts);
+      SmallVector<int, 16> Idxs(NumElts);
      for (unsigned l = 0; l != NumElts; l += 8) {
        for (unsigned i = 0; i != 4; ++i)
          Idxs[i + l] = i + l;
@ -2664,7 +2662,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
      unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
      unsigned HalfLaneElts = NumLaneElts / 2;

-      SmallVector<uint32_t, 16> Idxs(NumElts);
+      SmallVector<int, 16> Idxs(NumElts);
      for (unsigned i = 0; i != NumElts; ++i) {
        // Base index is the starting element of the lane.
        Idxs[i] = i - (i % NumLaneElts);
@ -2691,7 +2689,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
      if (Name.startswith("avx512.mask.movshdup."))
        Offset = 1;

-      SmallVector<uint32_t, 16> Idxs(NumElts);
+      SmallVector<int, 16> Idxs(NumElts);
      for (unsigned l = 0; l != NumElts; l += NumLaneElts)
        for (unsigned i = 0; i != NumLaneElts; i += 2) {
          Idxs[i + l + 0] = i + l + Offset;
@ -2709,7 +2707,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
      int NumElts = cast<VectorType>(CI->getType())->getNumElements();
      int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();

-      SmallVector<uint32_t, 64> Idxs(NumElts);
+      SmallVector<int, 64> Idxs(NumElts);
      for (int l = 0; l != NumElts; l += NumLaneElts)
        for (int i = 0; i != NumLaneElts; ++i)
          Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
@ -2725,7 +2723,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
      int NumElts = cast<VectorType>(CI->getType())->getNumElements();
      int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();

-      SmallVector<uint32_t, 64> Idxs(NumElts);
+      SmallVector<int, 64> Idxs(NumElts);
      for (int l = 0; l != NumElts; l += NumLaneElts)
        for (int i = 0; i != NumLaneElts; ++i)
          Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
@ -3304,7 +3302,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
        if (IsSubAdd)
          std::swap(Even, Odd);

-        SmallVector<uint32_t, 32> Idxs(NumElts);
+        SmallVector<int, 32> Idxs(NumElts);
        for (int i = 0; i != NumElts; ++i)
          Idxs[i] = i + (i % 2) * NumElts;

--- a/llvm/lib/Target/X86/X86InterleavedAccess.cpp
+++ b/llvm/lib/Target/X86/X86InterleavedAccess.cpp
@ -229,11 +229,11 @@ static MVT scaleVectorType(MVT VT) {
                          VT.getVectorNumElements() / 2);
 }

-static uint32_t Concat[] = {
-  0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
-  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
-  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
-  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63 };
+static constexpr int Concat[] = {
+    0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
+    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+    32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63};

 // genShuffleBland - Creates shuffle according to two vectors. This function
 // only works on instructions with lane inside 256 registers. According to
@ -251,9 +251,9 @@ static uint32_t Concat[] = {
 // By computing the shuffle on a sequence of 16 elements(one lane) and add the
 // correct offset. We are creating a vpsuffed + blend sequence between two
 // shuffles.
-static void genShuffleBland(MVT VT, ArrayRef<uint32_t> Mask,
-  SmallVectorImpl<uint32_t> &Out, int LowOffset,
-  int HighOffset) {
+static void genShuffleBland(MVT VT, ArrayRef<int> Mask,
+                            SmallVectorImpl<int> &Out, int LowOffset,
+                            int HighOffset) {
  assert(VT.getSizeInBits() >= 256 &&
    "This function doesn't accept width smaller then 256");
  unsigned NumOfElm = VT.getVectorNumElements();
@ -282,9 +282,9 @@ static void genShuffleBland(MVT VT, ArrayRef<uint32_t> Mask,
 // Invec[2] -  |2|5|8|11|     TransposedMatrix[2] - |8|9|10|11|

 static void reorderSubVector(MVT VT, SmallVectorImpl<Value *> &TransposedMatrix,
-  ArrayRef<Value *> Vec, ArrayRef<uint32_t> VPShuf,
-  unsigned VecElems, unsigned Stride,
-  IRBuilder<> &Builder) {
+                             ArrayRef<Value *> Vec, ArrayRef<int> VPShuf,
+                             unsigned VecElems, unsigned Stride,
+                             IRBuilder<> &Builder) {

  if (VecElems == 16) {
    for (unsigned i = 0; i < Stride; i++)
@ -293,7 +293,7 @@ static void reorderSubVector(MVT VT, SmallVectorImpl<Value *> &TransposedMatrix,
    return;
  }

-  SmallVector<uint32_t, 32> OptimizeShuf;
+  SmallVector<int, 32> OptimizeShuf;
  Value *Temp[8];

  for (unsigned i = 0; i < (VecElems / 16) * Stride; i += 2) {
@ -433,7 +433,7 @@ void X86InterleavedAccessGroup::interleave8bitStride4(
 //  For example shuffle pattern for VF 16 register size 256 -> lanes = 2
 //  {<[0|3|6|1|4|7|2|5]-[8|11|14|9|12|15|10|13]>}
 static void createShuffleStride(MVT VT, int Stride,
-                                SmallVectorImpl<uint32_t> &Mask) {
+                                SmallVectorImpl<int> &Mask) {
  int VectorSize = VT.getSizeInBits();
  int VF = VT.getVectorNumElements();
  int LaneCount = std::max(VectorSize / 128, 1);
@ -446,7 +446,7 @@ static void createShuffleStride(MVT VT, int Stride,
 //  inside mask a shuffleMask. A mask contains exactly 3 groups, where
 //  each group is a monotonically increasing sequence with stride 3.
 //  For example shuffleMask {0,3,6,1,4,7,2,5} => {3,3,2}
-static void setGroupSize(MVT VT, SmallVectorImpl<uint32_t> &SizeInfo) {
+static void setGroupSize(MVT VT, SmallVectorImpl<int> &SizeInfo) {
  int VectorSize = VT.getSizeInBits();
  int VF = VT.getVectorNumElements() / std::max(VectorSize / 128, 1);
  for (int i = 0, FirstGroupElement = 0; i < 3; i++) {
@ -470,7 +470,7 @@ static void setGroupSize(MVT VT, SmallVectorImpl<uint32_t> &SizeInfo) {
 //  direction of the alignment. (false - align to the "right" side while true -
 //  align to the "left" side)
 static void DecodePALIGNRMask(MVT VT, unsigned Imm,
-                              SmallVectorImpl<uint32_t> &ShuffleMask,
+                              SmallVectorImpl<int> &ShuffleMask,
                              bool AlignDirection = true, bool Unary = false) {
  unsigned NumElts = VT.getVectorNumElements();
  unsigned NumLanes = std::max((int)VT.getSizeInBits() / 128, 1);
@ -547,11 +547,11 @@ void X86InterleavedAccessGroup::deinterleave8bitStride3(
  // Matrix[2]= b5 c5 a6 b6 c6 a7 b7 c7

  TransposedMatrix.resize(3);
-  SmallVector<uint32_t, 32> VPShuf;
-  SmallVector<uint32_t, 32> VPAlign[2];
-  SmallVector<uint32_t, 32> VPAlign2;
-  SmallVector<uint32_t, 32> VPAlign3;
-  SmallVector<uint32_t, 3> GroupSize;
+  SmallVector<int, 32> VPShuf;
+  SmallVector<int, 32> VPAlign[2];
+  SmallVector<int, 32> VPAlign2;
+  SmallVector<int, 32> VPAlign3;
+  SmallVector<int, 3> GroupSize;
  Value *Vec[6], *TempVector[3];

  MVT VT = MVT::getVT(Shuffles[0]->getType());
@ -605,8 +605,8 @@ void X86InterleavedAccessGroup::deinterleave8bitStride3(
 // group2Shuffle reorders the shuffle stride back into continuous order.
 // For example, for VF16 with Mask1 = {0,3,6,9,12,15,2,5,8,11,14,1,4,7,10,13} =>
 // MaskResult = {0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5}.
-static void group2Shuffle(MVT VT, SmallVectorImpl<uint32_t> &Mask,
-                          SmallVectorImpl<uint32_t> &Output) {
+static void group2Shuffle(MVT VT, SmallVectorImpl<int> &Mask,
+                          SmallVectorImpl<int> &Output) {
  int IndexGroup[3] = {0, 0, 0};
  int Index = 0;
  int VectorWidth = VT.getSizeInBits();
@ -633,11 +633,11 @@ void X86InterleavedAccessGroup::interleave8bitStride3(
  // Matrix[2]= c0 c1 c2 c3 c3 a7 b7 c7

  TransposedMatrix.resize(3);
-  SmallVector<uint32_t, 3> GroupSize;
-  SmallVector<uint32_t, 32> VPShuf;
-  SmallVector<uint32_t, 32> VPAlign[3];
-  SmallVector<uint32_t, 32> VPAlign2;
-  SmallVector<uint32_t, 32> VPAlign3;
+  SmallVector<int, 3> GroupSize;
+  SmallVector<int, 32> VPShuf;
+  SmallVector<int, 32> VPAlign[3];
+  SmallVector<int, 32> VPAlign2;
+  SmallVector<int, 32> VPAlign3;

  Value *Vec[3], *TempVector[3];
  MVT VT = MVT::getVectorVT(MVT::i8, VecElems);
@ -692,25 +692,25 @@ void X86InterleavedAccessGroup::transpose_4x4(
  TransposedMatrix.resize(4);

  // dst = src1[0,1],src2[0,1]
-  uint32_t IntMask1[] = {0, 1, 4, 5};
-  ArrayRef<uint32_t> Mask = makeArrayRef(IntMask1, 4);
+  static constexpr int IntMask1[] = {0, 1, 4, 5};
+  ArrayRef<int> Mask = makeArrayRef(IntMask1, 4);
  Value *IntrVec1 = Builder.CreateShuffleVector(Matrix[0], Matrix[2], Mask);
  Value *IntrVec2 = Builder.CreateShuffleVector(Matrix[1], Matrix[3], Mask);

  // dst = src1[2,3],src2[2,3]
-  uint32_t IntMask2[] = {2, 3, 6, 7};
+  static constexpr int IntMask2[] = {2, 3, 6, 7};
  Mask = makeArrayRef(IntMask2, 4);
  Value *IntrVec3 = Builder.CreateShuffleVector(Matrix[0], Matrix[2], Mask);
  Value *IntrVec4 = Builder.CreateShuffleVector(Matrix[1], Matrix[3], Mask);

  // dst = src1[0],src2[0],src1[2],src2[2]
-  uint32_t IntMask3[] = {0, 4, 2, 6};
+  static constexpr int IntMask3[] = {0, 4, 2, 6};
  Mask = makeArrayRef(IntMask3, 4);
  TransposedMatrix[0] = Builder.CreateShuffleVector(IntrVec1, IntrVec2, Mask);
  TransposedMatrix[2] = Builder.CreateShuffleVector(IntrVec3, IntrVec4, Mask);

  // dst = src1[1],src2[1],src1[3],src2[3]
-  uint32_t IntMask4[] = {1, 5, 3, 7};
+  static constexpr int IntMask4[] = {1, 5, 3, 7};
  Mask = makeArrayRef(IntMask4, 4);
  TransposedMatrix[1] = Builder.CreateShuffleVector(IntrVec1, IntrVec2, Mask);
  TransposedMatrix[3] = Builder.CreateShuffleVector(IntrVec3, IntrVec4, Mask);