mirror of
https://github.com/RPCSX/llvm.git
synced 2025-02-23 22:52:48 +00:00
[X86] Use APInt instead of SmallBitVector for tracking Zeroable elements in shuffle lowering
Summary: SmallBitVector uses a malloc for more than 58 bits on a 64-bit target and more than 27 bits on a 32-bit target. Some of the vector types we deal with here use more than that number of elements and therefore cause a malloc. APInt, on the other hand, supports up to 64 bits without a malloc. That's the maximum number of bits we need here, so we can avoid a malloc in all cases by using APInt. Reviewers: RKSimon Reviewed By: RKSimon Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D30390 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@296354 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
72dbe0cc0e
commit
b84e77adb3
@ -8060,9 +8060,9 @@ static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, const SDLoc &DL,
|
||||
/// zero. Many x86 shuffles can zero lanes cheaply and we often want to handle
|
||||
/// as many lanes with this technique as possible to simplify the remaining
|
||||
/// shuffle.
|
||||
static SmallBitVector computeZeroableShuffleElements(ArrayRef<int> Mask,
|
||||
SDValue V1, SDValue V2) {
|
||||
SmallBitVector Zeroable(Mask.size(), false);
|
||||
static APInt computeZeroableShuffleElements(ArrayRef<int> Mask,
|
||||
SDValue V1, SDValue V2) {
|
||||
APInt Zeroable(Mask.size(), 0);
|
||||
V1 = peekThroughBitcasts(V1);
|
||||
V2 = peekThroughBitcasts(V2);
|
||||
|
||||
@ -8077,7 +8077,7 @@ static SmallBitVector computeZeroableShuffleElements(ArrayRef<int> Mask,
|
||||
int M = Mask[i];
|
||||
// Handle the easy cases.
|
||||
if (M < 0 || (M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
|
||||
Zeroable[i] = true;
|
||||
Zeroable.setBit(i);
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -8095,17 +8095,19 @@ static SmallBitVector computeZeroableShuffleElements(ArrayRef<int> Mask,
|
||||
int Scale = Size / V->getNumOperands();
|
||||
SDValue Op = V.getOperand(M / Scale);
|
||||
if (Op.isUndef() || X86::isZeroNode(Op))
|
||||
Zeroable[i] = true;
|
||||
Zeroable.setBit(i);
|
||||
else if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
|
||||
APInt Val = Cst->getAPIntValue();
|
||||
Val = Val.lshr((M % Scale) * ScalarSizeInBits);
|
||||
Val = Val.getLoBits(ScalarSizeInBits);
|
||||
Zeroable[i] = (Val == 0);
|
||||
if (Val == 0)
|
||||
Zeroable.setBit(i);
|
||||
} else if (ConstantFPSDNode *Cst = dyn_cast<ConstantFPSDNode>(Op)) {
|
||||
APInt Val = Cst->getValueAPF().bitcastToAPInt();
|
||||
Val = Val.lshr((M % Scale) * ScalarSizeInBits);
|
||||
Val = Val.getLoBits(ScalarSizeInBits);
|
||||
Zeroable[i] = (Val == 0);
|
||||
if (Val == 0)
|
||||
Zeroable.setBit(i);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
@ -8119,7 +8121,8 @@ static SmallBitVector computeZeroableShuffleElements(ArrayRef<int> Mask,
|
||||
SDValue Op = V.getOperand((M * Scale) + j);
|
||||
AllZeroable &= (Op.isUndef() || X86::isZeroNode(Op));
|
||||
}
|
||||
Zeroable[i] = AllZeroable;
|
||||
if (AllZeroable)
|
||||
Zeroable.setBit(i);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
@ -8134,12 +8137,12 @@ static SmallBitVector computeZeroableShuffleElements(ArrayRef<int> Mask,
|
||||
//
|
||||
// The function looks for a sub-mask that the nonzero elements are in
|
||||
// increasing order. If such a sub-mask exists, the function returns true.
|
||||
static bool isNonZeroElementsInOrder(const SmallBitVector &Zeroable,
|
||||
static bool isNonZeroElementsInOrder(const APInt &Zeroable,
|
||||
ArrayRef<int> Mask, const EVT &VectorType,
|
||||
bool &IsZeroSideLeft) {
|
||||
int NextElement = -1;
|
||||
// Check if the Mask's nonzero elements are in increasing order.
|
||||
for (int i = 0, e = Zeroable.size(); i < e; i++) {
|
||||
for (int i = 0, e = Mask.size(); i < e; i++) {
|
||||
// Checks if the mask's zeros elements are built from only zeros.
|
||||
assert(Mask[i] >= -1 && "Out of bound mask element!");
|
||||
if (Mask[i] < 0)
|
||||
@ -8163,7 +8166,7 @@ static bool isNonZeroElementsInOrder(const SmallBitVector &Zeroable,
|
||||
static SDValue lowerVectorShuffleWithPSHUFB(const SDLoc &DL, MVT VT,
|
||||
ArrayRef<int> Mask, SDValue V1,
|
||||
SDValue V2,
|
||||
const SmallBitVector &Zeroable,
|
||||
const APInt &Zeroable,
|
||||
const X86Subtarget &Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
int Size = Mask.size();
|
||||
@ -8218,19 +8221,9 @@ static SDValue getMaskNode(SDValue Mask, MVT MaskVT,
|
||||
const X86Subtarget &Subtarget, SelectionDAG &DAG,
|
||||
const SDLoc &dl);
|
||||
|
||||
// Function convertBitVectorToUnsigned - The function gets SmallBitVector
|
||||
// as argument and converts it to an unsigned value.
|
||||
// The output of the function is not(zeroable)
|
||||
static unsigned convertBitVectorToUnsigned(const SmallBitVector &Zeroable) {
|
||||
unsigned convertBit = 0;
|
||||
for (int i = 0, e = Zeroable.size(); i < e; i++)
|
||||
convertBit |= !(Zeroable[i]) << i;
|
||||
return convertBit;
|
||||
}
|
||||
|
||||
// X86 has dedicated shuffle that can be lowered to VEXPAND
|
||||
static SDValue lowerVectorShuffleToEXPAND(const SDLoc &DL, MVT VT,
|
||||
const SmallBitVector &Zeroable,
|
||||
const APInt &Zeroable,
|
||||
ArrayRef<int> Mask, SDValue &V1,
|
||||
SDValue &V2, SelectionDAG &DAG,
|
||||
const X86Subtarget &Subtarget) {
|
||||
@ -8238,7 +8231,7 @@ static SDValue lowerVectorShuffleToEXPAND(const SDLoc &DL, MVT VT,
|
||||
if (!isNonZeroElementsInOrder(Zeroable, Mask, V1.getValueType(),
|
||||
IsLeftZeroSide))
|
||||
return SDValue();
|
||||
unsigned VEXPANDMask = convertBitVectorToUnsigned(Zeroable);
|
||||
unsigned VEXPANDMask = (~Zeroable).getZExtValue();
|
||||
MVT IntegerType =
|
||||
MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8));
|
||||
SDValue MaskNode = DAG.getConstant(VEXPANDMask, DL, IntegerType);
|
||||
@ -8372,7 +8365,7 @@ static SDValue lowerVectorShuffleWithUNPCK(const SDLoc &DL, MVT VT,
|
||||
/// one of the inputs being zeroable.
|
||||
static SDValue lowerVectorShuffleAsBitMask(const SDLoc &DL, MVT VT, SDValue V1,
|
||||
SDValue V2, ArrayRef<int> Mask,
|
||||
const SmallBitVector &Zeroable,
|
||||
const APInt &Zeroable,
|
||||
SelectionDAG &DAG) {
|
||||
assert(!VT.isFloatingPoint() && "Floating point types are not supported");
|
||||
MVT EltVT = VT.getVectorElementType();
|
||||
@ -8441,7 +8434,7 @@ static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
|
||||
/// that the shuffle mask is a blend, or convertible into a blend with zero.
|
||||
static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
|
||||
SDValue V2, ArrayRef<int> Original,
|
||||
const SmallBitVector &Zeroable,
|
||||
const APInt &Zeroable,
|
||||
const X86Subtarget &Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
|
||||
@ -8899,7 +8892,7 @@ static SDValue lowerVectorShuffleAsRotate(const SDLoc &DL, MVT VT,
|
||||
static int matchVectorShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
|
||||
unsigned ScalarSizeInBits,
|
||||
ArrayRef<int> Mask, int MaskOffset,
|
||||
const SmallBitVector &Zeroable,
|
||||
const APInt &Zeroable,
|
||||
const X86Subtarget &Subtarget) {
|
||||
int Size = Mask.size();
|
||||
unsigned SizeInBits = Size * ScalarSizeInBits;
|
||||
@ -8961,7 +8954,7 @@ static int matchVectorShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
|
||||
|
||||
static SDValue lowerVectorShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1,
|
||||
SDValue V2, ArrayRef<int> Mask,
|
||||
const SmallBitVector &Zeroable,
|
||||
const APInt &Zeroable,
|
||||
const X86Subtarget &Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
int Size = Mask.size();
|
||||
@ -8997,12 +8990,12 @@ static SDValue lowerVectorShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1,
|
||||
/// \brief Try to lower a vector shuffle using SSE4a EXTRQ/INSERTQ.
|
||||
static SDValue lowerVectorShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1,
|
||||
SDValue V2, ArrayRef<int> Mask,
|
||||
const SmallBitVector &Zeroable,
|
||||
const APInt &Zeroable,
|
||||
SelectionDAG &DAG) {
|
||||
int Size = Mask.size();
|
||||
int HalfSize = Size / 2;
|
||||
assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
|
||||
assert(!Zeroable.all() && "Fully zeroable shuffle mask");
|
||||
assert(!Zeroable.isAllOnesValue() && "Fully zeroable shuffle mask");
|
||||
|
||||
// Upper half must be undefined.
|
||||
if (!isUndefInRange(Mask, HalfSize, HalfSize))
|
||||
@ -9300,7 +9293,7 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
|
||||
/// are both incredibly common and often quite performance sensitive.
|
||||
static SDValue lowerVectorShuffleAsZeroOrAnyExtend(
|
||||
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
|
||||
const SmallBitVector &Zeroable, const X86Subtarget &Subtarget,
|
||||
const APInt &Zeroable, const X86Subtarget &Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
int Bits = VT.getSizeInBits();
|
||||
int NumLanes = Bits / 128;
|
||||
@ -9456,7 +9449,7 @@ static bool isShuffleFoldableLoad(SDValue V) {
|
||||
/// across all subtarget feature sets.
|
||||
static SDValue lowerVectorShuffleAsElementInsertion(
|
||||
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
|
||||
const SmallBitVector &Zeroable, const X86Subtarget &Subtarget,
|
||||
const APInt &Zeroable, const X86Subtarget &Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
MVT ExtVT = VT;
|
||||
MVT EltVT = VT.getVectorElementType();
|
||||
@ -9810,7 +9803,7 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
|
||||
// elements are zeroable.
|
||||
static bool matchVectorShuffleAsInsertPS(SDValue &V1, SDValue &V2,
|
||||
unsigned &InsertPSMask,
|
||||
const SmallBitVector &Zeroable,
|
||||
const APInt &Zeroable,
|
||||
ArrayRef<int> Mask,
|
||||
SelectionDAG &DAG) {
|
||||
assert(V1.getSimpleValueType().is128BitVector() && "Bad operand type!");
|
||||
@ -9899,7 +9892,7 @@ static bool matchVectorShuffleAsInsertPS(SDValue &V1, SDValue &V2,
|
||||
|
||||
static SDValue lowerVectorShuffleAsInsertPS(const SDLoc &DL, SDValue V1,
|
||||
SDValue V2, ArrayRef<int> Mask,
|
||||
const SmallBitVector &Zeroable,
|
||||
const APInt &Zeroable,
|
||||
SelectionDAG &DAG) {
|
||||
assert(V1.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
|
||||
assert(V2.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
|
||||
@ -10034,7 +10027,7 @@ static SDValue lowerVectorShuffleAsPermuteAndUnpack(const SDLoc &DL, MVT VT,
|
||||
/// it is better to avoid lowering through this for integer vectors where
|
||||
/// possible.
|
||||
static SDValue lowerV2F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
||||
const SmallBitVector &Zeroable,
|
||||
const APInt &Zeroable,
|
||||
SDValue V1, SDValue V2,
|
||||
const X86Subtarget &Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
@ -10116,7 +10109,7 @@ static SDValue lowerV2F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
||||
/// it falls back to the floating point shuffle operation with appropriate bit
|
||||
/// casting.
|
||||
static SDValue lowerV2I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
||||
const SmallBitVector &Zeroable,
|
||||
const APInt &Zeroable,
|
||||
SDValue V1, SDValue V2,
|
||||
const X86Subtarget &Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
@ -10335,7 +10328,7 @@ static SDValue lowerVectorShuffleWithSHUFPS(const SDLoc &DL, MVT VT,
|
||||
/// domain crossing penalties, as these are sufficient to implement all v4f32
|
||||
/// shuffles.
|
||||
static SDValue lowerV4F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
||||
const SmallBitVector &Zeroable,
|
||||
const APInt &Zeroable,
|
||||
SDValue V1, SDValue V2,
|
||||
const X86Subtarget &Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
@ -10418,7 +10411,7 @@ static SDValue lowerV4F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
||||
/// We try to handle these with integer-domain shuffles where we can, but for
|
||||
/// blends we use the floating point domain blend instructions.
|
||||
static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
||||
const SmallBitVector &Zeroable,
|
||||
const APInt &Zeroable,
|
||||
SDValue V1, SDValue V2,
|
||||
const X86Subtarget &Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
@ -10985,7 +10978,7 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
|
||||
/// blend if only one input is used.
|
||||
static SDValue lowerVectorShuffleAsBlendOfPSHUFBs(
|
||||
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
|
||||
const SmallBitVector &Zeroable, SelectionDAG &DAG, bool &V1InUse,
|
||||
const APInt &Zeroable, SelectionDAG &DAG, bool &V1InUse,
|
||||
bool &V2InUse) {
|
||||
SDValue V1Mask[16];
|
||||
SDValue V2Mask[16];
|
||||
@ -11046,7 +11039,7 @@ static SDValue lowerVectorShuffleAsBlendOfPSHUFBs(
|
||||
/// halves of the inputs separately (making them have relatively few inputs)
|
||||
/// and then concatenate them.
|
||||
static SDValue lowerV8I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
||||
const SmallBitVector &Zeroable,
|
||||
const APInt &Zeroable,
|
||||
SDValue V1, SDValue V2,
|
||||
const X86Subtarget &Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
@ -11230,7 +11223,7 @@ static int canLowerByDroppingEvenElements(ArrayRef<int> Mask,
|
||||
/// the existing lowering for v8i16 blends on each half, finally PACK-ing them
|
||||
/// back together.
|
||||
static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
||||
const SmallBitVector &Zeroable,
|
||||
const APInt &Zeroable,
|
||||
SDValue V1, SDValue V2,
|
||||
const X86Subtarget &Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
@ -11519,7 +11512,7 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
||||
/// dispatches to the lowering routines accordingly.
|
||||
static SDValue lower128BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
||||
MVT VT, SDValue V1, SDValue V2,
|
||||
const SmallBitVector &Zeroable,
|
||||
const APInt &Zeroable,
|
||||
const X86Subtarget &Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
switch (VT.SimpleTy) {
|
||||
@ -11775,7 +11768,7 @@ static SDValue lowerVectorShuffleAsLanePermuteAndBlend(const SDLoc &DL, MVT VT,
|
||||
/// \brief Handle lowering 2-lane 128-bit shuffles.
|
||||
static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
|
||||
SDValue V2, ArrayRef<int> Mask,
|
||||
const SmallBitVector &Zeroable,
|
||||
const APInt &Zeroable,
|
||||
const X86Subtarget &Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
SmallVector<int, 4> WidenedMask;
|
||||
@ -12310,7 +12303,7 @@ static SDValue lowerVectorShuffleWithPERMV(const SDLoc &DL, MVT VT,
|
||||
/// Also ends up handling lowering of 4-lane 64-bit integer shuffles when AVX2
|
||||
/// isn't available.
|
||||
static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
||||
const SmallBitVector &Zeroable,
|
||||
const APInt &Zeroable,
|
||||
SDValue V1, SDValue V2,
|
||||
const X86Subtarget &Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
@ -12407,7 +12400,7 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
||||
/// This routine is only called when we have AVX2 and thus a reasonable
|
||||
/// instruction set for v4i64 shuffling..
|
||||
static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
||||
const SmallBitVector &Zeroable,
|
||||
const APInt &Zeroable,
|
||||
SDValue V1, SDValue V2,
|
||||
const X86Subtarget &Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
@ -12495,7 +12488,7 @@ static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
||||
/// Also ends up handling lowering of 8-lane 32-bit integer shuffles when AVX2
|
||||
/// isn't available.
|
||||
static SDValue lowerV8F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
||||
const SmallBitVector &Zeroable,
|
||||
const APInt &Zeroable,
|
||||
SDValue V1, SDValue V2,
|
||||
const X86Subtarget &Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
@ -12586,7 +12579,7 @@ static SDValue lowerV8F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
||||
/// This routine is only called when we have AVX2 and thus a reasonable
|
||||
/// instruction set for v8i32 shuffling..
|
||||
static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
||||
const SmallBitVector &Zeroable,
|
||||
const APInt &Zeroable,
|
||||
SDValue V1, SDValue V2,
|
||||
const X86Subtarget &Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
@ -12690,7 +12683,7 @@ static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
||||
/// This routine is only called when we have AVX2 and thus a reasonable
|
||||
/// instruction set for v16i16 shuffling..
|
||||
static SDValue lowerV16I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
||||
const SmallBitVector &Zeroable,
|
||||
const APInt &Zeroable,
|
||||
SDValue V1, SDValue V2,
|
||||
const X86Subtarget &Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
@ -12776,7 +12769,7 @@ static SDValue lowerV16I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
||||
/// This routine is only called when we have AVX2 and thus a reasonable
|
||||
/// instruction set for v32i8 shuffling..
|
||||
static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
||||
const SmallBitVector &Zeroable,
|
||||
const APInt &Zeroable,
|
||||
SDValue V1, SDValue V2,
|
||||
const X86Subtarget &Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
@ -12849,7 +12842,7 @@ static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
||||
/// together based on the available instructions.
|
||||
static SDValue lower256BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
||||
MVT VT, SDValue V1, SDValue V2,
|
||||
const SmallBitVector &Zeroable,
|
||||
const APInt &Zeroable,
|
||||
const X86Subtarget &Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
// If we have a single input to the zero element, insert that into V1 if we
|
||||
@ -13001,7 +12994,7 @@ static SDValue lowerV4X128VectorShuffle(const SDLoc &DL, MVT VT,
|
||||
|
||||
/// \brief Handle lowering of 8-lane 64-bit floating point shuffles.
|
||||
static SDValue lowerV8F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
||||
const SmallBitVector &Zeroable,
|
||||
const APInt &Zeroable,
|
||||
SDValue V1, SDValue V2,
|
||||
const X86Subtarget &Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
@ -13057,7 +13050,7 @@ static SDValue lowerV8F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
||||
|
||||
/// \brief Handle lowering of 16-lane 32-bit floating point shuffles.
|
||||
static SDValue lowerV16F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
||||
const SmallBitVector &Zeroable,
|
||||
const APInt &Zeroable,
|
||||
SDValue V1, SDValue V2,
|
||||
const X86Subtarget &Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
@ -13103,7 +13096,7 @@ static SDValue lowerV16F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
||||
|
||||
/// \brief Handle lowering of 8-lane 64-bit integer shuffles.
|
||||
static SDValue lowerV8I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
||||
const SmallBitVector &Zeroable,
|
||||
const APInt &Zeroable,
|
||||
SDValue V1, SDValue V2,
|
||||
const X86Subtarget &Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
@ -13168,7 +13161,7 @@ static SDValue lowerV8I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
||||
|
||||
/// \brief Handle lowering of 16-lane 32-bit integer shuffles.
|
||||
static SDValue lowerV16I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
||||
const SmallBitVector &Zeroable,
|
||||
const APInt &Zeroable,
|
||||
SDValue V1, SDValue V2,
|
||||
const X86Subtarget &Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
@ -13239,7 +13232,7 @@ static SDValue lowerV16I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
||||
|
||||
/// \brief Handle lowering of 32-lane 16-bit integer shuffles.
|
||||
static SDValue lowerV32I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
||||
const SmallBitVector &Zeroable,
|
||||
const APInt &Zeroable,
|
||||
SDValue V1, SDValue V2,
|
||||
const X86Subtarget &Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
@ -13290,7 +13283,7 @@ static SDValue lowerV32I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
||||
|
||||
/// \brief Handle lowering of 64-lane 8-bit integer shuffles.
|
||||
static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
||||
const SmallBitVector &Zeroable,
|
||||
const APInt &Zeroable,
|
||||
SDValue V1, SDValue V2,
|
||||
const X86Subtarget &Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
@ -13350,7 +13343,7 @@ static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
||||
/// together based on the available instructions.
|
||||
static SDValue lower512BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
||||
MVT VT, SDValue V1, SDValue V2,
|
||||
const SmallBitVector &Zeroable,
|
||||
const APInt &Zeroable,
|
||||
const X86Subtarget &Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
assert(Subtarget.hasAVX512() &&
|
||||
@ -13572,8 +13565,8 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget,
|
||||
// We actually see shuffles that are entirely re-arrangements of a set of
|
||||
// zero inputs. This mostly happens while decomposing complex shuffles into
|
||||
// simple ones. Directly lower these as a buildvector of zeros.
|
||||
SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
|
||||
if (Zeroable.all())
|
||||
APInt Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
|
||||
if (Zeroable.isAllOnesValue())
|
||||
return getZeroVector(VT, Subtarget, DAG, DL);
|
||||
|
||||
// Try to collapse shuffles into using a vector type with fewer elements but
|
||||
@ -26541,10 +26534,11 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
|
||||
unsigned NumMaskElts = Mask.size();
|
||||
|
||||
bool ContainsZeros = false;
|
||||
SmallBitVector Zeroable(NumMaskElts, false);
|
||||
APInt Zeroable(NumMaskElts, false);
|
||||
for (unsigned i = 0; i != NumMaskElts; ++i) {
|
||||
int M = Mask[i];
|
||||
Zeroable[i] = isUndefOrZero(M);
|
||||
if (isUndefOrZero(M))
|
||||
Zeroable.setBit(i);
|
||||
ContainsZeros |= (M == SM_SentinelZero);
|
||||
}
|
||||
|
||||
@ -26825,12 +26819,12 @@ static bool matchBinaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
|
||||
// Attempt to combine to INSERTPS.
|
||||
if (AllowFloatDomain && EltSizeInBits == 32 && Subtarget.hasSSE41() &&
|
||||
MaskVT.is128BitVector()) {
|
||||
SmallBitVector Zeroable(4, false);
|
||||
APInt Zeroable(4, 0);
|
||||
for (unsigned i = 0; i != NumMaskElts; ++i)
|
||||
if (Mask[i] < 0)
|
||||
Zeroable[i] = true;
|
||||
Zeroable.setBit(i);
|
||||
|
||||
if (Zeroable.any() &&
|
||||
if (Zeroable.getBoolValue() &&
|
||||
matchVectorShuffleAsInsertPS(V1, V2, PermuteImm, Zeroable, Mask, DAG)) {
|
||||
Shuffle = X86ISD::INSERTPS;
|
||||
ShuffleVT = MVT::v4f32;
|
||||
|
Loading…
x
Reference in New Issue
Block a user