mirror of
https://github.com/RPCSX/llvm.git
synced 2025-01-10 14:10:58 +00:00
[X86][SSE] Consistently use the target shuffle root value type for vector size calculations. NFCI.
This is preparation for adding 2-input support, so we want to avoid unnecessary references to the input value type.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@277814 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
d55759c8bc
commit
0d58c98b16
@ -25092,8 +25092,10 @@ static bool combineX86ShuffleChain(SDValue Input, SDValue Root,
|
||||
|
||||
MVT VT = Input.getSimpleValueType();
|
||||
MVT RootVT = Root.getSimpleValueType();
|
||||
SDLoc DL(Root);
|
||||
assert(VT.getSizeInBits() == RootVT.getSizeInBits() &&
|
||||
"Vector size mismatch");
|
||||
|
||||
SDLoc DL(Root);
|
||||
SDValue Res;
|
||||
|
||||
unsigned NumBaseMaskElts = BaseMask.size();
|
||||
@ -25106,6 +25108,8 @@ static bool combineX86ShuffleChain(SDValue Input, SDValue Root,
|
||||
|
||||
unsigned RootSizeInBits = RootVT.getSizeInBits();
|
||||
unsigned BaseMaskEltSizeInBits = RootSizeInBits / NumBaseMaskElts;
|
||||
bool FloatDomain = VT.isFloatingPoint() ||
|
||||
(RootVT.is256BitVector() && !Subtarget.hasAVX2());
|
||||
|
||||
// Don't combine if we are an AVX512/EVEX target and the mask element size
|
||||
// is different from the root element size - this would prevent writemasks
|
||||
@ -25122,12 +25126,11 @@ static bool combineX86ShuffleChain(SDValue Input, SDValue Root,
|
||||
// TODO - handle 128/256-bit lane shuffles of 512-bit vectors.
|
||||
|
||||
// Handle 128-bit lane shuffles of 256-bit vectors.
|
||||
if (VT.is256BitVector() && NumBaseMaskElts == 2 &&
|
||||
if (RootVT.is256BitVector() && NumBaseMaskElts == 2 &&
|
||||
!isSequentialOrUndefOrZeroInRange(BaseMask, 0, 2, 0)) {
|
||||
if (Depth == 1 && Root.getOpcode() == X86ISD::VPERM2X128)
|
||||
return false; // Nothing to do!
|
||||
MVT ShuffleVT = (VT.isFloatingPoint() || !Subtarget.hasAVX2() ? MVT::v4f64
|
||||
: MVT::v4i64);
|
||||
MVT ShuffleVT = (FloatDomain ? MVT::v4f64 : MVT::v4i64);
|
||||
unsigned PermMask = 0;
|
||||
PermMask |= ((BaseMask[0] < 0 ? 0x8 : (BaseMask[0] & 1)) << 0);
|
||||
PermMask |= ((BaseMask[1] < 0 ? 0x8 : (BaseMask[1] & 1)) << 4);
|
||||
@ -25158,9 +25161,7 @@ static bool combineX86ShuffleChain(SDValue Input, SDValue Root,
|
||||
unsigned MaskEltSizeInBits = RootSizeInBits / NumMaskElts;
|
||||
|
||||
// Determine the effective mask value type.
|
||||
bool FloatDomain =
|
||||
(VT.isFloatingPoint() || (VT.is256BitVector() && !Subtarget.hasAVX2())) &&
|
||||
(32 <= MaskEltSizeInBits);
|
||||
FloatDomain &= (32 <= MaskEltSizeInBits);
|
||||
MVT MaskVT = FloatDomain ? MVT::getFloatingPointVT(MaskEltSizeInBits)
|
||||
: MVT::getIntegerVT(MaskEltSizeInBits);
|
||||
MaskVT = MVT::getVectorVT(MaskVT, NumMaskElts);
|
||||
@ -25265,11 +25266,11 @@ static bool combineX86ShuffleChain(SDValue Input, SDValue Root,
|
||||
// instructions, but in practice PSHUFB tends to be *very* fast so we're
|
||||
// more aggressive.
|
||||
if ((Depth >= 3 || HasVariableMask) &&
|
||||
((VT.is128BitVector() && Subtarget.hasSSSE3()) ||
|
||||
(VT.is256BitVector() && Subtarget.hasAVX2()) ||
|
||||
(VT.is512BitVector() && Subtarget.hasBWI()))) {
|
||||
((RootVT.is128BitVector() && Subtarget.hasSSSE3()) ||
|
||||
(RootVT.is256BitVector() && Subtarget.hasAVX2()) ||
|
||||
(RootVT.is512BitVector() && Subtarget.hasBWI()))) {
|
||||
SmallVector<SDValue, 16> PSHUFBMask;
|
||||
int NumBytes = VT.getSizeInBits() / 8;
|
||||
int NumBytes = RootVT.getSizeInBits() / 8;
|
||||
int Ratio = NumBytes / NumMaskElts;
|
||||
for (int i = 0; i < NumBytes; ++i) {
|
||||
int M = Mask[i / Ratio];
|
||||
|
Loading…
Reference in New Issue
Block a user