[x86] Run most of the rest of the shuffle combining over non-128-bit
vectors.

This lets us fix the rest of the v16 lowering problems when pshufb is
clearly better. We might still be able to improve some of the lowerings
by enabling the other combine-based rewriting to fire for non-128-bit
vectors, but this at least should remove any regressions from using the
fancy v16i16 lowering strategy.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@230753 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent 2d58cc5f1b
commit c4179ffed3
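
A note on reading the updated AVX2 checks below: vpshufb shuffles each 128-bit lane of a YMM register independently, using the low four bits of every control byte as an in-lane byte index and treating a set high bit as "write zero". A minimal model of that semantic, as an illustrative sketch in plain C++ (not LLVM code; the function name is made up):

#include <array>
#include <cstdint>

// Illustrative model of 256-bit VPSHUFB: each 128-bit lane of Src is shuffled
// independently. A control byte with bit 7 set produces 0; otherwise its low
// 4 bits select a byte within the same 16-byte lane.
std::array<uint8_t, 32> vpshufb256(const std::array<uint8_t, 32> &Src,
                                   const std::array<uint8_t, 32> &Ctrl) {
  std::array<uint8_t, 32> Res{};
  for (int i = 0; i < 32; ++i) {
    int LaneBase = (i / 16) * 16; // 0 for the low lane, 16 for the high lane
    uint8_t C = Ctrl[i];
    Res[i] = (C & 0x80) ? 0 : Src[LaneBase + (C & 0x0F)];
  }
  return Res;
}

Because the affected v16i16 shuffles keep every element inside its own 128-bit lane, a single vpshufb with a constant control vector can replace the vpshufd/vpshuflw/vpshufhw sequences the old checks expected.
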
lib/Target/X86/X86ISelLowering.cpp

@@ -19149,7 +19149,9 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
   // Note that even with AVX we prefer the PSHUFD form of shuffle for integer
   // vectors because it can have a load folded into it that UNPCK cannot. This
   // doesn't preclude something switching to the shorter encoding post-RA.
-  if (FloatDomain) {
+  //
+  // FIXME: Should teach these routines about AVX vector widths.
+  if (FloatDomain && VT.getSizeInBits() == 128) {
     if (Mask.equals(0, 0) || Mask.equals(1, 1)) {
       bool Lo = Mask.equals(0, 0);
       unsigned Shuffle;

@@ -19213,7 +19215,7 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
   // We always canonicalize the 8 x i16 and 16 x i8 shuffles into their UNPCK
   // variants as none of these have single-instruction variants that are
   // superior to the UNPCK formulation.
-  if (!FloatDomain &&
+  if (!FloatDomain && VT.getSizeInBits() == 128 &&
       (Mask.equals(0, 0, 1, 1, 2, 2, 3, 3) ||
        Mask.equals(4, 4, 5, 5, 6, 6, 7, 7) ||
        Mask.equals(0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7) ||
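
The Mask.equals patterns guarded in the hunk above are the masks produced by unpacking a 128-bit vector with itself. A rough standalone check for that pattern, written here only as an illustration (not the helper LLVM uses):

#include <vector>

// Sketch: does Mask describe an UNPCKL/UNPCKH of a vector with itself?
// For N elements, unpack-low with self yields 0,0,1,1,...,N/2-1,N/2-1 and
// unpack-high yields N/2,N/2,...,N-1,N-1.
static bool isSelfUnpackMask(const std::vector<int> &Mask, bool &Lo) {
  int N = (int)Mask.size();
  for (bool TryLo : {true, false}) {
    int Base = TryLo ? 0 : N / 2;
    bool Match = true;
    for (int i = 0; i < N; ++i)
      if (Mask[i] != Base + i / 2) {
        Match = false;
        break;
      }
    if (Match) {
      Lo = TryLo;
      return true;
    }
  }
  return false;
}

With the added VT.getSizeInBits() == 128 check, this canonicalization now explicitly stays out of the way of the wider vectors handled by the PSHUFB path below.
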
@@ -19254,9 +19256,9 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
   // in practice PSHUFB tends to be *very* fast so we're more aggressive.
   if ((Depth >= 3 || HasPSHUFB) && Subtarget->hasSSSE3()) {
     SmallVector<SDValue, 16> PSHUFBMask;
-    assert(Mask.size() <= 16 && "Can't shuffle elements smaller than bytes!");
-    int Ratio = 16 / Mask.size();
-    for (unsigned i = 0; i < 16; ++i) {
+    int NumBytes = VT.getSizeInBits() / 8;
+    int Ratio = NumBytes / Mask.size();
+    for (int i = 0; i < NumBytes; ++i) {
       if (Mask[i / Ratio] == SM_SentinelUndef) {
         PSHUFBMask.push_back(DAG.getUNDEF(MVT::i8));
         continue;

@@ -19266,12 +19268,13 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
                   : 255;
       PSHUFBMask.push_back(DAG.getConstant(M, MVT::i8));
     }
-    Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Input);
+    MVT ByteVT = MVT::getVectorVT(MVT::i8, NumBytes);
+    Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Input);
     DCI.AddToWorklist(Op.getNode());
     SDValue PSHUFBMaskOp =
-        DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, PSHUFBMask);
+        DAG.getNode(ISD::BUILD_VECTOR, DL, ByteVT, PSHUFBMask);
     DCI.AddToWorklist(PSHUFBMaskOp.getNode());
-    Op = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, Op, PSHUFBMaskOp);
+    Op = DAG.getNode(X86ISD::PSHUFB, DL, ByteVT, Op, PSHUFBMaskOp);
     DCI.AddToWorklist(Op.getNode());
     DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op),
                   /*AddTo*/ true);
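
The two hunks above are the core of the change: the PSHUFB mask is now built for whatever byte width the vector type has rather than a hard-coded 16 bytes. A simplified standalone rendering of that arithmetic, using plain ints instead of SDValue constants (SentinelUndef/SentinelZero are stand-ins for LLVM's SM_SentinelUndef/SM_SentinelZero):

#include <vector>

constexpr int SentinelUndef = -1; // stand-in for SM_SentinelUndef
constexpr int SentinelZero = -2;  // stand-in for SM_SentinelZero

// Expand an element-level shuffle mask to a byte-level PSHUFB mask for a
// vector of VTBits bits: each element index spreads across Ratio consecutive
// byte positions, undef stays undef, and zeroed elements become 255 (a value
// with bit 7 set, which PSHUFB turns into a zero byte).
std::vector<int> expandToByteMask(const std::vector<int> &Mask, int VTBits) {
  int NumBytes = VTBits / 8;
  int Ratio = NumBytes / (int)Mask.size();
  std::vector<int> ByteMask;
  ByteMask.reserve(NumBytes);
  for (int i = 0; i < NumBytes; ++i) {
    int M = Mask[i / Ratio];
    if (M == SentinelUndef) {
      ByteMask.push_back(SentinelUndef); // becomes an undef i8 operand
      continue;
    }
    ByteMask.push_back(M == SentinelZero ? 255 : M * Ratio + i % Ratio);
  }
  return ByteMask;
}

For a 16-element v16i16 mask on a 256-bit type, NumBytes is 32 and Ratio is 2, which is exactly how the 32-byte vpshufb control vectors in the updated tests below are formed.
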
@@ -19329,10 +19332,6 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root,
   MVT VT = Op.getSimpleValueType();
   if (!VT.isVector())
     return false; // Bail if we hit a non-vector.
-  // FIXME: This routine should be taught about 256-bit shuffles, or a 256-bit
-  // version should be added.
-  if (VT.getSizeInBits() != 128)
-    return false;
 
   assert(Root.getSimpleValueType().isVector() &&
          "Shuffles operate on vector types!");

@@ -19925,10 +19924,6 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
     if (Shuffle.getNode())
       return Shuffle;
 
-  // Only handle 128 wide vector from here on.
-  if (!VT.is128BitVector())
-    return SDValue();
-
   // Try recursively combining arbitrary sequences of x86 shuffle
   // instructions into higher-order shuffles. We do this after combining
   // specific PSHUF instruction sequences into their minimal form so that we
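
The recursive combine referenced in the comment above works by folding the masks of nested shuffles into a single mask and then re-lowering that combined mask. A much-simplified sketch of the mask algebra involved (it assumes same-sized, single-input masks and none of the bookkeeping the real combineX86ShufflesRecursively does):

#include <vector>

// Compose two shuffle masks: Outer selects from the result of applying Inner,
// so the combined mask is Inner[Outer[i]], with undef (-1) propagated.
// Assumes every non-negative Outer index is a valid index into Inner.
std::vector<int> composeShuffleMasks(const std::vector<int> &Outer,
                                     const std::vector<int> &Inner) {
  std::vector<int> Combined(Outer.size(), -1);
  for (size_t i = 0; i < Outer.size(); ++i)
    if (Outer[i] >= 0)
      Combined[i] = Inner[Outer[i]];
  return Combined;
}

Removing the two 128-bit bail-outs above lets this combining run over 256-bit shuffle chains as well, which is what produces the v16i16 and v32i8 test changes that follow.
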
test/CodeGen/X86/vector-shuffle-256-v16.ll

@@ -159,9 +159,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_0
 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
 ; AVX2: # BB#0:
 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,1,0,3,4,5,4,7]
-; AVX2-NEXT: vpshuflw {{.*#+}} ymm1 = ymm1[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
-; AVX2-NEXT: vpshufhw {{.*#+}} ymm1 = ymm1[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
 ; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
 ; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0

@@ -323,9 +321,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_0
 ;
 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
 ; AVX2: # BB#0:
-; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,3,4,5,4,7]
-; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
-; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
 ; AVX2-NEXT: retq
   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
   ret <16 x i16> %shuffle

@@ -343,9 +339,7 @@ define <16 x i16> @shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_1
 ;
 ; AVX2-LABEL: shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15:
 ; AVX2: # BB#0:
-; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,3,2,3,4,7,6,7]
-; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,3,3,3,4,5,6,7,11,11,11,11,12,13,14,15]
-; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,14,15,14,15,14,15,14,15,14,15,14,15,14,15,30,31,30,31,30,31,30,31,30,31,30,31,30,31,30,31]
 ; AVX2-NEXT: retq
   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
   ret <16 x i16> %shuffle
@@ -726,10 +720,8 @@ define <16 x i16> @shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_2
 ;
 ; AVX2-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24:
 ; AVX2: # BB#0:
+; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,0,1,4,5,0,1,0,1,0,1,12,13,0,1,16,17,16,17,20,21,16,17,16,17,16,17,28,29,16,17]
 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
-; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,1,0,3,4,5,4,7]
-; AVX2-NEXT: vpshuflw {{.*#+}} ymm1 = ymm1[0,0,2,0,4,5,6,7,8,8,10,8,12,13,14,15]
-; AVX2-NEXT: vpshufhw {{.*#+}} ymm1 = ymm1[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12]
 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
 ; AVX2-NEXT: retq
   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 8, i32 24, i32 8, i32 24, i32 8, i32 24, i32 8, i32 24>

@@ -821,9 +813,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_0
 ;
 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08:
 ; AVX2: # BB#0:
-; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,3,4,5,4,7]
-; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
-; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,5,4,8,9,10,11,12,12,13,12]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,16,17,16,17,16,17,16,17,16,17,18,19,16,17]
 ; AVX2-NEXT: retq
   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 8>
   ret <16 x i16> %shuffle

@@ -841,9 +831,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_0
 ;
 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08:
 ; AVX2: # BB#0:
-; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
-; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
-; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,6,4,4,8,9,10,11,12,14,12,12]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,16,17,16,17,16,17,20,21,16,17,16,17]
 ; AVX2-NEXT: retq
   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 10, i32 8, i32 8>
   ret <16 x i16> %shuffle
@@ -861,9 +849,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_0
 ;
 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08:
 ; AVX2: # BB#0:
-; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
-; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
-; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,16,17,22,23,16,17,16,17,16,17]
 ; AVX2-NEXT: retq
   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 11, i32 8, i32 8, i32 8>
   ret <16 x i16> %shuffle

@@ -881,9 +867,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_0
 ;
 ; AVX2-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_08:
 ; AVX2: # BB#0:
-; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,0,3,4,6,4,7]
-; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,2,4,5,6,7,8,8,8,10,12,13,14,15]
-; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,24,25,16,17,16,17,16,17,16,17]
 ; AVX2-NEXT: retq
   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 12, i32 8, i32 8, i32 8, i32 8>
   ret <16 x i16> %shuffle

@@ -901,9 +885,7 @@ define <16 x i16> @shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_0
 ;
 ; AVX2-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_08:
 ; AVX2: # BB#0:
-; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,0,3,4,6,4,7]
-; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,3,0,4,5,6,7,8,8,11,8,12,13,14,15]
-; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,26,27,16,17,16,17,16,17,16,17,16,17]
 ; AVX2-NEXT: retq
   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 13, i32 8, i32 8, i32 8, i32 8, i32 8>
   ret <16 x i16> %shuffle
@@ -921,9 +903,7 @@ define <16 x i16> @shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_0
 ;
 ; AVX2-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_08:
 ; AVX2: # BB#0:
-; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,3,0,3,4,7,4,7]
-; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,2,0,0,4,5,6,7,8,10,8,8,12,13,14,15]
-; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,28,29,16,17,16,17,16,17,16,17,16,17,16,17]
 ; AVX2-NEXT: retq
   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 14, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
   ret <16 x i16> %shuffle

@@ -941,9 +921,7 @@ define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_0
 ;
 ; AVX2-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_08:
 ; AVX2: # BB#0:
-; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,3,0,3,4,7,4,7]
-; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15]
-; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,30,31,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
 ; AVX2-NEXT: retq
   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 15, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
   ret <16 x i16> %shuffle

@@ -2336,9 +2314,7 @@ define <16 x i16> @shuffle_v16i16_00_01_02_07_uu_uu_uu_uu_08_09_10_15_uu_uu_uu_u
 ;
 ; AVX2-LABEL: shuffle_v16i16_00_01_02_07_uu_uu_uu_uu_08_09_10_15_uu_uu_uu_uu:
 ; AVX2: # BB#0:
-; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
-; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,7,6,7,8,9,10,11,12,15,14,15]
-; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15,16,17,18,19,20,21,30,31,20,21,30,31,28,29,30,31]
 ; AVX2-NEXT: retq
   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 10, i32 15, i32 undef, i32 undef, i32 undef, i32 undef>
   ret <16 x i16> %shuffle
@@ -2378,9 +2354,7 @@ define <16 x i16> @shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_u
 ;
 ; AVX2-LABEL: shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_uu:
 ; AVX2: # BB#0:
-; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,1,2,0,7,5,6,4]
-; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,3,2,3,4,5,6,7,8,11,10,11,12,13,14,15]
-; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,0,2,3,6,4,6,7]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3,24,25,26,27,28,29,22,23,24,25,26,27,16,17,18,19]
 ; AVX2-NEXT: retq
   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
   ret <16 x i16> %shuffle

@@ -2652,9 +2626,7 @@ define <16 x i16> @shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_2
 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6,5]
 ; AVX2-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[0,1,2,3,2,3,0,1,8,9,10,11,6,7,4,5]
 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
-; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,2,1,4,5,6,5]
-; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[1,0,2,3,4,5,6,7,9,8,10,11,12,13,14,15]
-; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,6,7,8,9,10,11,15,14,14,15]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,4,5,6,7,6,7,4,5,4,5,6,7,18,19,16,17,20,21,22,23,22,23,20,21,20,21,22,23]
 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
 ; AVX2-NEXT: retq
   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 0, i32 17, i32 16, i32 3, i32 2, i32 19, i32 26, i32 9, i32 8, i32 25, i32 24, i32 11, i32 10, i32 27, i32 26>

@@ -2786,10 +2758,7 @@ define <16 x i16> @shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_u
 ; AVX2-LABEL: shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_uu:
 ; AVX2: # BB#0:
 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
-; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,1,2,0,7,5,6,4]
-; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,6,5,6,7,8,9,10,11,14,13,14,15]
-; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,1,2,3,6,5,6,7]
-; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,3,2,1,4,5,6,7,8,11,10,9,12,13,14,15]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3,16,17,22,23,20,21,26,27,16,17,26,27,16,17,18,19]
 ; AVX2-NEXT: retq
   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 3, i32 2, i32 21, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 11, i32 10, i32 29, i32 undef, i32 undef, i32 undef, i32 undef>
   ret <16 x i16> %shuffle
@@ -2962,10 +2931,8 @@ define <16 x i16> @shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_u
 ;
 ; AVX2-LABEL: shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu:
 ; AVX2: # BB#0:
+; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5,16,17,20,21,20,21,22,23,16,17,20,21,24,25,20,21]
 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
-; AVX2-NEXT: vpshuflw {{.*#+}} ymm1 = ymm1[0,2,2,3,4,5,6,7,8,10,10,11,12,13,14,15]
-; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,1,2,0,4,5,6,4]
-; AVX2-NEXT: vpshufhw {{.*#+}} ymm1 = ymm1[0,1,2,3,6,7,4,7,8,9,10,11,14,15,12,15]
 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
 ; AVX2-NEXT: retq
   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 4, i32 undef, i32 16, i32 18, i32 20, i32 undef, i32 undef, i32 undef, i32 12, i32 undef, i32 24, i32 26, i32 28, i32 undef>

@@ -3277,9 +3244,7 @@ define <16 x i16> @shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_u
 ; AVX2-LABEL: shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu:
 ; AVX2: # BB#0:
 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4],ymm0[5,6],ymm1[7],ymm0[8,9,10,11],ymm1[12],ymm0[13,14],ymm1[15]
-; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,1,2,3,7,5,6,7]
-; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15]
-; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,5,7,8,9,10,11,12,12,13,15]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,14,15,6,7,6,7,8,9,8,9,10,11,14,15,30,31,30,31,22,23,22,23,24,25,24,25,26,27,30,31]
 ; AVX2-NEXT: retq
   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 undef, i32 3, i32 undef, i32 20, i32 20, i32 5, i32 undef, i32 31, i32 undef, i32 11, i32 undef, i32 28, i32 28, i32 13, i32 undef>
   ret <16 x i16> %shuffle
test/CodeGen/X86/vector-shuffle-256-v32.ll

@@ -985,9 +985,7 @@ define <32 x i8> @shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_
 ; AVX2: # BB#0:
 ; AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2
 ; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
-; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,3,4,5,4,7]
-; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
-; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
 ; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: retq