mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-29 06:10:48 +00:00
[X86][AVX2] Prefer VPERMQ/VPERMPD over VINSERTI128/VINSERTF128 for unary shuffles
Using VPERMQ/VPERMPD allows memory folding of the (repeated) input where VINSERTI128/VINSERTF128 can not. Differential Revision: http://reviews.llvm.org/D19228 llvm-svn: 266728
This commit is contained in:
parent
36a326a5dc
commit
e1392bc92c
@ -10586,6 +10586,10 @@ static SDValue lowerV2X128VectorShuffle(SDLoc DL, MVT VT, SDValue V1,
|
||||
// subvector.
|
||||
bool OnlyUsesV1 = isShuffleEquivalent(V1, V2, Mask, {0, 1, 0, 1});
|
||||
if (OnlyUsesV1 || isShuffleEquivalent(V1, V2, Mask, {0, 1, 4, 5})) {
|
||||
// With AVX2 we should use VPERMQ/VPERMPD to allow memory folding.
|
||||
if (Subtarget.hasAVX2() && isSingleInputShuffleMask(Mask))
|
||||
return SDValue();
|
||||
|
||||
MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(),
|
||||
VT.getVectorNumElements() / 2);
|
||||
SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
|
||||
@ -11038,8 +11042,9 @@ static SDValue lowerV4F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
|
||||
|
||||
SmallVector<int, 4> WidenedMask;
|
||||
if (canWidenShuffleElements(Mask, WidenedMask))
|
||||
return lowerV2X128VectorShuffle(DL, MVT::v4f64, V1, V2, Mask, Subtarget,
|
||||
DAG);
|
||||
if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4f64, V1, V2, Mask,
|
||||
Subtarget, DAG))
|
||||
return V;
|
||||
|
||||
if (isSingleInputShuffleMask(Mask)) {
|
||||
// Check for being able to broadcast a single element.
|
||||
@ -11133,8 +11138,9 @@ static SDValue lowerV4I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
|
||||
|
||||
SmallVector<int, 4> WidenedMask;
|
||||
if (canWidenShuffleElements(Mask, WidenedMask))
|
||||
return lowerV2X128VectorShuffle(DL, MVT::v4i64, V1, V2, Mask, Subtarget,
|
||||
DAG);
|
||||
if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4i64, V1, V2, Mask,
|
||||
Subtarget, DAG))
|
||||
return V;
|
||||
|
||||
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i64, V1, V2, Mask,
|
||||
Subtarget, DAG))
|
||||
|
@ -35,21 +35,31 @@ entry:
|
||||
}
|
||||
|
||||
define <8 x float> @shuffle_v8f32_01230123(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
|
||||
; ALL-LABEL: shuffle_v8f32_01230123:
|
||||
; ALL: ## BB#0: ## %entry
|
||||
; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; ALL-NEXT: retq
|
||||
; AVX1-LABEL: shuffle_v8f32_01230123:
|
||||
; AVX1: ## BB#0: ## %entry
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v8f32_01230123:
|
||||
; AVX2: ## BB#0: ## %entry
|
||||
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,1]
|
||||
; AVX2-NEXT: retq
|
||||
entry:
|
||||
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
|
||||
ret <8 x float> %shuffle
|
||||
}
|
||||
|
||||
define <8 x float> @shuffle_v8f32_01230123_mem(<8 x float>* %pa, <8 x float>* %pb) nounwind uwtable readnone ssp {
|
||||
; ALL-LABEL: shuffle_v8f32_01230123_mem:
|
||||
; ALL: ## BB#0: ## %entry
|
||||
; ALL-NEXT: vmovaps (%rdi), %ymm0
|
||||
; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; ALL-NEXT: retq
|
||||
; AVX1-LABEL: shuffle_v8f32_01230123_mem:
|
||||
; AVX1: ## BB#0: ## %entry
|
||||
; AVX1-NEXT: vmovaps (%rdi), %ymm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v8f32_01230123_mem:
|
||||
; AVX2: ## BB#0: ## %entry
|
||||
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = mem[0,1,0,1]
|
||||
; AVX2-NEXT: retq
|
||||
entry:
|
||||
%a = load <8 x float>, <8 x float>* %pa
|
||||
%b = load <8 x float>, <8 x float>* %pb
|
||||
|
@ -455,7 +455,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_0
|
||||
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
|
||||
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
|
||||
; AVX2-NEXT: retq
|
||||
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
|
||||
ret <16 x i16> %shuffle
|
||||
@ -471,7 +471,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_0
|
||||
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
|
||||
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
|
||||
; AVX2-NEXT: retq
|
||||
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
|
||||
ret <16 x i16> %shuffle
|
||||
@ -487,7 +487,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_0
|
||||
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
|
||||
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
|
||||
; AVX2-NEXT: retq
|
||||
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
|
||||
ret <16 x i16> %shuffle
|
||||
@ -503,7 +503,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_0
|
||||
; AVX2-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
|
||||
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
|
||||
; AVX2-NEXT: retq
|
||||
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
|
||||
ret <16 x i16> %shuffle
|
||||
@ -519,7 +519,7 @@ define <16 x i16> @shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_0
|
||||
; AVX2-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
|
||||
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
|
||||
; AVX2-NEXT: retq
|
||||
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
ret <16 x i16> %shuffle
|
||||
@ -535,7 +535,7 @@ define <16 x i16> @shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_0
|
||||
; AVX2-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
|
||||
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
|
||||
; AVX2-NEXT: retq
|
||||
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
ret <16 x i16> %shuffle
|
||||
@ -551,7 +551,7 @@ define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_0
|
||||
; AVX2-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
|
||||
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
|
||||
; AVX2-NEXT: retq
|
||||
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
ret <16 x i16> %shuffle
|
||||
|
@ -818,7 +818,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_
|
||||
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
|
||||
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
|
||||
; AVX2-NEXT: retq
|
||||
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
|
||||
ret <32 x i8> %shuffle
|
||||
@ -834,7 +834,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_
|
||||
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
|
||||
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
|
||||
; AVX2-NEXT: retq
|
||||
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
|
||||
ret <32 x i8> %shuffle
|
||||
@ -850,7 +850,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_
|
||||
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
|
||||
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
|
||||
; AVX2-NEXT: retq
|
||||
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
ret <32 x i8> %shuffle
|
||||
@ -866,7 +866,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_
|
||||
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
|
||||
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
|
||||
; AVX2-NEXT: retq
|
||||
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
ret <32 x i8> %shuffle
|
||||
@ -882,7 +882,7 @@ define <32 x i8> @shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_
|
||||
; AVX2-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
|
||||
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
|
||||
; AVX2-NEXT: retq
|
||||
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
ret <32 x i8> %shuffle
|
||||
@ -902,7 +902,7 @@ define <32 x i8> @shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_
|
||||
; AVX2-NEXT: movl $15, %eax
|
||||
; AVX2-NEXT: vmovd %eax, %xmm1
|
||||
; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
|
||||
; AVX2-NEXT: retq
|
||||
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
ret <32 x i8> %shuffle
|
||||
|
@ -849,15 +849,15 @@ define <4 x i64> @shuffle_v4i64_0451(<4 x i64> %a, <4 x i64> %b) {
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v4i64_0451:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,0,1,3]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,2,1]
|
||||
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: shuffle_v4i64_0451:
|
||||
; AVX512VL: # BB#0:
|
||||
; AVX512VL-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,0,1,3]
|
||||
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,2,1]
|
||||
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
|
||||
; AVX512VL-NEXT: retq
|
||||
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 1>
|
||||
@ -893,14 +893,14 @@ define <4 x i64> @shuffle_v4i64_4015(<4 x i64> %a, <4 x i64> %b) {
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v4i64_4015:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,2,1]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,3]
|
||||
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: shuffle_v4i64_4015:
|
||||
; AVX512VL: # BB#0:
|
||||
; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm1, %ymm1
|
||||
; AVX512VL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,2,1]
|
||||
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,3]
|
||||
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
|
||||
; AVX512VL-NEXT: retq
|
||||
|
@ -679,11 +679,17 @@ define <8 x float> @shuffle_v8f32_f511235a(<8 x float> %a, <8 x float> %b) {
|
||||
}
|
||||
|
||||
define <8 x float> @shuffle_v8f32_32103210(<8 x float> %a, <8 x float> %b) {
|
||||
; ALL-LABEL: shuffle_v8f32_32103210:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
|
||||
; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; ALL-NEXT: retq
|
||||
; AVX1-LABEL: shuffle_v8f32_32103210:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v8f32_32103210:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
|
||||
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,1]
|
||||
; AVX2-NEXT: retq
|
||||
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
|
||||
ret <8 x float> %shuffle
|
||||
}
|
||||
@ -1764,7 +1770,7 @@ define <8 x i32> @shuffle_v8i32_32103210(<8 x i32> %a, <8 x i32> %b) {
|
||||
; AVX2-LABEL: shuffle_v8i32_32103210:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
|
||||
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
|
||||
; AVX2-NEXT: retq
|
||||
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
|
||||
ret <8 x i32> %shuffle
|
||||
|
Loading…
Reference in New Issue
Block a user