[x86] Use PALIGNR for v4i32 and v2i64 blends when appropriate.

There is no point in using it for single-input shuffles, as pshufd is just as
fast and doesn't tie the two operands together. This removes a substantial
number of wrong-domain blend operations in SSSE3 mode. It also completes the
use of PALIGNR for integer shuffles and addresses one of the test cases
Quentin hit with the new vector shuffle lowering.

There is still the question of whether and when to use this for floating-point
shuffles. PALIGNR is faster than shufps or shufpd, but it executes in the
integer domain, so using it on floating-point vectors risks a domain-crossing
penalty. I don't yet have a good heuristic for when that trade-off pays off.
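To make the byte-level semantics concrete, here is a minimal scalar model of
PALIGNR (an illustrative sketch only, not LLVM code; the function name and the
in-range immediate restriction are mine):

    #include <array>
    #include <cassert>
    #include <cstdint>
    #include <cstring>

    using V128 = std::array<uint8_t, 16>;

    // PALIGNR concatenates the destination operand (high bytes) with the
    // source operand (low bytes), then extracts 16 contiguous bytes starting
    // at byte offset `imm`. Real PALIGNR also accepts imm > 16 and shifts in
    // zeros; this model covers only the in-range case the lowering uses.
    V128 palignr(const V128 &hi, const V128 &lo, unsigned imm) {
      assert(imm <= 16 && "model only covers in-range immediates");
      uint8_t concat[32];
      std::memcpy(concat, lo.data(), 16);      // bytes 0..15  = low operand
      std::memcpy(concat + 16, hi.data(), 16); // bytes 16..31 = high operand
      V128 out;
      std::memcpy(out.data(), concat + imm, 16);
      return out;
    }

    int main() {
      // The v4i32 shuffle <1,2,3,4> takes elements 1..3 of %a followed by
      // element 0 of %b, i.e. bytes a[4..15] ++ b[0..3]: palignr with
      // imm = 1 element * 4 bytes = 4, matching the `palignr $4` checks in
      // the v4i32 tests below.
      V128 a, b;
      for (int i = 0; i != 16; ++i) { a[i] = uint8_t(i); b[i] = uint8_t(16 + i); }
      V128 r = palignr(/*hi=*/b, /*lo=*/a, /*imm=*/4);
      assert(r[0] == 4 && r[11] == 15 && r[12] == 16 && r[15] == 19);
      return 0;
    }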

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218038 91177308-0d34-0410-b5e6-96231b3b80d8
Author: Chandler Carruth
Date:   2014-09-18 09:00:25 +00:00
Parent: 088aa097d5
Commit: 72f0d9515e

5 changed files with 288 additions and 23 deletions


@@ -7439,6 +7439,12 @@ static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
lowerVectorShuffleAsBlend(DL, MVT::v2i64, V1, V2, Mask, DAG))
return Blend;
// Try to use rotation instructions if available.
if (Subtarget->hasSSSE3())
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
DL, MVT::v2i64, V1, V2, Mask, DAG))
return Rotate;
// We implement this with SHUFPD which is pretty lame because it will likely
// incur 2 cycles of stall for integer vectors on Nehalem and older chips.
// However, all the alternatives are still more cycles and newer chips don't
@@ -7732,6 +7738,12 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
lowerVectorShuffleAsBlend(DL, MVT::v4i32, V1, V2, Mask, DAG))
return Blend;
// Try to use rotation instructions if available.
if (Subtarget->hasSSSE3())
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
DL, MVT::v4i32, V1, V2, Mask, DAG))
return Rotate;
// We implement this with SHUFPS because it can blend from two vectors.
// Because we're going to eventually use SHUFPS, we use SHUFPS even to build
// up the inputs, bypassing domain shift penalties that we would incur if we
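These hunks only add the call sites; the mask analysis lives in
lowerVectorShuffleAsByteRotate, which this diff doesn't show. The standalone
sketch below captures the core idea under simplifying assumptions: it ignores
the commuted operand order (needed for masks like <7,0,1,2>) and undef-heavy
corner cases, and the function name is mine, not LLVM's.

    #include <vector>

    // A shuffle mask over the concatenation V1:V2 is a byte rotation if every
    // defined element obeys Mask[i] == i + R for one fixed R with
    // 0 < R < NumElts; PALIGNR's immediate is then R scaled to bytes.
    // Returns that byte immediate, or -1 if the mask is not such a rotation.
    int matchByteRotate(const std::vector<int> &Mask, int EltSizeInBytes) {
      int NumElts = static_cast<int>(Mask.size());
      int R = -1; // element rotation amount implied by the mask so far
      for (int i = 0; i != NumElts; ++i) {
        if (Mask[i] < 0)
          continue;              // undef lanes match any rotation
        int ThisR = Mask[i] - i; // rotation this lane requires
        if (ThisR <= 0 || ThisR >= NumElts)
          return -1;             // R == 0 is a no-op; R >= NumElts would use V2 alone
        if (R == -1)
          R = ThisR;
        else if (R != ThisR)
          return -1;             // all lanes must agree on a single rotation
      }
      if (R == -1)
        return -1;               // fully-undef mask: nothing to rotate
      return R * EltSizeInBytes;
    }

For example, the v4i32 mask <1,2,3,4> yields R = 1 and a byte immediate of
1 * 4 = 4, the `palignr $4` seen in shuffle_v4i32_1234 in the tests below.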


@@ -222,17 +222,46 @@ define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64
ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b) {
; ALL-LABEL: @shuffle_v2i64_12
; ALL: shufpd {{.*}} # xmm0 = xmm0[1],xmm1[0]
; ALL-NEXT: retq
; SSE2-LABEL: @shuffle_v2i64_12
; SSE2: shufpd {{.*}} # xmm0 = xmm0[1],xmm1[0]
; SSE2-NEXT: retq
;
; SSE3-LABEL: @shuffle_v2i64_12
; SSE3: shufpd {{.*}} # xmm0 = xmm0[1],xmm1[0]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: @shuffle_v2i64_12
; SSSE3: palignr {{.*}} # xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: @shuffle_v2i64_12
; SSE41: palignr {{.*}} # xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_12_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; ALL-LABEL: @shuffle_v2i64_12_copy
; ALL: shufpd {{.*}} # xmm1 = xmm1[1],xmm2[0]
; ALL-NEXT: movapd %xmm1, %xmm0
; ALL-NEXT: retq
; SSE2-LABEL: @shuffle_v2i64_12_copy
; SSE2: shufpd {{.*}} # xmm1 = xmm1[1],xmm2[0]
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: @shuffle_v2i64_12_copy
; SSE3: shufpd {{.*}} # xmm1 = xmm1[1],xmm2[0]
; SSE3-NEXT: movapd %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: @shuffle_v2i64_12_copy
; SSSE3: palignr {{.*}} # xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
; SSSE3-NEXT: movdqa %xmm2, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: @shuffle_v2i64_12_copy
; SSE41: palignr {{.*}} # xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
ret <2 x i64> %shuffle
}
@@ -314,18 +343,42 @@ define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64
ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_30(<2 x i64> %a, <2 x i64> %b) {
; ALL-LABEL: @shuffle_v2i64_30
; ALL: shufpd {{.*}} # xmm1 = xmm1[1],xmm0[0]
; ALL-NEXT: movapd %xmm1, %xmm0
; ALL-NEXT: retq
; SSE2-LABEL: @shuffle_v2i64_30
; SSE2: shufpd {{.*}} # xmm1 = xmm1[1],xmm0[0]
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: @shuffle_v2i64_30
; SSE3: shufpd {{.*}} # xmm1 = xmm1[1],xmm0[0]
; SSE3-NEXT: movapd %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: @shuffle_v2i64_30
; SSSE3: palignr {{.*}} # xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; SSSE3-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_30_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; ALL-LABEL: @shuffle_v2i64_30_copy
; ALL: shufpd {{.*}} # xmm2 = xmm2[1],xmm1[0]
; ALL-NEXT: movapd %xmm2, %xmm0
; ALL-NEXT: retq
; SSE2-LABEL: @shuffle_v2i64_30_copy
; SSE2: shufpd {{.*}} # xmm2 = xmm2[1],xmm1[0]
; SSE2-NEXT: movapd %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: @shuffle_v2i64_30_copy
; SSE3: shufpd {{.*}} # xmm2 = xmm2[1],xmm1[0]
; SSE3-NEXT: movapd %xmm2, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: @shuffle_v2i64_30_copy
; SSSE3: palignr {{.*}} # xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: @shuffle_v2i64_30_copy
; SSE41: palignr {{.*}} # xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
ret <2 x i64> %shuffle
}


@@ -571,3 +571,197 @@ define <4 x i32> @shuffle_v4i32_z6zz(i32 %i) {
%shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_7012(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: @shuffle_v4i32_7012
; SSE2: # BB#0:
; SSE2-NEXT: shufps {{.*}} # xmm1 = xmm1[3,0],xmm0[0,0]
; SSE2-NEXT: shufps {{.*}} # xmm1 = xmm1[0,2],xmm0[1,2]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: @shuffle_v4i32_7012
; SSE3: # BB#0:
; SSE3-NEXT: shufps {{.*}} # xmm1 = xmm1[3,0],xmm0[0,0]
; SSE3-NEXT: shufps {{.*}} # xmm1 = xmm1[0,2],xmm0[1,2]
; SSE3-NEXT: movaps %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: @shuffle_v4i32_7012
; SSSE3: # BB#0:
; SSSE3-NEXT: palignr $12, {{.*}} # xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: @shuffle_v4i32_7012
; SSE41: # BB#0:
; SSE41-NEXT: palignr $12, {{.*}} # xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
; SSE41-NEXT: retq
;
; AVX1-LABEL: @shuffle_v4i32_7012
; AVX1: # BB#0:
; AVX1-NEXT: vpalignr $12, {{.*}} # xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
; AVX1-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 0, i32 1, i32 2>
ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_6701(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: @shuffle_v4i32_6701
; SSE2: # BB#0:
; SSE2-NEXT: shufpd {{.*}} # xmm1 = xmm1[1],xmm0[0]
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: @shuffle_v4i32_6701
; SSE3: # BB#0:
; SSE3-NEXT: shufpd {{.*}} # xmm1 = xmm1[1],xmm0[0]
; SSE3-NEXT: movapd %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: @shuffle_v4i32_6701
; SSSE3: # BB#0:
; SSSE3-NEXT: palignr $8, {{.*}} # xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: @shuffle_v4i32_6701
; SSE41: # BB#0:
; SSE41-NEXT: palignr $8, {{.*}} # xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: @shuffle_v4i32_6701
; AVX1: # BB#0:
; AVX1-NEXT: vpalignr $8, {{.*}} # xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; AVX1-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_5670(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: @shuffle_v4i32_5670
; SSE2: # BB#0:
; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[0,0],xmm1[3,0]
; SSE2-NEXT: shufps {{.*}} # xmm1 = xmm1[1,2],xmm0[2,0]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: @shuffle_v4i32_5670
; SSE3: # BB#0:
; SSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[0,0],xmm1[3,0]
; SSE3-NEXT: shufps {{.*}} # xmm1 = xmm1[1,2],xmm0[2,0]
; SSE3-NEXT: movaps %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: @shuffle_v4i32_5670
; SSSE3: # BB#0:
; SSSE3-NEXT: palignr $4, {{.*}} # xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: @shuffle_v4i32_5670
; SSE41: # BB#0:
; SSE41-NEXT: palignr $4, {{.*}} # xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3]
; SSE41-NEXT: retq
;
; AVX1-LABEL: @shuffle_v4i32_5670
; AVX1: # BB#0:
; AVX1-NEXT: vpalignr $4, {{.*}} # xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3]
; AVX1-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 5, i32 6, i32 7, i32 0>
ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_1234(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: @shuffle_v4i32_1234
; SSE2: # BB#0:
; SSE2-NEXT: shufps {{.*}} # xmm1 = xmm1[0,0],xmm0[3,0]
; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[1,2],xmm1[2,0]
; SSE2-NEXT: retq
;
; SSE3-LABEL: @shuffle_v4i32_1234
; SSE3: # BB#0:
; SSE3-NEXT: shufps {{.*}} # xmm1 = xmm1[0,0],xmm0[3,0]
; SSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[1,2],xmm1[2,0]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: @shuffle_v4i32_1234
; SSSE3: # BB#0:
; SSSE3-NEXT: palignr $4, {{.*}} # xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: @shuffle_v4i32_1234
; SSE41: # BB#0:
; SSE41-NEXT: palignr $4, {{.*}} # xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: @shuffle_v4i32_1234
; AVX1: # BB#0:
; AVX1-NEXT: vpalignr $4, {{.*}} # xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
; AVX1-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_2345(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: @shuffle_v4i32_2345
; SSE2: # BB#0:
; SSE2-NEXT: shufpd {{.*}} # xmm0 = xmm0[1],xmm1[0]
; SSE2-NEXT: retq
;
; SSE3-LABEL: @shuffle_v4i32_2345
; SSE3: # BB#0:
; SSE3-NEXT: shufpd {{.*}} # xmm0 = xmm0[1],xmm1[0]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: @shuffle_v4i32_2345
; SSSE3: # BB#0:
; SSSE3-NEXT: palignr $8, {{.*}} # xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: @shuffle_v4i32_2345
; SSE41: # BB#0:
; SSE41-NEXT: palignr $8, {{.*}} # xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: @shuffle_v4i32_2345
; AVX1: # BB#0:
; AVX1-NEXT: vpalignr $8, {{.*}} # xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; AVX1-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_3456(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: @shuffle_v4i32_3456
; SSE2: # BB#0:
; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[3,0],xmm1[0,0]
; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[0,2],xmm1[1,2]
; SSE2-NEXT: retq
;
; SSE3-LABEL: @shuffle_v4i32_3456
; SSE3: # BB#0:
; SSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[3,0],xmm1[0,0]
; SSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[0,2],xmm1[1,2]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: @shuffle_v4i32_3456
; SSSE3: # BB#0:
; SSSE3-NEXT: palignr $12, {{.*}} # xmm1 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: @shuffle_v4i32_3456
; SSE41: # BB#0:
; SSE41-NEXT: palignr $12, {{.*}} # xmm1 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11]
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: @shuffle_v4i32_3456
; AVX1: # BB#0:
; AVX1-NEXT: vpalignr $12, {{.*}} # xmm0 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11]
; AVX1-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
ret <4 x i32> %shuffle
}


@@ -21,10 +21,16 @@ define <8 x i16> @shuffle_v8i16_67452301(<8 x i16> %a, <8 x i16> %b) {
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_456789AB(<8 x i16> %a, <8 x i16> %b) {
; ALL-LABEL: @shuffle_v8i16_456789AB
; ALL: # BB#0:
; ALL: shufpd {{.*}} # xmm0 = xmm0[1],xmm1[0]
; ALL-NEXT: retq
; SSE2-LABEL: @shuffle_v8i16_456789AB
; SSE2: # BB#0:
; SSE2-NEXT: shufpd {{.*}} # xmm0 = xmm0[1],xmm1[0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: @shuffle_v8i16_456789AB
; SSSE3: # BB#0:
; SSSE3-NEXT: palignr {{.*}} # xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
ret <8 x i16> %shuffle
}


@@ -29,7 +29,7 @@ define <4 x i64> @shuffle_v4i64_0112(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: @shuffle_v4i64_0112
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vshufpd {{.*}} # xmm1 = xmm0[1],xmm1[0]
; AVX1-NEXT: vpalignr {{.*}} # xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 1, i32 2>
@@ -75,7 +75,7 @@ define <4 x i64> @shuffle_v4i64_3330(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: @shuffle_v4i64_3330
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vshufpd {{.*}} # xmm0 = xmm1[1],xmm0[0]
; AVX1-NEXT: vpalignr {{.*}} # xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*}} # xmm1 = xmm1[2,3,2,3]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
@@ -303,7 +303,7 @@ define <4 x i64> @shuffle_v4i64_0412(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: @shuffle_v4i64_0412
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vshufpd {{.*}} # xmm2 = xmm0[1],xmm2[0]
; AVX1-NEXT: vpalignr {{.*}} # xmm2 = xmm0[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*}} # xmm1 = xmm1[0,1,0,1]
; AVX1-NEXT: vpblendw {{.*}} # xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -315,7 +315,7 @@ define <4 x i64> @shuffle_v4i64_4012(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: @shuffle_v4i64_4012
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vshufpd {{.*}} # xmm2 = xmm0[1],xmm2[0]
; AVX1-NEXT: vpalignr {{.*}} # xmm2 = xmm0[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vpblendw {{.*}} # xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0