diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 8721d8ffe50..8343a856eda 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -7439,6 +7439,12 @@ static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
           lowerVectorShuffleAsBlend(DL, MVT::v2i64, V1, V2, Mask, DAG))
     return Blend;
 
+  // Try to use rotation instructions if available.
+  if (Subtarget->hasSSSE3())
+    if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
+            DL, MVT::v2i64, V1, V2, Mask, DAG))
+      return Rotate;
+
   // We implement this with SHUFPD which is pretty lame because it will likely
   // incur 2 cycles of stall for integer vectors on Nehalem and older chips.
   // However, all the alternatives are still more cycles and newer chips don't
@@ -7732,6 +7738,12 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
           lowerVectorShuffleAsBlend(DL, MVT::v4i32, V1, V2, Mask, DAG))
     return Blend;
 
+  // Try to use rotation instructions if available.
+  if (Subtarget->hasSSSE3())
+    if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
+            DL, MVT::v4i32, V1, V2, Mask, DAG))
+      return Rotate;
+
   // We implement this with SHUFPS because it can blend from two vectors.
   // Because we're going to eventually use SHUFPS, we use SHUFPS even to build
   // up the inputs, bypassing domain shift penalties that we would encur if we
diff --git a/test/CodeGen/X86/vector-shuffle-128-v2.ll b/test/CodeGen/X86/vector-shuffle-128-v2.ll
index 621e9a7db80..06673936586 100644
--- a/test/CodeGen/X86/vector-shuffle-128-v2.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v2.ll
@@ -222,17 +222,46 @@ define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64
   ret <2 x i64> %shuffle
 }
 define <2 x i64> @shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b) {
-; ALL-LABEL: @shuffle_v2i64_12
-; ALL: shufpd {{.*}} # xmm0 = xmm0[1],xmm1[0]
-; ALL-NEXT: retq
+; SSE2-LABEL: @shuffle_v2i64_12
+; SSE2: shufpd {{.*}} # xmm0 = xmm0[1],xmm1[0]
+; SSE2-NEXT: retq
+;
+; SSE3-LABEL: @shuffle_v2i64_12
+; SSE3: shufpd {{.*}} # xmm0 = xmm0[1],xmm1[0]
+; SSE3-NEXT: retq
+;
+; SSSE3-LABEL: @shuffle_v2i64_12
+; SSSE3: palignr {{.*}} # xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
+; SSSE3-NEXT: movdqa %xmm1, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: @shuffle_v2i64_12
+; SSE41: palignr {{.*}} # xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
   ret <2 x i64> %shuffle
 }
 define <2 x i64> @shuffle_v2i64_12_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
-; ALL-LABEL: @shuffle_v2i64_12_copy
-; ALL: shufpd {{.*}} # xmm1 = xmm1[1],xmm2[0]
-; ALL-NEXT: movapd %xmm1, %xmm0
-; ALL-NEXT: retq
+; SSE2-LABEL: @shuffle_v2i64_12_copy
+; SSE2: shufpd {{.*}} # xmm1 = xmm1[1],xmm2[0]
+; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE3-LABEL: @shuffle_v2i64_12_copy
+; SSE3: shufpd {{.*}} # xmm1 = xmm1[1],xmm2[0]
+; SSE3-NEXT: movapd %xmm1, %xmm0
+; SSE3-NEXT: retq
+;
+; SSSE3-LABEL: @shuffle_v2i64_12_copy
+; SSSE3: palignr {{.*}} # xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
+; SSSE3-NEXT: movdqa %xmm2, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: @shuffle_v2i64_12_copy
+; SSE41: palignr {{.*}} # xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: retq
   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
   ret <2 x i64> %shuffle
 }
@@ -314,18 +343,42 @@ define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64
   ret <2 x i64> %shuffle
 }
 define <2 x i64> @shuffle_v2i64_30(<2 x i64> %a, <2 x i64> %b) {
-; ALL-LABEL: @shuffle_v2i64_30
-; ALL: shufpd {{.*}} # xmm1 = xmm1[1],xmm0[0]
-; ALL-NEXT: movapd %xmm1, %xmm0
-; ALL-NEXT: retq
+; SSE2-LABEL: @shuffle_v2i64_30
+; SSE2: shufpd {{.*}} # xmm1 = xmm1[1],xmm0[0]
+; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE3-LABEL: @shuffle_v2i64_30
+; SSE3: shufpd {{.*}} # xmm1 = xmm1[1],xmm0[0]
+; SSE3-NEXT: movapd %xmm1, %xmm0
+; SSE3-NEXT: retq
+;
+; SSSE3-LABEL: @shuffle_v2i64_30
+; SSSE3: palignr {{.*}} # xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
+; SSSE3-NEXT: retq
   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
   ret <2 x i64> %shuffle
 }
 define <2 x i64> @shuffle_v2i64_30_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
-; ALL-LABEL: @shuffle_v2i64_30_copy
-; ALL: shufpd {{.*}} # xmm2 = xmm2[1],xmm1[0]
-; ALL-NEXT: movapd %xmm2, %xmm0
-; ALL-NEXT: retq
+; SSE2-LABEL: @shuffle_v2i64_30_copy
+; SSE2: shufpd {{.*}} # xmm2 = xmm2[1],xmm1[0]
+; SSE2-NEXT: movapd %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE3-LABEL: @shuffle_v2i64_30_copy
+; SSE3: shufpd {{.*}} # xmm2 = xmm2[1],xmm1[0]
+; SSE3-NEXT: movapd %xmm2, %xmm0
+; SSE3-NEXT: retq
+;
+; SSSE3-LABEL: @shuffle_v2i64_30_copy
+; SSSE3: palignr {{.*}} # xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
+; SSSE3-NEXT: movdqa %xmm1, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: @shuffle_v2i64_30_copy
+; SSE41: palignr {{.*}} # xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
   ret <2 x i64> %shuffle
 }
diff --git a/test/CodeGen/X86/vector-shuffle-128-v4.ll b/test/CodeGen/X86/vector-shuffle-128-v4.ll
index 31205c93f5a..077780416dc 100644
--- a/test/CodeGen/X86/vector-shuffle-128-v4.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v4.ll
@@ -571,3 +571,197 @@ define <4 x i32> @shuffle_v4i32_z6zz(i32 %i) {
   %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32>
   ret <4 x i32> %shuffle
 }
+
+define <4 x i32> @shuffle_v4i32_7012(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: @shuffle_v4i32_7012
+; SSE2: # BB#0:
+; SSE2-NEXT: shufps {{.*}} # xmm1 = xmm1[3,0],xmm0[0,0]
+; SSE2-NEXT: shufps {{.*}} # xmm1 = xmm1[0,2],xmm0[1,2]
+; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE3-LABEL: @shuffle_v4i32_7012
+; SSE3: # BB#0:
+; SSE3-NEXT: shufps {{.*}} # xmm1 = xmm1[3,0],xmm0[0,0]
+; SSE3-NEXT: shufps {{.*}} # xmm1 = xmm1[0,2],xmm0[1,2]
+; SSE3-NEXT: movaps %xmm1, %xmm0
+; SSE3-NEXT: retq
+;
+; SSSE3-LABEL: @shuffle_v4i32_7012
+; SSSE3: # BB#0:
+; SSSE3-NEXT: palignr $12, {{.*}} # xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: @shuffle_v4i32_7012
+; SSE41: # BB#0:
+; SSE41-NEXT: palignr $12, {{.*}} # xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: @shuffle_v4i32_7012
+; AVX1: # BB#0:
+; AVX1-NEXT: vpalignr $12, {{.*}} # xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
+; AVX1-NEXT: retq
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 0, i32 1, i32 2>
+  ret <4 x i32> %shuffle
+}
+
+define <4 x i32> @shuffle_v4i32_6701(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: @shuffle_v4i32_6701
+; SSE2: # BB#0:
+; SSE2-NEXT: shufpd {{.*}} # xmm1 = xmm1[1],xmm0[0]
+; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE3-LABEL: @shuffle_v4i32_6701
+; SSE3: # BB#0:
+; SSE3-NEXT: shufpd {{.*}} # xmm1 = xmm1[1],xmm0[0]
+; SSE3-NEXT: movapd %xmm1, %xmm0
+; SSE3-NEXT: retq
+;
+; SSSE3-LABEL: @shuffle_v4i32_6701
+; SSSE3: # BB#0:
+; SSSE3-NEXT: palignr $8, {{.*}} # xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: @shuffle_v4i32_6701
+; SSE41: # BB#0:
+; SSE41-NEXT: palignr $8, {{.*}} # xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: @shuffle_v4i32_6701
+; AVX1: # BB#0:
+; AVX1-NEXT: vpalignr $8, {{.*}} # xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
+; AVX1-NEXT: retq
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
+  ret <4 x i32> %shuffle
+}
+
+define <4 x i32> @shuffle_v4i32_5670(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: @shuffle_v4i32_5670
+; SSE2: # BB#0:
+; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[0,0],xmm1[3,0]
+; SSE2-NEXT: shufps {{.*}} # xmm1 = xmm1[1,2],xmm0[2,0]
+; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE3-LABEL: @shuffle_v4i32_5670
+; SSE3: # BB#0:
+; SSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[0,0],xmm1[3,0]
+; SSE3-NEXT: shufps {{.*}} # xmm1 = xmm1[1,2],xmm0[2,0]
+; SSE3-NEXT: movaps %xmm1, %xmm0
+; SSE3-NEXT: retq
+;
+; SSSE3-LABEL: @shuffle_v4i32_5670
+; SSSE3: # BB#0:
+; SSSE3-NEXT: palignr $4, {{.*}} # xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: @shuffle_v4i32_5670
+; SSE41: # BB#0:
+; SSE41-NEXT: palignr $4, {{.*}} # xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: @shuffle_v4i32_5670
+; AVX1: # BB#0:
+; AVX1-NEXT: vpalignr $4, {{.*}} # xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3]
+; AVX1-NEXT: retq
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 5, i32 6, i32 7, i32 0>
+  ret <4 x i32> %shuffle
+}
+
+define <4 x i32> @shuffle_v4i32_1234(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: @shuffle_v4i32_1234
+; SSE2: # BB#0:
+; SSE2-NEXT: shufps {{.*}} # xmm1 = xmm1[0,0],xmm0[3,0]
+; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[1,2],xmm1[2,0]
+; SSE2-NEXT: retq
+;
+; SSE3-LABEL: @shuffle_v4i32_1234
+; SSE3: # BB#0:
+; SSE3-NEXT: shufps {{.*}} # xmm1 = xmm1[0,0],xmm0[3,0]
+; SSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[1,2],xmm1[2,0]
+; SSE3-NEXT: retq
+;
+; SSSE3-LABEL: @shuffle_v4i32_1234
+; SSSE3: # BB#0:
+; SSSE3-NEXT: palignr $4, {{.*}} # xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
+; SSSE3-NEXT: movdqa %xmm1, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: @shuffle_v4i32_1234
+; SSE41: # BB#0:
+; SSE41-NEXT: palignr $4, {{.*}} # xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: @shuffle_v4i32_1234
+; AVX1: # BB#0:
+; AVX1-NEXT: vpalignr $4, {{.*}} # xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
+; AVX1-NEXT: retq
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+  ret <4 x i32> %shuffle
+}
+
+define <4 x i32> @shuffle_v4i32_2345(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: @shuffle_v4i32_2345
+; SSE2: # BB#0:
+; SSE2-NEXT: shufpd {{.*}} # xmm0 = xmm0[1],xmm1[0]
+; SSE2-NEXT: retq
+;
+; SSE3-LABEL: @shuffle_v4i32_2345
+; SSE3: # BB#0:
+; SSE3-NEXT: shufpd {{.*}} # xmm0 = xmm0[1],xmm1[0]
+; SSE3-NEXT: retq
+;
+; SSSE3-LABEL: @shuffle_v4i32_2345
+; SSSE3: # BB#0:
+; SSSE3-NEXT: palignr $8, {{.*}} # xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
+; SSSE3-NEXT: movdqa %xmm1, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: @shuffle_v4i32_2345
+; SSE41: # BB#0:
+; SSE41-NEXT: palignr $8, {{.*}} # xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: @shuffle_v4i32_2345
+; AVX1: # BB#0:
+; AVX1-NEXT: vpalignr $8, {{.*}} # xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
+; AVX1-NEXT: retq
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+  ret <4 x i32> %shuffle
+}
+
+define <4 x i32> @shuffle_v4i32_3456(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: @shuffle_v4i32_3456
+; SSE2: # BB#0:
+; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[3,0],xmm1[0,0]
+; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[0,2],xmm1[1,2]
+; SSE2-NEXT: retq
+;
+; SSE3-LABEL: @shuffle_v4i32_3456
+; SSE3: # BB#0:
+; SSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[3,0],xmm1[0,0]
+; SSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[0,2],xmm1[1,2]
+; SSE3-NEXT: retq
+;
+; SSSE3-LABEL: @shuffle_v4i32_3456
+; SSSE3: # BB#0:
+; SSSE3-NEXT: palignr $12, {{.*}} # xmm1 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11]
+; SSSE3-NEXT: movdqa %xmm1, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: @shuffle_v4i32_3456
+; SSE41: # BB#0:
+; SSE41-NEXT: palignr $12, {{.*}} # xmm1 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11]
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: @shuffle_v4i32_3456
+; AVX1: # BB#0:
+; AVX1-NEXT: vpalignr $12, {{.*}} # xmm0 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11]
+; AVX1-NEXT: retq
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+  ret <4 x i32> %shuffle
+}
diff --git a/test/CodeGen/X86/vector-shuffle-128-v8.ll b/test/CodeGen/X86/vector-shuffle-128-v8.ll
index 92a2282f0ed..b61e282404c 100644
--- a/test/CodeGen/X86/vector-shuffle-128-v8.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v8.ll
@@ -21,10 +21,16 @@ define <8 x i16> @shuffle_v8i16_67452301(<8 x i16> %a, <8 x i16> %b) {
   ret <8 x i16> %shuffle
 }
 define <8 x i16> @shuffle_v8i16_456789AB(<8 x i16> %a, <8 x i16> %b) {
-; ALL-LABEL: @shuffle_v8i16_456789AB
-; ALL: # BB#0:
-; ALL: shufpd {{.*}} # xmm0 = xmm0[1],xmm1[0]
-; ALL-NEXT: retq
+; SSE2-LABEL: @shuffle_v8i16_456789AB
+; SSE2: # BB#0:
+; SSE2-NEXT: shufpd {{.*}} # xmm0 = xmm0[1],xmm1[0]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: @shuffle_v8i16_456789AB
+; SSSE3: # BB#0:
+; SSSE3-NEXT: palignr {{.*}} # xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
+; SSSE3-NEXT: movdqa %xmm1, %xmm0
+; SSSE3-NEXT: retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
   ret <8 x i16> %shuffle
 }
diff --git a/test/CodeGen/X86/vector-shuffle-256-v4.ll b/test/CodeGen/X86/vector-shuffle-256-v4.ll
index 757ef8bf176..982542b59b2 100644
--- a/test/CodeGen/X86/vector-shuffle-256-v4.ll
+++ b/test/CodeGen/X86/vector-shuffle-256-v4.ll
@@ -29,7 +29,7 @@ define <4 x i64> @shuffle_v4i64_0112(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1-LABEL: @shuffle_v4i64_0112
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vshufpd {{.*}} # xmm1 = xmm0[1],xmm1[0]
+; AVX1-NEXT: vpalignr {{.*}} # xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 1, i32 2>
@@ -75,7 +75,7 @@ define <4 x i64> @shuffle_v4i64_3330(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1-LABEL: @shuffle_v4i64_3330
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vshufpd {{.*}} # xmm0 = xmm1[1],xmm0[0]
+; AVX1-NEXT: vpalignr {{.*}} # xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
 ; AVX1-NEXT: vpshufd {{.*}} # xmm1 = xmm1[2,3,2,3]
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT: retq
@@ -303,7 +303,7 @@ define <4 x i64> @shuffle_v4i64_0412(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1-LABEL: @shuffle_v4i64_0412
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vshufpd {{.*}} # xmm2 = xmm0[1],xmm2[0]
+; AVX1-NEXT: vpalignr {{.*}} # xmm2 = xmm0[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
 ; AVX1-NEXT: vpshufd {{.*}} # xmm1 = xmm1[0,1,0,1]
 ; AVX1-NEXT: vpblendw {{.*}} # xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -315,7 +315,7 @@ define <4 x i64> @shuffle_v4i64_4012(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1-LABEL: @shuffle_v4i64_4012
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vshufpd {{.*}} # xmm2 = xmm0[1],xmm2[0]
+; AVX1-NEXT: vpalignr {{.*}} # xmm2 = xmm0[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
 ; AVX1-NEXT: vpshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
 ; AVX1-NEXT: vpblendw {{.*}} # xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
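
For readers unfamiliar with PALIGNR: the lowering added above turns lane-crossing shuffles such as <1,2,3,4> into a single byte rotation of the two concatenated sources, which is what the new palignr CHECK lines verify. The snippet below is an illustrative aside, not part of the patch, using the SSSE3 intrinsic _mm_alignr_epi8 to show the equivalence the shuffle_v4i32_1234 test relies on. The helper name shuffle_1234 and the sample values are hypothetical, written only for demonstration.

    // Sketch (assumes an SSSE3-capable host, compile with -mssse3):
    // the <1,2,3,4> shuffle of two <4 x i32> vectors {a[1],a[2],a[3],b[0]}
    // is a 4-byte rotation of the concatenation b:a, i.e. one PALIGNR.
    #include <immintrin.h>
    #include <cstdio>

    // Hypothetical helper mirroring @shuffle_v4i32_1234.
    static __m128i shuffle_1234(__m128i a, __m128i b) {
      // _mm_alignr_epi8(hi, lo, imm) concatenates hi:lo and shifts right by
      // imm bytes; with imm = 4 the result is a bytes 4..15 then b bytes 0..3,
      // i.e. elements a[1], a[2], a[3], b[0].
      return _mm_alignr_epi8(b, a, 4);
    }

    int main() {
      alignas(16) int av[4] = {10, 11, 12, 13};
      alignas(16) int bv[4] = {20, 21, 22, 23};
      __m128i r = shuffle_1234(_mm_load_si128((const __m128i *)av),
                               _mm_load_si128((const __m128i *)bv));
      alignas(16) int rv[4];
      _mm_store_si128((__m128i *)rv, r);
      std::printf("%d %d %d %d\n", rv[0], rv[1], rv[2], rv[3]); // expect: 11 12 13 20
      return 0;
    }

This corresponds to the "palignr $4" line the SSSE3/SSE41 checks for shuffle_v4i32_1234 expect; on pre-SSSE3 subtargets the lowering falls through to the two-SHUFPS sequence shown in the SSE2/SSE3 checks.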