From d0491a6b5624e2a166ea48650f8df6d95aa06774 Mon Sep 17 00:00:00 2001 From: Ahmed Bougacha Date: Fri, 10 Feb 2017 19:51:47 +0000 Subject: [PATCH] [X86] Bitcast subvector before broadcasting it. Since r274013, we've been looking through bitcasts on broadcast inputs. In the scalar-folding case (from a load, build_vector, or sc2vec), the input type didn't matter, as we'd simply bitcast the resulting scalar back. However, when broadcasting a 128-bit-lane-aligned element, we create an EXTRACT_SUBVECTOR. Use proper types, by creating an extract_subvector of the original input type. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@294774 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 11 +++++- test/CodeGen/X86/vector-shuffle-256-v4.ll | 42 ++++++++++++++++++++++ test/CodeGen/X86/vector-shuffle-256-v8.ll | 18 ++++++++++ test/CodeGen/X86/vector-shuffle-512-v16.ll | 12 +++++++ test/CodeGen/X86/vector-shuffle-512-v8.ll | 18 ++++++++++ 5 files changed, 100 insertions(+), 1 deletion(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 0c97f8819bf..3bc7c5859b0 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -9687,7 +9687,16 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT, if (((BroadcastIdx * EltSize) % 128) != 0) return SDValue(); - MVT ExtVT = MVT::getVectorVT(VT.getScalarType(), 128 / EltSize); + // The shuffle input might have been a bitcast we looked through; look at + // the original input vector. Emit an EXTRACT_SUBVECTOR of that type; we'll + // later bitcast it to BroadcastVT. 
+ MVT SrcVT = V.getSimpleValueType(); + assert(SrcVT.getScalarSizeInBits() == BroadcastVT.getScalarSizeInBits() && + "Unexpected vector element size"); + assert(SrcVT.getVectorNumElements() == BroadcastVT.getVectorNumElements() && + "Unexpected vector num elements"); + + MVT ExtVT = MVT::getVectorVT(SrcVT.getScalarType(), 128 / EltSize); V = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtVT, V, DAG.getIntPtrConstant(BroadcastIdx, DL)); } diff --git a/test/CodeGen/X86/vector-shuffle-256-v4.ll b/test/CodeGen/X86/vector-shuffle-256-v4.ll index 1198514be5f..b03cf94dff0 100644 --- a/test/CodeGen/X86/vector-shuffle-256-v4.ll +++ b/test/CodeGen/X86/vector-shuffle-256-v4.ll @@ -129,6 +129,48 @@ define <4 x double> @shuffle_v4f64_2200(<4 x double> %a, <4 x double> %b) { ret <4 x double> %shuffle } +define <4 x double> @shuffle_v4f64_2222(<4 x double> %a, <4 x double> %b) { +; AVX1-LABEL: shuffle_v4f64_2222: +; AVX1: # BB#0: +; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] +; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3] +; AVX1-NEXT: retq +; +; AVX2-LABEL: shuffle_v4f64_2222: +; AVX2: # BB#0: +; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,2] +; AVX2-NEXT: retq +; +; AVX512VL-LABEL: shuffle_v4f64_2222: +; AVX512VL: # BB#0: +; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,2] +; AVX512VL-NEXT: retq + %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 2, i32 2, i32 2, i32 2> + ret <4 x double> %shuffle +} + +define <4 x double> @shuffle_v4f64_2222_bc(<4 x i64> %a, <4 x i64> %b) { +; AVX1-LABEL: shuffle_v4f64_2222_bc: +; AVX1: # BB#0: +; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] +; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3] +; AVX1-NEXT: retq +; +; AVX2-LABEL: shuffle_v4f64_2222_bc: +; AVX2: # BB#0: +; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,2] +; AVX2-NEXT: retq +; +; AVX512VL-LABEL: shuffle_v4f64_2222_bc: +; AVX512VL: # BB#0: +; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,2] +; AVX512VL-NEXT: retq + %tmp0 = bitcast <4 x i64> %a to <4 x 
double> + %tmp1 = bitcast <4 x i64> %b to <4 x double> + %shuffle = shufflevector <4 x double> %tmp0, <4 x double> %tmp1, <4 x i32> <i32 2, i32 2, i32 2, i32 2> + ret <4 x double> %shuffle +} + define <4 x double> @shuffle_v4f64_3330(<4 x double> %a, <4 x double> %b) { ; AVX1-LABEL: shuffle_v4f64_3330: ; AVX1: # BB#0: diff --git a/test/CodeGen/X86/vector-shuffle-256-v8.ll b/test/CodeGen/X86/vector-shuffle-256-v8.ll index cba15827d32..2f5b011a183 100644 --- a/test/CodeGen/X86/vector-shuffle-256-v8.ll +++ b/test/CodeGen/X86/vector-shuffle-256-v8.ll @@ -2048,6 +2048,24 @@ define <8 x i32> @shuffle_v8i32_44444444(<8 x i32> %a, <8 x i32> %b) { ret <8 x i32> %shuffle } +define <8 x i32> @shuffle_v8i32_44444444_bc(<8 x float> %a, <8 x float> %b) { +; AVX1-LABEL: shuffle_v8i32_44444444_bc: +; AVX1: # BB#0: +; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4] +; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3] +; AVX1-NEXT: retq +; +; AVX2OR512VL-LABEL: shuffle_v8i32_44444444_bc: +; AVX2OR512VL: # BB#0: +; AVX2OR512VL-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX2OR512VL-NEXT: vbroadcastss %xmm0, %ymm0 +; AVX2OR512VL-NEXT: retq + %tmp0 = bitcast <8 x float> %a to <8 x i32> + %tmp1 = bitcast <8 x float> %b to <8 x i32> + %shuffle = shufflevector <8 x i32> %tmp0, <8 x i32> %tmp1, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4> + ret <8 x i32> %shuffle +} + define <8 x i32> @shuffle_v8i32_5555uuuu(<8 x i32> %a, <8 x i32> %b) { ; AVX1-LABEL: shuffle_v8i32_5555uuuu: ; AVX1: # BB#0: diff --git a/test/CodeGen/X86/vector-shuffle-512-v16.ll b/test/CodeGen/X86/vector-shuffle-512-v16.ll index 482f07bb0bb..69b080c0a7d 100644 --- a/test/CodeGen/X86/vector-shuffle-512-v16.ll +++ b/test/CodeGen/X86/vector-shuffle-512-v16.ll @@ -23,6 +23,18 @@ define <16 x float> @shuffle_v16f32_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08 ret <16 x float> %shuffle } +define <16 x float> @shuffle_v16f32_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_bc(<16 x i32> %a, <16 x i32> %b) { +; ALL-LABEL: 
shuffle_v16f32_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_bc: +; ALL: # BB#0: +; ALL-NEXT: vextracti32x4 $2, %zmm0, %xmm0 +; ALL-NEXT: vpbroadcastd %xmm0, %zmm0 +; ALL-NEXT: retq + %tmp0 = bitcast <16 x i32> %a to <16 x float> + %tmp1 = bitcast <16 x i32> %b to <16 x float> + %shuffle = shufflevector <16 x float> %tmp0, <16 x float> %tmp1, <16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> + ret <16 x float> %shuffle +} + define <16 x float> @shuffle_v16f32_00_10_01_11_04_14_05_15_08_18_09_19_0c_1c_0d_1d(<16 x float> %a, <16 x float> %b) { ; ALL-LABEL: shuffle_v16f32_00_10_01_11_04_14_05_15_08_18_09_19_0c_1c_0d_1d: ; ALL: # BB#0: diff --git a/test/CodeGen/X86/vector-shuffle-512-v8.ll b/test/CodeGen/X86/vector-shuffle-512-v8.ll index a85e74b363b..f9b1041af08 100644 --- a/test/CodeGen/X86/vector-shuffle-512-v8.ll +++ b/test/CodeGen/X86/vector-shuffle-512-v8.ll @@ -48,6 +48,24 @@ define <8 x double> @shuffle_v8f64_44444444(<8 x double> %a, <8 x double> %b) { ret <8 x double> %shuffle } +define <8 x double> @shuffle_v8f64_44444444_bc(<8 x i64> %a, <8 x i64> %b) { +; AVX512F-LABEL: shuffle_v8f64_44444444_bc: +; AVX512F: # BB#0: +; AVX512F-NEXT: vextracti32x4 $2, %zmm0, %xmm0 +; AVX512F-NEXT: vpbroadcastq %xmm0, %zmm0 +; AVX512F-NEXT: retq +; +; AVX512F-32-LABEL: shuffle_v8f64_44444444_bc: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: vextracti32x4 $2, %zmm0, %xmm0 +; AVX512F-32-NEXT: vpbroadcastq %xmm0, %zmm0 +; AVX512F-32-NEXT: retl + %tmp0 = bitcast <8 x i64> %a to <8 x double> + %tmp1 = bitcast <8 x i64> %b to <8 x double> + %shuffle = shufflevector <8 x double> %tmp0, <8 x double> %tmp1, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4> + ret <8 x double> %shuffle +} + define <8 x double> @shuffle_v8f64_00000010(<8 x double> %a, <8 x double> %b) { ; AVX512F-LABEL: shuffle_v8f64_00000010: ; AVX512F: # BB#0: