diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 521008c50ca..b360cb8ccba 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -11827,6 +11827,17 @@ static SDValue lowerV32I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
           DL, MVT::v32i16, V1, V2, Mask, Subtarget, DAG))
     return Rotate;
 
+  if (isSingleInputShuffleMask(Mask)) {
+    SmallVector<int, 8> RepeatedMask;
+    if (is128BitLaneRepeatedShuffleMask(MVT::v32i16, Mask, RepeatedMask)) {
+      // As this is a single-input shuffle, the repeated mask should be
+      // a strictly valid v8i16 mask that we can pass through to the v8i16
+      // lowering to handle even the v32 case.
+      return lowerV8I16GeneralSingleInputVectorShuffle(
+          DL, MVT::v32i16, V1, RepeatedMask, Subtarget, DAG);
+    }
+  }
+
   return lowerVectorShuffleWithPERMV(DL, MVT::v32i16, Mask, V1, V2, DAG);
 }
 
diff --git a/test/CodeGen/X86/vector-shuffle-512-v32.ll b/test/CodeGen/X86/vector-shuffle-512-v32.ll
index f2279ef7e39..067c3f1f3ea 100644
--- a/test/CodeGen/X86/vector-shuffle-512-v32.ll
+++ b/test/CodeGen/X86/vector-shuffle-512-v32.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
 ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512f -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
 
 target triple = "x86_64-unknown-unknown"
@@ -79,3 +79,31 @@ define <32 x i16> @shuffle_v32i16_z_0_z_2_z_4_z_6_z_8_z_10_z_12_z_14_z_16_z_18_z
   %c = shufflevector <32 x i16> %a, <32 x i16> zeroinitializer, <32 x i32> 
   ret <32 x i16> %c
 }
+
+define <32 x i16> @shuffle_v32i16_1_1_0_0_4_5_6_7_9_9_8_8_12_13_14_15_17_17_16_16_20_21_22_23_25_25_24_24_28_29_30_31(<32 x i16> %a, <32 x i16> %b) {
+; ALL-LABEL: shuffle_v32i16_1_1_0_0_4_5_6_7_9_9_8_8_12_13_14_15_17_17_16_16_20_21_22_23_25_25_24_24_28_29_30_31:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpshuflw {{.*#+}} zmm0 = zmm0[1,1,0,0,4,5,6,7,9,9,8,8,12,13,14,15,17,17,16,16,20,21,22,23,25,25,24,24,28,29,30,31]
+; ALL-NEXT:    retq
+  %c = shufflevector <32 x i16> %a, <32 x i16> zeroinitializer, <32 x i32> <i32 1, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7, i32 9, i32 9, i32 8, i32 8, i32 12, i32 13, i32 14, i32 15, i32 17, i32 17, i32 16, i32 16, i32 20, i32 21, i32 22, i32 23, i32 25, i32 25, i32 24, i32 24, i32 28, i32 29, i32 30, i32 31>
+  ret <32 x i16> %c
+}
+
+define <32 x i16> @shuffle_v32i16_0_1_2_3_5_5_4_4_8_9_10_11_13_13_12_12_16_17_18_19_21_21_20_20_24_25_26_27_29_29_28_28(<32 x i16> %a, <32 x i16> %b) {
+; ALL-LABEL: shuffle_v32i16_0_1_2_3_5_5_4_4_8_9_10_11_13_13_12_12_16_17_18_19_21_21_20_20_24_25_26_27_29_29_28_28:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpshufhw {{.*#+}} zmm0 = zmm0[0,1,2,3,5,5,4,4,8,9,10,11,13,13,12,12,16,17,18,19,21,21,20,20,24,25,26,27,29,29,28,28]
+; ALL-NEXT:    retq
+  %c = shufflevector <32 x i16> %a, <32 x i16> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 4, i32 4, i32 8, i32 9, i32 10, i32 11, i32 13, i32 13, i32 12, i32 12, i32 16, i32 17, i32 18, i32 19, i32 21, i32 21, i32 20, i32 20, i32 24, i32 25, i32 26, i32 27, i32 29, i32 29, i32 28, i32 28>
+  ret <32 x i16> %c
+}
+
+define <32 x i16> @shuffle_v32i16_1_1_0_0_5_5_4_4_9_9_8_8_13_13_12_12_17_17_16_16_21_21_20_20_25_25_24_24_29_29_28_28(<32 x i16> %a, <32 x i16> %b) {
+; ALL-LABEL: shuffle_v32i16_1_1_0_0_5_5_4_4_9_9_8_8_13_13_12_12_17_17_16_16_21_21_20_20_25_25_24_24_29_29_28_28:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpshuflw {{.*#+}} zmm0 = zmm0[1,1,0,0,4,5,6,7,9,9,8,8,12,13,14,15,17,17,16,16,20,21,22,23,25,25,24,24,28,29,30,31]
+; ALL-NEXT:    vpshufhw {{.*#+}} zmm0 = zmm0[0,1,2,3,5,5,4,4,8,9,10,11,13,13,12,12,16,17,18,19,21,21,20,20,24,25,26,27,29,29,28,28]
+; ALL-NEXT:    retq
+  %c = shufflevector <32 x i16> %a, <32 x i16> zeroinitializer, <32 x i32> <i32 1, i32 1, i32 0, i32 0, i32 5, i32 5, i32 4, i32 4, i32 9, i32 9, i32 8, i32 8, i32 13, i32 13, i32 12, i32 12, i32 17, i32 17, i32 16, i32 16, i32 21, i32 21, i32 20, i32 20, i32 25, i32 25, i32 24, i32 24, i32 29, i32 29, i32 28, i32 28>
+  ret <32 x i16> %c
+}
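
Not part of the patch above: the following is a minimal standalone C++ sketch of the per-128-bit-lane repetition check that is128BitLaneRepeatedShuffleMask performs for MVT::v32i16, where each 128-bit lane holds 8 i16 elements. A single-input 32-element mask collapses to an 8-element mask exactly when every lane applies the same within-lane pattern; that collapsed mask is what the patch forwards to lowerV8I16GeneralSingleInputVectorShuffle. The helper name collapseTo8ElementMask is hypothetical and is not an LLVM API.

// Standalone sketch (not LLVM code): collapse a single-input v32i16 shuffle
// mask to an 8-element per-lane mask when every 128-bit lane repeats the
// same pattern; return false otherwise.
#include <array>
#include <cstdio>
#include <vector>

static bool collapseTo8ElementMask(const std::vector<int> &Mask,
                                   std::array<int, 8> &Repeated) {
  if (Mask.size() != 32)
    return false;
  Repeated.fill(-1); // -1 stands in for an undef mask element.
  for (int i = 0; i != 32; ++i) {
    int M = Mask[i];
    if (M < 0)
      continue; // Undef elements are compatible with any repetition.
    // For a single-input shuffle the source element must come from the same
    // 128-bit lane (group of 8 elements) as the destination position.
    if (M / 8 != i / 8)
      return false;
    int LaneLocal = M % 8;
    if (Repeated[i % 8] < 0)
      Repeated[i % 8] = LaneLocal;
    else if (Repeated[i % 8] != LaneLocal)
      return false; // Lanes disagree, so the mask is not repeated.
  }
  return true;
}

int main() {
  // First test case from the patch: the pattern {1,1,0,0,4,5,6,7} repeated
  // in all four lanes, which the new path lowers to a single vpshuflw.
  const int Lane[8] = {1, 1, 0, 0, 4, 5, 6, 7};
  std::vector<int> Mask;
  for (int LaneIdx = 0; LaneIdx != 4; ++LaneIdx)
    for (int i = 0; i != 8; ++i)
      Mask.push_back(LaneIdx * 8 + Lane[i]);

  std::array<int, 8> Repeated;
  if (collapseTo8ElementMask(Mask, Repeated)) {
    std::printf("repeated v8i16 mask:");
    for (int M : Repeated)
      std::printf(" %d", M);
    std::printf("\n"); // prints: repeated v8i16 mask: 1 1 0 0 4 5 6 7
  }
  return 0;
}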