[X86] Add an X86ISD::RANGES opcode to use for the scalar intrinsics.

This fixes a bug where we selected packed instructions for scalar intrinsics.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@317999 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Craig Topper 2017-11-12 18:51:09 +00:00
parent fc0c3a0fe7
commit 5d947c3894
6 changed files with 18 additions and 26 deletions

View File

@ -25050,8 +25050,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VPERMI: return "X86ISD::VPERMI";
case X86ISD::VPTERNLOG: return "X86ISD::VPTERNLOG";
case X86ISD::VFIXUPIMM: return "X86ISD::VFIXUPIMM";
case X86ISD::VFIXUPIMMS: return "X86ISD::VFIXUPIMMS";
case X86ISD::VRANGE: return "X86ISD::VRANGE";
case X86ISD::VRANGES: return "X86ISD::VRANGES";
case X86ISD::PMULUDQ: return "X86ISD::PMULUDQ";
case X86ISD::PMULDQ: return "X86ISD::PMULDQ";
case X86ISD::PSADBW: return "X86ISD::PSADBW";

View File

@ -426,7 +426,7 @@ namespace llvm {
VFIXUPIMM,
VFIXUPIMMS,
// Range Restriction Calculation For Packed Pairs of Float32/64 values.
VRANGE,
VRANGE, VRANGES,
// Reduce - Perform Reduction Transformation on scalar\packed FP.
VREDUCE, VREDUCES,
// RndScale - Round FP Values To Include A Given Number Of Fraction Bits.

View File

@ -8814,10 +8814,10 @@ defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd", f64x_info,
0x51, X86VRange, HasDQI>,
0x51, X86Ranges, HasDQI>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
0x51, X86VRange, HasDQI>,
0x51, X86Ranges, HasDQI>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,

View File

@ -517,6 +517,7 @@ def X86rsqrt14s : SDNode<"X86ISD::RSQRT14S", SDTFPBinOp>;
def X86rcp14s : SDNode<"X86ISD::RCP14S", SDTFPBinOp>;
def X86rsqrt28s : SDNode<"X86ISD::RSQRT28S", SDTFPBinOpRound>;
def X86rcp28s : SDNode<"X86ISD::RCP28S", SDTFPBinOpRound>;
def X86Ranges : SDNode<"X86ISD::VRANGES", SDTFPBinOpImmRound>;
def X86RndScales : SDNode<"X86ISD::VRNDSCALES", SDTFPBinOpImmRound>;
def X86Reduces : SDNode<"X86ISD::VREDUCES", SDTFPBinOpImmRound>;
def X86GetMants : SDNode<"X86ISD::VGETMANTS", SDTFPBinOpImmRound>;

View File

@ -999,8 +999,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_range_ps_128, INTR_TYPE_3OP_MASK_RM, X86ISD::VRANGE, 0),
X86_INTRINSIC_DATA(avx512_mask_range_ps_256, INTR_TYPE_3OP_MASK_RM, X86ISD::VRANGE, 0),
X86_INTRINSIC_DATA(avx512_mask_range_ps_512, INTR_TYPE_3OP_MASK_RM, X86ISD::VRANGE, 0),
X86_INTRINSIC_DATA(avx512_mask_range_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::VRANGE, 0),
X86_INTRINSIC_DATA(avx512_mask_range_ss, INTR_TYPE_SCALAR_MASK_RM, X86ISD::VRANGE, 0),
X86_INTRINSIC_DATA(avx512_mask_range_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::VRANGES, 0),
X86_INTRINSIC_DATA(avx512_mask_range_ss, INTR_TYPE_SCALAR_MASK_RM, X86ISD::VRANGES, 0),
X86_INTRINSIC_DATA(avx512_mask_reduce_pd_128, INTR_TYPE_2OP_MASK_RM, X86ISD::VREDUCE, 0),
X86_INTRINSIC_DATA(avx512_mask_reduce_pd_256, INTR_TYPE_2OP_MASK_RM, X86ISD::VREDUCE, 0),
X86_INTRINSIC_DATA(avx512_mask_reduce_pd_512, INTR_TYPE_2OP_MASK_RM, X86ISD::VREDUCE, 0),

View File

@ -293,7 +293,7 @@ define <4 x float>@test_int_x86_avx512_mask_range_ss(<4 x float> %x0, <4 x float
; AVX512DQVL-NEXT: vrangess $4, {sae}, %xmm1, %xmm0, %xmm2 {%k1}
; AVX512DQVL-NEXT: vrangess $4, {sae}, %xmm1, %xmm0, %xmm3
; AVX512DQVL-NEXT: vaddps %xmm3, %xmm2, %xmm2
; AVX512DQVL-NEXT: vrangeps $4, %xmm1, %xmm0, %xmm0
; AVX512DQVL-NEXT: vrangess $4, %xmm1, %xmm0, %xmm0
; AVX512DQVL-NEXT: vaddps %xmm2, %xmm0, %xmm0
; AVX512DQVL-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4, i32 8)
@ -323,25 +323,15 @@ define <2 x double>@test_int_x86_avx512_mask_reduce_sd(<2 x double> %x0, <2 x do
declare <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32, i32)
define <2 x double>@test_int_x86_avx512_mask_range_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
; AVX512DQ-LABEL: test_int_x86_avx512_mask_range_sd:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: vrangesd $4, %xmm1, %xmm0, %xmm3
; AVX512DQ-NEXT: kmovw %edi, %k1
; AVX512DQ-NEXT: vrangesd $4, %xmm1, %xmm0, %xmm2 {%k1}
; AVX512DQ-NEXT: vrangesd $4, {sae}, %xmm1, %xmm0, %xmm0
; AVX512DQ-NEXT: vaddpd %xmm0, %xmm2, %xmm0
; AVX512DQ-NEXT: vaddpd %xmm0, %xmm3, %xmm0
; AVX512DQ-NEXT: retq
;
; AVX512DQVL-LABEL: test_int_x86_avx512_mask_range_sd:
; AVX512DQVL: ## BB#0:
; AVX512DQVL-NEXT: vrangepd $4, %xmm1, %xmm0, %xmm3
; AVX512DQVL-NEXT: kmovw %edi, %k1
; AVX512DQVL-NEXT: vrangesd $4, %xmm1, %xmm0, %xmm2 {%k1}
; AVX512DQVL-NEXT: vrangesd $4, {sae}, %xmm1, %xmm0, %xmm0
; AVX512DQVL-NEXT: vaddpd %xmm0, %xmm2, %xmm0
; AVX512DQVL-NEXT: vaddpd %xmm0, %xmm3, %xmm0
; AVX512DQVL-NEXT: retq
; CHECK-LABEL: test_int_x86_avx512_mask_range_sd:
; CHECK: ## BB#0:
; CHECK-NEXT: vrangesd $4, %xmm1, %xmm0, %xmm3
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vrangesd $4, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vrangesd $4, {sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4, i32 4)
%res1 = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 4, i32 8)
%res2 = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 4, i32 4)