mirror of
https://github.com/RPCS3/llvm.git
synced 2025-03-04 16:47:41 +00:00
[X86] Add an X86ISD::VRANGES opcode to use for the scalar intrinsics.
This fixes a bug where we selected packed instructions for scalar intrinsics.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@317999 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
fc0c3a0fe7
commit
5d947c3894
@ -25050,8 +25050,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
case X86ISD::VPERMI: return "X86ISD::VPERMI";
|
||||
case X86ISD::VPTERNLOG: return "X86ISD::VPTERNLOG";
|
||||
case X86ISD::VFIXUPIMM: return "X86ISD::VFIXUPIMM";
|
||||
case X86ISD::VFIXUPIMMS: return "X86ISD::VFIXUPIMMS";
|
||||
case X86ISD::VFIXUPIMMS: return "X86ISD::VFIXUPIMMS";
|
||||
case X86ISD::VRANGE: return "X86ISD::VRANGE";
|
||||
case X86ISD::VRANGES: return "X86ISD::VRANGES";
|
||||
case X86ISD::PMULUDQ: return "X86ISD::PMULUDQ";
|
||||
case X86ISD::PMULDQ: return "X86ISD::PMULDQ";
|
||||
case X86ISD::PSADBW: return "X86ISD::PSADBW";
|
||||
|
@ -426,7 +426,7 @@ namespace llvm {
|
||||
VFIXUPIMM,
|
||||
VFIXUPIMMS,
|
||||
// Range Restriction Calculation For Packed Pairs of Float32/64 values.
|
||||
VRANGE,
|
||||
VRANGE, VRANGES,
|
||||
// Reduce - Perform Reduction Transformation on scalar\packed FP.
|
||||
VREDUCE, VREDUCES,
|
||||
// RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
|
||||
|
@ -8814,10 +8814,10 @@ defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
|
||||
AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
|
||||
|
||||
defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd", f64x_info,
|
||||
0x51, X86VRange, HasDQI>,
|
||||
0x51, X86Ranges, HasDQI>,
|
||||
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
|
||||
defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
|
||||
0x51, X86VRange, HasDQI>,
|
||||
0x51, X86Ranges, HasDQI>,
|
||||
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
|
||||
|
||||
defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
|
||||
|
@ -517,6 +517,7 @@ def X86rsqrt14s : SDNode<"X86ISD::RSQRT14S", SDTFPBinOp>;
|
||||
def X86rcp14s : SDNode<"X86ISD::RCP14S", SDTFPBinOp>;
|
||||
def X86rsqrt28s : SDNode<"X86ISD::RSQRT28S", SDTFPBinOpRound>;
|
||||
def X86rcp28s : SDNode<"X86ISD::RCP28S", SDTFPBinOpRound>;
|
||||
def X86Ranges : SDNode<"X86ISD::VRANGES", SDTFPBinOpImmRound>;
|
||||
def X86RndScales : SDNode<"X86ISD::VRNDSCALES", SDTFPBinOpImmRound>;
|
||||
def X86Reduces : SDNode<"X86ISD::VREDUCES", SDTFPBinOpImmRound>;
|
||||
def X86GetMants : SDNode<"X86ISD::VGETMANTS", SDTFPBinOpImmRound>;
|
||||
|
@ -999,8 +999,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||
X86_INTRINSIC_DATA(avx512_mask_range_ps_128, INTR_TYPE_3OP_MASK_RM, X86ISD::VRANGE, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_range_ps_256, INTR_TYPE_3OP_MASK_RM, X86ISD::VRANGE, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_range_ps_512, INTR_TYPE_3OP_MASK_RM, X86ISD::VRANGE, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_range_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::VRANGE, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_range_ss, INTR_TYPE_SCALAR_MASK_RM, X86ISD::VRANGE, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_range_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::VRANGES, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_range_ss, INTR_TYPE_SCALAR_MASK_RM, X86ISD::VRANGES, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_reduce_pd_128, INTR_TYPE_2OP_MASK_RM, X86ISD::VREDUCE, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_reduce_pd_256, INTR_TYPE_2OP_MASK_RM, X86ISD::VREDUCE, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_reduce_pd_512, INTR_TYPE_2OP_MASK_RM, X86ISD::VREDUCE, 0),
|
||||
|
@ -293,7 +293,7 @@ define <4 x float>@test_int_x86_avx512_mask_range_ss(<4 x float> %x0, <4 x float
|
||||
; AVX512DQVL-NEXT: vrangess $4, {sae}, %xmm1, %xmm0, %xmm2 {%k1}
|
||||
; AVX512DQVL-NEXT: vrangess $4, {sae}, %xmm1, %xmm0, %xmm3
|
||||
; AVX512DQVL-NEXT: vaddps %xmm3, %xmm2, %xmm2
|
||||
; AVX512DQVL-NEXT: vrangeps $4, %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vrangess $4, %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vaddps %xmm2, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: retq
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4, i32 8)
|
||||
@ -323,25 +323,15 @@ define <2 x double>@test_int_x86_avx512_mask_reduce_sd(<2 x double> %x0, <2 x do
|
||||
declare <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32, i32)
|
||||
|
||||
define <2 x double>@test_int_x86_avx512_mask_range_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
|
||||
; AVX512DQ-LABEL: test_int_x86_avx512_mask_range_sd:
|
||||
; AVX512DQ: ## BB#0:
|
||||
; AVX512DQ-NEXT: vrangesd $4, %xmm1, %xmm0, %xmm3
|
||||
; AVX512DQ-NEXT: kmovw %edi, %k1
|
||||
; AVX512DQ-NEXT: vrangesd $4, %xmm1, %xmm0, %xmm2 {%k1}
|
||||
; AVX512DQ-NEXT: vrangesd $4, {sae}, %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vaddpd %xmm0, %xmm2, %xmm0
|
||||
; AVX512DQ-NEXT: vaddpd %xmm0, %xmm3, %xmm0
|
||||
; AVX512DQ-NEXT: retq
|
||||
;
|
||||
; AVX512DQVL-LABEL: test_int_x86_avx512_mask_range_sd:
|
||||
; AVX512DQVL: ## BB#0:
|
||||
; AVX512DQVL-NEXT: vrangepd $4, %xmm1, %xmm0, %xmm3
|
||||
; AVX512DQVL-NEXT: kmovw %edi, %k1
|
||||
; AVX512DQVL-NEXT: vrangesd $4, %xmm1, %xmm0, %xmm2 {%k1}
|
||||
; AVX512DQVL-NEXT: vrangesd $4, {sae}, %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vaddpd %xmm0, %xmm2, %xmm0
|
||||
; AVX512DQVL-NEXT: vaddpd %xmm0, %xmm3, %xmm0
|
||||
; AVX512DQVL-NEXT: retq
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_range_sd:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vrangesd $4, %xmm1, %xmm0, %xmm3
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vrangesd $4, %xmm1, %xmm0, %xmm2 {%k1}
|
||||
; CHECK-NEXT: vrangesd $4, {sae}, %xmm1, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
|
||||
; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4, i32 4)
|
||||
%res1 = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 4, i32 8)
|
||||
%res2 = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 4, i32 4)
|
||||
|
Loading…
x
Reference in New Issue
Block a user