mirror of
https://github.com/RPCSX/llvm.git
synced 2025-01-24 21:25:41 +00:00
AVX-512: Added mask and rounding mode for scalar arithmetics
Added more tests for scalar instructions to distinguish between AVX and AVX-512 forms. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@230891 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
aac6cc3360
commit
975e9b99aa
@ -3243,28 +3243,95 @@ defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AVX-512 FP arithmetic
|
||||
//===----------------------------------------------------------------------===//
|
||||
multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
|
||||
SDNode OpNode, SDNode VecNode, OpndItins itins,
|
||||
bit IsCommutable> {
|
||||
|
||||
multiclass avx512_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
SizeItins itins> {
|
||||
defm SSZ : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"), OpNode, FR32X,
|
||||
f32mem, itins.s, 0>, XS, EVEX_4V, VEX_LIG,
|
||||
EVEX_CD8<32, CD8VT1>;
|
||||
defm SDZ : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"), OpNode, FR64X,
|
||||
f64mem, itins.d, 0>, XD, VEX_W, EVEX_4V, VEX_LIG,
|
||||
EVEX_CD8<64, CD8VT1>;
|
||||
defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
|
||||
"$src2, $src1", "$src1, $src2",
|
||||
(VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
|
||||
(i32 FROUND_CURRENT)),
|
||||
"", itins.rr, IsCommutable>;
|
||||
|
||||
defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
|
||||
"$src2, $src1", "$src1, $src2",
|
||||
(VecNode (_.VT _.RC:$src1),
|
||||
(_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
|
||||
(i32 FROUND_CURRENT)),
|
||||
"", itins.rm, IsCommutable>;
|
||||
let isCodeGenOnly = 1, isCommutable = IsCommutable,
|
||||
Predicates = [HasAVX512] in {
|
||||
def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
|
||||
(ins _.FRC:$src1, _.FRC:$src2),
|
||||
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
|
||||
itins.rr>;
|
||||
// Register-memory form of the scalar FP operation on the plain FP register
// class (_.FRC): $src1 comes from a register, $src2 is loaded through
// _.ScalarLdFrag and folded into the instruction (MRMSrcMem).
// Uses itins.rm, matching the rm_Int form of this multiclass; the previous
// text reused itins.rr, which is the itinerary passed by the
// register-register forms.
def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
            (ins _.FRC:$src1, _.ScalarMemOp:$src2),
            OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
            [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                               (_.ScalarLdFrag addr:$src2)))], itins.rm>;
|
||||
}
|
||||
}
|
||||
|
||||
let isCommutable = 1 in {
|
||||
defm VADD : avx512_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>;
|
||||
defm VMUL : avx512_binop_s<0x59, "mul", fmul, SSE_ALU_ITINS_S>;
|
||||
defm VMIN : avx512_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>;
|
||||
defm VMAX : avx512_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>;
|
||||
// Scalar FP binary operation with an explicit rounding-control operand.
// Defines a single register-register form, "rrb", through
// AVX512_maskable_scalar: two _.RC sources plus an AVX512RC:$rc operand.
// The selection pattern is VecNode applied to both sources with $rc as its
// (i32 imm) third operand. The result is tagged EVEX_B and EVEX_RC.
multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,
                                  X86VectorVTInfo _, SDNode VecNode,
                                  OpndItins itins, bit IsCommutable> {
  defm rrb : AVX512_maskable_scalar<
                 opc, MRMSrcReg, _,
                 (outs _.RC:$dst),
                 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc),
                 OpcodeStr,
                 "$rc, $src2, $src1",
                 "$src1, $src2, $rc",
                 (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), (i32 imm:$rc)),
                 "", itins.rr, IsCommutable>,
             EVEX_B, EVEX_RC;
}
|
||||
let isCommutable = 0 in {
|
||||
defm VSUB : avx512_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>;
|
||||
defm VDIV : avx512_binop_s<0x5E, "div", fdiv, SSE_ALU_ITINS_S>;
|
||||
// Scalar FP binary operation in its "{sae}" register-register form.
// Defines a single form, "rrb", through AVX512_maskable_scalar: two _.RC
// sources, matched as VecNode with FROUND_NO_EXC as the third operand, and
// tagged EVEX_B.
//
// itins.rr is now forwarded as the itinerary, in the same argument position
// the other register-register AVX512_maskable_scalar users in this file pass
// it; previously the itins parameter was accepted but unused, so rrb fell back
// to the callee's default.
// IsCommutable is still not forwarded to AVX512_maskable_scalar (unchanged
// from the original).
multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                                SDNode VecNode, OpndItins itins, bit IsCommutable> {
  defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                           (i32 FROUND_NO_EXC)), "{sae}", itins.rr>, EVEX_B;
}
|
||||
|
||||
// Instantiates the two scalar variants of an FP binary operation that
// supports explicit rounding control:
//   SSZ - f32x_info, mnemonic suffix "ss", itins.s
//   SDZ - f64x_info, mnemonic suffix "sd", itins.d
// Each variant combines avx512_fp_scalar with avx512_fp_scalar_round and
// then applies its prefix/encoding classes.
multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                SDNode VecNode,
                                SizeItins itins, bit IsCommutable> {
  defm SSZ : avx512_fp_scalar<opc, !strconcat(OpcodeStr, "ss"), f32x_info,
                              OpNode, VecNode, itins.s, IsCommutable>,
             avx512_fp_scalar_round<opc, !strconcat(OpcodeStr, "ss"),
                                    f32x_info, VecNode, itins.s,
                                    IsCommutable>,
             XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;

  defm SDZ : avx512_fp_scalar<opc, !strconcat(OpcodeStr, "sd"), f64x_info,
                              OpNode, VecNode, itins.d, IsCommutable>,
             avx512_fp_scalar_round<opc, !strconcat(OpcodeStr, "sd"),
                                    f64x_info, VecNode, itins.d,
                                    IsCommutable>,
             XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
|
||||
|
||||
// Instantiates the two scalar variants of an FP binary operation that
// supports the "{sae}" form:
//   SSZ - f32x_info, mnemonic suffix "ss", itins.s
//   SDZ - f64x_info, mnemonic suffix "sd", itins.d
// Each variant combines avx512_fp_scalar with avx512_fp_scalar_sae and then
// applies its prefix/encoding classes.
multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode VecNode,
                              SizeItins itins, bit IsCommutable> {
  defm SSZ : avx512_fp_scalar<opc, !strconcat(OpcodeStr, "ss"), f32x_info,
                              OpNode, VecNode, itins.s, IsCommutable>,
             avx512_fp_scalar_sae<opc, !strconcat(OpcodeStr, "ss"),
                                  f32x_info, VecNode, itins.s,
                                  IsCommutable>,
             XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;

  defm SDZ : avx512_fp_scalar<opc, !strconcat(OpcodeStr, "sd"), f64x_info,
                              OpNode, VecNode, itins.d, IsCommutable>,
             avx512_fp_scalar_sae<opc, !strconcat(OpcodeStr, "sd"),
                                  f64x_info, VecNode, itins.d,
                                  IsCommutable>,
             XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
|
||||
defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnd, SSE_ALU_ITINS_S, 1>;
|
||||
defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnd, SSE_ALU_ITINS_S, 1>;
|
||||
defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnd, SSE_ALU_ITINS_S, 0>;
|
||||
defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnd, SSE_ALU_ITINS_S, 0>;
|
||||
// x86 scalar min/max are not commutative: when either input is a NaN, or when
// both inputs are zeros, the instruction returns its second source operand, so
// swapping the operands can change the result. Pass IsCommutable = 0 so the
// instructions generated from avx512_fp_scalar are never marked commutable.
defm VMIN : avx512_binop_s_sae  <0x5D, "vmin", X86fmin, X86fminRnd, SSE_ALU_ITINS_S, 0>;
defm VMAX : avx512_binop_s_sae  <0x5F, "vmax", X86fmax, X86fmaxRnd, SSE_ALU_ITINS_S, 0>;
|
||||
|
||||
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
X86VectorVTInfo _, bit IsCommutable> {
|
||||
defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
|
||||
|
@ -282,6 +282,8 @@ def X86faddRnd : SDNode<"X86ISD::FADD_RND", SDTFPBinOpRound>;
|
||||
def X86fsubRnd : SDNode<"X86ISD::FSUB_RND", SDTFPBinOpRound>;
|
||||
def X86fmulRnd : SDNode<"X86ISD::FMUL_RND", SDTFPBinOpRound>;
|
||||
def X86fdivRnd : SDNode<"X86ISD::FDIV_RND", SDTFPBinOpRound>;
|
||||
// Max/min selection nodes declared with the SDTFPBinOpRound type profile,
// i.e. the same profile as the X86f{add,sub,mul,div}Rnd nodes.
// NOTE(review): unlike those nodes, which name "X86ISD::*_RND" opcodes, these
// two name the plain "X86ISD::FMAX" / "X86ISD::FMIN" opcodes - confirm that
// sharing the opcode with a different operand profile is intended.
def X86fmaxRnd : SDNode<"X86ISD::FMAX", SDTFPBinOpRound>;
|
||||
def X86fminRnd : SDNode<"X86ISD::FMIN", SDTFPBinOpRound>;
|
||||
|
||||
def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>;
|
||||
def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFma>;
|
||||
|
@ -3567,7 +3567,7 @@ multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
defm V#NAME#SS : avx_fp_unop_s<opc, "v"#OpcodeStr##ss, FR32, v4f32, f32,
|
||||
f32mem, ssmem, sse_load_f32,
|
||||
!cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss), OpNode,
|
||||
itins, HasAVX, "SS">, XS, VEX_4V, VEX_LIG;
|
||||
itins, UseAVX, "SS">, XS, VEX_4V, VEX_LIG;
|
||||
}
|
||||
|
||||
multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
@ -3579,7 +3579,7 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
defm V#NAME#SD : avx_fp_unop_s<opc, "v"#OpcodeStr##sd, FR64, v2f64, f64,
|
||||
f64mem, sdmem, sse_load_f64,
|
||||
!cast<Intrinsic>("int_x86_sse2_"##OpcodeStr##_sd),
|
||||
OpNode, itins, HasAVX, "SD">, XD, VEX_4V, VEX_LIG;
|
||||
OpNode, itins, UseAVX, "SD">, XD, VEX_4V, VEX_LIG;
|
||||
}
|
||||
|
||||
// Square root.
|
||||
|
92
test/CodeGen/X86/avx512-scalar.ll
Normal file
92
test/CodeGen/X86/avx512-scalar.ll
Normal file
@ -0,0 +1,92 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding | FileCheck %s --check-prefix AVX512
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx --show-mc-encoding | FileCheck %s --check-prefix AVX
|
||||
|
||||
; AVX512-LABEL: @test_fdiv
|
||||
; AVX512: vdivss %xmm{{.*}} ## encoding: [0x62
|
||||
; AVX-LABEL: @test_fdiv
|
||||
; AVX: vdivss %xmm{{.*}} ## encoding: [0xc5
|
||||
|
||||
define float @test_fdiv(float %a, float %b) {
|
||||
%c = fdiv float %a, %b
|
||||
ret float %c
|
||||
}
|
||||
|
||||
; AVX512-LABEL: @test_fsub
|
||||
; AVX512: vsubss %xmm{{.*}} ## encoding: [0x62
|
||||
; AVX-LABEL: @test_fsub
|
||||
; AVX: vsubss %xmm{{.*}} ## encoding: [0xc5
|
||||
|
||||
define float @test_fsub(float %a, float %b) {
|
||||
%c = fsub float %a, %b
|
||||
ret float %c
|
||||
}
|
||||
|
||||
; AVX512-LABEL: @test_fadd
|
||||
; AVX512: vaddsd %xmm{{.*}} ## encoding: [0x62
|
||||
; AVX-LABEL: @test_fadd
|
||||
; AVX: vaddsd %xmm{{.*}} ## encoding: [0xc5
|
||||
|
||||
define double @test_fadd(double %a, double %b) {
|
||||
%c = fadd double %a, %b
|
||||
ret double %c
|
||||
}
|
||||
|
||||
declare float @llvm.trunc.f32(float %Val)
|
||||
declare double @llvm.trunc.f64(double %Val)
|
||||
declare float @llvm.rint.f32(float %Val)
|
||||
declare double @llvm.rint.f64(double %Val)
|
||||
declare double @llvm.sqrt.f64(double %Val)
|
||||
declare float @llvm.sqrt.f32(float %Val)
|
||||
|
||||
; AVX512-LABEL: @test_trunc
|
||||
; AVX512: vrndscaless
|
||||
; AVX-LABEL: @test_trunc
|
||||
; AVX: vroundss
|
||||
|
||||
define float @test_trunc(float %a) {
|
||||
%c = call float @llvm.trunc.f32(float %a)
|
||||
ret float %c
|
||||
}
|
||||
|
||||
; AVX512-LABEL: @test_sqrt
|
||||
; AVX512: vsqrtsd %xmm{{.*}} ## encoding: [0x62
|
||||
; AVX-LABEL: @test_sqrt
|
||||
; AVX: vsqrtsd %xmm{{.*}} ## encoding: [0xc5
|
||||
|
||||
define double @test_sqrt(double %a) {
|
||||
%c = call double @llvm.sqrt.f64(double %a)
|
||||
ret double %c
|
||||
}
|
||||
|
||||
; AVX512-LABEL: @test_rint
|
||||
; AVX512: vrndscaless
|
||||
; AVX-LABEL: @test_rint
|
||||
; AVX: vroundss
|
||||
|
||||
define float @test_rint(float %a) {
|
||||
%c = call float @llvm.rint.f32(float %a)
|
||||
ret float %c
|
||||
}
|
||||
|
||||
; AVX512-LABEL: @test_vmax
|
||||
; AVX512: vmaxss %xmm{{.*}} ## encoding: [0x62
|
||||
; AVX-LABEL: @test_vmax
|
||||
; AVX: vmaxss %xmm{{.*}} ## encoding: [0xc5
|
||||
|
||||
define float @test_vmax(float %i, float %j) {
|
||||
%cmp_res = fcmp ogt float %i, %j
|
||||
%max = select i1 %cmp_res, float %i, float %j
|
||||
ret float %max
|
||||
}
|
||||
|
||||
; AVX512-LABEL: @test_mov
|
||||
; AVX512: vcmpltss %xmm{{.*}} ## encoding: [0x62
|
||||
; AVX-LABEL: @test_mov
|
||||
; AVX: vcmpltss %xmm{{.*}} ## encoding: [0xc5
|
||||
|
||||
define float @test_mov(float %a, float %b, float %i, float %j) {
|
||||
%cmp_res = fcmp ogt float %i, %j
|
||||
%max = select i1 %cmp_res, float %b, float %a
|
||||
ret float %max
|
||||
}
|
||||
|
@ -215,11 +215,17 @@ static inline bool inheritsFrom(InstructionContext child,
|
||||
return inheritsFrom(child, IC_EVEX_W_K) ||
|
||||
inheritsFrom(child, IC_EVEX_L_W_K);
|
||||
case IC_EVEX_XS_K:
|
||||
case IC_EVEX_XS_K_B:
|
||||
case IC_EVEX_XS_KZ_B:
|
||||
return inheritsFrom(child, IC_EVEX_W_XS_K) ||
|
||||
inheritsFrom(child, IC_EVEX_L_W_XS_K);
|
||||
case IC_EVEX_XD_K:
|
||||
case IC_EVEX_XD_K_B:
|
||||
case IC_EVEX_XD_KZ_B:
|
||||
return inheritsFrom(child, IC_EVEX_W_XD_K) ||
|
||||
inheritsFrom(child, IC_EVEX_L_W_XD_K);
|
||||
case IC_EVEX_XS_B:
|
||||
case IC_EVEX_XD_B:
|
||||
case IC_EVEX_K_B:
|
||||
case IC_EVEX_KZ:
|
||||
return false;
|
||||
@ -253,6 +259,12 @@ static inline bool inheritsFrom(InstructionContext child,
|
||||
case IC_EVEX_W_KZ:
|
||||
case IC_EVEX_W_XS_KZ:
|
||||
case IC_EVEX_W_XD_KZ:
|
||||
case IC_EVEX_W_XS_B:
|
||||
case IC_EVEX_W_XD_B:
|
||||
case IC_EVEX_W_XS_K_B:
|
||||
case IC_EVEX_W_XD_K_B:
|
||||
case IC_EVEX_W_XS_KZ_B:
|
||||
case IC_EVEX_W_XD_KZ_B:
|
||||
case IC_EVEX_W_OPSIZE_KZ:
|
||||
case IC_EVEX_W_OPSIZE_KZ_B:
|
||||
return false;
|
||||
|
Loading…
x
Reference in New Issue
Block a user