[AVX-512] Allow legacy scalar min/max intrinsics to select EVEX instructions when available
This patch introduces new X86ISD::FMAXS and X86ISD::FMINS opcodes. The legacy scalar min/max intrinsics now lower to these nodes, as do the AVX-512 masked intrinsics when the rounding mode is CUR_DIRECTION. I've merged a copy of the tablegen multiclass avx512_fp_scalar into avx512_fp_scalar_sae; avx512_fp_scalar itself still needs to support CUR_DIRECTION appearing as a rounding mode for X86ISD::FADD_ROUND and the other rounding nodes.

Differential Revision: https://reviews.llvm.org/D30186

llvm-svn: 295810
commit 56d4022997
parent 5cd6c5cacf
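For context, the "legacy" intrinsics in question are the SSE/SSE2 scalar min/max operations reachable from C through <immintrin.h>. The sketch below is illustrative only and is not part of the patch: built for an AVX-512 target, the scalar max can now also be matched by the EVEX-encoded vmaxss, which matters when the compiler needs registers xmm16-xmm31 (only reachable with EVEX encoding).

    // Illustrative only: exercises the legacy scalar max intrinsic that this
    // patch allows to select an EVEX instruction when AVX-512 is available.
    // Build with something like: clang++ -O2 -mavx512f example.cpp
    #include <immintrin.h>
    #include <cstdio>

    int main() {
      __m128 a = _mm_set_ss(1.5f);
      __m128 b = _mm_set_ss(2.5f);
      // _mm_max_ss reaches the llvm.x86.sse.max.ss intrinsic, which this
      // commit maps to the new X86ISD::FMAXS node.
      __m128 m = _mm_max_ss(a, b);
      printf("%f\n", _mm_cvtss_f32(m));
      return 0;
    }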
@@ -18991,6 +18991,14 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
       SDValue Src2 = Op.getOperand(2);
       SDValue passThru = Op.getOperand(3);
       SDValue Mask = Op.getOperand(4);
+      unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
+      if (IntrWithRoundingModeOpcode != 0) {
+        SDValue Rnd = Op.getOperand(5);
+        if (!isRoundModeCurDirection(Rnd))
+          return getScalarMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
+                                                  dl, VT, Src1, Src2, Rnd),
+                                      Mask, passThru, Subtarget, DAG);
+      }
       return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2),
                                   Mask, passThru, Subtarget, DAG);
     }
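The hunk above is the INTR_TYPE_SCALAR_MASK lowering path: Opc0 names the plain scalar node (X86ISD::FMAXS / X86ISD::FMINS) and Opc1 the rounding-mode variant, which is only used when the rounding operand is something other than CUR_DIRECTION. Below is a simplified, self-contained model of that selection; the types and names are stand-ins, not LLVM's actual SelectionDAG API.

    // Simplified model of the INTR_TYPE_SCALAR_MASK opcode selection shown above.
    #include <cstdint>
    #include <iostream>

    enum class RoundingMode : uint32_t { CurDirection = 4, ToNearestSae = 8 };

    struct IntrinsicData {
      unsigned Opc0; // plain scalar opcode, e.g. a stand-in for FMAXS
      unsigned Opc1; // rounding-mode opcode, e.g. a stand-in for FMAX_RND (0 if none)
    };

    // Pick the opcode the lowering would emit for a masked scalar intrinsic.
    unsigned selectOpcode(const IntrinsicData &Intr, RoundingMode Rnd) {
      // If a rounding-mode opcode exists and the call does not ask for
      // CUR_DIRECTION, the rounding variant is used; otherwise the plain
      // scalar node is emitted and the mask/pass-through wrapped around it.
      if (Intr.Opc1 != 0 && Rnd != RoundingMode::CurDirection)
        return Intr.Opc1;
      return Intr.Opc0;
    }

    int main() {
      constexpr unsigned FMAXS = 1, FMAX_RND = 2;
      IntrinsicData MaxSS{FMAXS, FMAX_RND};
      std::cout << selectOpcode(MaxSS, RoundingMode::CurDirection) << "\n"; // 1
      std::cout << selectOpcode(MaxSS, RoundingMode::ToNearestSae) << "\n"; // 2
      return 0;
    }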
@@ -23910,8 +23918,10 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::ABS:                return "X86ISD::ABS";
   case X86ISD::CONFLICT:           return "X86ISD::CONFLICT";
   case X86ISD::FMAX:               return "X86ISD::FMAX";
+  case X86ISD::FMAXS:              return "X86ISD::FMAXS";
   case X86ISD::FMAX_RND:           return "X86ISD::FMAX_RND";
   case X86ISD::FMIN:               return "X86ISD::FMIN";
+  case X86ISD::FMINS:              return "X86ISD::FMINS";
   case X86ISD::FMIN_RND:           return "X86ISD::FMIN_RND";
   case X86ISD::FMAXC:              return "X86ISD::FMAXC";
   case X86ISD::FMINC:              return "X86ISD::FMINC";
@@ -251,6 +251,9 @@ namespace llvm {
       /// Commutative FMIN and FMAX.
       FMAXC, FMINC,

+      /// Scalar intrinsic floating point max and min.
+      FMAXS, FMINS,
+
       /// Floating point reciprocal-sqrt and reciprocal approximation.
       /// Note that these typically require refinement
       /// in order to obtain suitable precision.
@@ -4195,13 +4195,43 @@ multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo
                          EVEX_B, EVEX_RC;
 }
 multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
-                                SDNode VecNode, OpndItins itins, bit IsCommutable> {
-  let ExeDomain = _.ExeDomain in
+                                SDNode OpNode, SDNode VecNode, SDNode SaeNode,
+                                OpndItins itins, bit IsCommutable> {
+  let ExeDomain = _.ExeDomain in {
+  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
+                           "$src2, $src1", "$src1, $src2",
+                           (_.VT (VecNode _.RC:$src1, _.RC:$src2)),
+                           itins.rr>;
+
+  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
+                           (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
+                           "$src2, $src1", "$src1, $src2",
+                           (_.VT (VecNode _.RC:$src1,
+                                          _.ScalarIntMemCPat:$src2)),
+                           itins.rm>;
+
+  let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
+  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
+                         (ins _.FRC:$src1, _.FRC:$src2),
+                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
+                          itins.rr> {
+    let isCommutable = IsCommutable;
+  }
+  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
+                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
+                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
+                             (_.ScalarLdFrag addr:$src2)))], itins.rm>;
+  }
+
   defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                             (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                             "{sae}, $src2, $src1", "$src1, $src2, {sae}",
-                            (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+                            (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                             (i32 FROUND_NO_EXC))>, EVEX_B;
+  }
 }

 multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -4220,25 +4250,23 @@ multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
 }

 multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                              SDNode VecNode,
+                              SDNode VecNode, SDNode SaeNode,
                               SizeItins itins, bit IsCommutable> {
-  defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
-                              itins.s, IsCommutable>,
-             avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, VecNode,
-                              itins.s, IsCommutable>,
+  defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
+                              VecNode, SaeNode, itins.s, IsCommutable>,
                               XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
-  defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
-                              itins.d, IsCommutable>,
-             avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, VecNode,
-                              itins.d, IsCommutable>,
+  defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
+                              VecNode, SaeNode, itins.d, IsCommutable>,
                               XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
 }
 defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnd, SSE_ALU_ITINS_S, 1>;
 defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnd, SSE_MUL_ITINS_S, 1>;
 defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnd, SSE_ALU_ITINS_S, 0>;
 defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnd, SSE_DIV_ITINS_S, 0>;
-defm VMIN : avx512_binop_s_sae <0x5D, "vmin", X86fmin, X86fminRnd, SSE_ALU_ITINS_S, 0>;
-defm VMAX : avx512_binop_s_sae <0x5F, "vmax", X86fmax, X86fmaxRnd, SSE_ALU_ITINS_S, 0>;
+defm VMIN : avx512_binop_s_sae <0x5D, "vmin", X86fmin, X86fmins, X86fminRnd,
+                                SSE_ALU_ITINS_S, 0>;
+defm VMAX : avx512_binop_s_sae <0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxRnd,
+                                SSE_ALU_ITINS_S, 0>;

 // MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
 // X86fminc and X86fmaxc instead of X86fmin and X86fmax
@@ -455,7 +455,7 @@ class SI_Int<bits<8> o, Format F, dag outs, dag ins, string asm,
           Domain d = GenericDomain>
       : I<o, F, outs, ins, asm, pattern, itin, d> {
   let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
-                   !if(!eq(OpEnc.Value, EncVEX.Value), [HasAVX],
+                   !if(!eq(OpEnc.Value, EncVEX.Value), [UseAVX],
                    !if(!eq(OpPrefix.Value, XS.Value), [UseSSE1],
                    !if(!eq(OpPrefix.Value, XD.Value), [UseSSE2],
                    !if(!eq(OpPrefix.Value, PD.Value), [UseSSE2],
@@ -42,6 +42,8 @@ def SDTX86CmpTestSae : SDTypeProfile<1, 3, [SDTCisVT<0, i32>,

 def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>;
 def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>;
+def X86fmins : SDNode<"X86ISD::FMINS", SDTFPBinOp>;
+def X86fmaxs : SDNode<"X86ISD::FMAXS", SDTFPBinOp>;

 // Commutative and Associative FMIN and FMAX.
 def X86fminc : SDNode<"X86ISD::FMINC", SDTFPBinOp,
@@ -259,8 +259,8 @@ multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,

 /// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
 multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr,
-                               SDPatternOperator Int, RegisterClass RC,
-                               string asm, Operand memopr,
+                               SDPatternOperator OpNode, RegisterClass RC,
+                               ValueType VT, string asm, Operand memopr,
                                ComplexPattern mem_cpat, Domain d,
                                OpndItins itins, bit Is2Addr = 1> {
 let isCodeGenOnly = 1, hasSideEffects = 0 in {
@@ -268,14 +268,14 @@ let isCodeGenOnly = 1, hasSideEffects = 0 in {
                !if(Is2Addr,
                    !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
                    !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-               [(set RC:$dst, (Int RC:$src1, RC:$src2))], itins.rr, d>,
+               [(set RC:$dst, (VT (OpNode RC:$src1, RC:$src2)))], itins.rr, d>,
                Sched<[itins.Sched]>;
   let mayLoad = 1 in
   def rm_Int : SI_Int<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
                !if(Is2Addr,
                    !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
                    !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-               [(set RC:$dst, (Int RC:$src1, mem_cpat:$src2))], itins.rm, d>,
+               [(set RC:$dst, (VT (OpNode RC:$src1, mem_cpat:$src2)))], itins.rm, d>,
                Sched<[itins.Sched.Folded, ReadAfterLd]>;
 }
 }
@@ -3047,21 +3047,20 @@ multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
 }

 multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
-                                      SDPatternOperator IntSS,
-                                      SDPatternOperator IntSD,
+                                      SDPatternOperator OpNode,
                                       SizeItins itins> {
-  defm V#NAME#SS : sse12_fp_scalar_int<opc, OpcodeStr, IntSS, VR128,
+  defm V#NAME#SS : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v4f32,
                !strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32,
                SSEPackedSingle, itins.s, 0>, XS, VEX_4V, VEX_LIG, VEX_WIG;
-  defm V#NAME#SD : sse12_fp_scalar_int<opc, OpcodeStr, IntSD, VR128,
+  defm V#NAME#SD : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v2f64,
                !strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64,
                SSEPackedDouble, itins.d, 0>, XD, VEX_4V, VEX_LIG, VEX_WIG;

   let Constraints = "$src1 = $dst" in {
-    defm SS : sse12_fp_scalar_int<opc, OpcodeStr, IntSS, VR128,
+    defm SS : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v4f32,
                !strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32,
                SSEPackedSingle, itins.s>, XS;
-    defm SD : sse12_fp_scalar_int<opc, OpcodeStr, IntSD, VR128,
+    defm SD : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v2f64,
                !strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64,
                SSEPackedDouble, itins.d>, XD;
   }
@@ -3070,29 +3069,23 @@ multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
 // Binary Arithmetic instructions
 defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P>,
            basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>,
-           basic_sse12_fp_binop_s_int<0x58, "add", null_frag, null_frag,
-                                      SSE_ALU_ITINS_S>;
+           basic_sse12_fp_binop_s_int<0x58, "add", null_frag, SSE_ALU_ITINS_S>;
 defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P>,
            basic_sse12_fp_binop_s<0x59, "mul", fmul, SSE_MUL_ITINS_S>,
-           basic_sse12_fp_binop_s_int<0x59, "mul", null_frag, null_frag,
-                                      SSE_MUL_ITINS_S>;
+           basic_sse12_fp_binop_s_int<0x59, "mul", null_frag, SSE_MUL_ITINS_S>;
 let isCommutable = 0 in {
   defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P>,
              basic_sse12_fp_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>,
-             basic_sse12_fp_binop_s_int<0x5C, "sub", null_frag, null_frag,
-                                        SSE_ALU_ITINS_S>;
+             basic_sse12_fp_binop_s_int<0x5C, "sub", null_frag,SSE_ALU_ITINS_S>;
   defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_DIV_ITINS_P>,
              basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S>,
-             basic_sse12_fp_binop_s_int<0x5E, "div", null_frag, null_frag,
-                                        SSE_DIV_ITINS_S>;
+             basic_sse12_fp_binop_s_int<0x5E, "div", null_frag,SSE_DIV_ITINS_S>;
   defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>,
              basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>,
-             basic_sse12_fp_binop_s_int<0x5F, "max", int_x86_sse_max_ss,
-                                        int_x86_sse2_max_sd, SSE_ALU_ITINS_S>;
+             basic_sse12_fp_binop_s_int<0x5F, "max", X86fmaxs, SSE_ALU_ITINS_S>;
   defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>,
              basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>,
-             basic_sse12_fp_binop_s_int<0x5D, "min", int_x86_sse_min_ss,
-                                        int_x86_sse2_min_sd, SSE_ALU_ITINS_S>;
+             basic_sse12_fp_binop_s_int<0x5D, "min", X86fmins, SSE_ALU_ITINS_S>;
 }

 let isCodeGenOnly = 1 in {
@@ -811,18 +811,18 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
                      X86ISD::FMAX_RND),
   X86_INTRINSIC_DATA(avx512_mask_max_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FMAX,
                      X86ISD::FMAX_RND),
-  X86_INTRINSIC_DATA(avx512_mask_max_sd_round, INTR_TYPE_SCALAR_MASK_RM,
-                     X86ISD::FMAX_RND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_max_ss_round, INTR_TYPE_SCALAR_MASK_RM,
-                     X86ISD::FMAX_RND, 0),
+  X86_INTRINSIC_DATA(avx512_mask_max_sd_round, INTR_TYPE_SCALAR_MASK,
+                     X86ISD::FMAXS, X86ISD::FMAX_RND),
+  X86_INTRINSIC_DATA(avx512_mask_max_ss_round, INTR_TYPE_SCALAR_MASK,
+                     X86ISD::FMAXS, X86ISD::FMAX_RND),
   X86_INTRINSIC_DATA(avx512_mask_min_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FMIN,
                      X86ISD::FMIN_RND),
   X86_INTRINSIC_DATA(avx512_mask_min_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FMIN,
                      X86ISD::FMIN_RND),
-  X86_INTRINSIC_DATA(avx512_mask_min_sd_round, INTR_TYPE_SCALAR_MASK_RM,
-                     X86ISD::FMIN_RND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_min_ss_round, INTR_TYPE_SCALAR_MASK_RM,
-                     X86ISD::FMIN_RND, 0),
+  X86_INTRINSIC_DATA(avx512_mask_min_sd_round, INTR_TYPE_SCALAR_MASK,
+                     X86ISD::FMINS, X86ISD::FMIN_RND),
+  X86_INTRINSIC_DATA(avx512_mask_min_ss_round, INTR_TYPE_SCALAR_MASK,
+                     X86ISD::FMINS, X86ISD::FMIN_RND),
   X86_INTRINSIC_DATA(avx512_mask_mul_pd_512, INTR_TYPE_2OP_MASK, ISD::FMUL,
                      X86ISD::FMUL_RND),
   X86_INTRINSIC_DATA(avx512_mask_mul_ps_512, INTR_TYPE_2OP_MASK, ISD::FMUL,
@@ -1604,7 +1604,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(sse_comilt_ss, COMI, X86ISD::COMI, ISD::SETLT),
   X86_INTRINSIC_DATA(sse_comineq_ss, COMI, X86ISD::COMI, ISD::SETNE),
   X86_INTRINSIC_DATA(sse_max_ps, INTR_TYPE_2OP, X86ISD::FMAX, 0),
+  X86_INTRINSIC_DATA(sse_max_ss, INTR_TYPE_2OP, X86ISD::FMAXS, 0),
   X86_INTRINSIC_DATA(sse_min_ps, INTR_TYPE_2OP, X86ISD::FMIN, 0),
+  X86_INTRINSIC_DATA(sse_min_ss, INTR_TYPE_2OP, X86ISD::FMINS, 0),
   X86_INTRINSIC_DATA(sse_movmsk_ps, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),
   X86_INTRINSIC_DATA(sse_rcp_ps, INTR_TYPE_1OP, X86ISD::FRCP, 0),
   X86_INTRINSIC_DATA(sse_rsqrt_ps, INTR_TYPE_1OP, X86ISD::FRSQRT, 0),
@@ -1627,7 +1629,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(sse2_cvttpd2dq, INTR_TYPE_1OP, X86ISD::CVTTP2SI, 0),
   X86_INTRINSIC_DATA(sse2_cvttps2dq, INTR_TYPE_1OP, ISD::FP_TO_SINT, 0),
   X86_INTRINSIC_DATA(sse2_max_pd, INTR_TYPE_2OP, X86ISD::FMAX, 0),
+  X86_INTRINSIC_DATA(sse2_max_sd, INTR_TYPE_2OP, X86ISD::FMAXS, 0),
   X86_INTRINSIC_DATA(sse2_min_pd, INTR_TYPE_2OP, X86ISD::FMIN, 0),
+  X86_INTRINSIC_DATA(sse2_min_sd, INTR_TYPE_2OP, X86ISD::FMINS, 0),
   X86_INTRINSIC_DATA(sse2_movmsk_pd, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),
   X86_INTRINSIC_DATA(sse2_packssdw_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
   X86_INTRINSIC_DATA(sse2_packsswb_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
@@ -322,10 +322,15 @@ define <4 x float> @test_x86_sse_max_ss(<4 x float> %a0, <4 x float> %a1) {
 ; SSE-NEXT:    maxss %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x5f,0xc1]
 ; SSE-NEXT:    retl ## encoding: [0xc3]
 ;
-; VCHECK-LABEL: test_x86_sse_max_ss:
-; VCHECK:       ## BB#0:
-; VCHECK-NEXT:    vmaxss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5f,0xc1]
-; VCHECK-NEXT:    retl ## encoding: [0xc3]
+; AVX2-LABEL: test_x86_sse_max_ss:
+; AVX2:       ## BB#0:
+; AVX2-NEXT:    vmaxss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5f,0xc1]
+; AVX2-NEXT:    retl ## encoding: [0xc3]
+;
+; SKX-LABEL: test_x86_sse_max_ss:
+; SKX:       ## BB#0:
+; SKX-NEXT:    vmaxss %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5f,0xc1]
+; SKX-NEXT:    retl ## encoding: [0xc3]
   %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
   ret <4 x float> %res
 }
@@ -359,10 +364,15 @@ define <4 x float> @test_x86_sse_min_ss(<4 x float> %a0, <4 x float> %a1) {
 ; SSE-NEXT:    minss %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x5d,0xc1]
 ; SSE-NEXT:    retl ## encoding: [0xc3]
 ;
-; VCHECK-LABEL: test_x86_sse_min_ss:
-; VCHECK:       ## BB#0:
-; VCHECK-NEXT:    vminss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5d,0xc1]
-; VCHECK-NEXT:    retl ## encoding: [0xc3]
+; AVX2-LABEL: test_x86_sse_min_ss:
+; AVX2:       ## BB#0:
+; AVX2-NEXT:    vminss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5d,0xc1]
+; AVX2-NEXT:    retl ## encoding: [0xc3]
+;
+; SKX-LABEL: test_x86_sse_min_ss:
+; SKX:       ## BB#0:
+; SKX-NEXT:    vminss %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5d,0xc1]
+; SKX-NEXT:    retl ## encoding: [0xc3]
   %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
   ret <4 x float> %res
 }
@@ -587,10 +587,15 @@ define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) {
 ; SSE-NEXT:    maxsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x5f,0xc1]
 ; SSE-NEXT:    retl ## encoding: [0xc3]
 ;
-; VCHECK-LABEL: test_x86_sse2_max_sd:
-; VCHECK:       ## BB#0:
-; VCHECK-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5f,0xc1]
-; VCHECK-NEXT:    retl ## encoding: [0xc3]
+; AVX2-LABEL: test_x86_sse2_max_sd:
+; AVX2:       ## BB#0:
+; AVX2-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5f,0xc1]
+; AVX2-NEXT:    retl ## encoding: [0xc3]
+;
+; SKX-LABEL: test_x86_sse2_max_sd:
+; SKX:       ## BB#0:
+; SKX-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5f,0xc1]
+; SKX-NEXT:    retl ## encoding: [0xc3]
   %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
   ret <2 x double> %res
 }
@@ -624,10 +629,15 @@ define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) {
 ; SSE-NEXT:    minsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x5d,0xc1]
 ; SSE-NEXT:    retl ## encoding: [0xc3]
 ;
-; VCHECK-LABEL: test_x86_sse2_min_sd:
-; VCHECK:       ## BB#0:
-; VCHECK-NEXT:    vminsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5d,0xc1]
-; VCHECK-NEXT:    retl ## encoding: [0xc3]
+; AVX2-LABEL: test_x86_sse2_min_sd:
+; AVX2:       ## BB#0:
+; AVX2-NEXT:    vminsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5d,0xc1]
+; AVX2-NEXT:    retl ## encoding: [0xc3]
+;
+; SKX-LABEL: test_x86_sse2_min_sd:
+; SKX:       ## BB#0:
+; SKX-NEXT:    vminsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5d,0xc1]
+; SKX-NEXT:    retl ## encoding: [0xc3]
   %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
   ret <2 x double> %res
 }