[FPEnv] Add constrained CEIL/FLOOR/ROUND/TRUNC intrinsics

Differential Revision: https://reviews.llvm.org/D53411

llvm-svn: 346141
This commit is contained in:
Cameron McInally 2018-11-05 15:59:49 +00:00
parent 243da175c1
commit 1bc93c50fd
15 changed files with 659 additions and 1 deletions

View File

@ -14580,6 +14580,151 @@ mode is determined by the runtime floating-point environment. The rounding
mode argument is only intended as information to the compiler.
'``llvm.experimental.constrained.ceil``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Syntax:
"""""""
::
declare <type>
@llvm.experimental.constrained.ceil(<type> <op1>,
metadata <rounding mode>,
metadata <exception behavior>)
Overview:
"""""""""
The '``llvm.experimental.constrained.ceil``' intrinsic returns the ceiling of the
first operand.
Arguments:
""""""""""
The first argument and the return value are floating-point numbers of the same
type.
The second and third arguments specify the rounding mode and exception
behavior as described above. The rounding mode is currently unused for this
intrinsic.
Semantics:
""""""""""
This function returns the same values as the libm ``ceil`` functions
would and handles error conditions in the same way.
'``llvm.experimental.constrained.floor``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Syntax:
"""""""
::
declare <type>
@llvm.experimental.constrained.floor(<type> <op1>,
metadata <rounding mode>,
metadata <exception behavior>)
Overview:
"""""""""
The '``llvm.experimental.constrained.floor``' intrinsic returns the floor of the
first operand.
Arguments:
""""""""""
The first argument and the return value are floating-point numbers of the same
type.
The second and third arguments specify the rounding mode and exception
behavior as described above. The rounding mode is currently unused for this
intrinsic.
Semantics:
""""""""""
This function returns the same values as the libm ``floor`` functions
would and handles error conditions in the same way.
'``llvm.experimental.constrained.round``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Syntax:
"""""""
::
declare <type>
@llvm.experimental.constrained.round(<type> <op1>,
metadata <rounding mode>,
metadata <exception behavior>)
Overview:
"""""""""
The '``llvm.experimental.constrained.round``' intrinsic returns the first
operand rounded to the nearest integer.
Arguments:
""""""""""
The first argument and the return value are floating-point numbers of the same
type.
The second and third arguments specify the rounding mode and exception
behavior as described above. The rounding mode is currently unused for this
intrinsic.
Semantics:
""""""""""
This function returns the same values as the libm ``round`` functions
would and handles error conditions in the same way.
'``llvm.experimental.constrained.trunc``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Syntax:
"""""""
::
declare <type>
@llvm.experimental.constrained.trunc(<type> <op1>,
metadata <truncing mode>,
metadata <exception behavior>)
Overview:
"""""""""
The '``llvm.experimental.constrained.trunc``' intrinsic returns the first
operand rounded to the nearest integer not larger in magnitude than the
operand.
Arguments:
""""""""""
The first argument and the return value are floating-point numbers of the same
type.
The second and third arguments specify the truncing mode and exception
behavior as described above. The truncing mode is currently unused for this
intrinsic.
Semantics:
""""""""""
This function returns the same values as the libm ``trunc`` functions
would and handles error conditions in the same way.
General Intrinsics
------------------

View File

@ -289,6 +289,7 @@ namespace ISD {
STRICT_FSQRT, STRICT_FPOW, STRICT_FPOWI, STRICT_FSIN, STRICT_FCOS,
STRICT_FEXP, STRICT_FEXP2, STRICT_FLOG, STRICT_FLOG10, STRICT_FLOG2,
STRICT_FRINT, STRICT_FNEARBYINT, STRICT_FMAXNUM, STRICT_FMINNUM,
STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND, STRICT_FTRUNC,
/// FMA - Perform a * b + c with no intermediate rounding step.
FMA,

View File

@ -674,6 +674,10 @@ public:
case ISD::STRICT_FNEARBYINT:
case ISD::STRICT_FMAXNUM:
case ISD::STRICT_FMINNUM:
case ISD::STRICT_FCEIL:
case ISD::STRICT_FFLOOR:
case ISD::STRICT_FROUND:
case ISD::STRICT_FTRUNC:
return true;
}
}

View File

@ -821,6 +821,10 @@ public:
case ISD::STRICT_FNEARBYINT: EqOpc = ISD::FNEARBYINT; break;
case ISD::STRICT_FMAXNUM: EqOpc = ISD::FMAXNUM; break;
case ISD::STRICT_FMINNUM: EqOpc = ISD::FMINNUM; break;
case ISD::STRICT_FCEIL: EqOpc = ISD::FCEIL; break;
case ISD::STRICT_FFLOOR: EqOpc = ISD::FFLOOR; break;
case ISD::STRICT_FROUND: EqOpc = ISD::FROUND; break;
case ISD::STRICT_FTRUNC: EqOpc = ISD::FTRUNC; break;
}
auto Action = getOperationAction(EqOpc, VT);

View File

@ -253,6 +253,10 @@ namespace llvm {
case Intrinsic::experimental_constrained_nearbyint:
case Intrinsic::experimental_constrained_maxnum:
case Intrinsic::experimental_constrained_minnum:
case Intrinsic::experimental_constrained_ceil:
case Intrinsic::experimental_constrained_floor:
case Intrinsic::experimental_constrained_round:
case Intrinsic::experimental_constrained_trunc:
return true;
default: return false;
}

View File

@ -576,9 +576,25 @@ let IntrProperties = [IntrInaccessibleMemOnly] in {
LLVMMatchType<0>,
llvm_metadata_ty,
llvm_metadata_ty ]>;
def int_experimental_constrained_ceil : Intrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
llvm_metadata_ty,
llvm_metadata_ty ]>;
def int_experimental_constrained_floor : Intrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
llvm_metadata_ty,
llvm_metadata_ty ]>;
def int_experimental_constrained_round : Intrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
llvm_metadata_ty,
llvm_metadata_ty ]>;
def int_experimental_constrained_trunc : Intrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
llvm_metadata_ty,
llvm_metadata_ty ]>;
}
// FIXME: Add intrinsics for fcmp, fptrunc, fpext, fptoui and fptosi.
// FIXME: Add intrinsics for fabs, copysign, floor, ceil, trunc and round?
// FIXME: Add intrinsics for fabs and copysign?
//===------------------------- Expect Intrinsics --------------------------===//

View File

@ -1110,6 +1110,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::STRICT_FNEARBYINT:
case ISD::STRICT_FMAXNUM:
case ISD::STRICT_FMINNUM:
case ISD::STRICT_FCEIL:
case ISD::STRICT_FFLOOR:
case ISD::STRICT_FROUND:
case ISD::STRICT_FTRUNC:
// These pseudo-ops get legalized as if they were their non-strict
// equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
// is also legal, but if ISD::FSQRT requires expansion then so does
@ -3940,16 +3944,19 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::EXP2_PPCF128));
break;
case ISD::FTRUNC:
case ISD::STRICT_FTRUNC:
Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
RTLIB::TRUNC_F80, RTLIB::TRUNC_F128,
RTLIB::TRUNC_PPCF128));
break;
case ISD::FFLOOR:
case ISD::STRICT_FFLOOR:
Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
RTLIB::FLOOR_F80, RTLIB::FLOOR_F128,
RTLIB::FLOOR_PPCF128));
break;
case ISD::FCEIL:
case ISD::STRICT_FCEIL:
Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64,
RTLIB::CEIL_F80, RTLIB::CEIL_F128,
RTLIB::CEIL_PPCF128));
@ -3969,6 +3976,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::NEARBYINT_PPCF128));
break;
case ISD::FROUND:
case ISD::STRICT_FROUND:
Results.push_back(ExpandFPLibCall(Node, RTLIB::ROUND_F32,
RTLIB::ROUND_F64,
RTLIB::ROUND_F80,

View File

@ -308,6 +308,10 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::STRICT_FNEARBYINT:
case ISD::STRICT_FMAXNUM:
case ISD::STRICT_FMINNUM:
case ISD::STRICT_FCEIL:
case ISD::STRICT_FFLOOR:
case ISD::STRICT_FROUND:
case ISD::STRICT_FTRUNC:
// These pseudo-ops get legalized as if they were their non-strict
// equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
// is also legal, but if ISD::FSQRT requires expansion then so does
@ -758,6 +762,10 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
case ISD::STRICT_FNEARBYINT:
case ISD::STRICT_FMAXNUM:
case ISD::STRICT_FMINNUM:
case ISD::STRICT_FCEIL:
case ISD::STRICT_FFLOOR:
case ISD::STRICT_FROUND:
case ISD::STRICT_FTRUNC:
return ExpandStrictFPOp(Op);
default:
return DAG.UnrollVectorOp(Op.getNode());

View File

@ -166,6 +166,10 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::STRICT_FNEARBYINT:
case ISD::STRICT_FMAXNUM:
case ISD::STRICT_FMINNUM:
case ISD::STRICT_FCEIL:
case ISD::STRICT_FFLOOR:
case ISD::STRICT_FROUND:
case ISD::STRICT_FTRUNC:
R = ScalarizeVecRes_StrictFPOp(N);
break;
}
@ -838,6 +842,10 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::STRICT_FNEARBYINT:
case ISD::STRICT_FMAXNUM:
case ISD::STRICT_FMINNUM:
case ISD::STRICT_FCEIL:
case ISD::STRICT_FFLOOR:
case ISD::STRICT_FROUND:
case ISD::STRICT_FTRUNC:
SplitVecRes_StrictFPOp(N, Lo, Hi);
break;
}
@ -2406,6 +2414,10 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::STRICT_FNEARBYINT:
case ISD::STRICT_FMAXNUM:
case ISD::STRICT_FMINNUM:
case ISD::STRICT_FCEIL:
case ISD::STRICT_FFLOOR:
case ISD::STRICT_FROUND:
case ISD::STRICT_FTRUNC:
Res = WidenVecRes_StrictFP(N);
break;

View File

@ -7384,6 +7384,10 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
break;
case ISD::STRICT_FMAXNUM: NewOpc = ISD::FMAXNUM; break;
case ISD::STRICT_FMINNUM: NewOpc = ISD::FMINNUM; break;
case ISD::STRICT_FCEIL: NewOpc = ISD::FCEIL; IsUnary = true; break;
case ISD::STRICT_FFLOOR: NewOpc = ISD::FFLOOR; IsUnary = true; break;
case ISD::STRICT_FROUND: NewOpc = ISD::FROUND; IsUnary = true; break;
case ISD::STRICT_FTRUNC: NewOpc = ISD::FTRUNC; IsUnary = true; break;
}
// We're taking this node out of the chain, so we need to re-link things.

View File

@ -5633,6 +5633,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::experimental_constrained_nearbyint:
case Intrinsic::experimental_constrained_maxnum:
case Intrinsic::experimental_constrained_minnum:
case Intrinsic::experimental_constrained_ceil:
case Intrinsic::experimental_constrained_floor:
case Intrinsic::experimental_constrained_round:
case Intrinsic::experimental_constrained_trunc:
visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I));
return nullptr;
case Intrinsic::fmuladd: {
@ -6386,6 +6390,18 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
case Intrinsic::experimental_constrained_minnum:
Opcode = ISD::STRICT_FMINNUM;
break;
case Intrinsic::experimental_constrained_ceil:
Opcode = ISD::STRICT_FCEIL;
break;
case Intrinsic::experimental_constrained_floor:
Opcode = ISD::STRICT_FFLOOR;
break;
case Intrinsic::experimental_constrained_round:
Opcode = ISD::STRICT_FROUND;
break;
case Intrinsic::experimental_constrained_trunc:
Opcode = ISD::STRICT_FTRUNC;
break;
}
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Chain = getRoot();

View File

@ -193,13 +193,17 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::STRICT_FCOS: return "strict_fcos";
case ISD::FSINCOS: return "fsincos";
case ISD::FTRUNC: return "ftrunc";
case ISD::STRICT_FTRUNC: return "strict_ftrunc";
case ISD::FFLOOR: return "ffloor";
case ISD::STRICT_FFLOOR: return "strict_ffloor";
case ISD::FCEIL: return "fceil";
case ISD::STRICT_FCEIL: return "strict_fceil";
case ISD::FRINT: return "frint";
case ISD::STRICT_FRINT: return "strict_frint";
case ISD::FNEARBYINT: return "fnearbyint";
case ISD::STRICT_FNEARBYINT: return "strict_fnearbyint";
case ISD::FROUND: return "fround";
case ISD::STRICT_FROUND: return "strict_fround";
case ISD::FEXP: return "fexp";
case ISD::STRICT_FEXP: return "strict_fexp";
case ISD::FEXP2: return "fexp2";

View File

@ -152,6 +152,10 @@ bool ConstrainedFPIntrinsic::isUnaryOp() const {
case Intrinsic::experimental_constrained_log2:
case Intrinsic::experimental_constrained_rint:
case Intrinsic::experimental_constrained_nearbyint:
case Intrinsic::experimental_constrained_ceil:
case Intrinsic::experimental_constrained_floor:
case Intrinsic::experimental_constrained_round:
case Intrinsic::experimental_constrained_trunc:
return true;
}
}

View File

@ -4106,6 +4106,10 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
case Intrinsic::experimental_constrained_nearbyint:
case Intrinsic::experimental_constrained_maxnum:
case Intrinsic::experimental_constrained_minnum:
case Intrinsic::experimental_constrained_ceil:
case Intrinsic::experimental_constrained_floor:
case Intrinsic::experimental_constrained_round:
case Intrinsic::experimental_constrained_trunc:
visitConstrainedFPIntrinsic(
cast<ConstrainedFPIntrinsic>(*CS.getInstruction()));
break;

View File

@ -2423,6 +2423,409 @@ entry:
ret <4 x double> %min
}
define <1 x float> @constrained_vector_ceil_v1f32() {
; CHECK-LABEL: constrained_vector_ceil_v1f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq ceilf
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
entry:
%ceil = call <1 x float> @llvm.experimental.constrained.ceil.v1f32(
<1 x float> <float 1.5>,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <1 x float> %ceil
}
define <2 x double> @constrained_vector_ceil_v2f64() {
; CHECK-LABEL: constrained_vector_ceil_v2f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq ceil
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq ceil
; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
entry:
%ceil = call <2 x double> @llvm.experimental.constrained.ceil.v2f64(
<2 x double> <double 1.1, double 1.9>,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <2 x double> %ceil
}
define <3 x float> @constrained_vector_ceil_v3f32() {
; CHECK-LABEL: constrained_vector_ceil_v3f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq ceilf
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq ceilf
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq ceilf
; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
entry:
%ceil = call <3 x float> @llvm.experimental.constrained.ceil.v3f32(
<3 x float> <float 1.5, float 2.5, float 3.5>,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <3 x float> %ceil
}
define <3 x double> @constrained_vector_ceil_v3f64() {
; CHECK-LABEL: constrained_vector_ceil_v3f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq ceil
; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq ceil
; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq ceil
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd (%rsp), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
entry:
%ceil = call <3 x double> @llvm.experimental.constrained.ceil.v3f64(
<3 x double> <double 1.1, double 1.9, double 1.5>,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <3 x double> %ceil
}
define <1 x float> @constrained_vector_floor_v1f32() {
; CHECK-LABEL: constrained_vector_floor_v1f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq floorf
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
entry:
%floor = call <1 x float> @llvm.experimental.constrained.floor.v1f32(
<1 x float> <float 1.5>,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <1 x float> %floor
}
define <2 x double> @constrained_vector_floor_v2f64() {
; CHECK-LABEL: constrained_vector_floor_v2f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq floor
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq floor
; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
entry:
%floor = call <2 x double> @llvm.experimental.constrained.floor.v2f64(
<2 x double> <double 1.1, double 1.9>,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <2 x double> %floor
}
define <3 x float> @constrained_vector_floor_v3f32() {
; CHECK-LABEL: constrained_vector_floor_v3f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq floorf
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq floorf
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq floorf
; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
entry:
%floor = call <3 x float> @llvm.experimental.constrained.floor.v3f32(
<3 x float> <float 1.5, float 2.5, float 3.5>,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <3 x float> %floor
}
define <3 x double> @constrained_vector_floor_v3f64() {
; CHECK-LABEL: constrained_vector_floor_v3f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq floor
; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq floor
; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq floor
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd (%rsp), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
entry:
%floor = call <3 x double> @llvm.experimental.constrained.floor.v3f64(
<3 x double> <double 1.1, double 1.9, double 1.5>,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <3 x double> %floor
}
define <1 x float> @constrained_vector_round_v1f32() {
; CHECK-LABEL: constrained_vector_round_v1f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq roundf
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
entry:
%round = call <1 x float> @llvm.experimental.constrained.round.v1f32(
<1 x float> <float 1.5>,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <1 x float> %round
}
define <2 x double> @constrained_vector_round_v2f64() {
; CHECK-LABEL: constrained_vector_round_v2f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq round
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq round
; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
entry:
%round = call <2 x double> @llvm.experimental.constrained.round.v2f64(
<2 x double> <double 1.1, double 1.9>,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <2 x double> %round
}
define <3 x float> @constrained_vector_round_v3f32() {
; CHECK-LABEL: constrained_vector_round_v3f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq roundf
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq roundf
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq roundf
; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
entry:
%round = call <3 x float> @llvm.experimental.constrained.round.v3f32(
<3 x float> <float 1.5, float 2.5, float 3.5>,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <3 x float> %round
}
define <3 x double> @constrained_vector_round_v3f64() {
; CHECK-LABEL: constrained_vector_round_v3f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq round
; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq round
; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq round
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd (%rsp), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
entry:
%round = call <3 x double> @llvm.experimental.constrained.round.v3f64(
<3 x double> <double 1.1, double 1.9, double 1.5>,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <3 x double> %round
}
define <1 x float> @constrained_vector_trunc_v1f32() {
; CHECK-LABEL: constrained_vector_trunc_v1f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq truncf
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
entry:
%trunc = call <1 x float> @llvm.experimental.constrained.trunc.v1f32(
<1 x float> <float 1.5>,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <1 x float> %trunc
}
define <2 x double> @constrained_vector_trunc_v2f64() {
; CHECK-LABEL: constrained_vector_trunc_v2f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq trunc
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq trunc
; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
entry:
%trunc = call <2 x double> @llvm.experimental.constrained.trunc.v2f64(
<2 x double> <double 1.1, double 1.9>,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <2 x double> %trunc
}
define <3 x float> @constrained_vector_trunc_v3f32() {
; CHECK-LABEL: constrained_vector_trunc_v3f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq truncf
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq truncf
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq truncf
; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
entry:
%trunc = call <3 x float> @llvm.experimental.constrained.trunc.v3f32(
<3 x float> <float 1.5, float 2.5, float 3.5>,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <3 x float> %trunc
}
define <3 x double> @constrained_vector_trunc_v3f64() {
; CHECK-LABEL: constrained_vector_trunc_v3f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq trunc
; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq trunc
; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq trunc
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd (%rsp), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
entry:
%trunc = call <3 x double> @llvm.experimental.constrained.trunc.v3f64(
<3 x double> <double 1.1, double 1.9, double 1.5>,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <3 x double> %trunc
}
; Single width declarations
declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata)
@ -2443,6 +2846,10 @@ declare <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double>, met
declare <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.maxnum.v2f64(<2 x double>, <2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.minnum.v2f64(<2 x double>, <2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double>, metadata, metadata)
; Scalar width declarations
declare <1 x float> @llvm.experimental.constrained.fadd.v1f32(<1 x float>, <1 x float>, metadata, metadata)
@ -2464,6 +2871,10 @@ declare <1 x float> @llvm.experimental.constrained.rint.v1f32(<1 x float>, metad
declare <1 x float> @llvm.experimental.constrained.nearbyint.v1f32(<1 x float>, metadata, metadata)
declare <1 x float> @llvm.experimental.constrained.maxnum.v1f32(<1 x float>, <1 x float>, metadata, metadata)
declare <1 x float> @llvm.experimental.constrained.minnum.v1f32(<1 x float>, <1 x float>, metadata, metadata)
declare <1 x float> @llvm.experimental.constrained.ceil.v1f32(<1 x float>, metadata, metadata)
declare <1 x float> @llvm.experimental.constrained.floor.v1f32(<1 x float>, metadata, metadata)
declare <1 x float> @llvm.experimental.constrained.round.v1f32(<1 x float>, metadata, metadata)
declare <1 x float> @llvm.experimental.constrained.trunc.v1f32(<1 x float>, metadata, metadata)
; Illegal width declarations
declare <3 x float> @llvm.experimental.constrained.fadd.v3f32(<3 x float>, <3 x float>, metadata, metadata)
@ -2504,6 +2915,14 @@ declare <3 x float> @llvm.experimental.constrained.maxnum.v3f32(<3 x float>, <3
declare <3 x double> @llvm.experimental.constrained.maxnum.v3f64(<3 x double>, <3 x double>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.minnum.v3f32(<3 x float>, <3 x float>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.minnum.v3f64(<3 x double>, <3 x double>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.ceil.v3f32(<3 x float>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.ceil.v3f64(<3 x double>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.floor.v3f32(<3 x float>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.floor.v3f64(<3 x double>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.round.v3f32(<3 x float>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.round.v3f64(<3 x double>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.trunc.v3f32(<3 x float>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.trunc.v3f64(<3 x double>, metadata, metadata)
; Double width declarations
declare <4 x double> @llvm.experimental.constrained.fadd.v4f64(<4 x double>, <4 x double>, metadata, metadata)
@ -2525,3 +2944,8 @@ declare <4 x double> @llvm.experimental.constrained.rint.v4f64(<4 x double>, met
declare <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.maxnum.v4f64(<4 x double>, <4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.minnum.v4f64(<4 x double>, <4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.ceil.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.floor.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.round.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.trunc.v4f64(<4 x double>, metadata, metadata)