Add constrained fptrunc and fpext intrinsics.

The new fptrunc and fpext intrinsics are constrained versions of the
regular fptrunc and fpext instructions.

Reviewed by:	Andrew Kaylor, Craig Topper, Cameron McInally, Conner Abbot
Approved by:	Craig Topper
Differential Revision: https://reviews.llvm.org/D55897

llvm-svn: 360581
This commit is contained in:
Kevin P. Neal 2019-05-13 13:23:30 +00:00
parent 4039373bd7
commit 2a670b2329
18 changed files with 668 additions and 32 deletions

View File

@ -14823,6 +14823,77 @@ The result produced is the product of the first two operands added to the third
operand computed with infinite precision, and then rounded to the target
precision.
'``llvm.experimental.constrained.fptrunc``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Syntax:
"""""""
::
declare <ty2>
@llvm.experimental.constrained.fptrunc(<type> <value>,
metadata <rounding mode>,
metadata <exception behavior>)
Overview:
"""""""""
The '``llvm.experimental.constrained.fptrunc``' intrinsic truncates ``value``
to type ``ty2``.
Arguments:
""""""""""
The first argument to the '``llvm.experimental.constrained.fptrunc``'
intrinsic must be :ref:`floating point <t_floating>` or :ref:`vector
<t_vector>` of floating point values. This argument must be larger in size
than the result.
The second and third arguments specify the rounding mode and exception
behavior as described above.
Semantics:
""""""""""
The result produced is a floating point value truncated to be smaller in size
than the operand.
'``llvm.experimental.constrained.fpext``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Syntax:
"""""""
::
declare <ty2>
@llvm.experimental.constrained.fpext(<type> <value>,
metadata <exception behavior>)
Overview:
"""""""""
The '``llvm.experimental.constrained.fpext``' intrinsic extends a
floating-point ``value`` to a larger floating-point value.
Arguments:
""""""""""
The first argument to the '``llvm.experimental.constrained.fpext``'
intrinsic must be :ref:`floating point <t_floating>` or :ref:`vector
<t_vector>` of floating point values. This argument must be smaller in size
than the result.
The second argument specifies the exception behavior as described above.
Semantics:
""""""""""
The result produced is a floating point value extended to be larger in size
than the operand. All restrictions that apply to the fpext instruction also
apply to this intrinsic.
Constrained libm-equivalent Intrinsics
--------------------------------------

View File

@ -297,6 +297,26 @@ namespace ISD {
STRICT_FRINT, STRICT_FNEARBYINT, STRICT_FMAXNUM, STRICT_FMINNUM,
STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND, STRICT_FTRUNC,
/// X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating
/// point type down to the precision of the destination VT. TRUNC is a
/// flag, which is always an integer that is zero or one. If TRUNC is 0,
/// this is a normal rounding, if it is 1, this FP_ROUND is known to not
/// change the value of Y.
///
/// The TRUNC = 1 case is used in cases where we know that the value will
/// not be modified by the node, because Y is not using any of the extra
/// precision of source type. This allows certain transformations like
/// STRICT_FP_EXTEND(STRICT_FP_ROUND(X,1)) -> X which are not safe for
/// STRICT_FP_EXTEND(STRICT_FP_ROUND(X,0)) because the extra bits aren't
/// removed.
/// It is used to limit optimizations while the DAG is being optimized.
STRICT_FP_ROUND,
/// X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP
/// type.
/// It is used to limit optimizations while the DAG is being optimized.
STRICT_FP_EXTEND,
/// FMA - Perform a * b + c with no intermediate rounding step.
FMA,

View File

@ -691,6 +691,8 @@ public:
case ISD::STRICT_FFLOOR:
case ISD::STRICT_FROUND:
case ISD::STRICT_FTRUNC:
case ISD::STRICT_FP_ROUND:
case ISD::STRICT_FP_EXTEND:
return true;
}
}

View File

@ -891,6 +891,8 @@ public:
case ISD::STRICT_FFLOOR: EqOpc = ISD::FFLOOR; break;
case ISD::STRICT_FROUND: EqOpc = ISD::FROUND; break;
case ISD::STRICT_FTRUNC: EqOpc = ISD::FTRUNC; break;
case ISD::STRICT_FP_ROUND: EqOpc = ISD::FP_ROUND; break;
case ISD::STRICT_FP_EXTEND: EqOpc = ISD::FP_EXTEND; break;
}
auto Action = getOperationAction(EqOpc, VT);

View File

@ -238,6 +238,8 @@ namespace llvm {
case Intrinsic::experimental_constrained_fdiv:
case Intrinsic::experimental_constrained_frem:
case Intrinsic::experimental_constrained_fma:
case Intrinsic::experimental_constrained_fptrunc:
case Intrinsic::experimental_constrained_fpext:
case Intrinsic::experimental_constrained_sqrt:
case Intrinsic::experimental_constrained_pow:
case Intrinsic::experimental_constrained_powi:

View File

@ -607,6 +607,15 @@ let IntrProperties = [IntrInaccessibleMemOnly] in {
llvm_metadata_ty,
llvm_metadata_ty ]>;
def int_experimental_constrained_fptrunc : Intrinsic<[ llvm_anyfloat_ty ],
[ llvm_anyfloat_ty,
llvm_metadata_ty,
llvm_metadata_ty ]>;
def int_experimental_constrained_fpext : Intrinsic<[ llvm_anyfloat_ty ],
[ llvm_anyfloat_ty,
llvm_metadata_ty ]>;
// These intrinsics are sensitive to the rounding mode so we need constrained
// versions of each of them. When strict rounding and exception control are
// not required the non-constrained versions of these intrinsics should be
@ -688,9 +697,7 @@ let IntrProperties = [IntrInaccessibleMemOnly] in {
llvm_metadata_ty,
llvm_metadata_ty ]>;
}
// FIXME: Add intrinsics for fcmp, fptrunc, fpext, fptoui and fptosi.
// FIXME: Add intrinsics for fabs and copysign?
// FIXME: Add intrinsics for fcmp, fptoui and fptosi.
//===------------------------- Expect Intrinsics --------------------------===//
//

View File

@ -154,6 +154,8 @@ private:
SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT,
const SDLoc &dl);
SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT,
const SDLoc &dl, SDValue ChainIn);
SDValue ExpandBUILD_VECTOR(SDNode *Node);
SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node);
void ExpandDYNAMIC_STACKALLOC(SDNode *Node,
@ -1115,6 +1117,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::STRICT_FFLOOR:
case ISD::STRICT_FROUND:
case ISD::STRICT_FTRUNC:
case ISD::STRICT_FP_ROUND:
case ISD::STRICT_FP_EXTEND:
// These pseudo-ops get legalized as if they were their non-strict
// equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
// is also legal, but if ISD::FSQRT requires expansion then so does
@ -1741,6 +1745,12 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS,
/// The resultant code need not be legal.
SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT,
EVT DestVT, const SDLoc &dl) {
return EmitStackConvert(SrcOp, SlotVT, DestVT, dl, DAG.getEntryNode());
}
SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT,
EVT DestVT, const SDLoc &dl,
SDValue Chain) {
// Create the stack frame object.
unsigned SrcAlign = DAG.getDataLayout().getPrefTypeAlignment(
SrcOp.getValueType().getTypeForEVT(*DAG.getContext()));
@ -1762,12 +1772,12 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT,
SDValue Store;
if (SrcSize > SlotSize)
Store = DAG.getTruncStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, PtrInfo,
Store = DAG.getTruncStore(Chain, dl, SrcOp, FIPtr, PtrInfo,
SlotVT, SrcAlign);
else {
assert(SrcSize == SlotSize && "Invalid store");
Store =
DAG.getStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, PtrInfo, SrcAlign);
DAG.getStore(Chain, dl, SrcOp, FIPtr, PtrInfo, SrcAlign);
}
// Result is a load from the stack slot.
@ -2777,12 +2787,27 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
break;
}
case ISD::STRICT_FP_ROUND:
Tmp1 = EmitStackConvert(Node->getOperand(1),
Node->getValueType(0),
Node->getValueType(0), dl, Node->getOperand(0));
ReplaceNode(Node, Tmp1.getNode());
LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_ROUND node\n");
return true;
case ISD::FP_ROUND:
case ISD::BITCAST:
Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0),
Tmp1 = EmitStackConvert(Node->getOperand(0),
Node->getValueType(0),
Node->getValueType(0), dl);
Results.push_back(Tmp1);
break;
case ISD::STRICT_FP_EXTEND:
Tmp1 = EmitStackConvert(Node->getOperand(1),
Node->getOperand(1).getValueType(),
Node->getValueType(0), dl, Node->getOperand(0));
ReplaceNode(Node, Tmp1.getNode());
LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_EXTEND node\n");
return true;
case ISD::FP_EXTEND:
Tmp1 = EmitStackConvert(Node->getOperand(0),
Node->getOperand(0).getValueType(),

View File

@ -687,6 +687,7 @@ private:
SDValue ScalarizeVecRes_BUILD_VECTOR(SDNode *N);
SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N);
SDValue ScalarizeVecRes_FP_ROUND(SDNode *N);
SDValue ScalarizeVecRes_STRICT_FP_ROUND(SDNode *N);
SDValue ScalarizeVecRes_FPOWI(SDNode *N);
SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
@ -710,6 +711,7 @@ private:
SDValue ScalarizeVecOp_VSETCC(SDNode *N);
SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo);
SDValue ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo);
SDValue ScalarizeVecOp_STRICT_FP_ROUND(SDNode *N, unsigned OpNo);
SDValue ScalarizeVecOp_VECREDUCE(SDNode *N);
//===--------------------------------------------------------------------===//
@ -820,6 +822,7 @@ private:
SDValue WidenVecRes_StrictFP(SDNode *N);
SDValue WidenVecRes_OverflowOp(SDNode *N, unsigned ResNo);
SDValue WidenVecRes_Convert(SDNode *N);
SDValue WidenVecRes_Convert_StrictFP(SDNode *N);
SDValue WidenVecRes_FCOPYSIGN(SDNode *N);
SDValue WidenVecRes_POWI(SDNode *N);
SDValue WidenVecRes_Shift(SDNode *N);

View File

@ -331,6 +331,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::STRICT_FFLOOR:
case ISD::STRICT_FROUND:
case ISD::STRICT_FTRUNC:
case ISD::STRICT_FP_ROUND:
case ISD::STRICT_FP_EXTEND:
// These pseudo-ops get legalized as if they were their non-strict
// equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
// is also legal, but if ISD::FSQRT requires expansion then so does
@ -1301,7 +1303,7 @@ SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) {
if (OperVT.isVector())
Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
EltVT, Oper, Idx);
OperVT.getVectorElementType(), Oper, Idx);
Opers.push_back(Oper);
}

View File

@ -50,6 +50,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break;
case ISD::BUILD_VECTOR: R = ScalarizeVecRes_BUILD_VECTOR(N); break;
case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
case ISD::STRICT_FP_ROUND: R = ScalarizeVecRes_STRICT_FP_ROUND(N); break;
case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break;
case ISD::FP_ROUND_INREG: R = ScalarizeVecRes_InregOp(N); break;
case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break;
@ -170,6 +171,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::STRICT_FFLOOR:
case ISD::STRICT_FROUND:
case ISD::STRICT_FTRUNC:
case ISD::STRICT_FP_EXTEND:
R = ScalarizeVecRes_StrictFPOp(N);
break;
case ISD::UADDO:
@ -321,6 +323,18 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) {
NewVT, Op, N->getOperand(1));
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_STRICT_FP_ROUND(SDNode *N) {
EVT NewVT = N->getValueType(0).getVectorElementType();
SDValue Op = GetScalarizedVector(N->getOperand(1));
SDValue Res = DAG.getNode(ISD::STRICT_FP_ROUND, SDLoc(N),
{ NewVT, MVT::Other },
{ N->getOperand(0), Op, N->getOperand(2) });
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
return Res;
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) {
SDValue Op = GetScalarizedVector(N->getOperand(0));
return DAG.getNode(ISD::FPOWI, SDLoc(N),
@ -604,6 +618,9 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::STORE:
Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo);
break;
case ISD::STRICT_FP_ROUND:
Res = ScalarizeVecOp_STRICT_FP_ROUND(N, OpNo);
break;
case ISD::FP_ROUND:
Res = ScalarizeVecOp_FP_ROUND(N, OpNo);
break;
@ -752,6 +769,20 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo) {
return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
}
SDValue DAGTypeLegalizer::ScalarizeVecOp_STRICT_FP_ROUND(SDNode *N,
unsigned OpNo) {
assert(OpNo == 1 && "Wrong operand for scalarization!");
SDValue Elt = GetScalarizedVector(N->getOperand(1));
SDValue Res = DAG.getNode(ISD::STRICT_FP_ROUND, SDLoc(N),
{ N->getValueType(0).getVectorElementType(),
MVT::Other },
{ N->getOperand(0), Elt, N->getOperand(2) });
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
}
SDValue DAGTypeLegalizer::ScalarizeVecOp_VECREDUCE(SDNode *N) {
SDValue Res = GetScalarizedVector(N->getOperand(0));
// Result type may be wider than element type.
@ -844,7 +875,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FNEARBYINT:
case ISD::FNEG:
case ISD::FP_EXTEND:
case ISD::STRICT_FP_EXTEND:
case ISD::FP_ROUND:
case ISD::STRICT_FP_ROUND:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::FRINT:
@ -1615,15 +1648,34 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
// If the input also splits, handle it directly for a compile time speedup.
// Otherwise split it by hand.
EVT InVT = N->getOperand(0).getValueType();
unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
EVT InVT = N->getOperand(OpNo).getValueType();
if (getTypeAction(InVT) == TargetLowering::TypeSplitVector)
GetSplitVector(N->getOperand(0), Lo, Hi);
GetSplitVector(N->getOperand(OpNo), Lo, Hi);
else
std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, OpNo);
if (N->getOpcode() == ISD::FP_ROUND) {
Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1));
Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1));
} else if (N->getOpcode() == ISD::STRICT_FP_ROUND) {
Lo = DAG.getNode(N->getOpcode(), dl, { LoVT, MVT::Other },
{ N->getOperand(0), Lo, N->getOperand(2) });
Hi = DAG.getNode(N->getOpcode(), dl, { HiVT, MVT::Other },
{ N->getOperand(0), Hi, N->getOperand(2) });
SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
Lo.getValue(1), Hi.getValue(1));
ReplaceValueWith(SDValue(N, 1), NewChain);
} else if (N->isStrictFPOpcode()) {
Lo = DAG.getNode(N->getOpcode(), dl, { LoVT, MVT::Other },
{ N->getOperand(0), Lo });
Hi = DAG.getNode(N->getOpcode(), dl, { HiVT, MVT::Other },
{ N->getOperand(0), Hi });
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
Lo.getValue(1), Hi.getValue(1));
ReplaceValueWith(SDValue(N, 1), NewChain);
} else {
Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
@ -1824,6 +1876,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::TRUNCATE:
Res = SplitVecOp_TruncateHelper(N);
break;
case ISD::STRICT_FP_ROUND:
case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break;
case ISD::FCOPYSIGN: Res = SplitVecOp_FCOPYSIGN(N); break;
case ISD::STORE:
@ -1853,6 +1906,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::CTTZ:
case ISD::CTLZ:
case ISD::CTPOP:
case ISD::STRICT_FP_EXTEND:
case ISD::FP_EXTEND:
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
@ -1894,6 +1948,10 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
if (Res.getNode() == N)
return true;
if (N->isStrictFPOpcode())
assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 2 &&
"Invalid operand expansion");
else
assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
"Invalid operand expansion");
@ -1982,14 +2040,30 @@ SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
EVT ResVT = N->getValueType(0);
SDValue Lo, Hi;
SDLoc dl(N);
GetSplitVector(N->getOperand(0), Lo, Hi);
GetSplitVector(N->getOperand(N->isStrictFPOpcode() ? 1 : 0), Lo, Hi);
EVT InVT = Lo.getValueType();
EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
InVT.getVectorNumElements());
if (N->isStrictFPOpcode()) {
Lo = DAG.getNode(N->getOpcode(), dl, { OutVT, MVT::Other },
{ N->getOperand(0), Lo });
Hi = DAG.getNode(N->getOpcode(), dl, { OutVT, MVT::Other },
{ N->getOperand(0), Hi });
// Build a factor node to remember that this operation is independent
// of the other one.
SDValue Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
Hi.getValue(1));
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Ch);
} else {
Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo);
Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi);
}
return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
}
@ -2461,14 +2535,26 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) {
EVT ResVT = N->getValueType(0);
SDValue Lo, Hi;
SDLoc DL(N);
GetSplitVector(N->getOperand(0), Lo, Hi);
GetSplitVector(N->getOperand(N->isStrictFPOpcode() ? 1 : 0), Lo, Hi);
EVT InVT = Lo.getValueType();
EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
InVT.getVectorNumElements());
if (N->isStrictFPOpcode()) {
Lo = DAG.getNode(N->getOpcode(), DL, { OutVT, MVT::Other },
{ N->getOperand(0), Lo, N->getOperand(2) });
Hi = DAG.getNode(N->getOpcode(), DL, { OutVT, MVT::Other },
{ N->getOperand(0), Hi, N->getOperand(2) });
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
Lo.getValue(1), Hi.getValue(1));
ReplaceValueWith(SDValue(N, 1), NewChain);
} else {
Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1));
Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1));
}
return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
}
@ -2632,6 +2718,11 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_Convert(N);
break;
case ISD::STRICT_FP_EXTEND:
case ISD::STRICT_FP_ROUND:
Res = WidenVecRes_Convert_StrictFP(N);
break;
case ISD::FABS:
case ISD::FCEIL:
case ISD::FCOS:
@ -3109,6 +3200,43 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
return DAG.getBuildVector(WidenVT, DL, Ops);
}
SDValue DAGTypeLegalizer::WidenVecRes_Convert_StrictFP(SDNode *N) {
SDValue InOp = N->getOperand(1);
SDLoc DL(N);
SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned WidenNumElts = WidenVT.getVectorNumElements();
SmallVector<EVT, 2> WidenVTs = { WidenVT, MVT::Other };
EVT InVT = InOp.getValueType();
EVT InEltVT = InVT.getVectorElementType();
unsigned Opcode = N->getOpcode();
// FIXME: Optimizations need to be implemented here.
// Otherwise unroll into some nasty scalar code and rebuild the vector.
EVT EltVT = WidenVT.getVectorElementType();
SmallVector<EVT, 2> EltVTs = { EltVT, MVT::Other };
SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT));
SmallVector<SDValue, 32> OpChains;
// Use the original element count so we don't do more scalar opts than
// necessary.
unsigned MinElts = N->getValueType(0).getVectorNumElements();
for (unsigned i=0; i < MinElts; ++i) {
NewOps[1] = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp,
DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
Ops[i] = DAG.getNode(Opcode, DL, EltVTs, NewOps);
OpChains.push_back(Ops[i].getValue(1));
}
SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OpChains);
ReplaceValueWith(SDValue(N, 1), NewChain);
return DAG.getBuildVector(WidenVT, DL, Ops);
}
SDValue DAGTypeLegalizer::WidenVecRes_EXTEND_VECTOR_INREG(SDNode *N) {
unsigned Opcode = N->getOpcode();
SDValue InOp = N->getOperand(0);
@ -3895,6 +4023,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
break;
case ISD::FP_EXTEND:
case ISD::STRICT_FP_EXTEND:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::SINT_TO_FP:
@ -3929,6 +4058,10 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
return true;
if (N->isStrictFPOpcode())
assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 2 &&
"Invalid operand expansion");
else
assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
"Invalid operand expansion");
@ -4010,7 +4143,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
EVT EltVT = VT.getVectorElementType();
SDLoc dl(N);
unsigned NumElts = VT.getVectorNumElements();
SDValue InOp = N->getOperand(0);
SDValue InOp = N->getOperand(N->isStrictFPOpcode() ? 1 : 0);
assert(getTypeAction(InOp.getValueType()) ==
TargetLowering::TypeWidenVector &&
"Unexpected type action");
@ -4019,10 +4152,19 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
unsigned Opcode = N->getOpcode();
// See if a widened result type would be legal, if so widen the node.
// FIXME: This isn't safe for StrictFP. Other optimization here is needed.
EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
InVT.getVectorNumElements());
if (TLI.isTypeLegal(WideVT)) {
SDValue Res = DAG.getNode(Opcode, dl, WideVT, InOp);
if (TLI.isTypeLegal(WideVT) && !N->isStrictFPOpcode()) {
SDValue Res;
if (N->isStrictFPOpcode()) {
Res = DAG.getNode(Opcode, dl, { WideVT, MVT::Other },
{ N->getOperand(0), InOp });
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
} else
Res = DAG.getNode(Opcode, dl, WideVT, InOp);
return DAG.getNode(
ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
@ -4032,12 +4174,26 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
// Unroll the convert into some scalar code and create a nasty build vector.
SmallVector<SDValue, 16> Ops(NumElts);
for (unsigned i=0; i < NumElts; ++i)
if (N->isStrictFPOpcode()) {
SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
SmallVector<SDValue, 32> OpChains;
for (unsigned i=0; i < NumElts; ++i) {
NewOps[1] = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
Ops[i] = DAG.getNode(Opcode, dl, { EltVT, MVT::Other }, NewOps);
OpChains.push_back(Ops[i].getValue(1));
}
SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains);
ReplaceValueWith(SDValue(N, 1), NewChain);
} else {
for (unsigned i = 0; i < NumElts; ++i)
Ops[i] = DAG.getNode(
Opcode, dl, EltVT,
DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))));
}
return DAG.getBuildVector(VT, dl, Ops);
}

View File

@ -7611,6 +7611,10 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
case ISD::STRICT_FFLOOR: NewOpc = ISD::FFLOOR; IsUnary = true; break;
case ISD::STRICT_FROUND: NewOpc = ISD::FROUND; IsUnary = true; break;
case ISD::STRICT_FTRUNC: NewOpc = ISD::FTRUNC; IsUnary = true; break;
// STRICT_FP_ROUND takes an extra argument describing whether or not
// the value will be changed by this node. See ISDOpcodes.h for details.
case ISD::STRICT_FP_ROUND: NewOpc = ISD::FP_ROUND; break;
case ISD::STRICT_FP_EXTEND: NewOpc = ISD::FP_EXTEND; IsUnary = true; break;
}
// We're taking this node out of the chain, so we need to re-link things.
@ -7618,8 +7622,19 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
SDValue OutputChain = SDValue(Node, 1);
ReplaceAllUsesOfValueWith(OutputChain, InputChain);
SDVTList VTs = getVTList(Node->getOperand(1).getValueType());
SDVTList VTs;
SDNode *Res = nullptr;
switch (OrigOpc) {
default:
VTs = getVTList(Node->getOperand(1).getValueType());
break;
case ISD::STRICT_FP_ROUND:
case ISD::STRICT_FP_EXTEND:
VTs = getVTList(Node->getValueType(0));
break;
}
if (IsUnary)
Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1) });
else if (IsTernary)

View File

@ -6078,6 +6078,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::experimental_constrained_fdiv:
case Intrinsic::experimental_constrained_frem:
case Intrinsic::experimental_constrained_fma:
case Intrinsic::experimental_constrained_fptrunc:
case Intrinsic::experimental_constrained_fpext:
case Intrinsic::experimental_constrained_sqrt:
case Intrinsic::experimental_constrained_pow:
case Intrinsic::experimental_constrained_powi:
@ -6834,6 +6836,12 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
case Intrinsic::experimental_constrained_fma:
Opcode = ISD::STRICT_FMA;
break;
case Intrinsic::experimental_constrained_fptrunc:
Opcode = ISD::STRICT_FP_ROUND;
break;
case Intrinsic::experimental_constrained_fpext:
Opcode = ISD::STRICT_FP_EXTEND;
break;
case Intrinsic::experimental_constrained_sqrt:
Opcode = ISD::STRICT_FSQRT;
break;
@ -6897,7 +6905,12 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
SDVTList VTs = DAG.getVTList(ValueVTs);
SDValue Result;
if (FPI.isUnaryOp())
if (Opcode == ISD::STRICT_FP_ROUND)
Result = DAG.getNode(Opcode, sdl, VTs,
{ Chain, getValue(FPI.getArgOperand(0)),
DAG.getTargetConstant(0, sdl,
TLI.getPointerTy(DAG.getDataLayout())) });
else if (FPI.isUnaryOp())
Result = DAG.getNode(Opcode, sdl, VTs,
{ Chain, getValue(FPI.getArgOperand(0)) });
else if (FPI.isTernaryOp())

View File

@ -313,9 +313,11 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::ZERO_EXTEND_VECTOR_INREG: return "zero_extend_vector_inreg";
case ISD::TRUNCATE: return "truncate";
case ISD::FP_ROUND: return "fp_round";
case ISD::STRICT_FP_ROUND: return "strict_fp_round";
case ISD::FLT_ROUNDS_: return "flt_rounds";
case ISD::FP_ROUND_INREG: return "fp_round_inreg";
case ISD::FP_EXTEND: return "fp_extend";
case ISD::STRICT_FP_EXTEND: return "strict_fp_extend";
case ISD::SINT_TO_FP: return "sint_to_fp";
case ISD::UINT_TO_FP: return "uint_to_fp";

View File

@ -142,6 +142,8 @@ bool ConstrainedFPIntrinsic::isUnaryOp() const {
switch (getIntrinsicID()) {
default:
return false;
case Intrinsic::experimental_constrained_fptrunc:
case Intrinsic::experimental_constrained_fpext:
case Intrinsic::experimental_constrained_sqrt:
case Intrinsic::experimental_constrained_sin:
case Intrinsic::experimental_constrained_cos:

View File

@ -4209,6 +4209,8 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
case Intrinsic::experimental_constrained_fdiv:
case Intrinsic::experimental_constrained_frem:
case Intrinsic::experimental_constrained_fma:
case Intrinsic::experimental_constrained_fptrunc:
case Intrinsic::experimental_constrained_fpext:
case Intrinsic::experimental_constrained_sqrt:
case Intrinsic::experimental_constrained_pow:
case Intrinsic::experimental_constrained_powi:
@ -4687,6 +4689,47 @@ void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
HasRoundingMD = true;
break;
case Intrinsic::experimental_constrained_fptrunc:
case Intrinsic::experimental_constrained_fpext: {
if (FPI.getIntrinsicID() == Intrinsic::experimental_constrained_fptrunc) {
Assert((NumOperands == 3),
"invalid arguments for constrained FP intrinsic", &FPI);
HasRoundingMD = true;
} else {
Assert((NumOperands == 2),
"invalid arguments for constrained FP intrinsic", &FPI);
}
HasExceptionMD = true;
Value *Operand = FPI.getArgOperand(0);
Type *OperandTy = Operand->getType();
Value *Result = &FPI;
Type *ResultTy = Result->getType();
Assert(OperandTy->isFPOrFPVectorTy(),
"Intrinsic first argument must be FP or FP vector", &FPI);
Assert(ResultTy->isFPOrFPVectorTy(),
"Intrinsic result must be FP or FP vector", &FPI);
Assert(OperandTy->isVectorTy() == ResultTy->isVectorTy(),
"Intrinsic first argument and result disagree on vector use", &FPI);
if (OperandTy->isVectorTy()) {
auto *OperandVecTy = cast<VectorType>(OperandTy);
auto *ResultVecTy = cast<VectorType>(ResultTy);
Assert(OperandVecTy->getNumElements() == ResultVecTy->getNumElements(),
"Intrinsic first argument and result vector lengths must be equal",
&FPI);
}
if (FPI.getIntrinsicID() == Intrinsic::experimental_constrained_fptrunc) {
Assert(OperandTy->getScalarSizeInBits() > ResultTy->getScalarSizeInBits(),
"Intrinsic first argument's type must be larger than result type",
&FPI);
} else {
Assert(OperandTy->getScalarSizeInBits() < ResultTy->getScalarSizeInBits(),
"Intrinsic first argument's type must be smaller than result type",
&FPI);
}
}
break;
default:
llvm_unreachable("Invalid constrained FP intrinsic!");
}

View File

@ -286,6 +286,29 @@ entry:
ret double %rem
}
; Verify that round(42.1) isn't simplified when the rounding mode is
; unknown.
; Verify that no gross errors happen.
; CHECK-LABEL: @f21
; COMMON: cvtsd2ss
define float @f21() {
entry:
%result = call float @llvm.experimental.constrained.fptrunc.f32.f64(
double 42.1,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %result
}
; CHECK-LABEL: @f22
; COMMON: cvtss2sd
define double @f22(float %x) {
entry:
%result = call double @llvm.experimental.constrained.fpext.f64.f32(float %x,
metadata !"fpexcept.strict")
ret double %result
}
@llvm.fp.env = thread_local global i8 zeroinitializer, section "llvm.metadata"
declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata)
@ -306,3 +329,6 @@ declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadat
declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)
declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)
declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata)

View File

@ -3831,6 +3831,217 @@ entry:
ret <4 x double> %min
}
define <1 x float> @constrained_vector_fptrunc_v1f64() {
; CHECK-LABEL: constrained_vector_fptrunc_v1f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: cvtsd2ss %xmm0, %xmm0
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_fptrunc_v1f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call <1 x float> @llvm.experimental.constrained.fptrunc.v1f32.v1f64(
<1 x double><double 42.1>,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <1 x float> %result
}
define <2 x float> @constrained_vector_fptrunc_v2f64() {
; CHECK-LABEL: constrained_vector_fptrunc_v2f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: cvtsd2ss %xmm0, %xmm1
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: cvtsd2ss %xmm0, %xmm0
; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_fptrunc_v2f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX-NEXT: retq
entry:
%result = call <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(
<2 x double><double 42.1, double 42.2>,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <2 x float> %result
}
define <3 x float> @constrained_vector_fptrunc_v3f64() {
; CHECK-LABEL: constrained_vector_fptrunc_v3f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: cvtsd2ss %xmm0, %xmm1
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: cvtsd2ss %xmm0, %xmm0
; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: cvtsd2ss %xmm1, %xmm1
; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_fptrunc_v3f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; AVX-NEXT: retq
entry:
%result = call <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(
<3 x double><double 42.1, double 42.2,
double 42.3>,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <3 x float> %result
}
define <4 x float> @constrained_vector_fptrunc_v4f64() {
; CHECK-LABEL: constrained_vector_fptrunc_v4f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: cvtsd2ss %xmm0, %xmm0
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: cvtsd2ss %xmm1, %xmm1
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: cvtsd2ss %xmm0, %xmm2
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: cvtsd2ss %xmm0, %xmm0
; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_fptrunc_v4f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtpd2psy {{.*}}(%rip), %xmm0
; AVX-NEXT: retq
entry:
%result = call <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(
<4 x double><double 42.1, double 42.2,
double 42.3, double 42.4>,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <4 x float> %result
}
define <1 x double> @constrained_vector_fpext_v1f32() {
; CHECK-LABEL: constrained_vector_fpext_v1f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: cvtss2sd %xmm0, %xmm0
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_fpext_v1f32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call <1 x double> @llvm.experimental.constrained.fpext.v1f64.v1f32(
<1 x float><float 42.0>,
metadata !"fpexcept.strict")
ret <1 x double> %result
}
define <2 x double> @constrained_vector_fpext_v2f32() {
; CHECK-LABEL: constrained_vector_fpext_v2f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: cvtss2sd %xmm0, %xmm1
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: cvtss2sd %xmm0, %xmm0
; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_fpext_v2f32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT: retq
entry:
%result = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(
<2 x float><float 42.0, float 43.0>,
metadata !"fpexcept.strict")
ret <2 x double> %result
}
define <3 x double> @constrained_vector_fpext_v3f32() {
; CHECK-LABEL: constrained_vector_fpext_v3f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: cvtss2sd %xmm0, %xmm0
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: cvtss2sd %xmm1, %xmm1
; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT: cvtss2sd %xmm2, %xmm2
; CHECK-NEXT: movsd %xmm2, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp)
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_fpext_v3f32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT: retq
entry:
%result = call <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(
<3 x float><float 42.0, float 43.0,
float 44.0>,
metadata !"fpexcept.strict")
ret <3 x double> %result
}
define <4 x double> @constrained_vector_fpext_v4f32() {
; CHECK-LABEL: constrained_vector_fpext_v4f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: cvtss2sd %xmm0, %xmm1
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: cvtss2sd %xmm0, %xmm0
; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: cvtss2sd %xmm1, %xmm2
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: cvtss2sd %xmm1, %xmm1
; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_fpext_v4f32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtps2pd {{.*}}(%rip), %ymm0
; AVX-NEXT: retq
entry:
%result = call <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(
<4 x float><float 42.0, float 43.0,
float 44.0, float 45.0>,
metadata !"fpexcept.strict")
ret <4 x double> %result
}
define <1 x float> @constrained_vector_ceil_v1f32() {
; CHECK-LABEL: constrained_vector_ceil_v1f32:
; CHECK: # %bb.0: # %entry
@ -4413,6 +4624,8 @@ declare <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double>, met
declare <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.maxnum.v2f64(<2 x double>, <2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.minnum.v2f64(<2 x double>, <2 x double>, metadata, metadata)
declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float>, metadata)
declare <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double>, metadata, metadata)
@ -4438,6 +4651,8 @@ declare <1 x float> @llvm.experimental.constrained.rint.v1f32(<1 x float>, metad
declare <1 x float> @llvm.experimental.constrained.nearbyint.v1f32(<1 x float>, metadata, metadata)
declare <1 x float> @llvm.experimental.constrained.maxnum.v1f32(<1 x float>, <1 x float>, metadata, metadata)
declare <1 x float> @llvm.experimental.constrained.minnum.v1f32(<1 x float>, <1 x float>, metadata, metadata)
declare <1 x float> @llvm.experimental.constrained.fptrunc.v1f32.v1f64(<1 x double>, metadata, metadata)
declare <1 x double> @llvm.experimental.constrained.fpext.v1f64.v1f32(<1 x float>, metadata)
declare <1 x float> @llvm.experimental.constrained.ceil.v1f32(<1 x float>, metadata, metadata)
declare <1 x float> @llvm.experimental.constrained.floor.v1f32(<1 x float>, metadata, metadata)
declare <1 x float> @llvm.experimental.constrained.round.v1f32(<1 x float>, metadata, metadata)
@ -4482,6 +4697,8 @@ declare <3 x float> @llvm.experimental.constrained.maxnum.v3f32(<3 x float>, <3
declare <3 x double> @llvm.experimental.constrained.maxnum.v3f64(<3 x double>, <3 x double>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.minnum.v3f32(<3 x float>, <3 x float>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.minnum.v3f64(<3 x double>, <3 x double>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(<3 x double>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(<3 x float>, metadata)
declare <3 x float> @llvm.experimental.constrained.ceil.v3f32(<3 x float>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.ceil.v3f64(<3 x double>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.floor.v3f32(<3 x float>, metadata, metadata)
@ -4511,6 +4728,8 @@ declare <4 x double> @llvm.experimental.constrained.rint.v4f64(<4 x double>, met
declare <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.maxnum.v4f64(<4 x double>, <4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.minnum.v4f64(<4 x double>, <4 x double>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(<4 x float>, metadata)
declare <4 x double> @llvm.experimental.constrained.ceil.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.floor.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.round.v4f64(<4 x double>, metadata, metadata)

View File

@ -242,6 +242,30 @@ entry:
ret double %result
}
; Verify that fptrunc(42.1) isn't simplified when the rounding mode is
; unknown.
; CHECK-LABEL: f20
; CHECK: call float @llvm.experimental.constrained.fptrunc
define float @f20() {
entry:
%result = call float @llvm.experimental.constrained.fptrunc.f32.f64(
double 42.1,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %result
}
; Verify that fpext(42.1) isn't simplified when the rounding mode is
; unknown.
; CHECK-LABEL: f21
; CHECK: call double @llvm.experimental.constrained.fpext
define double @f21() {
entry:
%result = call double @llvm.experimental.constrained.fpext.f64.f32(float 42.0,
metadata !"fpexcept.strict")
ret double %result
}
@llvm.fp.env = thread_local global i8 zeroinitializer, section "llvm.metadata"
declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
@ -260,3 +284,5 @@ declare double @llvm.experimental.constrained.log2.f64(double, metadata, metadat
declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)
declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata)