Add constrained fptrunc and fpext intrinsics.
The new fptrunc and fpext intrinsics are constrained versions of the regular fptrunc and fpext instructions.

Reviewed by: Andrew Kaylor, Craig Topper, Cameron McInally, Conner Abbot
Approved by: Craig Topper

Differential Revision: https://reviews.llvm.org/D55897

llvm-svn: 360581
This commit is contained in:
parent 4039373bd7
commit 2a670b2329
@@ -14823,6 +14823,77 @@ The result produced is the product of the first two operands added to the third
operand computed with infinite precision, and then rounded to the target
precision.


'``llvm.experimental.constrained.fptrunc``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Syntax:
"""""""

::

      declare <ty2>
      @llvm.experimental.constrained.fptrunc(<type> <value>,
                                             metadata <rounding mode>,
                                             metadata <exception behavior>)

Overview:
"""""""""

The '``llvm.experimental.constrained.fptrunc``' intrinsic truncates ``value``
to type ``ty2``.

Arguments:
""""""""""

The first argument to the '``llvm.experimental.constrained.fptrunc``'
intrinsic must be :ref:`floating point <t_floating>` or :ref:`vector
<t_vector>` of floating point values. This argument must be larger in size
than the result.

The second and third arguments specify the rounding mode and exception
behavior as described above.

Semantics:
""""""""""

The result produced is a floating point value truncated to be smaller in size
than the operand.
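For example, a call that rounds a ``double`` constant down to ``float`` under a
dynamic rounding mode with strict exception semantics (adapted from the tests
added later in this patch; the ``%result`` name is illustrative) looks like:

::

      %result = call float @llvm.experimental.constrained.fptrunc.f32.f64(
                                   double 42.1,
                                   metadata !"round.dynamic",
                                   metadata !"fpexcept.strict")
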
'``llvm.experimental.constrained.fpext``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Syntax:
"""""""

::

      declare <ty2>
      @llvm.experimental.constrained.fpext(<type> <value>,
                                           metadata <exception behavior>)

Overview:
"""""""""

The '``llvm.experimental.constrained.fpext``' intrinsic extends a
floating-point ``value`` to a larger floating-point value.

Arguments:
""""""""""

The first argument to the '``llvm.experimental.constrained.fpext``'
intrinsic must be :ref:`floating point <t_floating>` or :ref:`vector
<t_vector>` of floating point values. This argument must be smaller in size
than the result.

The second argument specifies the exception behavior as described above.

Semantics:
""""""""""

The result produced is a floating point value extended to be larger in size
than the operand. All restrictions that apply to the fpext instruction also
apply to this intrinsic.
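For example, widening a ``float`` argument to ``double`` with strict exception
semantics (mirroring the tests added later in this patch; ``%result`` is
illustrative) looks like:

::

      %result = call double @llvm.experimental.constrained.fpext.f64.f32(float %x,
                                   metadata !"fpexcept.strict")

Because the widening conversion is exact, no rounding-mode argument is needed.
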
Constrained libm-equivalent Intrinsics
--------------------------------------

@@ -297,6 +297,26 @@ namespace ISD {
    STRICT_FRINT, STRICT_FNEARBYINT, STRICT_FMAXNUM, STRICT_FMINNUM,
    STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND, STRICT_FTRUNC,

    /// X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating
    /// point type down to the precision of the destination VT. TRUNC is a
    /// flag, which is always an integer that is zero or one. If TRUNC is 0,
    /// this is a normal rounding, if it is 1, this FP_ROUND is known to not
    /// change the value of Y.
    ///
    /// The TRUNC = 1 case is used in cases where we know that the value will
    /// not be modified by the node, because Y is not using any of the extra
    /// precision of source type. This allows certain transformations like
    /// STRICT_FP_EXTEND(STRICT_FP_ROUND(X,1)) -> X which are not safe for
    /// STRICT_FP_EXTEND(STRICT_FP_ROUND(X,0)) because the extra bits aren't
    /// removed.
    /// It is used to limit optimizations while the DAG is being optimized.
    STRICT_FP_ROUND,

    /// X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP
    /// type.
    /// It is used to limit optimizations while the DAG is being optimized.
    STRICT_FP_EXTEND,

    /// FMA - Perform a * b + c with no intermediate rounding step.
    FMA,

@ -691,6 +691,8 @@ public:
|
||||
case ISD::STRICT_FFLOOR:
|
||||
case ISD::STRICT_FROUND:
|
||||
case ISD::STRICT_FTRUNC:
|
||||
case ISD::STRICT_FP_ROUND:
|
||||
case ISD::STRICT_FP_EXTEND:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -891,6 +891,8 @@ public:
|
||||
case ISD::STRICT_FFLOOR: EqOpc = ISD::FFLOOR; break;
|
||||
case ISD::STRICT_FROUND: EqOpc = ISD::FROUND; break;
|
||||
case ISD::STRICT_FTRUNC: EqOpc = ISD::FTRUNC; break;
|
||||
case ISD::STRICT_FP_ROUND: EqOpc = ISD::FP_ROUND; break;
|
||||
case ISD::STRICT_FP_EXTEND: EqOpc = ISD::FP_EXTEND; break;
|
||||
}
|
||||
|
||||
auto Action = getOperationAction(EqOpc, VT);
|
||||
|
@ -238,6 +238,8 @@ namespace llvm {
|
||||
case Intrinsic::experimental_constrained_fdiv:
|
||||
case Intrinsic::experimental_constrained_frem:
|
||||
case Intrinsic::experimental_constrained_fma:
|
||||
case Intrinsic::experimental_constrained_fptrunc:
|
||||
case Intrinsic::experimental_constrained_fpext:
|
||||
case Intrinsic::experimental_constrained_sqrt:
|
||||
case Intrinsic::experimental_constrained_pow:
|
||||
case Intrinsic::experimental_constrained_powi:
|
||||
|
@@ -607,6 +607,15 @@ let IntrProperties = [IntrInaccessibleMemOnly] in {
                                                      llvm_metadata_ty,
                                                      llvm_metadata_ty ]>;

  def int_experimental_constrained_fptrunc : Intrinsic<[ llvm_anyfloat_ty ],
                                                        [ llvm_anyfloat_ty,
                                                          llvm_metadata_ty,
                                                          llvm_metadata_ty ]>;

  def int_experimental_constrained_fpext : Intrinsic<[ llvm_anyfloat_ty ],
                                                      [ llvm_anyfloat_ty,
                                                        llvm_metadata_ty ]>;

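Because both definitions use ``llvm_anyfloat_ty`` for the result and the first
operand, the intrinsics are overloaded on both types; the scalar and vector
instantiations exercised by the tests in this patch include:

declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(<4 x double>, metadata, metadata)
declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata)
declare <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(<4 x float>, metadata)
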
// These intrinsics are sensitive to the rounding mode so we need constrained
// versions of each of them. When strict rounding and exception control are
// not required the non-constrained versions of these intrinsics should be

@@ -688,9 +697,7 @@ let IntrProperties = [IntrInaccessibleMemOnly] in {
                                                      llvm_metadata_ty,
                                                      llvm_metadata_ty ]>;
}
// FIXME: Add intrinsics for fcmp, fptrunc, fpext, fptoui and fptosi.
// FIXME: Add intrinsics for fabs and copysign?

// FIXME: Add intrinsics for fcmp, fptoui and fptosi.

//===------------------------- Expect Intrinsics --------------------------===//
//
@ -154,6 +154,8 @@ private:
|
||||
|
||||
SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT,
|
||||
const SDLoc &dl);
|
||||
SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT,
|
||||
const SDLoc &dl, SDValue ChainIn);
|
||||
SDValue ExpandBUILD_VECTOR(SDNode *Node);
|
||||
SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node);
|
||||
void ExpandDYNAMIC_STACKALLOC(SDNode *Node,
|
||||
@ -1115,6 +1117,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
|
||||
case ISD::STRICT_FFLOOR:
|
||||
case ISD::STRICT_FROUND:
|
||||
case ISD::STRICT_FTRUNC:
|
||||
case ISD::STRICT_FP_ROUND:
|
||||
case ISD::STRICT_FP_EXTEND:
|
||||
// These pseudo-ops get legalized as if they were their non-strict
|
||||
// equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
|
||||
// is also legal, but if ISD::FSQRT requires expansion then so does
|
||||
@ -1741,6 +1745,12 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS,
|
||||
/// The resultant code need not be legal.
|
||||
SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT,
|
||||
EVT DestVT, const SDLoc &dl) {
|
||||
return EmitStackConvert(SrcOp, SlotVT, DestVT, dl, DAG.getEntryNode());
|
||||
}
|
||||
|
||||
SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT,
|
||||
EVT DestVT, const SDLoc &dl,
|
||||
SDValue Chain) {
|
||||
// Create the stack frame object.
|
||||
unsigned SrcAlign = DAG.getDataLayout().getPrefTypeAlignment(
|
||||
SrcOp.getValueType().getTypeForEVT(*DAG.getContext()));
|
||||
@ -1761,19 +1771,19 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT,
|
||||
// later than DestVT.
|
||||
SDValue Store;
|
||||
|
||||
if (SrcSize > SlotSize)
|
||||
Store = DAG.getTruncStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, PtrInfo,
|
||||
if (SrcSize > SlotSize)
|
||||
Store = DAG.getTruncStore(Chain, dl, SrcOp, FIPtr, PtrInfo,
|
||||
SlotVT, SrcAlign);
|
||||
else {
|
||||
assert(SrcSize == SlotSize && "Invalid store");
|
||||
Store =
|
||||
DAG.getStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, PtrInfo, SrcAlign);
|
||||
DAG.getStore(Chain, dl, SrcOp, FIPtr, PtrInfo, SrcAlign);
|
||||
}
|
||||
|
||||
// Result is a load from the stack slot.
|
||||
if (SlotSize == DestSize)
|
||||
return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo, DestAlign);
|
||||
|
||||
|
||||
assert(SlotSize < DestSize && "Unknown extension!");
|
||||
return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, PtrInfo, SlotVT,
|
||||
DestAlign);
|
||||
@ -2777,12 +2787,27 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
|
||||
}
|
||||
break;
|
||||
}
|
||||
case ISD::STRICT_FP_ROUND:
|
||||
Tmp1 = EmitStackConvert(Node->getOperand(1),
|
||||
Node->getValueType(0),
|
||||
Node->getValueType(0), dl, Node->getOperand(0));
|
||||
ReplaceNode(Node, Tmp1.getNode());
|
||||
LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_ROUND node\n");
|
||||
return true;
|
||||
case ISD::FP_ROUND:
|
||||
case ISD::BITCAST:
|
||||
Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0),
|
||||
Tmp1 = EmitStackConvert(Node->getOperand(0),
|
||||
Node->getValueType(0),
|
||||
Node->getValueType(0), dl);
|
||||
Results.push_back(Tmp1);
|
||||
break;
|
||||
case ISD::STRICT_FP_EXTEND:
|
||||
Tmp1 = EmitStackConvert(Node->getOperand(1),
|
||||
Node->getOperand(1).getValueType(),
|
||||
Node->getValueType(0), dl, Node->getOperand(0));
|
||||
ReplaceNode(Node, Tmp1.getNode());
|
||||
LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_EXTEND node\n");
|
||||
return true;
|
||||
case ISD::FP_EXTEND:
|
||||
Tmp1 = EmitStackConvert(Node->getOperand(0),
|
||||
Node->getOperand(0).getValueType(),
|
||||
|
@ -687,6 +687,7 @@ private:
|
||||
SDValue ScalarizeVecRes_BUILD_VECTOR(SDNode *N);
|
||||
SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N);
|
||||
SDValue ScalarizeVecRes_FP_ROUND(SDNode *N);
|
||||
SDValue ScalarizeVecRes_STRICT_FP_ROUND(SDNode *N);
|
||||
SDValue ScalarizeVecRes_FPOWI(SDNode *N);
|
||||
SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
|
||||
SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
|
||||
@ -710,6 +711,7 @@ private:
|
||||
SDValue ScalarizeVecOp_VSETCC(SDNode *N);
|
||||
SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo);
|
||||
SDValue ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo);
|
||||
SDValue ScalarizeVecOp_STRICT_FP_ROUND(SDNode *N, unsigned OpNo);
|
||||
SDValue ScalarizeVecOp_VECREDUCE(SDNode *N);
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
@ -820,6 +822,7 @@ private:
|
||||
SDValue WidenVecRes_StrictFP(SDNode *N);
|
||||
SDValue WidenVecRes_OverflowOp(SDNode *N, unsigned ResNo);
|
||||
SDValue WidenVecRes_Convert(SDNode *N);
|
||||
SDValue WidenVecRes_Convert_StrictFP(SDNode *N);
|
||||
SDValue WidenVecRes_FCOPYSIGN(SDNode *N);
|
||||
SDValue WidenVecRes_POWI(SDNode *N);
|
||||
SDValue WidenVecRes_Shift(SDNode *N);
|
||||
|
@ -331,6 +331,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
|
||||
case ISD::STRICT_FFLOOR:
|
||||
case ISD::STRICT_FROUND:
|
||||
case ISD::STRICT_FTRUNC:
|
||||
case ISD::STRICT_FP_ROUND:
|
||||
case ISD::STRICT_FP_EXTEND:
|
||||
// These pseudo-ops get legalized as if they were their non-strict
|
||||
// equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
|
||||
// is also legal, but if ISD::FSQRT requires expansion then so does
|
||||
@ -1301,7 +1303,7 @@ SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) {
|
||||
|
||||
if (OperVT.isVector())
|
||||
Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
|
||||
EltVT, Oper, Idx);
|
||||
OperVT.getVectorElementType(), Oper, Idx);
|
||||
|
||||
Opers.push_back(Oper);
|
||||
}
|
||||
|
@ -50,6 +50,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
|
||||
case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break;
|
||||
case ISD::BUILD_VECTOR: R = ScalarizeVecRes_BUILD_VECTOR(N); break;
|
||||
case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
|
||||
case ISD::STRICT_FP_ROUND: R = ScalarizeVecRes_STRICT_FP_ROUND(N); break;
|
||||
case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break;
|
||||
case ISD::FP_ROUND_INREG: R = ScalarizeVecRes_InregOp(N); break;
|
||||
case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break;
|
||||
@ -170,6 +171,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
|
||||
case ISD::STRICT_FFLOOR:
|
||||
case ISD::STRICT_FROUND:
|
||||
case ISD::STRICT_FTRUNC:
|
||||
case ISD::STRICT_FP_EXTEND:
|
||||
R = ScalarizeVecRes_StrictFPOp(N);
|
||||
break;
|
||||
case ISD::UADDO:
|
||||
@ -321,6 +323,18 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) {
|
||||
NewVT, Op, N->getOperand(1));
|
||||
}
|
||||
|
||||
SDValue DAGTypeLegalizer::ScalarizeVecRes_STRICT_FP_ROUND(SDNode *N) {
|
||||
EVT NewVT = N->getValueType(0).getVectorElementType();
|
||||
SDValue Op = GetScalarizedVector(N->getOperand(1));
|
||||
SDValue Res = DAG.getNode(ISD::STRICT_FP_ROUND, SDLoc(N),
|
||||
{ NewVT, MVT::Other },
|
||||
{ N->getOperand(0), Op, N->getOperand(2) });
|
||||
// Legalize the chain result - switch anything that used the old chain to
|
||||
// use the new one.
|
||||
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
|
||||
return Res;
|
||||
}
|
||||
|
||||
SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) {
|
||||
SDValue Op = GetScalarizedVector(N->getOperand(0));
|
||||
return DAG.getNode(ISD::FPOWI, SDLoc(N),
|
||||
@ -604,6 +618,9 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
|
||||
case ISD::STORE:
|
||||
Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo);
|
||||
break;
|
||||
case ISD::STRICT_FP_ROUND:
|
||||
Res = ScalarizeVecOp_STRICT_FP_ROUND(N, OpNo);
|
||||
break;
|
||||
case ISD::FP_ROUND:
|
||||
Res = ScalarizeVecOp_FP_ROUND(N, OpNo);
|
||||
break;
|
||||
@ -752,6 +769,20 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo) {
|
||||
return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
|
||||
}
|
||||
|
||||
SDValue DAGTypeLegalizer::ScalarizeVecOp_STRICT_FP_ROUND(SDNode *N,
|
||||
unsigned OpNo) {
|
||||
assert(OpNo == 1 && "Wrong operand for scalarization!");
|
||||
SDValue Elt = GetScalarizedVector(N->getOperand(1));
|
||||
SDValue Res = DAG.getNode(ISD::STRICT_FP_ROUND, SDLoc(N),
|
||||
{ N->getValueType(0).getVectorElementType(),
|
||||
MVT::Other },
|
||||
{ N->getOperand(0), Elt, N->getOperand(2) });
|
||||
// Legalize the chain result - switch anything that used the old chain to
|
||||
// use the new one.
|
||||
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
|
||||
return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
|
||||
}
|
||||
|
||||
SDValue DAGTypeLegalizer::ScalarizeVecOp_VECREDUCE(SDNode *N) {
|
||||
SDValue Res = GetScalarizedVector(N->getOperand(0));
|
||||
// Result type may be wider than element type.
|
||||
@ -844,7 +875,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
|
||||
case ISD::FNEARBYINT:
|
||||
case ISD::FNEG:
|
||||
case ISD::FP_EXTEND:
|
||||
case ISD::STRICT_FP_EXTEND:
|
||||
case ISD::FP_ROUND:
|
||||
case ISD::STRICT_FP_ROUND:
|
||||
case ISD::FP_TO_SINT:
|
||||
case ISD::FP_TO_UINT:
|
||||
case ISD::FRINT:
|
||||
@ -1615,15 +1648,34 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
|
||||
|
||||
// If the input also splits, handle it directly for a compile time speedup.
|
||||
// Otherwise split it by hand.
|
||||
EVT InVT = N->getOperand(0).getValueType();
|
||||
unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
|
||||
EVT InVT = N->getOperand(OpNo).getValueType();
|
||||
if (getTypeAction(InVT) == TargetLowering::TypeSplitVector)
|
||||
GetSplitVector(N->getOperand(0), Lo, Hi);
|
||||
GetSplitVector(N->getOperand(OpNo), Lo, Hi);
|
||||
else
|
||||
std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
|
||||
std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, OpNo);
|
||||
|
||||
if (N->getOpcode() == ISD::FP_ROUND) {
|
||||
Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1));
|
||||
Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1));
|
||||
} else if (N->getOpcode() == ISD::STRICT_FP_ROUND) {
|
||||
Lo = DAG.getNode(N->getOpcode(), dl, { LoVT, MVT::Other },
|
||||
{ N->getOperand(0), Lo, N->getOperand(2) });
|
||||
Hi = DAG.getNode(N->getOpcode(), dl, { HiVT, MVT::Other },
|
||||
{ N->getOperand(0), Hi, N->getOperand(2) });
|
||||
SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
|
||||
Lo.getValue(1), Hi.getValue(1));
|
||||
ReplaceValueWith(SDValue(N, 1), NewChain);
|
||||
} else if (N->isStrictFPOpcode()) {
|
||||
Lo = DAG.getNode(N->getOpcode(), dl, { LoVT, MVT::Other },
|
||||
{ N->getOperand(0), Lo });
|
||||
Hi = DAG.getNode(N->getOpcode(), dl, { HiVT, MVT::Other },
|
||||
{ N->getOperand(0), Hi });
|
||||
// Legalize the chain result - switch anything that used the old chain to
|
||||
// use the new one.
|
||||
SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
|
||||
Lo.getValue(1), Hi.getValue(1));
|
||||
ReplaceValueWith(SDValue(N, 1), NewChain);
|
||||
} else {
|
||||
Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
|
||||
Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
|
||||
@ -1824,6 +1876,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
|
||||
case ISD::TRUNCATE:
|
||||
Res = SplitVecOp_TruncateHelper(N);
|
||||
break;
|
||||
case ISD::STRICT_FP_ROUND:
|
||||
case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break;
|
||||
case ISD::FCOPYSIGN: Res = SplitVecOp_FCOPYSIGN(N); break;
|
||||
case ISD::STORE:
|
||||
@ -1853,6 +1906,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
|
||||
case ISD::CTTZ:
|
||||
case ISD::CTLZ:
|
||||
case ISD::CTPOP:
|
||||
case ISD::STRICT_FP_EXTEND:
|
||||
case ISD::FP_EXTEND:
|
||||
case ISD::SIGN_EXTEND:
|
||||
case ISD::ZERO_EXTEND:
|
||||
@ -1894,7 +1948,11 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
|
||||
if (Res.getNode() == N)
|
||||
return true;
|
||||
|
||||
assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
|
||||
if (N->isStrictFPOpcode())
|
||||
assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 2 &&
|
||||
"Invalid operand expansion");
|
||||
else
|
||||
assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
|
||||
"Invalid operand expansion");
|
||||
|
||||
ReplaceValueWith(SDValue(N, 0), Res);
|
||||
@ -1982,14 +2040,30 @@ SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
|
||||
EVT ResVT = N->getValueType(0);
|
||||
SDValue Lo, Hi;
|
||||
SDLoc dl(N);
|
||||
GetSplitVector(N->getOperand(0), Lo, Hi);
|
||||
GetSplitVector(N->getOperand(N->isStrictFPOpcode() ? 1 : 0), Lo, Hi);
|
||||
EVT InVT = Lo.getValueType();
|
||||
|
||||
EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
|
||||
InVT.getVectorNumElements());
|
||||
|
||||
Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo);
|
||||
Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi);
|
||||
if (N->isStrictFPOpcode()) {
|
||||
Lo = DAG.getNode(N->getOpcode(), dl, { OutVT, MVT::Other },
|
||||
{ N->getOperand(0), Lo });
|
||||
Hi = DAG.getNode(N->getOpcode(), dl, { OutVT, MVT::Other },
|
||||
{ N->getOperand(0), Hi });
|
||||
|
||||
// Build a factor node to remember that this operation is independent
|
||||
// of the other one.
|
||||
SDValue Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
|
||||
Hi.getValue(1));
|
||||
|
||||
// Legalize the chain result - switch anything that used the old chain to
|
||||
// use the new one.
|
||||
ReplaceValueWith(SDValue(N, 1), Ch);
|
||||
} else {
|
||||
Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo);
|
||||
Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi);
|
||||
}
|
||||
|
||||
return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
|
||||
}
|
||||
@ -2461,14 +2535,26 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) {
|
||||
EVT ResVT = N->getValueType(0);
|
||||
SDValue Lo, Hi;
|
||||
SDLoc DL(N);
|
||||
GetSplitVector(N->getOperand(0), Lo, Hi);
|
||||
GetSplitVector(N->getOperand(N->isStrictFPOpcode() ? 1 : 0), Lo, Hi);
|
||||
EVT InVT = Lo.getValueType();
|
||||
|
||||
EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
|
||||
InVT.getVectorNumElements());
|
||||
|
||||
Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1));
|
||||
Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1));
|
||||
if (N->isStrictFPOpcode()) {
|
||||
Lo = DAG.getNode(N->getOpcode(), DL, { OutVT, MVT::Other },
|
||||
{ N->getOperand(0), Lo, N->getOperand(2) });
|
||||
Hi = DAG.getNode(N->getOpcode(), DL, { OutVT, MVT::Other },
|
||||
{ N->getOperand(0), Hi, N->getOperand(2) });
|
||||
// Legalize the chain result - switch anything that used the old chain to
|
||||
// use the new one.
|
||||
SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
|
||||
Lo.getValue(1), Hi.getValue(1));
|
||||
ReplaceValueWith(SDValue(N, 1), NewChain);
|
||||
} else {
|
||||
Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1));
|
||||
Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1));
|
||||
}
|
||||
|
||||
return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
|
||||
}
|
||||
@ -2632,6 +2718,11 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
|
||||
Res = WidenVecRes_Convert(N);
|
||||
break;
|
||||
|
||||
case ISD::STRICT_FP_EXTEND:
|
||||
case ISD::STRICT_FP_ROUND:
|
||||
Res = WidenVecRes_Convert_StrictFP(N);
|
||||
break;
|
||||
|
||||
case ISD::FABS:
|
||||
case ISD::FCEIL:
|
||||
case ISD::FCOS:
|
||||
@ -3109,6 +3200,43 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
|
||||
return DAG.getBuildVector(WidenVT, DL, Ops);
|
||||
}
|
||||
|
||||
SDValue DAGTypeLegalizer::WidenVecRes_Convert_StrictFP(SDNode *N) {
|
||||
SDValue InOp = N->getOperand(1);
|
||||
SDLoc DL(N);
|
||||
SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
|
||||
|
||||
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
|
||||
unsigned WidenNumElts = WidenVT.getVectorNumElements();
|
||||
SmallVector<EVT, 2> WidenVTs = { WidenVT, MVT::Other };
|
||||
|
||||
EVT InVT = InOp.getValueType();
|
||||
EVT InEltVT = InVT.getVectorElementType();
|
||||
|
||||
unsigned Opcode = N->getOpcode();
|
||||
|
||||
// FIXME: Optimizations need to be implemented here.
|
||||
|
||||
// Otherwise unroll into some nasty scalar code and rebuild the vector.
|
||||
EVT EltVT = WidenVT.getVectorElementType();
|
||||
SmallVector<EVT, 2> EltVTs = { EltVT, MVT::Other };
|
||||
SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT));
|
||||
SmallVector<SDValue, 32> OpChains;
|
||||
// Use the original element count so we don't do more scalar opts than
|
||||
// necessary.
|
||||
unsigned MinElts = N->getValueType(0).getVectorNumElements();
|
||||
for (unsigned i=0; i < MinElts; ++i) {
|
||||
NewOps[1] = DAG.getNode(
|
||||
ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp,
|
||||
DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
|
||||
Ops[i] = DAG.getNode(Opcode, DL, EltVTs, NewOps);
|
||||
OpChains.push_back(Ops[i].getValue(1));
|
||||
}
|
||||
SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OpChains);
|
||||
ReplaceValueWith(SDValue(N, 1), NewChain);
|
||||
|
||||
return DAG.getBuildVector(WidenVT, DL, Ops);
|
||||
}
|
||||
|
||||
SDValue DAGTypeLegalizer::WidenVecRes_EXTEND_VECTOR_INREG(SDNode *N) {
|
||||
unsigned Opcode = N->getOpcode();
|
||||
SDValue InOp = N->getOperand(0);
|
||||
@ -3895,6 +4023,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
|
||||
break;
|
||||
|
||||
case ISD::FP_EXTEND:
|
||||
case ISD::STRICT_FP_EXTEND:
|
||||
case ISD::FP_TO_SINT:
|
||||
case ISD::FP_TO_UINT:
|
||||
case ISD::SINT_TO_FP:
|
||||
@ -3929,8 +4058,12 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
|
||||
return true;
|
||||
|
||||
|
||||
assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
|
||||
"Invalid operand expansion");
|
||||
if (N->isStrictFPOpcode())
|
||||
assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 2 &&
|
||||
"Invalid operand expansion");
|
||||
else
|
||||
assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
|
||||
"Invalid operand expansion");
|
||||
|
||||
ReplaceValueWith(SDValue(N, 0), Res);
|
||||
return false;
|
||||
@ -4010,7 +4143,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
|
||||
EVT EltVT = VT.getVectorElementType();
|
||||
SDLoc dl(N);
|
||||
unsigned NumElts = VT.getVectorNumElements();
|
||||
SDValue InOp = N->getOperand(0);
|
||||
SDValue InOp = N->getOperand(N->isStrictFPOpcode() ? 1 : 0);
|
||||
assert(getTypeAction(InOp.getValueType()) ==
|
||||
TargetLowering::TypeWidenVector &&
|
||||
"Unexpected type action");
|
||||
@ -4019,10 +4152,19 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
|
||||
unsigned Opcode = N->getOpcode();
|
||||
|
||||
// See if a widened result type would be legal, if so widen the node.
|
||||
// FIXME: This isn't safe for StrictFP. Other optimization here is needed.
|
||||
EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
|
||||
InVT.getVectorNumElements());
|
||||
if (TLI.isTypeLegal(WideVT)) {
|
||||
SDValue Res = DAG.getNode(Opcode, dl, WideVT, InOp);
|
||||
if (TLI.isTypeLegal(WideVT) && !N->isStrictFPOpcode()) {
|
||||
SDValue Res;
|
||||
if (N->isStrictFPOpcode()) {
|
||||
Res = DAG.getNode(Opcode, dl, { WideVT, MVT::Other },
|
||||
{ N->getOperand(0), InOp });
|
||||
// Legalize the chain result - switch anything that used the old chain to
|
||||
// use the new one.
|
||||
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
|
||||
} else
|
||||
Res = DAG.getNode(Opcode, dl, WideVT, InOp);
|
||||
return DAG.getNode(
|
||||
ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
|
||||
DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
|
||||
@ -4032,12 +4174,26 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
|
||||
|
||||
// Unroll the convert into some scalar code and create a nasty build vector.
|
||||
SmallVector<SDValue, 16> Ops(NumElts);
|
||||
for (unsigned i=0; i < NumElts; ++i)
|
||||
Ops[i] = DAG.getNode(
|
||||
Opcode, dl, EltVT,
|
||||
DAG.getNode(
|
||||
ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
|
||||
DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))));
|
||||
if (N->isStrictFPOpcode()) {
|
||||
SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
|
||||
SmallVector<SDValue, 32> OpChains;
|
||||
for (unsigned i=0; i < NumElts; ++i) {
|
||||
NewOps[1] = DAG.getNode(
|
||||
ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
|
||||
DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
|
||||
Ops[i] = DAG.getNode(Opcode, dl, { EltVT, MVT::Other }, NewOps);
|
||||
OpChains.push_back(Ops[i].getValue(1));
|
||||
}
|
||||
SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains);
|
||||
ReplaceValueWith(SDValue(N, 1), NewChain);
|
||||
} else {
|
||||
for (unsigned i = 0; i < NumElts; ++i)
|
||||
Ops[i] = DAG.getNode(
|
||||
Opcode, dl, EltVT,
|
||||
DAG.getNode(
|
||||
ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
|
||||
DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))));
|
||||
}
|
||||
|
||||
return DAG.getBuildVector(VT, dl, Ops);
|
||||
}
|
||||
|
@ -7611,6 +7611,10 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
|
||||
case ISD::STRICT_FFLOOR: NewOpc = ISD::FFLOOR; IsUnary = true; break;
|
||||
case ISD::STRICT_FROUND: NewOpc = ISD::FROUND; IsUnary = true; break;
|
||||
case ISD::STRICT_FTRUNC: NewOpc = ISD::FTRUNC; IsUnary = true; break;
|
||||
// STRICT_FP_ROUND takes an extra argument describing whether or not
|
||||
// the value will be changed by this node. See ISDOpcodes.h for details.
|
||||
case ISD::STRICT_FP_ROUND: NewOpc = ISD::FP_ROUND; break;
|
||||
case ISD::STRICT_FP_EXTEND: NewOpc = ISD::FP_EXTEND; IsUnary = true; break;
|
||||
}
|
||||
|
||||
// We're taking this node out of the chain, so we need to re-link things.
|
||||
@ -7618,8 +7622,19 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
|
||||
SDValue OutputChain = SDValue(Node, 1);
|
||||
ReplaceAllUsesOfValueWith(OutputChain, InputChain);
|
||||
|
||||
SDVTList VTs = getVTList(Node->getOperand(1).getValueType());
|
||||
SDVTList VTs;
|
||||
SDNode *Res = nullptr;
|
||||
|
||||
switch (OrigOpc) {
|
||||
default:
|
||||
VTs = getVTList(Node->getOperand(1).getValueType());
|
||||
break;
|
||||
case ISD::STRICT_FP_ROUND:
|
||||
case ISD::STRICT_FP_EXTEND:
|
||||
VTs = getVTList(Node->getValueType(0));
|
||||
break;
|
||||
}
|
||||
|
||||
if (IsUnary)
|
||||
Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1) });
|
||||
else if (IsTernary)
|
||||
|
@ -6078,6 +6078,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
|
||||
case Intrinsic::experimental_constrained_fdiv:
|
||||
case Intrinsic::experimental_constrained_frem:
|
||||
case Intrinsic::experimental_constrained_fma:
|
||||
case Intrinsic::experimental_constrained_fptrunc:
|
||||
case Intrinsic::experimental_constrained_fpext:
|
||||
case Intrinsic::experimental_constrained_sqrt:
|
||||
case Intrinsic::experimental_constrained_pow:
|
||||
case Intrinsic::experimental_constrained_powi:
|
||||
@ -6834,6 +6836,12 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
|
||||
case Intrinsic::experimental_constrained_fma:
|
||||
Opcode = ISD::STRICT_FMA;
|
||||
break;
|
||||
case Intrinsic::experimental_constrained_fptrunc:
|
||||
Opcode = ISD::STRICT_FP_ROUND;
|
||||
break;
|
||||
case Intrinsic::experimental_constrained_fpext:
|
||||
Opcode = ISD::STRICT_FP_EXTEND;
|
||||
break;
|
||||
case Intrinsic::experimental_constrained_sqrt:
|
||||
Opcode = ISD::STRICT_FSQRT;
|
||||
break;
|
||||
@ -6897,7 +6905,12 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
|
||||
|
||||
SDVTList VTs = DAG.getVTList(ValueVTs);
|
||||
SDValue Result;
|
||||
if (FPI.isUnaryOp())
|
||||
if (Opcode == ISD::STRICT_FP_ROUND)
|
||||
Result = DAG.getNode(Opcode, sdl, VTs,
|
||||
{ Chain, getValue(FPI.getArgOperand(0)),
|
||||
DAG.getTargetConstant(0, sdl,
|
||||
TLI.getPointerTy(DAG.getDataLayout())) });
|
||||
else if (FPI.isUnaryOp())
|
||||
Result = DAG.getNode(Opcode, sdl, VTs,
|
||||
{ Chain, getValue(FPI.getArgOperand(0)) });
|
||||
else if (FPI.isTernaryOp())
|
||||
|
@ -313,9 +313,11 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
|
||||
case ISD::ZERO_EXTEND_VECTOR_INREG: return "zero_extend_vector_inreg";
|
||||
case ISD::TRUNCATE: return "truncate";
|
||||
case ISD::FP_ROUND: return "fp_round";
|
||||
case ISD::STRICT_FP_ROUND: return "strict_fp_round";
|
||||
case ISD::FLT_ROUNDS_: return "flt_rounds";
|
||||
case ISD::FP_ROUND_INREG: return "fp_round_inreg";
|
||||
case ISD::FP_EXTEND: return "fp_extend";
|
||||
case ISD::STRICT_FP_EXTEND: return "strict_fp_extend";
|
||||
|
||||
case ISD::SINT_TO_FP: return "sint_to_fp";
|
||||
case ISD::UINT_TO_FP: return "uint_to_fp";
|
||||
|
@ -142,6 +142,8 @@ bool ConstrainedFPIntrinsic::isUnaryOp() const {
|
||||
switch (getIntrinsicID()) {
|
||||
default:
|
||||
return false;
|
||||
case Intrinsic::experimental_constrained_fptrunc:
|
||||
case Intrinsic::experimental_constrained_fpext:
|
||||
case Intrinsic::experimental_constrained_sqrt:
|
||||
case Intrinsic::experimental_constrained_sin:
|
||||
case Intrinsic::experimental_constrained_cos:
|
||||
|
@ -4209,6 +4209,8 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
|
||||
case Intrinsic::experimental_constrained_fdiv:
|
||||
case Intrinsic::experimental_constrained_frem:
|
||||
case Intrinsic::experimental_constrained_fma:
|
||||
case Intrinsic::experimental_constrained_fptrunc:
|
||||
case Intrinsic::experimental_constrained_fpext:
|
||||
case Intrinsic::experimental_constrained_sqrt:
|
||||
case Intrinsic::experimental_constrained_pow:
|
||||
case Intrinsic::experimental_constrained_powi:
|
||||
@ -4687,6 +4689,47 @@ void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
|
||||
HasRoundingMD = true;
|
||||
break;
|
||||
|
||||
case Intrinsic::experimental_constrained_fptrunc:
|
||||
case Intrinsic::experimental_constrained_fpext: {
|
||||
if (FPI.getIntrinsicID() == Intrinsic::experimental_constrained_fptrunc) {
|
||||
Assert((NumOperands == 3),
|
||||
"invalid arguments for constrained FP intrinsic", &FPI);
|
||||
HasRoundingMD = true;
|
||||
} else {
|
||||
Assert((NumOperands == 2),
|
||||
"invalid arguments for constrained FP intrinsic", &FPI);
|
||||
}
|
||||
HasExceptionMD = true;
|
||||
|
||||
Value *Operand = FPI.getArgOperand(0);
|
||||
Type *OperandTy = Operand->getType();
|
||||
Value *Result = &FPI;
|
||||
Type *ResultTy = Result->getType();
|
||||
Assert(OperandTy->isFPOrFPVectorTy(),
|
||||
"Intrinsic first argument must be FP or FP vector", &FPI);
|
||||
Assert(ResultTy->isFPOrFPVectorTy(),
|
||||
"Intrinsic result must be FP or FP vector", &FPI);
|
||||
Assert(OperandTy->isVectorTy() == ResultTy->isVectorTy(),
|
||||
"Intrinsic first argument and result disagree on vector use", &FPI);
|
||||
if (OperandTy->isVectorTy()) {
|
||||
auto *OperandVecTy = cast<VectorType>(OperandTy);
|
||||
auto *ResultVecTy = cast<VectorType>(ResultTy);
|
||||
Assert(OperandVecTy->getNumElements() == ResultVecTy->getNumElements(),
|
||||
"Intrinsic first argument and result vector lengths must be equal",
|
||||
&FPI);
|
||||
}
|
||||
if (FPI.getIntrinsicID() == Intrinsic::experimental_constrained_fptrunc) {
|
||||
Assert(OperandTy->getScalarSizeInBits() > ResultTy->getScalarSizeInBits(),
|
||||
"Intrinsic first argument's type must be larger than result type",
|
||||
&FPI);
|
||||
} else {
|
||||
Assert(OperandTy->getScalarSizeInBits() < ResultTy->getScalarSizeInBits(),
|
||||
"Intrinsic first argument's type must be smaller than result type",
|
||||
&FPI);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
llvm_unreachable("Invalid constrained FP intrinsic!");
|
||||
}
|
||||
|
@ -286,6 +286,29 @@ entry:
|
||||
ret double %rem
|
||||
}
|
||||
|
||||
; Verify that round(42.1) isn't simplified when the rounding mode is
|
||||
; unknown.
|
||||
; Verify that no gross errors happen.
|
||||
; CHECK-LABEL: @f21
|
||||
; COMMON: cvtsd2ss
|
||||
define float @f21() {
|
||||
entry:
|
||||
%result = call float @llvm.experimental.constrained.fptrunc.f32.f64(
|
||||
double 42.1,
|
||||
metadata !"round.dynamic",
|
||||
metadata !"fpexcept.strict")
|
||||
ret float %result
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @f22
|
||||
; COMMON: cvtss2sd
|
||||
define double @f22(float %x) {
|
||||
entry:
|
||||
%result = call double @llvm.experimental.constrained.fpext.f64.f32(float %x,
|
||||
metadata !"fpexcept.strict")
|
||||
ret double %result
|
||||
}
|
||||
|
||||
@llvm.fp.env = thread_local global i8 zeroinitializer, section "llvm.metadata"
|
||||
declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
|
||||
declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata)
|
||||
@ -306,3 +329,6 @@ declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadat
|
||||
declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
|
||||
declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)
|
||||
declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)
|
||||
declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata)
|
||||
declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata)
|
||||
|
||||
|
@ -3831,6 +3831,217 @@ entry:
|
||||
ret <4 x double> %min
|
||||
}
|
||||
|
||||
define <1 x float> @constrained_vector_fptrunc_v1f64() {
|
||||
; CHECK-LABEL: constrained_vector_fptrunc_v1f64:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; CHECK-NEXT: cvtsd2ss %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: constrained_vector_fptrunc_v1f64:
|
||||
; AVX: # %bb.0: # %entry
|
||||
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
entry:
|
||||
%result = call <1 x float> @llvm.experimental.constrained.fptrunc.v1f32.v1f64(
|
||||
<1 x double><double 42.1>,
|
||||
metadata !"round.dynamic",
|
||||
metadata !"fpexcept.strict")
|
||||
ret <1 x float> %result
|
||||
}
|
||||
|
||||
define <2 x float> @constrained_vector_fptrunc_v2f64() {
|
||||
; CHECK-LABEL: constrained_vector_fptrunc_v2f64:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; CHECK-NEXT: cvtsd2ss %xmm0, %xmm1
|
||||
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; CHECK-NEXT: cvtsd2ss %xmm0, %xmm0
|
||||
; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||
; CHECK-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: constrained_vector_fptrunc_v2f64:
|
||||
; AVX: # %bb.0: # %entry
|
||||
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1
|
||||
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
|
||||
; AVX-NEXT: retq
|
||||
entry:
|
||||
%result = call <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(
|
||||
<2 x double><double 42.1, double 42.2>,
|
||||
metadata !"round.dynamic",
|
||||
metadata !"fpexcept.strict")
|
||||
ret <2 x float> %result
|
||||
}
|
||||
|
||||
define <3 x float> @constrained_vector_fptrunc_v3f64() {
|
||||
; CHECK-LABEL: constrained_vector_fptrunc_v3f64:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; CHECK-NEXT: cvtsd2ss %xmm0, %xmm1
|
||||
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; CHECK-NEXT: cvtsd2ss %xmm0, %xmm0
|
||||
; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
|
||||
; CHECK-NEXT: cvtsd2ss %xmm1, %xmm1
|
||||
; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; CHECK-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: constrained_vector_fptrunc_v3f64:
|
||||
; AVX: # %bb.0: # %entry
|
||||
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1
|
||||
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
|
||||
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1
|
||||
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
|
||||
; AVX-NEXT: retq
|
||||
entry:
|
||||
%result = call <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(
|
||||
<3 x double><double 42.1, double 42.2,
|
||||
double 42.3>,
|
||||
metadata !"round.dynamic",
|
||||
metadata !"fpexcept.strict")
|
||||
ret <3 x float> %result
|
||||
}
|
||||
|
||||
define <4 x float> @constrained_vector_fptrunc_v4f64() {
|
||||
; CHECK-LABEL: constrained_vector_fptrunc_v4f64:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; CHECK-NEXT: cvtsd2ss %xmm0, %xmm0
|
||||
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
|
||||
; CHECK-NEXT: cvtsd2ss %xmm1, %xmm1
|
||||
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
|
||||
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; CHECK-NEXT: cvtsd2ss %xmm0, %xmm2
|
||||
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; CHECK-NEXT: cvtsd2ss %xmm0, %xmm0
|
||||
; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
|
||||
; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; CHECK-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: constrained_vector_fptrunc_v4f64:
|
||||
; AVX: # %bb.0: # %entry
|
||||
; AVX-NEXT: vcvtpd2psy {{.*}}(%rip), %xmm0
|
||||
; AVX-NEXT: retq
|
||||
entry:
|
||||
%result = call <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(
|
||||
<4 x double><double 42.1, double 42.2,
|
||||
double 42.3, double 42.4>,
|
||||
metadata !"round.dynamic",
|
||||
metadata !"fpexcept.strict")
|
||||
ret <4 x float> %result
|
||||
}
|
||||
|
||||
define <1 x double> @constrained_vector_fpext_v1f32() {
|
||||
; CHECK-LABEL: constrained_vector_fpext_v1f32:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: cvtss2sd %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: constrained_vector_fpext_v1f32:
|
||||
; AVX: # %bb.0: # %entry
|
||||
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
entry:
|
||||
%result = call <1 x double> @llvm.experimental.constrained.fpext.v1f64.v1f32(
|
||||
<1 x float><float 42.0>,
|
||||
metadata !"fpexcept.strict")
|
||||
ret <1 x double> %result
|
||||
}
|
||||
|
||||
define <2 x double> @constrained_vector_fpext_v2f32() {
|
||||
; CHECK-LABEL: constrained_vector_fpext_v2f32:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: cvtss2sd %xmm0, %xmm1
|
||||
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: cvtss2sd %xmm0, %xmm0
|
||||
; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; CHECK-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: constrained_vector_fpext_v2f32:
|
||||
; AVX: # %bb.0: # %entry
|
||||
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
|
||||
; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX-NEXT: retq
|
||||
entry:
|
||||
%result = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(
|
||||
<2 x float><float 42.0, float 43.0>,
|
||||
metadata !"fpexcept.strict")
|
||||
ret <2 x double> %result
|
||||
}
|
||||
|
||||
define <3 x double> @constrained_vector_fpext_v3f32() {
|
||||
; CHECK-LABEL: constrained_vector_fpext_v3f32:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: cvtss2sd %xmm0, %xmm0
|
||||
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: cvtss2sd %xmm1, %xmm1
|
||||
; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: cvtss2sd %xmm2, %xmm2
|
||||
; CHECK-NEXT: movsd %xmm2, -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: constrained_vector_fpext_v3f32:
|
||||
; AVX: # %bb.0: # %entry
|
||||
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
|
||||
; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
|
||||
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX-NEXT: retq
|
||||
entry:
|
||||
%result = call <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(
|
||||
<3 x float><float 42.0, float 43.0,
|
||||
float 44.0>,
|
||||
metadata !"fpexcept.strict")
|
||||
ret <3 x double> %result
|
||||
}
|
||||
|
||||
define <4 x double> @constrained_vector_fpext_v4f32() {
|
||||
; CHECK-LABEL: constrained_vector_fpext_v4f32:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: cvtss2sd %xmm0, %xmm1
|
||||
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: cvtss2sd %xmm0, %xmm0
|
||||
; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: cvtss2sd %xmm1, %xmm2
|
||||
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: cvtss2sd %xmm1, %xmm1
|
||||
; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
|
||||
; CHECK-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: constrained_vector_fpext_v4f32:
|
||||
; AVX: # %bb.0: # %entry
|
||||
; AVX-NEXT: vcvtps2pd {{.*}}(%rip), %ymm0
|
||||
; AVX-NEXT: retq
|
||||
entry:
|
||||
%result = call <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(
|
||||
<4 x float><float 42.0, float 43.0,
|
||||
float 44.0, float 45.0>,
|
||||
metadata !"fpexcept.strict")
|
||||
ret <4 x double> %result
|
||||
}
|
||||
|
||||
define <1 x float> @constrained_vector_ceil_v1f32() {
|
||||
; CHECK-LABEL: constrained_vector_ceil_v1f32:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
@ -4413,6 +4624,8 @@ declare <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double>, met
|
||||
declare <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double>, metadata, metadata)
|
||||
declare <2 x double> @llvm.experimental.constrained.maxnum.v2f64(<2 x double>, <2 x double>, metadata, metadata)
|
||||
declare <2 x double> @llvm.experimental.constrained.minnum.v2f64(<2 x double>, <2 x double>, metadata, metadata)
|
||||
declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata)
|
||||
declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float>, metadata)
|
||||
declare <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double>, metadata, metadata)
|
||||
declare <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double>, metadata, metadata)
|
||||
declare <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double>, metadata, metadata)
|
||||
@ -4438,6 +4651,8 @@ declare <1 x float> @llvm.experimental.constrained.rint.v1f32(<1 x float>, metad
|
||||
declare <1 x float> @llvm.experimental.constrained.nearbyint.v1f32(<1 x float>, metadata, metadata)
|
||||
declare <1 x float> @llvm.experimental.constrained.maxnum.v1f32(<1 x float>, <1 x float>, metadata, metadata)
|
||||
declare <1 x float> @llvm.experimental.constrained.minnum.v1f32(<1 x float>, <1 x float>, metadata, metadata)
|
||||
declare <1 x float> @llvm.experimental.constrained.fptrunc.v1f32.v1f64(<1 x double>, metadata, metadata)
|
||||
declare <1 x double> @llvm.experimental.constrained.fpext.v1f64.v1f32(<1 x float>, metadata)
|
||||
declare <1 x float> @llvm.experimental.constrained.ceil.v1f32(<1 x float>, metadata, metadata)
|
||||
declare <1 x float> @llvm.experimental.constrained.floor.v1f32(<1 x float>, metadata, metadata)
|
||||
declare <1 x float> @llvm.experimental.constrained.round.v1f32(<1 x float>, metadata, metadata)
|
||||
@ -4482,6 +4697,8 @@ declare <3 x float> @llvm.experimental.constrained.maxnum.v3f32(<3 x float>, <3
|
||||
declare <3 x double> @llvm.experimental.constrained.maxnum.v3f64(<3 x double>, <3 x double>, metadata, metadata)
|
||||
declare <3 x float> @llvm.experimental.constrained.minnum.v3f32(<3 x float>, <3 x float>, metadata, metadata)
|
||||
declare <3 x double> @llvm.experimental.constrained.minnum.v3f64(<3 x double>, <3 x double>, metadata, metadata)
|
||||
declare <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(<3 x double>, metadata, metadata)
|
||||
declare <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(<3 x float>, metadata)
|
||||
declare <3 x float> @llvm.experimental.constrained.ceil.v3f32(<3 x float>, metadata, metadata)
|
||||
declare <3 x double> @llvm.experimental.constrained.ceil.v3f64(<3 x double>, metadata, metadata)
|
||||
declare <3 x float> @llvm.experimental.constrained.floor.v3f32(<3 x float>, metadata, metadata)
|
||||
@ -4511,6 +4728,8 @@ declare <4 x double> @llvm.experimental.constrained.rint.v4f64(<4 x double>, met
|
||||
declare <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double>, metadata, metadata)
|
||||
declare <4 x double> @llvm.experimental.constrained.maxnum.v4f64(<4 x double>, <4 x double>, metadata, metadata)
|
||||
declare <4 x double> @llvm.experimental.constrained.minnum.v4f64(<4 x double>, <4 x double>, metadata, metadata)
|
||||
declare <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(<4 x double>, metadata, metadata)
|
||||
declare <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(<4 x float>, metadata)
|
||||
declare <4 x double> @llvm.experimental.constrained.ceil.v4f64(<4 x double>, metadata, metadata)
|
||||
declare <4 x double> @llvm.experimental.constrained.floor.v4f64(<4 x double>, metadata, metadata)
|
||||
declare <4 x double> @llvm.experimental.constrained.round.v4f64(<4 x double>, metadata, metadata)
|
||||
|
@ -242,6 +242,30 @@ entry:
|
||||
ret double %result
|
||||
}
|
||||
|
||||
; Verify that fptrunc(42.1) isn't simplified when the rounding mode is
|
||||
; unknown.
|
||||
; CHECK-LABEL: f20
|
||||
; CHECK: call float @llvm.experimental.constrained.fptrunc
|
||||
define float @f20() {
|
||||
entry:
|
||||
%result = call float @llvm.experimental.constrained.fptrunc.f32.f64(
|
||||
double 42.1,
|
||||
metadata !"round.dynamic",
|
||||
metadata !"fpexcept.strict")
|
||||
ret float %result
|
||||
}
|
||||
|
||||
; Verify that fpext(42.1) isn't simplified when the rounding mode is
|
||||
; unknown.
|
||||
; CHECK-LABEL: f21
|
||||
; CHECK: call double @llvm.experimental.constrained.fpext
|
||||
define double @f21() {
|
||||
entry:
|
||||
%result = call double @llvm.experimental.constrained.fpext.f64.f32(float 42.0,
|
||||
metadata !"fpexcept.strict")
|
||||
ret double %result
|
||||
}
|
||||
|
||||
@llvm.fp.env = thread_local global i8 zeroinitializer, section "llvm.metadata"
|
||||
declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
|
||||
declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
|
||||
@ -260,3 +284,5 @@ declare double @llvm.experimental.constrained.log2.f64(double, metadata, metadat
|
||||
declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
|
||||
declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
|
||||
declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)
|
||||
declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata)
|
||||
declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata)
|
||||
|