This adds constrained intrinsics for the signed and unsigned conversions
of integers to floating point.

This includes some of Craig Topper's changes for promotion support from
D71130.

Differential Revision: https://reviews.llvm.org/D69275
Kevin P. Neal 2019-10-16 15:24:47 -04:00
parent 1ed832e424
commit b1d8576b0a
17 changed files with 2833 additions and 91 deletions


@ -15667,6 +15667,78 @@ Semantics:
The result produced is a signed integer converted from the floating
point operand. The value is truncated, so it is rounded towards zero.
'``llvm.experimental.constrained.uitofp``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Syntax:
"""""""
::
declare <ty2>
@llvm.experimental.constrained.uitofp(<type> <value>,
metadata <rounding mode>,
metadata <exception behavior>)
Overview:
"""""""""
The '``llvm.experimental.constrained.uitofp``' intrinsic converts an
unsigned integer ``value`` to a floating-point value of type ``ty2``.
Arguments:
""""""""""
The first argument to the '``llvm.experimental.constrained.uitofp``'
intrinsic must be an :ref:`integer <t_integer>` or :ref:`vector
<t_vector>` of integer values.
The second and third arguments specify the rounding mode and exception
behavior as described above.
Semantics:
""""""""""
An inexact floating-point exception will be raised if rounding is required.
Any result produced is a floating point value converted from the input
integer operand.
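For example, converting an unsigned 32-bit integer to ``double`` under
dynamic rounding with strict exception semantics (mirroring the tests added
in this commit) looks like:

::

      %res = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %x,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict")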
'``llvm.experimental.constrained.sitofp``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Syntax:
"""""""
::
declare <ty2>
@llvm.experimental.constrained.sitofp(<type> <value>,
metadata <rounding mode>,
metadata <exception behavior>)
Overview:
"""""""""
The '``llvm.experimental.constrained.sitofp``' intrinsic converts a
signed integer ``value`` to a floating-point value of type ``ty2``.
Arguments:
""""""""""
The first argument to the '``llvm.experimental.constrained.sitofp``'
intrinsic must be an :ref:`integer <t_integer>` or :ref:`vector
<t_vector>` of integer values.
The second and third arguments specify the rounding mode and exception
behavior as described above.
Semantics:
""""""""""
An inexact floating-point exception will be raised if rounding is required.
Any result produced is a floating point value converted from the input
integer operand.
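For example:

::

      %res = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %x,
                            metadata !"round.dynamic",
                            metadata !"fpexcept.strict")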
'``llvm.experimental.constrained.fptrunc``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^


@ -312,6 +312,13 @@ namespace ISD {
STRICT_FP_TO_SINT,
STRICT_FP_TO_UINT,
/// STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to
/// a floating point value. These have the same semantics as sitofp and
/// uitofp in IR.
/// They are used to limit optimizations while the DAG is being optimized.
STRICT_SINT_TO_FP,
STRICT_UINT_TO_FP,
/// X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating
/// point type down to the precision of the destination VT. TRUNC is a
/// flag, which is always an integer that is zero or one. If TRUNC is 0,


@ -811,6 +811,11 @@ public:
/// float type VT, by either extending or rounding (by truncation).
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT);
/// Convert Op, which must be a STRICT operation of float type, to the
/// float type VT, by either extending or rounding (by truncation).
std::pair<SDValue, SDValue>
getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT);
/// Convert Op, which must be of integer type, to the
/// integer type VT, by either any-extending or truncating it.
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT);


@ -4123,14 +4123,18 @@ public:
/// Expand float to UINT conversion
/// \param N Node to expand
/// \param Result output after conversion
/// \param Chain output chain after conversion
/// \returns True, if the expansion was successful, false otherwise
bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain,
SelectionDAG &DAG) const;
/// Expand UINT(i64) to double(f64) conversion
/// \param N Node to expand
/// \param Result output after conversion
/// \param Chain output chain after conversion
/// \returns True, if the expansion was successful, false otherwise
bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain,
SelectionDAG &DAG) const;
/// Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const;


@ -41,6 +41,8 @@ INSTRUCTION(FMul, 2, 1, experimental_constrained_fmul, FMUL)
INSTRUCTION(FDiv, 2, 1, experimental_constrained_fdiv, FDIV)
INSTRUCTION(FRem, 2, 1, experimental_constrained_frem, FREM)
INSTRUCTION(FPExt, 1, 0, experimental_constrained_fpext, FP_EXTEND)
INSTRUCTION(SIToFP, 1, 1, experimental_constrained_sitofp, SINT_TO_FP)
INSTRUCTION(UIToFP, 1, 1, experimental_constrained_uitofp, UINT_TO_FP)
INSTRUCTION(FPToSI, 1, 0, experimental_constrained_fptosi, FP_TO_SINT)
INSTRUCTION(FPToUI, 1, 0, experimental_constrained_fptoui, FP_TO_UINT)
INSTRUCTION(FPTrunc, 1, 1, experimental_constrained_fptrunc, FP_ROUND)


@ -640,6 +640,16 @@ let IntrProperties = [IntrInaccessibleMemOnly, IntrWillReturn] in {
[ llvm_anyfloat_ty,
llvm_metadata_ty ]>;
def int_experimental_constrained_sitofp : Intrinsic<[ llvm_anyfloat_ty ],
[ llvm_anyint_ty,
llvm_metadata_ty,
llvm_metadata_ty ]>;
def int_experimental_constrained_uitofp : Intrinsic<[ llvm_anyfloat_ty ],
[ llvm_anyint_ty,
llvm_metadata_ty,
llvm_metadata_ty ]>;
def int_experimental_constrained_fptrunc : Intrinsic<[ llvm_anyfloat_ty ],
[ llvm_anyfloat_ty,
llvm_metadata_ty,


@ -173,10 +173,9 @@ private:
SDValue NewIntValue) const;
SDValue ExpandFCOPYSIGN(SDNode *Node) const;
SDValue ExpandFABS(SDNode *Node) const;
SDValue ExpandLegalINT_TO_FP(SDNode *Node, SDValue &Chain);
void PromoteLegalINT_TO_FP(SDNode *N, const SDLoc &dl,
SmallVectorImpl<SDValue> &Results);
void PromoteLegalFP_TO_INT(SDNode *N, const SDLoc &dl,
SmallVectorImpl<SDValue> &Results);
@ -1010,6 +1009,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Action = TLI.getOperationAction(Node->getOpcode(),
Node->getOperand(0).getValueType());
break;
case ISD::STRICT_SINT_TO_FP:
case ISD::STRICT_UINT_TO_FP:
case ISD::STRICT_LRINT:
case ISD::STRICT_LLRINT:
case ISD::STRICT_LROUND:
@ -2338,9 +2339,14 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
/// INT_TO_FP operation of the specified operand when the target requests that
/// we expand it. At this point, we know that the result and operand types are
/// legal for the target.
SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node,
SDValue &Chain) {
bool isSigned = (Node->getOpcode() == ISD::STRICT_SINT_TO_FP ||
Node->getOpcode() == ISD::SINT_TO_FP);
EVT DestVT = Node->getValueType(0);
SDLoc dl(Node);
unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
SDValue Op0 = Node->getOperand(OpNo);
EVT SrcVT = Op0.getValueType();
// TODO: Should any fast-math-flags be set for the created nodes?
@ -2387,16 +2393,38 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
BitsToDouble(0x4330000080000000ULL) :
BitsToDouble(0x4330000000000000ULL),
dl, MVT::f64);
// Subtract the bias and get the final result.
SDValue Sub;
SDValue Result;
if (Node->isStrictFPOpcode()) {
Sub = DAG.getNode(ISD::STRICT_FSUB, dl, {MVT::f64, MVT::Other},
{Node->getOperand(0), Load, Bias});
if (DestVT != Sub.getValueType()) {
std::pair<SDValue, SDValue> ResultPair;
ResultPair =
DAG.getStrictFPExtendOrRound(Sub, SDValue(Node, 1), dl, DestVT);
Result = ResultPair.first;
Chain = ResultPair.second;
}
else
Result = Sub;
} else {
Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias);
Result = DAG.getFPExtendOrRound(Sub, dl, DestVT);
}
return Result;
}
assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet");
// Code below here assumes !isSigned without checking again.
// FIXME: This can produce slightly incorrect results. See details in
// FIXME: https://reviews.llvm.org/D69275
SDValue Tmp1;
if (Node->isStrictFPOpcode()) {
Tmp1 = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, { DestVT, MVT::Other },
{ Node->getOperand(0), Op0 });
} else
Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);
SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(SrcVT), Op0,
DAG.getConstant(0, dl, SrcVT), ISD::SETLT);
@ -2442,6 +2470,13 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
FudgeInReg = Handle.getValue();
}
if (Node->isStrictFPOpcode()) {
SDValue Result = DAG.getNode(ISD::STRICT_FADD, dl, { DestVT, MVT::Other },
{ Tmp1.getValue(1), Tmp1, FudgeInReg });
Chain = Result.getValue(1);
return Result;
}
return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg);
}
@ -2450,9 +2485,16 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
/// we promote it. At this point, we know that the result and operand types are
/// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP
/// operation that takes a larger input.
void SelectionDAGLegalize::PromoteLegalINT_TO_FP(
SDNode *N, const SDLoc &dl, SmallVectorImpl<SDValue> &Results) {
bool IsStrict = N->isStrictFPOpcode();
bool IsSigned = N->getOpcode() == ISD::SINT_TO_FP ||
N->getOpcode() == ISD::STRICT_SINT_TO_FP;
EVT DestVT = N->getValueType(0);
SDValue LegalOp = N->getOperand(IsStrict ? 1 : 0);
unsigned UIntOp = IsStrict ? ISD::STRICT_UINT_TO_FP : ISD::UINT_TO_FP;
unsigned SIntOp = IsStrict ? ISD::STRICT_SINT_TO_FP : ISD::SINT_TO_FP;
// First step, figure out the appropriate *INT_TO_FP operation to use.
EVT NewInTy = LegalOp.getValueType();
@ -2464,15 +2506,16 @@ SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT,
assert(NewInTy.isInteger() && "Ran out of possibilities!");
// If the target supports SINT_TO_FP of this type, use it.
if (TLI.isOperationLegalOrCustom(SIntOp, NewInTy)) {
OpToUse = SIntOp;
break;
}
if (IsSigned)
continue;
// If the target supports UINT_TO_FP of this type, use it.
if (TLI.isOperationLegalOrCustom(UIntOp, NewInTy)) {
OpToUse = UIntOp;
break;
}
@ -2481,9 +2524,20 @@ SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT,
// Okay, we found the operation and type to use. Zero extend our input to the
// desired type then run the operation on it.
if (IsStrict) {
SDValue Res =
DAG.getNode(OpToUse, dl, {DestVT, MVT::Other},
{N->getOperand(0),
DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
dl, NewInTy, LegalOp)});
Results.push_back(Res);
Results.push_back(Res.getValue(1));
return;
}
Results.push_back(
DAG.getNode(OpToUse, dl, DestVT,
DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
dl, NewInTy, LegalOp)));
}
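In IR terms, the promotion performed here widens the integer operand before
converting (sign-extend for signed inputs, zero-extend for unsigned, after
which a signed convert is safe). A minimal sketch of the unsigned case in
plain IR (function name hypothetical):

define float @promote_u16(i16 %x) {
  ; Zero-extend to a type the target can convert.
  %ext = zext i16 %x to i32
  ; The zero-extended value is non-negative, so a signed convert of the
  ; wider type computes the unsigned conversion of the original value.
  %res = sitofp i32 %ext to float
  ret float %res
}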
/// This function is responsible for legalizing a
@ -2899,15 +2953,20 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
break;
}
case ISD::UINT_TO_FP:
case ISD::STRICT_UINT_TO_FP:
if (TLI.expandUINT_TO_FP(Node, Tmp1, Tmp2, DAG)) {
Results.push_back(Tmp1);
if (Node->isStrictFPOpcode())
Results.push_back(Tmp2);
break;
}
LLVM_FALLTHROUGH;
case ISD::SINT_TO_FP:
case ISD::STRICT_SINT_TO_FP:
Tmp1 = ExpandLegalINT_TO_FP(Node, Tmp2);
Results.push_back(Tmp1);
if (Node->isStrictFPOpcode())
Results.push_back(Tmp2);
break;
case ISD::FP_TO_SINT:
if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG))
@ -4194,6 +4253,9 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Node->getOpcode() == ISD::INSERT_VECTOR_ELT) {
OVT = Node->getOperand(0).getSimpleValueType();
}
if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP ||
Node->getOpcode() == ISD::STRICT_SINT_TO_FP)
OVT = Node->getOperand(1).getSimpleValueType();
if (Node->getOpcode() == ISD::BR_CC)
OVT = Node->getOperand(2).getSimpleValueType();
MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
@ -4248,10 +4310,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
PromoteLegalFP_TO_INT(Node, dl, Results);
break;
case ISD::UINT_TO_FP:
case ISD::STRICT_UINT_TO_FP:
case ISD::SINT_TO_FP:
case ISD::STRICT_SINT_TO_FP:
PromoteLegalINT_TO_FP(Node, dl, Results);
break;
case ISD::VAARG: {
SDValue Chain = Node->getOperand(0); // Get the chain.


@ -307,23 +307,27 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
return TranslateLegalizeResults(Op, Result);
TargetLowering::LegalizeAction Action = TargetLowering::Legal;
EVT ValVT;
switch (Op.getOpcode()) {
default:
return TranslateLegalizeResults(Op, Result);
#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
case ISD::STRICT_##DAGN:
#include "llvm/IR/ConstrainedOps.def"
ValVT = Node->getValueType(0);
if (Op.getOpcode() == ISD::STRICT_SINT_TO_FP ||
Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
ValVT = Node->getOperand(1).getValueType();
Action = TLI.getOperationAction(Node->getOpcode(), ValVT);
// If we're asked to expand a strict vector floating-point operation,
// by default we're going to simply unroll it. That is usually the
// best approach, except in the case where the resulting strict (scalar)
// operations would themselves use the fallback mutation to non-strict.
// In that specific case, just do the fallback on the vector op.
if (Action == TargetLowering::Expand && !TLI.isStrictFPEnabled() &&
TLI.getStrictFPOperationAction(Node->getOpcode(), ValVT) ==
TargetLowering::Legal) {
EVT EltVT = ValVT.getVectorElementType();
if (TLI.getOperationAction(Node->getOpcode(), EltVT)
== TargetLowering::Expand &&
TLI.getStrictFPOperationAction(Node->getOpcode(), EltVT)
@ -1153,18 +1157,29 @@ SDValue VectorLegalizer::ExpandFP_TO_UINT(SDValue Op) {
}
SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
bool IsStrict = Op.getNode()->isStrictFPOpcode();
unsigned OpNo = IsStrict ? 1 : 0;
SDValue Src = Op.getOperand(OpNo);
EVT VT = Src.getValueType();
SDLoc DL(Op);
// Attempt to expand using TargetLowering.
SDValue Result;
SDValue Chain;
if (TLI.expandUINT_TO_FP(Op.getNode(), Result, Chain, DAG)) {
if (IsStrict)
// Relink the chain
DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Chain);
return Result;
}
// Make sure that the SINT_TO_FP and SRL instructions are available.
if (((!IsStrict && TLI.getOperationAction(ISD::SINT_TO_FP, VT) ==
TargetLowering::Expand) ||
(IsStrict && TLI.getOperationAction(ISD::STRICT_SINT_TO_FP, VT) ==
TargetLowering::Expand)) ||
TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand)
return IsStrict ? SDValue() : DAG.UnrollVectorOp(Op.getNode());
unsigned BW = VT.getScalarSizeInBits();
assert((BW == 64 || BW == 32) &&
@ -1182,8 +1197,31 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
SDValue TWOHW = DAG.getConstantFP(1ULL << (BW / 2), DL, Op.getValueType());
// Clear upper part of LO, lower HI
SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Src, HalfWord);
SDValue LO = DAG.getNode(ISD::AND, DL, VT, Src, HalfWordMask);
if (IsStrict) {
// Convert hi and lo to floats
// Convert the hi part back to the upper values
// TODO: Can any fast-math-flags be set on these nodes?
SDValue fHI =
DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, {Op.getValueType(), MVT::Other},
{Op.getOperand(0), HI});
fHI = DAG.getNode(ISD::STRICT_FMUL, DL, {Op.getValueType(), MVT::Other},
{SDValue(fHI.getNode(), 1), fHI, TWOHW});
SDValue fLO =
DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, {Op.getValueType(), MVT::Other},
{SDValue(fHI.getNode(), 1), LO});
// Add the two halves
SDValue Result =
DAG.getNode(ISD::STRICT_FADD, DL, {Op.getValueType(), MVT::Other},
{SDValue(fLO.getNode(), 1), fHI, fLO});
// Relink the chain
DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), SDValue(Result.getNode(), 1));
return Result;
}
// Convert hi and lo to floats
// Convert the hi part back to the upper values
@ -1318,7 +1356,12 @@ SDValue VectorLegalizer::ExpandFixedPointMul(SDValue Op) {
}
SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) {
if (Op.getOpcode() == ISD::STRICT_UINT_TO_FP) {
if (SDValue Res = ExpandUINT_TO_FLOAT(Op))
return Res;
}
EVT VT = Op.getValue(0).getValueType();
EVT EltVT = VT.getVectorElementType();
unsigned NumElems = VT.getVectorNumElements();
unsigned NumOpers = Op.getNumOperands();
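For reference, the hi/lo decomposition that ExpandUINT_TO_FLOAT performs
above can be written as plain (non-strict) IR. A scalar sketch, assuming a
32-bit input split into 16-bit halves (function name hypothetical):

define float @u32_to_f32_split(i32 %x) {
  %hi = lshr i32 %x, 16              ; upper half
  %lo = and i32 %x, 65535            ; lower half
  ; Both halves are small enough to convert exactly as signed values.
  %hif = sitofp i32 %hi to float
  %lof = sitofp i32 %lo to float
  %scaled = fmul float %hif, 65536.0 ; shift the upper half back up (exact)
  %res = fadd float %scaled, %lof    ; single rounding happens here
  ret float %res
}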


@ -572,6 +572,8 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::UINT_TO_FP:
Res = ScalarizeVecOp_UnaryOp(N);
break;
case ISD::STRICT_SINT_TO_FP:
case ISD::STRICT_UINT_TO_FP:
case ISD::STRICT_FP_TO_SINT:
case ISD::STRICT_FP_TO_UINT:
Res = ScalarizeVecOp_UnaryOp_StrictFP(N);
@ -1931,9 +1933,12 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::VSELECT:
Res = SplitVecOp_VSELECT(N, OpNo);
break;
case ISD::STRICT_SINT_TO_FP:
case ISD::STRICT_UINT_TO_FP:
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
if (N->getValueType(0).bitsLT(
N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType()))
Res = SplitVecOp_TruncateHelper(N);
else
Res = SplitVecOp_UnaryOp(N);
@ -2494,7 +2499,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
//
// Without this transform, the original truncate would end up being
// scalarized, which is pretty much always a last resort.
unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
SDValue InVec = N->getOperand(OpNo);
EVT InVT = InVec->getValueType(0);
EVT OutVT = N->getValueType(0);
unsigned NumElements = OutVT.getVectorNumElements();
@ -2538,8 +2544,23 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
EVT::getIntegerVT(*DAG.getContext(), InElementSize/2);
EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT,
NumElements/2);
SDValue HalfLo;
SDValue HalfHi;
SDValue Chain;
if (N->isStrictFPOpcode()) {
HalfLo = DAG.getNode(N->getOpcode(), DL, {HalfVT, MVT::Other},
{N->getOperand(0), InLoVec});
HalfHi = DAG.getNode(N->getOpcode(), DL, {HalfVT, MVT::Other},
{N->getOperand(0), InHiVec});
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, HalfLo.getValue(1),
HalfHi.getValue(1));
} else {
HalfLo = DAG.getNode(N->getOpcode(), DL, HalfVT, InLoVec);
HalfHi = DAG.getNode(N->getOpcode(), DL, HalfVT, InHiVec);
}
// Concatenate them to get the full intermediate truncation result.
EVT InterVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements);
SDValue InterVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InterVT, HalfLo,
@ -2548,6 +2569,17 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
// type. This should normally be something that ends up being legal directly,
// but in theory if a target has very wide vectors and an annoyingly
// restricted set of legal types, this split can chain to build things up.
if (N->isStrictFPOpcode()) {
SDValue Res = DAG.getNode(
ISD::STRICT_FP_ROUND, DL, {OutVT, MVT::Other},
{Chain, InterVec,
DAG.getTargetConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout()))});
// Relink the chain
ReplaceValueWith(SDValue(N, 1), SDValue(Res.getNode(), 1));
return Res;
}
return IsFloat
? DAG.getNode(ISD::FP_ROUND, DL, OutVT, InterVec,
DAG.getTargetConstant(
@ -3000,6 +3032,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) {
case ISD::STRICT_FP_ROUND:
case ISD::STRICT_FP_TO_SINT:
case ISD::STRICT_FP_TO_UINT:
case ISD::STRICT_SINT_TO_FP:
case ISD::STRICT_UINT_TO_FP:
return WidenVecRes_Convert_StrictFP(N);
default:
break;
@ -4120,7 +4154,9 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::FP_TO_UINT:
case ISD::STRICT_FP_TO_UINT:
case ISD::SINT_TO_FP:
case ISD::STRICT_SINT_TO_FP:
case ISD::UINT_TO_FP:
case ISD::STRICT_UINT_TO_FP:
case ISD::TRUNCATE:
Res = WidenVecOp_Convert(N);
break;


@ -1117,6 +1117,20 @@ SDValue SelectionDAG::getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT) {
: getNode(ISD::FP_ROUND, DL, VT, Op, getIntPtrConstant(0, DL));
}
std::pair<SDValue, SDValue>
SelectionDAG::getStrictFPExtendOrRound(SDValue Op, SDValue Chain,
const SDLoc &DL, EVT VT) {
assert(!VT.bitsEq(Op.getValueType()) &&
"Strict no-op FP extend/round not allowed.");
SDValue Res =
VT.bitsGT(Op.getValueType())
? getNode(ISD::STRICT_FP_EXTEND, DL, {VT, MVT::Other}, {Chain, Op})
: getNode(ISD::STRICT_FP_ROUND, DL, {VT, MVT::Other},
{Chain, Op, getIntPtrConstant(0, DL)});
return std::pair<SDValue, SDValue>(Res, SDValue(Res.getNode(), 1));
}
SDValue SelectionDAG::getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
return VT.bitsGT(Op.getValueType()) ?
getNode(ISD::ANY_EXTEND, DL, VT, Op) :


@ -326,7 +326,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::STRICT_FP_EXTEND: return "strict_fp_extend";
case ISD::SINT_TO_FP: return "sint_to_fp";
case ISD::STRICT_SINT_TO_FP: return "strict_sint_to_fp";
case ISD::UINT_TO_FP: return "uint_to_fp";
case ISD::STRICT_UINT_TO_FP: return "strict_uint_to_fp";
case ISD::FP_TO_SINT: return "fp_to_sint";
case ISD::STRICT_FP_TO_SINT: return "strict_fp_to_sint";
case ISD::FP_TO_UINT: return "fp_to_uint";


@ -6116,8 +6116,10 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
}
bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
SDValue &Chain,
SelectionDAG &DAG) const {
SDValue Src = Node->getOperand(0);
unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
SDValue Src = Node->getOperand(OpNo);
EVT SrcVT = Src.getValueType();
EVT DstVT = Node->getValueType(0);
@ -6140,7 +6142,13 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
// For unsigned conversions, convert them to signed conversions using the
// algorithm from the x86_64 __floatundidf in compiler_rt.
SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src);
SDValue Fast;
if (Node->isStrictFPOpcode()) {
Fast = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {DstVT, MVT::Other},
{Node->getOperand(0), Src});
Chain = SDValue(Fast.getNode(), 1);
} else
Fast = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src);
SDValue ShiftConst = DAG.getConstant(1, dl, ShiftVT);
SDValue Shr = DAG.getNode(ISD::SRL, dl, SrcVT, Src, ShiftConst);
@ -6148,8 +6156,17 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
SDValue And = DAG.getNode(ISD::AND, dl, SrcVT, Src, AndConst);
SDValue Or = DAG.getNode(ISD::OR, dl, SrcVT, And, Shr);
SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Or);
SDValue Slow = DAG.getNode(ISD::FADD, dl, DstVT, SignCvt, SignCvt);
SDValue Slow;
if (Node->isStrictFPOpcode()) {
SDValue SignCvt = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl,
{DstVT, MVT::Other}, {Chain, Or});
Slow = DAG.getNode(ISD::STRICT_FADD, dl, { DstVT, MVT::Other },
{ SignCvt.getValue(1), SignCvt, SignCvt });
Chain = Slow.getValue(1);
} else {
SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Or);
Slow = DAG.getNode(ISD::FADD, dl, DstVT, SignCvt, SignCvt);
}
// TODO: This really should be implemented using a branch rather than a
// select. We happen to get lucky and machinesink does the right
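The fast/slow sequence built above mirrors compiler-rt's __floatundidf: the
direct signed convert is correct when the sign bit is clear; otherwise the
input is halved with its low bit folded in (round-to-odd), converted, and
doubled. A plain-IR sketch of the same logic, assuming an i64 source and f64
destination (function name hypothetical):

define double @u64_to_f64_select(i64 %x) {
  %fast = sitofp i64 %x to double   ; valid only when the sign bit is clear
  %shr = lshr i64 %x, 1
  %lowbit = and i64 %x, 1
  %halved = or i64 %shr, %lowbit    ; halve, keeping the low bit for rounding
  %cvt = sitofp i64 %halved to double
  %slow = fadd double %cvt, %cvt    ; double it back up
  %isneg = icmp slt i64 %x, 0
  %res = select i1 %isneg, double %slow, double %fast
  ret double %res
}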
@ -6192,8 +6209,18 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
if (Node->isStrictFPOpcode()) {
SDValue HiSub =
DAG.getNode(ISD::STRICT_FSUB, dl, {DstVT, MVT::Other},
{Node->getOperand(0), HiFlt, TwoP84PlusTwoP52});
Result = DAG.getNode(ISD::STRICT_FADD, dl, {DstVT, MVT::Other},
{HiSub.getValue(1), LoFlt, HiSub});
Chain = Result.getValue(1);
} else {
SDValue HiSub =
DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
}
return true;
}
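The sequence above ORs the low and high 32-bit halves of the input into the
mantissas of 2^52 and 2^84 and then subtracts the combined bias away. A
plain-IR sketch of the same trick (constants checked against the IEEE-754
bit patterns; function name hypothetical):

define double @u64_to_f64_bits(i64 %x) {
  %lo = and i64 %x, 4294967295                 ; low 32 bits
  %hi = lshr i64 %x, 32                        ; high 32 bits
  %lo.or = or i64 %lo, 4841369599423283200     ; bit pattern of 2^52 (0x4330...)
  %hi.or = or i64 %hi, 4985484787499139072     ; bit pattern of 2^84 (0x4530...)
  %lo.f = bitcast i64 %lo.or to double         ; == 2^52 + lo
  %hi.f = bitcast i64 %hi.or to double         ; == 2^84 + hi * 2^32
  ; Subtract 2^84 + 2^52 so the biases cancel when the halves are added.
  %hi.sub = fsub double %hi.f, 0x4530000000100000
  %res = fadd double %lo.f, %hi.sub            ; == hi * 2^32 + lo
  ret double %res
}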


@ -4804,6 +4804,28 @@ void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
}
break;
case Intrinsic::experimental_constrained_sitofp:
case Intrinsic::experimental_constrained_uitofp: {
Value *Operand = FPI.getArgOperand(0);
uint64_t NumSrcElem = 0;
Assert(Operand->getType()->isIntOrIntVectorTy(),
"Intrinsic first argument must be integer", &FPI);
if (auto *OperandT = dyn_cast<VectorType>(Operand->getType())) {
NumSrcElem = OperandT->getNumElements();
}
Operand = &FPI;
Assert((NumSrcElem > 0) == Operand->getType()->isVectorTy(),
"Intrinsic first argument and result disagree on vector use", &FPI);
Assert(Operand->getType()->isFPOrFPVectorTy(),
"Intrinsic result must be a floating point", &FPI);
if (auto *OperandT = dyn_cast<VectorType>(Operand->getType())) {
Assert(NumSrcElem == OperandT->getNumElements(),
"Intrinsic first argument and result vector lengths must be equal",
&FPI);
}
} break;
case Intrinsic::experimental_constrained_fptrunc:
case Intrinsic::experimental_constrained_fpext: {
Value *Operand = FPI.getArgOperand(0);


@ -228,26 +228,34 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (!Subtarget.useSoftFloat()) {
// Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
// operation.
setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i8, Promote);
setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i16, Promote);
// We have an algorithm for SSE2, and we turn this into a 64-bit
// FILD or VCVTUSI2SS/SD for other targets.
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
// We have an algorithm for SSE2->double, and we turn this into a
// 64-bit FILD followed by conditional FADD for other targets.
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
// Promote i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
// this operation.
setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i8, Promote);
// SSE has no i16 to fp conversion, only i32. We promote in the handler
// to allow f80 to use i16 and f64 to use i16 with sse1 only
setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i16, Custom);
// f32 and f64 cases are Legal with SSE1/SSE2, f80 case is not
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
// In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
// are Legal, f80 is custom lowered.
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
// Promote i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
// this operation.
@ -985,9 +993,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i32, Custom);
// Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
@ -18421,8 +18432,13 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
static SDValue LowerI64IntToFP_AVX512DQ(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
assert((Op.getOpcode() == ISD::SINT_TO_FP ||
Op.getOpcode() == ISD::UINT_TO_FP) && "Unexpected opcode!");
SDValue Src = Op.getOperand(0);
Op.getOpcode() == ISD::STRICT_SINT_TO_FP ||
Op.getOpcode() == ISD::STRICT_UINT_TO_FP ||
Op.getOpcode() == ISD::UINT_TO_FP) &&
"Unexpected opcode!");
bool IsStrict = Op->isStrictFPOpcode();
unsigned OpNo = IsStrict ? 1 : 0;
SDValue Src = Op.getOperand(OpNo);
MVT SrcVT = Src.getSimpleValueType();
MVT VT = Op.getSimpleValueType();
@ -18439,7 +18455,17 @@ static SDValue LowerI64IntToFP_AVX512DQ(SDValue Op, SelectionDAG &DAG,
SDLoc dl(Op);
SDValue InVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecInVT, Src);
if (IsStrict) {
SDValue CvtVec = DAG.getNode(Op.getOpcode(), dl, {VecVT, MVT::Other},
{Op.getOperand(0), InVec});
SDValue Chain = CvtVec.getValue(1);
SDValue Value = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, CvtVec,
DAG.getIntPtrConstant(0, dl));
return DAG.getMergeValues({Value, Chain}, dl);
}
SDValue CvtVec = DAG.getNode(Op.getOpcode(), dl, VecVT, InVec);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, CvtVec,
DAG.getIntPtrConstant(0, dl));
}
@ -18510,7 +18536,9 @@ static SDValue vectorizeExtractedCast(SDValue Cast, SelectionDAG &DAG,
SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
bool IsStrict = Op->isStrictFPOpcode();
unsigned OpNo = IsStrict ? 1 : 0;
SDValue Src = Op.getOperand(OpNo);
MVT SrcVT = Src.getSimpleValueType();
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
@ -18519,7 +18547,8 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
return Extract;
if (SrcVT.isVector()) {
if (SrcVT == MVT::v2i32 && VT == MVT::v2f64 && !IsStrict) {
// FIXME: A strict version of CVTSI2P is needed.
return DAG.getNode(X86ISD::CVTSI2P, dl, VT,
DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src,
DAG.getUNDEF(SrcVT)));
@ -18545,13 +18574,17 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
// SSE doesn't have an i16 conversion so we need to promote.
if (SrcVT == MVT::i16 && (UseSSEReg || VT == MVT::f128)) {
SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, Src);
if (IsStrict)
return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other},
{Op.getOperand(0), Ext});
return DAG.getNode(ISD::SINT_TO_FP, dl, VT, Ext);
}
if (VT == MVT::f128)
return LowerF128Call(Op, DAG, RTLIB::getSINTTOFP(SrcVT, VT));
SDValue ValueToStore = Src;
if (SrcVT == MVT::i64 && UseSSEReg && !Subtarget.is64Bit())
// Bitcasting to f64 here allows us to do a single 64-bit store from
// an SSE register, avoiding the store forwarding penalty that would come
@ -18563,10 +18596,16 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
auto PtrVT = getPointerTy(MF.getDataLayout());
int SSFI = MF.getFrameInfo().CreateStackObject(Size, Size, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
SDValue Chain = IsStrict ? Op->getOperand(0) : DAG.getEntryNode();
Chain = DAG.getStore(
Chain, dl, ValueToStore, StackSlot,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI));
std::pair<SDValue, SDValue> Tmp = BuildFILD(Op, SrcVT, Chain, StackSlot, DAG);
if (IsStrict)
return DAG.getMergeValues({Tmp.first, Tmp.second}, dl);
return Tmp.first;
}
std::pair<SDValue, SDValue> X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
@ -18654,6 +18693,8 @@ static SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG,
#endif
*/
bool IsStrict = Op->isStrictFPOpcode();
unsigned OpNo = IsStrict ? 1 : 0;
SDLoc dl(Op);
LLVMContext *Context = DAG.getContext();
@ -18674,8 +18715,8 @@ static SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG,
SDValue CPIdx1 = DAG.getConstantPool(C1, PtrVT, 16);
// Load the 64-bit value into an XMM register.
SDValue XR1 =
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Op.getOperand(OpNo));
SDValue CLod0 =
DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
@ -18688,32 +18729,50 @@ static SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
/* Alignment = */ 16);
SDValue XR2F = DAG.getBitcast(MVT::v2f64, Unpck1);
SDValue Sub;
SDValue Chain;
// TODO: Are there any fast-math-flags to propagate here?
if (IsStrict) {
Sub = DAG.getNode(ISD::STRICT_FSUB, dl, {MVT::v2f64, MVT::Other},
{Op.getOperand(0), XR2F, CLod1});
Chain = Sub.getValue(1);
} else
Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
SDValue Result;
if (!IsStrict && Subtarget.hasSSE3() &&
shouldUseHorizontalOp(true, DAG, Subtarget)) {
// FIXME: Do we need a STRICT version of FHADD?
Result = DAG.getNode(X86ISD::FHADD, dl, MVT::v2f64, Sub, Sub);
} else {
SDValue Shuffle = DAG.getVectorShuffle(MVT::v2f64, dl, Sub, Sub, {1,-1});
if (IsStrict) {
Result = DAG.getNode(ISD::STRICT_FADD, dl, {MVT::v2f64, MVT::Other},
{Chain, Shuffle, Sub});
Chain = Result.getValue(1);
} else
Result = DAG.getNode(ISD::FADD, dl, MVT::v2f64, Shuffle, Sub);
}
Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Result,
DAG.getIntPtrConstant(0, dl));
if (IsStrict)
return DAG.getMergeValues({Result, Chain}, dl);
return Result;
}
/// 32-bit unsigned integer to float expansion.
static SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
unsigned OpNo = Op.getNode()->isStrictFPOpcode() ? 1 : 0;
SDLoc dl(Op);
// FP constant to bias correct the final result.
SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), dl,
MVT::f64);
// Load the 32-bit value into an XMM register.
SDValue Load =
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Op.getOperand(OpNo));
// Zero out the upper parts of the register.
Load = getShuffleVectorZeroOrUndef(Load, 0, true, Subtarget, DAG);
@ -18733,6 +18792,23 @@ static SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
DAG.getBitcast(MVT::v2f64, Or), DAG.getIntPtrConstant(0, dl));
if (Op.getNode()->isStrictFPOpcode()) {
// Subtract the bias.
// TODO: Are there any fast-math-flags to propagate here?
SDValue Chain = Op.getOperand(0);
SDValue Sub = DAG.getNode(ISD::STRICT_FSUB, dl, {MVT::f64, MVT::Other},
{Chain, Or, Bias});
if (Op.getValueType() == Sub.getValueType())
return Sub;
// Handle final rounding.
std::pair<SDValue, SDValue> ResultPair = DAG.getStrictFPExtendOrRound(
Sub, Sub.getValue(1), dl, Op.getSimpleValueType());
return DAG.getMergeValues({ResultPair.first, ResultPair.second}, dl);
}
// Subtract the bias.
// TODO: Are there any fast-math-flags to propagate here?
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias);
@ -18747,6 +18823,10 @@ static SDValue lowerUINT_TO_FP_v2i32(SDValue Op, SelectionDAG &DAG,
if (Op.getSimpleValueType() != MVT::v2f64)
return SDValue();
// FIXME: Need to fix the lack of StrictFP support here.
if (Op.getNode()->isStrictFPOpcode())
return SDValue();
SDValue N0 = Op.getOperand(0);
assert(N0.getSimpleValueType() == MVT::v2i32 && "Unexpected input type");
@ -18873,7 +18953,8 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
static SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
unsigned OpNo = Op.getNode()->isStrictFPOpcode() ? 1 : 0;
SDValue N0 = Op.getOperand(OpNo);
MVT SrcVT = N0.getSimpleValueType();
SDLoc dl(Op);
@ -18891,11 +18972,14 @@ static SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG,
SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
bool IsStrict = Op->isStrictFPOpcode();
unsigned OpNo = IsStrict ? 1 : 0;
SDValue Src = Op.getOperand(OpNo);
SDLoc dl(Op);
auto PtrVT = getPointerTy(DAG.getDataLayout());
MVT SrcVT = Src.getSimpleValueType();
MVT DstVT = Op.getSimpleValueType();
SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
if (DstVT == MVT::f128)
return LowerF128Call(Op, DAG, RTLIB::getUINTTOFP(SrcVT, DstVT));
@ -18915,8 +18999,11 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
// Promote i32 to i64 and use a signed conversion on 64-bit targets.
if (SrcVT == MVT::i32 && Subtarget.is64Bit()) {
Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Src);
if (IsStrict)
return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {DstVT, MVT::Other},
{Chain, Src});
return DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src);
}
if (SDValue V = LowerI64IntToFP_AVX512DQ(Op, DAG, Subtarget))
@ -18933,22 +19020,28 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64);
if (SrcVT == MVT::i32) {
SDValue OffsetSlot = DAG.getMemBasePlusOffset(StackSlot, 4, dl);
SDValue Store1 =
DAG.getStore(Chain, dl, Src, StackSlot, MachinePointerInfo());
SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, dl, MVT::i32),
OffsetSlot, MachinePointerInfo());
return BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG).first;
std::pair<SDValue, SDValue> Tmp =
BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG);
if (IsStrict)
return DAG.getMergeValues({Tmp.first, Tmp.second}, dl);
return Tmp.first;
}
assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP");
SDValue ValueToStore = Src;
if (isScalarFPTypeInSSEReg(Op.getValueType()) && !Subtarget.is64Bit()) {
// Bitcasting to f64 here allows us to do a single 64-bit store from
// an SSE register, avoiding the store forwarding penalty that would come
// with two 32-bit stores.
ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore);
}
SDValue Store =
DAG.getStore(Chain, dl, ValueToStore, StackSlot, MachinePointerInfo());
// For i64 source, we need to add the appropriate power of 2 if the input
// was negative. This is the same as the optimization in
// DAGTypeLegalizer::ExpandIntOp_UNIT_TO_FP, and for it to be safe here,
@ -18963,13 +19056,14 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
SDValue Ops[] = { Store, StackSlot };
SDValue Fild = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops,
MVT::i64, MMO);
Chain = Fild.getValue(1);
APInt FF(32, 0x5F800000ULL);
// Check whether the sign bit is set.
SDValue SignSet = DAG.getSetCC(
dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
Op.getOperand(OpNo), DAG.getConstant(0, dl, MVT::i64), ISD::SETLT);
// Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits.
SDValue FudgePtr = DAG.getConstantPool(
@ -18984,11 +19078,18 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
// Load the value out, extending it from f32 to f80.
// FIXME: Avoid the extend by constructing the right constant pool?
SDValue Fudge = DAG.getExtLoad(
ISD::EXTLOAD, dl, MVT::f80, Chain, FudgePtr,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32,
/* Alignment = */ 4);
Chain = Fudge.getValue(1);
// Extend everything to 80 bits to force it to be done on x87.
// TODO: Are there any fast-math-flags to propagate here?
if (IsStrict) {
SDValue Add = DAG.getNode(ISD::STRICT_FADD, dl, {MVT::f80, MVT::Other},
{Chain, Fild, Fudge});
return DAG.getNode(ISD::STRICT_FP_ROUND, dl, {DstVT, MVT::Other},
{Add.getValue(1), Add, DAG.getIntPtrConstant(0, dl)});
}
SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge);
return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add,
DAG.getIntPtrConstant(0, dl));
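The FILD-plus-fudge sequence above converts the i64 as signed and then
compensates: when the input's sign bit was set, the signed interpretation is
2^64 too small, so 2^64 (the 0x5F800000 float constant) is selected from the
constant pool and added back. The same idea in plain IR (function name
hypothetical):

define double @u64_via_signed(i64 %x) {
  ; Off by 2^64 whenever %x has its sign bit set.
  %s = sitofp i64 %x to double
  %isneg = icmp slt i64 %x, 0
  ; 0x43F0000000000000 is the IEEE-754 bit pattern of 2^64 as a double.
  %fudge = select i1 %isneg, double 0x43F0000000000000, double 0.0
  %res = fadd double %s, %fudge
  ret double %res
}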
@ -19042,10 +19143,7 @@ X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
int SSFI = MF.getFrameInfo().CreateStackObject(MemSize, MemSize, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
SDValue Adjust; // 0x0 or 0x80000000, for result sign bit adjustment.
@ -28013,7 +28111,9 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
case ISD::FSHL:
case ISD::FSHR: return LowerFunnelShift(Op, Subtarget, DAG);
case ISD::STRICT_SINT_TO_FP:
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::STRICT_UINT_TO_FP:
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, Subtarget, DAG);


@ -1941,6 +1941,762 @@ entry:
ret i64 %result
}
; Verify that sitofp(%x) isn't simplified when the rounding mode is
; unknown.
; Verify that no gross errors happen.
define double @sifdb(i8 %x) #0 {
; X87-LABEL: sifdb:
; X87: # %bb.0: # %entry
; X87-NEXT: pushl %eax
; X87-NEXT: .cfi_def_cfa_offset 8
; X87-NEXT: movsbl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X87-NEXT: filds {{[0-9]+}}(%esp)
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: sifdb:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
; X86-SSE-NEXT: movsbl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: cvtsi2sd %eax, %xmm0
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: fldl (%esp)
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: sifdb:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movsbl %dil, %eax
; SSE-NEXT: cvtsi2sd %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sifdb:
; AVX: # %bb.0: # %entry
; AVX-NEXT: movsbl %dil, %eax
; AVX-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.sitofp.f64.i8(i8 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %result
}
define double @sifdw(i16 %x) #0 {
; X87-LABEL: sifdw:
; X87: # %bb.0: # %entry
; X87-NEXT: pushl %eax
; X87-NEXT: .cfi_def_cfa_offset 8
; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X87-NEXT: filds {{[0-9]+}}(%esp)
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: sifdw:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
; X86-SSE-NEXT: movswl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: cvtsi2sd %eax, %xmm0
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: fldl (%esp)
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: sifdw:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movswl %di, %eax
; SSE-NEXT: cvtsi2sd %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sifdw:
; AVX: # %bb.0: # %entry
; AVX-NEXT: movswl %di, %eax
; AVX-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.sitofp.f64.i16(i16 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %result
}
define double @sifdi(i32 %x) #0 {
; X87-LABEL: sifdi:
; X87: # %bb.0: # %entry
; X87-NEXT: pushl %eax
; X87-NEXT: .cfi_def_cfa_offset 8
; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movl %eax, (%esp)
; X87-NEXT: fildl (%esp)
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: sifdi:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
; X86-SSE-NEXT: cvtsi2sdl {{[0-9]+}}(%esp), %xmm0
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: fldl (%esp)
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: sifdi:
; SSE: # %bb.0: # %entry
; SSE-NEXT: cvtsi2sd %edi, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sifdi:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %result
}
define float @siffb(i8 %x) #0 {
; X87-LABEL: siffb:
; X87: # %bb.0: # %entry
; X87-NEXT: pushl %eax
; X87-NEXT: .cfi_def_cfa_offset 8
; X87-NEXT: movsbl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X87-NEXT: filds {{[0-9]+}}(%esp)
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: siffb:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: pushl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 8
; X86-SSE-NEXT: movsbl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: cvtsi2ss %eax, %xmm0
; X86-SSE-NEXT: movss %xmm0, (%esp)
; X86-SSE-NEXT: flds (%esp)
; X86-SSE-NEXT: popl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: siffb:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movsbl %dil, %eax
; SSE-NEXT: cvtsi2ss %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: siffb:
; AVX: # %bb.0: # %entry
; AVX-NEXT: movsbl %dil, %eax
; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call float @llvm.experimental.constrained.sitofp.f32.i8(i8 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret float %result
}
define float @siffw(i16 %x) #0 {
; X87-LABEL: siffw:
; X87: # %bb.0: # %entry
; X87-NEXT: pushl %eax
; X87-NEXT: .cfi_def_cfa_offset 8
; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X87-NEXT: filds {{[0-9]+}}(%esp)
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: siffw:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: pushl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 8
; X86-SSE-NEXT: movswl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: cvtsi2ss %eax, %xmm0
; X86-SSE-NEXT: movss %xmm0, (%esp)
; X86-SSE-NEXT: flds (%esp)
; X86-SSE-NEXT: popl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: siffw:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movswl %di, %eax
; SSE-NEXT: cvtsi2ss %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: siffw:
; AVX: # %bb.0: # %entry
; AVX-NEXT: movswl %di, %eax
; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call float @llvm.experimental.constrained.sitofp.f32.i16(i16 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret float %result
}
define float @siffi(i32 %x) #0 {
; X87-LABEL: siffi:
; X87: # %bb.0: # %entry
; X87-NEXT: pushl %eax
; X87-NEXT: .cfi_def_cfa_offset 8
; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movl %eax, (%esp)
; X87-NEXT: fildl (%esp)
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: siffi:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: pushl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 8
; X86-SSE-NEXT: cvtsi2ssl {{[0-9]+}}(%esp), %xmm0
; X86-SSE-NEXT: movss %xmm0, (%esp)
; X86-SSE-NEXT: flds (%esp)
; X86-SSE-NEXT: popl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: siffi:
; SSE: # %bb.0: # %entry
; SSE-NEXT: cvtsi2ss %edi, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: siffi:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret float %result
}
define double @sifdl(i64 %x) #0 {
; X87-LABEL: sifdl:
; X87: # %bb.0: # %entry
; X87-NEXT: subl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 16
; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X87-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X87-NEXT: movl %eax, (%esp)
; X87-NEXT: fildll (%esp)
; X87-NEXT: addl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: sifdl:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $20, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 24
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: movlps %xmm0, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fildll {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fstpl (%esp)
; X86-SSE-NEXT: fldl (%esp)
; X86-SSE-NEXT: addl $20, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: sifdl:
; SSE: # %bb.0: # %entry
; SSE-NEXT: cvtsi2sd %rdi, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sifdl:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtsi2sd %rdi, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.sitofp.f64.i64(i64 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %result
}
define float @siffl(i64 %x) #0 {
; X87-LABEL: siffl:
; X87: # %bb.0: # %entry
; X87-NEXT: subl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 16
; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X87-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X87-NEXT: movl %eax, (%esp)
; X87-NEXT: fildll (%esp)
; X87-NEXT: addl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: siffl:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $20, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 24
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: movlps %xmm0, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fildll {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp)
; X86-SSE-NEXT: flds {{[0-9]+}}(%esp)
; X86-SSE-NEXT: addl $20, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: siffl:
; SSE: # %bb.0: # %entry
; SSE-NEXT: cvtsi2ss %rdi, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: siffl:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call float @llvm.experimental.constrained.sitofp.f32.i64(i64 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret float %result
}
; Verify that uitofp(%x) isn't simplified when the rounding mode is
; unknown.
; Verify that no gross errors happen.
define double @uifdb(i8 %x) #0 {
; X87-LABEL: uifdb:
; X87: # %bb.0: # %entry
; X87-NEXT: pushl %eax
; X87-NEXT: .cfi_def_cfa_offset 8
; X87-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X87-NEXT: filds {{[0-9]+}}(%esp)
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: uifdb:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: cvtsi2sd %eax, %xmm0
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: fldl (%esp)
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: uifdb:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movzbl %dil, %eax
; SSE-NEXT: cvtsi2sd %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: uifdb:
; AVX: # %bb.0: # %entry
; AVX-NEXT: movzbl %dil, %eax
; AVX-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.uitofp.f64.i8(i8 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %result
}
define double @uifdw(i16 %x) #0 {
; X87-LABEL: uifdw:
; X87: # %bb.0: # %entry
; X87-NEXT: pushl %eax
; X87-NEXT: .cfi_def_cfa_offset 8
; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movl %eax, (%esp)
; X87-NEXT: fildl (%esp)
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: uifdw:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: cvtsi2sd %eax, %xmm0
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: fldl (%esp)
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: uifdw:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movzwl %di, %eax
; SSE-NEXT: cvtsi2sd %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: uifdw:
; AVX: # %bb.0: # %entry
; AVX-NEXT: movzwl %di, %eax
; AVX-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.uitofp.f64.i16(i16 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %result
}
define double @uifdi(i32 %x) #0 {
; X87-LABEL: uifdi:
; X87: # %bb.0: # %entry
; X87-NEXT: subl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 16
; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movl %eax, (%esp)
; X87-NEXT: movl $0, {{[0-9]+}}(%esp)
; X87-NEXT: fildll (%esp)
; X87-NEXT: addl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: uifdi:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: orpd %xmm0, %xmm1
; X86-SSE-NEXT: subsd %xmm0, %xmm1
; X86-SSE-NEXT: movsd %xmm1, (%esp)
; X86-SSE-NEXT: fldl (%esp)
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: uifdi:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movl %edi, %eax
; SSE-NEXT: cvtsi2sd %rax, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: uifdi:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: uifdi:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvtusi2sd %edi, %xmm0, %xmm0
; AVX512-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %result
}
define double @uifdl(i64 %x) #0 {
; X87-LABEL: uifdl:
; X87: # %bb.0: # %entry
; X87-NEXT: subl $20, %esp
; X87-NEXT: .cfi_def_cfa_offset 24
; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X87-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X87-NEXT: movl %eax, (%esp)
; X87-NEXT: xorl %eax, %eax
; X87-NEXT: testl %ecx, %ecx
; X87-NEXT: setns %al
; X87-NEXT: fildll (%esp)
; X87-NEXT: fadds {{\.LCPI.*}}(,%eax,4)
; X87-NEXT: fstpl {{[0-9]+}}(%esp)
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: addl $20, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: uifdl:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; X86-SSE-NEXT: subpd {{\.LCPI.*}}, %xmm0
; X86-SSE-NEXT: movapd %xmm0, %xmm1
; X86-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; X86-SSE-NEXT: addpd %xmm0, %xmm1
; X86-SSE-NEXT: movlpd %xmm1, (%esp)
; X86-SSE-NEXT: fldl (%esp)
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: uifdl:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movq %rdi, %xmm1
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
; SSE-NEXT: subpd {{.*}}(%rip), %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT: addpd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: uifdl:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vmovq %rdi, %xmm0
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; AVX1-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX1-NEXT: vaddpd %xmm0, %xmm1, %xmm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: uifdl:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvtusi2sd %rdi, %xmm0, %xmm0
; AVX512-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.uitofp.f64.i64(i64 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %result
}
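; @uifdl shows the standard two-constant expansion of u64 -> f64 when no
; vcvtusi2sd is available: punpckldq splices the low and high 32-bit halves
; into the mantissas of 2^52 and 2^84, subpd removes both biases exactly,
; and the final addpd computes hi*2^32 + lo -- the one step that may round,
; matching uitofp's single rounding. The X87 path instead loads the bits as
; a signed i64 (fildll) and, when the sign bit was set, adds 2^64 from a
; two-entry constant table indexed via setns to undo the signed
; reinterpretation.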
define float @uiffb(i8 %x) #0 {
; X87-LABEL: uiffb:
; X87: # %bb.0: # %entry
; X87-NEXT: pushl %eax
; X87-NEXT: .cfi_def_cfa_offset 8
; X87-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X87-NEXT: filds {{[0-9]+}}(%esp)
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: uiffb:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: pushl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 8
; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: cvtsi2ss %eax, %xmm0
; X86-SSE-NEXT: movss %xmm0, (%esp)
; X86-SSE-NEXT: flds (%esp)
; X86-SSE-NEXT: popl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: uiffb:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movzbl %dil, %eax
; SSE-NEXT: cvtsi2ss %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: uiffb:
; AVX: # %bb.0: # %entry
; AVX-NEXT: movzbl %dil, %eax
; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call float @llvm.experimental.constrained.uitofp.f32.i8(i8 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret float %result
}
define float @uiffw(i16 %x) #0 {
; X87-LABEL: uiffw:
; X87: # %bb.0: # %entry
; X87-NEXT: pushl %eax
; X87-NEXT: .cfi_def_cfa_offset 8
; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movl %eax, (%esp)
; X87-NEXT: fildl (%esp)
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: uiffw:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: pushl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 8
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: cvtsi2ss %eax, %xmm0
; X86-SSE-NEXT: movss %xmm0, (%esp)
; X86-SSE-NEXT: flds (%esp)
; X86-SSE-NEXT: popl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: uiffw:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movzwl %di, %eax
; SSE-NEXT: cvtsi2ss %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: uiffw:
; AVX: # %bb.0: # %entry
; AVX-NEXT: movzwl %di, %eax
; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call float @llvm.experimental.constrained.uitofp.f32.i16(i16 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret float %result
}
define float @uiffi(i32 %x) #0 {
; X87-LABEL: uiffi:
; X87: # %bb.0: # %entry
; X87-NEXT: subl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 16
; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movl %eax, (%esp)
; X87-NEXT: movl $0, {{[0-9]+}}(%esp)
; X87-NEXT: fildll (%esp)
; X87-NEXT: addl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: uiffi:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: pushl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 8
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: orpd %xmm0, %xmm1
; X86-SSE-NEXT: subsd %xmm0, %xmm1
; X86-SSE-NEXT: xorps %xmm0, %xmm0
; X86-SSE-NEXT: cvtsd2ss %xmm1, %xmm0
; X86-SSE-NEXT: movss %xmm0, (%esp)
; X86-SSE-NEXT: flds (%esp)
; X86-SSE-NEXT: popl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: uiffi:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movl %edi, %eax
; SSE-NEXT: cvtsi2ss %rax, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: uiffi:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: uiffi:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvtusi2ss %edi, %xmm0, %xmm0
; AVX512-NEXT: retq
entry:
%result = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret float %result
}
define float @uiffl(i64 %x) #0 {
; X87-LABEL: uiffl:
; X87: # %bb.0: # %entry
; X87-NEXT: subl $20, %esp
; X87-NEXT: .cfi_def_cfa_offset 24
; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X87-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X87-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X87-NEXT: xorl %eax, %eax
; X87-NEXT: testl %ecx, %ecx
; X87-NEXT: setns %al
; X87-NEXT: fildll {{[0-9]+}}(%esp)
; X87-NEXT: fadds {{\.LCPI.*}}(,%eax,4)
; X87-NEXT: fstps {{[0-9]+}}(%esp)
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: addl $20, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: uiffl:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $20, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 24
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: movlps %xmm0, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: xorl %eax, %eax
; X86-SSE-NEXT: cmpl $0, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: setns %al
; X86-SSE-NEXT: fildll {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fadds {{\.LCPI.*}}(,%eax,4)
; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp)
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss %xmm0, (%esp)
; X86-SSE-NEXT: flds (%esp)
; X86-SSE-NEXT: addl $20, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: uiffl:
; SSE: # %bb.0: # %entry
; SSE-NEXT: testq %rdi, %rdi
; SSE-NEXT: js .LBB52_1
; SSE-NEXT: # %bb.2: # %entry
; SSE-NEXT: cvtsi2ss %rdi, %xmm0
; SSE-NEXT: retq
; SSE-NEXT: .LBB52_1:
; SSE-NEXT: movq %rdi, %rax
; SSE-NEXT: shrq %rax
; SSE-NEXT: andl $1, %edi
; SSE-NEXT: orq %rax, %rdi
; SSE-NEXT: cvtsi2ss %rdi, %xmm0
; SSE-NEXT: addss %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: uiffl:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: testq %rdi, %rdi
; AVX1-NEXT: js .LBB52_1
; AVX1-NEXT: # %bb.2: # %entry
; AVX1-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
; AVX1-NEXT: retq
; AVX1-NEXT: .LBB52_1:
; AVX1-NEXT: movq %rdi, %rax
; AVX1-NEXT: shrq %rax
; AVX1-NEXT: andl $1, %edi
; AVX1-NEXT: orq %rax, %rdi
; AVX1-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: uiffl:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvtusi2ss %rdi, %xmm0, %xmm0
; AVX512-NEXT: retq
entry:
%result = call float @llvm.experimental.constrained.uitofp.f32.i64(i64 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret float %result
}
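; For u64 -> f32 (@uiffl) the SSE/AVX1 lowering branches on the sign bit.
; Non-negative inputs go straight through the signed cvtsi2ss. Otherwise the
; value is halved (shrq) while OR-ing the shifted-out bit back into bit 0 --
; a round-to-odd step that preserves the sticky information -- then
; converted and doubled with addss. Halving to odd avoids double rounding,
; and the final doubling only bumps the exponent, so it is exact.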
attributes #0 = { strictfp }
@llvm.fp.env = thread_local global i8 zeroinitializer, section "llvm.metadata"
@ -1981,3 +2737,19 @@ declare i32 @llvm.experimental.constrained.lround.i32.f64(double, metadata)
declare i32 @llvm.experimental.constrained.lround.i32.f32(float, metadata)
declare i64 @llvm.experimental.constrained.llround.i64.f64(double, metadata)
declare i64 @llvm.experimental.constrained.llround.i64.f32(float, metadata)
declare double @llvm.experimental.constrained.sitofp.f64.i8(i8, metadata, metadata)
declare double @llvm.experimental.constrained.sitofp.f64.i16(i16, metadata, metadata)
declare double @llvm.experimental.constrained.sitofp.f64.i32(i32, metadata, metadata)
declare double @llvm.experimental.constrained.sitofp.f64.i64(i64, metadata, metadata)
declare float @llvm.experimental.constrained.sitofp.f32.i8(i8, metadata, metadata)
declare float @llvm.experimental.constrained.sitofp.f32.i16(i16, metadata, metadata)
declare float @llvm.experimental.constrained.sitofp.f32.i32(i32, metadata, metadata)
declare float @llvm.experimental.constrained.sitofp.f32.i64(i64, metadata, metadata)
declare double @llvm.experimental.constrained.uitofp.f64.i8(i8, metadata, metadata)
declare double @llvm.experimental.constrained.uitofp.f64.i16(i16, metadata, metadata)
declare double @llvm.experimental.constrained.uitofp.f64.i32(i32, metadata, metadata)
declare double @llvm.experimental.constrained.uitofp.f64.i64(i64, metadata, metadata)
declare float @llvm.experimental.constrained.uitofp.f32.i8(i8, metadata, metadata)
declare float @llvm.experimental.constrained.uitofp.f32.i16(i16, metadata, metadata)
declare float @llvm.experimental.constrained.uitofp.f32.i32(i32, metadata, metadata)
declare float @llvm.experimental.constrained.uitofp.f32.i64(i64, metadata, metadata)

File diff suppressed because it is too large

@ -373,6 +373,28 @@ entry:
ret i64 %result
}
; Verify that sitofp(42) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: @f30
; CHECK: call double @llvm.experimental.constrained.sitofp
define double @f30() #0 {
entry:
%result = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 42,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %result
}
; Verify that uitofp(42) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: @f31
; CHECK: call double @llvm.experimental.constrained.uitofp
define double @f31() #0 {
entry:
%result = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 42,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %result
}
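; Note that 42 converts to double exactly, so no rounding mode could change
; the numeric result; these two tests check that the folder is conservative
; and leaves any constrained conversion with a dynamic rounding mode and
; strict exception semantics for the backend to lower.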
attributes #0 = { strictfp }
@llvm.fp.env = thread_local global i8 zeroinitializer, section "llvm.metadata"
@ -405,3 +427,5 @@ declare i32 @llvm.experimental.constrained.lround.i32.f64(double, metadata)
declare i32 @llvm.experimental.constrained.lround.i32.f32(float, metadata)
declare i64 @llvm.experimental.constrained.llround.i64.f64(double, metadata)
declare i64 @llvm.experimental.constrained.llround.i64.f32(float, metadata)
declare double @llvm.experimental.constrained.sitofp.f64.i32(i32, metadata, metadata)
declare double @llvm.experimental.constrained.uitofp.f64.i32(i32, metadata, metadata)