mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-02-01 22:53:29 +00:00
This adds constrained intrinsics for the signed and unsigned conversions
of integers to floating point. This includes some of Craig Topper's changes for promotion support from D71130. Differential Revision: https://reviews.llvm.org/D69275
This commit is contained in:
parent
1ed832e424
commit
b1d8576b0a
@ -15667,6 +15667,78 @@ Semantics:
|
||||
The result produced is a signed integer converted from the floating
|
||||
point operand. The value is truncated, so it is rounded towards zero.
|
||||
|
||||
'``llvm.experimental.constrained.uitofp``' Intrinsic
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Syntax:
|
||||
"""""""
|
||||
|
||||
::
|
||||
|
||||
declare <ty2>
|
||||
@llvm.experimental.constrained.uitofp(<type> <value>,
|
||||
metadata <rounding mode>,
|
||||
metadata <exception behavior>)
|
||||
|
||||
Overview:
|
||||
"""""""""
|
||||
|
||||
The '``llvm.experimental.constrained.uitofp``' intrinsic converts an
|
||||
unsigned integer ``value`` to a floating-point value of type ``ty2``.
|
||||
|
||||
Arguments:
|
||||
""""""""""
|
||||
|
||||
The first argument to the '``llvm.experimental.constrained.uitofp``'
|
||||
intrinsic must be an :ref:`integer <t_integer>` or :ref:`vector
|
||||
<t_vector>` of integer values.
|
||||
|
||||
The second and third arguments specify the rounding mode and exception
|
||||
behavior as described above.
|
||||
|
||||
Semantics:
|
||||
""""""""""
|
||||
|
||||
An inexact floating-point exception will be raised if rounding is required.
|
||||
Any result produced is a floating point value converted from the input
|
||||
integer operand.
|
||||
|
||||
'``llvm.experimental.constrained.sitofp``' Intrinsic
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Syntax:
|
||||
"""""""
|
||||
|
||||
::
|
||||
|
||||
declare <ty2>
|
||||
@llvm.experimental.constrained.sitofp(<type> <value>,
|
||||
metadata <rounding mode>,
|
||||
metadata <exception behavior>)
|
||||
|
||||
Overview:
|
||||
"""""""""
|
||||
|
||||
The '``llvm.experimental.constrained.sitofp``' intrinsic converts a
|
||||
signed integer ``value`` to a floating-point value of type ``ty2``.
|
||||
|
||||
Arguments:
|
||||
""""""""""
|
||||
|
||||
The first argument to the '``llvm.experimental.constrained.sitofp``'
|
||||
intrinsic must be an :ref:`integer <t_integer>` or :ref:`vector
|
||||
<t_vector>` of integer values.
|
||||
|
||||
The second and third arguments specify the rounding mode and exception
|
||||
behavior as described above.
|
||||
|
||||
Semantics:
|
||||
""""""""""
|
||||
|
||||
An inexact floating-point exception will be raised if rounding is required.
|
||||
Any result produced is a floating point value converted from the input
|
||||
integer operand.
|
||||
|
||||
'``llvm.experimental.constrained.fptrunc``' Intrinsic
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
|
@ -312,6 +312,13 @@ namespace ISD {
|
||||
STRICT_FP_TO_SINT,
|
||||
STRICT_FP_TO_UINT,
|
||||
|
||||
/// STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to
|
||||
/// a floating point value. These have the same semantics as sitofp and
|
||||
/// uitofp in IR.
|
||||
/// They are used to limit optimizations while the DAG is being optimized.
|
||||
STRICT_SINT_TO_FP,
|
||||
STRICT_UINT_TO_FP,
|
||||
|
||||
/// X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating
|
||||
/// point type down to the precision of the destination VT. TRUNC is a
|
||||
/// flag, which is always an integer that is zero or one. If TRUNC is 0,
|
||||
|
@ -811,6 +811,11 @@ public:
|
||||
/// float type VT, by either extending or rounding (by truncation).
|
||||
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT);
|
||||
|
||||
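/// Convert Op, which must be a STRICT operation of float type, to the
|
||||
/// float type VT, by either extending or rounding (by truncation).
|
||||
/// The new node is chained after Chain. Returns the converted value paired
|
||||
/// with its output chain. Op's type must differ in width from VT; a no-op
|
||||
/// strict conversion is asserted against.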
|
||||
std::pair<SDValue, SDValue>
|
||||
getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT);
|
||||
|
||||
/// Convert Op, which must be of integer type, to the
|
||||
/// integer type VT, by either any-extending or truncating it.
|
||||
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT);
|
||||
|
@ -4123,14 +4123,18 @@ public:
|
||||
/// Expand float to UINT conversion
|
||||
/// \param N Node to expand
|
||||
/// \param Result output after conversion
|
||||
/// \param Chain output chain after conversion
|
||||
/// \returns True, if the expansion was successful, false otherwise
|
||||
bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const;
|
||||
bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain,
|
||||
SelectionDAG &DAG) const;
|
||||
|
||||
/// Expand UINT(i64) to double(f64) conversion
|
||||
/// \param N Node to expand
|
||||
/// \param Result output after conversion
|
||||
/// \param Chain output chain after conversion
|
||||
/// \returns True, if the expansion was successful, false otherwise
|
||||
bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
|
||||
bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain,
|
||||
SelectionDAG &DAG) const;
|
||||
|
||||
/// Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
|
||||
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const;
|
||||
|
@ -41,6 +41,8 @@ INSTRUCTION(FMul, 2, 1, experimental_constrained_fmul, FMUL)
|
||||
INSTRUCTION(FDiv, 2, 1, experimental_constrained_fdiv, FDIV)
|
||||
INSTRUCTION(FRem, 2, 1, experimental_constrained_frem, FREM)
|
||||
INSTRUCTION(FPExt, 1, 0, experimental_constrained_fpext, FP_EXTEND)
|
||||
INSTRUCTION(SIToFP, 1, 1, experimental_constrained_sitofp, SINT_TO_FP)
|
||||
INSTRUCTION(UIToFP, 1, 1, experimental_constrained_uitofp, UINT_TO_FP)
|
||||
INSTRUCTION(FPToSI, 1, 0, experimental_constrained_fptosi, FP_TO_SINT)
|
||||
INSTRUCTION(FPToUI, 1, 0, experimental_constrained_fptoui, FP_TO_UINT)
|
||||
INSTRUCTION(FPTrunc, 1, 1, experimental_constrained_fptrunc, FP_ROUND)
|
||||
|
@ -640,6 +640,16 @@ let IntrProperties = [IntrInaccessibleMemOnly, IntrWillReturn] in {
|
||||
[ llvm_anyfloat_ty,
|
||||
llvm_metadata_ty ]>;
|
||||
|
||||
def int_experimental_constrained_sitofp : Intrinsic<[ llvm_anyfloat_ty ],
|
||||
[ llvm_anyint_ty,
|
||||
llvm_metadata_ty,
|
||||
llvm_metadata_ty ]>;
|
||||
|
||||
def int_experimental_constrained_uitofp : Intrinsic<[ llvm_anyfloat_ty ],
|
||||
[ llvm_anyint_ty,
|
||||
llvm_metadata_ty,
|
||||
llvm_metadata_ty ]>;
|
||||
|
||||
def int_experimental_constrained_fptrunc : Intrinsic<[ llvm_anyfloat_ty ],
|
||||
[ llvm_anyfloat_ty,
|
||||
llvm_metadata_ty,
|
||||
|
@ -173,10 +173,9 @@ private:
|
||||
SDValue NewIntValue) const;
|
||||
SDValue ExpandFCOPYSIGN(SDNode *Node) const;
|
||||
SDValue ExpandFABS(SDNode *Node) const;
|
||||
SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, EVT DestVT,
|
||||
const SDLoc &dl);
|
||||
SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned,
|
||||
const SDLoc &dl);
|
||||
SDValue ExpandLegalINT_TO_FP(SDNode *Node, SDValue &Chain);
|
||||
void PromoteLegalINT_TO_FP(SDNode *N, const SDLoc &dl,
|
||||
SmallVectorImpl<SDValue> &Results);
|
||||
void PromoteLegalFP_TO_INT(SDNode *N, const SDLoc &dl,
|
||||
SmallVectorImpl<SDValue> &Results);
|
||||
|
||||
@ -1010,6 +1009,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
|
||||
Action = TLI.getOperationAction(Node->getOpcode(),
|
||||
Node->getOperand(0).getValueType());
|
||||
break;
|
||||
case ISD::STRICT_SINT_TO_FP:
|
||||
case ISD::STRICT_UINT_TO_FP:
|
||||
case ISD::STRICT_LRINT:
|
||||
case ISD::STRICT_LLRINT:
|
||||
case ISD::STRICT_LROUND:
|
||||
@ -2338,9 +2339,14 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
|
||||
/// INT_TO_FP operation of the specified operand when the target requests that
|
||||
/// we expand it. At this point, we know that the result and operand types are
|
||||
/// legal for the target.
|
||||
SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
|
||||
EVT DestVT,
|
||||
const SDLoc &dl) {
|
||||
SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node,
|
||||
SDValue &Chain) {
|
||||
bool isSigned = (Node->getOpcode() == ISD::STRICT_SINT_TO_FP ||
|
||||
Node->getOpcode() == ISD::SINT_TO_FP);
|
||||
EVT DestVT = Node->getValueType(0);
|
||||
SDLoc dl(Node);
|
||||
unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
|
||||
SDValue Op0 = Node->getOperand(OpNo);
|
||||
EVT SrcVT = Op0.getValueType();
|
||||
|
||||
// TODO: Should any fast-math-flags be set for the created nodes?
|
||||
@ -2387,16 +2393,38 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
|
||||
BitsToDouble(0x4330000080000000ULL) :
|
||||
BitsToDouble(0x4330000000000000ULL),
|
||||
dl, MVT::f64);
|
||||
// subtract the bias
|
||||
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias);
|
||||
// final result
|
||||
SDValue Result = DAG.getFPExtendOrRound(Sub, dl, DestVT);
|
||||
// Subtract the bias and get the final result.
|
||||
SDValue Sub;
|
||||
SDValue Result;
|
||||
if (Node->isStrictFPOpcode()) {
  // Chain the strict subtract after the incoming chain of the node being
  // expanded.
  Sub = DAG.getNode(ISD::STRICT_FSUB, dl, {MVT::f64, MVT::Other},
                    {Node->getOperand(0), Load, Bias});
  // Always report an output chain to the caller, including the case where
  // the f64 subtract already has the destination type and no extend/round
  // node is created below.
  Chain = Sub.getValue(1);
  if (DestVT != Sub.getValueType()) {
    // Sequence the extend/round after the subtract that feeds it, not after
    // the output chain of the node we are in the middle of replacing.
    std::pair<SDValue, SDValue> ResultPair =
        DAG.getStrictFPExtendOrRound(Sub, Chain, dl, DestVT);
    Result = ResultPair.first;
    Chain = ResultPair.second;
  } else
    Result = Sub;
} else {
  // Non-strict form: plain subtract followed by an extend/round to DestVT.
  Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias);
  Result = DAG.getFPExtendOrRound(Sub, dl, DestVT);
}
|
||||
return Result;
|
||||
}
|
||||
assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet");
|
||||
// Code below here assumes !isSigned without checking again.
|
||||
// FIXME: This can produce slightly incorrect results. See details in
|
||||
// FIXME: https://reviews.llvm.org/D69275
|
||||
|
||||
SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);
|
||||
SDValue Tmp1;
|
||||
if (Node->isStrictFPOpcode()) {
|
||||
Tmp1 = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, { DestVT, MVT::Other },
|
||||
{ Node->getOperand(0), Op0 });
|
||||
} else
|
||||
Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);
|
||||
|
||||
SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(SrcVT), Op0,
|
||||
DAG.getConstant(0, dl, SrcVT), ISD::SETLT);
|
||||
@ -2442,6 +2470,13 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
|
||||
FudgeInReg = Handle.getValue();
|
||||
}
|
||||
|
||||
if (Node->isStrictFPOpcode()) {
|
||||
SDValue Result = DAG.getNode(ISD::STRICT_FADD, dl, { DestVT, MVT::Other },
|
||||
{ Tmp1.getValue(1), Tmp1, FudgeInReg });
|
||||
Chain = Result.getValue(1);
|
||||
return Result;
|
||||
}
|
||||
|
||||
return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg);
|
||||
}
|
||||
|
||||
@ -2450,9 +2485,16 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
|
||||
/// we promote it. At this point, we know that the result and operand types are
|
||||
/// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP
|
||||
/// operation that takes a larger input.
|
||||
SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT,
|
||||
bool isSigned,
|
||||
const SDLoc &dl) {
|
||||
void SelectionDAGLegalize::PromoteLegalINT_TO_FP(
|
||||
SDNode *N, const SDLoc &dl, SmallVectorImpl<SDValue> &Results) {
|
||||
bool IsStrict = N->isStrictFPOpcode();
|
||||
bool IsSigned = N->getOpcode() == ISD::SINT_TO_FP ||
|
||||
N->getOpcode() == ISD::STRICT_SINT_TO_FP;
|
||||
EVT DestVT = N->getValueType(0);
|
||||
SDValue LegalOp = N->getOperand(IsStrict ? 1 : 0);
|
||||
unsigned UIntOp = IsStrict ? ISD::STRICT_UINT_TO_FP : ISD::UINT_TO_FP;
|
||||
unsigned SIntOp = IsStrict ? ISD::STRICT_SINT_TO_FP : ISD::SINT_TO_FP;
|
||||
|
||||
// First step, figure out the appropriate *INT_TO_FP operation to use.
|
||||
EVT NewInTy = LegalOp.getValueType();
|
||||
|
||||
@ -2464,15 +2506,16 @@ SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT,
|
||||
assert(NewInTy.isInteger() && "Ran out of possibilities!");
|
||||
|
||||
// If the target supports SINT_TO_FP of this type, use it.
|
||||
if (TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, NewInTy)) {
|
||||
OpToUse = ISD::SINT_TO_FP;
|
||||
if (TLI.isOperationLegalOrCustom(SIntOp, NewInTy)) {
|
||||
OpToUse = SIntOp;
|
||||
break;
|
||||
}
|
||||
if (isSigned) continue;
|
||||
if (IsSigned)
|
||||
continue;
|
||||
|
||||
// If the target supports UINT_TO_FP of this type, use it.
|
||||
if (TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, NewInTy)) {
|
||||
OpToUse = ISD::UINT_TO_FP;
|
||||
if (TLI.isOperationLegalOrCustom(UIntOp, NewInTy)) {
|
||||
OpToUse = UIntOp;
|
||||
break;
|
||||
}
|
||||
|
||||
@ -2481,9 +2524,20 @@ SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT,
|
||||
|
||||
// Okay, we found the operation and type to use. Sign- or zero-extend our
|
||||
// input to the desired type, then run the operation on it.
|
||||
return DAG.getNode(OpToUse, dl, DestVT,
|
||||
DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
|
||||
dl, NewInTy, LegalOp));
|
||||
if (IsStrict) {
  // Strict form: thread the node's incoming chain through the promoted
  // conversion and hand back both the converted value and the new chain.
  SDValue Res =
      DAG.getNode(OpToUse, dl, {DestVT, MVT::Other},
                  {N->getOperand(0),
                   DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
                               dl, NewInTy, LegalOp)});
  Results.push_back(Res);
  Results.push_back(Res.getValue(1));
  // Stop here. Falling through to the non-strict path would build a
  // chain-less node from a strict opcode and append a third result.
  return;
}
|
||||
|
||||
Results.push_back(
|
||||
DAG.getNode(OpToUse, dl, DestVT,
|
||||
DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
|
||||
dl, NewInTy, LegalOp)));
|
||||
}
|
||||
|
||||
/// This function is responsible for legalizing a
|
||||
@ -2899,15 +2953,20 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
|
||||
break;
|
||||
}
|
||||
case ISD::UINT_TO_FP:
|
||||
if (TLI.expandUINT_TO_FP(Node, Tmp1, DAG)) {
|
||||
case ISD::STRICT_UINT_TO_FP:
|
||||
if (TLI.expandUINT_TO_FP(Node, Tmp1, Tmp2, DAG)) {
|
||||
Results.push_back(Tmp1);
|
||||
if (Node->isStrictFPOpcode())
|
||||
Results.push_back(Tmp2);
|
||||
break;
|
||||
}
|
||||
LLVM_FALLTHROUGH;
|
||||
case ISD::SINT_TO_FP:
|
||||
Tmp1 = ExpandLegalINT_TO_FP(Node->getOpcode() == ISD::SINT_TO_FP,
|
||||
Node->getOperand(0), Node->getValueType(0), dl);
|
||||
case ISD::STRICT_SINT_TO_FP:
|
||||
Tmp1 = ExpandLegalINT_TO_FP(Node, Tmp2);
|
||||
Results.push_back(Tmp1);
|
||||
if (Node->isStrictFPOpcode())
|
||||
Results.push_back(Tmp2);
|
||||
break;
|
||||
case ISD::FP_TO_SINT:
|
||||
if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG))
|
||||
@ -4194,6 +4253,9 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
|
||||
Node->getOpcode() == ISD::INSERT_VECTOR_ELT) {
|
||||
OVT = Node->getOperand(0).getSimpleValueType();
|
||||
}
|
||||
if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP ||
|
||||
Node->getOpcode() == ISD::STRICT_SINT_TO_FP)
|
||||
OVT = Node->getOperand(1).getSimpleValueType();
|
||||
if (Node->getOpcode() == ISD::BR_CC)
|
||||
OVT = Node->getOperand(2).getSimpleValueType();
|
||||
MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
|
||||
@ -4248,10 +4310,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
|
||||
PromoteLegalFP_TO_INT(Node, dl, Results);
|
||||
break;
|
||||
case ISD::UINT_TO_FP:
|
||||
case ISD::STRICT_UINT_TO_FP:
|
||||
case ISD::SINT_TO_FP:
|
||||
Tmp1 = PromoteLegalINT_TO_FP(Node->getOperand(0), Node->getValueType(0),
|
||||
Node->getOpcode() == ISD::SINT_TO_FP, dl);
|
||||
Results.push_back(Tmp1);
|
||||
case ISD::STRICT_SINT_TO_FP:
|
||||
PromoteLegalINT_TO_FP(Node, dl, Results);
|
||||
break;
|
||||
case ISD::VAARG: {
|
||||
SDValue Chain = Node->getOperand(0); // Get the chain.
|
||||
|
@ -307,23 +307,27 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
|
||||
return TranslateLegalizeResults(Op, Result);
|
||||
|
||||
TargetLowering::LegalizeAction Action = TargetLowering::Legal;
|
||||
EVT ValVT;
|
||||
switch (Op.getOpcode()) {
|
||||
default:
|
||||
return TranslateLegalizeResults(Op, Result);
|
||||
#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
|
||||
case ISD::STRICT_##DAGN:
|
||||
#include "llvm/IR/ConstrainedOps.def"
|
||||
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
|
||||
ValVT = Node->getValueType(0);
|
||||
if (Op.getOpcode() == ISD::STRICT_SINT_TO_FP ||
|
||||
Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
|
||||
ValVT = Node->getOperand(1).getValueType();
|
||||
Action = TLI.getOperationAction(Node->getOpcode(), ValVT);
|
||||
// If we're asked to expand a strict vector floating-point operation,
|
||||
// by default we're going to simply unroll it. That is usually the
|
||||
// best approach, except in the case where the resulting strict (scalar)
|
||||
// operations would themselves use the fallback mutation to non-strict.
|
||||
// In that specific case, just do the fallback on the vector op.
|
||||
if (Action == TargetLowering::Expand && !TLI.isStrictFPEnabled() &&
|
||||
TLI.getStrictFPOperationAction(Node->getOpcode(),
|
||||
Node->getValueType(0))
|
||||
== TargetLowering::Legal) {
|
||||
EVT EltVT = Node->getValueType(0).getVectorElementType();
|
||||
TLI.getStrictFPOperationAction(Node->getOpcode(), ValVT) ==
|
||||
TargetLowering::Legal) {
|
||||
EVT EltVT = ValVT.getVectorElementType();
|
||||
if (TLI.getOperationAction(Node->getOpcode(), EltVT)
|
||||
== TargetLowering::Expand &&
|
||||
TLI.getStrictFPOperationAction(Node->getOpcode(), EltVT)
|
||||
@ -1153,18 +1157,29 @@ SDValue VectorLegalizer::ExpandFP_TO_UINT(SDValue Op) {
|
||||
}
|
||||
|
||||
SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
|
||||
EVT VT = Op.getOperand(0).getValueType();
|
||||
bool IsStrict = Op.getNode()->isStrictFPOpcode();
|
||||
unsigned OpNo = IsStrict ? 1 : 0;
|
||||
SDValue Src = Op.getOperand(OpNo);
|
||||
EVT VT = Src.getValueType();
|
||||
SDLoc DL(Op);
|
||||
|
||||
// Attempt to expand using TargetLowering.
|
||||
SDValue Result;
|
||||
if (TLI.expandUINT_TO_FP(Op.getNode(), Result, DAG))
|
||||
SDValue Chain;
|
||||
if (TLI.expandUINT_TO_FP(Op.getNode(), Result, Chain, DAG)) {
|
||||
if (IsStrict)
|
||||
// Relink the chain
|
||||
DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Chain);
|
||||
return Result;
|
||||
}
|
||||
|
||||
// Make sure that the SINT_TO_FP and SRL instructions are available.
|
||||
if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand ||
|
||||
TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand)
|
||||
return DAG.UnrollVectorOp(Op.getNode());
|
||||
if (((!IsStrict && TLI.getOperationAction(ISD::SINT_TO_FP, VT) ==
|
||||
TargetLowering::Expand) ||
|
||||
(IsStrict && TLI.getOperationAction(ISD::STRICT_SINT_TO_FP, VT) ==
|
||||
TargetLowering::Expand)) ||
|
||||
TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand)
|
||||
return IsStrict ? SDValue() : DAG.UnrollVectorOp(Op.getNode());
|
||||
|
||||
unsigned BW = VT.getScalarSizeInBits();
|
||||
assert((BW == 64 || BW == 32) &&
|
||||
@ -1182,8 +1197,31 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
|
||||
SDValue TWOHW = DAG.getConstantFP(1ULL << (BW / 2), DL, Op.getValueType());
|
||||
|
||||
// Clear upper part of LO, lower HI
|
||||
SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord);
|
||||
SDValue LO = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), HalfWordMask);
|
||||
SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Src, HalfWord);
|
||||
SDValue LO = DAG.getNode(ISD::AND, DL, VT, Src, HalfWordMask);
|
||||
|
||||
if (IsStrict) {
|
||||
// Convert hi and lo to floats
|
||||
// Convert the hi part back to the upper values
|
||||
// TODO: Can any fast-math-flags be set on these nodes?
|
||||
SDValue fHI =
|
||||
DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, {Op.getValueType(), MVT::Other},
|
||||
{Op.getOperand(0), HI});
|
||||
fHI = DAG.getNode(ISD::STRICT_FMUL, DL, {Op.getValueType(), MVT::Other},
|
||||
{SDValue(fHI.getNode(), 1), fHI, TWOHW});
|
||||
SDValue fLO =
|
||||
DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, {Op.getValueType(), MVT::Other},
|
||||
{SDValue(fHI.getNode(), 1), LO});
|
||||
|
||||
// Add the two halves
|
||||
SDValue Result =
|
||||
DAG.getNode(ISD::STRICT_FADD, DL, {Op.getValueType(), MVT::Other},
|
||||
{SDValue(fLO.getNode(), 1), fHI, fLO});
|
||||
|
||||
// Relink the chain
|
||||
DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), SDValue(Result.getNode(), 1));
|
||||
return Result;
|
||||
}
|
||||
|
||||
// Convert hi and lo to floats
|
||||
// Convert the hi part back to the upper values
|
||||
@ -1318,7 +1356,12 @@ SDValue VectorLegalizer::ExpandFixedPointMul(SDValue Op) {
|
||||
}
|
||||
|
||||
SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) {
|
||||
EVT VT = Op.getValueType();
|
||||
if (Op.getOpcode() == ISD::STRICT_UINT_TO_FP) {
|
||||
if (SDValue Res = ExpandUINT_TO_FLOAT(Op))
|
||||
return Res;
|
||||
}
|
||||
|
||||
EVT VT = Op.getValue(0).getValueType();
|
||||
EVT EltVT = VT.getVectorElementType();
|
||||
unsigned NumElems = VT.getVectorNumElements();
|
||||
unsigned NumOpers = Op.getNumOperands();
|
||||
|
@ -572,6 +572,8 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
|
||||
case ISD::UINT_TO_FP:
|
||||
Res = ScalarizeVecOp_UnaryOp(N);
|
||||
break;
|
||||
case ISD::STRICT_SINT_TO_FP:
|
||||
case ISD::STRICT_UINT_TO_FP:
|
||||
case ISD::STRICT_FP_TO_SINT:
|
||||
case ISD::STRICT_FP_TO_UINT:
|
||||
Res = ScalarizeVecOp_UnaryOp_StrictFP(N);
|
||||
@ -1931,9 +1933,12 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
|
||||
case ISD::VSELECT:
|
||||
Res = SplitVecOp_VSELECT(N, OpNo);
|
||||
break;
|
||||
case ISD::STRICT_SINT_TO_FP:
|
||||
case ISD::STRICT_UINT_TO_FP:
|
||||
case ISD::SINT_TO_FP:
|
||||
case ISD::UINT_TO_FP:
|
||||
if (N->getValueType(0).bitsLT(N->getOperand(0).getValueType()))
|
||||
if (N->getValueType(0).bitsLT(
|
||||
N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType()))
|
||||
Res = SplitVecOp_TruncateHelper(N);
|
||||
else
|
||||
Res = SplitVecOp_UnaryOp(N);
|
||||
@ -2494,7 +2499,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
|
||||
//
|
||||
// Without this transform, the original truncate would end up being
|
||||
// scalarized, which is pretty much always a last resort.
|
||||
SDValue InVec = N->getOperand(0);
|
||||
unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
|
||||
SDValue InVec = N->getOperand(OpNo);
|
||||
EVT InVT = InVec->getValueType(0);
|
||||
EVT OutVT = N->getValueType(0);
|
||||
unsigned NumElements = OutVT.getVectorNumElements();
|
||||
@ -2538,8 +2544,23 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
|
||||
EVT::getIntegerVT(*DAG.getContext(), InElementSize/2);
|
||||
EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT,
|
||||
NumElements/2);
|
||||
SDValue HalfLo = DAG.getNode(N->getOpcode(), DL, HalfVT, InLoVec);
|
||||
SDValue HalfHi = DAG.getNode(N->getOpcode(), DL, HalfVT, InHiVec);
|
||||
|
||||
SDValue HalfLo;
|
||||
SDValue HalfHi;
|
||||
SDValue Chain;
|
||||
if (N->isStrictFPOpcode()) {
  // Convert each half of the split input, both chained from the node's
  // incoming chain. The operands are the split input vectors; HalfLo and
  // HalfHi are still empty at this point and must not be used as inputs.
  HalfLo = DAG.getNode(N->getOpcode(), DL, {HalfVT, MVT::Other},
                       {N->getOperand(0), InLoVec});
  HalfHi = DAG.getNode(N->getOpcode(), DL, {HalfVT, MVT::Other},
                       {N->getOperand(0), InHiVec});
  // Merge the output chains of the two half conversions into one chain.
  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, HalfLo.getValue(1),
                      HalfHi.getValue(1));
} else {
  // Non-strict form: convert each half directly.
  HalfLo = DAG.getNode(N->getOpcode(), DL, HalfVT, InLoVec);
  HalfHi = DAG.getNode(N->getOpcode(), DL, HalfVT, InHiVec);
}
|
||||
// Concatenate them to get the full intermediate truncation result.
|
||||
EVT InterVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements);
|
||||
SDValue InterVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InterVT, HalfLo,
|
||||
@ -2548,6 +2569,17 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
|
||||
// type. This should normally be something that ends up being legal directly,
|
||||
// but in theory if a target has very wide vectors and an annoyingly
|
||||
// restricted set of legal types, this split can chain to build things up.
|
||||
|
||||
if (N->isStrictFPOpcode()) {
|
||||
SDValue Res = DAG.getNode(
|
||||
ISD::STRICT_FP_ROUND, DL, {OutVT, MVT::Other},
|
||||
{Chain, InterVec,
|
||||
DAG.getTargetConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout()))});
|
||||
// Relink the chain
|
||||
ReplaceValueWith(SDValue(N, 1), SDValue(Res.getNode(), 1));
|
||||
return Res;
|
||||
}
|
||||
|
||||
return IsFloat
|
||||
? DAG.getNode(ISD::FP_ROUND, DL, OutVT, InterVec,
|
||||
DAG.getTargetConstant(
|
||||
@ -3000,6 +3032,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) {
|
||||
case ISD::STRICT_FP_ROUND:
|
||||
case ISD::STRICT_FP_TO_SINT:
|
||||
case ISD::STRICT_FP_TO_UINT:
|
||||
case ISD::STRICT_SINT_TO_FP:
|
||||
case ISD::STRICT_UINT_TO_FP:
|
||||
return WidenVecRes_Convert_StrictFP(N);
|
||||
default:
|
||||
break;
|
||||
@ -4120,7 +4154,9 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
|
||||
case ISD::FP_TO_UINT:
|
||||
case ISD::STRICT_FP_TO_UINT:
|
||||
case ISD::SINT_TO_FP:
|
||||
case ISD::STRICT_SINT_TO_FP:
|
||||
case ISD::UINT_TO_FP:
|
||||
case ISD::STRICT_UINT_TO_FP:
|
||||
case ISD::TRUNCATE:
|
||||
Res = WidenVecOp_Convert(N);
|
||||
break;
|
||||
|
@ -1117,6 +1117,20 @@ SDValue SelectionDAG::getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT) {
|
||||
: getNode(ISD::FP_ROUND, DL, VT, Op, getIntPtrConstant(0, DL));
|
||||
}
|
||||
|
||||
std::pair<SDValue, SDValue>
|
||||
SelectionDAG::getStrictFPExtendOrRound(SDValue Op, SDValue Chain,
|
||||
const SDLoc &DL, EVT VT) {
|
||||
assert(!VT.bitsEq(Op.getValueType()) &&
|
||||
"Strict no-op FP extend/round not allowed.");
|
||||
SDValue Res =
|
||||
VT.bitsGT(Op.getValueType())
|
||||
? getNode(ISD::STRICT_FP_EXTEND, DL, {VT, MVT::Other}, {Chain, Op})
|
||||
: getNode(ISD::STRICT_FP_ROUND, DL, {VT, MVT::Other},
|
||||
{Chain, Op, getIntPtrConstant(0, DL)});
|
||||
|
||||
return std::pair<SDValue, SDValue>(Res, SDValue(Res.getNode(), 1));
|
||||
}
|
||||
|
||||
SDValue SelectionDAG::getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
|
||||
return VT.bitsGT(Op.getValueType()) ?
|
||||
getNode(ISD::ANY_EXTEND, DL, VT, Op) :
|
||||
|
@ -326,7 +326,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
|
||||
case ISD::STRICT_FP_EXTEND: return "strict_fp_extend";
|
||||
|
||||
case ISD::SINT_TO_FP: return "sint_to_fp";
|
||||
case ISD::STRICT_SINT_TO_FP: return "strict_sint_to_fp";
|
||||
case ISD::UINT_TO_FP: return "uint_to_fp";
|
||||
case ISD::STRICT_UINT_TO_FP: return "strict_uint_to_fp";
|
||||
case ISD::FP_TO_SINT: return "fp_to_sint";
|
||||
case ISD::STRICT_FP_TO_SINT: return "strict_fp_to_sint";
|
||||
case ISD::FP_TO_UINT: return "fp_to_uint";
|
||||
|
@ -6116,8 +6116,10 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
|
||||
}
|
||||
|
||||
bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
|
||||
SDValue &Chain,
|
||||
SelectionDAG &DAG) const {
|
||||
SDValue Src = Node->getOperand(0);
|
||||
unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
|
||||
SDValue Src = Node->getOperand(OpNo);
|
||||
EVT SrcVT = Src.getValueType();
|
||||
EVT DstVT = Node->getValueType(0);
|
||||
|
||||
@ -6140,7 +6142,13 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
|
||||
|
||||
// For unsigned conversions, convert them to signed conversions using the
|
||||
// algorithm from the x86_64 __floatundidf in compiler_rt.
|
||||
SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src);
|
||||
SDValue Fast;
|
||||
if (Node->isStrictFPOpcode()) {
|
||||
Fast = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {DstVT, MVT::Other},
|
||||
{Node->getOperand(0), Src});
|
||||
Chain = SDValue(Fast.getNode(), 1);
|
||||
} else
|
||||
Fast = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src);
|
||||
|
||||
SDValue ShiftConst = DAG.getConstant(1, dl, ShiftVT);
|
||||
SDValue Shr = DAG.getNode(ISD::SRL, dl, SrcVT, Src, ShiftConst);
|
||||
@ -6148,8 +6156,17 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
|
||||
SDValue And = DAG.getNode(ISD::AND, dl, SrcVT, Src, AndConst);
|
||||
SDValue Or = DAG.getNode(ISD::OR, dl, SrcVT, And, Shr);
|
||||
|
||||
SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Or);
|
||||
SDValue Slow = DAG.getNode(ISD::FADD, dl, DstVT, SignCvt, SignCvt);
|
||||
SDValue Slow;
|
||||
if (Node->isStrictFPOpcode()) {
|
||||
SDValue SignCvt = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl,
|
||||
{DstVT, MVT::Other}, {Chain, Or});
|
||||
Slow = DAG.getNode(ISD::STRICT_FADD, dl, { DstVT, MVT::Other },
|
||||
{ SignCvt.getValue(1), SignCvt, SignCvt });
|
||||
Chain = Slow.getValue(1);
|
||||
} else {
|
||||
SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Or);
|
||||
Slow = DAG.getNode(ISD::FADD, dl, DstVT, SignCvt, SignCvt);
|
||||
}
|
||||
|
||||
// TODO: This really should be implemented using a branch rather than a
|
||||
// select. We happen to get lucky and machinesink does the right
|
||||
@ -6192,8 +6209,18 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
|
||||
SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
|
||||
SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
|
||||
SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
|
||||
SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
|
||||
Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
|
||||
if (Node->isStrictFPOpcode()) {
|
||||
SDValue HiSub =
|
||||
DAG.getNode(ISD::STRICT_FSUB, dl, {DstVT, MVT::Other},
|
||||
{Node->getOperand(0), HiFlt, TwoP84PlusTwoP52});
|
||||
Result = DAG.getNode(ISD::STRICT_FADD, dl, {DstVT, MVT::Other},
|
||||
{HiSub.getValue(1), LoFlt, HiSub});
|
||||
Chain = Result.getValue(1);
|
||||
} else {
|
||||
SDValue HiSub =
|
||||
DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
|
||||
Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -4804,6 +4804,28 @@ void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
|
||||
}
|
||||
break;
|
||||
|
||||
case Intrinsic::experimental_constrained_sitofp:
|
||||
case Intrinsic::experimental_constrained_uitofp: {
|
||||
Value *Operand = FPI.getArgOperand(0);
|
||||
uint64_t NumSrcElem = 0;
|
||||
Assert(Operand->getType()->isIntOrIntVectorTy(),
|
||||
"Intrinsic first argument must be integer", &FPI);
|
||||
if (auto *OperandT = dyn_cast<VectorType>(Operand->getType())) {
|
||||
NumSrcElem = OperandT->getNumElements();
|
||||
}
|
||||
|
||||
Operand = &FPI;
|
||||
Assert((NumSrcElem > 0) == Operand->getType()->isVectorTy(),
|
||||
"Intrinsic first argument and result disagree on vector use", &FPI);
|
||||
Assert(Operand->getType()->isFPOrFPVectorTy(),
|
||||
"Intrinsic result must be a floating point", &FPI);
|
||||
if (auto *OperandT = dyn_cast<VectorType>(Operand->getType())) {
|
||||
Assert(NumSrcElem == OperandT->getNumElements(),
|
||||
"Intrinsic first argument and result vector lengths must be equal",
|
||||
&FPI);
|
||||
}
|
||||
} break;
|
||||
|
||||
case Intrinsic::experimental_constrained_fptrunc:
|
||||
case Intrinsic::experimental_constrained_fpext: {
|
||||
Value *Operand = FPI.getArgOperand(0);
|
||||
|
@ -228,26 +228,34 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
||||
if (!Subtarget.useSoftFloat()) {
|
||||
// Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
|
||||
// operation.
|
||||
setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
|
||||
setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
|
||||
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i8, Promote);
|
||||
setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
|
||||
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i16, Promote);
|
||||
// We have an algorithm for SSE2, and we turn this into a 64-bit
|
||||
// FILD or VCVTUSI2SS/SD for other targets.
|
||||
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
|
||||
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
|
||||
// We have an algorithm for SSE2->double, and we turn this into a
|
||||
// 64-bit FILD followed by conditional FADD for other targets.
|
||||
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
|
||||
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
|
||||
|
||||
// Promote i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
|
||||
// this operation.
|
||||
setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
|
||||
setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
|
||||
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i8, Promote);
|
||||
// SSE has no i16 to fp conversion, only i32. We promote in the handler
|
||||
// to allow f80 to use i16 and f64 to use i16 with SSE1 only.
|
||||
setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom);
|
||||
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i16, Custom);
|
||||
// f32 and f64 cases are Legal with SSE1/SSE2, f80 case is not
|
||||
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
|
||||
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
|
||||
// In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
|
||||
// are Legal, f80 is custom lowered.
|
||||
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
|
||||
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
|
||||
|
||||
// Promote i8 FP_TO_SINT to larger FP_TO_SINT's, as X86 doesn't have
|
||||
// this operation.
|
||||
@ -985,9 +993,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
||||
setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
|
||||
|
||||
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
|
||||
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
|
||||
setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
|
||||
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i32, Custom);
|
||||
|
||||
setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
|
||||
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i32, Custom);
|
||||
|
||||
// Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
|
||||
setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
|
||||
@ -18421,8 +18432,13 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
|
||||
static SDValue LowerI64IntToFP_AVX512DQ(SDValue Op, SelectionDAG &DAG,
|
||||
const X86Subtarget &Subtarget) {
|
||||
assert((Op.getOpcode() == ISD::SINT_TO_FP ||
|
||||
Op.getOpcode() == ISD::UINT_TO_FP) && "Unexpected opcode!");
|
||||
SDValue Src = Op.getOperand(0);
|
||||
Op.getOpcode() == ISD::STRICT_SINT_TO_FP ||
|
||||
Op.getOpcode() == ISD::STRICT_UINT_TO_FP ||
|
||||
Op.getOpcode() == ISD::UINT_TO_FP) &&
|
||||
"Unexpected opcode!");
|
||||
bool IsStrict = Op->isStrictFPOpcode();
|
||||
unsigned OpNo = IsStrict ? 1 : 0;
|
||||
SDValue Src = Op.getOperand(OpNo);
|
||||
MVT SrcVT = Src.getSimpleValueType();
|
||||
MVT VT = Op.getSimpleValueType();
|
||||
|
||||
@ -18439,7 +18455,17 @@ static SDValue LowerI64IntToFP_AVX512DQ(SDValue Op, SelectionDAG &DAG,
|
||||
|
||||
SDLoc dl(Op);
|
||||
SDValue InVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecInVT, Src);
|
||||
if (IsStrict) {
|
||||
SDValue CvtVec = DAG.getNode(Op.getOpcode(), dl, {VecVT, MVT::Other},
|
||||
{Op.getOperand(0), InVec});
|
||||
SDValue Chain = CvtVec.getValue(1);
|
||||
SDValue Value = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, CvtVec,
|
||||
DAG.getIntPtrConstant(0, dl));
|
||||
return DAG.getMergeValues({Value, Chain}, dl);
|
||||
}
|
||||
|
||||
SDValue CvtVec = DAG.getNode(Op.getOpcode(), dl, VecVT, InVec);
|
||||
|
||||
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, CvtVec,
|
||||
DAG.getIntPtrConstant(0, dl));
|
||||
}
|
||||
@ -18510,7 +18536,9 @@ static SDValue vectorizeExtractedCast(SDValue Cast, SelectionDAG &DAG,
|
||||
|
||||
SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
SDValue Src = Op.getOperand(0);
|
||||
bool IsStrict = Op->isStrictFPOpcode();
|
||||
unsigned OpNo = IsStrict ? 1 : 0;
|
||||
SDValue Src = Op.getOperand(OpNo);
|
||||
MVT SrcVT = Src.getSimpleValueType();
|
||||
MVT VT = Op.getSimpleValueType();
|
||||
SDLoc dl(Op);
|
||||
@ -18519,7 +18547,8 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
|
||||
return Extract;
|
||||
|
||||
if (SrcVT.isVector()) {
|
||||
if (SrcVT == MVT::v2i32 && VT == MVT::v2f64) {
|
||||
if (SrcVT == MVT::v2i32 && VT == MVT::v2f64 && !IsStrict) {
|
||||
// FIXME: A strict version of CVTSI2P is needed.
|
||||
return DAG.getNode(X86ISD::CVTSI2P, dl, VT,
|
||||
DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src,
|
||||
DAG.getUNDEF(SrcVT)));
|
||||
@ -18545,13 +18574,17 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
|
||||
// SSE doesn't have an i16 conversion so we need to promote.
|
||||
if (SrcVT == MVT::i16 && (UseSSEReg || VT == MVT::f128)) {
|
||||
SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, Src);
|
||||
if (IsStrict)
|
||||
return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other},
|
||||
{Op.getOperand(0), Ext});
|
||||
|
||||
return DAG.getNode(ISD::SINT_TO_FP, dl, VT, Ext);
|
||||
}
|
||||
|
||||
if (VT == MVT::f128)
|
||||
return LowerF128Call(Op, DAG, RTLIB::getSINTTOFP(SrcVT, VT));
|
||||
|
||||
SDValue ValueToStore = Op.getOperand(0);
|
||||
SDValue ValueToStore = Src;
|
||||
if (SrcVT == MVT::i64 && UseSSEReg && !Subtarget.is64Bit())
|
||||
// Bitcasting to f64 here allows us to do a single 64-bit store from
|
||||
// an SSE register, avoiding the store forwarding penalty that would come
|
||||
@ -18563,10 +18596,16 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
|
||||
auto PtrVT = getPointerTy(MF.getDataLayout());
|
||||
int SSFI = MF.getFrameInfo().CreateStackObject(Size, Size, false);
|
||||
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
|
||||
SDValue Chain = DAG.getStore(
|
||||
DAG.getEntryNode(), dl, ValueToStore, StackSlot,
|
||||
SDValue Chain = IsStrict ? Op->getOperand(0) : DAG.getEntryNode();
|
||||
Chain = DAG.getStore(
|
||||
Chain, dl, ValueToStore, StackSlot,
|
||||
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI));
|
||||
return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG).first;
|
||||
std::pair<SDValue, SDValue> Tmp = BuildFILD(Op, SrcVT, Chain, StackSlot, DAG);
|
||||
|
||||
if (IsStrict)
|
||||
return DAG.getMergeValues({Tmp.first, Tmp.second}, dl);
|
||||
|
||||
return Tmp.first;
|
||||
}
|
||||
|
||||
std::pair<SDValue, SDValue> X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
|
||||
@ -18654,6 +18693,8 @@ static SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG,
|
||||
#endif
|
||||
*/
|
||||
|
||||
bool IsStrict = Op->isStrictFPOpcode();
|
||||
unsigned OpNo = IsStrict ? 1 : 0;
|
||||
SDLoc dl(Op);
|
||||
LLVMContext *Context = DAG.getContext();
|
||||
|
||||
@ -18674,8 +18715,8 @@ static SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG,
|
||||
SDValue CPIdx1 = DAG.getConstantPool(C1, PtrVT, 16);
|
||||
|
||||
// Load the 64-bit value into an XMM register.
|
||||
SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
|
||||
Op.getOperand(0));
|
||||
SDValue XR1 =
|
||||
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Op.getOperand(OpNo));
|
||||
SDValue CLod0 =
|
||||
DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0,
|
||||
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
|
||||
@ -18688,32 +18729,50 @@ static SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG,
|
||||
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
|
||||
/* Alignment = */ 16);
|
||||
SDValue XR2F = DAG.getBitcast(MVT::v2f64, Unpck1);
|
||||
SDValue Sub;
|
||||
SDValue Chain;
|
||||
// TODO: Are there any fast-math-flags to propagate here?
|
||||
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
|
||||
if (IsStrict) {
|
||||
Sub = DAG.getNode(ISD::STRICT_FSUB, dl, {MVT::v2f64, MVT::Other},
|
||||
{Op.getOperand(0), XR2F, CLod1});
|
||||
Chain = Sub.getValue(1);
|
||||
} else
|
||||
Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
|
||||
SDValue Result;
|
||||
|
||||
if (Subtarget.hasSSE3() && shouldUseHorizontalOp(true, DAG, Subtarget)) {
|
||||
if (!IsStrict && Subtarget.hasSSE3() &&
|
||||
shouldUseHorizontalOp(true, DAG, Subtarget)) {
|
||||
// FIXME: Do we need a STRICT version of FHADD?
|
||||
Result = DAG.getNode(X86ISD::FHADD, dl, MVT::v2f64, Sub, Sub);
|
||||
} else {
|
||||
SDValue Shuffle = DAG.getVectorShuffle(MVT::v2f64, dl, Sub, Sub, {1,-1});
|
||||
Result = DAG.getNode(ISD::FADD, dl, MVT::v2f64, Shuffle, Sub);
|
||||
if (IsStrict) {
|
||||
Result = DAG.getNode(ISD::STRICT_FADD, dl, {MVT::v2f64, MVT::Other},
|
||||
{Chain, Shuffle, Sub});
|
||||
Chain = Result.getValue(1);
|
||||
} else
|
||||
Result = DAG.getNode(ISD::FADD, dl, MVT::v2f64, Shuffle, Sub);
|
||||
}
|
||||
Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Result,
|
||||
DAG.getIntPtrConstant(0, dl));
|
||||
if (IsStrict)
|
||||
return DAG.getMergeValues({Result, Chain}, dl);
|
||||
|
||||
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Result,
|
||||
DAG.getIntPtrConstant(0, dl));
|
||||
return Result;
|
||||
}
|
||||
|
||||
/// 32-bit unsigned integer to float expansion.
|
||||
static SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG,
|
||||
const X86Subtarget &Subtarget) {
|
||||
unsigned OpNo = Op.getNode()->isStrictFPOpcode() ? 1 : 0;
|
||||
SDLoc dl(Op);
|
||||
// FP constant to bias correct the final result.
|
||||
SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), dl,
|
||||
MVT::f64);
|
||||
|
||||
// Load the 32-bit value into an XMM register.
|
||||
SDValue Load = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,
|
||||
Op.getOperand(0));
|
||||
SDValue Load =
|
||||
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Op.getOperand(OpNo));
|
||||
|
||||
// Zero out the upper parts of the register.
|
||||
Load = getShuffleVectorZeroOrUndef(Load, 0, true, Subtarget, DAG);
|
||||
@ -18733,6 +18792,23 @@ static SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG,
|
||||
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
|
||||
DAG.getBitcast(MVT::v2f64, Or), DAG.getIntPtrConstant(0, dl));
|
||||
|
||||
if (Op.getNode()->isStrictFPOpcode()) {
|
||||
// Subtract the bias.
|
||||
// TODO: Are there any fast-math-flags to propagate here?
|
||||
SDValue Chain = Op.getOperand(0);
|
||||
SDValue Sub = DAG.getNode(ISD::STRICT_FSUB, dl, {MVT::f64, MVT::Other},
|
||||
{Chain, Or, Bias});
|
||||
|
||||
if (Op.getValueType() == Sub.getValueType())
|
||||
return Sub;
|
||||
|
||||
// Handle final rounding.
|
||||
std::pair<SDValue, SDValue> ResultPair = DAG.getStrictFPExtendOrRound(
|
||||
Sub, Sub.getValue(1), dl, Op.getSimpleValueType());
|
||||
|
||||
return DAG.getMergeValues({ResultPair.first, ResultPair.second}, dl);
|
||||
}
|
||||
|
||||
// Subtract the bias.
|
||||
// TODO: Are there any fast-math-flags to propagate here?
|
||||
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias);
|
||||
@ -18747,6 +18823,10 @@ static SDValue lowerUINT_TO_FP_v2i32(SDValue Op, SelectionDAG &DAG,
|
||||
if (Op.getSimpleValueType() != MVT::v2f64)
|
||||
return SDValue();
|
||||
|
||||
// FIXME: Need to fix the lack of StrictFP support here.
|
||||
if (Op.getNode()->isStrictFPOpcode())
|
||||
return SDValue();
|
||||
|
||||
SDValue N0 = Op.getOperand(0);
|
||||
assert(N0.getSimpleValueType() == MVT::v2i32 && "Unexpected input type");
|
||||
|
||||
@ -18873,7 +18953,8 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
|
||||
|
||||
static SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG,
|
||||
const X86Subtarget &Subtarget) {
|
||||
SDValue N0 = Op.getOperand(0);
|
||||
unsigned OpNo = Op.getNode()->isStrictFPOpcode() ? 1 : 0;
|
||||
SDValue N0 = Op.getOperand(OpNo);
|
||||
MVT SrcVT = N0.getSimpleValueType();
|
||||
SDLoc dl(Op);
|
||||
|
||||
@ -18891,11 +18972,14 @@ static SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG,
|
||||
|
||||
SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
SDValue N0 = Op.getOperand(0);
|
||||
bool IsStrict = Op->isStrictFPOpcode();
|
||||
unsigned OpNo = IsStrict ? 1 : 0;
|
||||
SDValue Src = Op.getOperand(OpNo);
|
||||
SDLoc dl(Op);
|
||||
auto PtrVT = getPointerTy(DAG.getDataLayout());
|
||||
MVT SrcVT = N0.getSimpleValueType();
|
||||
MVT SrcVT = Src.getSimpleValueType();
|
||||
MVT DstVT = Op.getSimpleValueType();
|
||||
SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
|
||||
|
||||
if (DstVT == MVT::f128)
|
||||
return LowerF128Call(Op, DAG, RTLIB::getUINTTOFP(SrcVT, DstVT));
|
||||
@ -18915,8 +18999,11 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
|
||||
|
||||
// Promote i32 to i64 and use a signed conversion on 64-bit targets.
|
||||
if (SrcVT == MVT::i32 && Subtarget.is64Bit()) {
|
||||
N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, N0);
|
||||
return DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, N0);
|
||||
Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Src);
|
||||
if (IsStrict)
|
||||
return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {DstVT, MVT::Other},
|
||||
{Chain, Src});
|
||||
return DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src);
|
||||
}
|
||||
|
||||
if (SDValue V = LowerI64IntToFP_AVX512DQ(Op, DAG, Subtarget))
|
||||
@ -18933,22 +19020,28 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
|
||||
SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64);
|
||||
if (SrcVT == MVT::i32) {
|
||||
SDValue OffsetSlot = DAG.getMemBasePlusOffset(StackSlot, 4, dl);
|
||||
SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
|
||||
StackSlot, MachinePointerInfo());
|
||||
SDValue Store1 =
|
||||
DAG.getStore(Chain, dl, Src, StackSlot, MachinePointerInfo());
|
||||
SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, dl, MVT::i32),
|
||||
OffsetSlot, MachinePointerInfo());
|
||||
return BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG).first;
|
||||
std::pair<SDValue, SDValue> Tmp =
|
||||
BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG);
|
||||
if (IsStrict)
|
||||
return DAG.getMergeValues({Tmp.first, Tmp.second}, dl);
|
||||
|
||||
return Tmp.first;
|
||||
}
|
||||
|
||||
assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP");
|
||||
SDValue ValueToStore = Op.getOperand(0);
|
||||
if (isScalarFPTypeInSSEReg(Op.getValueType()) && !Subtarget.is64Bit())
|
||||
SDValue ValueToStore = Src;
|
||||
if (isScalarFPTypeInSSEReg(Op.getValueType()) && !Subtarget.is64Bit()) {
|
||||
// Bitcasting to f64 here allows us to do a single 64-bit store from
|
||||
// an SSE register, avoiding the store forwarding penalty that would come
|
||||
// with two 32-bit stores.
|
||||
ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore);
|
||||
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, ValueToStore, StackSlot,
|
||||
MachinePointerInfo());
|
||||
}
|
||||
SDValue Store =
|
||||
DAG.getStore(Chain, dl, ValueToStore, StackSlot, MachinePointerInfo());
|
||||
// For i64 source, we need to add the appropriate power of 2 if the input
|
||||
// was negative. This is the same as the optimization in
|
||||
// DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP, and for it to be safe here,
|
||||
@ -18963,13 +19056,14 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
|
||||
SDValue Ops[] = { Store, StackSlot };
|
||||
SDValue Fild = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops,
|
||||
MVT::i64, MMO);
|
||||
Chain = Fild.getValue(1);
|
||||
|
||||
APInt FF(32, 0x5F800000ULL);
|
||||
|
||||
// Check whether the sign bit is set.
|
||||
SDValue SignSet = DAG.getSetCC(
|
||||
dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
|
||||
Op.getOperand(0), DAG.getConstant(0, dl, MVT::i64), ISD::SETLT);
|
||||
Op.getOperand(OpNo), DAG.getConstant(0, dl, MVT::i64), ISD::SETLT);
|
||||
|
||||
// Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits.
|
||||
SDValue FudgePtr = DAG.getConstantPool(
|
||||
@ -18984,11 +19078,18 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
|
||||
// Load the value out, extending it from f32 to f80.
|
||||
// FIXME: Avoid the extend by constructing the right constant pool?
|
||||
SDValue Fudge = DAG.getExtLoad(
|
||||
ISD::EXTLOAD, dl, MVT::f80, DAG.getEntryNode(), FudgePtr,
|
||||
ISD::EXTLOAD, dl, MVT::f80, Chain, FudgePtr,
|
||||
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32,
|
||||
/* Alignment = */ 4);
|
||||
Chain = Fudge.getValue(1);
|
||||
// Extend everything to 80 bits to force it to be done on x87.
|
||||
// TODO: Are there any fast-math-flags to propagate here?
|
||||
if (IsStrict) {
|
||||
SDValue Add = DAG.getNode(ISD::STRICT_FADD, dl, {MVT::f80, MVT::Other},
|
||||
{Chain, Fild, Fudge});
|
||||
return DAG.getNode(ISD::STRICT_FP_ROUND, dl, {DstVT, MVT::Other},
|
||||
{Add.getValue(1), Add, DAG.getIntPtrConstant(0, dl)});
|
||||
}
|
||||
SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge);
|
||||
return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add,
|
||||
DAG.getIntPtrConstant(0, dl));
|
||||
@ -19042,10 +19143,7 @@ X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
|
||||
int SSFI = MF.getFrameInfo().CreateStackObject(MemSize, MemSize, false);
|
||||
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
|
||||
|
||||
if (IsStrict)
|
||||
Chain = Op.getOperand(0);
|
||||
else
|
||||
Chain = DAG.getEntryNode();
|
||||
Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
|
||||
|
||||
SDValue Adjust; // 0x0 or 0x80000000, for result sign bit adjustment.
|
||||
|
||||
@ -28013,7 +28111,9 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
||||
case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
|
||||
case ISD::FSHL:
|
||||
case ISD::FSHR: return LowerFunnelShift(Op, Subtarget, DAG);
|
||||
case ISD::STRICT_SINT_TO_FP:
|
||||
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
|
||||
case ISD::STRICT_UINT_TO_FP:
|
||||
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
|
||||
case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
|
||||
case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, Subtarget, DAG);
|
||||
|
@ -1941,6 +1941,762 @@ entry:
|
||||
ret i64 %result
|
||||
}
|
||||
|
||||
; Verify that sitofp(%x) isn't simplified when the rounding mode is
|
||||
; unknown.
|
||||
; Verify that no gross errors happen.
|
||||
define double @sifdb(i8 %x) #0 {
|
||||
; X87-LABEL: sifdb:
|
||||
; X87: # %bb.0: # %entry
|
||||
; X87-NEXT: pushl %eax
|
||||
; X87-NEXT: .cfi_def_cfa_offset 8
|
||||
; X87-NEXT: movsbl {{[0-9]+}}(%esp), %eax
|
||||
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
|
||||
; X87-NEXT: filds {{[0-9]+}}(%esp)
|
||||
; X87-NEXT: popl %eax
|
||||
; X87-NEXT: .cfi_def_cfa_offset 4
|
||||
; X87-NEXT: retl
|
||||
;
|
||||
; X86-SSE-LABEL: sifdb:
|
||||
; X86-SSE: # %bb.0: # %entry
|
||||
; X86-SSE-NEXT: subl $12, %esp
|
||||
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
|
||||
; X86-SSE-NEXT: movsbl {{[0-9]+}}(%esp), %eax
|
||||
; X86-SSE-NEXT: cvtsi2sd %eax, %xmm0
|
||||
; X86-SSE-NEXT: movsd %xmm0, (%esp)
|
||||
; X86-SSE-NEXT: fldl (%esp)
|
||||
; X86-SSE-NEXT: addl $12, %esp
|
||||
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
|
||||
; X86-SSE-NEXT: retl
|
||||
;
|
||||
; SSE-LABEL: sifdb:
|
||||
; SSE: # %bb.0: # %entry
|
||||
; SSE-NEXT: movsbl %dil, %eax
|
||||
; SSE-NEXT: cvtsi2sd %eax, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: sifdb:
|
||||
; AVX: # %bb.0: # %entry
|
||||
; AVX-NEXT: movsbl %dil, %eax
|
||||
; AVX-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
entry:
|
||||
%result = call double @llvm.experimental.constrained.sitofp.f64.i8(i8 %x,
|
||||
metadata !"round.dynamic",
|
||||
metadata !"fpexcept.strict") #0
|
||||
ret double %result
|
||||
}
|
||||
|
||||
define double @sifdw(i16 %x) #0 {
|
||||
; X87-LABEL: sifdw:
|
||||
; X87: # %bb.0: # %entry
|
||||
; X87-NEXT: pushl %eax
|
||||
; X87-NEXT: .cfi_def_cfa_offset 8
|
||||
; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
|
||||
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
|
||||
; X87-NEXT: filds {{[0-9]+}}(%esp)
|
||||
; X87-NEXT: popl %eax
|
||||
; X87-NEXT: .cfi_def_cfa_offset 4
|
||||
; X87-NEXT: retl
|
||||
;
|
||||
; X86-SSE-LABEL: sifdw:
|
||||
; X86-SSE: # %bb.0: # %entry
|
||||
; X86-SSE-NEXT: subl $12, %esp
|
||||
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
|
||||
; X86-SSE-NEXT: movswl {{[0-9]+}}(%esp), %eax
|
||||
; X86-SSE-NEXT: cvtsi2sd %eax, %xmm0
|
||||
; X86-SSE-NEXT: movsd %xmm0, (%esp)
|
||||
; X86-SSE-NEXT: fldl (%esp)
|
||||
; X86-SSE-NEXT: addl $12, %esp
|
||||
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
|
||||
; X86-SSE-NEXT: retl
|
||||
;
|
||||
; SSE-LABEL: sifdw:
|
||||
; SSE: # %bb.0: # %entry
|
||||
; SSE-NEXT: movswl %di, %eax
|
||||
; SSE-NEXT: cvtsi2sd %eax, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: sifdw:
|
||||
; AVX: # %bb.0: # %entry
|
||||
; AVX-NEXT: movswl %di, %eax
|
||||
; AVX-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
entry:
|
||||
%result = call double @llvm.experimental.constrained.sitofp.f64.i16(i16 %x,
|
||||
metadata !"round.dynamic",
|
||||
metadata !"fpexcept.strict") #0
|
||||
ret double %result
|
||||
}
|
||||
|
||||
define double @sifdi(i32 %x) #0 {
|
||||
; X87-LABEL: sifdi:
|
||||
; X87: # %bb.0: # %entry
|
||||
; X87-NEXT: pushl %eax
|
||||
; X87-NEXT: .cfi_def_cfa_offset 8
|
||||
; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X87-NEXT: movl %eax, (%esp)
|
||||
; X87-NEXT: fildl (%esp)
|
||||
; X87-NEXT: popl %eax
|
||||
; X87-NEXT: .cfi_def_cfa_offset 4
|
||||
; X87-NEXT: retl
|
||||
;
|
||||
; X86-SSE-LABEL: sifdi:
|
||||
; X86-SSE: # %bb.0: # %entry
|
||||
; X86-SSE-NEXT: subl $12, %esp
|
||||
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
|
||||
; X86-SSE-NEXT: cvtsi2sdl {{[0-9]+}}(%esp), %xmm0
|
||||
; X86-SSE-NEXT: movsd %xmm0, (%esp)
|
||||
; X86-SSE-NEXT: fldl (%esp)
|
||||
; X86-SSE-NEXT: addl $12, %esp
|
||||
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
|
||||
; X86-SSE-NEXT: retl
|
||||
;
|
||||
; SSE-LABEL: sifdi:
|
||||
; SSE: # %bb.0: # %entry
|
||||
; SSE-NEXT: cvtsi2sd %edi, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: sifdi:
|
||||
; AVX: # %bb.0: # %entry
|
||||
; AVX-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
entry:
|
||||
%result = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %x,
|
||||
metadata !"round.dynamic",
|
||||
metadata !"fpexcept.strict") #0
|
||||
ret double %result
|
||||
}
|
||||
|
||||
define float @siffb(i8 %x) #0 {
|
||||
; X87-LABEL: siffb:
|
||||
; X87: # %bb.0: # %entry
|
||||
; X87-NEXT: pushl %eax
|
||||
; X87-NEXT: .cfi_def_cfa_offset 8
|
||||
; X87-NEXT: movsbl {{[0-9]+}}(%esp), %eax
|
||||
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
|
||||
; X87-NEXT: filds {{[0-9]+}}(%esp)
|
||||
; X87-NEXT: popl %eax
|
||||
; X87-NEXT: .cfi_def_cfa_offset 4
|
||||
; X87-NEXT: retl
|
||||
;
|
||||
; X86-SSE-LABEL: siffb:
|
||||
; X86-SSE: # %bb.0: # %entry
|
||||
; X86-SSE-NEXT: pushl %eax
|
||||
; X86-SSE-NEXT: .cfi_def_cfa_offset 8
|
||||
; X86-SSE-NEXT: movsbl {{[0-9]+}}(%esp), %eax
|
||||
; X86-SSE-NEXT: cvtsi2ss %eax, %xmm0
|
||||
; X86-SSE-NEXT: movss %xmm0, (%esp)
|
||||
; X86-SSE-NEXT: flds (%esp)
|
||||
; X86-SSE-NEXT: popl %eax
|
||||
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
|
||||
; X86-SSE-NEXT: retl
|
||||
;
|
||||
; SSE-LABEL: siffb:
|
||||
; SSE: # %bb.0: # %entry
|
||||
; SSE-NEXT: movsbl %dil, %eax
|
||||
; SSE-NEXT: cvtsi2ss %eax, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: siffb:
|
||||
; AVX: # %bb.0: # %entry
|
||||
; AVX-NEXT: movsbl %dil, %eax
|
||||
; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
entry:
|
||||
%result = call float @llvm.experimental.constrained.sitofp.f32.i8(i8 %x,
|
||||
metadata !"round.dynamic",
|
||||
metadata !"fpexcept.strict") #0
|
||||
ret float %result
|
||||
}
|
||||
|
||||
define float @siffw(i16 %x) #0 {
|
||||
; X87-LABEL: siffw:
|
||||
; X87: # %bb.0: # %entry
|
||||
; X87-NEXT: pushl %eax
|
||||
; X87-NEXT: .cfi_def_cfa_offset 8
|
||||
; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
|
||||
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
|
||||
; X87-NEXT: filds {{[0-9]+}}(%esp)
|
||||
; X87-NEXT: popl %eax
|
||||
; X87-NEXT: .cfi_def_cfa_offset 4
|
||||
; X87-NEXT: retl
|
||||
;
|
||||
; X86-SSE-LABEL: siffw:
|
||||
; X86-SSE: # %bb.0: # %entry
|
||||
; X86-SSE-NEXT: pushl %eax
|
||||
; X86-SSE-NEXT: .cfi_def_cfa_offset 8
|
||||
; X86-SSE-NEXT: movswl {{[0-9]+}}(%esp), %eax
|
||||
; X86-SSE-NEXT: cvtsi2ss %eax, %xmm0
|
||||
; X86-SSE-NEXT: movss %xmm0, (%esp)
|
||||
; X86-SSE-NEXT: flds (%esp)
|
||||
; X86-SSE-NEXT: popl %eax
|
||||
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
|
||||
; X86-SSE-NEXT: retl
|
||||
;
|
||||
; SSE-LABEL: siffw:
|
||||
; SSE: # %bb.0: # %entry
|
||||
; SSE-NEXT: movswl %di, %eax
|
||||
; SSE-NEXT: cvtsi2ss %eax, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: siffw:
|
||||
; AVX: # %bb.0: # %entry
|
||||
; AVX-NEXT: movswl %di, %eax
|
||||
; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
entry:
|
||||
%result = call float @llvm.experimental.constrained.sitofp.f32.i16(i16 %x,
|
||||
metadata !"round.dynamic",
|
||||
metadata !"fpexcept.strict") #0
|
||||
ret float %result
|
||||
}
|
||||
|
||||
define float @siffi(i32 %x) #0 {
|
||||
; X87-LABEL: siffi:
|
||||
; X87: # %bb.0: # %entry
|
||||
; X87-NEXT: pushl %eax
|
||||
; X87-NEXT: .cfi_def_cfa_offset 8
|
||||
; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X87-NEXT: movl %eax, (%esp)
|
||||
; X87-NEXT: fildl (%esp)
|
||||
; X87-NEXT: popl %eax
|
||||
; X87-NEXT: .cfi_def_cfa_offset 4
|
||||
; X87-NEXT: retl
|
||||
;
|
||||
; X86-SSE-LABEL: siffi:
|
||||
; X86-SSE: # %bb.0: # %entry
|
||||
; X86-SSE-NEXT: pushl %eax
|
||||
; X86-SSE-NEXT: .cfi_def_cfa_offset 8
|
||||
; X86-SSE-NEXT: cvtsi2ssl {{[0-9]+}}(%esp), %xmm0
|
||||
; X86-SSE-NEXT: movss %xmm0, (%esp)
|
||||
; X86-SSE-NEXT: flds (%esp)
|
||||
; X86-SSE-NEXT: popl %eax
|
||||
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
|
||||
; X86-SSE-NEXT: retl
|
||||
;
|
||||
; SSE-LABEL: siffi:
|
||||
; SSE: # %bb.0: # %entry
|
||||
; SSE-NEXT: cvtsi2ss %edi, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: siffi:
|
||||
; AVX: # %bb.0: # %entry
|
||||
; AVX-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
entry:
|
||||
%result = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %x,
|
||||
metadata !"round.dynamic",
|
||||
metadata !"fpexcept.strict") #0
|
||||
ret float %result
|
||||
}
|
||||
|
||||
define double @sifdl(i64 %x) #0 {
|
||||
; X87-LABEL: sifdl:
|
||||
; X87: # %bb.0: # %entry
|
||||
; X87-NEXT: subl $12, %esp
|
||||
; X87-NEXT: .cfi_def_cfa_offset 16
|
||||
; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X87-NEXT: movl %ecx, {{[0-9]+}}(%esp)
|
||||
; X87-NEXT: movl %eax, (%esp)
|
||||
; X87-NEXT: fildll (%esp)
|
||||
; X87-NEXT: addl $12, %esp
|
||||
; X87-NEXT: .cfi_def_cfa_offset 4
|
||||
; X87-NEXT: retl
|
||||
;
|
||||
; X86-SSE-LABEL: sifdl:
|
||||
; X86-SSE: # %bb.0: # %entry
|
||||
; X86-SSE-NEXT: subl $20, %esp
|
||||
; X86-SSE-NEXT: .cfi_def_cfa_offset 24
|
||||
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; X86-SSE-NEXT: movlps %xmm0, {{[0-9]+}}(%esp)
|
||||
; X86-SSE-NEXT: fildll {{[0-9]+}}(%esp)
|
||||
; X86-SSE-NEXT: fstpl (%esp)
|
||||
; X86-SSE-NEXT: fldl (%esp)
|
||||
; X86-SSE-NEXT: addl $20, %esp
|
||||
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
|
||||
; X86-SSE-NEXT: retl
|
||||
;
|
||||
; SSE-LABEL: sifdl:
|
||||
; SSE: # %bb.0: # %entry
|
||||
; SSE-NEXT: cvtsi2sd %rdi, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: sifdl:
|
||||
; AVX: # %bb.0: # %entry
|
||||
; AVX-NEXT: vcvtsi2sd %rdi, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
entry:
|
||||
%result = call double @llvm.experimental.constrained.sitofp.f64.i64(i64 %x,
|
||||
metadata !"round.dynamic",
|
||||
metadata !"fpexcept.strict") #0
|
||||
ret double %result
|
||||
}
|
||||
|
||||
define float @siffl(i64 %x) #0 {
|
||||
; X87-LABEL: siffl:
|
||||
; X87: # %bb.0: # %entry
|
||||
; X87-NEXT: subl $12, %esp
|
||||
; X87-NEXT: .cfi_def_cfa_offset 16
|
||||
; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X87-NEXT: movl %ecx, {{[0-9]+}}(%esp)
|
||||
; X87-NEXT: movl %eax, (%esp)
|
||||
; X87-NEXT: fildll (%esp)
|
||||
; X87-NEXT: addl $12, %esp
|
||||
; X87-NEXT: .cfi_def_cfa_offset 4
|
||||
; X87-NEXT: retl
|
||||
;
|
||||
; X86-SSE-LABEL: siffl:
|
||||
; X86-SSE: # %bb.0: # %entry
|
||||
; X86-SSE-NEXT: subl $20, %esp
|
||||
; X86-SSE-NEXT: .cfi_def_cfa_offset 24
|
||||
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; X86-SSE-NEXT: movlps %xmm0, {{[0-9]+}}(%esp)
|
||||
; X86-SSE-NEXT: fildll {{[0-9]+}}(%esp)
|
||||
; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp)
|
||||
; X86-SSE-NEXT: flds {{[0-9]+}}(%esp)
|
||||
; X86-SSE-NEXT: addl $20, %esp
|
||||
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
|
||||
; X86-SSE-NEXT: retl
|
||||
;
|
||||
; SSE-LABEL: siffl:
|
||||
; SSE: # %bb.0: # %entry
|
||||
; SSE-NEXT: cvtsi2ss %rdi, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: siffl:
|
||||
; AVX: # %bb.0: # %entry
|
||||
; AVX-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
entry:
|
||||
%result = call float @llvm.experimental.constrained.sitofp.f32.i64(i64 %x,
|
||||
metadata !"round.dynamic",
|
||||
metadata !"fpexcept.strict") #0
|
||||
ret float %result
|
||||
}
|
||||
|
||||
; Verify that uitofp(%x) isn't simplified when the rounding mode is
|
||||
; unknown.
|
||||
; Verify that no gross errors happen.
|
||||
define double @uifdb(i8 %x) #0 {
|
||||
; X87-LABEL: uifdb:
|
||||
; X87: # %bb.0: # %entry
|
||||
; X87-NEXT: pushl %eax
|
||||
; X87-NEXT: .cfi_def_cfa_offset 8
|
||||
; X87-NEXT: movzbl {{[0-9]+}}(%esp), %eax
|
||||
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
|
||||
; X87-NEXT: filds {{[0-9]+}}(%esp)
|
||||
; X87-NEXT: popl %eax
|
||||
; X87-NEXT: .cfi_def_cfa_offset 4
|
||||
; X87-NEXT: retl
|
||||
;
|
||||
; X86-SSE-LABEL: uifdb:
|
||||
; X86-SSE: # %bb.0: # %entry
|
||||
; X86-SSE-NEXT: subl $12, %esp
|
||||
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
|
||||
; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax
|
||||
; X86-SSE-NEXT: cvtsi2sd %eax, %xmm0
|
||||
; X86-SSE-NEXT: movsd %xmm0, (%esp)
|
||||
; X86-SSE-NEXT: fldl (%esp)
|
||||
; X86-SSE-NEXT: addl $12, %esp
|
||||
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
|
||||
; X86-SSE-NEXT: retl
|
||||
;
|
||||
; SSE-LABEL: uifdb:
|
||||
; SSE: # %bb.0: # %entry
|
||||
; SSE-NEXT: movzbl %dil, %eax
|
||||
; SSE-NEXT: cvtsi2sd %eax, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: uifdb:
|
||||
; AVX: # %bb.0: # %entry
|
||||
; AVX-NEXT: movzbl %dil, %eax
|
||||
; AVX-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
entry:
|
||||
%result = call double @llvm.experimental.constrained.uitofp.f64.i8(i8 %x,
|
||||
metadata !"round.dynamic",
|
||||
metadata !"fpexcept.strict") #0
|
||||
ret double %result
|
||||
}
|
||||
|
||||
; uifdw converts an unsigned i16 argument to double through the constrained
; uitofp intrinsic, using dynamic rounding ("round.dynamic") and strict
; exception semantics ("fpexcept.strict").
; Every set of assertions below expects the argument to be zero-extended
; (movzwl) to 32 bits and then converted with a 32-bit integer-to-FP
; instruction (fildl, cvtsi2sd or vcvtsi2sd).
; NOTE(review): the RUN lines that define the check prefixes are outside this
; excerpt - confirm which targets/features each prefix covers.
define double @uifdw(i16 %x) #0 {
|
||||
; X87-LABEL: uifdw:
|
||||
; X87: # %bb.0: # %entry
|
||||
; X87-NEXT: pushl %eax
|
||||
; X87-NEXT: .cfi_def_cfa_offset 8
|
||||
; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
|
||||
; X87-NEXT: movl %eax, (%esp)
|
||||
; X87-NEXT: fildl (%esp)
|
||||
; X87-NEXT: popl %eax
|
||||
; X87-NEXT: .cfi_def_cfa_offset 4
|
||||
; X87-NEXT: retl
|
||||
;
|
||||
; X86-SSE-LABEL: uifdw:
|
||||
; X86-SSE: # %bb.0: # %entry
|
||||
; X86-SSE-NEXT: subl $12, %esp
|
||||
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
|
||||
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
|
||||
; X86-SSE-NEXT: cvtsi2sd %eax, %xmm0
|
||||
; X86-SSE-NEXT: movsd %xmm0, (%esp)
|
||||
; X86-SSE-NEXT: fldl (%esp)
|
||||
; X86-SSE-NEXT: addl $12, %esp
|
||||
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
|
||||
; X86-SSE-NEXT: retl
|
||||
;
|
||||
; SSE-LABEL: uifdw:
|
||||
; SSE: # %bb.0: # %entry
|
||||
; SSE-NEXT: movzwl %di, %eax
|
||||
; SSE-NEXT: cvtsi2sd %eax, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: uifdw:
|
||||
; AVX: # %bb.0: # %entry
|
||||
; AVX-NEXT: movzwl %di, %eax
|
||||
; AVX-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
entry:
|
||||
%result = call double @llvm.experimental.constrained.uitofp.f64.i16(i16 %x,
|
||||
metadata !"round.dynamic",
|
||||
metadata !"fpexcept.strict") #0
|
||||
ret double %result
|
||||
}
|
||||
|
||||
; uifdi converts an unsigned i32 argument to double through the constrained
; uitofp intrinsic, using dynamic rounding ("round.dynamic") and strict
; exception semantics ("fpexcept.strict").
; The expected code differs per check prefix:
;  - X87 assertions: the value is stored with a zero upper dword and loaded as
;    a 64-bit integer (fildll).
;  - X86-SSE assertions: an orpd/subsd sequence against a loaded constant, with
;    the result returned on the x87 stack (movsd + fldl).
;  - SSE and AVX1 assertions: movl into %eax, then the 64-bit register form of
;    (v)cvtsi2sd on %rax.
;  - AVX512 assertions: a single vcvtusi2sd on %edi.
; NOTE(review): the RUN lines that define the check prefixes are outside this
; excerpt - confirm which targets/features each prefix covers.
define double @uifdi(i32 %x) #0 {
|
||||
; X87-LABEL: uifdi:
|
||||
; X87: # %bb.0: # %entry
|
||||
; X87-NEXT: subl $12, %esp
|
||||
; X87-NEXT: .cfi_def_cfa_offset 16
|
||||
; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X87-NEXT: movl %eax, (%esp)
|
||||
; X87-NEXT: movl $0, {{[0-9]+}}(%esp)
|
||||
; X87-NEXT: fildll (%esp)
|
||||
; X87-NEXT: addl $12, %esp
|
||||
; X87-NEXT: .cfi_def_cfa_offset 4
|
||||
; X87-NEXT: retl
|
||||
;
|
||||
; X86-SSE-LABEL: uifdi:
|
||||
; X86-SSE: # %bb.0: # %entry
|
||||
; X86-SSE-NEXT: subl $12, %esp
|
||||
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
|
||||
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; X86-SSE-NEXT: orpd %xmm0, %xmm1
|
||||
; X86-SSE-NEXT: subsd %xmm0, %xmm1
|
||||
; X86-SSE-NEXT: movsd %xmm1, (%esp)
|
||||
; X86-SSE-NEXT: fldl (%esp)
|
||||
; X86-SSE-NEXT: addl $12, %esp
|
||||
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
|
||||
; X86-SSE-NEXT: retl
|
||||
;
|
||||
; SSE-LABEL: uifdi:
|
||||
; SSE: # %bb.0: # %entry
|
||||
; SSE-NEXT: movl %edi, %eax
|
||||
; SSE-NEXT: cvtsi2sd %rax, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: uifdi:
|
||||
; AVX1: # %bb.0: # %entry
|
||||
; AVX1-NEXT: movl %edi, %eax
|
||||
; AVX1-NEXT: vcvtsi2sd %rax, %xmm0, %xmm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: uifdi:
|
||||
; AVX512: # %bb.0: # %entry
|
||||
; AVX512-NEXT: vcvtusi2sd %edi, %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
entry:
|
||||
%result = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %x,
|
||||
metadata !"round.dynamic",
|
||||
metadata !"fpexcept.strict") #0
|
||||
ret double %result
|
||||
}
|
||||
|
||||
; uifdl converts an unsigned i64 argument to double through the constrained
; uitofp intrinsic, using dynamic rounding ("round.dynamic") and strict
; exception semantics ("fpexcept.strict").
; The expected code differs per check prefix:
;  - X87 assertions: the value is loaded as a 64-bit integer (fildll), then a
;    constant-pool entry indexed by the testl/setns result is added (fadds),
;    and the result is stored and reloaded as a double (fstpl/fldl).
;  - X86-SSE, SSE and AVX1 assertions: an unpack with a memory constant, a
;    packed subtract of a constant, and an add of the two resulting lanes.
;  - AVX512 assertions: a single vcvtusi2sd on %rdi.
; NOTE(review): the RUN lines that define the check prefixes are outside this
; excerpt - confirm which targets/features each prefix covers.
define double @uifdl(i64 %x) #0 {
|
||||
; X87-LABEL: uifdl:
|
||||
; X87: # %bb.0: # %entry
|
||||
; X87-NEXT: subl $20, %esp
|
||||
; X87-NEXT: .cfi_def_cfa_offset 24
|
||||
; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X87-NEXT: movl %ecx, {{[0-9]+}}(%esp)
|
||||
; X87-NEXT: movl %eax, (%esp)
|
||||
; X87-NEXT: xorl %eax, %eax
|
||||
; X87-NEXT: testl %ecx, %ecx
|
||||
; X87-NEXT: setns %al
|
||||
; X87-NEXT: fildll (%esp)
|
||||
; X87-NEXT: fadds {{\.LCPI.*}}(,%eax,4)
|
||||
; X87-NEXT: fstpl {{[0-9]+}}(%esp)
|
||||
; X87-NEXT: fldl {{[0-9]+}}(%esp)
|
||||
; X87-NEXT: addl $20, %esp
|
||||
; X87-NEXT: .cfi_def_cfa_offset 4
|
||||
; X87-NEXT: retl
|
||||
;
|
||||
; X86-SSE-LABEL: uifdl:
|
||||
; X86-SSE: # %bb.0: # %entry
|
||||
; X86-SSE-NEXT: subl $12, %esp
|
||||
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
|
||||
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; X86-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
|
||||
; X86-SSE-NEXT: subpd {{\.LCPI.*}}, %xmm0
|
||||
; X86-SSE-NEXT: movapd %xmm0, %xmm1
|
||||
; X86-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
|
||||
; X86-SSE-NEXT: addpd %xmm0, %xmm1
|
||||
; X86-SSE-NEXT: movlpd %xmm1, (%esp)
|
||||
; X86-SSE-NEXT: fldl (%esp)
|
||||
; X86-SSE-NEXT: addl $12, %esp
|
||||
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
|
||||
; X86-SSE-NEXT: retl
|
||||
;
|
||||
; SSE-LABEL: uifdl:
|
||||
; SSE: # %bb.0: # %entry
|
||||
; SSE-NEXT: movq %rdi, %xmm1
|
||||
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
|
||||
; SSE-NEXT: subpd {{.*}}(%rip), %xmm1
|
||||
; SSE-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
|
||||
; SSE-NEXT: addpd %xmm1, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: uifdl:
|
||||
; AVX1: # %bb.0: # %entry
|
||||
; AVX1-NEXT: vmovq %rdi, %xmm0
|
||||
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
|
||||
; AVX1-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
|
||||
; AVX1-NEXT: vaddpd %xmm0, %xmm1, %xmm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: uifdl:
|
||||
; AVX512: # %bb.0: # %entry
|
||||
; AVX512-NEXT: vcvtusi2sd %rdi, %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
entry:
|
||||
%result = call double @llvm.experimental.constrained.uitofp.f64.i64(i64 %x,
|
||||
metadata !"round.dynamic",
|
||||
metadata !"fpexcept.strict") #0
|
||||
ret double %result
|
||||
}
|
||||
|
||||
; uiffb converts an unsigned i8 argument to float through the constrained
; uitofp intrinsic, using dynamic rounding ("round.dynamic") and strict
; exception semantics ("fpexcept.strict").
; Every set of assertions below expects the argument to be zero-extended
; (movzbl) and then fed to an integer-to-FP conversion instruction
; (filds, cvtsi2ss or vcvtsi2ss).
; NOTE(review): the RUN lines that define the check prefixes are outside this
; excerpt - confirm which targets/features each prefix covers.
define float @uiffb(i8 %x) #0 {
|
||||
; X87-LABEL: uiffb:
|
||||
; X87: # %bb.0: # %entry
|
||||
; X87-NEXT: pushl %eax
|
||||
; X87-NEXT: .cfi_def_cfa_offset 8
|
||||
; X87-NEXT: movzbl {{[0-9]+}}(%esp), %eax
|
||||
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
|
||||
; X87-NEXT: filds {{[0-9]+}}(%esp)
|
||||
; X87-NEXT: popl %eax
|
||||
; X87-NEXT: .cfi_def_cfa_offset 4
|
||||
; X87-NEXT: retl
|
||||
;
|
||||
; X86-SSE-LABEL: uiffb:
|
||||
; X86-SSE: # %bb.0: # %entry
|
||||
; X86-SSE-NEXT: pushl %eax
|
||||
; X86-SSE-NEXT: .cfi_def_cfa_offset 8
|
||||
; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax
|
||||
; X86-SSE-NEXT: cvtsi2ss %eax, %xmm0
|
||||
; X86-SSE-NEXT: movss %xmm0, (%esp)
|
||||
; X86-SSE-NEXT: flds (%esp)
|
||||
; X86-SSE-NEXT: popl %eax
|
||||
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
|
||||
; X86-SSE-NEXT: retl
|
||||
;
|
||||
; SSE-LABEL: uiffb:
|
||||
; SSE: # %bb.0: # %entry
|
||||
; SSE-NEXT: movzbl %dil, %eax
|
||||
; SSE-NEXT: cvtsi2ss %eax, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: uiffb:
|
||||
; AVX: # %bb.0: # %entry
|
||||
; AVX-NEXT: movzbl %dil, %eax
|
||||
; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
entry:
|
||||
%result = call float @llvm.experimental.constrained.uitofp.f32.i8(i8 %x,
|
||||
metadata !"round.dynamic",
|
||||
metadata !"fpexcept.strict") #0
|
||||
ret float %result
|
||||
}
|
||||
|
||||
; uiffw converts an unsigned i16 argument to float through the constrained
; uitofp intrinsic, using dynamic rounding ("round.dynamic") and strict
; exception semantics ("fpexcept.strict").
; Every set of assertions below expects the argument to be zero-extended
; (movzwl) to 32 bits and then converted with a 32-bit integer-to-FP
; instruction (fildl, cvtsi2ss or vcvtsi2ss).
; NOTE(review): the RUN lines that define the check prefixes are outside this
; excerpt - confirm which targets/features each prefix covers.
define float @uiffw(i16 %x) #0 {
|
||||
; X87-LABEL: uiffw:
|
||||
; X87: # %bb.0: # %entry
|
||||
; X87-NEXT: pushl %eax
|
||||
; X87-NEXT: .cfi_def_cfa_offset 8
|
||||
; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
|
||||
; X87-NEXT: movl %eax, (%esp)
|
||||
; X87-NEXT: fildl (%esp)
|
||||
; X87-NEXT: popl %eax
|
||||
; X87-NEXT: .cfi_def_cfa_offset 4
|
||||
; X87-NEXT: retl
|
||||
;
|
||||
; X86-SSE-LABEL: uiffw:
|
||||
; X86-SSE: # %bb.0: # %entry
|
||||
; X86-SSE-NEXT: pushl %eax
|
||||
; X86-SSE-NEXT: .cfi_def_cfa_offset 8
|
||||
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
|
||||
; X86-SSE-NEXT: cvtsi2ss %eax, %xmm0
|
||||
; X86-SSE-NEXT: movss %xmm0, (%esp)
|
||||
; X86-SSE-NEXT: flds (%esp)
|
||||
; X86-SSE-NEXT: popl %eax
|
||||
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
|
||||
; X86-SSE-NEXT: retl
|
||||
;
|
||||
; SSE-LABEL: uiffw:
|
||||
; SSE: # %bb.0: # %entry
|
||||
; SSE-NEXT: movzwl %di, %eax
|
||||
; SSE-NEXT: cvtsi2ss %eax, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: uiffw:
|
||||
; AVX: # %bb.0: # %entry
|
||||
; AVX-NEXT: movzwl %di, %eax
|
||||
; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
entry:
|
||||
%result = call float @llvm.experimental.constrained.uitofp.f32.i16(i16 %x,
|
||||
metadata !"round.dynamic",
|
||||
metadata !"fpexcept.strict") #0
|
||||
ret float %result
|
||||
}
|
||||
|
||||
; uiffi converts an unsigned i32 argument to float through the constrained
; uitofp intrinsic, using dynamic rounding ("round.dynamic") and strict
; exception semantics ("fpexcept.strict").
; The expected code differs per check prefix:
;  - X87 assertions: the value is stored with a zero upper dword and loaded as
;    a 64-bit integer (fildll).
;  - X86-SSE assertions: an orpd/subsd sequence against a loaded constant
;    produces a double, which is then narrowed with cvtsd2ss and returned on
;    the x87 stack (movss + flds).
;  - SSE and AVX1 assertions: movl into %eax, then the 64-bit register form of
;    (v)cvtsi2ss on %rax.
;  - AVX512 assertions: a single vcvtusi2ss on %edi.
; NOTE(review): the RUN lines that define the check prefixes are outside this
; excerpt - confirm which targets/features each prefix covers.
define float @uiffi(i32 %x) #0 {
|
||||
; X87-LABEL: uiffi:
|
||||
; X87: # %bb.0: # %entry
|
||||
; X87-NEXT: subl $12, %esp
|
||||
; X87-NEXT: .cfi_def_cfa_offset 16
|
||||
; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X87-NEXT: movl %eax, (%esp)
|
||||
; X87-NEXT: movl $0, {{[0-9]+}}(%esp)
|
||||
; X87-NEXT: fildll (%esp)
|
||||
; X87-NEXT: addl $12, %esp
|
||||
; X87-NEXT: .cfi_def_cfa_offset 4
|
||||
; X87-NEXT: retl
|
||||
;
|
||||
; X86-SSE-LABEL: uiffi:
|
||||
; X86-SSE: # %bb.0: # %entry
|
||||
; X86-SSE-NEXT: pushl %eax
|
||||
; X86-SSE-NEXT: .cfi_def_cfa_offset 8
|
||||
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; X86-SSE-NEXT: orpd %xmm0, %xmm1
|
||||
; X86-SSE-NEXT: subsd %xmm0, %xmm1
|
||||
; X86-SSE-NEXT: xorps %xmm0, %xmm0
|
||||
; X86-SSE-NEXT: cvtsd2ss %xmm1, %xmm0
|
||||
; X86-SSE-NEXT: movss %xmm0, (%esp)
|
||||
; X86-SSE-NEXT: flds (%esp)
|
||||
; X86-SSE-NEXT: popl %eax
|
||||
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
|
||||
; X86-SSE-NEXT: retl
|
||||
;
|
||||
; SSE-LABEL: uiffi:
|
||||
; SSE: # %bb.0: # %entry
|
||||
; SSE-NEXT: movl %edi, %eax
|
||||
; SSE-NEXT: cvtsi2ss %rax, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: uiffi:
|
||||
; AVX1: # %bb.0: # %entry
|
||||
; AVX1-NEXT: movl %edi, %eax
|
||||
; AVX1-NEXT: vcvtsi2ss %rax, %xmm0, %xmm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: uiffi:
|
||||
; AVX512: # %bb.0: # %entry
|
||||
; AVX512-NEXT: vcvtusi2ss %edi, %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
entry:
|
||||
%result = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %x,
|
||||
metadata !"round.dynamic",
|
||||
metadata !"fpexcept.strict") #0
|
||||
ret float %result
|
||||
}
|
||||
|
||||
; uiffl converts an unsigned i64 argument to float through the constrained
; uitofp intrinsic, using dynamic rounding ("round.dynamic") and strict
; exception semantics ("fpexcept.strict").
; The expected code differs per check prefix:
;  - X87 and X86-SSE assertions: the value is loaded as a 64-bit integer
;    (fildll), a constant-pool entry indexed by the setns result is added
;    (fadds), and the result is stored as a float (fstps) and reloaded.
;  - SSE and AVX1 assertions: a branch on the sign of %rdi (testq/js). The
;    fall-through path converts %rdi directly; the .LBB52_1 path shifts the
;    value right by one, ORs the original low bit back in, converts, and then
;    doubles the result (addss/vaddss of the register with itself).
;  - AVX512 assertions: a single vcvtusi2ss on %rdi.
; NOTE(review): the .LBB52_1 label is spelled out literally in the assertions;
; presumably the number tracks this function's position in the file, so
; adding or removing earlier functions would require regenerating the checks.
; NOTE(review): the RUN lines that define the check prefixes are outside this
; excerpt - confirm which targets/features each prefix covers.
define float @uiffl(i64 %x) #0 {
|
||||
; X87-LABEL: uiffl:
|
||||
; X87: # %bb.0: # %entry
|
||||
; X87-NEXT: subl $20, %esp
|
||||
; X87-NEXT: .cfi_def_cfa_offset 24
|
||||
; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X87-NEXT: movl %ecx, {{[0-9]+}}(%esp)
|
||||
; X87-NEXT: movl %eax, {{[0-9]+}}(%esp)
|
||||
; X87-NEXT: xorl %eax, %eax
|
||||
; X87-NEXT: testl %ecx, %ecx
|
||||
; X87-NEXT: setns %al
|
||||
; X87-NEXT: fildll {{[0-9]+}}(%esp)
|
||||
; X87-NEXT: fadds {{\.LCPI.*}}(,%eax,4)
|
||||
; X87-NEXT: fstps {{[0-9]+}}(%esp)
|
||||
; X87-NEXT: flds {{[0-9]+}}(%esp)
|
||||
; X87-NEXT: addl $20, %esp
|
||||
; X87-NEXT: .cfi_def_cfa_offset 4
|
||||
; X87-NEXT: retl
|
||||
;
|
||||
; X86-SSE-LABEL: uiffl:
|
||||
; X86-SSE: # %bb.0: # %entry
|
||||
; X86-SSE-NEXT: subl $20, %esp
|
||||
; X86-SSE-NEXT: .cfi_def_cfa_offset 24
|
||||
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; X86-SSE-NEXT: movlps %xmm0, {{[0-9]+}}(%esp)
|
||||
; X86-SSE-NEXT: xorl %eax, %eax
|
||||
; X86-SSE-NEXT: cmpl $0, {{[0-9]+}}(%esp)
|
||||
; X86-SSE-NEXT: setns %al
|
||||
; X86-SSE-NEXT: fildll {{[0-9]+}}(%esp)
|
||||
; X86-SSE-NEXT: fadds {{\.LCPI.*}}(,%eax,4)
|
||||
; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp)
|
||||
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X86-SSE-NEXT: movss %xmm0, (%esp)
|
||||
; X86-SSE-NEXT: flds (%esp)
|
||||
; X86-SSE-NEXT: addl $20, %esp
|
||||
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
|
||||
; X86-SSE-NEXT: retl
|
||||
;
|
||||
; SSE-LABEL: uiffl:
|
||||
; SSE: # %bb.0: # %entry
|
||||
; SSE-NEXT: testq %rdi, %rdi
|
||||
; SSE-NEXT: js .LBB52_1
|
||||
; SSE-NEXT: # %bb.2: # %entry
|
||||
; SSE-NEXT: cvtsi2ss %rdi, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
; SSE-NEXT: .LBB52_1:
|
||||
; SSE-NEXT: movq %rdi, %rax
|
||||
; SSE-NEXT: shrq %rax
|
||||
; SSE-NEXT: andl $1, %edi
|
||||
; SSE-NEXT: orq %rax, %rdi
|
||||
; SSE-NEXT: cvtsi2ss %rdi, %xmm0
|
||||
; SSE-NEXT: addss %xmm0, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: uiffl:
|
||||
; AVX1: # %bb.0: # %entry
|
||||
; AVX1-NEXT: testq %rdi, %rdi
|
||||
; AVX1-NEXT: js .LBB52_1
|
||||
; AVX1-NEXT: # %bb.2: # %entry
|
||||
; AVX1-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
|
||||
; AVX1-NEXT: retq
|
||||
; AVX1-NEXT: .LBB52_1:
|
||||
; AVX1-NEXT: movq %rdi, %rax
|
||||
; AVX1-NEXT: shrq %rax
|
||||
; AVX1-NEXT: andl $1, %edi
|
||||
; AVX1-NEXT: orq %rax, %rdi
|
||||
; AVX1-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: uiffl:
|
||||
; AVX512: # %bb.0: # %entry
|
||||
; AVX512-NEXT: vcvtusi2ss %rdi, %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
entry:
|
||||
%result = call float @llvm.experimental.constrained.uitofp.f32.i64(i64 %x,
|
||||
metadata !"round.dynamic",
|
||||
metadata !"fpexcept.strict") #0
|
||||
ret float %result
|
||||
}
|
||||
|
||||
attributes #0 = { strictfp }
|
||||
|
||||
@llvm.fp.env = thread_local global i8 zeroinitializer, section "llvm.metadata"
|
||||
@ -1981,3 +2737,19 @@ declare i32 @llvm.experimental.constrained.lround.i32.f64(double, metadata)
|
||||
declare i32 @llvm.experimental.constrained.lround.i32.f32(float, metadata)
|
||||
declare i64 @llvm.experimental.constrained.llround.i64.f64(double, metadata)
|
||||
declare i64 @llvm.experimental.constrained.llround.i64.f32(float, metadata)
|
||||
declare double @llvm.experimental.constrained.sitofp.f64.i8(i8, metadata, metadata)
|
||||
declare double @llvm.experimental.constrained.sitofp.f64.i16(i16, metadata, metadata)
|
||||
declare double @llvm.experimental.constrained.sitofp.f64.i32(i32, metadata, metadata)
|
||||
declare double @llvm.experimental.constrained.sitofp.f64.i64(i64, metadata, metadata)
|
||||
declare float @llvm.experimental.constrained.sitofp.f32.i8(i8, metadata, metadata)
|
||||
declare float @llvm.experimental.constrained.sitofp.f32.i16(i16, metadata, metadata)
|
||||
declare float @llvm.experimental.constrained.sitofp.f32.i32(i32, metadata, metadata)
|
||||
declare float @llvm.experimental.constrained.sitofp.f32.i64(i64, metadata, metadata)
|
||||
declare double @llvm.experimental.constrained.uitofp.f64.i8(i8, metadata, metadata)
|
||||
declare double @llvm.experimental.constrained.uitofp.f64.i16(i16, metadata, metadata)
|
||||
declare double @llvm.experimental.constrained.uitofp.f64.i32(i32, metadata, metadata)
|
||||
declare double @llvm.experimental.constrained.uitofp.f64.i64(i64, metadata, metadata)
|
||||
declare float @llvm.experimental.constrained.uitofp.f32.i8(i8, metadata, metadata)
|
||||
declare float @llvm.experimental.constrained.uitofp.f32.i16(i16, metadata, metadata)
|
||||
declare float @llvm.experimental.constrained.uitofp.f32.i32(i32, metadata, metadata)
|
||||
declare float @llvm.experimental.constrained.uitofp.f32.i64(i64, metadata, metadata)
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -373,6 +373,28 @@ entry:
|
||||
ret i64 %result
|
||||
}
|
||||
|
||||
; Verify that sitofp(42) isn't simplified when the rounding mode is unknown.
; f30 passes the constant i32 42 to the constrained sitofp intrinsic with
; "round.dynamic" and "fpexcept.strict". The assertions below only require
; that a call to the constrained sitofp intrinsic returning double is still
; present after the @f30 label in the tool output.
; NOTE(review): the RUN line naming the pass under test is outside this
; excerpt.
|
||||
; CHECK-LABEL: @f30
|
||||
; CHECK: call double @llvm.experimental.constrained.sitofp
|
||||
define double @f30() #0 {
|
||||
entry:
|
||||
%result = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 42,
|
||||
metadata !"round.dynamic",
|
||||
metadata !"fpexcept.strict") #0
|
||||
ret double %result
|
||||
}
|
||||
|
||||
; Verify that uitofp(42) isn't simplified when the rounding mode is unknown.
; f31 passes the constant i32 42 to the constrained uitofp intrinsic with
; "round.dynamic" and "fpexcept.strict". The assertions below only require
; that a call to the constrained uitofp intrinsic returning double is still
; present after the @f31 label in the tool output.
; NOTE(review): the RUN line naming the pass under test is outside this
; excerpt.
|
||||
; CHECK-LABEL: @f31
|
||||
; CHECK: call double @llvm.experimental.constrained.uitofp
|
||||
define double @f31() #0 {
|
||||
entry:
|
||||
%result = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 42,
|
||||
metadata !"round.dynamic",
|
||||
metadata !"fpexcept.strict") #0
|
||||
ret double %result
|
||||
}
|
||||
|
||||
attributes #0 = { strictfp }
|
||||
|
||||
@llvm.fp.env = thread_local global i8 zeroinitializer, section "llvm.metadata"
|
||||
@ -405,3 +427,5 @@ declare i32 @llvm.experimental.constrained.lround.i32.f64(double, metadata)
|
||||
declare i32 @llvm.experimental.constrained.lround.i32.f32(float, metadata)
|
||||
declare i64 @llvm.experimental.constrained.llround.i64.f64(double, metadata)
|
||||
declare i64 @llvm.experimental.constrained.llround.i64.f32(float, metadata)
|
||||
declare double @llvm.experimental.constrained.sitofp.f64.i32(i32, metadata, metadata)
|
||||
declare double @llvm.experimental.constrained.uitofp.f64.i32(i32, metadata, metadata)
|
||||
|
Loading…
x
Reference in New Issue
Block a user