Add ‘llvm.experimental.constrained.fma‘ Intrinsic.

Differential Revision: http://reviews.llvm.org/D36335

llvm-svn: 311629
This commit is contained in:
Wei Ding 2017-08-24 04:18:24 +00:00
parent 41739d608d
commit cb5ec6af94
12 changed files with 163 additions and 35 deletions

View File

@ -13021,6 +13021,41 @@ The value produced is the floating point remainder from the division of the two
value operands and has the same type as the operands. The remainder has the
same sign as the dividend.
'``llvm.experimental.constrained.fma``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Syntax:
"""""""
::
declare <type>
@llvm.experimental.constrained.fma(<type> <op1>, <type> <op2>, <type> <op3>,
metadata <rounding mode>,
metadata <exception behavior>)
Overview:
"""""""""
The '``llvm.experimental.constrained.fma``' intrinsic returns the result of a
fused-multiply-add operation on its operands.
Arguments:
""""""""""
The first three arguments to the '``llvm.experimental.constrained.fma``'
intrinsic must be :ref:`floating point <t_floating>` or :ref:`vector
<t_vector>` of floating point values. All arguments must have identical types.
The fourth and fifth arguments specify the rounding mode and exception behavior
as described above.
Semantics:
""""""""""
The result produced is the product of the first two operands added to the third
operand computed with infinite precision, and then rounded to the target
precision.
Constrained libm-equivalent Intrinsics
--------------------------------------

View File

@ -263,6 +263,7 @@ namespace ISD {
/// They are used to limit optimizations while the DAG is being
/// optimized.
STRICT_FADD, STRICT_FSUB, STRICT_FMUL, STRICT_FDIV, STRICT_FREM,
STRICT_FMA,
/// Constrained versions of libm-equivalent floating point intrinsics.
/// These will be lowered to the equivalent non-constrained pseudo-op

View File

@ -623,13 +623,14 @@ public:
/// Test if this node is a strict floating point pseudo-op.
bool isStrictFPOpcode() {
switch (NodeType) {
default:
default:
return false;
case ISD::STRICT_FADD:
case ISD::STRICT_FSUB:
case ISD::STRICT_FMUL:
case ISD::STRICT_FDIV:
case ISD::STRICT_FREM:
case ISD::STRICT_FMA:
case ISD::STRICT_FSQRT:
case ISD::STRICT_FPOW:
case ISD::STRICT_FPOWI:

View File

@ -167,6 +167,7 @@ namespace llvm {
};
bool isUnaryOp() const;
bool isTernaryOp() const;
RoundingMode getRoundingMode() const;
ExceptionBehavior getExceptionBehavior() const;
@ -178,6 +179,7 @@ namespace llvm {
case Intrinsic::experimental_constrained_fmul:
case Intrinsic::experimental_constrained_fdiv:
case Intrinsic::experimental_constrained_frem:
case Intrinsic::experimental_constrained_fma:
case Intrinsic::experimental_constrained_sqrt:
case Intrinsic::experimental_constrained_pow:
case Intrinsic::experimental_constrained_powi:

View File

@ -490,6 +490,13 @@ let IntrProperties = [IntrInaccessibleMemOnly] in {
llvm_metadata_ty,
llvm_metadata_ty ]>;
def int_experimental_constrained_fma : Intrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMMatchType<0>,
llvm_metadata_ty,
llvm_metadata_ty ]>;
// These intrinsics are sensitive to the rounding mode so we need constrained
// versions of each of them. When strict rounding and exception control are
// not required the non-constrained versions of these intrinsics should be

View File

@ -907,6 +907,7 @@ getStrictFPOpcodeAction(const TargetLowering &TLI, unsigned Opcode, EVT VT) {
case ISD::STRICT_FSQRT: EqOpc = ISD::FSQRT; break;
case ISD::STRICT_FPOW: EqOpc = ISD::FPOW; break;
case ISD::STRICT_FPOWI: EqOpc = ISD::FPOWI; break;
case ISD::STRICT_FMA: EqOpc = ISD::FMA; break;
case ISD::STRICT_FSIN: EqOpc = ISD::FSIN; break;
case ISD::STRICT_FCOS: EqOpc = ISD::FCOS; break;
case ISD::STRICT_FEXP: EqOpc = ISD::FEXP; break;
@ -1072,6 +1073,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
}
break;
case ISD::STRICT_FSQRT:
case ISD::STRICT_FMA:
case ISD::STRICT_FPOW:
case ISD::STRICT_FPOWI:
case ISD::STRICT_FSIN:
@ -1240,7 +1242,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
// If the index is dependent on the store we will introduce a cycle when
// creating the load (the load uses the index, and by replacing the chain
// we will make the index dependent on the load). Also, the store might be
// dependent on the extractelement and introduce a cycle when creating
// dependent on the extractelement and introduce a cycle when creating
// the load.
if (SDNode::hasPredecessorHelper(ST, Visited, Worklist) ||
ST->hasPredecessor(Op.getNode()))
@ -4065,6 +4067,10 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64,
RTLIB::FMA_F80, RTLIB::FMA_F128,
RTLIB::FMA_PPCF128));
case ISD::STRICT_FMA:
Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64,
RTLIB::FMA_F80, RTLIB::FMA_F128,
RTLIB::FMA_PPCF128));
break;
case ISD::FADD:
Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64,

View File

@ -6695,6 +6695,7 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
unsigned OrigOpc = Node->getOpcode();
unsigned NewOpc;
bool IsUnary = false;
bool IsTernary = false;
switch (OrigOpc) {
default:
llvm_unreachable("mutateStrictFPToFP called with unexpected opcode!");
@ -6703,6 +6704,7 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
case ISD::STRICT_FMUL: NewOpc = ISD::FMUL; break;
case ISD::STRICT_FDIV: NewOpc = ISD::FDIV; break;
case ISD::STRICT_FREM: NewOpc = ISD::FREM; break;
case ISD::STRICT_FMA: NewOpc = ISD::FMA; IsTernary = true; break;
case ISD::STRICT_FSQRT: NewOpc = ISD::FSQRT; IsUnary = true; break;
case ISD::STRICT_FPOW: NewOpc = ISD::FPOW; break;
case ISD::STRICT_FPOWI: NewOpc = ISD::FPOWI; break;
@ -6729,6 +6731,10 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
SDNode *Res = nullptr;
if (IsUnary)
Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1) });
else if (IsTernary)
Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1),
Node->getOperand(2),
Node->getOperand(3)});
else
Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1),
Node->getOperand(2) });

View File

@ -5432,6 +5432,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::experimental_constrained_fmul:
case Intrinsic::experimental_constrained_fdiv:
case Intrinsic::experimental_constrained_frem:
case Intrinsic::experimental_constrained_fma:
case Intrinsic::experimental_constrained_sqrt:
case Intrinsic::experimental_constrained_pow:
case Intrinsic::experimental_constrained_powi:
@ -5963,6 +5964,9 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
case Intrinsic::experimental_constrained_frem:
Opcode = ISD::STRICT_FREM;
break;
case Intrinsic::experimental_constrained_fma:
Opcode = ISD::STRICT_FMA;
break;
case Intrinsic::experimental_constrained_sqrt:
Opcode = ISD::STRICT_FSQRT;
break;
@ -6009,10 +6013,15 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
SDVTList VTs = DAG.getVTList(ValueVTs);
SDValue Result;
if (FPI.isUnaryOp())
Result = DAG.getNode(Opcode, sdl, VTs,
Result = DAG.getNode(Opcode, sdl, VTs,
{ Chain, getValue(FPI.getArgOperand(0)) });
else if (FPI.isTernaryOp())
Result = DAG.getNode(Opcode, sdl, VTs,
{ Chain, getValue(FPI.getArgOperand(0)),
getValue(FPI.getArgOperand(1)),
getValue(FPI.getArgOperand(2)) });
else
Result = DAG.getNode(Opcode, sdl, VTs,
Result = DAG.getNode(Opcode, sdl, VTs,
{ Chain, getValue(FPI.getArgOperand(0)),
getValue(FPI.getArgOperand(1)) });

View File

@ -14,10 +14,10 @@
// are all subclasses of the CallInst class. Note that none of these classes
// has state or virtual methods, which is an important part of this gross/neat
// hack working.
//
//
// In some cases, arguments to intrinsics need to be generic and are defined as
// type pointer to empty struct { }*. To access the real item of interest the
// cast instruction needs to be stripped away.
// cast instruction needs to be stripped away.
//
//===----------------------------------------------------------------------===//
@ -98,7 +98,7 @@ Value *InstrProfIncrementInst::getStep() const {
ConstrainedFPIntrinsic::RoundingMode
ConstrainedFPIntrinsic::getRoundingMode() const {
unsigned NumOperands = getNumArgOperands();
Metadata *MD =
Metadata *MD =
dyn_cast<MetadataAsValue>(getArgOperand(NumOperands - 2))->getMetadata();
if (!MD || !isa<MDString>(MD))
return rmInvalid;
@ -118,7 +118,7 @@ ConstrainedFPIntrinsic::getRoundingMode() const {
ConstrainedFPIntrinsic::ExceptionBehavior
ConstrainedFPIntrinsic::getExceptionBehavior() const {
unsigned NumOperands = getNumArgOperands();
Metadata *MD =
Metadata *MD =
dyn_cast<MetadataAsValue>(getArgOperand(NumOperands - 1))->getMetadata();
if (!MD || !isa<MDString>(MD))
return ebInvalid;
@ -132,7 +132,7 @@ ConstrainedFPIntrinsic::getExceptionBehavior() const {
bool ConstrainedFPIntrinsic::isUnaryOp() const {
switch (getIntrinsicID()) {
default:
default:
return false;
case Intrinsic::experimental_constrained_sqrt:
case Intrinsic::experimental_constrained_sin:
@ -147,3 +147,13 @@ bool ConstrainedFPIntrinsic::isUnaryOp() const {
return true;
}
}
bool ConstrainedFPIntrinsic::isTernaryOp() const {
switch (getIntrinsicID()) {
default:
return false;
case Intrinsic::experimental_constrained_fma:
return true;
}
}

View File

@ -3973,6 +3973,7 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
case Intrinsic::experimental_constrained_fmul:
case Intrinsic::experimental_constrained_fdiv:
case Intrinsic::experimental_constrained_frem:
case Intrinsic::experimental_constrained_fma:
case Intrinsic::experimental_constrained_sqrt:
case Intrinsic::experimental_constrained_pow:
case Intrinsic::experimental_constrained_powi:
@ -4433,8 +4434,9 @@ static DISubprogram *getSubprogram(Metadata *LocalScope) {
void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
unsigned NumOperands = FPI.getNumArgOperands();
Assert(((NumOperands == 3 && FPI.isUnaryOp()) || (NumOperands == 4)),
"invalid arguments for constrained FP intrinsic", &FPI);
Assert(((NumOperands == 5 && FPI.isTernaryOp()) ||
(NumOperands == 3 && FPI.isUnaryOp()) || (NumOperands == 4)),
"invalid arguments for constrained FP intrinsic", &FPI);
Assert(isa<MetadataAsValue>(FPI.getArgOperand(NumOperands-1)),
"invalid exception behavior argument", &FPI);
Assert(isa<MetadataAsValue>(FPI.getArgOperand(NumOperands-2)),

View File

@ -1,4 +1,5 @@
; RUN: llc -O3 -mtriple=x86_64-pc-linux < %s | FileCheck %s
; RUN: llc -O3 -mtriple=x86_64-pc-linux < %s | FileCheck --check-prefix=COMMON --check-prefix=NO-FMA --check-prefix=FMACALL64 --check-prefix=FMACALL32 %s
; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+fma < %s | FileCheck -check-prefix=COMMON --check-prefix=HAS-FMA --check-prefix=FMA64 --check-prefix=FMA32 %s
; Verify that constants aren't folded to inexact results when the rounding mode
; is unknown.
@ -9,7 +10,7 @@
; }
;
; CHECK-LABEL: f1
; CHECK: divsd
; COMMON: divsd
define double @f1() {
entry:
%div = call double @llvm.experimental.constrained.fdiv.f64(
@ -29,7 +30,7 @@ entry:
; }
;
; CHECK-LABEL: f2
; CHECK: subsd
; COMMON: subsd
define double @f2(double %a) {
entry:
%div = call double @llvm.experimental.constrained.fsub.f64(
@ -50,9 +51,9 @@ entry:
; }
;
; CHECK-LABEL: f3:
; CHECK: subsd
; CHECK: mulsd
; CHECK: subsd
; COMMON: subsd
; COMMON: mulsd
; COMMON: subsd
define double @f3(double %a, double %b) {
entry:
%sub = call double @llvm.experimental.constrained.fsub.f64(
@ -81,11 +82,11 @@ entry:
; return a;
; }
;
;
;
; CHECK-LABEL: f4:
; CHECK: testl
; CHECK: jle
; CHECK: addsd
; COMMON: testl
; COMMON: jle
; COMMON: addsd
define double @f4(i32 %n, double %a) {
entry:
%cmp = icmp sgt i32 %n, 0
@ -105,7 +106,7 @@ if.end:
; Verify that sqrt(42.0) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f5
; CHECK: sqrtsd
; COMMON: sqrtsd
define double @f5() {
entry:
%result = call double @llvm.experimental.constrained.sqrt.f64(double 42.0,
@ -116,7 +117,7 @@ entry:
; Verify that pow(42.1, 3.0) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f6
; CHECK: pow
; COMMON: pow
define double @f6() {
entry:
%result = call double @llvm.experimental.constrained.pow.f64(double 42.1,
@ -128,7 +129,7 @@ entry:
; Verify that powi(42.1, 3) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f7
; CHECK: powi
; COMMON: powi
define double @f7() {
entry:
%result = call double @llvm.experimental.constrained.powi.f64(double 42.1,
@ -140,7 +141,7 @@ entry:
; Verify that sin(42.0) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f8
; CHECK: sin
; COMMON: sin
define double @f8() {
entry:
%result = call double @llvm.experimental.constrained.sin.f64(double 42.0,
@ -151,7 +152,7 @@ entry:
; Verify that cos(42.0) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f9
; CHECK: cos
; COMMON: cos
define double @f9() {
entry:
%result = call double @llvm.experimental.constrained.cos.f64(double 42.0,
@ -162,7 +163,7 @@ entry:
; Verify that exp(42.0) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f10
; CHECK: exp
; COMMON: exp
define double @f10() {
entry:
%result = call double @llvm.experimental.constrained.exp.f64(double 42.0,
@ -173,7 +174,7 @@ entry:
; Verify that exp2(42.1) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f11
; CHECK: exp2
; COMMON: exp2
define double @f11() {
entry:
%result = call double @llvm.experimental.constrained.exp2.f64(double 42.1,
@ -184,7 +185,7 @@ entry:
; Verify that log(42.0) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f12
; CHECK: log
; COMMON: log
define double @f12() {
entry:
%result = call double @llvm.experimental.constrained.log.f64(double 42.0,
@ -195,7 +196,7 @@ entry:
; Verify that log10(42.0) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f13
; CHECK: log10
; COMMON: log10
define double @f13() {
entry:
%result = call double @llvm.experimental.constrained.log10.f64(double 42.0,
@ -206,7 +207,7 @@ entry:
; Verify that log2(42.0) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f14
; CHECK: log2
; COMMON: log2
define double @f14() {
entry:
%result = call double @llvm.experimental.constrained.log2.f64(double 42.0,
@ -217,7 +218,8 @@ entry:
; Verify that rint(42.1) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f15
; CHECK: rint
; NO-FMA: rint
; HAS-FMA: vroundsd
define double @f15() {
entry:
%result = call double @llvm.experimental.constrained.rint.f64(double 42.1,
@ -229,7 +231,8 @@ entry:
; Verify that nearbyint(42.1) isn't simplified when the rounding mode is
; unknown.
; CHECK-LABEL: f16
; CHECK: nearbyint
; NO-FMA: nearbyint
; HAS-FMA: vroundsd
define double @f16() {
entry:
%result = call double @llvm.experimental.constrained.nearbyint.f64(
@ -239,6 +242,38 @@ entry:
ret double %result
}
; Verify that fma(3.5) isn't simplified when the rounding mode is
; unknown.
; CHECK-LABEL: f17
; FMACALL32: jmp fmaf # TAILCALL
; FMA32: vfmadd213ss
define float @f17() {
entry:
%result = call float @llvm.experimental.constrained.fma.f32(
float 3.5,
float 3.5,
float 3.5,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %result
}
; Verify that fma(42.1) isn't simplified when the rounding mode is
; unknown.
; CHECK-LABEL: f18
; FMACALL64: jmp fma # TAILCALL
; FMA64: vfmadd213sd
define double @f18() {
entry:
%result = call double @llvm.experimental.constrained.fma.f64(
double 42.1,
double 42.1,
double 42.1,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %result
}
@llvm.fp.env = thread_local global i8 zeroinitializer, section "llvm.metadata"
declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
@ -256,3 +291,5 @@ declare double @llvm.experimental.constrained.log10.f64(double, metadata, metada
declare double @llvm.experimental.constrained.log2.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)
declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)

View File

@ -73,7 +73,7 @@ entry:
; return a;
; }
;
;
;
; CHECK-LABEL: @f4
; CHECK-NOT: select
; CHECK: br i1 %cmp
@ -94,7 +94,6 @@ if.end:
ret double %a.0
}
; Verify that sqrt(42.0) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f5
; CHECK: call double @llvm.experimental.constrained.sqrt
@ -231,6 +230,18 @@ entry:
ret double %result
}
; Verify that fma(42.1) isn't simplified when the rounding mode is
; unknown.
; CHECK-LABEL: f17
; CHECK: call double @llvm.experimental.constrained.fma
define double @f17() {
entry:
%result = call double @llvm.experimental.constrained.fma.f64(double 42.1, double 42.1, double 42.1,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %result
}
@llvm.fp.env = thread_local global i8 zeroinitializer, section "llvm.metadata"
declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
@ -248,3 +259,4 @@ declare double @llvm.experimental.constrained.log10.f64(double, metadata, metada
declare double @llvm.experimental.constrained.log2.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)