mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-17 23:44:43 +00:00
Add an intrinsic and codegen support for fused multiply-accumulate. The intent
is to use this for architectures that have a native FMA instruction. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@134742 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
cc0ddc707d
commit
33390848a7
@ -241,6 +241,7 @@
|
||||
<li><a href="#int_pow">'<tt>llvm.pow.*</tt>' Intrinsic</a></li>
|
||||
<li><a href="#int_exp">'<tt>llvm.exp.*</tt>' Intrinsic</a></li>
|
||||
<li><a href="#int_log">'<tt>llvm.log.*</tt>' Intrinsic</a></li>
|
||||
<li><a href="#int_fma">'<tt>llvm.fma.*</tt>' Intrinsic</a></li>
|
||||
</ol>
|
||||
</li>
|
||||
<li><a href="#int_manip">Bit Manipulation Intrinsics</a>
|
||||
@ -6570,6 +6571,37 @@ LLVM</a>.</p>
|
||||
<p>This function returns the same values as the libm <tt>log</tt> functions
|
||||
would, and handles error conditions in the same way.</p>
|
||||
|
||||
<h4>
|
||||
<a name="int_fma">'<tt>llvm.fma.*</tt>' Intrinsic</a>
|
||||
</h4>
|
||||
|
||||
<div>
|
||||
|
||||
<h5>Syntax:</h5>
|
||||
<p>This is an overloaded intrinsic. You can use <tt>llvm.fma</tt> on any
|
||||
floating point or vector of floating point type. Not all targets support all
|
||||
types, however.</p>
|
||||
|
||||
<pre>
|
||||
declare float @llvm.fma.f32(float %a, float %b, float %c)
|
||||
declare double @llvm.fma.f64(double %a, double %b, double %c)
|
||||
declare x86_fp80 @llvm.fma.f80(x86_fp80 %a, x86_fp80 %b, x86_fp80 %c)
|
||||
declare fp128 @llvm.fma.f128(fp128 %a, fp128 %b, fp128 %c)
|
||||
declare ppc_fp128 @llvm.fma.ppcf128(ppc_fp128 %a, ppc_fp128 %b, ppc_fp128 %c)
|
||||
</pre>
|
||||
|
||||
<h5>Overview:</h5>
|
||||
<p>The '<tt>llvm.fma.*</tt>' intrinsics perform the fused multiply-accumulate
|
||||
operation.</p>
|
||||
|
||||
<h5>Arguments:</h5>
|
||||
<p>The three arguments and return value are floating point numbers of the same
|
||||
type.</p>
|
||||
|
||||
<h5>Semantics:</h5>
|
||||
<p>This function returns the same values as the libm <tt>fma</tt> functions
|
||||
would.</p>
|
||||
|
||||
</div>
|
||||
|
||||
<!-- ======================================================================= -->
|
||||
|
@ -232,7 +232,7 @@ namespace ISD {
|
||||
SMULO, UMULO,
|
||||
|
||||
// Simple floating point operators. All are binary except FMA, which takes three operands.
|
||||
FADD, FSUB, FMUL, FDIV, FREM,
|
||||
FADD, FSUB, FMUL, FMA, FDIV, FREM,
|
||||
|
||||
// FCOPYSIGN(X, Y) - Return the value of X with the sign of Y. NOTE: This
|
||||
// DAG node does not require that X and Y have the same type, just that they
|
||||
|
@ -103,6 +103,10 @@ namespace RTLIB {
|
||||
REM_F64,
|
||||
REM_F80,
|
||||
REM_PPCF128,
|
||||
FMA_F32,
|
||||
FMA_F64,
|
||||
FMA_F80,
|
||||
FMA_PPCF128,
|
||||
POWI_F32,
|
||||
POWI_F64,
|
||||
POWI_F80,
|
||||
|
@ -255,6 +255,12 @@ let Properties = [IntrReadMem] in {
|
||||
def int_exp2 : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
|
||||
}
|
||||
|
||||
let Properties = [IntrNoMem] in {
|
||||
def int_fma : Intrinsic<[llvm_anyfloat_ty],
|
||||
[LLVMMatchType<0>, LLVMMatchType<0>,
|
||||
LLVMMatchType<0>]>;
|
||||
}
|
||||
|
||||
// NOTE: these are internal interfaces.
|
||||
def int_setjmp : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>;
|
||||
def int_longjmp : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty]>;
|
||||
|
@ -3351,6 +3351,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
|
||||
Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
|
||||
RTLIB::REM_F80, RTLIB::REM_PPCF128));
|
||||
break;
|
||||
case ISD::FMA:
|
||||
Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64,
|
||||
RTLIB::FMA_F80, RTLIB::FMA_PPCF128));
|
||||
break;
|
||||
case ISD::FP16_TO_FP32:
|
||||
Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false));
|
||||
break;
|
||||
|
@ -74,6 +74,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
|
||||
case ISD::FLOG: R = SoftenFloatRes_FLOG(N); break;
|
||||
case ISD::FLOG2: R = SoftenFloatRes_FLOG2(N); break;
|
||||
case ISD::FLOG10: R = SoftenFloatRes_FLOG10(N); break;
|
||||
case ISD::FMA: R = SoftenFloatRes_FMA(N); break;
|
||||
case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break;
|
||||
case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break;
|
||||
case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break;
|
||||
@ -294,6 +295,19 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) {
|
||||
NVT, &Op, 1, false, N->getDebugLoc());
|
||||
}
|
||||
|
||||
// Softens the result of an ISD::FMA node by replacing it with a runtime
// library call. The three operands are fetched in their softened form and the
// call yields a value of type NVT, the type N's result type is transformed to.
SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) {
|
||||
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
|
||||
SDValue Ops[3] = { GetSoftenedFloat(N->getOperand(0)),
|
||||
GetSoftenedFloat(N->getOperand(1)),
|
||||
GetSoftenedFloat(N->getOperand(2)) };
|
||||
// GetFPLibCall is handed N's original value type plus the four RTLIB::FMA_*
// candidates; the libcall it returns is invoked with the softened operands.
return MakeLibCall(GetFPLibCall(N->getValueType(0),
|
||||
RTLIB::FMA_F32,
|
||||
RTLIB::FMA_F64,
|
||||
RTLIB::FMA_F80,
|
||||
RTLIB::FMA_PPCF128),
|
||||
NVT, Ops, 3, false, N->getDebugLoc());
|
||||
}
|
||||
|
||||
SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) {
|
||||
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
|
||||
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
|
||||
@ -837,6 +851,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
|
||||
case ISD::FLOG: ExpandFloatRes_FLOG(N, Lo, Hi); break;
|
||||
case ISD::FLOG2: ExpandFloatRes_FLOG2(N, Lo, Hi); break;
|
||||
case ISD::FLOG10: ExpandFloatRes_FLOG10(N, Lo, Hi); break;
|
||||
case ISD::FMA: ExpandFloatRes_FMA(N, Lo, Hi); break;
|
||||
case ISD::FMUL: ExpandFloatRes_FMUL(N, Lo, Hi); break;
|
||||
case ISD::FNEARBYINT: ExpandFloatRes_FNEARBYINT(N, Lo, Hi); break;
|
||||
case ISD::FNEG: ExpandFloatRes_FNEG(N, Lo, Hi); break;
|
||||
@ -989,6 +1004,19 @@ void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N,
|
||||
GetPairElements(Call, Lo, Hi);
|
||||
}
|
||||
|
||||
// Expands the result of an ISD::FMA node via a runtime library call. The three
// operands are passed through unchanged, the call is made in N's own value
// type, and the call result is handed to GetPairElements together with the
// Lo and Hi output parameters.
void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo,
|
||||
SDValue &Hi) {
|
||||
SDValue Ops[3] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
|
||||
// GetFPLibCall is handed N's value type plus the four RTLIB::FMA_* candidates.
SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0),
|
||||
RTLIB::FMA_F32,
|
||||
RTLIB::FMA_F64,
|
||||
RTLIB::FMA_F80,
|
||||
RTLIB::FMA_PPCF128),
|
||||
N->getValueType(0), Ops, 3, false,
|
||||
N->getDebugLoc());
|
||||
GetPairElements(Call, Lo, Hi);
|
||||
}
|
||||
|
||||
void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo,
|
||||
SDValue &Hi) {
|
||||
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
|
||||
|
@ -378,6 +378,7 @@ private:
|
||||
SDValue SoftenFloatRes_FLOG(SDNode *N);
|
||||
SDValue SoftenFloatRes_FLOG2(SDNode *N);
|
||||
SDValue SoftenFloatRes_FLOG10(SDNode *N);
|
||||
SDValue SoftenFloatRes_FMA(SDNode *N);
|
||||
SDValue SoftenFloatRes_FMUL(SDNode *N);
|
||||
SDValue SoftenFloatRes_FNEARBYINT(SDNode *N);
|
||||
SDValue SoftenFloatRes_FNEG(SDNode *N);
|
||||
@ -442,6 +443,7 @@ private:
|
||||
void ExpandFloatRes_FLOG (SDNode *N, SDValue &Lo, SDValue &Hi);
|
||||
void ExpandFloatRes_FLOG2 (SDNode *N, SDValue &Lo, SDValue &Hi);
|
||||
void ExpandFloatRes_FLOG10 (SDNode *N, SDValue &Lo, SDValue &Hi);
|
||||
void ExpandFloatRes_FMA (SDNode *N, SDValue &Lo, SDValue &Hi);
|
||||
void ExpandFloatRes_FMUL (SDNode *N, SDValue &Lo, SDValue &Hi);
|
||||
void ExpandFloatRes_FNEARBYINT(SDNode *N, SDValue &Lo, SDValue &Hi);
|
||||
void ExpandFloatRes_FNEG (SDNode *N, SDValue &Lo, SDValue &Hi);
|
||||
|
@ -5878,6 +5878,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
|
||||
case ISD::FSUB: return "fsub";
|
||||
case ISD::FMUL: return "fmul";
|
||||
case ISD::FDIV: return "fdiv";
|
||||
case ISD::FMA: return "fma";
|
||||
case ISD::FREM: return "frem";
|
||||
case ISD::FCOPYSIGN: return "fcopysign";
|
||||
case ISD::FGETSIGN: return "fgetsign";
|
||||
|
@ -4651,6 +4651,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
|
||||
case Intrinsic::pow:
|
||||
visitPow(I);
|
||||
return 0;
|
||||
case Intrinsic::fma:
|
||||
setValue(&I, DAG.getNode(ISD::FMA, dl,
|
||||
getValue(I.getArgOperand(0)).getValueType(),
|
||||
getValue(I.getArgOperand(0)),
|
||||
getValue(I.getArgOperand(1)),
|
||||
getValue(I.getArgOperand(2))));
|
||||
return 0;
|
||||
case Intrinsic::convert_to_fp16:
|
||||
setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, dl,
|
||||
MVT::i16, getValue(I.getArgOperand(0))));
|
||||
|
@ -139,6 +139,10 @@ static void InitLibcallNames(const char **Names) {
|
||||
Names[RTLIB::REM_F64] = "fmod";
|
||||
Names[RTLIB::REM_F80] = "fmodl";
|
||||
Names[RTLIB::REM_PPCF128] = "fmodl";
|
||||
Names[RTLIB::FMA_F32] = "fmaf";
|
||||
Names[RTLIB::FMA_F64] = "fma";
|
||||
Names[RTLIB::FMA_F80] = "fmal";
|
||||
Names[RTLIB::FMA_PPCF128] = "fmal";
|
||||
Names[RTLIB::POWI_F32] = "__powisf2";
|
||||
Names[RTLIB::POWI_F64] = "__powidf2";
|
||||
Names[RTLIB::POWI_F80] = "__powixf2";
|
||||
|
@ -708,6 +708,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
|
||||
setOperationAction(ISD::FPOW, MVT::f64, Expand);
|
||||
setOperationAction(ISD::FPOW, MVT::f32, Expand);
|
||||
|
||||
setOperationAction(ISD::FMA, MVT::f64, Expand);
|
||||
setOperationAction(ISD::FMA, MVT::f32, Expand);
|
||||
|
||||
// Various VFP goodness
|
||||
if (!UseSoftFloat && !Subtarget->isThumb1Only()) {
|
||||
// int <-> fp are custom expanded into bit_convert + ARMISD ops.
|
||||
|
@ -122,6 +122,9 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM)
|
||||
setOperationAction(ISD::FPOW , MVT::f32, Expand);
|
||||
setOperationAction(ISD::FPOW , MVT::f64, Expand);
|
||||
|
||||
setOperationAction(ISD::FMA, MVT::f64, Expand);
|
||||
setOperationAction(ISD::FMA, MVT::f32, Expand);
|
||||
|
||||
setOperationAction(ISD::SETCC, MVT::f32, Promote);
|
||||
|
||||
setOperationAction(ISD::BITCAST, MVT::f32, Promote);
|
||||
|
@ -221,6 +221,9 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
|
||||
setOperationAction(ISD::FSQRT, MVT::f64, Expand);
|
||||
setOperationAction(ISD::FSQRT, MVT::f32, Expand);
|
||||
|
||||
setOperationAction(ISD::FMA, MVT::f64, Expand);
|
||||
setOperationAction(ISD::FMA, MVT::f32, Expand);
|
||||
|
||||
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
|
||||
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
|
||||
|
||||
|
@ -69,6 +69,7 @@ MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM)
|
||||
|
||||
// Floating point operations which are not supported
|
||||
setOperationAction(ISD::FREM, MVT::f32, Expand);
|
||||
setOperationAction(ISD::FMA, MVT::f32, Expand);
|
||||
setOperationAction(ISD::UINT_TO_FP, MVT::i8, Expand);
|
||||
setOperationAction(ISD::UINT_TO_FP, MVT::i16, Expand);
|
||||
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
|
||||
|
@ -146,6 +146,8 @@ MipsTargetLowering(MipsTargetMachine &TM)
|
||||
setOperationAction(ISD::FLOG2, MVT::f32, Expand);
|
||||
setOperationAction(ISD::FLOG10, MVT::f32, Expand);
|
||||
setOperationAction(ISD::FEXP, MVT::f32, Expand);
|
||||
setOperationAction(ISD::FMA, MVT::f32, Expand);
|
||||
setOperationAction(ISD::FMA, MVT::f64, Expand);
|
||||
|
||||
setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
|
||||
setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
|
||||
|
@ -125,10 +125,12 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
|
||||
setOperationAction(ISD::FCOS , MVT::f64, Expand);
|
||||
setOperationAction(ISD::FREM , MVT::f64, Expand);
|
||||
setOperationAction(ISD::FPOW , MVT::f64, Expand);
|
||||
setOperationAction(ISD::FMA , MVT::f64, Expand);
|
||||
setOperationAction(ISD::FSIN , MVT::f32, Expand);
|
||||
setOperationAction(ISD::FCOS , MVT::f32, Expand);
|
||||
setOperationAction(ISD::FREM , MVT::f32, Expand);
|
||||
setOperationAction(ISD::FPOW , MVT::f32, Expand);
|
||||
setOperationAction(ISD::FMA , MVT::f32, Expand);
|
||||
|
||||
setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
|
||||
|
||||
|
@ -754,9 +754,11 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
|
||||
setOperationAction(ISD::FSIN , MVT::f64, Expand);
|
||||
setOperationAction(ISD::FCOS , MVT::f64, Expand);
|
||||
setOperationAction(ISD::FREM , MVT::f64, Expand);
|
||||
setOperationAction(ISD::FMA , MVT::f64, Expand);
|
||||
setOperationAction(ISD::FSIN , MVT::f32, Expand);
|
||||
setOperationAction(ISD::FCOS , MVT::f32, Expand);
|
||||
setOperationAction(ISD::FREM , MVT::f32, Expand);
|
||||
setOperationAction(ISD::FMA , MVT::f32, Expand);
|
||||
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
|
||||
setOperationAction(ISD::CTTZ , MVT::i32, Expand);
|
||||
setOperationAction(ISD::CTLZ , MVT::i32, Expand);
|
||||
|
@ -142,6 +142,8 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) :
|
||||
setOperationAction(ISD::FCOS, MVT::f64, Expand);
|
||||
setOperationAction(ISD::FREM, MVT::f32, Expand);
|
||||
setOperationAction(ISD::FREM, MVT::f64, Expand);
|
||||
setOperationAction(ISD::FMA, MVT::f32, Expand);
|
||||
setOperationAction(ISD::FMA, MVT::f64, Expand);
|
||||
|
||||
// We have only 64-bit bitconverts
|
||||
setOperationAction(ISD::BITCAST, MVT::f32, Expand);
|
||||
|
@ -646,6 +646,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
||||
addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
|
||||
}
|
||||
|
||||
// We don't support FMA.
|
||||
setOperationAction(ISD::FMA, MVT::f64, Expand);
|
||||
setOperationAction(ISD::FMA, MVT::f32, Expand);
|
||||
|
||||
// Long double always uses X87.
|
||||
if (!UseSoftFloat) {
|
||||
addRegisterClass(MVT::f80, X86::RFP80RegisterClass);
|
||||
@ -670,6 +674,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
||||
setOperationAction(ISD::FSIN , MVT::f80 , Expand);
|
||||
setOperationAction(ISD::FCOS , MVT::f80 , Expand);
|
||||
}
|
||||
|
||||
setOperationAction(ISD::FMA, MVT::f80, Expand);
|
||||
}
|
||||
|
||||
// Always use a library call for pow.
|
||||
|
33
test/CodeGen/X86/fma.ll
Normal file
33
test/CodeGen/X86/fma.ll
Normal file
@ -0,0 +1,33 @@
|
||||
; RUN: llc < %s -mtriple=i386-apple-darwin10 | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s
|
||||
|
||||
; CHECK: test_f32
|
||||
; CHECK: _fmaf
|
||||
|
||||
define float @test_f32(float %a, float %b, float %c) nounwind readnone ssp {
|
||||
entry:
|
||||
%call = tail call float @llvm.fma.f32(float %a, float %b, float %c) nounwind readnone
|
||||
ret float %call
|
||||
}
|
||||
|
||||
; CHECK: test_f64
|
||||
; CHECK: _fma
|
||||
|
||||
define double @test_f64(double %a, double %b, double %c) nounwind readnone ssp {
|
||||
entry:
|
||||
%call = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone
|
||||
ret double %call
|
||||
}
|
||||
|
||||
; CHECK: test_f80
|
||||
; CHECK: _fmal
|
||||
|
||||
define x86_fp80 @test_f80(x86_fp80 %a, x86_fp80 %b, x86_fp80 %c) nounwind readnone ssp {
|
||||
entry:
|
||||
%call = tail call x86_fp80 @llvm.fma.f80(x86_fp80 %a, x86_fp80 %b, x86_fp80 %c) nounwind readnone
|
||||
ret x86_fp80 %call
|
||||
}
|
||||
|
||||
declare float @llvm.fma.f32(float, float, float) nounwind readnone
|
||||
declare double @llvm.fma.f64(double, double, double) nounwind readnone
|
||||
declare x86_fp80 @llvm.fma.f80(x86_fp80, x86_fp80, x86_fp80) nounwind readnone
|
Loading…
x
Reference in New Issue
Block a user