mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-23 04:16:42 +00:00
Add a new intrinsic: llvm.fmuladd. This intrinsic represents a multiply-add
expression (a * b + c) that can be implemented as a fused multiply-add (fma) if the target determines that this will be more efficient. This intrinsic will be used to implement FP_CONTRACT support and an aggressive FMA formation mode. If your target has a fast FMA instruction you should override the isFMAFasterThanMulAndAdd method in TargetLowering to return true. llvm-svn: 158014
This commit is contained in:
parent
00b608400c
commit
30d8fb4deb
@ -277,6 +277,11 @@
|
||||
<li><a href="#int_umul_overflow">'<tt>llvm.umul.with.overflow.*</tt>' Intrinsics</a></li>
|
||||
</ol>
|
||||
</li>
|
||||
<li><a href="#spec_arithmetic">Specialised Arithmetic Intrinsics</a>
|
||||
<ol>
|
||||
<li><a href="#fmuladd">'<tt>llvm.fmuladd.*</tt>' Intrinsic</a></li>
|
||||
</ol>
|
||||
</li>
|
||||
<li><a href="#int_fp16">Half Precision Floating Point Intrinsics</a>
|
||||
<ol>
|
||||
<li><a href="#int_convert_to_fp16">'<tt>llvm.convert.to.fp16</tt>' Intrinsic</a></li>
|
||||
@ -7945,6 +7950,52 @@ LLVM</a>.</p>
|
||||
|
||||
</div>
|
||||
|
||||
<!-- ======================================================================= -->
|
||||
<h3>
|
||||
<a name="spec_arithmetic">Specialised Arithmetic Intrinsics</a>
|
||||
</h3>
|
||||
|
||||
<!-- _______________________________________________________________________ -->
|
||||
|
||||
<h4>
|
||||
<a name="fmuladd">'<tt>llvm.fmuladd.*</tt>' Intrinsic</a>
|
||||
</h4>
|
||||
|
||||
<div>
|
||||
|
||||
<h5>Syntax:</h5>
|
||||
<pre>
|
||||
declare float @llvm.fmuladd.f32(float %a, float %b, float %c)
|
||||
declare double @llvm.fmuladd.f64(double %a, double %b, double %c)
|
||||
</pre>
|
||||
|
||||
<h5>Overview:</h5>
|
||||
<p>The '<tt>llvm.fmuladd.*</tt>' intrinsic functions represent multiply-add
|
||||
expressions that can be fused if the code generator determines that the fused
|
||||
expression would be legal and efficient.</p>
|
||||
|
||||
<h5>Arguments:</h5>
|
||||
<p>The '<tt>llvm.fmuladd.*</tt>' intrinsics each take three arguments: two
|
||||
multiplicands, a and b, and an addend c.</p>
|
||||
|
||||
<h5>Semantics:</h5>
|
||||
<p>The expression:</p>
|
||||
<pre>
|
||||
%0 = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
|
||||
</pre>
|
||||
<p>is equivalent to the expression a * b + c, except that rounding will not be
|
||||
performed between the multiplication and addition steps if the code generator
|
||||
fuses the operations. Fusion is not guaranteed, even if the target platform
|
||||
supports it. If a fused multiply-add is required, the corresponding llvm.fma.*
|
||||
intrinsic function should be used instead.</p>
|
||||
|
||||
<h5>Examples:</h5>
|
||||
<pre>
|
||||
%r2 = call float @llvm.fmuladd.f32(float %a, float %b, float %c) ; yields {float}:r2 = (a * b) + c
|
||||
</pre>
|
||||
|
||||
</div>
|
||||
|
||||
<!-- ======================================================================= -->
|
||||
<h3>
|
||||
<a name="int_fp16">Half Precision Floating Point Intrinsics</a>
|
||||
|
@ -266,6 +266,10 @@ let Properties = [IntrNoMem] in {
|
||||
def int_fma : Intrinsic<[llvm_anyfloat_ty],
|
||||
[LLVMMatchType<0>, LLVMMatchType<0>,
|
||||
LLVMMatchType<0>]>;
|
||||
|
||||
// llvm.fmuladd: multiply-add (a * b + c) that the code generator may fuse into an FMA; one overloaded FP type is shared by the result and all three operands.
def int_fmuladd : Intrinsic<[llvm_anyfloat_ty],
|
||||
[LLVMMatchType<0>, LLVMMatchType<0>,
|
||||
LLVMMatchType<0>]>;
|
||||
}
|
||||
|
||||
// NOTE: these are internal interfaces.
|
||||
|
@ -1657,6 +1657,14 @@ public:
|
||||
return false;
|
||||
}
|
||||
|
||||
/// isFMAFasterThanMulAndAdd - Return true if a fused multiply-add (FMA) is
|
||||
/// faster than a separate mul and add pair for the given value type. fmuladd
|
||||
/// intrinsics are lowered to FMA only when this returns true and FMA is legal;
|
||||
/// otherwise they expand to mul + add. This default always returns false.
|
||||
virtual bool isFMAFasterThanMulAndAdd(EVT) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
/// isNarrowingProfitable - Return true if it's profitable to narrow
|
||||
/// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
|
||||
/// from i32 to i8 but not from i32 to i16.
|
||||
|
@ -4932,6 +4932,27 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
|
||||
getValue(I.getArgOperand(1)),
|
||||
getValue(I.getArgOperand(2))));
|
||||
return 0;
|
||||
case Intrinsic::fmuladd: {
|
||||
EVT VT = TLI.getValueType(I.getType());
|
||||
if (TLI.isOperationLegal(ISD::FMA, VT) && TLI.isFMAFasterThanMulAndAdd(VT)){
|
||||
setValue(&I, DAG.getNode(ISD::FMA, dl,
|
||||
getValue(I.getArgOperand(0)).getValueType(),
|
||||
getValue(I.getArgOperand(0)),
|
||||
getValue(I.getArgOperand(1)),
|
||||
getValue(I.getArgOperand(2))));
|
||||
} else {
|
||||
SDValue Mul = DAG.getNode(ISD::FMUL, dl,
|
||||
getValue(I.getArgOperand(0)).getValueType(),
|
||||
getValue(I.getArgOperand(0)),
|
||||
getValue(I.getArgOperand(1)));
|
||||
SDValue Add = DAG.getNode(ISD::FADD, dl,
|
||||
getValue(I.getArgOperand(0)).getValueType(),
|
||||
Mul,
|
||||
getValue(I.getArgOperand(2)));
|
||||
setValue(&I, Add);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
case Intrinsic::convert_to_fp16:
|
||||
setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, dl,
|
||||
MVT::i16, getValue(I.getArgOperand(0))));
|
||||
|
Loading…
x
Reference in New Issue
Block a user