Add a new intrinsic: llvm.fmuladd. This intrinsic represents a multiply-add
expression (a * b + c) that can be implemented as a fused multiply-add (fma)
if the target determines that this will be more efficient. This intrinsic
will be used to implement FP_CONTRACT support and an aggressive FMA formation
mode.

If your target has a fast FMA instruction you should override the
isFMAFasterThanMulAndAdd method in TargetLowering to return true.

llvm-svn: 158014
This commit is contained in:
Lang Hames 2012-06-05 19:07:46 +00:00
parent 00b608400c
commit 30d8fb4deb
4 changed files with 84 additions and 0 deletions

View File

@ -277,6 +277,11 @@
<li><a href="#int_umul_overflow">'<tt>llvm.umul.with.overflow.*</tt>' Intrinsics</a></li>
</ol>
</li>
<li><a href="#spec_arithmetic">Specialised Arithmetic Intrinsics</a>
<ol>
<li><a href="#fmuladd">'<tt>llvm.fmuladd.*</tt>' Intrinsic</a></li>
</ol>
</li>
<li><a href="#int_fp16">Half Precision Floating Point Intrinsics</a>
<ol>
<li><a href="#int_convert_to_fp16">'<tt>llvm.convert.to.fp16</tt>' Intrinsic</a></li>
@ -7945,6 +7950,52 @@ LLVM</a>.</p>
</div>
<!-- ======================================================================= -->
<h3>
<a name="spec_arithmetic">Specialised Arithmetic Intrinsics</a>
</h3>
<!-- _______________________________________________________________________ -->
<h4>
<a name="fmuladd">'<tt>llvm.fmuladd.*</tt>' Intrinsic</a>
</h4>
<div>
<h5>Syntax:</h5>
<pre>
declare float @llvm.fmuladd.f32(float %a, float %b, float %c)
declare double @llvm.fmuladd.f64(double %a, double %b, double %c)
</pre>
<h5>Overview:</h5>
<p>The '<tt>llvm.fmuladd.*</tt>' intrinsic functions represent multiply-add
expressions that can be fused if the code generator determines that the fused
expression would be legal and efficient.</p>
<h5>Arguments:</h5>
<p>The '<tt>llvm.fmuladd.*</tt>' intrinsics each take three arguments: two
multiplicands, a and b, and an addend c.</p>
<h5>Semantics:</h5>
<p>The expression:</p>
<pre>
%0 = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
</pre>
<p>is equivalent to the expression a * b + c, except that rounding will not be
performed between the multiplication and addition steps if the code generator
fuses the operations. Fusion is not guaranteed, even if the target platform
supports it. If a fused multiply-add is required the corresponding llvm.fma.*
intrinsic function should be used instead.</p>
<h5>Examples:</h5>
<pre>
%r2 = call float @llvm.fmuladd.f32(float %a, float %b, float %c) ; yields {float}:r2 = (a * b) + c
</pre>
</div>
<!-- ======================================================================= -->
<h3>
<a name="int_fp16">Half Precision Floating Point Intrinsics</a>

View File

@ -266,6 +266,10 @@ let Properties = [IntrNoMem] in {
// llvm.fma.*: overloaded on any floating-point type; the result and all three
// operands share that one type (LLVMMatchType<0>). Per the language reference
// text, this is the intrinsic to use when a fused multiply-add is required.
def int_fma : Intrinsic<[llvm_anyfloat_ty],
[LLVMMatchType<0>, LLVMMatchType<0>,
LLVMMatchType<0>]>;
// llvm.fmuladd.*: multiply-add (a * b + c) that the code generator may, but
// need not, fuse into a single FMA. Overloaded on any floating-point type;
// the result and all three operands share that one type (LLVMMatchType<0>).
def int_fmuladd : Intrinsic<[llvm_anyfloat_ty],
[LLVMMatchType<0>, LLVMMatchType<0>,
LLVMMatchType<0>]>;
}
// NOTE: these are internal interfaces.

View File

@ -1657,6 +1657,14 @@ public:
return false;
}
/// isFMAFasterThanMulAndAdd - Target hook reporting whether one fused
/// multiply-add of the given value type is faster than a separate multiply
/// followed by an add. Lowering of the fmuladd intrinsic produces an FMA only
/// when this hook answers true and FMA is a legal operation for the type; in
/// every other case fmuladd becomes a mul followed by an add. The default
/// answer is false, so targets with a fast FMA instruction should override
/// this method.
virtual bool isFMAFasterThanMulAndAdd(EVT /*VT*/) const { return false; }
/// isNarrowingProfitable - Return true if it's profitable to narrow
/// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
/// from i32 to i8 but not from i32 to i16.

View File

@ -4932,6 +4932,27 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getValue(I.getArgOperand(1)),
getValue(I.getArgOperand(2))));
return 0;
case Intrinsic::fmuladd: {
  // llvm.fmuladd(a, b, c) stands for a * b + c. It is emitted as one FMA node
  // only when the target says FMA is both legal for the result type and
  // faster than separate operations; otherwise it is emitted as FMUL + FADD.
  const EVT ResultVT = TLI.getValueType(I.getType());
  const bool FuseIntoFMA = TLI.isOperationLegal(ISD::FMA, ResultVT) &&
                           TLI.isFMAFasterThanMulAndAdd(ResultVT);

  // Both multiplicands are needed on either path, so lower them once. The
  // node type is taken from the first lowered operand.
  SDValue MulLHS = getValue(I.getArgOperand(0));
  SDValue MulRHS = getValue(I.getArgOperand(1));
  const EVT OpVT = MulLHS.getValueType();

  if (!FuseIntoFMA) {
    // Unfused form: build the product first, then add the third operand.
    SDValue Product = DAG.getNode(ISD::FMUL, dl, OpVT, MulLHS, MulRHS);
    setValue(&I, DAG.getNode(ISD::FADD, dl, OpVT, Product,
                             getValue(I.getArgOperand(2))));
    return 0;
  }

  // Fused form: a single three-operand FMA node.
  setValue(&I, DAG.getNode(ISD::FMA, dl, OpVT, MulLHS, MulRHS,
                           getValue(I.getArgOperand(2))));
  return 0;
}
case Intrinsic::convert_to_fp16:
setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, dl,
MVT::i16, getValue(I.getArgOperand(0))));