mirror of
https://github.com/RPCS3/llvm.git
synced 2024-12-27 06:35:30 +00:00
Use the new predicate support that Evan Cheng added to remove some code
from the DAGToDAG cpp file. This adds pattern support for vector and scalar fma, which passes test/Regression/CodeGen/PowerPC/fma.ll, and does the right thing in the presence of -disable-excess-fp-precision. Allows us to match: void %foo(<4 x float> * %a) { entry: %tmp1 = load <4 x float> * %a; %tmp2 = mul <4 x float> %tmp1, %tmp1 %tmp3 = add <4 x float> %tmp2, %tmp1 store <4 x float> %tmp3, <4 x float> *%a ret void } As: _foo: li r2, 0 lvx v0, r2, r3 vmaddfp v0, v0, v0, v0 stvx v0, r2, r3 blr Or, with llc -disable-excess-fp-precision, _foo: li r2, 0 lvx v0, r2, r3 vxor v1, v1, v1 vmaddfp v1, v0, v0, v1 vaddfp v0, v1, v0 stvx v0, r2, r3 blr git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@24719 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
1dae25e23a
commit
a07da92624
@ -885,53 +885,6 @@ SDOperand PPCDAGToDAGISel::Select(SDOperand Op) {
|
|||||||
CurDAG->getTargetFrameIndex(FI, MVT::i32),
|
CurDAG->getTargetFrameIndex(FI, MVT::i32),
|
||||||
getI32Imm(0));
|
getI32Imm(0));
|
||||||
}
|
}
|
||||||
case ISD::FADD: {
|
|
||||||
MVT::ValueType Ty = N->getValueType(0);
|
|
||||||
if (!NoExcessFPPrecision) { // Match FMA ops
|
|
||||||
if (N->getOperand(0).getOpcode() == ISD::FMUL &&
|
|
||||||
N->getOperand(0).Val->hasOneUse()) {
|
|
||||||
++FusedFP; // Statistic
|
|
||||||
return CurDAG->SelectNodeTo(N, Ty == MVT::f64 ? PPC::FMADD :PPC::FMADDS,
|
|
||||||
Ty, Select(N->getOperand(0).getOperand(0)),
|
|
||||||
Select(N->getOperand(0).getOperand(1)),
|
|
||||||
Select(N->getOperand(1)));
|
|
||||||
} else if (N->getOperand(1).getOpcode() == ISD::FMUL &&
|
|
||||||
N->getOperand(1).hasOneUse()) {
|
|
||||||
++FusedFP; // Statistic
|
|
||||||
return CurDAG->SelectNodeTo(N, Ty == MVT::f64 ? PPC::FMADD :PPC::FMADDS,
|
|
||||||
Ty, Select(N->getOperand(1).getOperand(0)),
|
|
||||||
Select(N->getOperand(1).getOperand(1)),
|
|
||||||
Select(N->getOperand(0)));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Other cases are autogenerated.
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case ISD::FSUB: {
|
|
||||||
MVT::ValueType Ty = N->getValueType(0);
|
|
||||||
|
|
||||||
if (!NoExcessFPPrecision) { // Match FMA ops
|
|
||||||
if (N->getOperand(0).getOpcode() == ISD::FMUL &&
|
|
||||||
N->getOperand(0).Val->hasOneUse()) {
|
|
||||||
++FusedFP; // Statistic
|
|
||||||
return CurDAG->SelectNodeTo(N, Ty == MVT::f64 ? PPC::FMSUB:PPC::FMSUBS,
|
|
||||||
Ty, Select(N->getOperand(0).getOperand(0)),
|
|
||||||
Select(N->getOperand(0).getOperand(1)),
|
|
||||||
Select(N->getOperand(1)));
|
|
||||||
} else if (N->getOperand(1).getOpcode() == ISD::FMUL &&
|
|
||||||
N->getOperand(1).Val->hasOneUse()) {
|
|
||||||
++FusedFP; // Statistic
|
|
||||||
return CurDAG->SelectNodeTo(N, Ty == MVT::f64 ?PPC::FNMSUB:PPC::FNMSUBS,
|
|
||||||
Ty, Select(N->getOperand(1).getOperand(0)),
|
|
||||||
Select(N->getOperand(1).getOperand(1)),
|
|
||||||
Select(N->getOperand(0)));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Other cases are autogenerated.
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case ISD::SDIV: {
|
case ISD::SDIV: {
|
||||||
// FIXME: since this depends on the setting of the carry flag from the srawi
|
// FIXME: since this depends on the setting of the carry flag from the srawi
|
||||||
// we should really be making notes about that for the scheduler.
|
// we should really be making notes about that for the scheduler.
|
||||||
|
@ -168,7 +168,7 @@ def crbitm: Operand<i8> {
|
|||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// PowerPC Instruction Predicate Definitions.
|
// PowerPC Instruction Predicate Definitions.
|
||||||
def FPContractions : Predicate<"!NoExcessFPPrecision">;
|
// FPContractions - Patterns guarded by this predicate form fused FP operations
// (multiply-add and friends).  They may only be selected when excess FP
// precision has NOT been disabled, hence the negation.
def FPContractions : Predicate<"!NoExcessFPPrecision">;
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// PowerPC Instruction Definitions.
|
// PowerPC Instruction Definitions.
|
||||||
@ -746,22 +746,26 @@ def FNMADD : AForm_1<63, 31,
|
|||||||
(ops F8RC:$FRT, F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
|
(ops F8RC:$FRT, F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
|
||||||
"fnmadd $FRT, $FRA, $FRC, $FRB", FPFused,
|
"fnmadd $FRT, $FRA, $FRC, $FRB", FPFused,
|
||||||
[(set F8RC:$FRT, (fneg (fadd (fmul F8RC:$FRA, F8RC:$FRC),
|
[(set F8RC:$FRT, (fneg (fadd (fmul F8RC:$FRA, F8RC:$FRC),
|
||||||
F8RC:$FRB)))]>;
|
F8RC:$FRB)))]>,
|
||||||
|
Requires<[FPContractions]>;
|
||||||
def FNMADDS : AForm_1<59, 31,
|
def FNMADDS : AForm_1<59, 31,
|
||||||
(ops F4RC:$FRT, F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
|
(ops F4RC:$FRT, F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
|
||||||
"fnmadds $FRT, $FRA, $FRC, $FRB", FPGeneral,
|
"fnmadds $FRT, $FRA, $FRC, $FRB", FPGeneral,
|
||||||
[(set F4RC:$FRT, (fneg (fadd (fmul F4RC:$FRA, F4RC:$FRC),
|
[(set F4RC:$FRT, (fneg (fadd (fmul F4RC:$FRA, F4RC:$FRC),
|
||||||
F4RC:$FRB)))]>;
|
F4RC:$FRB)))]>,
|
||||||
|
Requires<[FPContractions]>;
|
||||||
def FNMSUB : AForm_1<63, 30,
|
def FNMSUB : AForm_1<63, 30,
|
||||||
(ops F8RC:$FRT, F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
|
(ops F8RC:$FRT, F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
|
||||||
"fnmsub $FRT, $FRA, $FRC, $FRB", FPFused,
|
"fnmsub $FRT, $FRA, $FRC, $FRB", FPFused,
|
||||||
[(set F8RC:$FRT, (fneg (fsub (fmul F8RC:$FRA, F8RC:$FRC),
|
[(set F8RC:$FRT, (fneg (fsub (fmul F8RC:$FRA, F8RC:$FRC),
|
||||||
F8RC:$FRB)))]>;
|
F8RC:$FRB)))]>,
|
||||||
|
Requires<[FPContractions]>;
|
||||||
def FNMSUBS : AForm_1<59, 30,
|
def FNMSUBS : AForm_1<59, 30,
|
||||||
(ops F4RC:$FRT, F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
|
(ops F4RC:$FRT, F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
|
||||||
"fnmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral,
|
"fnmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral,
|
||||||
[(set F4RC:$FRT, (fneg (fsub (fmul F4RC:$FRA, F4RC:$FRC),
|
[(set F4RC:$FRT, (fneg (fsub (fmul F4RC:$FRA, F4RC:$FRC),
|
||||||
F4RC:$FRB)))]>;
|
F4RC:$FRB)))]>,
|
||||||
|
Requires<[FPContractions]>;
|
||||||
// FSEL is artificially split into 4 and 8-byte forms for the result. To avoid
|
// FSEL is artificially split into 4 and 8-byte forms for the result. To avoid
|
||||||
// having 4 of these, force the comparison to always be an 8-byte double (code
|
// having 4 of these, force the comparison to always be an 8-byte double (code
|
||||||
// should use an FMRSD if the input comparison value really wants to be a float)
|
// should use an FMRSD if the input comparison value really wants to be a float)
|
||||||
@ -848,12 +852,14 @@ def RLDICR : MDForm_1<30, 1,
|
|||||||
def VMADDFP : VAForm_1<46, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB, VRRC:$vC),
|
def VMADDFP : VAForm_1<46, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB, VRRC:$vC),
|
||||||
"vmaddfp $vD, $vA, $vC, $vB", VecFP,
|
"vmaddfp $vD, $vA, $vC, $vB", VecFP,
|
||||||
[(set VRRC:$vD, (fadd (fmul VRRC:$vA, VRRC:$vC),
|
[(set VRRC:$vD, (fadd (fmul VRRC:$vA, VRRC:$vC),
|
||||||
VRRC:$vB))]>;
|
VRRC:$vB))]>,
|
||||||
|
Requires<[FPContractions]>;
|
||||||
def VNMSUBFP: VAForm_1<47, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB, VRRC:$vC),
|
def VNMSUBFP: VAForm_1<47, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB, VRRC:$vC),
|
||||||
"vnmsubfp $vD, $vA, $vC, $vB", VecFP,
|
"vnmsubfp $vD, $vA, $vC, $vB", VecFP,
|
||||||
[(set VRRC:$vD, (fneg (fsub (fmul VRRC:$vA,
|
[(set VRRC:$vD, (fneg (fsub (fmul VRRC:$vA,
|
||||||
VRRC:$vC),
|
VRRC:$vC),
|
||||||
VRRC:$vB)))]>;
|
VRRC:$vB)))]>,
|
||||||
|
Requires<[FPContractions]>;
|
||||||
|
|
||||||
// VX-Form instructions. AltiVec arithmetic ops.
|
// VX-Form instructions. AltiVec arithmetic ops.
|
||||||
def VADDFP : VXForm_1<10, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB),
|
def VADDFP : VXForm_1<10, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB),
|
||||||
@ -971,6 +977,14 @@ def : Pat<(add GPRC:$in, (PPChi tconstpool:$g, 0)),
|
|||||||
def : Pat<(fmul VRRC:$vA, VRRC:$vB),
|
def : Pat<(fmul VRRC:$vA, VRRC:$vB),
|
||||||
(VMADDFP VRRC:$vA, (V_SET0), VRRC:$vB)>;
|
(VMADDFP VRRC:$vA, (V_SET0), VRRC:$vB)>;
|
||||||
|
|
||||||
|
// Fused negative multiply subtract, alternate pattern
|
||||||
|
def : Pat<(fsub F8RC:$B, (fmul F8RC:$A, F8RC:$C)),
|
||||||
|
(FNMSUB F8RC:$A, F8RC:$C, F8RC:$B)>,
|
||||||
|
Requires<[FPContractions]>;
|
||||||
|
def : Pat<(fsub F4RC:$B, (fmul F4RC:$A, F4RC:$C)),
|
||||||
|
(FNMSUBS F4RC:$A, F4RC:$C, F4RC:$B)>,
|
||||||
|
Requires<[FPContractions]>;
|
||||||
|
|
||||||
// Fused multiply add and multiply sub for packed float. These are represented
|
// Fused multiply add and multiply sub for packed float. These are represented
|
||||||
// separately from the real instructions above, for operations that must have
|
// separately from the real instructions above, for operations that must have
|
||||||
// the additional precision, such as Newton-Raphson (used by divide, sqrt)
|
// the additional precision, such as Newton-Raphson (used by divide, sqrt)
|
||||||
|
Loading…
Reference in New Issue
Block a user