mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-09 22:04:10 +00:00
Utilize new SDNode flag functionality to expand current support for FMA
Summary: This patch originated from D47388 and is a proper subset of the original changes, containing only the FMF optimization guard extensions. Reviewers: spatel, hfinkel, wristow, arsenm, javed.absar, rampitec, nhaehnle, nemanjai Reviewed By: rampitec, nhaehnle Subscribers: tpr, nemanjai, wdng Differential Revision: https://reviews.llvm.org/D47918 llvm-svn: 334876
This commit is contained in:
parent
91fb960d92
commit
2600a27111
@ -9699,8 +9699,9 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
|
||||
return SDValue();
|
||||
|
||||
SDNodeFlags Flags = N->getFlags();
|
||||
bool CanFuse = Options.UnsafeFPMath || isContractable(N);
|
||||
bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
|
||||
Options.UnsafeFPMath || HasFMAD);
|
||||
CanFuse || HasFMAD);
|
||||
// If the addition is not contractable, do not combine.
|
||||
if (!AllowFusionGlobally && !isContractable(N))
|
||||
return SDValue();
|
||||
@ -9772,9 +9773,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
|
||||
// More folding opportunities when target permits.
|
||||
if (Aggressive) {
|
||||
// fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
|
||||
// FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
|
||||
// are currently only supported on binary nodes.
|
||||
if (Options.UnsafeFPMath &&
|
||||
if (CanFuse &&
|
||||
N0.getOpcode() == PreferredFusedOpcode &&
|
||||
N0.getOperand(2).getOpcode() == ISD::FMUL &&
|
||||
N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
|
||||
@ -9787,9 +9786,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
|
||||
}
|
||||
|
||||
// fold (fadd x, (fma y, z, (fmul u, v))) -> (fma y, z, (fma u, v, x))
|
||||
// FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
|
||||
// are currently only supported on binary nodes.
|
||||
if (Options.UnsafeFPMath &&
|
||||
if (CanFuse &&
|
||||
N1->getOpcode() == PreferredFusedOpcode &&
|
||||
N1.getOperand(2).getOpcode() == ISD::FMUL &&
|
||||
N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
|
||||
@ -9913,8 +9910,9 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
|
||||
return SDValue();
|
||||
|
||||
const SDNodeFlags Flags = N->getFlags();
|
||||
bool CanFuse = Options.UnsafeFPMath || isContractable(N);
|
||||
bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
|
||||
Options.UnsafeFPMath || HasFMAD);
|
||||
CanFuse || HasFMAD);
|
||||
|
||||
// If the subtraction is not contractable, do not combine.
|
||||
if (!AllowFusionGlobally && !isContractable(N))
|
||||
@ -9945,11 +9943,12 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
|
||||
|
||||
// fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
|
||||
// Note: Commutes FSUB operands.
|
||||
if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse()))
|
||||
if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
|
||||
return DAG.getNode(PreferredFusedOpcode, SL, VT,
|
||||
DAG.getNode(ISD::FNEG, SL, VT,
|
||||
N1.getOperand(0)),
|
||||
N1.getOperand(1), N0, Flags);
|
||||
}
|
||||
|
||||
// fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
|
||||
if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
|
||||
@ -10045,9 +10044,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
|
||||
if (Aggressive) {
|
||||
// fold (fsub (fma x, y, (fmul u, v)), z)
|
||||
// -> (fma x, y, (fma u, v, (fneg z)))
|
||||
// FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
|
||||
// are currently only supported on binary nodes.
|
||||
if (Options.UnsafeFPMath && N0.getOpcode() == PreferredFusedOpcode &&
|
||||
if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
|
||||
isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
|
||||
N0.getOperand(2)->hasOneUse()) {
|
||||
return DAG.getNode(PreferredFusedOpcode, SL, VT,
|
||||
@ -10061,9 +10058,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
|
||||
|
||||
// fold (fsub x, (fma y, z, (fmul u, v)))
|
||||
// -> (fma (fneg y), z, (fma (fneg u), v, x))
|
||||
// FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
|
||||
// are currently only supported on binary nodes.
|
||||
if (Options.UnsafeFPMath && N1.getOpcode() == PreferredFusedOpcode &&
|
||||
if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
|
||||
isContractableFMUL(N1.getOperand(2))) {
|
||||
SDValue N20 = N1.getOperand(2).getOperand(0);
|
||||
SDValue N21 = N1.getOperand(2).getOperand(1);
|
||||
@ -10687,6 +10682,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
|
||||
|
||||
// FMA nodes have flags that propagate to the created nodes.
|
||||
const SDNodeFlags Flags = N->getFlags();
|
||||
bool UnsafeFPMath = Options.UnsafeFPMath || isContractable(N);
|
||||
|
||||
// Constant fold FMA.
|
||||
if (isa<ConstantFPSDNode>(N0) &&
|
||||
@ -10695,7 +10691,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
|
||||
return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
|
||||
}
|
||||
|
||||
if (Options.UnsafeFPMath) {
|
||||
if (UnsafeFPMath) {
|
||||
if (N0CFP && N0CFP->isZero())
|
||||
return N2;
|
||||
if (N1CFP && N1CFP->isZero())
|
||||
@ -10712,7 +10708,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
|
||||
!isConstantFPBuildVectorOrConstantFP(N1))
|
||||
return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
|
||||
|
||||
if (Options.UnsafeFPMath) {
|
||||
if (UnsafeFPMath) {
|
||||
// (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
|
||||
if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
|
||||
isConstantFPBuildVectorOrConstantFP(N1) &&
|
||||
@ -10758,7 +10754,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
|
||||
}
|
||||
}
|
||||
|
||||
if (Options.UnsafeFPMath) {
|
||||
if (UnsafeFPMath) {
|
||||
// (fma x, c, x) -> (fmul x, (c+1))
|
||||
if (N1CFP && N0 == N2) {
|
||||
return DAG.getNode(ISD::FMUL, DL, VT, N0,
|
||||
|
@ -12,8 +12,7 @@ define <2 x float> @fma_1(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
|
||||
; the contract on the fadd
|
||||
define <2 x float> @fma_2(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
|
||||
; CHECK-LABEL: fma_2:
|
||||
; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
|
||||
; CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
|
||||
; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
|
||||
%tmp1 = fmul <2 x float> %A, %B;
|
||||
%tmp2 = fadd contract <2 x float> %C, %tmp1;
|
||||
ret <2 x float> %tmp2
|
||||
@ -40,8 +39,7 @@ define <2 x float> @fma_sub_1(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
|
||||
; the contract on the fsub
|
||||
define <2 x float> @fma_sub_2(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
|
||||
; CHECK-LABEL: fma_sub_2:
|
||||
; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
|
||||
; CHECK: fsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
|
||||
; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
|
||||
%tmp1 = fmul <2 x float> %A, %B;
|
||||
%tmp2 = fsub contract <2 x float> %C, %tmp1;
|
||||
ret <2 x float> %tmp2
|
||||
|
@ -22,10 +22,10 @@ define float @can_fma_with_fewer_uses(float %f1, float %f2, float %f3, float %f4
|
||||
define float @no_fma_with_fewer_uses(float %f1, float %f2, float %f3, float %f4) {
|
||||
; CHECK-LABEL: no_fma_with_fewer_uses:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xsmulsp 0, 3, 4
|
||||
; CHECK-NEXT: xsmulsp 13, 1, 2
|
||||
; CHECK-NEXT: xsmaddasp 0, 1, 2
|
||||
; CHECK-NEXT: xsdivsp 1, 13, 0
|
||||
; CHECK-NEXT: xsmulsp 0, 1, 2
|
||||
; CHECK-NEXT: fmr 1, 0
|
||||
; CHECK-NEXT: xsmaddasp 1, 3, 4
|
||||
; CHECK-NEXT: xsdivsp 1, 0, 1
|
||||
; CHECK-NEXT: blr
|
||||
%mul1 = fmul contract float %f1, %f2
|
||||
%mul2 = fmul float %f3, %f4
|
||||
|
@ -15,15 +15,14 @@ declare float @llvm.sqrt.f32(float)
|
||||
; X * Y + Z --> fma(X, Y, Z)
|
||||
|
||||
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_contract1:'
|
||||
; FMFDEBUG: fmul {{t[0-9]+}}, {{t[0-9]+}}
|
||||
; FMFDEBUG: fadd contract {{t[0-9]+}}, {{t[0-9]+}}
|
||||
; FMFDEBUG: fma contract {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
|
||||
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_contract1:'
|
||||
|
||||
define float @fmul_fadd_contract1(float %x, float %y, float %z) {
|
||||
; FMF-LABEL: fmul_fadd_contract1:
|
||||
; FMF: # %bb.0:
|
||||
; FMF-NEXT: xsmulsp 0, 1, 2
|
||||
; FMF-NEXT: xsaddsp 1, 0, 3
|
||||
; FMF-NEXT: xsmaddasp 3, 1, 2
|
||||
; FMF-NEXT: fmr 1, 3
|
||||
; FMF-NEXT: blr
|
||||
;
|
||||
; GLOBAL-LABEL: fmul_fadd_contract1:
|
||||
@ -62,15 +61,14 @@ define float @fmul_fadd_contract2(float %x, float %y, float %z) {
|
||||
; Reassociation implies that FMA contraction is allowed.
|
||||
|
||||
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_reassoc1:'
|
||||
; FMFDEBUG: fmul {{t[0-9]+}}, {{t[0-9]+}}
|
||||
; FMFDEBUG: fadd reassoc {{t[0-9]+}}, {{t[0-9]+}}
|
||||
; FMFDEBUG: fma reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
|
||||
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_reassoc1:'
|
||||
|
||||
define float @fmul_fadd_reassoc1(float %x, float %y, float %z) {
|
||||
; FMF-LABEL: fmul_fadd_reassoc1:
|
||||
; FMF: # %bb.0:
|
||||
; FMF-NEXT: xsmulsp 0, 1, 2
|
||||
; FMF-NEXT: xsaddsp 1, 0, 3
|
||||
; FMF-NEXT: xsmaddasp 3, 1, 2
|
||||
; FMF-NEXT: fmr 1, 3
|
||||
; FMF-NEXT: blr
|
||||
;
|
||||
; GLOBAL-LABEL: fmul_fadd_reassoc1:
|
||||
@ -156,7 +154,7 @@ define float @fmul_fadd_fast2(float %x, float %y, float %z) {
|
||||
; This is the minimum FMF needed for this transform - the FMA allows reassociation.
|
||||
|
||||
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc1:'
|
||||
; FMFDEBUG: fma reassoc {{t[0-9]+}}
|
||||
; FMFDEBUG: fmul reassoc {{t[0-9]+}},
|
||||
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc1:'
|
||||
|
||||
; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc1:'
|
||||
@ -169,12 +167,7 @@ define float @fmul_fma_reassoc1(float %x) {
|
||||
; FMF-NEXT: addis 3, 2, .LCPI6_0@toc@ha
|
||||
; FMF-NEXT: addi 3, 3, .LCPI6_0@toc@l
|
||||
; FMF-NEXT: lfsx 0, 0, 3
|
||||
; FMF-NEXT: addis 3, 2, .LCPI6_1@toc@ha
|
||||
; FMF-NEXT: addi 3, 3, .LCPI6_1@toc@l
|
||||
; FMF-NEXT: lfsx 2, 0, 3
|
||||
; FMF-NEXT: xsmulsp 0, 1, 0
|
||||
; FMF-NEXT: xsmaddasp 0, 1, 2
|
||||
; FMF-NEXT: fmr 1, 0
|
||||
; FMF-NEXT: xsmulsp 1, 1, 0
|
||||
; FMF-NEXT: blr
|
||||
;
|
||||
; GLOBAL-LABEL: fmul_fma_reassoc1:
|
||||
@ -193,7 +186,6 @@ define float @fmul_fma_reassoc1(float %x) {
|
||||
|
||||
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:'
|
||||
; FMFDEBUG: fmul reassoc {{t[0-9]+}}
|
||||
; FMFDEBUG: fma reassoc {{t[0-9]+}}
|
||||
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc2:'
|
||||
|
||||
; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:'
|
||||
@ -206,12 +198,7 @@ define float @fmul_fma_reassoc2(float %x) {
|
||||
; FMF-NEXT: addis 3, 2, .LCPI7_0@toc@ha
|
||||
; FMF-NEXT: addi 3, 3, .LCPI7_0@toc@l
|
||||
; FMF-NEXT: lfsx 0, 0, 3
|
||||
; FMF-NEXT: addis 3, 2, .LCPI7_1@toc@ha
|
||||
; FMF-NEXT: addi 3, 3, .LCPI7_1@toc@l
|
||||
; FMF-NEXT: lfsx 2, 0, 3
|
||||
; FMF-NEXT: xsmulsp 0, 1, 0
|
||||
; FMF-NEXT: xsmaddasp 0, 1, 2
|
||||
; FMF-NEXT: fmr 1, 0
|
||||
; FMF-NEXT: xsmulsp 1, 1, 0
|
||||
; FMF-NEXT: blr
|
||||
;
|
||||
; GLOBAL-LABEL: fmul_fma_reassoc2:
|
||||
@ -229,7 +216,7 @@ define float @fmul_fma_reassoc2(float %x) {
|
||||
; The FMA is now fully 'fast'. This implies that reassociation is allowed.
|
||||
|
||||
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast1:'
|
||||
; FMFDEBUG: fma nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
|
||||
; FMFDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
|
||||
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast1:'
|
||||
|
||||
; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast1:'
|
||||
@ -242,12 +229,7 @@ define float @fmul_fma_fast1(float %x) {
|
||||
; FMF-NEXT: addis 3, 2, .LCPI8_0@toc@ha
|
||||
; FMF-NEXT: addi 3, 3, .LCPI8_0@toc@l
|
||||
; FMF-NEXT: lfsx 0, 0, 3
|
||||
; FMF-NEXT: addis 3, 2, .LCPI8_1@toc@ha
|
||||
; FMF-NEXT: addi 3, 3, .LCPI8_1@toc@l
|
||||
; FMF-NEXT: lfsx 2, 0, 3
|
||||
; FMF-NEXT: xsmulsp 0, 1, 0
|
||||
; FMF-NEXT: xsmaddasp 0, 1, 2
|
||||
; FMF-NEXT: fmr 1, 0
|
||||
; FMF-NEXT: xsmulsp 1, 1, 0
|
||||
; FMF-NEXT: blr
|
||||
;
|
||||
; GLOBAL-LABEL: fmul_fma_fast1:
|
||||
@ -266,7 +248,6 @@ define float @fmul_fma_fast1(float %x) {
|
||||
|
||||
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast2:'
|
||||
; FMFDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
|
||||
; FMFDEBUG: fma nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
|
||||
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast2:'
|
||||
|
||||
; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast2:'
|
||||
@ -279,12 +260,7 @@ define float @fmul_fma_fast2(float %x) {
|
||||
; FMF-NEXT: addis 3, 2, .LCPI9_0@toc@ha
|
||||
; FMF-NEXT: addi 3, 3, .LCPI9_0@toc@l
|
||||
; FMF-NEXT: lfsx 0, 0, 3
|
||||
; FMF-NEXT: addis 3, 2, .LCPI9_1@toc@ha
|
||||
; FMF-NEXT: addi 3, 3, .LCPI9_1@toc@l
|
||||
; FMF-NEXT: lfsx 2, 0, 3
|
||||
; FMF-NEXT: xsmulsp 0, 1, 0
|
||||
; FMF-NEXT: xsmaddasp 0, 1, 2
|
||||
; FMF-NEXT: fmr 1, 0
|
||||
; FMF-NEXT: xsmulsp 1, 1, 0
|
||||
; FMF-NEXT: blr
|
||||
;
|
||||
; GLOBAL-LABEL: fmul_fma_fast2:
|
||||
|
Loading…
x
Reference in New Issue
Block a user