mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-27 13:42:24 +00:00
[PowerPC] Implement combineRepeatedFPDivisors
This does not matter on newer cores (where we can use reciprocal estimates in fast-math mode anyway), but for older cores this allows us to generate better fast-math code where we have multiple FDIVs with a common divisor. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@222710 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
521c9dc7d8
commit
5d6f185653
@ -7526,6 +7526,28 @@ SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand,
|
|||||||
return SDValue();
|
return SDValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool PPCTargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const {
|
||||||
|
// Note: This functionality is used only when unsafe-fp-math is enabled, and
|
||||||
|
// on cores with reciprocal estimates (which are used when unsafe-fp-math is
|
||||||
|
// enabled for division), this functionality is redundant with the default
|
||||||
|
// combiner logic (once the division -> reciprocal/multiply transformation
|
||||||
|
// has taken place). As a result, this matters more for older cores than for
|
||||||
|
// newer ones.
|
||||||
|
|
||||||
|
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
|
||||||
|
// reciprocal if there are two or more FDIVs (for embedded cores with only
|
||||||
|
// one FP pipeline) for three or more FDIVs (for generic OOO cores).
|
||||||
|
switch (Subtarget.getDarwinDirective()) {
|
||||||
|
default:
|
||||||
|
return NumUsers > 2;
|
||||||
|
case PPC::DIR_440:
|
||||||
|
case PPC::DIR_A2:
|
||||||
|
case PPC::DIR_E500mc:
|
||||||
|
case PPC::DIR_E5500:
|
||||||
|
return NumUsers > 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
|
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
|
||||||
unsigned Bytes, int Dist,
|
unsigned Bytes, int Dist,
|
||||||
SelectionDAG &DAG) {
|
SelectionDAG &DAG) {
|
||||||
|
@ -704,6 +704,7 @@ namespace llvm {
|
|||||||
bool &UseOneConstNR) const override;
|
bool &UseOneConstNR) const override;
|
||||||
SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
|
SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
|
||||||
unsigned &RefinementSteps) const override;
|
unsigned &RefinementSteps) const override;
|
||||||
|
/// Returns true when NumUsers reaches the threshold chosen for the
/// subtarget's CPU directive: more than one for the 440, A2, e500mc and
/// e5500 directives, more than two for every other directive. The
/// definition describes this as deciding whether FDIVs with a common
/// divisor are combined into FMULs by the reciprocal.
bool combineRepeatedFPDivisors(unsigned NumUsers) const override;
|
||||||
|
|
||||||
CCAssignFn *useFastISelCCs(unsigned Flag) const;
|
CCAssignFn *useFastISelCCs(unsigned Flag) const;
|
||||||
};
|
};
|
||||||
|
39
test/CodeGen/PowerPC/fdiv-combine.ll
Normal file
39
test/CodeGen/PowerPC/fdiv-combine.ll
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
; RUN: llc -mcpu=ppc64 < %s | FileCheck %s
|
||||||
|
target datalayout = "E-m:e-i64:64-n32:64"
|
||||||
|
target triple = "powerpc64-unknown-linux-gnu"
|
||||||
|
|
||||||
|
; Following test case checks:
|
||||||
|
; a / D; b / D; c / D;
|
||||||
|
; =>
|
||||||
|
; recip = 1.0 / D; a * recip; b * recip; c * recip;
|
||||||
|
|
||||||
|
define void @three_fdiv_double(double %D, double %a, double %b, double %c) #0 {
; Three divisions by %D: exactly one fdiv (the reciprocal) must remain,
; followed by three fmuls. The fdiv patterns require a register operand so
; they cannot be satisfied by the function's own symbol name, which also
; contains the text "fdiv".
; CHECK-LABEL: three_fdiv_double:
; CHECK: fdiv {{[0-9]}}
; CHECK-NOT: fdiv
; CHECK: fmul
; CHECK: fmul
; CHECK: fmul
  %div = fdiv double %a, %D
  %div1 = fdiv double %b, %D
  %div2 = fdiv double %c, %D
  tail call void @foo_3d(double %div, double %div1, double %div2)
  ret void
}
|
||||||
|
|
||||||
|
define void @two_fdiv_double(double %D, double %a, double %b) #0 {
; Two divisions by %D stay below the threshold used for -mcpu=ppc64, so both
; fdivs must be kept and no fmul may be introduced. The fdiv patterns require
; a register operand so they match real instructions rather than the
; function's own symbol name.
; CHECK-LABEL: two_fdiv_double:
; CHECK: fdiv {{[0-9]}}
; CHECK: fdiv {{[0-9]}}
; CHECK-NOT: fmul
  %div = fdiv double %a, %D
  %div1 = fdiv double %b, %D
  tail call void @foo_2d(double %div, double %div1)
  ret void
}
|
||||||
|
|
||||||
|
declare void @foo_3d(double, double, double)
|
||||||
|
declare void @foo_3_2xd(<2 x double>, <2 x double>, <2 x double>)
|
||||||
|
declare void @foo_2d(double, double)
|
||||||
|
|
||||||
|
attributes #0 = { "unsafe-fp-math"="true" }
|
Loading…
x
Reference in New Issue
Block a user