mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-26 20:57:15 +00:00
[PowerPC] Implement combineRepeatedFPDivisors
This does not matter on newer cores (where we can use reciprocal estimates in fast-math mode anyway), but for older cores this allows us to generate better fast-math code where we have multiple FDIVs with a common divisor. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@222710 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
521c9dc7d8
commit
5d6f185653
@ -7526,6 +7526,28 @@ SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand,
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
bool PPCTargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const {
|
||||
// Note: This functionality is used only when unsafe-fp-math is enabled, and
|
||||
// on cores with reciprocal estimates (which are used when unsafe-fp-math is
|
||||
// enabled for division), this functionality is redundant with the default
|
||||
// combiner logic (once the division -> reciprocal/multiply transformation
|
||||
// has taken place). As a result, this matters more for older cores than for
|
||||
// newer ones.
|
||||
|
||||
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
|
||||
// reciprocal if there are two or more FDIVs (for embedded cores with only
|
||||
// one FP pipeline) for three or more FDIVs (for generic OOO cores).
|
||||
switch (Subtarget.getDarwinDirective()) {
|
||||
default:
|
||||
return NumUsers > 2;
|
||||
case PPC::DIR_440:
|
||||
case PPC::DIR_A2:
|
||||
case PPC::DIR_E500mc:
|
||||
case PPC::DIR_E5500:
|
||||
return NumUsers > 1;
|
||||
}
|
||||
}
|
||||
|
||||
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
|
||||
unsigned Bytes, int Dist,
|
||||
SelectionDAG &DAG) {
|
||||
|
@ -704,6 +704,7 @@ namespace llvm {
|
||||
bool &UseOneConstNR) const override;
|
||||
SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
|
||||
unsigned &RefinementSteps) const override;
|
||||
/// Decide whether NumUsers FDIVs that share the same divisor should be
/// combined into FMULs by a single reciprocal. The required number of
/// users depends on the subtarget's CPU directive.
bool combineRepeatedFPDivisors(unsigned NumUsers) const override;
|
||||
|
||||
CCAssignFn *useFastISelCCs(unsigned Flag) const;
|
||||
};
|
||||
|
39
test/CodeGen/PowerPC/fdiv-combine.ll
Normal file
39
test/CodeGen/PowerPC/fdiv-combine.ll
Normal file
@ -0,0 +1,39 @@
|
||||
; RUN: llc -mcpu=ppc64 < %s | FileCheck %s
|
||||
target datalayout = "E-m:e-i64:64-n32:64"
|
||||
target triple = "powerpc64-unknown-linux-gnu"
|
||||
|
||||
; Following test case checks:
|
||||
; a / D; b / D; c / D;
|
||||
; =>
|
||||
; recip = 1.0 / D; a * recip; b * recip; c * recip;
|
||||
|
||||
; Three fdivs share the divisor %D. The checks expect a single fdiv (the
; reciprocal), no further fdiv before the first fmul, and then three fmuls.
; Note: "CHECK-NEXT-NOT" is not a FileCheck directive; CHECK-NOT is what
; actually enforces the absence of a second fdiv.
define void @three_fdiv_double(double %D, double %a, double %b, double %c) #0 {
; CHECK-LABEL: three_fdiv_double:
; CHECK: fdiv
; CHECK-NOT: fdiv
; CHECK: fmul
; CHECK: fmul
; CHECK: fmul
  %div = fdiv double %a, %D
  %div1 = fdiv double %b, %D
  %div2 = fdiv double %c, %D
  tail call void @foo_3d(double %div, double %div1, double %div2)
  ret void
}
|
||||
|
||||
; Only two fdivs share the divisor %D. The checks expect both fdivs to remain
; and no fmul to follow them, i.e. the reciprocal transformation must not
; fire for this RUN configuration.
; Note: "CHECK-NEXT-NOT" is not a FileCheck directive; CHECK-NOT is what
; actually enforces the absence of an fmul.
define void @two_fdiv_double(double %D, double %a, double %b) #0 {
; CHECK-LABEL: two_fdiv_double:
; CHECK: fdiv
; CHECK: fdiv
; CHECK-NOT: fmul
  %div = fdiv double %a, %D
  %div1 = fdiv double %b, %D
  tail call void @foo_2d(double %div, double %div1)
  ret void
}
|
||||
|
||||
declare void @foo_3d(double, double, double)
|
||||
declare void @foo_3_2xd(<2 x double>, <2 x double>, <2 x double>)
|
||||
declare void @foo_2d(double, double)
|
||||
|
||||
attributes #0 = { "unsafe-fp-math"="true" }
|
Loading…
x
Reference in New Issue
Block a user