[PowerPC] Implement combineRepeatedFPDivisors

This does not matter on newer cores (where we can use reciprocal estimates in
fast-math mode anyway), but for older cores this allows us to generate better
fast-math code where we have multiple FDIVs with a common divisor.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@222710 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Hal Finkel 2014-11-24 23:45:21 +00:00
parent 521c9dc7d8
commit 5d6f185653
3 changed files with 62 additions and 0 deletions

View File

@ -7526,6 +7526,28 @@ SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand,
return SDValue(); return SDValue();
} }
bool PPCTargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const {
  // This hook only comes into play when unsafe-fp-math is enabled. On cores
  // that provide reciprocal estimates (which are used for division when
  // unsafe-fp-math is enabled) it is redundant with the default combiner
  // logic once the division -> reciprocal/multiply transformation has taken
  // place, so it matters more for older cores than for newer ones.

  // Smallest number of FDIVs sharing a divisor for which they are combined
  // into FMULs by the reciprocal: two for embedded cores with only one FP
  // pipeline, three for generic OOO cores.
  unsigned MinUsers;
  switch (Subtarget.getDarwinDirective()) {
  case PPC::DIR_440:
  case PPC::DIR_A2:
  case PPC::DIR_E500mc:
  case PPC::DIR_E5500:
    MinUsers = 2;
    break;
  default:
    MinUsers = 3;
    break;
  }

  return NumUsers >= MinUsers;
}
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
unsigned Bytes, int Dist, unsigned Bytes, int Dist,
SelectionDAG &DAG) { SelectionDAG &DAG) {

View File

@ -704,6 +704,7 @@ namespace llvm {
bool &UseOneConstNR) const override; bool &UseOneConstNR) const override;
SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI, SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
unsigned &RefinementSteps) const override; unsigned &RefinementSteps) const override;
/// Return true if NumUsers FDIVs that share a common divisor should be
/// combined into FMULs by the reciprocal of that divisor.
bool combineRepeatedFPDivisors(unsigned NumUsers) const override;
CCAssignFn *useFastISelCCs(unsigned Flag) const; CCAssignFn *useFastISelCCs(unsigned Flag) const;
}; };

View File

@ -0,0 +1,39 @@
; RUN: llc -mcpu=ppc64 < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
; Following test case checks:
; a / D; b / D; c / D;
; =>
; recip = 1.0 / D; a * recip; b * recip; c * recip;
; Three divisions share the divisor %D: expect exactly one fdiv (computing the
; reciprocal), no further fdiv before the multiplies, and three fmuls.
define void @three_fdiv_double(double %D, double %a, double %b, double %c) #0 {
; CHECK-LABEL: three_fdiv_double:
; CHECK: fdiv
; CHECK-NOT: fdiv
; CHECK: fmul
; CHECK: fmul
; CHECK: fmul
  %div = fdiv double %a, %D
  %div1 = fdiv double %b, %D
  %div2 = fdiv double %c, %D
  tail call void @foo_3d(double %div, double %div1, double %div2)
  ret void
}
; Only two divisions share the divisor %D: expect both to stay as fdiv, with
; no fmul emitted after them.
define void @two_fdiv_double(double %D, double %a, double %b) #0 {
; CHECK-LABEL: two_fdiv_double:
; CHECK: fdiv
; CHECK: fdiv
; CHECK-NOT: fmul
  %div = fdiv double %a, %D
  %div1 = fdiv double %b, %D
  tail call void @foo_2d(double %div, double %div1)
  ret void
}
declare void @foo_3d(double, double, double)
declare void @foo_3_2xd(<2 x double>, <2 x double>, <2 x double>)
declare void @foo_2d(double, double)
attributes #0 = { "unsafe-fp-math"="true" }