Split the estimate() interface into separate functions for each type. NFC.

It was hacky to use an opcode as a switch because it won't always match (rsqrte != sqrte), and it looks like we'll need to add more special casing per arch than I had hoped for. Eg, x86 will prefer a different NR estimate implementation. ARM will want to use it's 'step' instructions. There also don't appear to be any new estimate instructions in any arch in a long, long time. Altivec vloge and vexpte may have been the first and last in that field... git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218698 91177308-0d34-0410-b5e6-96231b3b80d8
2024-12-20 11:08:27 +00:00 · 2014-09-30 20:28:48 +00:00 · 2014-09-30 20:28:48 +00:00 · cafc85bf1e
commit cafc85bf1e
parent 9952c922c2
4 changed files with 61 additions and 34 deletions
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@ -2624,21 +2624,37 @@ public:
    return SDValue();
  }

-  /// Hooks for building estimates in place of, for example, slower divisions
-  /// and square roots. These are not builder functions themselves, just the
-  /// target-specific variables needed for building the estimate algorithm.
-
-  /// Return an estimate value for the input opcode and input operand.
-  /// The RefinementSteps output is the number of refinement iterations
-  /// required to generate a sufficient (though not necessarily IEEE-754
-  /// compliant) estimate for the value type.
+  /// Hooks for building estimates in place of slower divisions and square
+  /// roots.
+  
+  /// Return a reciprocal square root estimate value for the input operand.
+  /// The RefinementSteps output is the number of Newton-Raphson refinement
+  /// iterations required to generate a sufficient (though not necessarily
+  /// IEEE-754 compliant) estimate for the value type.
+  /// A target may choose to implement its own refinement within this function.
+  /// If that's true, then return '0' as the number of RefinementSteps to avoid
+  /// any further refinement of the estimate.
  /// An empty SDValue return means no estimate sequence can be created.
-  virtual SDValue getEstimate(unsigned Opcode, SDValue Operand,
+  virtual SDValue getRsqrtEstimate(SDValue Operand,
                              DAGCombinerInfo &DCI,
                              unsigned &RefinementSteps) const {
    return SDValue();
  }
-  
+
+  /// Return a reciprocal estimate value for the input operand.
+  /// The RefinementSteps output is the number of Newton-Raphson refinement
+  /// iterations required to generate a sufficient (though not necessarily
+  /// IEEE-754 compliant) estimate for the value type.
+  /// A target may choose to implement its own refinement within this function.
+  /// If that's true, then return '0' as the number of RefinementSteps to avoid
+  /// any further refinement of the estimate.
+  /// An empty SDValue return means no estimate sequence can be created.
+  virtual SDValue getRecipEstimate(SDValue Operand,
+                                   DAGCombinerInfo &DCI,
+                                   unsigned &RefinementSteps) const {
+    return SDValue();
+  }
+
  //===--------------------------------------------------------------------===//
  // Legalization utility functions
  //
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@ -11779,7 +11779,7 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) {
  TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);

  unsigned Iterations;
-  if (SDValue Est = TLI.getEstimate(ISD::FDIV, Op, DCI, Iterations)) {
+  if (SDValue Est = TLI.getRecipEstimate(Op, DCI, Iterations)) {
    // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
    // For the reciprocal, we need to find the zero of the function:
    //   F(X) = A X - 1 [which has a zero at X = 1/A]
@ -11820,7 +11820,7 @@ SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op) {
  // Expose the DAG combiner to the target combiner implementations.
  TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
  unsigned Iterations;
-  if (SDValue Est = TLI.getEstimate(ISD::FSQRT, Op, DCI, Iterations)) {
+  if (SDValue Est = TLI.getRsqrtEstimate(Op, DCI, Iterations)) {
    // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
    // For the reciprocal sqrt, we need to find the zero of the function:
    //   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@ -7458,25 +7458,14 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
 // Target Optimization Hooks
 //===----------------------------------------------------------------------===//

-SDValue PPCTargetLowering::getEstimate(unsigned Opcode, SDValue Operand,
-                                       DAGCombinerInfo &DCI,
-                                       unsigned &RefinementSteps) const {
+SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand,
+                                            DAGCombinerInfo &DCI,
+                                            unsigned &RefinementSteps) const {
  EVT VT = Operand.getValueType();
-  SDValue RV;
-  if (Opcode == ISD::FSQRT) {
-    if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
-        (VT == MVT::f64 && Subtarget.hasFRSQRTE())  ||
-        (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
-        (VT == MVT::v2f64 && Subtarget.hasVSX()))
-      RV = DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
-  } else if (Opcode == ISD::FDIV) {
-    if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
-        (VT == MVT::f64 && Subtarget.hasFRE())  ||
-        (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
-        (VT == MVT::v2f64 && Subtarget.hasVSX()))
-      RV = DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
-  }
-  if (RV.getNode()) {
+  if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
+      (VT == MVT::f64 && Subtarget.hasFRSQRTE())  ||
+      (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
+      (VT == MVT::v2f64 && Subtarget.hasVSX())) {
    // Convergence is quadratic, so we essentially double the number of digits
    // correct after every iteration. For both FRE and FRSQRTE, the minimum
    // architected relative accuracy is 2^-5. When hasRecipPrec(), this is
@ -7484,8 +7473,29 @@ SDValue PPCTargetLowering::getEstimate(unsigned Opcode, SDValue Operand,
    RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
    if (VT.getScalarType() == MVT::f64)
      ++RefinementSteps;
+    return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
  }
-  return RV;
+  return SDValue();
+}
+
+SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand,
+                                            DAGCombinerInfo &DCI,
+                                            unsigned &RefinementSteps) const {
+  EVT VT = Operand.getValueType();
+  if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
+      (VT == MVT::f64 && Subtarget.hasFRE())  ||
+      (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
+      (VT == MVT::v2f64 && Subtarget.hasVSX())) {
+    // Convergence is quadratic, so we essentially double the number of digits
+    // correct after every iteration. For both FRE and FRSQRTE, the minimum
+    // architected relative accuracy is 2^-5. When hasRecipPrec(), this is
+    // 2^-14. IEEE float has 23 digits and double has 52 digits.
+    RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
+    if (VT.getScalarType() == MVT::f64)
+      ++RefinementSteps;
+    return DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
+  }
+  return SDValue();
 }

 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@ -701,9 +701,10 @@ namespace llvm {
    SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const;
    SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;

-    SDValue getEstimate(unsigned Opcode, SDValue Operand,
-                        DAGCombinerInfo &DCI,
-                        unsigned &RefinementSteps) const override;
+    SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI,
+                             unsigned &RefinementSteps) const override;
+    SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
+                             unsigned &RefinementSteps) const override;

    CCAssignFn *useFastISelCCs(unsigned Flag) const;
  };