[PowerPC] Add custom lowering for SELECT_CC fp128 using xsmaxcqp

Power ISA 3.1 adds xsmaxcqp/xsmincqp, the quad-precision type-C maximum/minimum instructions. This lets us improve instruction selection for llvm.maxnum.f128, llvm.minnum.f128, and f128 select_cc with ordered (ogt/olt) or don't-care (gt/lt) comparisons. Reviewed By: nemanjai, shchenz, amyk Differential Revision: https://reviews.llvm.org/D117006
2025-02-23 11:43:32 +00:00 · 2022-02-09 21:48:28 -05:00 · 2022-02-09 21:48:28 -05:00 · 097a95f2df
commit 097a95f2df
parent f3481f43bb
5 changed files with 90 additions and 12 deletions
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@ -1283,6 +1283,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
    }
+
+    if (Subtarget.hasP10Vector()) {
+      setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
+    }
  }

  if (Subtarget.pairedVectorMemops()) {
@ -1605,8 +1609,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((PPCISD::NodeType)Opcode) {
  case PPCISD::FIRST_NUMBER:    break;
  case PPCISD::FSEL:            return "PPCISD::FSEL";
-  case PPCISD::XSMAXCDP:        return "PPCISD::XSMAXCDP";
-  case PPCISD::XSMINCDP:        return "PPCISD::XSMINCDP";
+  case PPCISD::XSMAXC:          return "PPCISD::XSMAXC";
+  case PPCISD::XSMINC:          return "PPCISD::XSMINC";
  case PPCISD::FCFID:           return "PPCISD::FCFID";
  case PPCISD::FCFIDU:          return "PPCISD::FCFIDU";
  case PPCISD::FCFIDS:          return "PPCISD::FCFIDS";
@ -7898,7 +7902,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {

  SDNodeFlags Flags = Op.getNode()->getFlags();

-  // We have xsmaxcdp/xsmincdp which are OK to emit even in the
+  // We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
  // presence of infinities.
  if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
    switch (CC) {
@ -7906,10 +7910,10 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
      break;
    case ISD::SETOGT:
    case ISD::SETGT:
-      return DAG.getNode(PPCISD::XSMAXCDP, dl, Op.getValueType(), LHS, RHS);
+      return DAG.getNode(PPCISD::XSMAXC, dl, Op.getValueType(), LHS, RHS);
    case ISD::SETOLT:
    case ISD::SETLT:
-      return DAG.getNode(PPCISD::XSMINCDP, dl, Op.getValueType(), LHS, RHS);
+      return DAG.getNode(PPCISD::XSMINC, dl, Op.getValueType(), LHS, RHS);
    }
  }

--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@ -51,9 +51,9 @@ namespace llvm {
    ///
    FSEL,

-    /// XSMAXCDP, XSMINCDP - C-type min/max instructions.
-    XSMAXCDP,
-    XSMINCDP,
+    /// XSMAXC[DQ]P, XSMINC[DQ]P - C-type min/max instructions.
+    XSMAXC,
+    XSMINC,

    /// FCFID - The FCFID instruction, taking an f64 operand and producing
    /// an f64 value containing the FP representation of the integer that
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@ -198,8 +198,8 @@ def PPCfsel   : SDNode<"PPCISD::FSEL",
   // Type constraint for fsel.
   SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,
                        SDTCisFP<0>, SDTCisVT<1, f64>]>, []>;
-def PPCxsmaxc : SDNode<"PPCISD::XSMAXCDP", SDT_PPCFPMinMax, []>;
-def PPCxsminc : SDNode<"PPCISD::XSMINCDP", SDT_PPCFPMinMax, []>;
+def PPCxsmaxc : SDNode<"PPCISD::XSMAXC", SDT_PPCFPMinMax, []>;
+def PPCxsminc : SDNode<"PPCISD::XSMINC", SDT_PPCFPMinMax, []>;
 def PPChi       : SDNode<"PPCISD::Hi", SDTIntBinOp, []>;
 def PPClo       : SDNode<"PPCISD::Lo", SDTIntBinOp, []>;
 def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp,
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@ -2398,8 +2398,10 @@ let Predicates = [IsISA3_1] in {
 let Predicates = [IsISA3_1, HasVSX] in {
  def XVCVSPBF16 : XX2_XT6_XO5_XB6<60, 17, 475, "xvcvspbf16", vsrc, []>;
  def XVCVBF16SPN : XX2_XT6_XO5_XB6<60, 16, 475, "xvcvbf16spn", vsrc, []>;
-  def XSMAXCQP : X_VT5_VA5_VB5<63, 676, "xsmaxcqp", []>;
-  def XSMINCQP : X_VT5_VA5_VB5<63, 740, "xsmincqp", []>;
+  def XSMAXCQP : X_VT5_VA5_VB5<63, 676, "xsmaxcqp",
+                               [(set f128:$vT, (PPCxsmaxc f128:$vA, f128:$vB))]>;
+  def XSMINCQP : X_VT5_VA5_VB5<63, 740, "xsmincqp",
+                               [(set f128:$vT, (PPCxsminc f128:$vA, f128:$vB))]>;
 }

 // Multiclass defining patterns for Set Boolean Extension Reverse Instructions.
--- a/llvm/test/CodeGen/PowerPC/scalar-min-max-p10.ll
+++ b/llvm/test/CodeGen/PowerPC/scalar-min-max-p10.ll
@ -0,0 +1,72 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mcpu=pwr10 -ppc-asm-full-reg-names --enable-unsafe-fp-math \
+; RUN:   -verify-machineinstrs --enable-no-signed-zeros-fp-math \
+; RUN:   --enable-no-nans-fp-math \
+; RUN:   -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
+; RUN: llc -mcpu=pwr10 -ppc-asm-full-reg-names -verify-machineinstrs \
+; RUN:   -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
+define dso_local fp128 @testqmax(fp128 %a, fp128 %b) local_unnamed_addr {
+; CHECK-LABEL: testqmax:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xsmaxcqp v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %cmp = fcmp ogt fp128 %a, %b
+  %cond = select i1 %cmp, fp128 %a, fp128 %b
+  ret fp128 %cond
+}
+
+define dso_local fp128 @testqmin(fp128 %a, fp128 %b) local_unnamed_addr {
+; CHECK-LABEL: testqmin:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xsmincqp v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %cmp = fcmp olt fp128 %a, %b
+  %cond = select i1 %cmp, fp128 %a, fp128 %b
+  ret fp128 %cond
+}
+
+define dso_local fp128 @testqmax_fast(fp128 %a, fp128 %b) local_unnamed_addr {
+; CHECK-LABEL: testqmax_fast:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xsmaxcqp v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %cmp = fcmp nnan ninf ogt fp128 %a, %b
+  %cond = select i1 %cmp, fp128 %a, fp128 %b
+  ret fp128 %cond
+}
+
+define dso_local fp128 @testqmin_fast(fp128 %a, fp128 %b) local_unnamed_addr {
+; CHECK-LABEL: testqmin_fast:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xsmincqp v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %cmp = fcmp nnan ninf olt fp128 %a, %b
+  %cond = select i1 %cmp, fp128 %a, fp128 %b
+  ret fp128 %cond
+}
+
+declare fp128 @llvm.maxnum.f128(fp128, fp128)
+define dso_local fp128 @testq_intrinsic_maxnum(fp128 %a, fp128 %b) local_unnamed_addr {
+; CHECK-LABEL: testq_intrinsic_maxnum:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xsmaxcqp v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call fast fp128 @llvm.maxnum.f128(fp128 %a, fp128 %b)
+  ret fp128 %0
+}
+
+declare fp128 @llvm.minnum.f128(fp128, fp128)
+define dso_local fp128 @testq_intrinsic_minnum(fp128 %a, fp128 %b) local_unnamed_addr {
+; CHECK-LABEL: testq_intrinsic_minnum:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xsmincqp v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call fast fp128 @llvm.minnum.f128(fp128 %a, fp128 %b)
+  ret fp128 %0
+}