[PowerPC] Add custom lowering for SELECT_CC fp128 using xsmaxcqp

Power ISA 3.1 adds xsmaxcqp/xsmincqp for quad-precision type-c max/min selection.
This makes it possible to improve instruction selection for llvm.maxnum.f128,
llvm.minnum.f128, and select_cc with ordered gt/lt or don't-care gt/lt conditions.

Reviewed By: nemanjai, shchenz, amyk

Differential Revision: https://reviews.llvm.org/D117006
This commit is contained in:
Ting Wang 2022-02-09 21:48:28 -05:00
parent f3481f43bb
commit 097a95f2df
5 changed files with 90 additions and 12 deletions

View File

@ -1283,6 +1283,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
}
if (Subtarget.hasP10Vector()) {
setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
}
}
if (Subtarget.pairedVectorMemops()) {
@ -1605,8 +1609,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((PPCISD::NodeType)Opcode) {
case PPCISD::FIRST_NUMBER: break;
case PPCISD::FSEL: return "PPCISD::FSEL";
case PPCISD::XSMAXCDP: return "PPCISD::XSMAXCDP";
case PPCISD::XSMINCDP: return "PPCISD::XSMINCDP";
case PPCISD::XSMAXC: return "PPCISD::XSMAXC";
case PPCISD::XSMINC: return "PPCISD::XSMINC";
case PPCISD::FCFID: return "PPCISD::FCFID";
case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
@ -7898,7 +7902,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDNodeFlags Flags = Op.getNode()->getFlags();
// We have xsmaxcdp/xsmincdp which are OK to emit even in the
// We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
// presence of infinities.
if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
switch (CC) {
@ -7906,10 +7910,10 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
break;
case ISD::SETOGT:
case ISD::SETGT:
return DAG.getNode(PPCISD::XSMAXCDP, dl, Op.getValueType(), LHS, RHS);
return DAG.getNode(PPCISD::XSMAXC, dl, Op.getValueType(), LHS, RHS);
case ISD::SETOLT:
case ISD::SETLT:
return DAG.getNode(PPCISD::XSMINCDP, dl, Op.getValueType(), LHS, RHS);
return DAG.getNode(PPCISD::XSMINC, dl, Op.getValueType(), LHS, RHS);
}
}

View File

@ -51,9 +51,9 @@ namespace llvm {
///
FSEL,
/// XSMAXCDP, XSMINCDP - C-type min/max instructions.
XSMAXCDP,
XSMINCDP,
/// XSMAXC[DQ]P, XSMINC[DQ]P - C-type min/max instructions.
XSMAXC,
XSMINC,
/// FCFID - The FCFID instruction, taking an f64 operand and producing
/// an f64 value containing the FP representation of the integer that

View File

@ -198,8 +198,8 @@ def PPCfsel : SDNode<"PPCISD::FSEL",
// Type constraint for fsel.
SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,
SDTCisFP<0>, SDTCisVT<1, f64>]>, []>;
def PPCxsmaxc : SDNode<"PPCISD::XSMAXCDP", SDT_PPCFPMinMax, []>;
def PPCxsminc : SDNode<"PPCISD::XSMINCDP", SDT_PPCFPMinMax, []>;
def PPCxsmaxc : SDNode<"PPCISD::XSMAXC", SDT_PPCFPMinMax, []>;
def PPCxsminc : SDNode<"PPCISD::XSMINC", SDT_PPCFPMinMax, []>;
def PPChi : SDNode<"PPCISD::Hi", SDTIntBinOp, []>;
def PPClo : SDNode<"PPCISD::Lo", SDTIntBinOp, []>;
def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp,

View File

@ -2398,8 +2398,10 @@ let Predicates = [IsISA3_1] in {
let Predicates = [IsISA3_1, HasVSX] in {
def XVCVSPBF16 : XX2_XT6_XO5_XB6<60, 17, 475, "xvcvspbf16", vsrc, []>;
def XVCVBF16SPN : XX2_XT6_XO5_XB6<60, 16, 475, "xvcvbf16spn", vsrc, []>;
def XSMAXCQP : X_VT5_VA5_VB5<63, 676, "xsmaxcqp", []>;
def XSMINCQP : X_VT5_VA5_VB5<63, 740, "xsmincqp", []>;
def XSMAXCQP : X_VT5_VA5_VB5<63, 676, "xsmaxcqp",
[(set f128:$vT, (PPCxsmaxc f128:$vA, f128:$vB))]>;
def XSMINCQP : X_VT5_VA5_VB5<63, 740, "xsmincqp",
[(set f128:$vT, (PPCxsminc f128:$vA, f128:$vB))]>;
}
// Multiclass defining patterns for Set Boolean Extension Reverse Instructions.

View File

@ -0,0 +1,72 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mcpu=pwr10 -ppc-asm-full-reg-names --enable-unsafe-fp-math \
; RUN: -verify-machineinstrs --enable-no-signed-zeros-fp-math \
; RUN: --enable-no-nans-fp-math \
; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
; RUN: llc -mcpu=pwr10 -ppc-asm-full-reg-names -verify-machineinstrs \
; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
; Max via compare-and-select: returns %a when `fcmp ogt %a, %b` holds and %b
; otherwise, with no fast-math flags on the compare. The checks expect the
; whole pattern to be emitted as a single xsmaxcqp.
define dso_local fp128 @testqmax(fp128 %a, fp128 %b) local_unnamed_addr {
; CHECK-LABEL: testqmax:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xsmaxcqp v2, v2, v3
; CHECK-NEXT: blr
entry:
%cmp = fcmp ogt fp128 %a, %b
%cond = select i1 %cmp, fp128 %a, fp128 %b
ret fp128 %cond
}
; Min via compare-and-select: returns %a when `fcmp olt %a, %b` holds and %b
; otherwise, with no fast-math flags on the compare. The checks expect the
; whole pattern to be emitted as a single xsmincqp.
define dso_local fp128 @testqmin(fp128 %a, fp128 %b) local_unnamed_addr {
; CHECK-LABEL: testqmin:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xsmincqp v2, v2, v3
; CHECK-NEXT: blr
entry:
%cmp = fcmp olt fp128 %a, %b
%cond = select i1 %cmp, fp128 %a, fp128 %b
ret fp128 %cond
}
; Same select pattern as an ordered greater-than max, but the compare carries
; the `nnan ninf` fast-math flags. The checks expect the same single xsmaxcqp.
define dso_local fp128 @testqmax_fast(fp128 %a, fp128 %b) local_unnamed_addr {
; CHECK-LABEL: testqmax_fast:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xsmaxcqp v2, v2, v3
; CHECK-NEXT: blr
entry:
%cmp = fcmp nnan ninf ogt fp128 %a, %b
%cond = select i1 %cmp, fp128 %a, fp128 %b
ret fp128 %cond
}
; Same select pattern as an ordered less-than min, but the compare carries
; the `nnan ninf` fast-math flags. The checks expect the same single xsmincqp.
define dso_local fp128 @testqmin_fast(fp128 %a, fp128 %b) local_unnamed_addr {
; CHECK-LABEL: testqmin_fast:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xsmincqp v2, v2, v3
; CHECK-NEXT: blr
entry:
%cmp = fcmp nnan ninf olt fp128 %a, %b
%cond = select i1 %cmp, fp128 %a, fp128 %b
ret fp128 %cond
}
declare fp128 @llvm.maxnum.f128(fp128, fp128)
; Calls the llvm.maxnum.f128 intrinsic with the `fast` flag on the call.
; The checks expect it to be emitted as a single xsmaxcqp rather than a call.
define dso_local fp128 @testq_intrinsic_maxnum(fp128 %a, fp128 %b) local_unnamed_addr {
; CHECK-LABEL: testq_intrinsic_maxnum:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xsmaxcqp v2, v2, v3
; CHECK-NEXT: blr
entry:
%0 = tail call fast fp128 @llvm.maxnum.f128(fp128 %a, fp128 %b)
ret fp128 %0
}
declare fp128 @llvm.minnum.f128(fp128, fp128)
; Calls the llvm.minnum.f128 intrinsic with the `fast` flag on the call.
; The checks expect it to be emitted as a single xsmincqp rather than a call.
define dso_local fp128 @testq_intrinsic_minnum(fp128 %a, fp128 %b) local_unnamed_addr {
; CHECK-LABEL: testq_intrinsic_minnum:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xsmincqp v2, v2, v3
; CHECK-NEXT: blr
entry:
%0 = tail call fast fp128 @llvm.minnum.f128(fp128 %a, fp128 %b)
ret fp128 %0
}