[PowerPC] Enhance the selection (ISD::VSELECT) of vector types

Make ISD::VSELECT legal whenever Altivec instructions are available; otherwise its default behavior is expansion, which is legalized at the type-legalization phase. Match vselect to xxsel when VSX is enabled, and to vsel otherwise. Differential Revision: https://reviews.llvm.org/D49531 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@346824 91177308-0d34-0410-b5e6-96231b3b80d8
2025-03-01 15:19:05 +00:00 · 2018-11-14 02:34:45 +00:00 · 2018-11-14 02:34:45 +00:00 · 382877c17c
commit 382877c17c
parent 7215ab82d4
5 changed files with 133 additions and 20 deletions
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@ -4736,14 +4736,6 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
    CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops);
    return;
  }
-  case ISD::VSELECT:
-    if (PPCSubTarget->hasVSX()) {
-      SDValue Ops[] = { N->getOperand(2), N->getOperand(1), N->getOperand(0) };
-      CurDAG->SelectNodeTo(N, PPC::XXSEL, N->getValueType(0), Ops);
-      return;
-    }
-    break;
-
  case ISD::VECTOR_SHUFFLE:
    if (PPCSubTarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
                                  N->getValueType(0) == MVT::v2i64)) {
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@ -586,6 +586,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
      AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
+      setOperationAction(ISD::VSELECT, VT, Legal);
      setOperationAction(ISD::SELECT_CC, VT, Promote);
      AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
      setOperationAction(ISD::STORE, VT, Promote);
@ -626,7 +627,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::BSWAP, VT, Expand);
-      setOperationAction(ISD::VSELECT, VT, Expand);
      setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
      setOperationAction(ISD::ROTL, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);
@ -727,12 +727,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
      setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
      setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);

-      setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
-      setOperationAction(ISD::VSELECT, MVT::v8i16, Legal);
-      setOperationAction(ISD::VSELECT, MVT::v4i32, Legal);
-      setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
-      setOperationAction(ISD::VSELECT, MVT::v2f64, Legal);
-
      // Share the Altivec comparison restrictions.
      setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@ -1051,6 +1051,20 @@ def : Pat<(v4f32 (ftrunc v4f32:$vA)),
 def : Pat<(v4f32 (fnearbyint v4f32:$vA)),
          (VRFIN $vA)>;

+// Vector selection: vselect(mask, t, f) maps to VSEL(f, t, mask).
+def : Pat<(v16i8 (vselect v16i8:$vA, v16i8:$vB, v16i8:$vC)),
+          (VSEL $vC, $vB, $vA)>;
+def : Pat<(v8i16 (vselect v8i16:$vA, v8i16:$vB, v8i16:$vC)),
+          (VSEL $vC, $vB, $vA)>;
+def : Pat<(v4i32 (vselect v4i32:$vA, v4i32:$vB, v4i32:$vC)),
+          (VSEL $vC, $vB, $vA)>;
+def : Pat<(v2i64 (vselect v2i64:$vA, v2i64:$vB, v2i64:$vC)),
+          (VSEL $vC, $vB, $vA)>;
+def : Pat<(v4f32 (vselect v4i32:$vA, v4f32:$vB, v4f32:$vC)),
+          (VSEL $vC, $vB, $vA)>;
+def : Pat<(v2f64 (vselect v2i64:$vA, v2f64:$vB, v2f64:$vC)),
+          (VSEL $vC, $vB, $vA)>;
+
 } // end HasAltivec

 def HasP8Altivec : Predicate<"PPCSubTarget->hasP8Altivec()">;
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@ -1152,6 +1152,26 @@ def : Pat<(int_ppc_vsx_xvrsqrtesp v4f32:$A),
 def : Pat<(int_ppc_vsx_xvrsqrtedp v2f64:$A),
          (XVRSQRTEDP $A)>;

+// Vector selection: vselect(mask, t, f) maps to XXSEL(f, t, mask).
+def : Pat<(v16i8 (vselect v16i8:$vA, v16i8:$vB, v16i8:$vC)),
+          (COPY_TO_REGCLASS 
+                 (XXSEL (COPY_TO_REGCLASS $vC, VSRC),
+                        (COPY_TO_REGCLASS $vB, VSRC), 
+                        (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>;
+def : Pat<(v8i16 (vselect v8i16:$vA, v8i16:$vB, v8i16:$vC)),
+          (COPY_TO_REGCLASS 
+                 (XXSEL (COPY_TO_REGCLASS $vC, VSRC),
+                        (COPY_TO_REGCLASS $vB, VSRC), 
+                        (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>;
+def : Pat<(vselect v4i32:$vA, v4i32:$vB, v4i32:$vC),
+          (XXSEL $vC, $vB, $vA)>;
+def : Pat<(vselect v2i64:$vA, v2i64:$vB, v2i64:$vC),
+          (XXSEL $vC, $vB, $vA)>;
+def : Pat<(vselect v4i32:$vA, v4f32:$vB, v4f32:$vC),
+          (XXSEL $vC, $vB, $vA)>;
+def : Pat<(vselect v2i64:$vA, v2f64:$vB, v2f64:$vC),
+          (XXSEL $vC, $vB, $vA)>;
+
 let Predicates = [IsLittleEndian] in {
 def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
          (f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
--- a/test/CodeGen/PowerPC/vec_select.ll
+++ b/test/CodeGen/PowerPC/vec_select.ll
@ -1,7 +1,100 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-linux-gnu -mattr=+altivec | FileCheck %s
+; RUN: llc -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s -mtriple=powerpc64-linux-gnu -mcpu=pwr8 -mattr=+vsx | FileCheck %s -check-prefix=CHECK-VSX
+; RUN: llc -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s -mtriple=powerpc64-linux-gnu -mcpu=pwr8 -mattr=-vsx | FileCheck %s -check-prefix=CHECK-NOVSX
+; RUN: llc -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s -mtriple=powerpc64le-linux-gnu -mcpu=pwr8 -mattr=+vsx | FileCheck %s -check-prefix=CHECK-VSX
+; RUN: llc -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s -mtriple=powerpc64le-linux-gnu -mcpu=pwr8 -mattr=-vsx | FileCheck %s -check-prefix=CHECK-NOVSX

-; CHECK: vsel_float
-define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
-  %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x float> %v1, <4 x float> %v2
-  ret <4 x float> %vsel
+define <4 x float> @test1(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d) {
+entry:
+  %m = fcmp oeq <4 x float> %c, %d
+  %v = select <4 x i1> %m, <4 x float> %a, <4 x float> %b
+  ret <4 x float> %v
 }
+; CHECK-VSX-LABEL: test1
+; CHECK-VSX: xvcmpeqsp [[REG1:(vs|v)[0-9]+]], v4, v5
+; CHECK-VSX: xxsel v2, v3, v2, [[REG1]]
+; CHECK-VSX: blr
+
+; CHECK-NOVSX-LABEL: test1
+; CHECK-NOVSX: vcmpeqfp v[[REG1:[0-9]+]], v4, v5
+; CHECK-NOVSX: vsel v2, v3, v2, v[[REG1]]
+; CHECK-NOVSX: blr
+
+define <2 x double> @test2(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %d) {
+entry:
+  %m = fcmp oeq <2 x double> %c, %d
+  %v = select <2 x i1> %m, <2 x double> %a, <2 x double> %b
+  ret <2 x double> %v
+}
+; CHECK-VSX-LABEL: test2
+; CHECK-VSX: xvcmpeqdp [[REG1:(vs|v)[0-9]+]], v4, v5
+; CHECK-VSX: xxsel v2, v3, v2, [[REG1]]
+; CHECK-VSX: blr
+
+; CHECK-NOVSX-LABEL: test2
+; CHECK-NOVSX: fcmp
+; CHECK-NOVSX: fcmp
+; CHECK-NOVSX: blr
+
+define <16 x i8> @test3(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
+entry:
+  %m = icmp eq <16 x i8> %c, %d
+  %v = select <16 x i1> %m, <16 x i8> %a, <16 x i8> %b
+  ret <16 x i8> %v
+}
+; CHECK-VSX-LABEL: test3
+; CHECK-VSX: vcmpequb v[[REG1:[0-9]+]], v4, v5
+; CHECK-VSX: xxsel v2, v3, v2, v[[REG1]]
+; CHECK-VSX: blr
+
+; CHECK-NOVSX-LABEL: test3
+; CHECK-NOVSX: vcmpequb v[[REG1:[0-9]+]], v4, v5
+; CHECK-NOVSX: vsel v2, v3, v2, v[[REG1]]
+; CHECK-NOVSX: blr
+
+define <8 x i16> @test4(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i16> %d) {
+entry:
+  %m = icmp eq <8 x i16> %c, %d
+  %v = select <8 x i1> %m, <8 x i16> %a, <8 x i16> %b
+  ret <8 x i16> %v
+}
+; CHECK-VSX-LABEL: test4
+; CHECK-VSX: vcmpequh v[[REG1:[0-9]+]], v4, v5
+; CHECK-VSX: xxsel v2, v3, v2, v[[REG1]]
+; CHECK-VSX: blr
+
+; CHECK-NOVSX-LABEL: test4
+; CHECK-NOVSX: vcmpequh v[[REG1:[0-9]+]], v4, v5
+; CHECK-NOVSX: vsel v2, v3, v2, v[[REG1]]
+; CHECK-NOVSX: blr
+
+define <4 x i32> @test5(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
+entry:
+  %m = icmp eq <4 x i32> %c, %d
+  %v = select <4 x i1> %m, <4 x i32> %a, <4 x i32> %b
+  ret <4 x i32> %v
+}
+; CHECK-VSX-LABEL: test5
+; CHECK-VSX: vcmpequw v[[REG1:[0-9]+]], v4, v5
+; CHECK-VSX: xxsel v2, v3, v2, v[[REG1]]
+; CHECK-VSX: blr
+
+; CHECK-NOVSX-LABEL: test5
+; CHECK-NOVSX: vcmpequw v[[REG1:[0-9]+]], v4, v5
+; CHECK-NOVSX: vsel v2, v3, v2, v[[REG1]]
+; CHECK-NOVSX: blr
+
+define <2 x i64> @test6(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i64> %d) {
+entry:
+  %m = icmp eq <2 x i64> %c, %d
+  %v = select <2 x i1> %m, <2 x i64> %a, <2 x i64> %b
+  ret <2 x i64> %v
+}
+; CHECK-VSX-LABEL: test6
+; CHECK-VSX: vcmpequd v[[REG1:[0-9]+]], v4, v5
+; CHECK-VSX: xxsel v2, v3, v2, v[[REG1]]
+; CHECK-VSX: blr
+
+; CHECK-NOVSX-LABEL: test6
+; CHECK-NOVSX: vcmpequd v[[REG1:[0-9]+]], v4, v5
+; CHECK-NOVSX: vsel v2, v3, v2, v[[REG1]]
+; CHECK-NOVSX: blr