[Power9] Builtins for ELF v.2 API conformance - back end portion

This patch corresponds to review:
https://reviews.llvm.org/D24396

This patch adds support for the "vector count trailing zeroes",
"vector compare not equal" and "vector compare not equal or zero"
instructions, as well as the "scalar count trailing zeroes" instructions.
It also changes vector negation to use XXLNOR (when VSX is enabled) so as
not to increase register pressure (previously this was done with a splat
immediate of all ones followed by an XXLXOR). This was done because the
altivec.h builtins (patch to follow) use vector negation, and burning an
additional register on the splat immediate is not optimal.
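
As an illustration of the negation change (not part of this commit; the
function name and CPU choice below are mine), a bitwise NOT of a vector
should now select a single xxlnor on a VSX-enabled target such as pwr8,
rather than a splat of all ones followed by an xxlxor:

; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu \
; RUN:   -verify-machineinstrs < %s | FileCheck %s
define <4 x i32> @testVNOT(<4 x i32> %a) {
entry:
  %not = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %not
; CHECK-LABEL: testVNOT
; CHECK: xxlnor
}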


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@282478 91177308-0d34-0410-b5e6-96231b3b80d8
Author: Nemanja Ivanovic
Date:   2016-09-27 08:42:12 +00:00
Parent: 941593b0ca
Commit: 7a5ffa3882
6 changed files with 276 additions and 50 deletions

@@ -250,6 +250,12 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
def int_ppc_altivec_vcmpgtuw : GCCBuiltin<"__builtin_altivec_vcmpgtuw">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpnew : GCCBuiltin<"__builtin_altivec_vcmpnew">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpnezw : GCCBuiltin<"__builtin_altivec_vcmpnezw">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpequh : GCCBuiltin<"__builtin_altivec_vcmpequh">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
@@ -260,6 +266,12 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
def int_ppc_altivec_vcmpgtuh : GCCBuiltin<"__builtin_altivec_vcmpgtuh">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpneh : GCCBuiltin<"__builtin_altivec_vcmpneh">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpnezh : GCCBuiltin<"__builtin_altivec_vcmpnezh">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpequb : GCCBuiltin<"__builtin_altivec_vcmpequb">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
@@ -270,6 +282,12 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
def int_ppc_altivec_vcmpgtub : GCCBuiltin<"__builtin_altivec_vcmpgtub">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpneb : GCCBuiltin<"__builtin_altivec_vcmpneb">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpnezb : GCCBuiltin<"__builtin_altivec_vcmpnezb">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
// Predicate Comparisons. The first operand specifies interpretation of CR6.
def int_ppc_altivec_vcmpbfp_p : GCCBuiltin<"__builtin_altivec_vcmpbfp_p">,
@@ -304,6 +322,12 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
def int_ppc_altivec_vcmpgtuw_p : GCCBuiltin<"__builtin_altivec_vcmpgtuw_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4i32_ty,llvm_v4i32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpnew_p : GCCBuiltin<"__builtin_altivec_vcmpnew_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4i32_ty,llvm_v4i32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpnezw_p : GCCBuiltin<"__builtin_altivec_vcmpnezw_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4i32_ty,llvm_v4i32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpequh_p : GCCBuiltin<"__builtin_altivec_vcmpequh_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v8i16_ty,llvm_v8i16_ty],
@@ -314,6 +338,12 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
def int_ppc_altivec_vcmpgtuh_p : GCCBuiltin<"__builtin_altivec_vcmpgtuh_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v8i16_ty,llvm_v8i16_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpneh_p : GCCBuiltin<"__builtin_altivec_vcmpneh_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v8i16_ty,llvm_v8i16_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpnezh_p : GCCBuiltin<"__builtin_altivec_vcmpnezh_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v8i16_ty,llvm_v8i16_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpequb_p : GCCBuiltin<"__builtin_altivec_vcmpequb_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v16i8_ty,llvm_v16i8_ty],
@@ -324,6 +354,12 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
def int_ppc_altivec_vcmpgtub_p : GCCBuiltin<"__builtin_altivec_vcmpgtub_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v16i8_ty,llvm_v16i8_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpneb_p : GCCBuiltin<"__builtin_altivec_vcmpneb_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v16i8_ty,llvm_v16i8_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpnezb_p : GCCBuiltin<"__builtin_altivec_vcmpnezb_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v16i8_ty,llvm_v16i8_ty],
[IntrNoMem]>;
}
// Vector average.

@@ -216,11 +216,17 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FROUND, MVT::f32, Legal);
}
// PowerPC does not have BSWAP, CTPOP or CTTZ
// PowerPC does not have BSWAP
// CTPOP and CTTZ were introduced in P8 and P9, respectively
setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
if (Subtarget.isISA3_0()) {
setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
} else {
setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
}
if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
setOperationAction(ISD::CTPOP, MVT::i32 , Legal);
@@ -433,6 +439,12 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::CTLZ, VT, Expand);
}
// Vector instructions introduced in P9
if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
setOperationAction(ISD::CTTZ, VT, Legal);
else
setOperationAction(ISD::CTTZ, VT, Expand);
// We promote all shuffles to v16i8.
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
@@ -489,7 +501,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::BSWAP, VT, Expand);
setOperationAction(ISD::CTTZ, VT, Expand);
setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
setOperationAction(ISD::ROTL, VT, Expand);
@@ -7710,6 +7721,27 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
} else
return false;
break;
case Intrinsic::ppc_altivec_vcmpneb_p:
case Intrinsic::ppc_altivec_vcmpneh_p:
case Intrinsic::ppc_altivec_vcmpnew_p:
case Intrinsic::ppc_altivec_vcmpnezb_p:
case Intrinsic::ppc_altivec_vcmpnezh_p:
case Intrinsic::ppc_altivec_vcmpnezw_p:
if (Subtarget.hasP9Altivec()) {
switch(IntrinsicID) {
default: llvm_unreachable("Unknown comparison intrinsic.");
case Intrinsic::ppc_altivec_vcmpneb_p: CompareOpc = 7; break;
case Intrinsic::ppc_altivec_vcmpneh_p: CompareOpc = 71; break;
case Intrinsic::ppc_altivec_vcmpnew_p: CompareOpc = 135; break;
case Intrinsic::ppc_altivec_vcmpnezb_p: CompareOpc = 263; break;
case Intrinsic::ppc_altivec_vcmpnezh_p: CompareOpc = 327; break;
case Intrinsic::ppc_altivec_vcmpnezw_p: CompareOpc = 391; break;
}
isDot = 1;
} else
return false;
break;
case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
@@ -7772,6 +7804,26 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
return false;
break;
case Intrinsic::ppc_altivec_vcmpneb:
case Intrinsic::ppc_altivec_vcmpneh:
case Intrinsic::ppc_altivec_vcmpnew:
case Intrinsic::ppc_altivec_vcmpnezb:
case Intrinsic::ppc_altivec_vcmpnezh:
case Intrinsic::ppc_altivec_vcmpnezw:
if (Subtarget.hasP9Altivec()) {
switch (IntrinsicID) {
default: llvm_unreachable("Unknown comparison intrinsic.");
case Intrinsic::ppc_altivec_vcmpneb: CompareOpc = 7; break;
case Intrinsic::ppc_altivec_vcmpneh: CompareOpc = 71; break;
case Intrinsic::ppc_altivec_vcmpnew: CompareOpc = 135; break;
case Intrinsic::ppc_altivec_vcmpnezb: CompareOpc = 263; break;
case Intrinsic::ppc_altivec_vcmpnezh: CompareOpc = 327; break;
case Intrinsic::ppc_altivec_vcmpnezw: CompareOpc = 391; break;
}
isDot = 0;
} else
return false;
break;
case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break;

@@ -1218,34 +1218,23 @@ def VSBOX : VXBX_Int_Ty<1480, "vsbox", int_ppc_altivec_crypto_vsbox, v2i64>;
def HasP9Altivec : Predicate<"PPCSubTarget->hasP9Altivec()">;
let Predicates = [HasP9Altivec] in {
// Vector Compare Not Equal (Zero)
class P9VCMP<bits<10> xo, string asmstr, ValueType Ty>
: VXRForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), asmstr,
IIC_VecFPCompare, []>;
class P9VCMPo<bits<10> xo, string asmstr, ValueType Ty>
: VXRForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), asmstr,
IIC_VecFPCompare, []> {
let Defs = [CR6];
let RC = 1;
}
// i8 element comparisons.
def VCMPNEB : P9VCMP < 7, "vcmpneb $vD, $vA, $vB" , v16i8>;
def VCMPNEBo : P9VCMPo< 7, "vcmpneb. $vD, $vA, $vB" , v16i8>;
def VCMPNEZB : P9VCMP <263, "vcmpnezb $vD, $vA, $vB" , v16i8>;
def VCMPNEZBo : P9VCMPo<263, "vcmpnezb. $vD, $vA, $vB", v16i8>;
def VCMPNEB : VCMP < 7, "vcmpneb $vD, $vA, $vB" , v16i8>;
def VCMPNEBo : VCMPo < 7, "vcmpneb. $vD, $vA, $vB" , v16i8>;
def VCMPNEZB : VCMP <263, "vcmpnezb $vD, $vA, $vB" , v16i8>;
def VCMPNEZBo : VCMPo<263, "vcmpnezb. $vD, $vA, $vB", v16i8>;
// i16 element comparisons.
def VCMPNEH : P9VCMP < 71, "vcmpneh $vD, $vA, $vB" , v8i16>;
def VCMPNEHo : P9VCMPo< 71, "vcmpneh. $vD, $vA, $vB" , v8i16>;
def VCMPNEZH : P9VCMP <327, "vcmpnezh $vD, $vA, $vB" , v8i16>;
def VCMPNEZHo : P9VCMPo<327, "vcmpnezh. $vD, $vA, $vB", v8i16>;
def VCMPNEH : VCMP < 71, "vcmpneh $vD, $vA, $vB" , v8i16>;
def VCMPNEHo : VCMPo< 71, "vcmpneh. $vD, $vA, $vB" , v8i16>;
def VCMPNEZH : VCMP <327, "vcmpnezh $vD, $vA, $vB" , v8i16>;
def VCMPNEZHo : VCMPo<327, "vcmpnezh. $vD, $vA, $vB", v8i16>;
// i32 element comparisons.
def VCMPNEW : P9VCMP <135, "vcmpnew $vD, $vA, $vB" , v4i32>;
def VCMPNEWo : P9VCMPo<135, "vcmpnew. $vD, $vA, $vB" , v4i32>;
def VCMPNEZW : P9VCMP <391, "vcmpnezw $vD, $vA, $vB" , v4i32>;
def VCMPNEZWo : P9VCMPo<391, "vcmpnezw. $vD, $vA, $vB", v4i32>;
def VCMPNEW : VCMP <135, "vcmpnew $vD, $vA, $vB" , v4i32>;
def VCMPNEWo : VCMPo<135, "vcmpnew. $vD, $vA, $vB" , v4i32>;
def VCMPNEZW : VCMP <391, "vcmpnezw $vD, $vA, $vB" , v4i32>;
def VCMPNEZWo : VCMPo<391, "vcmpnezw. $vD, $vA, $vB", v4i32>;
// VX-Form: [PO VRT / UIM VRB XO].
// We use VXForm_1 to implement it, that is, we use "VRA" (5 bit) to represent
@@ -1288,10 +1277,14 @@ def VCLZLSBB : VXForm_RD5_XO5_RS5<1538, 0, (outs g8rc:$rD), (ins vrrc:$vB),
def VCTZLSBB : VXForm_RD5_XO5_RS5<1538, 1, (outs g8rc:$rD), (ins vrrc:$vB),
"vctzlsbb $rD, $vB", IIC_VecGeneral, []>;
// Vector Count Trailing Zeros
def VCTZB : VX_VT5_EO5_VB5<1538, 28, "vctzb", []>;
def VCTZH : VX_VT5_EO5_VB5<1538, 29, "vctzh", []>;
def VCTZW : VX_VT5_EO5_VB5<1538, 30, "vctzw", []>;
def VCTZD : VX_VT5_EO5_VB5<1538, 31, "vctzd", []>;
def VCTZB : VX_VT5_EO5_VB5<1538, 28, "vctzb",
[(set v16i8:$vD, (cttz v16i8:$vB))]>;
def VCTZH : VX_VT5_EO5_VB5<1538, 29, "vctzh",
[(set v8i16:$vD, (cttz v8i16:$vB))]>;
def VCTZW : VX_VT5_EO5_VB5<1538, 30, "vctzw",
[(set v4i32:$vD, (cttz v4i32:$vB))]>;
def VCTZD : VX_VT5_EO5_VB5<1538, 31, "vctzd",
[(set v2i64:$vD, (cttz v2i64:$vB))]>;
// Vector Extend Sign
def VEXTSB2W : VX_VT5_EO5_VB5<1538, 16, "vextsb2w", []>;

@@ -852,6 +852,8 @@ def : InstAlias<"xxswapd $XT, $XB",
let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
def : Pat<(v4i32 (vnot_ppc v4i32:$A)),
(v4i32 (XXLNOR $A, $A))>;
let Predicates = [IsBigEndian] in {
def : Pat<(v2f64 (scalar_to_vector f64:$A)),
(v2f64 (SUBREG_TO_REG (i64 1), $A, sub_64))>;

@@ -0,0 +1,147 @@
; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -verify-machineinstrs < %s | FileCheck %s
; Function Attrs: nounwind readnone
define zeroext i32 @testCTZ32(i32 signext %a) {
entry:
%0 = tail call i32 @llvm.cttz.i32(i32 %a, i1 false)
ret i32 %0
; CHECK-LABEL: testCTZ32
; CHECK: cnttzw 3, 3
}
; Function Attrs: nounwind readnone
declare i32 @llvm.cttz.i32(i32, i1)
; Function Attrs: nounwind readnone
define zeroext i32 @testCTZ64(i64 %a) {
entry:
%0 = tail call i64 @llvm.cttz.i64(i64 %a, i1 false)
%cast = trunc i64 %0 to i32
ret i32 %cast
; CHECK-LABEL: testCTZ64
; CHECK: cnttzd 3, 3
}
; Function Attrs: nounwind readnone
declare i64 @llvm.cttz.i64(i64, i1)
; Function Attrs: nounwind readnone
define <16 x i8> @testVCMPNEB(<16 x i8> %a, <16 x i8> %b) {
entry:
%0 = tail call <16 x i8> @llvm.ppc.altivec.vcmpneb(<16 x i8> %a, <16 x i8> %b)
ret <16 x i8> %0
; CHECK-LABEL: testVCMPNEB
; CHECK: vcmpneb 2, 2
}
; Function Attrs: nounwind readnone
declare <16 x i8> @llvm.ppc.altivec.vcmpneb(<16 x i8>, <16 x i8>)
; Function Attrs: nounwind readnone
define <16 x i8> @testVCMPNEZB(<16 x i8> %a, <16 x i8> %b) {
entry:
%0 = tail call <16 x i8> @llvm.ppc.altivec.vcmpnezb(<16 x i8> %a, <16 x i8> %b)
ret <16 x i8> %0
; CHECK-LABEL: testVCMPNEZB
; CHECK: vcmpnezb 2, 2
}
; Function Attrs: nounwind readnone
declare <16 x i8> @llvm.ppc.altivec.vcmpnezb(<16 x i8>, <16 x i8>)
; Function Attrs: nounwind readnone
define <8 x i16> @testVCMPNEH(<8 x i16> %a, <8 x i16> %b) {
entry:
%0 = tail call <8 x i16> @llvm.ppc.altivec.vcmpneh(<8 x i16> %a, <8 x i16> %b)
ret <8 x i16> %0
; CHECK-LABEL: testVCMPNEH
; CHECK: vcmpneh 2, 2
}
; Function Attrs: nounwind readnone
declare <8 x i16> @llvm.ppc.altivec.vcmpneh(<8 x i16>, <8 x i16>)
; Function Attrs: nounwind readnone
define <8 x i16> @testVCMPNEZH(<8 x i16> %a, <8 x i16> %b) {
entry:
%0 = tail call <8 x i16> @llvm.ppc.altivec.vcmpnezh(<8 x i16> %a, <8 x i16> %b)
ret <8 x i16> %0
; CHECK-LABEL: testVCMPNEZH
; CHECK: vcmpnezh 2, 2
}
; Function Attrs: nounwind readnone
declare <8 x i16> @llvm.ppc.altivec.vcmpnezh(<8 x i16>, <8 x i16>)
; Function Attrs: nounwind readnone
define <4 x i32> @testVCMPNEW(<4 x i32> %a, <4 x i32> %b) {
entry:
%0 = tail call <4 x i32> @llvm.ppc.altivec.vcmpnew(<4 x i32> %a, <4 x i32> %b)
ret <4 x i32> %0
; CHECK-LABEL: testVCMPNEW
; CHECK: vcmpnew 2, 2
}
; Function Attrs: nounwind readnone
declare <4 x i32> @llvm.ppc.altivec.vcmpnew(<4 x i32>, <4 x i32>)
; Function Attrs: nounwind readnone
define <4 x i32> @testVCMPNEZW(<4 x i32> %a, <4 x i32> %b) {
entry:
%0 = tail call <4 x i32> @llvm.ppc.altivec.vcmpnezw(<4 x i32> %a, <4 x i32> %b)
ret <4 x i32> %0
; CHECK-LABEL: testVCMPNEZW
; CHECK: vcmpnezw 2, 2
}
; Function Attrs: nounwind readnone
declare <4 x i32> @llvm.ppc.altivec.vcmpnezw(<4 x i32>, <4 x i32>)
; Function Attrs: nounwind readnone
define <16 x i8> @testVCNTTZB(<16 x i8> %a) {
entry:
%0 = tail call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
ret <16 x i8> %0
; CHECK-LABEL: testVCNTTZB
; CHECK: vctzb 2, 2
}
; Function Attrs: nounwind readnone
define <8 x i16> @testVCNTTZH(<8 x i16> %a) {
entry:
%0 = tail call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
ret <8 x i16> %0
; CHECK-LABEL: testVCNTTZH
; CHECK: vctzh 2, 2
}
; Function Attrs: nounwind readnone
define <4 x i32> @testVCNTTZW(<4 x i32> %a) {
entry:
%0 = tail call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
ret <4 x i32> %0
; CHECK-LABEL: testVCNTTZW
; CHECK: vctzw 2, 2
}
; Function Attrs: nounwind readnone
define <2 x i64> @testVCNTTZD(<2 x i64> %a) {
entry:
%0 = tail call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
ret <2 x i64> %0
; CHECK-LABEL: testVCNTTZD
; CHECK: vctzd 2, 2
}
; Function Attrs: nounwind readnone
declare <16 x i8> @llvm.cttz.v16i8(<16 x i8>, i1)
; Function Attrs: nounwind readnone
declare <8 x i16> @llvm.cttz.v8i16(<8 x i16>, i1)
; Function Attrs: nounwind readnone
declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1)
; Function Attrs: nounwind readnone
declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1)
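
The record (dot) forms of the new comparisons are only reachable through the
*_p predicate intrinsics defined earlier and are not exercised above. A sketch
in the same style that could be appended to this test (the function name is
mine; the leading i32 selects how CR6 is interpreted, as the comment in the
intrinsic definitions notes, and I would expect the record form vcmpnew. to
be selected):

; Function Attrs: nounwind readnone
define zeroext i32 @testVCMPNEW_p(<4 x i32> %a, <4 x i32> %b) {
entry:
  %0 = tail call i32 @llvm.ppc.altivec.vcmpnew.p(i32 2, <4 x i32> %a, <4 x i32> %b)
  ret i32 %0
; CHECK-LABEL: testVCMPNEW_p
; CHECK: vcmpnew.
}
; Function Attrs: nounwind readnone
declare i32 @llvm.ppc.altivec.vcmpnew.p(i32, <4 x i32>, <4 x i32>)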

@@ -346,9 +346,7 @@ entry:
; CHECK-FISL-LABEL: @test17
; CHECK-FISL: vor 4, 3, 3
; CHECK-FISL: vor 5, 2, 2
; CHECK-FISL: vspltisb 2, -1
; CHECK-FISL: vor 0, 2, 2
; CHECK-FISL: xxlxor 36, 36, 32
; CHECK-FISL: xxlnor 36, 36, 36
; CHECK-FISL: xxland 36, 37, 36
; CHECK-FISL: vor 2, 4, 4
; CHECK-FISL: blr
@@ -369,18 +367,17 @@ entry:
; CHECK-REG: blr
; CHECK-FISL-LABEL: @test18
; CHECK-FISL: vspltisb 4, -1
; CHECK-FISL: vor 4, 3, 3
; CHECK-FISL: vor 5, 3, 3
; CHECK-FISL: xxlnor 36, 36, 37
; CHECK-FISL: vor 0, 4, 4
; CHECK-FISL: xxlxor 37, 37, 32
; CHECK-FISL: vor 4, 5, 5
; CHECK-FISL: vor 5, 2, 2
; CHECK-FISL: vor 0, 3, 3
; CHECK-FISL: xxlandc 37, 37, 32
; CHECK-FISL: vor 2, 5, 5
; CHECK-FISL: vor 4, 2, 2
; CHECK-FISL: vor 5, 3, 3
; CHECK-FISL: xxlandc 36, 36, 37
; CHECK-FISL: vor 2, 4, 4
; CHECK-FISL: lis 0, -1
; CHECK-FISL: ori 0, 0, 65520
; CHECK-FISL: stvx 4, 1, 0
; CHECK-FISL: stvx 0, 1, 0
; CHECK-FISL: blr
; CHECK-LE-LABEL: @test18
@@ -399,18 +396,17 @@ entry:
; CHECK-REG: blr
; CHECK-FISL-LABEL: @test19
; CHECK-FISL: vspltisb 4, -1
; CHECK-FISL: vor 4, 3, 3
; CHECK-FISL: vor 5, 3, 3
; CHECK-FISL: xxlnor 36, 36, 37
; CHECK-FISL: vor 0, 4, 4
; CHECK-FISL: xxlxor 37, 37, 32
; CHECK-FISL: vor 4, 5, 5
; CHECK-FISL: vor 5, 2, 2
; CHECK-FISL: vor 0, 3, 3
; CHECK-FISL: xxlandc 37, 37, 32
; CHECK-FISL: vor 2, 5, 5
; CHECK-FISL: vor 4, 2, 2
; CHECK-FISL: vor 5, 3, 3
; CHECK-FISL: xxlandc 36, 36, 37
; CHECK-FISL: vor 2, 4, 4
; CHECK-FISL: lis 0, -1
; CHECK-FISL: ori 0, 0, 65520
; CHECK-FISL: stvx 4, 1, 0
; CHECK-FISL: stvx 0, 1, 0
; CHECK-FISL: blr
; CHECK-LE-LABEL: @test19