Add LLVM support for PPC cryptography builtins

Review: http://reviews.llvm.org/D7955


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@231285 91177308-0d34-0410-b5e6-96231b3b80d8
Author: Nemanja Ivanovic
Date: 2015-03-04 20:44:33 +00:00
Parent: c56226c6d1
Commit: b69d556c37
14 changed files with 539 additions and 3 deletions


@@ -73,7 +73,7 @@ class PowerPC_Vec_FFF_Intrinsic<string GCCIntSuffix>
[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
[IntrNoMem]>;
-/// PowerPC_Vec_BBB_Intrinsic - A PowerPC intrinsic that takes two v16f8
+/// PowerPC_Vec_BBB_Intrinsic - A PowerPC intrinsic that takes two v16i8
/// vectors and returns one. These intrinsics have no side effects.
class PowerPC_Vec_BBB_Intrinsic<string GCCIntSuffix>
: PowerPC_Vec_Intrinsic<GCCIntSuffix,
@@ -126,7 +126,6 @@ class PowerPC_VSX_Sca_DDD_Intrinsic<string GCCIntSuffix>
[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
[IntrNoMem]>;
//===----------------------------------------------------------------------===//
// PowerPC Altivec Intrinsic Definitions.
@@ -559,6 +558,41 @@ def int_ppc_altivec_vlogefp : PowerPC_Vec_FF_Intrinsic<"vlogefp">;
def int_ppc_altivec_vrefp : PowerPC_Vec_FF_Intrinsic<"vrefp">;
def int_ppc_altivec_vrsqrtefp : PowerPC_Vec_FF_Intrinsic<"vrsqrtefp">;
// Power8 Intrinsics
// Crypto
let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
def int_ppc_altivec_crypto_vsbox :
GCCBuiltin<"__builtin_altivec_crypto_vsbox">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
def int_ppc_altivec_crypto_vpermxor :
GCCBuiltin<"__builtin_altivec_crypto_vpermxor">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
// These need diagnostics for invalid arguments, so they don't inherit from GCCBuiltin.
def int_ppc_altivec_crypto_vshasigmad :
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_ppc_altivec_crypto_vshasigmaw :
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
}
def int_ppc_altivec_crypto_vcipher :
PowerPC_Vec_DDD_Intrinsic<"crypto_vcipher">;
def int_ppc_altivec_crypto_vcipherlast :
PowerPC_Vec_DDD_Intrinsic<"crypto_vcipherlast">;
def int_ppc_altivec_crypto_vncipher :
PowerPC_Vec_DDD_Intrinsic<"crypto_vncipher">;
def int_ppc_altivec_crypto_vncipherlast :
PowerPC_Vec_DDD_Intrinsic<"crypto_vncipherlast">;
def int_ppc_altivec_crypto_vpmsumb :
PowerPC_Vec_BBB_Intrinsic<"crypto_vpmsumb">;
def int_ppc_altivec_crypto_vpmsumh :
PowerPC_Vec_HHH_Intrinsic<"crypto_vpmsumh">;
def int_ppc_altivec_crypto_vpmsumw :
PowerPC_Vec_WWW_Intrinsic<"crypto_vpmsumw">;
def int_ppc_altivec_crypto_vpmsumd :
PowerPC_Vec_DDD_Intrinsic<"crypto_vpmsumd">;
//===----------------------------------------------------------------------===//
// PowerPC VSX Intrinsic Definitions.
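A note on the two SHA intrinsics above: vshasigmad and vshasigmaw take their ST and SIX control fields as plain i32 arguments, but these map to u1imm/u4imm instruction operands (see the VXCR_Int_Ty pattern below), so calls must pass compile-time constants in range (ST in [0,1], SIX in [0,15]); that is why they need front-end diagnostics rather than a GCCBuiltin mapping. A minimal IR sketch, mirroring the new test file (the function name is illustrative):

; ST and SIX must be constant and in range; non-constant or out-of-range
; values cannot be encoded and should be rejected by front-end diagnostics.
define <2 x i64> @sigma_example(<2 x i64> %x) {
entry:
  %r = call <2 x i64> @llvm.ppc.altivec.crypto.vshasigmad(<2 x i64> %x, i32 1, i32 15)
  ret <2 x i64> %r
}
declare <2 x i64> @llvm.ppc.altivec.crypto.vshasigmad(<2 x i64>, i32, i32)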


@@ -425,6 +425,7 @@ public:
bool isToken() const override { return Kind == Token; }
bool isImm() const override { return Kind == Immediate || Kind == Expression; }
bool isU1Imm() const { return Kind == Immediate && isUInt<1>(getImm()); }
bool isU2Imm() const { return Kind == Immediate && isUInt<2>(getImm()); }
bool isU4Imm() const { return Kind == Immediate && isUInt<4>(getImm()); }
bool isU5Imm() const { return Kind == Immediate && isUInt<5>(getImm()); }


@@ -214,6 +214,13 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
printOperand(MI, OpNo+1, O);
}
void PPCInstPrinter::printU1ImmOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
unsigned int Value = MI->getOperand(OpNo).getImm();
assert(Value <= 1 && "Invalid u1imm argument!");
O << (unsigned int)Value;
}
void PPCInstPrinter::printU2ImmOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
unsigned int Value = MI->getOperand(OpNo).getImm();


@@ -43,6 +43,7 @@ public:
void printPredicateOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O, const char *Modifier = nullptr);
void printU1ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU2ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU4ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);


@@ -112,6 +112,9 @@ def FeatureVSX : SubtargetFeature<"vsx","HasVSX", "true",
def FeatureP8Altivec : SubtargetFeature<"power8-altivec", "HasP8Altivec", "true",
"Enable POWER8 Altivec instructions",
[FeatureAltivec]>;
def FeatureP8Crypto : SubtargetFeature<"crypto", "HasP8Crypto", "true",
"Enable POWER8 Crypto instructions",
[FeatureP8Altivec]>;
def FeatureP8Vector : SubtargetFeature<"power8-vector", "HasP8Vector", "true",
"Enable POWER8 vector instructions",
[FeatureVSX, FeatureP8Altivec]>;
@@ -258,7 +261,7 @@ def ProcessorFeatures {
FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
FeatureFPRND, FeatureFPCVT, FeatureISEL,
-FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX,
+FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, FeatureP8Crypto,
Feature64Bit /*, Feature64BitRegs */, FeatureICBT,
DeprecatedMFTB, DeprecatedDST];
}
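FeatureP8Crypto implies FeatureP8Altivec (and, transitively, FeatureAltivec), and the POWER8 processor-feature list above now enables it by default. A pre-POWER8 CPU can still opt in explicitly, as the new test's RUN lines do, e.g. llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+crypto.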


@@ -269,6 +269,16 @@ class VX2_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
!strconcat(opc, " $vD, $vB"), IIC_VecFP,
[(set OutTy:$vD, (IntID InTy:$vB))]>;
class VXBX_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
: VXForm_BX<xo, (outs vrrc:$vD), (ins vrrc:$vA),
!strconcat(opc, " $vD, $vA"), IIC_VecFP,
[(set Ty:$vD, (IntID Ty:$vA))]>;
class VXCR_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
: VXForm_CR<xo, (outs vrrc:$vD), (ins vrrc:$vA, u1imm:$ST, u4imm:$SIX),
!strconcat(opc, " $vD, $vA, $ST, $SIX"), IIC_VecFP,
[(set Ty:$vD, (IntID Ty:$vA, imm:$ST, imm:$SIX))]>;
//===----------------------------------------------------------------------===//
// Instruction Definitions.
@@ -939,6 +949,7 @@ def : Pat<(v4f32 (fnearbyint v4f32:$vA)),
} // end HasAltivec
def HasP8Altivec : Predicate<"PPCSubTarget->hasP8Altivec()">;
def HasP8Crypto : Predicate<"PPCSubTarget->hasP8Crypto()">;
let Predicates = [HasP8Altivec] in {
let isCommutable = 1 in {
@@ -1035,4 +1046,33 @@ def VCMPGTSDo : VCMPo<967, "vcmpgtsd. $vD, $vA, $vB", v2i64>;
def VCMPGTUD : VCMP <711, "vcmpgtud $vD, $vA, $vB" , v2i64>;
def VCMPGTUDo : VCMPo<711, "vcmpgtud. $vD, $vA, $vB", v2i64>;
// The cryptography instructions that do not require Category:Vector.Crypto
def VPMSUMB : VX1_Int_Ty<1032, "vpmsumb",
int_ppc_altivec_crypto_vpmsumb, v16i8>;
def VPMSUMH : VX1_Int_Ty<1096, "vpmsumh",
int_ppc_altivec_crypto_vpmsumh, v8i16>;
def VPMSUMW : VX1_Int_Ty<1160, "vpmsumw",
int_ppc_altivec_crypto_vpmsumw, v4i32>;
def VPMSUMD : VX1_Int_Ty<1224, "vpmsumd",
int_ppc_altivec_crypto_vpmsumd, v2i64>;
def VPERMXOR : VA1a_Int_Ty<45, "vpermxor",
int_ppc_altivec_crypto_vpermxor, v16i8>;
} // end HasP8Altivec
// Crypto instructions (from builtins)
let Predicates = [HasP8Crypto] in {
def VSHASIGMAW : VXCR_Int_Ty<1666, "vshasigmaw",
int_ppc_altivec_crypto_vshasigmaw, v4i32>;
def VSHASIGMAD : VXCR_Int_Ty<1730, "vshasigmad",
int_ppc_altivec_crypto_vshasigmad, v2i64>;
def VCIPHER : VX1_Int_Ty<1288, "vcipher", int_ppc_altivec_crypto_vcipher,
v2i64>;
def VCIPHERLAST : VX1_Int_Ty<1289, "vcipherlast",
int_ppc_altivec_crypto_vcipherlast, v2i64>;
def VNCIPHER : VX1_Int_Ty<1352, "vncipher",
int_ppc_altivec_crypto_vncipher, v2i64>;
def VNCIPHERLAST : VX1_Int_Ty<1353, "vncipherlast",
int_ppc_altivec_crypto_vncipherlast, v2i64>;
def VSBOX : VXBX_Int_Ty<1480, "vsbox", int_ppc_altivec_crypto_vsbox, v2i64>;
} // HasP8Crypto
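The predicate split above is deliberate: vpmsum* and vpermxor need only HasP8Altivec, while the AES and SHA instructions sit behind HasP8Crypto. A small IR sketch (function names are illustrative) exercising both groups; built with -mattr=+crypto both calls select their instructions, while the first would also work with just -mattr=+power8-altivec:

; vpmsumb requires only POWER8 Altivec; vshasigmaw additionally requires
; the crypto feature (which itself implies power8-altivec).
define <16 x i8> @poly_mul(<16 x i8> %a, <16 x i8> %b) {
entry:
  %r = call <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %r
}
define <4 x i32> @sigma0(<4 x i32> %x) {
entry:
  %r = call <4 x i32> @llvm.ppc.altivec.crypto.vshasigmaw(<4 x i32> %x, i32 0, i32 0)
  ret <4 x i32> %r
}
declare <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8>, <16 x i8>)
declare <4 x i32> @llvm.ppc.altivec.crypto.vshasigmaw(<4 x i32>, i32, i32)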


@@ -1470,6 +1470,39 @@ class VXForm_5<bits<11> xo, dag OOL, dag IOL, string asmstr,
let Inst{21-31} = xo;
}
/// VXForm_CR - VX crypto instructions with "VRT, VRA, ST, SIX"
class VXForm_CR<bits<11> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<4, OOL, IOL, asmstr, itin> {
bits<5> VD;
bits<5> VA;
bits<1> ST;
bits<4> SIX;
let Pattern = pattern;
let Inst{6-10} = VD;
let Inst{11-15} = VA;
let Inst{16} = ST;
let Inst{17-20} = SIX;
let Inst{21-31} = xo;
}
/// VXForm_BX - VX crypto instructions with "VRT, VRA, 0" - like vsbox
class VXForm_BX<bits<11> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<4, OOL, IOL, asmstr, itin> {
bits<5> VD;
bits<5> VA;
let Pattern = pattern;
let Inst{6-10} = VD;
let Inst{11-15} = VA;
let Inst{16-20} = 0;
let Inst{21-31} = xo;
}
// E-4 VXR-Form
class VXRForm_1<bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
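As a cross-check of the VXForm_CR layout above, here is a worked encoding for vshasigmaw 2, 3, 0, 11 (the operand values come from the disassembler test added below):

  opcode (bits 0-5)   = 4    -> 000100
  VD     (bits 6-10)  = 2    -> 00010
  VA     (bits 11-15) = 3    -> 00011
  ST     (bit 16)     = 0    -> 0
  SIX    (bits 17-20) = 11   -> 1011
  xo     (bits 21-31) = 1666 -> 11010000010

Concatenated, this is 0x10435E82, i.e. the big-endian byte sequence 0x10 0x43 0x5e 0x82 checked in the tests.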


@@ -446,6 +446,15 @@ def crrc : RegisterOperand<CRRC> {
let ParserMatchClass = PPCRegCRRCAsmOperand;
}
def PPCU1ImmAsmOperand : AsmOperandClass {
let Name = "U1Imm"; let PredicateMethod = "isU1Imm";
let RenderMethod = "addImmOperands";
}
def u1imm : Operand<i32> {
let PrintMethod = "printU1ImmOperand";
let ParserMatchClass = PPCU1ImmAsmOperand;
}
def PPCU2ImmAsmOperand : AsmOperandClass {
let Name = "U2Imm"; let PredicateMethod = "isU2Imm";
let RenderMethod = "addImmOperands";


@@ -69,6 +69,7 @@ void PPCSubtarget::initializeEnvironment() {
HasVSX = false;
HasP8Vector = false;
HasP8Altivec = false;
HasP8Crypto = false;
HasFCPSGN = false;
HasFSQRT = false;
HasFRE = false;


@@ -90,6 +90,7 @@ protected:
bool HasVSX;
bool HasP8Vector;
bool HasP8Altivec;
bool HasP8Crypto;
bool HasFCPSGN;
bool HasFSQRT;
bool HasFRE, HasFRES, HasFRSQRTE, HasFRSQRTES;
@@ -218,6 +219,7 @@ public:
bool hasVSX() const { return HasVSX; }
bool hasP8Vector() const { return HasP8Vector; }
bool hasP8Altivec() const { return HasP8Altivec; }
bool hasP8Crypto() const { return HasP8Crypto; }
bool hasMFOCRF() const { return HasMFOCRF; }
bool hasISEL() const { return HasISEL; }
bool hasPOPCNTD() const { return HasPOPCNTD; }


@@ -211,6 +211,51 @@ vector float f(vector float a, vector float b) {
//===----------------------------------------------------------------------===//
We should do a little better at eliminating dead stores.
The stores to the stack are dead, since %a and %b are not otherwise needed:
; Function Attrs: nounwind
define <16 x i8> @test_vpmsumb() #0 {
entry:
%a = alloca <16 x i8>, align 16
%b = alloca <16 x i8>, align 16
store <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, <16 x i8>* %a, align 16
store <16 x i8> <i8 113, i8 114, i8 115, i8 116, i8 117, i8 118, i8 119, i8 120, i8 121, i8 122, i8 123, i8 124, i8 125, i8 126, i8 127, i8 112>, <16 x i8>* %b, align 16
%0 = load <16 x i8>, <16 x i8>* %a, align 16
%1 = load <16 x i8>, <16 x i8>* %b, align 16
%2 = call <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8> %0, <16 x i8> %1)
ret <16 x i8> %2
}
; Function Attrs: nounwind readnone
declare <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8>, <16 x i8>) #1
Produces the following code with -mtriple=powerpc64-unknown-linux-gnu:
# BB#0: # %entry
addis 3, 2, .LCPI0_0@toc@ha
addis 4, 2, .LCPI0_1@toc@ha
addi 3, 3, .LCPI0_0@toc@l
addi 4, 4, .LCPI0_1@toc@l
lxvw4x 0, 0, 3
addi 3, 1, -16
lxvw4x 35, 0, 4
stxvw4x 0, 0, 3
ori 2, 2, 0
lxvw4x 34, 0, 3
addi 3, 1, -32
stxvw4x 35, 0, 3
vpmsumb 2, 2, 3
blr
.long 0
.quad 0
The two stxvw4x instructions are not needed.
With -mtriple=powerpc64le-unknown-linux-gnu, the associated permutes
are present too.
//===----------------------------------------------------------------------===//
The following example is found in test/CodeGen/PowerPC/vec_add_sub_doubleword.ll:
define <2 x i64> @increment_by_val(<2 x i64> %x, i64 %val) nounwind {


@@ -0,0 +1,275 @@
; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+crypto < %s | FileCheck %s
; FIXME: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s
; FIXME: The original intent was to add a CHECK-NEXT for the blr after every CHECK.
; However, this currently fails since we don't eliminate stores of the unused
; locals. These stores are sometimes scheduled after the crypto instruction.
; Function Attrs: nounwind
define <16 x i8> @test_vpmsumb() #0 {
entry:
%a = alloca <16 x i8>, align 16
%b = alloca <16 x i8>, align 16
store <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, <16 x i8>* %a, align 16
store <16 x i8> <i8 113, i8 114, i8 115, i8 116, i8 117, i8 118, i8 119, i8 120, i8 121, i8 122, i8 123, i8 124, i8 125, i8 126, i8 127, i8 112>, <16 x i8>* %b, align 16
%0 = load <16 x i8>, <16 x i8>* %a, align 16
%1 = load <16 x i8>, <16 x i8>* %b, align 16
%2 = call <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8> %0, <16 x i8> %1)
ret <16 x i8> %2
; CHECK: vpmsumb 2,
}
; Function Attrs: nounwind readnone
declare <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8>, <16 x i8>) #1
; Function Attrs: nounwind
define <8 x i16> @test_vpmsumh() #0 {
entry:
%a = alloca <8 x i16>, align 16
%b = alloca <8 x i16>, align 16
store <8 x i16> <i16 258, i16 772, i16 1286, i16 1800, i16 2314, i16 2828, i16 3342, i16 3856>, <8 x i16>* %a, align 16
store <8 x i16> <i16 29042, i16 29556, i16 30070, i16 30584, i16 31098, i16 31612, i16 32126, i16 32624>, <8 x i16>* %b, align 16
%0 = load <8 x i16>, <8 x i16>* %a, align 16
%1 = load <8 x i16>, <8 x i16>* %b, align 16
%2 = call <8 x i16> @llvm.ppc.altivec.crypto.vpmsumh(<8 x i16> %0, <8 x i16> %1)
ret <8 x i16> %2
; CHECK: vpmsumh 2,
}
; Function Attrs: nounwind readnone
declare <8 x i16> @llvm.ppc.altivec.crypto.vpmsumh(<8 x i16>, <8 x i16>) #1
; Function Attrs: nounwind
define <4 x i32> @test_vpmsumw() #0 {
entry:
%a = alloca <4 x i32>, align 16
%b = alloca <4 x i32>, align 16
store <4 x i32> <i32 16909060, i32 84281096, i32 151653132, i32 219025168>, <4 x i32>* %a, align 16
store <4 x i32> <i32 1903326068, i32 1970698104, i32 2038070140, i32 2105442160>, <4 x i32>* %b, align 16
%0 = load <4 x i32>, <4 x i32>* %a, align 16
%1 = load <4 x i32>, <4 x i32>* %b, align 16
%2 = call <4 x i32> @llvm.ppc.altivec.crypto.vpmsumw(<4 x i32> %0, <4 x i32> %1)
ret <4 x i32> %2
; CHECK: vpmsumw 2,
}
; Function Attrs: nounwind readnone
declare <4 x i32> @llvm.ppc.altivec.crypto.vpmsumw(<4 x i32>, <4 x i32>) #1
; Function Attrs: nounwind
define <2 x i64> @test_vpmsumd() #0 {
entry:
%a = alloca <2 x i64>, align 16
%b = alloca <2 x i64>, align 16
store <2 x i64> <i64 72623859790382856, i64 651345242494996240>, <2 x i64>* %a, align 16
store <2 x i64> <i64 8174723217654970232, i64 8753444600359583600>, <2 x i64>* %b, align 16
%0 = load <2 x i64>, <2 x i64>* %a, align 16
%1 = load <2 x i64>, <2 x i64>* %b, align 16
%2 = call <2 x i64> @llvm.ppc.altivec.crypto.vpmsumd(<2 x i64> %0, <2 x i64> %1)
ret <2 x i64> %2
; CHECK: vpmsumd 2,
}
; Function Attrs: nounwind readnone
declare <2 x i64> @llvm.ppc.altivec.crypto.vpmsumd(<2 x i64>, <2 x i64>) #1
; Function Attrs: nounwind
define <2 x i64> @test_vsbox() #0 {
entry:
%a = alloca <2 x i64>, align 16
store <2 x i64> <i64 72623859790382856, i64 651345242494996240>, <2 x i64>* %a, align 16
%0 = load <2 x i64>, <2 x i64>* %a, align 16
%1 = call <2 x i64> @llvm.ppc.altivec.crypto.vsbox(<2 x i64> %0)
ret <2 x i64> %1
; CHECK: vsbox 2,
}
; Function Attrs: nounwind readnone
declare <2 x i64> @llvm.ppc.altivec.crypto.vsbox(<2 x i64>) #1
; Function Attrs: nounwind
define <16 x i8> @test_vpermxorb() #0 {
entry:
%a = alloca <16 x i8>, align 16
%b = alloca <16 x i8>, align 16
%c = alloca <16 x i8>, align 16
store <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, <16 x i8>* %a, align 16
store <16 x i8> <i8 113, i8 114, i8 115, i8 116, i8 117, i8 118, i8 119, i8 120, i8 121, i8 122, i8 123, i8 124, i8 125, i8 126, i8 127, i8 112>, <16 x i8>* %b, align 16
store <16 x i8> <i8 113, i8 114, i8 115, i8 116, i8 117, i8 118, i8 119, i8 120, i8 121, i8 122, i8 123, i8 124, i8 125, i8 126, i8 127, i8 112>, <16 x i8>* %c, align 16
%0 = load <16 x i8>, <16 x i8>* %a, align 16
%1 = load <16 x i8>, <16 x i8>* %b, align 16
%2 = load <16 x i8>, <16 x i8>* %c, align 16
%3 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2)
ret <16 x i8> %3
; CHECK: vpermxor 2,
}
; Function Attrs: nounwind readnone
declare <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8>, <16 x i8>, <16 x i8>) #1
; Function Attrs: nounwind
define <8 x i16> @test_vpermxorh() #0 {
entry:
%a = alloca <8 x i16>, align 16
%b = alloca <8 x i16>, align 16
%c = alloca <8 x i16>, align 16
store <8 x i16> <i16 258, i16 772, i16 1286, i16 1800, i16 2314, i16 2828, i16 3342, i16 3856>, <8 x i16>* %a, align 16
store <8 x i16> <i16 29042, i16 29556, i16 30070, i16 30584, i16 31098, i16 31612, i16 32126, i16 32624>, <8 x i16>* %b, align 16
store <8 x i16> <i16 29042, i16 29556, i16 30070, i16 30584, i16 31098, i16 31612, i16 32126, i16 32624>, <8 x i16>* %c, align 16
%0 = load <8 x i16>, <8 x i16>* %a, align 16
%1 = bitcast <8 x i16> %0 to <16 x i8>
%2 = load <8 x i16>, <8 x i16>* %b, align 16
%3 = bitcast <8 x i16> %2 to <16 x i8>
%4 = load <8 x i16>, <8 x i16>* %c, align 16
%5 = bitcast <8 x i16> %4 to <16 x i8>
%6 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %1, <16 x i8> %3, <16 x i8> %5)
%7 = bitcast <16 x i8> %6 to <8 x i16>
ret <8 x i16> %7
; CHECK: vpermxor 2,
}
; Function Attrs: nounwind
define <4 x i32> @test_vpermxorw() #0 {
entry:
%a = alloca <4 x i32>, align 16
%b = alloca <4 x i32>, align 16
%c = alloca <4 x i32>, align 16
store <4 x i32> <i32 16909060, i32 84281096, i32 151653132, i32 219025168>, <4 x i32>* %a, align 16
store <4 x i32> <i32 1903326068, i32 1970698104, i32 2038070140, i32 2105442160>, <4 x i32>* %b, align 16
store <4 x i32> <i32 1903326068, i32 1970698104, i32 2038070140, i32 2105442160>, <4 x i32>* %c, align 16
%0 = load <4 x i32>, <4 x i32>* %a, align 16
%1 = bitcast <4 x i32> %0 to <16 x i8>
%2 = load <4 x i32>, <4 x i32>* %b, align 16
%3 = bitcast <4 x i32> %2 to <16 x i8>
%4 = load <4 x i32>, <4 x i32>* %c, align 16
%5 = bitcast <4 x i32> %4 to <16 x i8>
%6 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %1, <16 x i8> %3, <16 x i8> %5)
%7 = bitcast <16 x i8> %6 to <4 x i32>
ret <4 x i32> %7
; CHECK: vpermxor 2,
}
; Function Attrs: nounwind
define <2 x i64> @test_vpermxord() #0 {
entry:
%a = alloca <2 x i64>, align 16
%b = alloca <2 x i64>, align 16
%c = alloca <2 x i64>, align 16
store <2 x i64> <i64 72623859790382856, i64 651345242494996240>, <2 x i64>* %a, align 16
store <2 x i64> <i64 8174723217654970232, i64 8753444600359583600>, <2 x i64>* %b, align 16
store <2 x i64> <i64 8174723217654970232, i64 8753444600359583600>, <2 x i64>* %c, align 16
%0 = load <2 x i64>, <2 x i64>* %a, align 16
%1 = bitcast <2 x i64> %0 to <16 x i8>
%2 = load <2 x i64>, <2 x i64>* %b, align 16
%3 = bitcast <2 x i64> %2 to <16 x i8>
%4 = load <2 x i64>, <2 x i64>* %c, align 16
%5 = bitcast <2 x i64> %4 to <16 x i8>
%6 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %1, <16 x i8> %3, <16 x i8> %5)
%7 = bitcast <16 x i8> %6 to <2 x i64>
ret <2 x i64> %7
; CHECK: vpermxor 2,
}
; Function Attrs: nounwind
define <2 x i64> @test_vcipher() #0 {
entry:
%a = alloca <2 x i64>, align 16
%b = alloca <2 x i64>, align 16
store <2 x i64> <i64 72623859790382856, i64 651345242494996240>, <2 x i64>* %a, align 16
store <2 x i64> <i64 8174723217654970232, i64 8753444600359583600>, <2 x i64>* %b, align 16
%0 = load <2 x i64>, <2 x i64>* %a, align 16
%1 = load <2 x i64>, <2 x i64>* %b, align 16
%2 = call <2 x i64> @llvm.ppc.altivec.crypto.vcipher(<2 x i64> %0, <2 x i64> %1)
ret <2 x i64> %2
; CHECK: vcipher 2,
}
; Function Attrs: nounwind readnone
declare <2 x i64> @llvm.ppc.altivec.crypto.vcipher(<2 x i64>, <2 x i64>) #1
; Function Attrs: nounwind
define <2 x i64> @test_vcipherlast() #0 {
entry:
%a = alloca <2 x i64>, align 16
%b = alloca <2 x i64>, align 16
store <2 x i64> <i64 72623859790382856, i64 651345242494996240>, <2 x i64>* %a, align 16
store <2 x i64> <i64 8174723217654970232, i64 8753444600359583600>, <2 x i64>* %b, align 16
%0 = load <2 x i64>, <2 x i64>* %a, align 16
%1 = load <2 x i64>, <2 x i64>* %b, align 16
%2 = call <2 x i64> @llvm.ppc.altivec.crypto.vcipherlast(<2 x i64> %0, <2 x i64> %1)
ret <2 x i64> %2
; CHECK: vcipherlast 2,
}
; Function Attrs: nounwind readnone
declare <2 x i64> @llvm.ppc.altivec.crypto.vcipherlast(<2 x i64>, <2 x i64>) #1
; Function Attrs: nounwind
define <2 x i64> @test_vncipher() #0 {
entry:
%a = alloca <2 x i64>, align 16
%b = alloca <2 x i64>, align 16
store <2 x i64> <i64 72623859790382856, i64 651345242494996240>, <2 x i64>* %a, align 16
store <2 x i64> <i64 8174723217654970232, i64 8753444600359583600>, <2 x i64>* %b, align 16
%0 = load <2 x i64>, <2 x i64>* %a, align 16
%1 = load <2 x i64>, <2 x i64>* %b, align 16
%2 = call <2 x i64> @llvm.ppc.altivec.crypto.vncipher(<2 x i64> %0, <2 x i64> %1)
ret <2 x i64> %2
; CHECK: vncipher 2,
}
; Function Attrs: nounwind readnone
declare <2 x i64> @llvm.ppc.altivec.crypto.vncipher(<2 x i64>, <2 x i64>) #1
; Function Attrs: nounwind
define <2 x i64> @test_vncipherlast() #0 {
entry:
%a = alloca <2 x i64>, align 16
%b = alloca <2 x i64>, align 16
store <2 x i64> <i64 72623859790382856, i64 651345242494996240>, <2 x i64>* %a, align 16
store <2 x i64> <i64 8174723217654970232, i64 8753444600359583600>, <2 x i64>* %b, align 16
%0 = load <2 x i64>, <2 x i64>* %a, align 16
%1 = load <2 x i64>, <2 x i64>* %b, align 16
%2 = call <2 x i64> @llvm.ppc.altivec.crypto.vncipherlast(<2 x i64> %0, <2 x i64> %1)
ret <2 x i64> %2
; CHECK: vncipherlast 2,
}
; Function Attrs: nounwind readnone
declare <2 x i64> @llvm.ppc.altivec.crypto.vncipherlast(<2 x i64>, <2 x i64>) #1
; Function Attrs: nounwind
define <4 x i32> @test_vshasigmaw() #0 {
entry:
%a = alloca <4 x i32>, align 16
store <4 x i32> <i32 16909060, i32 84281096, i32 151653132, i32 219025168>, <4 x i32>* %a, align 16
%0 = load <4 x i32>, <4 x i32>* %a, align 16
%1 = call <4 x i32> @llvm.ppc.altivec.crypto.vshasigmaw(<4 x i32> %0, i32 1, i32 15)
ret <4 x i32> %1
; CHECK: vshasigmaw 2,
}
; Function Attrs: nounwind readnone
declare <4 x i32> @llvm.ppc.altivec.crypto.vshasigmaw(<4 x i32>, i32, i32) #1
; Function Attrs: nounwind
define <2 x i64> @test_vshasigmad() #0 {
entry:
%a = alloca <2 x i64>, align 16
store <2 x i64> <i64 8174723217654970232, i64 8753444600359583600>, <2 x i64>* %a, align 16
%0 = load <2 x i64>, <2 x i64>* %a, align 16
%1 = call <2 x i64> @llvm.ppc.altivec.crypto.vshasigmad(<2 x i64> %0, i32 1, i32 15)
ret <2 x i64> %1
; CHECK: vshasigmad 2,
}
; Function Attrs: nounwind readnone
declare <2 x i64> @llvm.ppc.altivec.crypto.vshasigmad(<2 x i64>, i32, i32) #1
attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
!llvm.ident = !{!0}
!0 = !{!"clang version 3.7.0 (trunk 230949) (llvm/trunk 230946)"}


@@ -120,6 +120,42 @@
# CHECK: vperm 2, 3, 4, 5
0x10 0x43 0x21 0x6b
# CHECK: vpermxor 2, 3, 4, 5
0x10 0x43 0x21 0x6d
# CHECK: vsbox 2, 5
0x10 0x45 0x05 0xc8
# CHECK: vcipher 2, 5, 17
0x10 0x45 0x8d 0x08
# CHECK: vcipherlast 2, 5, 17
0x10 0x45 0x8d 0x09
# CHECK: vncipher 2, 5, 17
0x10 0x45 0x8d 0x48
# CHECK: vncipherlast 2, 5, 17
0x10 0x45 0x8d 0x49
# CHECK: vpmsumb 2, 5, 17
0x10 0x45 0x8c 0x08
# CHECK: vpmsumh 2, 5, 17
0x10 0x45 0x8c 0x48
# CHECK: vpmsumw 2, 5, 17
0x10 0x45 0x8c 0x88
# CHECK: vpmsumd 2, 5, 17
0x10 0x45 0x8c 0xc8
# CHECK: vshasigmaw 2, 3, 0, 11
0x10 0x43 0x5e 0x82
# CHECK: vshasigmad 2, 3, 1, 15
0x10 0x43 0xfe 0xc2
# CHECK: vsel 2, 3, 4, 5
0x10 0x43 0x21 0x6a


@@ -133,6 +133,55 @@
# CHECK-BE: vperm 2, 3, 4, 5 # encoding: [0x10,0x43,0x21,0x6b]
# CHECK-LE: vperm 2, 3, 4, 5 # encoding: [0x6b,0x21,0x43,0x10]
vperm 2, 3, 4, 5
# CHECK-BE: vpermxor 2, 3, 4, 5 # encoding: [0x10,0x43,0x21,0x6d]
# CHECK-LE: vpermxor 2, 3, 4, 5 # encoding: [0x6d,0x21,0x43,0x10]
vpermxor 2, 3, 4, 5
# CHECK-BE: vsbox 2, 5 # encoding: [0x10,0x45,0x05,0xc8]
# CHECK-LE: vsbox 2, 5 # encoding: [0xc8,0x05,0x45,0x10]
vsbox 2, 5
# CHECK-BE: vcipher 2, 5, 17 # encoding: [0x10,0x45,0x8d,0x08]
# CHECK-LE: vcipher 2, 5, 17 # encoding: [0x08,0x8d,0x45,0x10]
vcipher 2, 5, 17
# CHECK-BE: vcipherlast 2, 5, 17 # encoding: [0x10,0x45,0x8d,0x09]
# CHECK-LE: vcipherlast 2, 5, 17 # encoding: [0x09,0x8d,0x45,0x10]
vcipherlast 2, 5, 17
# CHECK-BE: vncipher 2, 5, 17 # encoding: [0x10,0x45,0x8d,0x48]
# CHECK-LE: vncipher 2, 5, 17 # encoding: [0x48,0x8d,0x45,0x10]
vncipher 2, 5, 17
# CHECK-BE: vncipherlast 2, 5, 17 # encoding: [0x10,0x45,0x8d,0x49]
# CHECK-LE: vncipherlast 2, 5, 17 # encoding: [0x49,0x8d,0x45,0x10]
vncipherlast 2, 5, 17
# CHECK-BE: vpmsumb 2, 5, 17 # encoding: [0x10,0x45,0x8c,0x08]
# CHECK-LE: vpmsumb 2, 5, 17 # encoding: [0x08,0x8c,0x45,0x10]
vpmsumb 2, 5, 17
# CHECK-BE: vpmsumh 2, 5, 17 # encoding: [0x10,0x45,0x8c,0x48]
# CHECK-LE: vpmsumh 2, 5, 17 # encoding: [0x48,0x8c,0x45,0x10]
vpmsumh 2, 5, 17
# CHECK-BE: vpmsumw 2, 5, 17 # encoding: [0x10,0x45,0x8c,0x88]
# CHECK-LE: vpmsumw 2, 5, 17 # encoding: [0x88,0x8c,0x45,0x10]
vpmsumw 2, 5, 17
# CHECK-BE: vpmsumd 2, 5, 17 # encoding: [0x10,0x45,0x8c,0xc8]
# CHECK-LE: vpmsumd 2, 5, 17 # encoding: [0xc8,0x8c,0x45,0x10]
vpmsumd 2, 5, 17
# CHECK-BE: vshasigmaw 2, 3, 0, 11 # encoding: [0x10,0x43,0x5e,0x82]
# CHECK-LE: vshasigmaw 2, 3, 0, 11 # encoding: [0x82,0x5e,0x43,0x10]
vshasigmaw 2, 3, 0, 11
# CHECK-BE: vshasigmad 2, 3, 1, 15 # encoding: [0x10,0x43,0xfe,0xc2]
# CHECK-LE: vshasigmad 2, 3, 1, 15 # encoding: [0xc2,0xfe,0x43,0x10]
vshasigmad 2, 3, 1, 15
# CHECK-BE: vsel 2, 3, 4, 5 # encoding: [0x10,0x43,0x21,0x6a]
# CHECK-LE: vsel 2, 3, 4, 5 # encoding: [0x6a,0x21,0x43,0x10]
vsel 2, 3, 4, 5