[AArch64] Armv8.2-A: add the crypto extensions

This adds MC support for the crypto instructions that were made optional extensions in Armv8.2-A (AArch64 only). Differential Revision: https://reviews.llvm.org/D49370 llvm-svn: 338010
2025-02-13 17:00:01 +00:00 · 2018-07-26 07:13:59 +00:00 · 2018-07-26 07:13:59 +00:00 · 539ee65110
commit 539ee65110
parent 728f1c07e4
13 changed files with 444 additions and 18 deletions
--- a/include/llvm/Support/TargetParser.h
+++ b/include/llvm/Support/TargetParser.h
@ -206,6 +206,7 @@ unsigned checkArchVersion(StringRef Arch);
 unsigned  getDefaultFPU(StringRef CPU, ArchKind AK);
 unsigned  getDefaultExtensions(StringRef CPU, ArchKind AK);
 StringRef getDefaultCPU(StringRef Arch);
+AArch64::ArchKind getCPUArchKind(StringRef CPU);

 // Parser
 unsigned parseFPU(StringRef FPU);
--- a/lib/Support/TargetParser.cpp
+++ b/lib/Support/TargetParser.cpp
@ -433,6 +433,17 @@ unsigned llvm::AArch64::getDefaultExtensions(StringRef CPU, ArchKind AK) {
    .Default(AArch64::AEK_INVALID);
 }

+AArch64::ArchKind llvm::AArch64::getCPUArchKind(StringRef CPU) {
+  if (CPU == "generic") // Answered directly, before the table lookup below.
+    return AArch64::ArchKind::ARMV8A;
+  // Any other name is matched against the AARCH64_CPU_NAME entries.
+  return StringSwitch<AArch64::ArchKind>(CPU)
+#define AARCH64_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \
+  .Case(NAME, AArch64::ArchKind:: ID)
+#include "llvm/Support/AArch64TargetParser.def"
+    .Default(AArch64::ArchKind::INVALID); // No entry matched CPU.
+}
+
 bool llvm::AArch64::getExtensionFeatures(unsigned Extensions,
                                     std::vector<StringRef> &Features) {

--- a/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@ -5720,7 +5720,7 @@ multiclass SIMDDifferentThreeVectorBD<bit U, bits<4> opc, string asm,
  def v16i8  : BaseSIMDDifferentThreeVector<U, 0b001, opc,
                                            V128, V128, V128,
                                            asm#"2", ".8h", ".16b", ".16b", []>;
-  let Predicates = [HasCrypto] in {
+  let Predicates = [HasAES] in {
    def v1i64  : BaseSIMDDifferentThreeVector<U, 0b110, opc,
                                              V128, V64, V64,
                                              asm, ".1q", ".1d", ".1d", []>;
@ -9920,7 +9920,6 @@ multiclass SIMDIndexedTiedComplexHSD<bit U, bit opc1, bit opc2, Operand rottype,
 // Crypto extensions
 //----------------------------------------------------------------------------

-let Predicates = [HasCrypto] in {
 let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
 class AESBase<bits<4> opc, string asm, dag outs, dag ins, string cstr,
              list<dag> pat>
@ -10010,7 +10009,103 @@ class SHATiedInstVV<bits<4> opc, string asm, Intrinsic OpNode>
 class SHAInstSS<bits<4> opc, string asm, Intrinsic OpNode>
  : SHA2OpInst<opc, asm, "", "", (outs FPR32:$Rd), (ins FPR32:$Rn),
               [(set (i32 FPR32:$Rd), (OpNode (i32 FPR32:$Rn)))]>;
-} // end of 'let Predicates = [HasCrypto]'
+
+// Armv8.2-A Crypto extensions: shared base fixing Inst{31-25}, Vn and Vd.
+class BaseCryptoV82<dag oops, dag iops, string asm, string asmops, string cst,
+                    list<dag> pattern>
+  : I <oops, iops, asm, asmops, cst, pattern>, Sched<[WriteV]> {
+  bits<5> Vd;
+  bits<5> Vn;
+  let Inst{31-25} = 0b1100111; // Top seven bits shared by every form below.
+  let Inst{9-5}   = Vn;
+  let Inst{4-0}   = Vd;
+}
+// Two-operand assembly form ($Vd, $Vn); the extra input $Vm is tied to $Vd.
+class CryptoRRTied<bits<1>op0, bits<2>op1, string asm, string asmops>
+  : BaseCryptoV82<(outs V128:$Vd), (ins V128:$Vn, V128:$Vm), asm, asmops,
+                  "$Vm = $Vd", []> { // NOTE(review): siblings tie $Vd = $Vdst.
+  let Inst{31-25} = 0b1100111; // Same value BaseCryptoV82 already assigns.
+  let Inst{24-21} = 0b0110;
+  let Inst{20-15} = 0b000001;
+  let Inst{14}    = op0;
+  let Inst{13-12} = 0b00;
+  let Inst{11-10} = op1;
+}
+class CryptoRRTied_2D<bits<1>op0, bits<2>op1, string asm>
+  : CryptoRRTied<op0, op1, asm, "{\t$Vd.2d, $Vn.2d}">; // .2d arrangement
+class CryptoRRTied_4S<bits<1>op0, bits<2>op1, string asm>
+  : CryptoRRTied<op0, op1, asm, "{\t$Vd.4s, $Vn.4s}">; // .4s arrangement
+// Three-register forms: Vm goes in Inst{20-16}; op0 and op1 select the opcode.
+class CryptoRRR<bits<1> op0, bits<2>op1, dag oops, dag iops, string asm,
+                string asmops, string cst>
+  : BaseCryptoV82<oops, iops, asm , asmops, cst, []> {
+  bits<5> Vm;
+  let Inst{24-21} = 0b0011;
+  let Inst{20-16} = Vm;
+  let Inst{15}    = 0b1;
+  let Inst{14}    = op0;
+  let Inst{13-12} = 0b00;
+  let Inst{11-10} = op1;
+}
+class CryptoRRR_2D<bits<1> op0, bits<2>op1, string asm> // untied, .2d
+  : CryptoRRR<op0, op1, (outs V128:$Vd), (ins V128:$Vn, V128:$Vm), asm,
+              "{\t$Vd.2d, $Vn.2d, $Vm.2d}", "">;
+class CryptoRRRTied_2D<bits<1> op0, bits<2>op1, string asm> // $Vd tied, .2d
+  : CryptoRRR<op0, op1, (outs V128:$Vdst), (ins V128:$Vd, V128:$Vn, V128:$Vm), asm,
+              "{\t$Vd.2d, $Vn.2d, $Vm.2d}", "$Vd = $Vdst">;
+class CryptoRRR_4S<bits<1> op0, bits<2>op1, string asm> // untied, .4s
+  : CryptoRRR<op0, op1, (outs V128:$Vd), (ins V128:$Vn, V128:$Vm), asm,
+              "{\t$Vd.4s, $Vn.4s, $Vm.4s}", "">;
+class CryptoRRRTied_4S<bits<1> op0, bits<2>op1, string asm> // $Vd tied, .4s
+  : CryptoRRR<op0, op1, (outs V128:$Vdst), (ins V128:$Vd, V128:$Vn, V128:$Vm), asm,
+              "{\t$Vd.4s, $Vn.4s, $Vm.4s}", "$Vd = $Vdst">;
+class CryptoRRRTied<bits<1> op0, bits<2>op1, string asm> // FPR128 $Vd and $Vn
+  : CryptoRRR<op0, op1, (outs FPR128:$Vdst), (ins FPR128:$Vd, FPR128:$Vn, V128:$Vm),
+              asm, "{\t$Vd, $Vn, $Vm.2d}", "$Vd = $Vdst">;
+// Four-register form: a third source register Va is encoded in Inst{14-10}.
+class CryptoRRRR<bits<2>op0, string asm, string asmops>
+  : BaseCryptoV82<(outs V128:$Vd), (ins V128:$Vn, V128:$Vm, V128:$Va), asm,
+                  asmops, "", []> {
+  bits<5> Vm;
+  bits<5> Va;
+  let Inst{24-23} = 0b00;
+  let Inst{22-21} = op0; // Only opcode selector for this form.
+  let Inst{20-16} = Vm;
+  let Inst{15}    = 0b0;
+  let Inst{14-10} = Va;
+}
+class CryptoRRRR_16B<bits<2>op0, string asm> // all operands printed as .16b
+ : CryptoRRRR<op0, asm, "{\t$Vd.16b, $Vn.16b, $Vm.16b, $Va.16b}"> {
+}
+class CryptoRRRR_4S<bits<2>op0, string asm> // all operands printed as .4s
+ : CryptoRRRR<op0, asm, "{\t$Vd.4s, $Vn.4s, $Vm.4s, $Va.4s}"> {
+}
+// Three registers plus a uimm6 immediate that is encoded in Inst{15-10}.
+class CryptoRRRi6<string asm>
+  : BaseCryptoV82<(outs V128:$Vd), (ins V128:$Vn, V128:$Vm, uimm6:$imm), asm,
+                  "{\t$Vd.2d, $Vn.2d, $Vm.2d, $imm}", "", []> {
+  bits<6> imm;
+  bits<5> Vm;
+  let Inst{24-21} = 0b0100;
+  let Inst{20-16} = Vm;
+  let Inst{15-10} = imm;
+  let Inst{9-5}   = Vn; // Repeats the assignment made in BaseCryptoV82.
+  let Inst{4-0}   = Vd; // Repeats the assignment made in BaseCryptoV82.
+}
+// Tied three-register form whose $Vm operand carries a 2-bit lane index.
+class CryptoRRRi2Tied<bits<1>op0, bits<2>op1, string asm>
+  : BaseCryptoV82<(outs V128:$Vdst),
+                  (ins V128:$Vd, V128:$Vn, V128:$Vm, VectorIndexS:$imm),
+                  asm, "{\t$Vd.4s, $Vn.4s, $Vm.s$imm}", "$Vd = $Vdst", []> {
+  bits<2> imm;
+  bits<5> Vm;
+  let Inst{24-21} = 0b0010;
+  let Inst{20-16} = Vm;
+  let Inst{15}    = 0b1;
+  let Inst{14}    = op0;
+  let Inst{13-12} = imm; // Lane index sits between the op0 and op1 bits.
+  let Inst{11-10} = op1;
+}

 //----------------------------------------------------------------------------
 // v8.1 atomic instructions extension:
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@ -28,6 +28,14 @@ def HasNEON          : Predicate<"Subtarget->hasNEON()">,
                                 AssemblerPredicate<"FeatureNEON", "neon">;
 def HasCrypto        : Predicate<"Subtarget->hasCrypto()">,
                                 AssemblerPredicate<"FeatureCrypto", "crypto">;
+def HasSM4           : Predicate<"Subtarget->hasSM4()">, // SM3*/SM4* defs
+                                 AssemblerPredicate<"FeatureSM4", "sm4">;
+def HasSHA3          : Predicate<"Subtarget->hasSHA3()">, // SHA512*, EOR3, ...
+                                 AssemblerPredicate<"FeatureSHA3", "sha3">;
+def HasSHA2          : Predicate<"Subtarget->hasSHA2()">, // SHA1*/SHA256* defs
+                                 AssemblerPredicate<"FeatureSHA2", "sha2">;
+def HasAES           : Predicate<"Subtarget->hasAES()">, // AES* defs
+                                 AssemblerPredicate<"FeatureAES", "aes">;
 def HasDotProd       : Predicate<"Subtarget->hasDotProd()">,
                                 AssemblerPredicate<"FeatureDotProd", "dotprod">;
 def HasCRC           : Predicate<"Subtarget->hasCRC()">,
@ -467,6 +475,30 @@ defm SDOTlane : SIMDThreeSameVectorDotIndex<0, "sdot", int_aarch64_neon_sdot>;
 defm UDOTlane : SIMDThreeSameVectorDotIndex<1, "udot", int_aarch64_neon_udot>;
 }

+// Armv8.2-A Crypto extensions. SHA512*, RAX1, EOR3, BCAX and XAR need HasSHA3.
+let Predicates = [HasSHA3] in {
+def SHA512H   : CryptoRRRTied<0b0, 0b00, "sha512h">;
+def SHA512H2  : CryptoRRRTied<0b0, 0b01, "sha512h2">;
+def SHA512SU0 : CryptoRRTied_2D<0b0, 0b00, "sha512su0">;
+def SHA512SU1 : CryptoRRRTied_2D<0b0, 0b10, "sha512su1">;
+def RAX1      : CryptoRRR_2D<0b0,0b11, "rax1">;
+def EOR3      : CryptoRRRR_16B<0b00, "eor3">;
+def BCAX      : CryptoRRRR_16B<0b01, "bcax">;
+def XAR       : CryptoRRRi6<"xar">;
+} // HasSHA3
+// The SM3* and SM4* instructions are grouped under the one HasSM4 predicate.
+let Predicates = [HasSM4] in {
+def SM3TT1A   : CryptoRRRi2Tied<0b0, 0b00, "sm3tt1a">;
+def SM3TT1B   : CryptoRRRi2Tied<0b0, 0b01, "sm3tt1b">;
+def SM3TT2A   : CryptoRRRi2Tied<0b0, 0b10, "sm3tt2a">;
+def SM3TT2B   : CryptoRRRi2Tied<0b0, 0b11, "sm3tt2b">;
+def SM3SS1    : CryptoRRRR_4S<0b10, "sm3ss1">;
+def SM3PARTW1 : CryptoRRRTied_4S<0b1, 0b00, "sm3partw1">;
+def SM3PARTW2 : CryptoRRRTied_4S<0b1, 0b01, "sm3partw2">;
+def SM4ENCKEY : CryptoRRR_4S<0b1, 0b10, "sm4ekey">; // Def name != mnemonic.
+def SM4E      : CryptoRRTied_4S<0b0, 0b01, "sm4e">;
+} // HasSM4
+
 let Predicates = [HasRCPC] in {
  // v8.3 Release Consistent Processor Consistent support, optional in v8.2.
  def LDAPRB  : RCPCLoad<0b00, "ldaprb", GPR32>;
@ -555,7 +587,7 @@ let Predicates = [HasV8_3a] in {
    let Inst{31} = 0;
  }

-} // HasV8_3A
+} // HasV8_3a

 // v8.4 Flag manipulation instructions
 let Predicates = [HasV8_4a] in {
@ -5606,10 +5638,12 @@ defm ST4 : SIMDLdSt4SingleAliases<"st4">;
 // Crypto extensions
 //----------------------------------------------------------------------------

+let Predicates = [HasAES] in {
 def AESErr   : AESTiedInst<0b0100, "aese",   int_aarch64_crypto_aese>;
 def AESDrr   : AESTiedInst<0b0101, "aesd",   int_aarch64_crypto_aesd>;
 def AESMCrr  : AESInst<    0b0110, "aesmc",  int_aarch64_crypto_aesmc>;
 def AESIMCrr : AESInst<    0b0111, "aesimc", int_aarch64_crypto_aesimc>;
+}

 // Pseudo instructions for AESMCrr/AESIMCrr with a register constraint required
 // for AES fusion on some CPUs.
@ -5636,6 +5670,7 @@ def : Pat<(v16i8 (int_aarch64_crypto_aesimc
                                              (v16i8 V128:$src2)))))>,
          Requires<[HasFuseAES]>;

+let Predicates = [HasSHA2] in {
 def SHA1Crrr     : SHATiedInstQSV<0b000, "sha1c",   int_aarch64_crypto_sha1c>;
 def SHA1Prrr     : SHATiedInstQSV<0b001, "sha1p",   int_aarch64_crypto_sha1p>;
 def SHA1Mrrr     : SHATiedInstQSV<0b010, "sha1m",   int_aarch64_crypto_sha1m>;
@ -5647,6 +5682,7 @@ def SHA256SU1rrr :SHATiedInstVVV<0b110, "sha256su1",int_aarch64_crypto_sha256su1
 def SHA1Hrr     : SHAInstSS<    0b0000, "sha1h",    int_aarch64_crypto_sha1h>;
 def SHA1SU1rr   : SHATiedInstVV<0b0001, "sha1su1",  int_aarch64_crypto_sha1su1>;
 def SHA256SU0rr : SHATiedInstVV<0b0010, "sha256su0",int_aarch64_crypto_sha256su0>;
+}

 //----------------------------------------------------------------------------
 // Compiler-pseudos
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@ -4749,7 +4749,11 @@ static const struct {
  const char *Name;
  const FeatureBitset Features;
 } ExtensionMap[] = {
-  { "crc", {AArch64::FeatureCRC} },
+  { "crc",  {AArch64::FeatureCRC} },
+  { "sm4",  {AArch64::FeatureSM4} },
+  { "sha3", {AArch64::FeatureSHA3} },
+  { "sha2", {AArch64::FeatureSHA2} },
+  { "aes",  {AArch64::FeatureAES} },
  { "crypto", {AArch64::FeatureCrypto} },
  { "fp", {AArch64::FeatureFPARMv8} },
  { "simd", {AArch64::FeatureNEON} },
@ -4763,6 +4767,54 @@ static const struct {
  { "profile", {} },
 };

+static void ExpandCryptoAEK(AArch64::ArchKind ArchKind,
+                            SmallVector<StringRef, 4> &RequestedExtensions) {
+  const bool NoCrypto =
+      (std::find(RequestedExtensions.begin(), RequestedExtensions.end(),
+                 "nocrypto") != std::end(RequestedExtensions));
+  const bool Crypto =
+      (std::find(RequestedExtensions.begin(), RequestedExtensions.end(),
+                 "crypto") != std::end(RequestedExtensions));
+  // Append the per-architecture names that "crypto"/"nocrypto" stand for.
+  if (!NoCrypto && Crypto) { // "nocrypto" wins when both were requested.
+    switch (ArchKind) {
+    default:
+      // Map 'generic' (and others) to sha2 and aes, because
+      // that was the traditional meaning of crypto.
+    case AArch64::ArchKind::ARMV8_1A:
+    case AArch64::ArchKind::ARMV8_2A:
+    case AArch64::ArchKind::ARMV8_3A:
+      RequestedExtensions.push_back("sha2");
+      RequestedExtensions.push_back("aes");
+      break;
+    case AArch64::ArchKind::ARMV8_4A: // Here crypto also covers sm4 and sha3.
+      RequestedExtensions.push_back("sm4");
+      RequestedExtensions.push_back("sha3");
+      RequestedExtensions.push_back("sha2");
+      RequestedExtensions.push_back("aes");
+      break;
+    }
+  } else if (NoCrypto) {
+    switch (ArchKind) {
+    default:
+      // Map 'generic' (and others) to nosha2 and noaes, because
+      // sha2 and aes were the traditional meaning of crypto.
+    case AArch64::ArchKind::ARMV8_1A:
+    case AArch64::ArchKind::ARMV8_2A:
+    case AArch64::ArchKind::ARMV8_3A:
+      RequestedExtensions.push_back("nosha2");
+      RequestedExtensions.push_back("noaes");
+      break;
+    case AArch64::ArchKind::ARMV8_4A: // Mirror of the enabling case above.
+      RequestedExtensions.push_back("nosm4");
+      RequestedExtensions.push_back("nosha3");
+      RequestedExtensions.push_back("nosha2");
+      RequestedExtensions.push_back("noaes");
+      break;
+    }
+  }
+}
+
 /// parseDirectiveArch
 ///   ::= .arch token
 bool AArch64AsmParser::parseDirectiveArch(SMLoc L) {
@ -4793,6 +4845,8 @@ bool AArch64AsmParser::parseDirectiveArch(SMLoc L) {
  if (!ExtensionString.empty())
    ExtensionString.split(RequestedExtensions, '+');

+  ExpandCryptoAEK(ID, RequestedExtensions);
+
  FeatureBitset Features = STI.getFeatureBits();
  for (auto Name : RequestedExtensions) {
    bool EnableFeature = true;
@ -4852,6 +4906,8 @@ bool AArch64AsmParser::parseDirectiveCPU(SMLoc L) {
  STI.setDefaultFeatures(CPU, "");
  CurLoc = incrementLoc(CurLoc, CPU.size());

+  ExpandCryptoAEK(llvm::AArch64::getCPUArchKind(CPU), RequestedExtensions);
+
  FeatureBitset Features = STI.getFeatureBits();
  for (auto Name : RequestedExtensions) {
    // Advance source location past '+'.
--- a/test/MC/AArch64/arm64-diagno-predicate.s
+++ b/test/MC/AArch64/arm64-diagno-predicate.s
@ -13,7 +13,7 @@
 // CHECK-ERROR-NEXT:        ^

        pmull v0.1q, v1.1d, v2.1d
-// CHECK-ERROR: error: instruction requires: crypto
+// CHECK-ERROR: error: instruction requires: aes
 // CHECK-ERROR-NEXT:        pmull v0.1q, v1.1d, v2.1d
 // CHECK-ERROR-NEXT:        ^

--- a/test/MC/AArch64/armv8.2a-crypto-error.s
+++ b/test/MC/AArch64/armv8.2a-crypto-error.s
@ -0,0 +1,31 @@
+// RUN: not llvm-mc -triple aarch64 -mattr=+sm4,+sha3 -show-encoding < %s 2>&1 | FileCheck %s
+
+  xar v26.2d, v21.2d, v27.2d, #-1
+  xar v26.2d, v21.2d, v27.2d, #64
+  sm3tt1a v20.4s, v23.4s, v21.s[4]
+  sm3tt1b v20.4s, v23.4s, v21.s[4]
+  sm3tt2a v20.4s, v23.4s, v21.s[4]
+  sm3tt2b v20.4s, v23.4s, v21.s[4]
+  sm3tt2b v20.4s, v23.4s, v21.s[-1]
+
+// CHECK:      error: immediate must be an integer in range [0, 63].
+// CHECK-NEXT: xar v26.2d, v21.2d, v27.2d, #-1
+// CHECK-NEXT:                             ^
+// CHECK-NEXT: error: immediate must be an integer in range [0, 63].
+// CHECK-NEXT: xar v26.2d, v21.2d, v27.2d, #64
+// CHECK-NEXT:                             ^
+// CHECK-NEXT: error: vector lane must be an integer in range [0, 3].
+// CHECK-NEXT: sm3tt1a v20.4s, v23.4s, v21.s[4]
+// CHECK-NEXT:                              ^
+// CHECK-NEXT: error: vector lane must be an integer in range [0, 3].
+// CHECK-NEXT: sm3tt1b v20.4s, v23.4s, v21.s[4]
+// CHECK-NEXT:                              ^
+// CHECK-NEXT: error: vector lane must be an integer in range [0, 3].
+// CHECK-NEXT: sm3tt2a v20.4s, v23.4s, v21.s[4]
+// CHECK-NEXT:                              ^
+// CHECK-NEXT: error: vector lane must be an integer in range [0, 3].
+// CHECK-NEXT: sm3tt2b v20.4s, v23.4s, v21.s[4]
+// CHECK-NEXT:                              ^
+// CHECK-NEXT: error: vector lane must be an integer in range [0, 3].
+// CHECK-NEXT: sm3tt2b v20.4s, v23.4s, v21.s[-1]
+// CHECK-NEXT:                              ^
--- a/test/MC/AArch64/armv8.2a-crypto.s
+++ b/test/MC/AArch64/armv8.2a-crypto.s
@ -0,0 +1,64 @@
+// RUN: not llvm-mc -triple aarch64 -mattr=+sha3,-sm4 -show-encoding < %s 2> %t | FileCheck %s  --check-prefix=CHECK-SHA
+// RUN: FileCheck --check-prefix=CHECK-NO-SM < %t %s
+
+// RUN: not llvm-mc -triple aarch64 -mattr=+sm4,-sha3 -show-encoding < %s 2> %t | FileCheck %s --check-prefix=CHECK-SM
+// RUN: FileCheck --check-prefix=CHECK-NO-SHA < %t %s
+
+// RUN: not llvm-mc -triple aarch64 -show-encoding < %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-NO-SHA --check-prefix=CHECK-NO-SM < %t %s
+
+  sha512h   q0, q1, v2.2d
+  sha512h2  q0, q1, v2.2d
+  sha512su0 v11.2d, v12.2d
+  sha512su1 v11.2d, v13.2d, v14.2d
+  eor3  v25.16b, v12.16b, v7.16b, v2.16b
+  rax1  v30.2d, v29.2d, v26.2d
+  xar v26.2d, v21.2d, v27.2d, #63
+  bcax  v31.16b, v26.16b, v2.16b, v1.16b
+
+//CHECK-SHA:  sha512h   q0, q1, v2.2d                    // encoding: [0x20,0x80,0x62,0xce]
+//CHECK-SHA:  sha512h2  q0, q1, v2.2d                    // encoding: [0x20,0x84,0x62,0xce]
+//CHECK-SHA:  sha512su0 v11.2d, v12.2d                   // encoding: [0x8b,0x81,0xc0,0xce]
+//CHECK-SHA:  sha512su1 v11.2d, v13.2d, v14.2d           // encoding: [0xab,0x89,0x6e,0xce]
+//CHECK-SHA:  eor3  v25.16b, v12.16b, v7.16b, v2.16b     // encoding: [0x99,0x09,0x07,0xce]
+//CHECK-SHA:  rax1  v30.2d, v29.2d, v26.2d               // encoding: [0xbe,0x8f,0x7a,0xce]
+//CHECK-SHA:  xar v26.2d, v21.2d, v27.2d, #63            // encoding: [0xba,0xfe,0x9b,0xce]
+//CHECK-SHA:  bcax  v31.16b, v26.16b, v2.16b, v1.16b     // encoding: [0x5f,0x07,0x22,0xce]
+
+
+// CHECK-NO-SHA: error: instruction requires: sha3
+// CHECK-NO-SHA: error: instruction requires: sha3
+// CHECK-NO-SHA: error: instruction requires: sha3
+// CHECK-NO-SHA: error: instruction requires: sha3
+// CHECK-NO-SHA: error: instruction requires: sha3
+// CHECK-NO-SHA: error: instruction requires: sha3
+// CHECK-NO-SHA: error: instruction requires: sha3
+// CHECK-NO-SHA: error: instruction requires: sha3
+
+  sm3ss1  v20.4s, v23.4s, v21.4s, v22.4s
+  sm3tt1a v20.4s, v23.4s, v21.s[3]
+  sm3tt1b v20.4s, v23.4s, v21.s[3]
+  sm3tt2a v20.4s, v23.4s, v21.s[3]
+  sm3tt2b v20.4s, v23.4s, v21.s[3]
+  sm3partw1 v30.4s, v29.4s, v26.4s
+  sm3partw2 v30.4s, v29.4s, v26.4s
+  sm4ekey v11.4s, v11.4s, v19.4s
+  sm4e  v2.4s, v15.4s
+
+// CHECK-SM:  sm3ss1  v20.4s, v23.4s, v21.4s, v22.4s     // encoding: [0xf4,0x5a,0x55,0xce]
+// CHECK-SM:  sm3tt1a v20.4s, v23.4s, v21.s[3]           // encoding: [0xf4,0xb2,0x55,0xce]
+// CHECK-SM:  sm3tt1b v20.4s, v23.4s, v21.s[3]           // encoding: [0xf4,0xb6,0x55,0xce]
+// CHECK-SM:  sm3tt2a v20.4s, v23.4s, v21.s[3]           // encoding: [0xf4,0xba,0x55,0xce]
+// CHECK-SM:  sm3tt2b v20.4s, v23.4s, v21.s[3]           // encoding: [0xf4,0xbe,0x55,0xce]
+// CHECK-SM:  sm3partw1 v30.4s, v29.4s, v26.4s           // encoding: [0xbe,0xc3,0x7a,0xce]
+// CHECK-SM:  sm3partw2 v30.4s, v29.4s, v26.4s           // encoding: [0xbe,0xc7,0x7a,0xce]
+// CHECK-SM:  sm4ekey v11.4s, v11.4s, v19.4s             // encoding: [0x6b,0xc9,0x73,0xce]
+// CHECK-SM:  sm4e v2.4s, v15.4s                         // encoding: [0xe2,0x85,0xc0,0xce]
+
+// CHECK-NO-SM: error: instruction requires: sm4
+// CHECK-NO-SM: error: instruction requires: sm4
+// CHECK-NO-SM: error: instruction requires: sm4
+// CHECK-NO-SM: error: instruction requires: sm4
+// CHECK-NO-SM: error: instruction requires: sm4
+// CHECK-NO-SM: error: instruction requires: sm4
+// CHECK-NO-SM: error: instruction requires: sm4
--- a/test/MC/AArch64/directive-cpu-err.s
+++ b/test/MC/AArch64/directive-cpu-err.s
@ -28,7 +28,7 @@

    .cpu generic+nocrypto+crc
    aesd v0.16b, v2.16b
-    // CHECK: error: instruction requires: crypto
+    // CHECK: error: instruction requires: aes
    // CHECK-NEXT:   aesd v0.16b, v2.16b
    // CHECK-NEXT:   ^

@ -42,3 +42,27 @@
    // CHECK: error: unsupported architectural extension
    // CHECK-NEXT:   .cpu generic+v8.1-a
    // CHECK-NEXT:   ^
+
+    .cpu generic+noaes
+    aese v0.16b, v1.16b
+    // CHECK:       error: instruction requires: aes
+    // CHECK-NEXT:  aese v0.16b, v1.16b
+    // CHECK-NEXT:  ^
+
+    .cpu generic+nosha2
+    sha1h s0, s1
+    // CHECK:       error: instruction requires: sha2
+    // CHECK-NEXT:  sha1h s0, s1
+    // CHECK-NEXT:  ^
+
+    .cpu generic+nosha3
+    sha512h q0, q1, v2.2d
+    // CHECK:       error: instruction requires: sha3
+    // CHECK-NEXT:  sha512h q0, q1, v2.2d
+    // CHECK-NEXT:  ^
+
+    .cpu generic+nosm4
+    sm4e v2.4s, v15.4s
+    // CHECK:       error: instruction requires: sm4
+    // CHECK-NEXT:  sm4e v2.4s, v15.4s
+    // CHECK-NEXT:  ^
--- a/test/MC/AArch64/directive-cpu.s
+++ b/test/MC/AArch64/directive-cpu.s
@ -21,11 +21,27 @@
 	aesd v0.16b, v2.16b

 	.cpu generic+lse
-        casa  w5, w7, [x20]
+  casa  w5, w7, [x20]

 // CHECK:	fminnm d0, d0, d1
 // CHECK:	fminnm d0, d0, d1
 // CHECK:	addp v0.4s, v0.4s, v0.4s
 // CHECK:	crc32cx w0, w1, x3
 // CHECK:	aesd v0.16b, v2.16b
-// CHECK:       casa  w5, w7, [x20]
+// CHECK: casa  w5, w7, [x20]
+
+  .cpu generic+aes
+  aese v0.16b, v1.16b
+// CHECK: aese  v0.16b, v1.16b
+
+  .cpu generic+sha2
+  sha1h s0, s1
+// CHECK: sha1h s0, s1
+
+  .cpu generic+sha3
+  sha512h q0, q1, v2.2d
+// CHECK: sha512h q0, q1, v2.2d
+
+  .cpu generic+sm4
+  sm4e v2.4s, v15.4s
+// CHECK: sm4e  v2.4s, v15.4s
--- a/test/MC/AArch64/neon-crypto.s
+++ b/test/MC/AArch64/neon-crypto.s
@ -13,19 +13,19 @@
        aesimc v0.16b, v1.16b

 // CHECK-NO-CRYPTO: error: instruction requires a CPU feature not currently enabled
-// CHECK-NO-CRYPTO-ARM64: error: instruction requires: crypto
+// CHECK-NO-CRYPTO-ARM64: error: instruction requires: aes
 // CHECK: aese	v0.16b, v1.16b          // encoding: [0x20,0x48,0x28,0x4e]
 // CHECK: aesd	v0.16b, v1.16b          // encoding: [0x20,0x58,0x28,0x4e]
 // CHECK: aesmc	v0.16b, v1.16b          // encoding: [0x20,0x68,0x28,0x4e]
-// CHECK: aesimc	v0.16b, v1.16b          // encoding: [0x20,0x78,0x28,0x4e]
+// CHECK: aesimc	v0.16b, v1.16b        // encoding: [0x20,0x78,0x28,0x4e]

        sha1h s0, s1
        sha1su1 v0.4s, v1.4s
        sha256su0 v0.4s, v1.4s

 // CHECK: sha1h	s0, s1                  // encoding: [0x20,0x08,0x28,0x5e]
-// CHECK: sha1su1	v0.4s, v1.4s            // encoding: [0x20,0x18,0x28,0x5e]
-// CHECK: sha256su0	v0.4s, v1.4s    // encoding: [0x20,0x28,0x28,0x5e]
+// CHECK: sha1su1	v0.4s, v1.4s          // encoding: [0x20,0x18,0x28,0x5e]
+// CHECK: sha256su0	v0.4s, v1.4s        // encoding: [0x20,0x28,0x28,0x5e]

        sha1c q0, s1, v2.4s
        sha1p q0, s1, v2.4s
@ -38,8 +38,7 @@
 // CHECK: sha1c	q0, s1, v2.4s           // encoding: [0x20,0x00,0x02,0x5e]
 // CHECK: sha1p	q0, s1, v2.4s           // encoding: [0x20,0x10,0x02,0x5e]
 // CHECK: sha1m	q0, s1, v2.4s           // encoding: [0x20,0x20,0x02,0x5e]
-// CHECK: sha1su0	v0.4s, v1.4s, v2.4s     // encoding: [0x20,0x30,0x02,0x5e]
-// CHECK: sha256h	q0, q1, v2.4s           // encoding: [0x20,0x40,0x02,0x5e]
-// CHECK: sha256h2	q0, q1, v2.4s   // encoding: [0x20,0x50,0x02,0x5e]
+// CHECK: sha1su0	v0.4s, v1.4s, v2.4s   // encoding: [0x20,0x30,0x02,0x5e]
+// CHECK: sha256h	q0, q1, v2.4s         // encoding: [0x20,0x40,0x02,0x5e]
+// CHECK: sha256h2	q0, q1, v2.4s       // encoding: [0x20,0x50,0x02,0x5e]
 // CHECK: sha256su1	v0.4s, v1.4s, v2.4s // encoding: [0x20,0x60,0x02,0x5e]
-
--- a/test/MC/AArch64/nofp-crypto-diagnostic.s
+++ b/test/MC/AArch64/nofp-crypto-diagnostic.s
@ -3,6 +3,6 @@

        sha1h s0, s1

-// CHECK-ERROR: error: instruction requires: crypto
+// CHECK-ERROR: error: instruction requires: sha2
 // CHECK-ERROR-NEXT:    sha1h s0, s1
 // CHECK-ERROR-NEXT:    ^
--- a/test/MC/Disassembler/AArch64/armv8.2a-crypto.txt
+++ b/test/MC/Disassembler/AArch64/armv8.2a-crypto.txt
@ -0,0 +1,93 @@
+# RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8.4a,+crypto,+sm4,+sha3 --disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8.4a,-crypto,-sm4,-sha3 --disassemble < %s 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR
+
+0x20,0x80,0x62,0xce
+0x20,0x84,0x62,0xce
+0x8b,0x81,0xc0,0xce
+0xab,0x89,0x6e,0xce
+
+# CHECK:  sha512h   q0, q1, v2.2d
+# CHECK:  sha512h2  q0, q1, v2.2d
+# CHECK:  sha512su0 v11.2d, v12.2d
+# CHECK:  sha512su1 v11.2d, v13.2d, v14.2d
+
+0x99,0x09,0x07,0xce
+0xbe,0x8f,0x7a,0xce
+0xba,0xfe,0x9b,0xce
+0x5f,0x07,0x22,0xce
+0xf4,0x5a,0x55,0xce
+0xf4,0xb2,0x55,0xce
+0xf4,0xb6,0x55,0xce
+0xf4,0xba,0x55,0xce
+0xf4,0xbe,0x55,0xce
+0xbe,0xc3,0x7a,0xce
+0xbe,0xc7,0x7a,0xce
+0x6b,0xc9,0x73,0xce
+0xe2,0x85,0xc0,0xce
+
+# CHECK:  eor3  v25.16b, v12.16b, v7.16b, v2.16b
+# CHECK:  rax1  v30.2d, v29.2d, v26.2d
+# CHECK:  xar v26.2d, v21.2d, v27.2d, #63
+# CHECK:  bcax  v31.16b, v26.16b, v2.16b, v1.16b
+# CHECK:  sm3ss1  v20.4s, v23.4s, v21.4s, v22.4s
+# CHECK:  sm3tt1a v20.4s, v23.4s, v21.s[3]
+# CHECK:  sm3tt1b v20.4s, v23.4s, v21.s[3]
+# CHECK:  sm3tt2a v20.4s, v23.4s, v21.s[3]
+# CHECK:  sm3tt2b v20.4s, v23.4s, v21.s[3]
+# CHECK:  sm3partw1 v30.4s, v29.4s, v26.4s
+# CHECK:  sm3partw2 v30.4s, v29.4s, v26.4s
+# CHECK:  sm4ekey v11.4s, v11.4s, v19.4s
+# CHECK:  sm4e  v2.4s, v15.4s
+
+#CHECK-ERROR: warning: invalid instruction encoding
+#CHECK-ERROR: 0x20,0x80,0x62,0xce
+#CHECK-ERROR: ^
+#CHECK-ERROR: warning: invalid instruction encoding
+#CHECK-ERROR: 0x20,0x84,0x62,0xce
+#CHECK-ERROR: ^
+#CHECK-ERROR: warning: invalid instruction encoding
+#CHECK-ERROR: 0x8b,0x81,0xc0,0xce
+#CHECK-ERROR: ^
+#CHECK-ERROR: warning: invalid instruction encoding
+#CHECK-ERROR: 0xab,0x89,0x6e,0xce
+#CHECK-ERROR: ^
+#CHECK-ERROR: warning: invalid instruction encoding
+#CHECK-ERROR: 0x99,0x09,0x07,0xce
+#CHECK-ERROR: ^
+#CHECK-ERROR: warning: invalid instruction encoding
+#CHECK-ERROR: 0xbe,0x8f,0x7a,0xce
+#CHECK-ERROR: ^
+#CHECK-ERROR: warning: invalid instruction encoding
+#CHECK-ERROR: 0xba,0xfe,0x9b,0xce
+#CHECK-ERROR: ^
+#CHECK-ERROR: warning: invalid instruction encoding
+#CHECK-ERROR: 0x5f,0x07,0x22,0xce
+#CHECK-ERROR: ^
+#CHECK-ERROR: warning: invalid instruction encoding
+#CHECK-ERROR: 0xf4,0x5a,0x55,0xce
+#CHECK-ERROR: ^
+#CHECK-ERROR: warning: invalid instruction encoding
+#CHECK-ERROR: 0xf4,0xb2,0x55,0xce
+#CHECK-ERROR: ^
+#CHECK-ERROR: warning: invalid instruction encoding
+#CHECK-ERROR: 0xf4,0xb6,0x55,0xce
+#CHECK-ERROR: ^
+#CHECK-ERROR: warning: invalid instruction encoding
+#CHECK-ERROR: 0xf4,0xba,0x55,0xce
+#CHECK-ERROR: ^
+#CHECK-ERROR: warning: invalid instruction encoding
+#CHECK-ERROR: 0xf4,0xbe,0x55,0xce
+#CHECK-ERROR: ^
+#CHECK-ERROR: warning: invalid instruction encoding
+#CHECK-ERROR: 0xbe,0xc3,0x7a,0xce
+#CHECK-ERROR: ^
+#CHECK-ERROR: warning: invalid instruction encoding
+#CHECK-ERROR: 0xbe,0xc7,0x7a,0xce
+#CHECK-ERROR: ^
+#CHECK-ERROR: warning: invalid instruction encoding
+#CHECK-ERROR: 0x6b,0xc9,0x73,0xce
+#CHECK-ERROR: ^
+#CHECK-ERROR: warning: invalid instruction encoding
+#CHECK-ERROR: 0xe2,0x85,0xc0,0xce
+#CHECK-ERROR: ^
+