Merge remote-tracking branch 'origin/next' into superh

2025-02-12 18:08:42 +00:00 · 2022-10-13 12:30:15 +09:00 · 2022-10-13 12:30:15 +09:00 · 586e405a7c
commit 586e405a7c
parent 342a39a206 4875599850
458 changed files with 283774 additions and 76685 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -582,6 +582,7 @@ add_library(capstone::capstone ALIAS capstone)
 target_include_directories(capstone PUBLIC
    $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
 )
+set_property(TARGET capstone PROPERTY C_STANDARD 99)

 if(BUILD_SHARED_LIBS)
    target_compile_definitions(capstone PUBLIC CAPSTONE_SHARED)
--- a/CREDITS.TXT
+++ b/CREDITS.TXT
@ -86,3 +86,4 @@ Do Minh Tuan: Regression testing tool (cstest)
 david942j: BPF (both classic and extended) architecture.
 fanfuqiang & citypw & porto703 : RISCV architecture.
 Josh "blacktop" Maine: Arm64 architecture improvements.
+Finn Wilkinson: AArch64 update to Armv9.2-a (SME + SVE2 support)
--- a/MCRegisterInfo.c
+++ b/MCRegisterInfo.c
@ -133,6 +133,11 @@ const MCRegisterClass* MCRegisterInfo_getRegClass(const MCRegisterInfo *RI, unsi

 bool MCRegisterClass_contains(const MCRegisterClass *c, unsigned Reg)
 {
+	// Make sure that MCRegisterInfo_getRegClass didn't return 0
+	// (for calls to GETREGCLASS_CONTAIN0)
+	if(!c)
+		return false;
+
 	unsigned InByte = Reg % 8;
 	unsigned Byte = Reg / 8;

--- a/3
+++ b/3
@ -45,6 +45,9 @@ else
 CFLAGS ?= -O3
 endif

+# C99 is already enforced elsewhere (e.g. Xcode), so require it here too.
+CFLAGS += -std=gnu99
+
 ifneq (,$(findstring yes,$(CAPSTONE_X86_ATT_DISABLE)))
 CFLAGS += -DCAPSTONE_X86_ATT_DISABLE
 endif
--- a/arch/AArch64/AArch64BaseInfo.h
+++ b/arch/AArch64/AArch64BaseInfo.h
@ -32,45 +32,45 @@

 inline static unsigned getWRegFromXReg(unsigned Reg)
 {
-	switch (Reg) {
-		default: break;
-		case ARM64_REG_X0: return ARM64_REG_W0;
-		case ARM64_REG_X1: return ARM64_REG_W1;
-		case ARM64_REG_X2: return ARM64_REG_W2;
-		case ARM64_REG_X3: return ARM64_REG_W3;
-		case ARM64_REG_X4: return ARM64_REG_W4;
-		case ARM64_REG_X5: return ARM64_REG_W5;
-		case ARM64_REG_X6: return ARM64_REG_W6;
-		case ARM64_REG_X7: return ARM64_REG_W7;
-		case ARM64_REG_X8: return ARM64_REG_W8;
-		case ARM64_REG_X9: return ARM64_REG_W9;
-		case ARM64_REG_X10: return ARM64_REG_W10;
-		case ARM64_REG_X11: return ARM64_REG_W11;
-		case ARM64_REG_X12: return ARM64_REG_W12;
-		case ARM64_REG_X13: return ARM64_REG_W13;
-		case ARM64_REG_X14: return ARM64_REG_W14;
-		case ARM64_REG_X15: return ARM64_REG_W15;
-		case ARM64_REG_X16: return ARM64_REG_W16;
-		case ARM64_REG_X17: return ARM64_REG_W17;
-		case ARM64_REG_X18: return ARM64_REG_W18;
-		case ARM64_REG_X19: return ARM64_REG_W19;
-		case ARM64_REG_X20: return ARM64_REG_W20;
-		case ARM64_REG_X21: return ARM64_REG_W21;
-		case ARM64_REG_X22: return ARM64_REG_W22;
-		case ARM64_REG_X23: return ARM64_REG_W23;
-		case ARM64_REG_X24: return ARM64_REG_W24;
-		case ARM64_REG_X25: return ARM64_REG_W25;
-		case ARM64_REG_X26: return ARM64_REG_W26;
-		case ARM64_REG_X27: return ARM64_REG_W27;
-		case ARM64_REG_X28: return ARM64_REG_W28;
-		case ARM64_REG_FP: return ARM64_REG_W29;
-		case ARM64_REG_LR: return ARM64_REG_W30;
-		case ARM64_REG_SP: return ARM64_REG_WSP;
-		case ARM64_REG_XZR: return ARM64_REG_WZR;
-	}
+  switch (Reg) {
+    default: break;
+    case ARM64_REG_X0: return ARM64_REG_W0;
+    case ARM64_REG_X1: return ARM64_REG_W1;
+    case ARM64_REG_X2: return ARM64_REG_W2;
+    case ARM64_REG_X3: return ARM64_REG_W3;
+    case ARM64_REG_X4: return ARM64_REG_W4;
+    case ARM64_REG_X5: return ARM64_REG_W5;
+    case ARM64_REG_X6: return ARM64_REG_W6;
+    case ARM64_REG_X7: return ARM64_REG_W7;
+    case ARM64_REG_X8: return ARM64_REG_W8;
+    case ARM64_REG_X9: return ARM64_REG_W9;
+    case ARM64_REG_X10: return ARM64_REG_W10;
+    case ARM64_REG_X11: return ARM64_REG_W11;
+    case ARM64_REG_X12: return ARM64_REG_W12;
+    case ARM64_REG_X13: return ARM64_REG_W13;
+    case ARM64_REG_X14: return ARM64_REG_W14;
+    case ARM64_REG_X15: return ARM64_REG_W15;
+    case ARM64_REG_X16: return ARM64_REG_W16;
+    case ARM64_REG_X17: return ARM64_REG_W17;
+    case ARM64_REG_X18: return ARM64_REG_W18;
+    case ARM64_REG_X19: return ARM64_REG_W19;
+    case ARM64_REG_X20: return ARM64_REG_W20;
+    case ARM64_REG_X21: return ARM64_REG_W21;
+    case ARM64_REG_X22: return ARM64_REG_W22;
+    case ARM64_REG_X23: return ARM64_REG_W23;
+    case ARM64_REG_X24: return ARM64_REG_W24;
+    case ARM64_REG_X25: return ARM64_REG_W25;
+    case ARM64_REG_X26: return ARM64_REG_W26;
+    case ARM64_REG_X27: return ARM64_REG_W27;
+    case ARM64_REG_X28: return ARM64_REG_W28;
+    case ARM64_REG_FP: return ARM64_REG_W29;
+    case ARM64_REG_LR: return ARM64_REG_W30;
+    case ARM64_REG_SP: return ARM64_REG_WSP;
+    case ARM64_REG_XZR: return ARM64_REG_WZR;
+  }

-	// For anything else, return it unchanged.
-	return Reg;
+  // For anything else, return it unchanged.
+  return Reg;
 }

 inline static unsigned getXRegFromWReg(unsigned Reg)
@ -111,8 +111,8 @@ inline static unsigned getXRegFromWReg(unsigned Reg)
 		case ARM64_REG_WZR: return ARM64_REG_XZR;
 	}

-	// For anything else, return it unchanged.
-	return Reg;
+  // For anything else, return it unchanged.
+  return Reg;
 }

 inline static unsigned getBRegFromDReg(unsigned Reg)
@ -152,8 +152,8 @@ inline static unsigned getBRegFromDReg(unsigned Reg)
 		case ARM64_REG_D31: return ARM64_REG_B31;
 	}

-	// For anything else, return it unchanged.
-	return Reg;
+  // For anything else, return it unchanged.
+  return Reg;
 }

 inline static unsigned getDRegFromBReg(unsigned Reg)
@ -193,8 +193,8 @@ inline static unsigned getDRegFromBReg(unsigned Reg)
 		case ARM64_REG_B31: return ARM64_REG_D31;
 	}

-	// For anything else, return it unchanged.
-	return Reg;
+  // For anything else, return it unchanged.
+  return Reg;
 }

 // // Enums corresponding to AArch64 condition codes
@ -223,8 +223,8 @@ typedef enum AArch64CC_CondCode { // Meaning (integer)     Meaning (floating-poi

 inline static AArch64CC_CondCode getInvertedCondCode(AArch64CC_CondCode Code)
 {
-	// To reverse a condition it's necessary to only invert the low bit:
-	return (AArch64CC_CondCode)((unsigned)Code ^ 0x1);
+  // To reverse a condition it's necessary to only invert the low bit:
+  return (AArch64CC_CondCode)((unsigned)Code ^ 0x1);
 }

 inline static const char *getCondCodeName(AArch64CC_CondCode CC)
@ -289,62 +289,63 @@ inline static unsigned getNZCVToSatisfyCondCode(AArch64CC_CondCode Code)
 /// might even be optimal to just reorder the tables for the common instructions
 /// rather than changing the algorithm.
 typedef struct A64NamedImmMapper_Mapping {
-	const char *Name;
-	uint32_t Value;
+  const char *Name;
+  uint32_t Value;
 } A64NamedImmMapper_Mapping;

 typedef struct A64NamedImmMapper {
-	const A64NamedImmMapper_Mapping *Pairs;
-	size_t NumPairs;
-	uint32_t TooBigImm;
+  const A64NamedImmMapper_Mapping *Pairs;
+  size_t NumPairs;
+  uint32_t TooBigImm;
 } A64NamedImmMapper;

 typedef struct A64SysRegMapper {
-	const A64NamedImmMapper_Mapping *SysRegPairs;
-	const A64NamedImmMapper_Mapping *InstPairs;
-	size_t NumInstPairs;
+  const A64NamedImmMapper_Mapping *SysRegPairs;
+  const A64NamedImmMapper_Mapping *InstPairs;
+  size_t NumInstPairs;
 } A64SysRegMapper;

 typedef enum A64SE_ShiftExtSpecifiers {
-	A64SE_Invalid = -1,
-	A64SE_LSL,
-	A64SE_MSL,
-	A64SE_LSR,
-	A64SE_ASR,
-	A64SE_ROR,
+  A64SE_Invalid = -1,
+  A64SE_LSL,
+  A64SE_MSL,
+  A64SE_LSR,
+  A64SE_ASR,
+  A64SE_ROR,

-	A64SE_UXTB,
-	A64SE_UXTH,
-	A64SE_UXTW,
-	A64SE_UXTX,
+  A64SE_UXTB,
+  A64SE_UXTH,
+  A64SE_UXTW,
+  A64SE_UXTX,

-	A64SE_SXTB,
-	A64SE_SXTH,
-	A64SE_SXTW,
-	A64SE_SXTX
+  A64SE_SXTB,
+  A64SE_SXTH,
+  A64SE_SXTW,
+  A64SE_SXTX
 } A64SE_ShiftExtSpecifiers;

 typedef enum A64Layout_VectorLayout {
-	A64Layout_Invalid = -1,
-	A64Layout_VL_8B,
-	A64Layout_VL_4H,
-	A64Layout_VL_2S,
-	A64Layout_VL_1D,
+  A64Layout_Invalid = -1,
+  A64Layout_VL_8B,
+  A64Layout_VL_4H,
+  A64Layout_VL_2S,
+  A64Layout_VL_1D,

-	A64Layout_VL_16B,
-	A64Layout_VL_8H,
-	A64Layout_VL_4S,
-	A64Layout_VL_2D,
+  A64Layout_VL_16B,
+  A64Layout_VL_8H,
+  A64Layout_VL_4S,
+  A64Layout_VL_2D,

-	// Bare layout for the 128-bit vector
-	// (only show ".b", ".h", ".s", ".d" without vector number)
-	A64Layout_VL_B,
-	A64Layout_VL_H,
-	A64Layout_VL_S,
-	A64Layout_VL_D
+  // Bare layout for the 128-bit vector
+  // (only show ".b", ".h", ".s", ".d" without vector number)
+  A64Layout_VL_B,
+  A64Layout_VL_H,
+  A64Layout_VL_S,
+  A64Layout_VL_D
 } A64Layout_VectorLayout;

-inline static const char *AArch64VectorLayoutToString(A64Layout_VectorLayout Layout)
+inline static const char *
+AArch64VectorLayoutToString(A64Layout_VectorLayout Layout)
 {
 	switch (Layout) {
 		default: return NULL;	// never reach
@ -363,109 +364,110 @@ inline static const char *AArch64VectorLayoutToString(A64Layout_VectorLayout Lay
 	}
 }

-inline static A64Layout_VectorLayout AArch64StringToVectorLayout(char *LayoutStr)
+/// Map a vector layout suffix (".8b", ".4h", ..., ".b", ".h", ".s", ".d") to
+/// its A64Layout_VectorLayout value.
+/// Returns A64Layout_Invalid when LayoutStr matches none of the known suffixes.
+/// LayoutStr is handed straight to strcmp(), so it must be a valid
+/// NUL-terminated string.
+inline static A64Layout_VectorLayout
+AArch64StringToVectorLayout(char *LayoutStr)
 {
-	if (!strcmp(LayoutStr, ".8b"))
-		return A64Layout_VL_8B;
+  if (!strcmp(LayoutStr, ".8b"))
+    return A64Layout_VL_8B;

-	if (!strcmp(LayoutStr, ".4h"))
-		return A64Layout_VL_4H;
+  if (!strcmp(LayoutStr, ".4h"))
+    return A64Layout_VL_4H;

-	if (!strcmp(LayoutStr, ".2s"))
-		return A64Layout_VL_2S;
+  if (!strcmp(LayoutStr, ".2s"))
+    return A64Layout_VL_2S;

-	if (!strcmp(LayoutStr, ".1d"))
-		return A64Layout_VL_1D;
+  if (!strcmp(LayoutStr, ".1d"))
+    return A64Layout_VL_1D;

-	if (!strcmp(LayoutStr, ".16b"))
-		return A64Layout_VL_16B;
+  if (!strcmp(LayoutStr, ".16b"))
+    return A64Layout_VL_16B;

-	if (!strcmp(LayoutStr, ".8h"))
-		return A64Layout_VL_8H;
+  if (!strcmp(LayoutStr, ".8h"))
+    return A64Layout_VL_8H;

-	if (!strcmp(LayoutStr, ".4s"))
-		return A64Layout_VL_4S;
+  if (!strcmp(LayoutStr, ".4s"))
+    return A64Layout_VL_4S;

-	if (!strcmp(LayoutStr, ".2d"))
-		return A64Layout_VL_2D;
+  if (!strcmp(LayoutStr, ".2d"))
+    return A64Layout_VL_2D;

-	if (!strcmp(LayoutStr, ".b"))
-		return A64Layout_VL_B;
+  if (!strcmp(LayoutStr, ".b"))
+    return A64Layout_VL_B;

+  // The bare ".h" layout has an enumerator (A64Layout_VL_H) just like
+  // ".b", ".s" and ".d"; without this case it could never be returned.
+  if (!strcmp(LayoutStr, ".h"))
+    return A64Layout_VL_H;
+
-	if (!strcmp(LayoutStr, ".s"))
-		return A64Layout_VL_S;
+  if (!strcmp(LayoutStr, ".s"))
+    return A64Layout_VL_S;

-	if (!strcmp(LayoutStr, ".d"))
-		return A64Layout_VL_D;
+  if (!strcmp(LayoutStr, ".d"))
+    return A64Layout_VL_D;

-	return A64Layout_Invalid;
+  return A64Layout_Invalid;
 }

 /// Target Operand Flag enum.
 enum TOF {
-	//===------------------------------------------------------------------===//
-	// AArch64 Specific MachineOperand flags.
+  //===------------------------------------------------------------------===//
+  // AArch64 Specific MachineOperand flags.

-	MO_NO_FLAG,
+  MO_NO_FLAG,

-	MO_FRAGMENT = 0xf,
+  MO_FRAGMENT = 0xf,

-	/// MO_PAGE - A symbol operand with this flag represents the pc-relative
-	/// offset of the 4K page containing the symbol.  This is used with the
-	/// ADRP instruction.
-	MO_PAGE = 1,
+  /// MO_PAGE - A symbol operand with this flag represents the pc-relative
+  /// offset of the 4K page containing the symbol.  This is used with the
+  /// ADRP instruction.
+  MO_PAGE = 1,

-	/// MO_PAGEOFF - A symbol operand with this flag represents the offset of
-	/// that symbol within a 4K page.  This offset is added to the page address
-	/// to produce the complete address.
-	MO_PAGEOFF = 2,
+  /// MO_PAGEOFF - A symbol operand with this flag represents the offset of
+  /// that symbol within a 4K page.  This offset is added to the page address
+  /// to produce the complete address.
+  MO_PAGEOFF = 2,

-	/// MO_G3 - A symbol operand with this flag (granule 3) represents the high
-	/// 16-bits of a 64-bit address, used in a MOVZ or MOVK instruction
-	MO_G3 = 3,
+  /// MO_G3 - A symbol operand with this flag (granule 3) represents the high
+  /// 16-bits of a 64-bit address, used in a MOVZ or MOVK instruction
+  MO_G3 = 3,

-	/// MO_G2 - A symbol operand with this flag (granule 2) represents the bits
-	/// 32-47 of a 64-bit address, used in a MOVZ or MOVK instruction
-	MO_G2 = 4,
+  /// MO_G2 - A symbol operand with this flag (granule 2) represents the bits
+  /// 32-47 of a 64-bit address, used in a MOVZ or MOVK instruction
+  MO_G2 = 4,

-	/// MO_G1 - A symbol operand with this flag (granule 1) represents the bits
-	/// 16-31 of a 64-bit address, used in a MOVZ or MOVK instruction
-	MO_G1 = 5,
+  /// MO_G1 - A symbol operand with this flag (granule 1) represents the bits
+  /// 16-31 of a 64-bit address, used in a MOVZ or MOVK instruction
+  MO_G1 = 5,

-	/// MO_G0 - A symbol operand with this flag (granule 0) represents the bits
-	/// 0-15 of a 64-bit address, used in a MOVZ or MOVK instruction
-	MO_G0 = 6,
+  /// MO_G0 - A symbol operand with this flag (granule 0) represents the bits
+  /// 0-15 of a 64-bit address, used in a MOVZ or MOVK instruction
+  MO_G0 = 6,

-	/// MO_HI12 - This flag indicates that a symbol operand represents the bits
-	/// 13-24 of a 64-bit address, used in a arithmetic immediate-shifted-left-
-	/// by-12-bits instruction.
-	MO_HI12 = 7,
+  /// MO_HI12 - This flag indicates that a symbol operand represents the bits
+  /// 13-24 of a 64-bit address, used in a arithmetic immediate-shifted-left-
+  /// by-12-bits instruction.
+  MO_HI12 = 7,

-	/// MO_GOT - This flag indicates that a symbol operand represents the
-	/// address of the GOT entry for the symbol, rather than the address of
-	/// the symbol itself.
-	MO_GOT = 0x10,
+  /// MO_GOT - This flag indicates that a symbol operand represents the
+  /// address of the GOT entry for the symbol, rather than the address of
+  /// the symbol itself.
+  MO_GOT = 0x10,

-	/// MO_NC - Indicates whether the linker is expected to check the symbol
-	/// reference for overflow. For example in an ADRP/ADD pair of relocations
-	/// the ADRP usually does check, but not the ADD.
-	MO_NC = 0x20,
+  /// MO_NC - Indicates whether the linker is expected to check the symbol
+  /// reference for overflow. For example in an ADRP/ADD pair of relocations
+  /// the ADRP usually does check, but not the ADD.
+  MO_NC = 0x20,

-	/// MO_TLS - Indicates that the operand being accessed is some kind of
-	/// thread-local symbol. On Darwin, only one type of thread-local access
-	/// exists (pre linker-relaxation), but on ELF the TLSModel used for the
-	/// referee will affect interpretation.
-	MO_TLS = 0x40,
+  /// MO_TLS - Indicates that the operand being accessed is some kind of
+  /// thread-local symbol. On Darwin, only one type of thread-local access
+  /// exists (pre linker-relaxation), but on ELF the TLSModel used for the
+  /// referee will affect interpretation.
+  MO_TLS = 0x40,

-	/// MO_DLLIMPORT - On a symbol operand, this represents that the reference
-	/// to the symbol is for an import stub.  This is used for DLL import
-	/// storage class indication on Windows.
-	MO_DLLIMPORT = 0x80,
+  /// MO_DLLIMPORT - On a symbol operand, this represents that the reference
+  /// to the symbol is for an import stub.  This is used for DLL import
+  /// storage class indication on Windows.
+  MO_DLLIMPORT = 0x80,
 };

 typedef struct SysAlias {
-	const char *Name;
-	uint16_t Encoding;
+  const char *Name;
+  uint16_t Encoding;
 } SysAlias;

 #define AT SysAlias
@ -478,29 +480,39 @@ typedef struct SysAlias {
 #define TSB SysAlias
 #define PState SysAlias
 #define SVEPREDPAT SysAlias
+#define SVCR SysAlias
+#define BTI SysAlias

 typedef struct SysAliasReg {
-	const char *Name;
-	uint16_t Encoding;
-	bool NeedsReg;
+  const char *Name;
+  uint16_t Encoding;
+  bool NeedsReg;
 } SysAliasReg;

 #define IC SysAliasReg
 #define TLBI SysAliasReg

 typedef struct SysAliasSysReg {
-	const char *Name;
-	uint16_t Encoding;
-	bool Readable;
-	bool Writeable;
+  const char *Name;
+  uint16_t Encoding;
+  bool Readable;
+  bool Writeable;
 } SysAliasSysReg;

 #define SysReg SysAliasSysReg

+typedef struct SysAliasImm {
+  const char *Name;
+  uint16_t Encoding;
+  uint16_t ImmValue;
+} SysAliasImm;
+
+#define DBnXS SysAliasImm
+
 typedef struct ExactFPImm {
-	const char *Name;
-	int Enum;
-	const char *Repr;
+  const char *Name;
+  int Enum;
+  const char *Repr;
 } ExactFPImm;

 const AT *lookupATByEncoding(uint16_t Encoding);
@ -510,17 +522,64 @@ const IC *lookupICByEncoding(uint16_t Encoding);
 const TLBI *lookupTLBIByEncoding(uint16_t Encoding);
 const SVEPRFM *lookupSVEPRFMByEncoding(uint16_t Encoding);
 const PRFM *lookupPRFMByEncoding(uint16_t Encoding);
-const PSB *AArch64PSBHint_lookupPSBByEncoding(uint16_t Encoding);
+const PSB *lookupPSBByEncoding(uint16_t Encoding);
 const ISB *lookupISBByEncoding(uint16_t Encoding);
 const TSB *lookupTSBByEncoding(uint16_t Encoding);
 const SysReg *lookupSysRegByEncoding(uint16_t Encoding);
 const PState *lookupPStateByEncoding(uint16_t Encoding);
 const SVEPREDPAT *lookupSVEPREDPATByEncoding(uint16_t Encoding);
 const ExactFPImm *lookupExactFPImmByEnum(uint16_t Encoding);
+const SVCR *lookupSVCRByEncoding(uint8_t Encoding);
+const BTI *lookupBTIByEncoding(uint8_t Encoding);
+const DBnXS *lookupDBnXSByEncoding(uint8_t Encoding);

 // NOTE: result must be 128 bytes to contain the result
 void AArch64SysReg_genericRegisterString(uint32_t Bits, char *result);

+// ---------------------------------------------------------------------------
+// The following Structs and Enum are taken from MCInstPrinter.h in llvm.
+// These are required for the updated printAliasInstr() function in
+// $ARCHGenAsmWriter.inc
+
+/// Map from opcode to pattern list by binary search.
+typedef struct PatternsForOpcode {
+  uint32_t Opcode;
+  uint16_t PatternStart;
+  uint16_t NumPatterns;
+} PatternsForOpcode;
+
+/// Data for each alias pattern. Includes feature bits, string, number of
+/// operands, and a variadic list of conditions to check.
+typedef struct AliasPattern {
+  uint32_t AsmStrOffset;
+  uint32_t AliasCondStart;
+  uint8_t NumOperands;
+  uint8_t NumConds;
+} AliasPattern;
+
+/// Kinds of condition that an alias pattern can test.
+/// NOTE(review): presumably these are the values stored in
+/// AliasPatternCond::Kind -- confirm against the generated tables.
+enum CondKind {
+  AliasPatternCond_K_Feature,       // Match only if a feature is enabled.
+  AliasPatternCond_K_NegFeature,    // Match only if a feature is disabled.
+  AliasPatternCond_K_OrFeature,     // Match only if one of a set of features
+                                    // is enabled.
+  AliasPatternCond_K_OrNegFeature,  // Match only if one of a set of features
+                                    // is disabled.
+  AliasPatternCond_K_EndOrFeatures, // Note end of list of K_Or(Neg)?Features.
+  AliasPatternCond_K_Ignore,        // Match any operand.
+  AliasPatternCond_K_Reg,           // Match a specific register.
+  AliasPatternCond_K_TiedReg,       // Match another already matched register.
+  AliasPatternCond_K_Imm,           // Match a specific immediate.
+  AliasPatternCond_K_RegClass,      // Match registers in a class.
+  AliasPatternCond_K_Custom,        // Call custom matcher by index.
+};
+
+/// A single condition checked while matching an alias pattern.
+typedef struct AliasPatternCond {
+  int Kind;       // NOTE(review): presumably an enum CondKind value -- confirm.
+  uint32_t Value; // Payload for the condition; presumably its meaning depends
+                  // on Kind (feature, register, immediate, ...) -- confirm.
+} AliasPatternCond;
+
+// ---------------------------------------------------------------------------
+
 #include "AArch64GenSystemOperands_enum.inc"

 #endif
--- a/arch/AArch64/AArch64Disassembler.c
+++ b/arch/AArch64/AArch64Disassembler.c
@ -49,8 +49,12 @@ static DecodeStatus DecodeFPR8RegisterClass(MCInst *Inst, unsigned RegNo,
 		uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeGPR64RegisterClass(MCInst *Inst, unsigned RegNo,
 		uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeGPR64x8ClassRegisterClass(MCInst *Inst, unsigned RegNo, 
+		uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeGPR64spRegisterClass(MCInst *Inst,
 		unsigned RegNo, uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeMatrixIndexGPR32_12_15RegisterClass(MCInst *Inst,
+        unsigned RegNo, uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeGPR32RegisterClass(MCInst *Inst, unsigned RegNo,
 		uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeGPR32spRegisterClass(MCInst *Inst,
@ -79,6 +83,10 @@ static DecodeStatus DecodeZPR3RegisterClass(MCInst *Inst, unsigned RegNo,
 		uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeZPR4RegisterClass(MCInst *Inst, unsigned RegNo,
 		uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeMatrixTile(MCInst *Inst, unsigned RegNo, 
+		uint64_t Address, const void *Decoder, unsigned NumBitsForTile);
+static DecodeStatus DecodeMatrixTileListRegisterClass(MCInst *Inst,
+        unsigned RegMask, uint64_t Address, const void *Decoder);
 static DecodeStatus DecodePPRRegisterClass(MCInst *Inst, unsigned RegNo,
 		uint64_t Address, const void *Decoder);
 static DecodeStatus DecodePPR_3bRegisterClass(MCInst *Inst, unsigned RegNo,
@ -105,6 +113,8 @@ static DecodeStatus DecodeExclusiveLdStInstruction(MCInst *Inst,
 		uint32_t insn, uint64_t Address, const void *Decoder);
 static DecodeStatus DecodePairLdStInstruction(MCInst *Inst, uint32_t insn,
 		uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeAuthLoadInstruction(MCInst *Inst, uint32_t insn, 
+		uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeAddSubERegInstruction(MCInst *Inst,
 		uint32_t insn, uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeLogicalImmInstruction(MCInst *Inst,
@ -115,8 +125,8 @@ static DecodeStatus DecodeModImmTiedInstruction(MCInst *Inst,
 		uint32_t insn, uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeAdrInstruction(MCInst *Inst, uint32_t insn,
 		uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeBaseAddSubImm(MCInst *Inst, uint32_t insn,
-		uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeAddSubImmShift(MCInst *Inst, uint32_t insn,
+        uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeUnconditionalBranch(MCInst *Inst, uint32_t insn,
 		uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeSystemPStateInstruction(MCInst *Inst,
@ -165,6 +175,12 @@ static DecodeStatus DecodeGPR64commonRegisterClass(MCInst *Inst, unsigned RegNo,
 		uint64_t Addr, const void *Decoder);
 static DecodeStatus DecodeFPR128_loRegisterClass(MCInst *Inst, unsigned RegNo,
 		uint64_t Addr, const void *Decoder);
+static DecodeStatus DecodeSVCROp(MCInst *Inst, unsigned Imm, uint64_t Address, 
+		const void *Decoder);
+static DecodeStatus DecodeCPYMemOpInstruction(MCInst *Inst, uint32_t insn,
+        uint64_t Addr, const void *Decoder);
+static DecodeStatus DecodeSETMemOpInstruction(MCInst *Inst, uint32_t insn,
+        uint64_t Addr, const void *Decoder);


 static bool Check(DecodeStatus *Out, DecodeStatus In)
@ -241,6 +257,93 @@ static DecodeStatus _getInstruction(cs_struct *ud, MCInst *MI,

 	// Calling the auto-generated decoder function.
 	result = decodeInstruction_4(DecoderTable32, MI, insn, Address);
+	// If Decoding fails initially, try Fallback table.
+	if (result == MCDisassembler_Fail) {
+		result = decodeInstruction_4(DecoderTableFallback32, MI, insn, Address);
+	}
+
+	// Scratch operand describing the implicit operand that some opcodes need
+	// (filled in per case below). A zero-initialised stack object is used
+	// instead of malloc(): the operand is only needed for the duration of the
+	// MCInst_insert0()/MCInst_addOperand2() calls, and an unchecked malloc()
+	// result would be dereferenced as NULL when allocation fails.
+	MCOperand Op = { 0 };
+	switch (MCInst_getOpcode(MI)) {
+	default:
+		break;
+	// For Scalable Matrix Extension (SME) instructions that have an implicit
+	// operand for the accumulator (ZA) which isn't encoded, manually insert
+	// operand.
+	case AArch64_LDR_ZA:
+	case AArch64_STR_ZA: {
+		Op.Kind = kRegister;
+		Op.RegVal = AArch64_ZA;
+		MCInst_insert0(MI, 0, &Op);
+		// Spill and fill instructions have a single immediate used for both
+		// the vector select offset and optional memory offset. Replicate the
+		// decoded immediate.
+		MCOperand *Imm4Op = MCInst_getOperand(MI, 2);
+		// Imm4Op is expected to be an immediate operand at this point.
+		MCInst_addOperand2(MI, Imm4Op);
+		break;
+	}
+	case AArch64_LD1_MXIPXX_H_B:
+	case AArch64_LD1_MXIPXX_V_B:
+	case AArch64_ST1_MXIPXX_H_B:
+	case AArch64_ST1_MXIPXX_V_B:
+	case AArch64_INSERT_MXIPZ_H_B:
+	case AArch64_INSERT_MXIPZ_V_B:
+		// e.g.
+		// MOVA ZA0<HV>.B[<Ws>, <imm>], <Pg>/M, <Zn>.B
+		//      ^ insert implicit 8-bit element tile
+		Op.Kind = kRegister;
+		Op.RegVal = AArch64_ZAB0;
+		MCInst_insert0(MI, 0, &Op);
+		break;
+	case AArch64_EXTRACT_ZPMXI_H_B:
+	case AArch64_EXTRACT_ZPMXI_V_B:
+		// MOVA <Zd>.B, <Pg>/M, ZA0<HV>.B[<Ws>, <imm>]
+		//                      ^ insert implicit 8-bit element tile
+		Op.Kind = kRegister;
+		Op.RegVal = AArch64_ZAB0;
+		MCInst_insert0(MI, 2, &Op);
+		break;
+	case AArch64_LD1_MXIPXX_H_Q:
+	case AArch64_LD1_MXIPXX_V_Q:
+	case AArch64_ST1_MXIPXX_H_Q:
+	case AArch64_ST1_MXIPXX_V_Q:
+		// 128-bit load/store have implicit zero vector index.
+		Op.Kind = kImmediate;
+		Op.ImmVal = 0;
+		MCInst_insert0(MI, 2, &Op);
+		break;
+	// 128-bit mova have implicit zero vector index.
+	case AArch64_INSERT_MXIPZ_H_Q:
+	case AArch64_INSERT_MXIPZ_V_Q:
+		Op.Kind = kImmediate;
+		Op.ImmVal = 0;
+		MCInst_insert0(MI, 2, &Op);
+		break;
+	case AArch64_EXTRACT_ZPMXI_H_Q:
+	case AArch64_EXTRACT_ZPMXI_V_Q:
+		// Append an immediate 0 as the last operand.
+		Op.Kind = kImmediate;
+		Op.ImmVal = 0;
+		MCInst_addOperand2(MI, &Op);
+		break;
+	case AArch64_SMOVvi8to32_idx0:
+	case AArch64_SMOVvi8to64_idx0:
+	case AArch64_SMOVvi16to32_idx0:
+	case AArch64_SMOVvi16to64_idx0:
+	case AArch64_SMOVvi32to64_idx0:
+	case AArch64_UMOVvi8_idx0:
+	case AArch64_UMOVvi16_idx0:
+	case AArch64_UMOVvi32_idx0:
+	case AArch64_UMOVvi64_idx0:
+		// The "_idx0" variants get an explicit immediate 0 appended.
+		Op.Kind = kImmediate;
+		Op.ImmVal = 0;
+		MCInst_addOperand2(MI, &Op);
+		break;
+	}
+
 	if (result != MCDisassembler_Fail) {
 		*Size = 4;

@ -432,6 +535,29 @@ static DecodeStatus DecodeGPR64RegisterClass(MCInst *Inst, unsigned RegNo,
 	return Success;
 }

+// Eight-register tuples, one entry per even register encoding; indexed by
+// RegNo / 2 in DecodeGPR64x8ClassRegisterClass.
+static const unsigned GPR64x8DecoderTable[] = {
+	AArch64_X0_X1_X2_X3_X4_X5_X6_X7,
+	AArch64_X2_X3_X4_X5_X6_X7_X8_X9,
+	AArch64_X4_X5_X6_X7_X8_X9_X10_X11,
+	AArch64_X6_X7_X8_X9_X10_X11_X12_X13,
+	AArch64_X8_X9_X10_X11_X12_X13_X14_X15,
+	AArch64_X10_X11_X12_X13_X14_X15_X16_X17,
+	AArch64_X12_X13_X14_X15_X16_X17_X18_X19,
+	AArch64_X14_X15_X16_X17_X18_X19_X20_X21,
+	AArch64_X16_X17_X18_X19_X20_X21_X22_X23,
+	AArch64_X18_X19_X20_X21_X22_X23_X24_X25,
+	AArch64_X20_X21_X22_X23_X24_X25_X26_X27,
+	AArch64_X22_X23_X24_X25_X26_X27_X28_FP
+};
+
+/// Decode a GPR64x8 tuple operand and append it to Inst.
+/// Returns Fail for odd encodings and for encodings above 22, Success
+/// otherwise. Address and Decoder are unused.
+static DecodeStatus DecodeGPR64x8ClassRegisterClass(MCInst *Inst, unsigned RegNo,
+		uint64_t Address, const void *Decoder)
+{
+	// Only the even encodings 0, 2, ..., 22 have a table entry.
+	if (RegNo > 22 || (RegNo & 1) != 0)
+		return Fail;
+
+	MCOperand_CreateReg0(Inst, GPR64x8DecoderTable[RegNo / 2]);
+	return Success;
+}
+
 static DecodeStatus DecodeGPR64spRegisterClass(MCInst *Inst, unsigned RegNo,
 		uint64_t Addr, const void *Decoder)
 {
@ -449,6 +575,25 @@ static DecodeStatus DecodeGPR64spRegisterClass(MCInst *Inst, unsigned RegNo,
 	return Success;
 }

+
+// Registers W12..W15, indexed by the 2-bit register encoding.
+static const unsigned MatrixIndexGPR32_12_15DecoderTable[] = {
+	AArch64_W12,
+	AArch64_W13,
+	AArch64_W14,
+	AArch64_W15
+};
+
+/// Decode a matrix index register (W12-W15) operand and append it to Inst.
+/// Returns Fail when RegNo is greater than 3, Success otherwise.
+/// Addr and Decoder are unused.
+static DecodeStatus DecodeMatrixIndexGPR32_12_15RegisterClass(MCInst *Inst,
+		unsigned RegNo, uint64_t Addr, const void *Decoder)
+{
+	if (RegNo <= 3) {
+		MCOperand_CreateReg0(Inst, MatrixIndexGPR32_12_15DecoderTable[RegNo]);
+		return Success;
+	}
+
+	return Fail;
+}
+
 static const unsigned GPR32DecoderTable[] = {
 	AArch64_W0,  AArch64_W1,  AArch64_W2,  AArch64_W3,  AArch64_W4,
 	AArch64_W5,  AArch64_W6,  AArch64_W7,  AArch64_W8,  AArch64_W9,
@ -614,6 +759,61 @@ static DecodeStatus DecodeZPR4RegisterClass(MCInst *Inst, unsigned RegNo,
 	return Success;
 }

+/// Decode a matrix tile list operand: the mask is appended to Inst unchanged
+/// as an immediate. Returns Fail when RegMask does not fit in 8 bits.
+/// Address and Decoder are unused.
+static DecodeStatus DecodeMatrixTileListRegisterClass(MCInst *Inst,
+		unsigned RegMask, uint64_t Address, const void *Decoder)
+{
+	if (RegMask <= 0xFF) {
+		MCOperand_CreateImm0(Inst, RegMask);
+		return Success;
+	}
+
+	return Fail;
+}
+
+// ZA tile registers grouped by element size. The group selected by a tile
+// field of k bits holds 2^k entries and starts at index 2^k - 1:
+//   k = 0: ZAB0        (index 0)
+//   k = 1: ZAH0-ZAH1   (index 1)
+//   k = 2: ZAS0-ZAS3   (index 3)
+//   k = 3: ZAD0-ZAD7   (index 7)
+//   k = 4: ZAQ0-ZAQ15  (index 15)
+static const unsigned MatrixZATileDecoderTable[] = {
+	AArch64_ZAB0,
+	AArch64_ZAH0, AArch64_ZAH1,
+	AArch64_ZAS0, AArch64_ZAS1, AArch64_ZAS2, AArch64_ZAS3,
+	AArch64_ZAD0, AArch64_ZAD1, AArch64_ZAD2, AArch64_ZAD3,
+	AArch64_ZAD4, AArch64_ZAD5, AArch64_ZAD6, AArch64_ZAD7,
+	AArch64_ZAQ0, AArch64_ZAQ1, AArch64_ZAQ2, AArch64_ZAQ3,
+	AArch64_ZAQ4, AArch64_ZAQ5, AArch64_ZAQ6, AArch64_ZAQ7,
+	AArch64_ZAQ8, AArch64_ZAQ9, AArch64_ZAQ10, AArch64_ZAQ11,
+	AArch64_ZAQ12, AArch64_ZAQ13, AArch64_ZAQ14, AArch64_ZAQ15
+};
+
+/// Decode a ZA tile register operand and append it to Inst.
+/// RegNo is the encoded tile number and NumBitsForTile the width of that
+/// field (0..4), which selects the group inside MatrixZATileDecoderTable.
+/// Returns Fail when NumBitsForTile is not supported by the table or RegNo
+/// does not fit in NumBitsForTile bits; Success otherwise.
+/// Address and Decoder are unused.
+static DecodeStatus DecodeMatrixTile(MCInst *Inst, unsigned RegNo,
+		uint64_t Address, const void *Decoder, unsigned NumBitsForTile)
+{
+	// The table has no group for wider fields. Rejecting them here also
+	// keeps the shift below well defined and avoids silently decoding an
+	// unsupported width as ZAB0.
+	if (NumBitsForTile > 4)
+		return Fail;
+
+	unsigned LastReg = (1u << NumBitsForTile) - 1;
+	if (RegNo > LastReg)
+		return Fail;
+
+	// Convert the (group, tile) pair into a flat table index: the group
+	// starts at 2^NumBitsForTile - 1, which is exactly LastReg.
+	unsigned index = LastReg + RegNo;
+
+	MCOperand_CreateReg0(Inst, MatrixZATileDecoderTable[index]);
+	return Success;
+}
+
+
 static const unsigned PPRDecoderTable[] = {
 	AArch64_P0,  AArch64_P1,  AArch64_P2,  AArch64_P3,
 	AArch64_P4,  AArch64_P5,  AArch64_P6,  AArch64_P7,
@ -1600,6 +1800,39 @@ static DecodeStatus DecodePairLdStInstruction(MCInst *Inst, uint32_t insn,
 	return Success;
 }

+/// Decode an LDRAA/LDRAB instruction (indexed or writeback form).
+/// Operands are appended to Inst in this order: the writeback base register
+/// (writeback opcodes only), Rt, Rn, and the 10-bit signed offset.
+/// Returns Fail for any other opcode, SoftFail when the writeback bit is set
+/// and Rt equals Rn (unless Rn is 31), Success otherwise.
+static DecodeStatus DecodeAuthLoadInstruction(MCInst *Inst, uint32_t insn,
+		uint64_t Addr, const void *Decoder)
+{
+	unsigned Rt = fieldFromInstruction_4(insn, 0, 5);
+	unsigned Rn = fieldFromInstruction_4(insn, 5, 5);
+	// Offset is bit 22 (placed at bit 9) combined with bits 12..20.
+	uint64_t offset = fieldFromInstruction_4(insn, 22, 1) << 9 |
+						fieldFromInstruction_4(insn, 12, 9);
+	unsigned writeback = fieldFromInstruction_4(insn, 11, 1);
+	unsigned Opcode = MCInst_getOpcode(Inst);
+
+	if (Opcode == AArch64_LDRAAwriteback || Opcode == AArch64_LDRABwriteback) {
+		// Writeback forms list the base register first as well.
+		DecodeGPR64spRegisterClass(Inst, Rn /* writeback register */, Addr,
+								Decoder);
+	} else if (Opcode != AArch64_LDRAAindexed &&
+			Opcode != AArch64_LDRABindexed) {
+		return Fail;
+	}
+
+	DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
+	DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
+	DecodeSImm(Inst, offset, Addr, Decoder, 10);
+
+	return (writeback && Rt == Rn && Rn != 31) ? SoftFail : Success;
+}
+
 static DecodeStatus DecodeAddSubERegInstruction(MCInst *Inst,
 		uint32_t insn, uint64_t Addr, const void *Decoder)
 {
@ -1780,8 +2013,8 @@ static DecodeStatus DecodeAdrInstruction(MCInst *Inst, uint32_t insn,
 	return Success;
 }

-static DecodeStatus DecodeBaseAddSubImm(MCInst *Inst, uint32_t insn,
-		uint64_t Addr, const void *Decoder)
+static DecodeStatus DecodeAddSubImmShift(MCInst *Inst, uint32_t insn,
+		uint64_t Addr, const void *Decoder) 
 {
 	unsigned Rd = fieldFromInstruction_4(insn, 0, 5);
 	unsigned Rn = fieldFromInstruction_4(insn, 5, 5);
@ -1791,30 +2024,30 @@ static DecodeStatus DecodeBaseAddSubImm(MCInst *Inst, uint32_t insn,

 	unsigned ShifterVal = (Imm >> 12) & 3;
 	unsigned ImmVal = Imm & 0xFFF;
+	//   const AArch64Disassembler *Dis =
+	//       static_cast<const AArch64Disassembler *>(Decoder);

 	if (ShifterVal != 0 && ShifterVal != 1)
 		return Fail;

 	if (Datasize) {
 		if (Rd == 31 && !S)
-			DecodeGPR64spRegisterClass(Inst, Rd, Addr, Decoder);
+		DecodeGPR64spRegisterClass(Inst, Rd, Addr, Decoder);
 		else
-			DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder);
-
+		DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder);
 		DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
 	} else {
 		if (Rd == 31 && !S)
-			DecodeGPR32spRegisterClass(Inst, Rd, Addr, Decoder);
+		DecodeGPR32spRegisterClass(Inst, Rd, Addr, Decoder);
 		else
-			DecodeGPR32RegisterClass(Inst, Rd, Addr, Decoder);
-
+		DecodeGPR32RegisterClass(Inst, Rd, Addr, Decoder);
 		DecodeGPR32spRegisterClass(Inst, Rn, Addr, Decoder);
 	}

-	//if (!Dis->tryAddingSymbolicOperand(Inst, Imm, Addr, Fail, 0, 4))
+	//   if (!Dis->tryAddingSymbolicOperand(Inst, Imm, Addr, Fail, 0, 4))
 	MCOperand_CreateImm0(Inst, ImmVal);
-	MCOperand_CreateImm0(Inst, 12 * ShifterVal);
-
+	
+	MCOperand_CreateImm0(Inst, (12 * ShifterVal));
 	return Success;
 }

@ -1889,7 +2122,7 @@ static DecodeStatus DecodeGPRSeqPairsClassRegisterClass(MCInst *Inst,
 	if (RegNo & 0x1)
 		return Fail;

-	Register = AArch64MCRegisterClasses[RegClassID].RegsBegin[RegNo];
+	Register = AArch64MCRegisterClasses[RegClassID].RegsBegin[RegNo / 2];
 	MCOperand_CreateReg0(Inst, Register);

 	return Success;
@ -1968,6 +2201,62 @@ static DecodeStatus DecodeSVEIncDecImm(MCInst *Inst, unsigned Imm,
 	return Success;
 }

+/*
+ * Decode an SVCR operand.
+ *
+ * Imm is accepted only when lookupSVCRByEncoding() yields an entry for it;
+ * in that case it is appended to Inst as an immediate operand and Success
+ * is returned. Otherwise nothing is appended and the result is Fail.
+ * Address and Decoder are unused.
+ */
+static DecodeStatus DecodeSVCROp(MCInst *Inst, unsigned Imm, uint64_t Address,
+		const void *Decoder)
+{
+	// Unknown encoding: refuse to decode.
+	if (!lookupSVCRByEncoding(Imm))
+		return Fail;
+
+	MCOperand_CreateImm0(Inst, Imm);
+	return Success;
+}
+
+/*
+ * Decode a CPY* memory-copy instruction into its operand list.
+ *
+ * Register numbers come from insn bits [4:0] (Rd), [9:5] (Rn) and
+ * [20:16] (Rs). The result is Fail when any two of them are equal or when
+ * one of the register decoders evaluates to zero; otherwise Success.
+ */
+static DecodeStatus DecodeCPYMemOpInstruction(MCInst *Inst, uint32_t insn,
+		uint64_t Addr, const void *Decoder)
+{
+	unsigned Rd = fieldFromInstruction_4(insn, 0, 5);
+	unsigned Rs = fieldFromInstruction_4(insn, 16, 5);
+	unsigned Rn = fieldFromInstruction_4(insn, 5, 5);
+	int pass;
+
+	// None of the registers may alias: if they do, then the instruction is
+	// not merely unpredictable but actually entirely unallocated.
+	if (Rd == Rs || Rs == Rn || Rd == Rn)
+		return Fail;
+
+	// All three register operands are written back, so the (Rd, Rs, Rn)
+	// triple is emitted twice: once as outputs and once as inputs.
+	for (pass = 0; pass < 2; pass++) {
+		if (!DecodeGPR64commonRegisterClass(Inst, Rd, Addr, Decoder))
+			return Fail;
+		if (!DecodeGPR64commonRegisterClass(Inst, Rs, Addr, Decoder))
+			return Fail;
+		if (!DecodeGPR64RegisterClass(Inst, Rn, Addr, Decoder))
+			return Fail;
+	}
+
+	return Success;
+}
+
+/*
+ * Decode a SET* memory-set instruction into its operand list.
+ *
+ * Register numbers come from insn bits [4:0] (Rd), [9:5] (Rn) and
+ * [20:16] (Rm). The result is Fail when any two of them are equal or when
+ * one of the register decoders evaluates to zero; otherwise Success.
+ */
+static DecodeStatus DecodeSETMemOpInstruction(MCInst *Inst, uint32_t insn,
+		uint64_t Addr, const void *Decoder)
+{
+	unsigned Rd = fieldFromInstruction_4(insn, 0, 5);
+	unsigned Rm = fieldFromInstruction_4(insn, 16, 5);
+	unsigned Rn = fieldFromInstruction_4(insn, 5, 5);
+	int pass;
+
+	// None of the registers may alias: if they do, then the instruction is
+	// not merely unpredictable but actually entirely unallocated.
+	if (Rd == Rm || Rm == Rn || Rd == Rn)
+		return Fail;
+
+	// Rd and Rn (not Rm) are written back, so the (Rd, Rn) pair is emitted
+	// twice: once as outputs and once as inputs.
+	for (pass = 0; pass < 2; pass++) {
+		if (!DecodeGPR64commonRegisterClass(Inst, Rd, Addr, Decoder))
+			return Fail;
+		if (!DecodeGPR64RegisterClass(Inst, Rn, Addr, Decoder))
+			return Fail;
+	}
+
+	// Rm is a plain input and therefore appears only once, last.
+	if (!DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder))
+		return Fail;
+
+	return Success;
+}
+
 void AArch64_init(MCRegisterInfo *MRI)
 {
 	/*
@ -1980,9 +2269,9 @@ void AArch64_init(MCRegisterInfo *MRI)
 			AArch64SubRegIdxRanges, AArch64RegEncodingTable);
 	*/

-	MCRegisterInfo_InitMCRegisterInfo(MRI, AArch64RegDesc, 661,
+	MCRegisterInfo_InitMCRegisterInfo(MRI, AArch64RegDesc, 674,
 			0, 0,
-			AArch64MCRegisterClasses, 100,
+			AArch64MCRegisterClasses, 202,
 			0, 0, AArch64RegDiffLists,
 			0,
 			AArch64SubRegIdxLists, 100,
--- a/arch/AArch64/AArch64GenAsmWriter.inc
+++ b/arch/AArch64/AArch64GenAsmWriter.inc
--- a/arch/AArch64/AArch64GenDisassemblerTables.inc
+++ b/arch/AArch64/AArch64GenDisassemblerTables.inc
--- a/arch/AArch64/AArch64GenInstrInfo.inc
+++ b/arch/AArch64/AArch64GenInstrInfo.inc
--- a/arch/AArch64/AArch64GenRegisterInfo.inc
+++ b/arch/AArch64/AArch64GenRegisterInfo.inc
--- a/arch/AArch64/AArch64GenRegisterName.inc
+++ b/arch/AArch64/AArch64GenRegisterName.inc
--- a/arch/AArch64/AArch64GenRegisterV.inc
+++ b/arch/AArch64/AArch64GenRegisterV.inc
@ -1,4 +1,6 @@
-// size = 660
+// size = 673
+0,
+0,
 0,
 0,
 0,
@ -307,6 +309,37 @@ ARM64_REG_V31,
 0,
 0,
 0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
 ARM64_REG_V0,
 ARM64_REG_V1,
 ARM64_REG_V2,
@ -638,23 +671,3 @@ ARM64_REG_V31,
 0,
 0,
 0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
--- a/arch/AArch64/AArch64GenSubtargetInfo.inc
+++ b/arch/AArch64/AArch64GenSubtargetInfo.inc
@ -12,71 +12,218 @@

 enum {
  AArch64_FeatureAES = 0,
-  AArch64_FeatureAggressiveFMA = 1,
-  AArch64_FeatureAlternateSExtLoadCVTF32Pattern = 2,
-  AArch64_FeatureArithmeticBccFusion = 3,
-  AArch64_FeatureArithmeticCbzFusion = 4,
-  AArch64_FeatureBalanceFPOps = 5,
-  AArch64_FeatureCRC = 6,
-  AArch64_FeatureCrypto = 7,
-  AArch64_FeatureCustomCheapAsMoveHandling = 8,
-  AArch64_FeatureDisableLatencySchedHeuristic = 9,
-  AArch64_FeatureDotProd = 10,
-  AArch64_FeatureExynosCheapAsMoveHandling = 11,
-  AArch64_FeatureFPARMv8 = 12,
-  AArch64_FeatureFullFP16 = 13,
-  AArch64_FeatureFuseAES = 14,
-  AArch64_FeatureFuseAddress = 15,
-  AArch64_FeatureFuseCCSelect = 16,
-  AArch64_FeatureFuseLiterals = 17,
-  AArch64_FeatureLSE = 18,
-  AArch64_FeatureLSLFast = 19,
-  AArch64_FeatureNEON = 20,
-  AArch64_FeatureNoNegativeImmediates = 21,
-  AArch64_FeaturePerfMon = 22,
-  AArch64_FeaturePostRAScheduler = 23,
-  AArch64_FeaturePredictableSelectIsExpensive = 24,
-  AArch64_FeatureRAS = 25,
-  AArch64_FeatureRCPC = 26,
-  AArch64_FeatureRDM = 27,
-  AArch64_FeatureReserveX18 = 28,
-  AArch64_FeatureReserveX20 = 29,
-  AArch64_FeatureSHA2 = 30,
-  AArch64_FeatureSHA3 = 31,
-  AArch64_FeatureSM4 = 32,
-  AArch64_FeatureSPE = 33,
-  AArch64_FeatureSVE = 34,
-  AArch64_FeatureSlowMisaligned128Store = 35,
-  AArch64_FeatureSlowPaired128 = 36,
-  AArch64_FeatureSlowSTRQro = 37,
-  AArch64_FeatureStrictAlign = 38,
-  AArch64_FeatureUseAA = 39,
-  AArch64_FeatureUseRSqrt = 40,
-  AArch64_FeatureZCRegMove = 41,
-  AArch64_FeatureZCZeroing = 42,
-  AArch64_FeatureZCZeroingFPWorkaround = 43,
-  AArch64_HasV8_1aOps = 44,
-  AArch64_HasV8_2aOps = 45,
-  AArch64_HasV8_3aOps = 46,
-  AArch64_HasV8_4aOps = 47,
-  AArch64_ProcA35 = 48,
-  AArch64_ProcA53 = 49,
-  AArch64_ProcA55 = 50,
-  AArch64_ProcA57 = 51,
-  AArch64_ProcA72 = 52,
-  AArch64_ProcA73 = 53,
-  AArch64_ProcA75 = 54,
-  AArch64_ProcCyclone = 55,
-  AArch64_ProcExynosM1 = 56,
-  AArch64_ProcExynosM2 = 57,
-  AArch64_ProcExynosM3 = 58,
-  AArch64_ProcFalkor = 59,
-  AArch64_ProcKryo = 60,
-  AArch64_ProcSaphira = 61,
-  AArch64_ProcThunderX = 62,
-  AArch64_ProcThunderX2T99 = 63,
-  AArch64_ProcThunderXT81 = 64,
-  AArch64_ProcThunderXT83 = 65,
-  AArch64_ProcThunderXT88 = 66,
+  AArch64_FeatureAM = 1,
+  AArch64_FeatureAMVS = 2,
+  AArch64_FeatureAggressiveFMA = 3,
+  AArch64_FeatureAltFPCmp = 4,
+  AArch64_FeatureAlternateSExtLoadCVTF32Pattern = 5,
+  AArch64_FeatureAppleA7SysReg = 6,
+  AArch64_FeatureArithmeticBccFusion = 7,
+  AArch64_FeatureArithmeticCbzFusion = 8,
+  AArch64_FeatureBF16 = 9,
+  AArch64_FeatureBRBE = 10,
+  AArch64_FeatureBalanceFPOps = 11,
+  AArch64_FeatureBranchTargetId = 12,
+  AArch64_FeatureCCIDX = 13,
+  AArch64_FeatureCCPP = 14,
+  AArch64_FeatureCONTEXTIDREL2 = 15,
+  AArch64_FeatureCRC = 16,
+  AArch64_FeatureCacheDeepPersist = 17,
+  AArch64_FeatureCallSavedX8 = 18,
+  AArch64_FeatureCallSavedX9 = 19,
+  AArch64_FeatureCallSavedX10 = 20,
+  AArch64_FeatureCallSavedX11 = 21,
+  AArch64_FeatureCallSavedX12 = 22,
+  AArch64_FeatureCallSavedX13 = 23,
+  AArch64_FeatureCallSavedX14 = 24,
+  AArch64_FeatureCallSavedX15 = 25,
+  AArch64_FeatureCallSavedX18 = 26,
+  AArch64_FeatureCmpBccFusion = 27,
+  AArch64_FeatureComplxNum = 28,
+  AArch64_FeatureCrypto = 29,
+  AArch64_FeatureCustomCheapAsMoveHandling = 30,
+  AArch64_FeatureDIT = 31,
+  AArch64_FeatureDisableLatencySchedHeuristic = 32,
+  AArch64_FeatureDotProd = 33,
+  AArch64_FeatureEL2VMSA = 34,
+  AArch64_FeatureEL3 = 35,
+  AArch64_FeatureETE = 36,
+  AArch64_FeatureEnhancedCounterVirtualization = 37,
+  AArch64_FeatureExperimentalZeroingPseudos = 38,
+  AArch64_FeatureExynosCheapAsMoveHandling = 39,
+  AArch64_FeatureFP16FML = 40,
+  AArch64_FeatureFPARMv8 = 41,
+  AArch64_FeatureFRInt3264 = 42,
+  AArch64_FeatureFineGrainedTraps = 43,
+  AArch64_FeatureFixCortexA53_835769 = 44,
+  AArch64_FeatureFlagM = 45,
+  AArch64_FeatureForce32BitJumpTables = 46,
+  AArch64_FeatureFullFP16 = 47,
+  AArch64_FeatureFuseAES = 48,
+  AArch64_FeatureFuseAddress = 49,
+  AArch64_FeatureFuseArithmeticLogic = 50,
+  AArch64_FeatureFuseCCSelect = 51,
+  AArch64_FeatureFuseCryptoEOR = 52,
+  AArch64_FeatureFuseLiterals = 53,
+  AArch64_FeatureHBC = 54,
+  AArch64_FeatureHCX = 55,
+  AArch64_FeatureHardenSlsBlr = 56,
+  AArch64_FeatureHardenSlsNoComdat = 57,
+  AArch64_FeatureHardenSlsRetBr = 58,
+  AArch64_FeatureJS = 59,
+  AArch64_FeatureLOR = 60,
+  AArch64_FeatureLS64 = 61,
+  AArch64_FeatureLSE = 62,
+  AArch64_FeatureLSE2 = 63,
+  AArch64_FeatureLSLFast = 64,
+  AArch64_FeatureMOPS = 65,
+  AArch64_FeatureMPAM = 66,
+  AArch64_FeatureMTE = 67,
+  AArch64_FeatureMatMulFP32 = 68,
+  AArch64_FeatureMatMulFP64 = 69,
+  AArch64_FeatureMatMulInt8 = 70,
+  AArch64_FeatureNEON = 71,
+  AArch64_FeatureNV = 72,
+  AArch64_FeatureNoBTIAtReturnTwice = 73,
+  AArch64_FeatureNoNegativeImmediates = 74,
+  AArch64_FeatureNoZCZeroingFP = 75,
+  AArch64_FeatureOutlineAtomics = 76,
+  AArch64_FeaturePAN = 77,
+  AArch64_FeaturePAN_RWV = 78,
+  AArch64_FeaturePAuth = 79,
+  AArch64_FeaturePerfMon = 80,
+  AArch64_FeaturePostRAScheduler = 81,
+  AArch64_FeaturePredRes = 82,
+  AArch64_FeaturePredictableSelectIsExpensive = 83,
+  AArch64_FeaturePsUAO = 84,
+  AArch64_FeatureRAS = 85,
+  AArch64_FeatureRCPC = 86,
+  AArch64_FeatureRCPC_IMMO = 87,
+  AArch64_FeatureRDM = 88,
+  AArch64_FeatureRME = 89,
+  AArch64_FeatureRandGen = 90,
+  AArch64_FeatureReserveX1 = 91,
+  AArch64_FeatureReserveX2 = 92,
+  AArch64_FeatureReserveX3 = 93,
+  AArch64_FeatureReserveX4 = 94,
+  AArch64_FeatureReserveX5 = 95,
+  AArch64_FeatureReserveX6 = 96,
+  AArch64_FeatureReserveX7 = 97,
+  AArch64_FeatureReserveX9 = 98,
+  AArch64_FeatureReserveX10 = 99,
+  AArch64_FeatureReserveX11 = 100,
+  AArch64_FeatureReserveX12 = 101,
+  AArch64_FeatureReserveX13 = 102,
+  AArch64_FeatureReserveX14 = 103,
+  AArch64_FeatureReserveX15 = 104,
+  AArch64_FeatureReserveX18 = 105,
+  AArch64_FeatureReserveX20 = 106,
+  AArch64_FeatureReserveX21 = 107,
+  AArch64_FeatureReserveX22 = 108,
+  AArch64_FeatureReserveX23 = 109,
+  AArch64_FeatureReserveX24 = 110,
+  AArch64_FeatureReserveX25 = 111,
+  AArch64_FeatureReserveX26 = 112,
+  AArch64_FeatureReserveX27 = 113,
+  AArch64_FeatureReserveX28 = 114,
+  AArch64_FeatureReserveX30 = 115,
+  AArch64_FeatureSB = 116,
+  AArch64_FeatureSEL2 = 117,
+  AArch64_FeatureSHA2 = 118,
+  AArch64_FeatureSHA3 = 119,
+  AArch64_FeatureSM4 = 120,
+  AArch64_FeatureSME = 121,
+  AArch64_FeatureSMEF64 = 122,
+  AArch64_FeatureSMEI64 = 123,
+  AArch64_FeatureSPE = 124,
+  AArch64_FeatureSPE_EEF = 125,
+  AArch64_FeatureSSBS = 126,
+  AArch64_FeatureSVE = 127,
+  AArch64_FeatureSVE2 = 128,
+  AArch64_FeatureSVE2AES = 129,
+  AArch64_FeatureSVE2BitPerm = 130,
+  AArch64_FeatureSVE2SHA3 = 131,
+  AArch64_FeatureSVE2SM4 = 132,
+  AArch64_FeatureSlowMisaligned128Store = 133,
+  AArch64_FeatureSlowPaired128 = 134,
+  AArch64_FeatureSlowSTRQro = 135,
+  AArch64_FeatureSpecRestrict = 136,
+  AArch64_FeatureStreamingSVE = 137,
+  AArch64_FeatureStrictAlign = 138,
+  AArch64_FeatureTLB_RMI = 139,
+  AArch64_FeatureTME = 140,
+  AArch64_FeatureTRACEV8_4 = 141,
+  AArch64_FeatureTRBE = 142,
+  AArch64_FeatureTaggedGlobals = 143,
+  AArch64_FeatureUseEL1ForTP = 144,
+  AArch64_FeatureUseEL2ForTP = 145,
+  AArch64_FeatureUseEL3ForTP = 146,
+  AArch64_FeatureUseRSqrt = 147,
+  AArch64_FeatureUseScalarIncVL = 148,
+  AArch64_FeatureVH = 149,
+  AArch64_FeatureWFxT = 150,
+  AArch64_FeatureXS = 151,
+  AArch64_FeatureZCRegMove = 152,
+  AArch64_FeatureZCZeroing = 153,
+  AArch64_FeatureZCZeroingFPWorkaround = 154,
+  AArch64_FeatureZCZeroingGP = 155,
+  AArch64_HasV8_0aOps = 156,
+  AArch64_HasV8_0rOps = 157,
+  AArch64_HasV8_1aOps = 158,
+  AArch64_HasV8_2aOps = 159,
+  AArch64_HasV8_3aOps = 160,
+  AArch64_HasV8_4aOps = 161,
+  AArch64_HasV8_5aOps = 162,
+  AArch64_HasV8_6aOps = 163,
+  AArch64_HasV8_7aOps = 164,
+  AArch64_HasV8_8aOps = 165,
+  AArch64_HasV9_0aOps = 166,
+  AArch64_HasV9_1aOps = 167,
+  AArch64_HasV9_2aOps = 168,
+  AArch64_HasV9_3aOps = 169,
+  AArch64_TuneA35 = 170,
+  AArch64_TuneA53 = 171,
+  AArch64_TuneA55 = 172,
+  AArch64_TuneA57 = 173,
+  AArch64_TuneA64FX = 174,
+  AArch64_TuneA65 = 175,
+  AArch64_TuneA72 = 176,
+  AArch64_TuneA73 = 177,
+  AArch64_TuneA75 = 178,
+  AArch64_TuneA76 = 179,
+  AArch64_TuneA77 = 180,
+  AArch64_TuneA78 = 181,
+  AArch64_TuneA78C = 182,
+  AArch64_TuneA510 = 183,
+  AArch64_TuneA710 = 184,
+  AArch64_TuneAmpere1 = 185,
+  AArch64_TuneAppleA7 = 186,
+  AArch64_TuneAppleA10 = 187,
+  AArch64_TuneAppleA11 = 188,
+  AArch64_TuneAppleA12 = 189,
+  AArch64_TuneAppleA13 = 190,
+  AArch64_TuneAppleA14 = 191,
+  AArch64_TuneCarmel = 192,
+  AArch64_TuneExynosM3 = 193,
+  AArch64_TuneExynosM4 = 194,
+  AArch64_TuneFalkor = 195,
+  AArch64_TuneKryo = 196,
+  AArch64_TuneNeoverse512TVB = 197,
+  AArch64_TuneNeoverseE1 = 198,
+  AArch64_TuneNeoverseN1 = 199,
+  AArch64_TuneNeoverseN2 = 200,
+  AArch64_TuneNeoverseV1 = 201,
+  AArch64_TuneR82 = 202,
+  AArch64_TuneSaphira = 203,
+  AArch64_TuneTSV110 = 204,
+  AArch64_TuneThunderX = 205,
+  AArch64_TuneThunderX2T99 = 206,
+  AArch64_TuneThunderX3T110 = 207,
+  AArch64_TuneThunderXT81 = 208,
+  AArch64_TuneThunderXT83 = 209,
+  AArch64_TuneThunderXT88 = 210,
+  AArch64_TuneX1 = 211,
+  AArch64_TuneX2 = 212,
+  AArch64_NumSubtargetFeatures = 213
 };

--- a/arch/AArch64/AArch64GenSystemOperands.inc
+++ b/arch/AArch64/AArch64GenSystemOperands.inc
--- a/arch/AArch64/AArch64GenSystemOperands_enum.inc
+++ b/arch/AArch64/AArch64GenSystemOperands_enum.inc
@ -8,6 +8,8 @@ enum PStateValues {
  AArch64PState_PAN = 4,
  AArch64PState_UAO = 3,
  AArch64PState_DIT = 26,
+  AArch64PState_SSBS = 25,
+  AArch64PState_TCO = 28,
 };

 enum ExactFPImmValues {
--- a/arch/AArch64/AArch64InstPrinter.c
+++ b/arch/AArch64/AArch64InstPrinter.c
@ -47,7 +47,7 @@ static bool printSysAlias(MCInst *MI, SStream *O);
 static char *printAliasInstr(MCInst *MI, SStream *OS, MCRegisterInfo *MRI);
 static void printInstruction(MCInst *MI, SStream *O);
 static void printShifter(MCInst *MI, unsigned OpNum, SStream *O);
-static void printCustomAliasOperand(MCInst *MI, unsigned OpIdx,
+static void printCustomAliasOperand(MCInst *MI, uint64_t Address, unsigned OpIdx,
 		unsigned PrintMethodIdx, SStream *OS);


@ -74,10 +74,37 @@ static void op_addImm(MCInst *MI, int v)
 	}
 }

+/*
+ * Switch "SME index" mode on or off for the instruction being printed.
+ *
+ * The flag is always stored in MI->csh->doing_SME_Index. When detail mode
+ * is on and status is true, the most recently recorded detail operand is
+ * additionally rewritten in place as an ARM64_OP_SME_INDEX operand with an
+ * invalid base and a zero displacement; later operand printers fill in
+ * base/disp on that same slot.
+ */
+static void set_sme_index(MCInst *MI, bool status)
+{
+	// Doing SME Index operand
+	MI->csh->doing_SME_Index = status;
+
+	if (MI->csh->detail != CS_OPT_ON)
+		return;
+
+	if (!status)
+		return;
+
+	// With no operand recorded yet there is nothing to convert, and
+	// op_count - 1 would wrap around to an out-of-range index.
+	if (MI->flat_insn->detail->arm64.op_count == 0)
+		return;
+
+	unsigned prevOpNum = MI->flat_insn->detail->arm64.op_count - 1;
+	// NOTE(review): the detail operand index is reused as the MCInst operand
+	// index here; this assumes both lists line up for SME instructions.
+	unsigned Reg = MCOperand_getReg(MCInst_getOperand(MI, prevOpNum));
+
+	// Replace previous SME register operand with an OP_SME_INDEX operand
+	MI->flat_insn->detail->arm64.operands[prevOpNum].type = ARM64_OP_SME_INDEX;
+	MI->flat_insn->detail->arm64.operands[prevOpNum].sme_index.reg = Reg;
+	MI->flat_insn->detail->arm64.operands[prevOpNum].sme_index.base = ARM64_REG_INVALID;
+	MI->flat_insn->detail->arm64.operands[prevOpNum].sme_index.disp = 0;
+}
+
 static void set_mem_access(MCInst *MI, bool status)
 {
+	// If status == false, check if this is meant for SME_index
+	if(!status && MI->csh->doing_SME_Index) {
+		MI->csh->doing_SME_Index = status;
+		return;
+	}
+
+	// Doing Memory Operation
 	MI->csh->doing_mem = status;

+
 	if (MI->csh->detail != CS_OPT_ON)
 		return;

@ -588,37 +615,35 @@ void AArch64_printInst(MCInst *MI, SStream *O, void *Info)
 	}

 	if ((Opcode == AArch64_ORRXri || Opcode == AArch64_ORRWri) &&
-			(MCOperand_getReg(MCInst_getOperand(MI, 0)) == AArch64_XZR ||
+			(MCOperand_getReg(MCInst_getOperand(MI, 1)) == AArch64_XZR ||
 			 MCOperand_getReg(MCInst_getOperand(MI, 1)) == AArch64_WZR) &&
 			MCOperand_isImm(MCInst_getOperand(MI, 2))) {
 		int RegWidth = Opcode == AArch64_ORRXri ? 64 : 32;
 		uint64_t Value = AArch64_AM_decodeLogicalImmediate(
 				MCOperand_getImm(MCInst_getOperand(MI, 2)), RegWidth);
-		if (!AArch64_AM_isAnyMOVWMovAlias(Value, RegWidth)) {
-			SStream_concat(O, "mov\t%s, ", getRegisterName(MCOperand_getReg(MCInst_getOperand(MI, 0)), AArch64_NoRegAltName));
+		SStream_concat(O, "mov\t%s, ", getRegisterName(MCOperand_getReg(MCInst_getOperand(MI, 0)), AArch64_NoRegAltName));

-			printInt64Bang(O, SignExtend64(Value, RegWidth));
+		printInt64Bang(O, SignExtend64(Value, RegWidth));

-			if (MI->csh->detail) {
+		if (MI->csh->detail) {
 #ifndef CAPSTONE_DIET
-				uint8_t access;
-				access = get_op_access(MI->csh, MCInst_getOpcode(MI), MI->ac_idx);
-				MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].access = access;
-				MI->ac_idx++;
+			uint8_t access;
+			access = get_op_access(MI->csh, MCInst_getOpcode(MI), MI->ac_idx);
+			MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].access = access;
+			MI->ac_idx++;
 #endif
-				MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].type = ARM64_OP_REG;
-				MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].reg = MCOperand_getReg(MCInst_getOperand(MI, 0));
-				MI->flat_insn->detail->arm64.op_count++;
+			MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].type = ARM64_OP_REG;
+			MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].reg = MCOperand_getReg(MCInst_getOperand(MI, 0));
+			MI->flat_insn->detail->arm64.op_count++;

-				MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].type = ARM64_OP_IMM;
-				MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].imm = SignExtend64(Value, RegWidth);
-				MI->flat_insn->detail->arm64.op_count++;
-			}
-
-			MCInst_setOpcodePub(MI, AArch64_map_insn("mov"));
-
-			return;
+			MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].type = ARM64_OP_IMM;
+			MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].imm = SignExtend64(Value, RegWidth);
+			MI->flat_insn->detail->arm64.op_count++;
 		}
+
+		MCInst_setOpcodePub(MI, AArch64_map_insn("mov"));
+
+		return;
 	}

 	// Instruction TSB is specified as a one operand instruction, but 'csync' is
@ -868,6 +893,26 @@ void AArch64_printInst(MCInst *MI, SStream *O, void *Info)
 					}
 				}
 				break;
+			// Hacky detail filling of SMSTART and SMSTOP alias'
+			case AArch64_MSRpstatesvcrImm1:{
+				if(MI->csh->detail){
+					MI->flat_insn->detail->arm64.op_count = 2;
+#ifndef CAPSTONE_DIET
+					for (int i = 0; i < 2; i++)
+					{
+						MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].access = 
+							get_op_access(MI->csh, MCInst_getOpcode(MI), MI->ac_idx);
+						MI->ac_idx++;
+					}
+#endif
+					MI->flat_insn->detail->arm64.operands[0].type = ARM64_OP_SVCR;
+					MI->flat_insn->detail->arm64.operands[0].sys = (unsigned)ARM64_SYSREG_SVCR;
+					MI->flat_insn->detail->arm64.operands[0].svcr = lookupSVCRByEncoding(MCOperand_getImm(MCInst_getOperand(MI, 0)))->Encoding;
+					MI->flat_insn->detail->arm64.operands[1].type = ARM64_OP_IMM;
+					MI->flat_insn->detail->arm64.operands[1].imm = MCOperand_getImm(MCInst_getOperand(MI, 1));
+				}
+				break;
+			}
 		}
 	} else {
 		printInstruction(MI, O);
@ -1003,11 +1048,14 @@ static void printOperand(MCInst *MI, unsigned OpNum, SStream *O)
 				else if (MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].mem.index == ARM64_REG_INVALID) {
 					MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].mem.index = Reg;
 				}
+			} else if (MI->csh->doing_SME_Index) {
+				// Access op_count-1 as We want to add info to previous operand, not create a new one
+				MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count-1].sme_index.base = Reg;
 			} else {
 #ifndef CAPSTONE_DIET
 				uint8_t access;

-				access = get_op_access(MI->csh, MCInst_getOpcode(MI), OpNum);
+				access = get_op_access(MI->csh, MCInst_getOpcode(MI), MI->ac_idx);
 				MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].access = access;
 				MI->ac_idx++;
 #endif
@ -1036,11 +1084,14 @@ static void printOperand(MCInst *MI, unsigned OpNum, SStream *O)
 		if (MI->csh->detail) {
 			if (MI->csh->doing_mem) {
 				MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].mem.disp = (int32_t)imm;
+			} else if (MI->csh->doing_SME_Index) {
+				// Access op_count-1 as We want to add info to previous operand, not create a new one
+				MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count-1].sme_index.disp = (int32_t)imm; 
 			} else {
 #ifndef CAPSTONE_DIET
 				uint8_t access;

-				access = get_op_access(MI->csh, MCInst_getOpcode(MI), OpNum);
+				access = get_op_access(MI->csh, MCInst_getOpcode(MI), MI->ac_idx);
 				MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].access = access;
 #endif
 				MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].type = ARM64_OP_IMM;
@ -1087,6 +1138,28 @@ static void printImmHex(MCInst *MI, unsigned OpNum, SStream *O)
 	}
 }

+/*
+ * Print a signed immediate operand of the given bit Size.
+ *
+ * For Size 8 or 16 the value is narrowed to signed char / signed short
+ * before printing; any other Size prints the immediate as stored. In detail
+ * mode the operand is recorded as ARM64_OP_IMM holding the immediate as
+ * stored in the MCOperand (not the narrowed value).
+ */
+static void printSImm(MCInst *MI, unsigned OpNo, SStream *O, int Size)
+{
+	MCOperand *Op = MCInst_getOperand(MI, OpNo);
+	int64_t Imm = MCOperand_getImm(Op);
+
+	switch (Size) {
+	case 8:
+		printInt64Bang(O, (signed char) Imm);
+		break;
+	case 16:
+		printInt64Bang(O, (signed short) Imm);
+		break;
+	default:
+		printInt64Bang(O, Imm);
+		break;
+	}
+
+	if (!MI->csh->detail)
+		return;
+
+#ifndef CAPSTONE_DIET
+	uint8_t access = get_op_access(MI->csh, MCInst_getOpcode(MI), MI->ac_idx);
+	MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].access = access;
+	MI->ac_idx++;
+#endif
+	MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].type = ARM64_OP_IMM;
+	MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].imm = Imm;
+	MI->flat_insn->detail->arm64.op_count++;
+}
+
 static void printPostIncOperand(MCInst *MI, unsigned OpNum, SStream *O,
 		unsigned Imm)
 {
@ -1638,13 +1711,23 @@ static void printPSBHintOp(MCInst *MI, unsigned OpNum, SStream *O)
 	MCOperand *Op = MCInst_getOperand(MI, OpNum);
 	unsigned int psbhintop = MCOperand_getImm(Op);

-	const PSB *PSB = AArch64PSBHint_lookupPSBByEncoding(psbhintop);
+	const PSB *PSB = lookupPSBByEncoding(psbhintop);
 	if (PSB)
 		SStream_concat0(O, PSB->Name);
 	else
 		printUInt32Bang(O, psbhintop);
 }

+/*
+ * Print the operand of a BTI hint.
+ *
+ * The immediate is XORed with 32 and looked up in the BTI table; a match
+ * prints the table name, anything else prints the XORed value as an
+ * unsigned immediate. No detail operand is recorded.
+ */
+static void printBTIHintOp(MCInst *MI, unsigned OpNum, SStream *O)
+{
+	unsigned btihintop = MCOperand_getImm(MCInst_getOperand(MI, OpNum)) ^ 32;
+	const BTI *hint = lookupBTIByEncoding(btihintop);
+
+	if (!hint) {
+		printUInt32Bang(O, btihintop);
+		return;
+	}
+
+	SStream_concat0(O, hint->Name);
+}
+
 static void printFPImmOperand(MCInst *MI, unsigned OpNum, SStream *O)
 {
 	MCOperand *MO = MCInst_getOperand(MI, OpNum);
@ -1761,7 +1844,8 @@ static void printVectorList(MCInst *MI, unsigned OpNum, SStream *O,
 	}

 	for (i = 0; i < NumRegs; ++i, Reg = getNextVectorRegister(Reg, 1)) {
-		if (GETREGCLASS_CONTAIN0(AArch64_ZPRRegClassID, Reg))
+		bool isZReg = GETREGCLASS_CONTAIN0(AArch64_ZPRRegClassID, Reg);
+		if (isZReg)
 			SStream_concat(O, "%s%s", getRegisterName(Reg, AArch64_NoRegAltName), LayoutSuffix);
 		else
 			SStream_concat(O, "%s%s", getRegisterName(Reg, AArch64_vreg), LayoutSuffix);
@ -1774,9 +1858,9 @@ static void printVectorList(MCInst *MI, unsigned OpNum, SStream *O,
 			MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].access = access;
 			MI->ac_idx++;
 #endif
-
+			unsigned regForDetail = isZReg ? Reg : AArch64_map_vregister(Reg);
 			MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].type = ARM64_OP_REG;
-			MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].reg = AArch64_map_vregister(Reg);
+			MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].reg = regForDetail;
 			MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].vas = vas;
 			MI->flat_insn->detail->arm64.op_count++;
 		}
@ -2003,6 +2087,48 @@ static void printBarrierOption(MCInst *MI, unsigned OpNum, SStream *O)
 	}
 }

+/*
+ * Print the barrier option of a DSB with the nXS qualifier.
+ *
+ * A value found in the DBnXS table is printed by name and recorded (in
+ * detail mode) as ARM64_OP_BARRIER; any other value is printed as an
+ * unsigned immediate and recorded as ARM64_OP_IMM.
+ */
+static void printBarriernXSOption(MCInst *MI, unsigned OpNo, SStream *O)
+{
+	unsigned Val = MCOperand_getImm(MCInst_getOperand(MI, OpNo));
+	// assert(MI->getOpcode() == AArch64::DSBnXS);
+
+	const DBnXS *DB = lookupDBnXSByEncoding(Val);
+	const char *Name = DB ? DB->Name : NULL;
+
+	if (Name)
+		SStream_concat0(O, Name);
+	else
+		printUInt32Bang(O, Val);
+
+	if (!MI->csh->detail)
+		return;
+
+#ifndef CAPSTONE_DIET
+	uint8_t access = get_op_access(MI->csh, MCInst_getOpcode(MI), MI->ac_idx);
+	MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].access = access;
+	MI->ac_idx++;
+#endif
+
+	// The operand kind mirrors how the value was printed above.
+	if (Name) {
+		MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].type = ARM64_OP_BARRIER;
+		MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].barrier = Val;
+	} else {
+		MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].type = ARM64_OP_IMM;
+		MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].imm = Val;
+	}
+	MI->flat_insn->detail->arm64.op_count++;
+}
+
 static void printMRSSystemRegister(MCInst *MI, unsigned OpNum, SStream *O)
 {
 	unsigned Val = (unsigned)MCOperand_getImm(MCInst_getOperand(MI, OpNum));
@ -2031,6 +2157,28 @@ static void printMRSSystemRegister(MCInst *MI, unsigned OpNum, SStream *O)
 		return;
 	}

+	// Another hack for a register which has an alternative name which is not an alias,
+	// and is not in the Armv9-A documentation.
+	if( Val == ARM64_SYSREG_VSCTLR_EL2){
+		SStream_concat0(O, "ttbr0_el2");
+
+		if (MI->csh->detail) {
+#ifndef CAPSTONE_DIET
+			uint8_t access;
+
+			access = get_op_access(MI->csh, MCInst_getOpcode(MI), MI->ac_idx);
+			MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].access = access;
+			MI->ac_idx++;
+#endif
+
+			MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].type = ARM64_OP_SYS;
+			MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].sys = Val;
+			MI->flat_insn->detail->arm64.op_count++;
+		}
+
+		return;
+	}
+
 	// if (Reg && Reg->Readable && Reg->haveFeatures(STI.getFeatureBits()))
 	if (Reg && Reg->Readable) {
 		SStream_concat0(O, Reg->Name);
@ -2096,6 +2244,28 @@ static void printMSRSystemRegister(MCInst *MI, unsigned OpNum, SStream *O)
 		return;
 	}

+	// Another hack for a register which has an alternative name which is not an alias,
+	// and is not in the Armv9-A documentation.
+	if( Val == ARM64_SYSREG_VSCTLR_EL2){
+		SStream_concat0(O, "ttbr0_el2");
+
+		if (MI->csh->detail) {
+#ifndef CAPSTONE_DIET
+			uint8_t access;
+
+			access = get_op_access(MI->csh, MCInst_getOpcode(MI), MI->ac_idx);
+			MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].access = access;
+			MI->ac_idx++;
+#endif
+
+			MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].type = ARM64_OP_SYS;
+			MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].sys = Val;
+			MI->flat_insn->detail->arm64.op_count++;
+		}
+
+		return;
+	}
+
 	// if (Reg && Reg->Writeable && Reg->haveFeatures(STI.getFeatureBits()))
 	if (Reg && Reg->Writeable) {
 		SStream_concat0(O, Reg->Name);
@ -2200,6 +2370,197 @@ static void printComplexRotationOp(MCInst *MI, unsigned OpNum, SStream *O, int64
 	op_addImm(MI, (Val * Angle) + Remainder);
 }

+/*
+ * Print an SVCR operand by name and, in detail mode, record it as an
+ * ARM64_OP_SVCR operand carrying ARM64_SYSREG_SVCR and the table encoding.
+ *
+ * If the immediate has no entry in the SVCR table, nothing is printed and
+ * no operand is recorded.
+ */
+static void printSVCROp(MCInst *MI, unsigned OpNum, SStream *O)
+{
+	MCOperand *MO = MCInst_getOperand(MI, OpNum);
+	// assert(MCOperand_isImm(MO) && "Unexpected operand type!");
+	unsigned svcrop = MCOperand_getImm(MO);
+	const SVCR *svcr = lookupSVCRByEncoding(svcrop);
+
+	// The original assert(svcr) is compiled out in this port, so guard the
+	// dereference explicitly instead of crashing on an unknown encoding.
+	if (!svcr)
+		return;
+
+	SStream_concat0(O, svcr->Name);
+
+	if (MI->csh->detail) {
+#ifndef CAPSTONE_DIET
+		uint8_t access;
+
+		access = get_op_access(MI->csh, MCInst_getOpcode(MI), MI->ac_idx);
+		MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].access = access;
+		MI->ac_idx++;
+#endif
+
+		MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].type = ARM64_OP_SVCR;
+		MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].sys = (unsigned)ARM64_SYSREG_SVCR;
+		MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].svcr = svcr->Encoding;
+		MI->flat_insn->detail->arm64.op_count++;
+	}
+}
+
+/*
+ * Print a matrix register operand followed by an element-size suffix and,
+ * in detail mode, record it as an ARM64_OP_REG operand.
+ *
+ * EltSize selects the suffix: 0 -> none, 8 -> ".b", 16 -> ".h", 32 -> ".s",
+ * 64 -> ".d", 128 -> ".q". Any other value prints no suffix.
+ */
+static void printMatrix(MCInst *MI, unsigned OpNum, SStream *O, int EltSize)
+{
+	MCOperand *RegOp = MCInst_getOperand(MI, OpNum);
+	// assert(MCOperand_isReg(RegOp) && "Unexpected operand type!");
+	unsigned Reg = MCOperand_getReg(RegOp);
+
+	SStream_concat0(O, getRegisterName(Reg, AArch64_NoRegAltName));
+
+	// Start from the empty suffix so that an unsupported EltSize can never
+	// hand an uninitialized pointer to SStream_concat0().
+	const char *sizeStr = "";
+	switch (EltSize) {
+	case 0:
+		break;
+	case 8:
+		sizeStr = ".b";
+		break;
+	case 16:
+		sizeStr = ".h";
+		break;
+	case 32:
+		sizeStr = ".s";
+		break;
+	case 64:
+		sizeStr = ".d";
+		break;
+	case 128:
+		sizeStr = ".q";
+		break;
+	default:
+		// llvm_unreachable("Unsupported element size");
+		break;
+	}
+	SStream_concat0(O, sizeStr);
+
+	if (MI->csh->detail) {
+#ifndef CAPSTONE_DIET
+		uint8_t access;
+
+		access = get_op_access(MI->csh, MCInst_getOpcode(MI), MI->ac_idx);
+		MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].access = access;
+		MI->ac_idx++;
+#endif
+
+		MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].type = ARM64_OP_REG;
+		MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].reg = Reg;
+		MI->flat_insn->detail->arm64.op_count++;
+	}
+}
+
+/*
+ * Print the immediate index of a matrix operand.
+ *
+ * No new detail operand is created. When detail mode and SME-index mode are
+ * both active, the value is stored as the displacement of the previously
+ * recorded operand.
+ */
+static void printMatrixIndex(MCInst *MI, unsigned OpNum, SStream *O)
+{
+	int64_t imm = MCOperand_getImm(MCInst_getOperand(MI, OpNum));
+
+	printInt64(O, imm);
+
+	if (MI->csh->detail && MI->csh->doing_SME_Index) {
+		// op_count-1: the index belongs to the operand recorded just before.
+		MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count-1].sme_index.disp = imm;
+	}
+}
+
+/*
+ * Print a matrix tile register by its default name and, in detail mode,
+ * record it as an ARM64_OP_REG operand.
+ */
+static void printMatrixTile(MCInst *MI, unsigned OpNum, SStream *O)
+{
+	// assert(MCOperand_isReg(RegOp) && "Unexpected operand type!");
+	unsigned Reg = MCOperand_getReg(MCInst_getOperand(MI, OpNum));
+
+	SStream_concat0(O, getRegisterName(Reg, AArch64_NoRegAltName));
+
+	if (!MI->csh->detail)
+		return;
+
+#ifndef CAPSTONE_DIET
+	uint8_t access = get_op_access(MI->csh, MCInst_getOpcode(MI), MI->ac_idx);
+	MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].access = access;
+	MI->ac_idx++;
+#endif
+
+	MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].type = ARM64_OP_REG;
+	MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].reg = Reg;
+	MI->flat_insn->detail->arm64.op_count++;
+}
+
+/*
+ * Print a matrix tile vector operand and, in detail mode, record it as an
+ * ARM64_OP_REG operand.
+ *
+ * The text is the register's default name with an orientation letter
+ * inserted directly in front of each '.': 'v' when IsVertical is true,
+ * 'h' otherwise.
+ */
+static void printMatrixTileVector(MCInst *MI, unsigned OpNum, SStream *O, bool IsVertical)
+{
+	MCOperand *RegOp = MCInst_getOperand(MI, OpNum);
+	// assert(MCOperand_isReg(RegOp) && "Unexpected operand type!");
+	unsigned Reg = MCOperand_getReg(RegOp);
+	const char *RegName = getRegisterName(Reg, AArch64_NoRegAltName);
+
+	// Stream the name one character at a time instead of building it in a
+	// heap buffer: the walk stops at the terminator (never reading past the
+	// end of RegName), needs no allocation that could fail, and copes with
+	// any number of '.' characters.
+	for (const char *p = RegName; *p != '\0'; ++p) {
+		if (*p == '.')
+			SStream_concat0(O, IsVertical ? "v" : "h");
+
+		const char piece[2] = { *p, '\0' };
+		SStream_concat0(O, piece);
+	}
+
+	if (MI->csh->detail) {
+#ifndef CAPSTONE_DIET
+		uint8_t access;
+
+		access = get_op_access(MI->csh, MCInst_getOpcode(MI), MI->ac_idx);
+		MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].access = access;
+		MI->ac_idx++;
+#endif
+
+		MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].type = ARM64_OP_REG;
+		MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].reg = Reg;
+		MI->flat_insn->detail->arm64.op_count++;
+	}
+}
+
+// ZAD tile registers indexed by bit position in a tile-list mask
+// (bit I set -> entry I is printed by printMatrixTileList).
+static const unsigned MatrixZADRegisterTable[] = {
+	AArch64_ZAD0,
+	AArch64_ZAD1,
+	AArch64_ZAD2,
+	AArch64_ZAD3,
+	AArch64_ZAD4,
+	AArch64_ZAD5,
+	AArch64_ZAD6,
+	AArch64_ZAD7,
+};
+
+/*
+ * Print a brace-enclosed, comma-separated list of ZAD tile registers.
+ *
+ * The operand is an immediate mask; for each set bit I (0..7, lowest
+ * first) the register MatrixZADRegisterTable[I] is printed and, in detail
+ * mode, recorded as its own ARM64_OP_REG operand. An empty mask prints "{}".
+ */
+static void printMatrixTileList(MCInst *MI, unsigned OpNum, SStream *O)
+{
+	const unsigned MaxRegs = 8;
+	unsigned RegMask = MCOperand_getImm(MCInst_getOperand(MI, OpNum));
+	bool First = true;
+
+	SStream_concat0(O, "{");
+	for (unsigned I = 0; I < MaxRegs; ++I) {
+		if ((RegMask & (1 << I)) == 0)
+			continue;
+
+		// Separator goes between entries, never after the last one.
+		if (!First)
+			SStream_concat0(O, ", ");
+		First = false;
+
+		SStream_concat0(O, getRegisterName(MatrixZADRegisterTable[I], AArch64_NoRegAltName));
+
+		if (!MI->csh->detail)
+			continue;
+
+#ifndef CAPSTONE_DIET
+		uint8_t access = get_op_access(MI->csh, MCInst_getOpcode(MI), MI->ac_idx);
+		MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].access = access;
+		MI->ac_idx++;
+#endif
+
+		MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].type = ARM64_OP_REG;
+		MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].reg = MatrixZADRegisterTable[I];
+		MI->flat_insn->detail->arm64.op_count++;
+	}
+	SStream_concat0(O, "}");
+}
+
 static void printSVEPattern(MCInst *MI, unsigned OpNum, SStream *O)
 {
 	unsigned Val = MCOperand_getImm(MCInst_getOperand(MI, OpNum));
@ -2233,6 +2594,13 @@ static void printSVERegOp(MCInst *MI, unsigned OpNum, SStream *O, char suffix)
 	Reg = MCOperand_getReg(MCInst_getOperand(MI, OpNum));

 	if (MI->csh->detail) {
+#ifndef CAPSTONE_DIET
+			uint8_t access;
+
+			access = get_op_access(MI->csh, MCInst_getOpcode(MI), MI->ac_idx);
+			MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].access = access;
+			MI->ac_idx++;
+#endif
 		MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].type = ARM64_OP_REG;
 		MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].reg = Reg;
 		MI->flat_insn->detail->arm64.op_count++;
@ -2341,9 +2709,22 @@ static void printZPRasFPR(MCInst *MI, unsigned OpNum, SStream *O, int Width)
 		case 128: Base = AArch64_Q0; break;
 	}

-	Reg = MCOperand_getReg(MCInst_getOperand(MI, OpNum));
+	Reg = MCOperand_getReg(MCInst_getOperand(MI, OpNum)) - AArch64_Z0 + Base;

-	SStream_concat0(O, getRegisterName(Reg - AArch64_Z0 + Base, AArch64_NoRegAltName));
+	SStream_concat0(O, getRegisterName(Reg, AArch64_NoRegAltName));
+
+	if (MI->csh->detail) {
+#ifndef CAPSTONE_DIET
+		uint8_t access;
+
+		access = get_op_access(MI->csh, MCInst_getOpcode(MI), MI->ac_idx);
+		MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].access = access;
+		MI->ac_idx++;
+#endif
+		MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].type = ARM64_OP_REG;
+		MI->flat_insn->detail->arm64.operands[MI->flat_insn->detail->arm64.op_count].reg = Reg;
+		MI->flat_insn->detail->arm64.op_count++;
+	}
 }

 static void printExactFPImm(MCInst *MI, unsigned OpNum, SStream *O, unsigned ImmIs0, unsigned ImmIs1)
@ -2362,6 +2743,13 @@ static void printGPR64as32(MCInst *MI, unsigned OpNum, SStream *O)
 	SStream_concat0(O, getRegisterName(getWRegFromXReg(Reg), AArch64_NoRegAltName));
 }

+// Print the name of the AArch64_x8sub_0 sub-register of the register held in operand OpNum.
+static void printGPR64x8(MCInst *MI, unsigned OpNum, SStream *O)
+{
+	unsigned SubReg = MCRegisterInfo_getSubReg(MI->MRI, MCOperand_getReg(MCInst_getOperand(MI, OpNum)), AArch64_x8sub_0);
+	SStream_concat0(O, getRegisterName(SubReg, AArch64_NoRegAltName));
+}
+
 #define PRINT_ALIAS_INSTR
 #include "AArch64GenAsmWriter.inc"
 #include "AArch64GenRegisterName.inc"
@ -2490,6 +2878,8 @@ void AArch64_post_printer(csh handle, cs_insn *flat_insn, char *insn_asm, MCInst
 			case AArch64_LDPWpre:
 			case AArch64_LDPXpost:
 			case AArch64_LDPXpre:
+			case AArch64_LDRAAwriteback:
+			case AArch64_LDRABwriteback:
 			case AArch64_LDRBBpost:
 			case AArch64_LDRBBpre:
 			case AArch64_LDRBpost:
@ -2554,6 +2944,8 @@ void AArch64_post_printer(csh handle, cs_insn *flat_insn, char *insn_asm, MCInst
 			case AArch64_ST1i32_POST:
 			case AArch64_ST1i64_POST:
 			case AArch64_ST1i8_POST:
+			case AArch64_ST2GPostIndex:
+			case AArch64_ST2GPreIndex:
 			case AArch64_ST2Twov16b_POST:
 			case AArch64_ST2Twov2d_POST:
 			case AArch64_ST2Twov2s_POST:
@ -2587,6 +2979,10 @@ void AArch64_post_printer(csh handle, cs_insn *flat_insn, char *insn_asm, MCInst
 			case AArch64_ST4i32_POST:
 			case AArch64_ST4i64_POST:
 			case AArch64_ST4i8_POST:
+			case AArch64_STGPostIndex:
+			case AArch64_STGPpost:
+			case AArch64_STGPpre:
+			case AArch64_STGPreIndex:
 			case AArch64_STPDpost:
 			case AArch64_STPDpre:
 			case AArch64_STPQpost:
@ -2615,8 +3011,10 @@ void AArch64_post_printer(csh handle, cs_insn *flat_insn, char *insn_asm, MCInst
 			case AArch64_STRWpre:
 			case AArch64_STRXpost:
 			case AArch64_STRXpre:
-			case AArch64_LDRAAwriteback:
-			case AArch64_LDRABwriteback:
+			case AArch64_STZ2GPostIndex:
+			case AArch64_STZ2GPreIndex:
+			case AArch64_STZGPostIndex:
+			case AArch64_STZGPreIndex:
 				flat_insn->detail->arm64.writeback = true;
 				break;
 		}
--- a/arch/AArch64/AArch64Mapping.c
+++ b/arch/AArch64/AArch64Mapping.c
--- a/arch/AArch64/AArch64MappingInsn.inc
+++ b/arch/AArch64/AArch64MappingInsn.inc
--- a/arch/AArch64/AArch64MappingInsnName.inc
+++ b/arch/AArch64/AArch64MappingInsnName.inc
@ -4,14 +4,21 @@

 	"abs", // ARM64_INS_ABS,
 	"adc", // ARM64_INS_ADC,
+	"adclb", // ARM64_INS_ADCLB,
+	"adclt", // ARM64_INS_ADCLT,
 	"adcs", // ARM64_INS_ADCS,
 	"add", // ARM64_INS_ADD,
+	"addg", // ARM64_INS_ADDG,
+	"addha", // ARM64_INS_ADDHA,
 	"addhn", // ARM64_INS_ADDHN,
 	"addhn2", // ARM64_INS_ADDHN2,
+	"addhnb", // ARM64_INS_ADDHNB,
+	"addhnt", // ARM64_INS_ADDHNT,
 	"addp", // ARM64_INS_ADDP,
 	"addpl", // ARM64_INS_ADDPL,
 	"adds", // ARM64_INS_ADDS,
 	"addv", // ARM64_INS_ADDV,
+	"addva", // ARM64_INS_ADDVA,
 	"addvl", // ARM64_INS_ADDVL,
 	"adr", // ARM64_INS_ADR,
 	"adrp", // ARM64_INS_ADRP,
@ -40,9 +47,24 @@
 	"autibz", // ARM64_INS_AUTIBZ,
 	"autiza", // ARM64_INS_AUTIZA,
 	"autizb", // ARM64_INS_AUTIZB,
+	"axflag", // ARM64_INS_AXFLAG,
 	"b", // ARM64_INS_B,
+	"bc", // ARM64_INS_BC,
 	"bcax", // ARM64_INS_BCAX,
+	"bdep", // ARM64_INS_BDEP,
+	"bext", // ARM64_INS_BEXT,
+	"bfcvt", // ARM64_INS_BFCVT,
+	"bfcvtn", // ARM64_INS_BFCVTN,
+	"bfcvtn2", // ARM64_INS_BFCVTN2,
+	"bfcvtnt", // ARM64_INS_BFCVTNT,
+	"bfdot", // ARM64_INS_BFDOT,
 	"bfm", // ARM64_INS_BFM,
+	"bfmlalb", // ARM64_INS_BFMLALB,
+	"bfmlalt", // ARM64_INS_BFMLALT,
+	"bfmmla", // ARM64_INS_BFMMLA,
+	"bfmopa", // ARM64_INS_BFMOPA,
+	"bfmops", // ARM64_INS_BFMOPS,
+	"bgrp", // ARM64_INS_BGRP,
 	"bic", // ARM64_INS_BIC,
 	"bics", // ARM64_INS_BICS,
 	"bif", // ARM64_INS_BIF,
@ -58,6 +80,7 @@
 	"braaz", // ARM64_INS_BRAAZ,
 	"brab", // ARM64_INS_BRAB,
 	"brabz", // ARM64_INS_BRABZ,
+	"brb", // ARM64_INS_BRB,
 	"brk", // ARM64_INS_BRK,
 	"brka", // ARM64_INS_BRKA,
 	"brkas", // ARM64_INS_BRKAS,
@ -70,6 +93,10 @@
 	"brkpb", // ARM64_INS_BRKPB,
 	"brkpbs", // ARM64_INS_BRKPBS,
 	"bsl", // ARM64_INS_BSL,
+	"bsl1n", // ARM64_INS_BSL1N,
+	"bsl2n", // ARM64_INS_BSL2N,
+	"bti", // ARM64_INS_BTI,
+	"cadd", // ARM64_INS_CADD,
 	"cas", // ARM64_INS_CAS,
 	"casa", // ARM64_INS_CASA,
 	"casab", // ARM64_INS_CASAB,
@ -90,6 +117,7 @@
 	"cbz", // ARM64_INS_CBZ,
 	"ccmn", // ARM64_INS_CCMN,
 	"ccmp", // ARM64_INS_CCMP,
+	"cdot", // ARM64_INS_CDOT,
 	"cfinv", // ARM64_INS_CFINV,
 	"cinc", // ARM64_INS_CINC,
 	"cinv", // ARM64_INS_CINV,
@ -103,6 +131,7 @@
 	"cmgt", // ARM64_INS_CMGT,
 	"cmhi", // ARM64_INS_CMHI,
 	"cmhs", // ARM64_INS_CMHS,
+	"cmla", // ARM64_INS_CMLA,
 	"cmle", // ARM64_INS_CMLE,
 	"cmlo", // ARM64_INS_CMLO,
 	"cmls", // ARM64_INS_CMLS,
@ -119,6 +148,7 @@
 	"cmpls", // ARM64_INS_CMPLS,
 	"cmplt", // ARM64_INS_CMPLT,
 	"cmpne", // ARM64_INS_CMPNE,
+	"cmpp", // ARM64_INS_CMPP,
 	"cmtst", // ARM64_INS_CMTST,
 	"cneg", // ARM64_INS_CNEG,
 	"cnot", // ARM64_INS_CNOT,
@ -130,6 +160,102 @@
 	"cntw", // ARM64_INS_CNTW,
 	"compact", // ARM64_INS_COMPACT,
 	"cpy", // ARM64_INS_CPY,
+	"cpye", // ARM64_INS_CPYE,
+	"cpyen", // ARM64_INS_CPYEN,
+	"cpyern", // ARM64_INS_CPYERN,
+	"cpyert", // ARM64_INS_CPYERT,
+	"cpyertn", // ARM64_INS_CPYERTN,
+	"cpyertrn", // ARM64_INS_CPYERTRN,
+	"cpyertwn", // ARM64_INS_CPYERTWN,
+	"cpyet", // ARM64_INS_CPYET,
+	"cpyetn", // ARM64_INS_CPYETN,
+	"cpyetrn", // ARM64_INS_CPYETRN,
+	"cpyetwn", // ARM64_INS_CPYETWN,
+	"cpyewn", // ARM64_INS_CPYEWN,
+	"cpyewt", // ARM64_INS_CPYEWT,
+	"cpyewtn", // ARM64_INS_CPYEWTN,
+	"cpyewtrn", // ARM64_INS_CPYEWTRN,
+	"cpyewtwn", // ARM64_INS_CPYEWTWN,
+	"cpyfe", // ARM64_INS_CPYFE,
+	"cpyfen", // ARM64_INS_CPYFEN,
+	"cpyfern", // ARM64_INS_CPYFERN,
+	"cpyfert", // ARM64_INS_CPYFERT,
+	"cpyfertn", // ARM64_INS_CPYFERTN,
+	"cpyfertrn", // ARM64_INS_CPYFERTRN,
+	"cpyfertwn", // ARM64_INS_CPYFERTWN,
+	"cpyfet", // ARM64_INS_CPYFET,
+	"cpyfetn", // ARM64_INS_CPYFETN,
+	"cpyfetrn", // ARM64_INS_CPYFETRN,
+	"cpyfetwn", // ARM64_INS_CPYFETWN,
+	"cpyfewn", // ARM64_INS_CPYFEWN,
+	"cpyfewt", // ARM64_INS_CPYFEWT,
+	"cpyfewtn", // ARM64_INS_CPYFEWTN,
+	"cpyfewtrn", // ARM64_INS_CPYFEWTRN,
+	"cpyfewtwn", // ARM64_INS_CPYFEWTWN,
+	"cpyfm", // ARM64_INS_CPYFM,
+	"cpyfmn", // ARM64_INS_CPYFMN,
+	"cpyfmrn", // ARM64_INS_CPYFMRN,
+	"cpyfmrt", // ARM64_INS_CPYFMRT,
+	"cpyfmrtn", // ARM64_INS_CPYFMRTN,
+	"cpyfmrtrn", // ARM64_INS_CPYFMRTRN,
+	"cpyfmrtwn", // ARM64_INS_CPYFMRTWN,
+	"cpyfmt", // ARM64_INS_CPYFMT,
+	"cpyfmtn", // ARM64_INS_CPYFMTN,
+	"cpyfmtrn", // ARM64_INS_CPYFMTRN,
+	"cpyfmtwn", // ARM64_INS_CPYFMTWN,
+	"cpyfmwn", // ARM64_INS_CPYFMWN,
+	"cpyfmwt", // ARM64_INS_CPYFMWT,
+	"cpyfmwtn", // ARM64_INS_CPYFMWTN,
+	"cpyfmwtrn", // ARM64_INS_CPYFMWTRN,
+	"cpyfmwtwn", // ARM64_INS_CPYFMWTWN,
+	"cpyfp", // ARM64_INS_CPYFP,
+	"cpyfpn", // ARM64_INS_CPYFPN,
+	"cpyfprn", // ARM64_INS_CPYFPRN,
+	"cpyfprt", // ARM64_INS_CPYFPRT,
+	"cpyfprtn", // ARM64_INS_CPYFPRTN,
+	"cpyfprtrn", // ARM64_INS_CPYFPRTRN,
+	"cpyfprtwn", // ARM64_INS_CPYFPRTWN,
+	"cpyfpt", // ARM64_INS_CPYFPT,
+	"cpyfptn", // ARM64_INS_CPYFPTN,
+	"cpyfptrn", // ARM64_INS_CPYFPTRN,
+	"cpyfptwn", // ARM64_INS_CPYFPTWN,
+	"cpyfpwn", // ARM64_INS_CPYFPWN,
+	"cpyfpwt", // ARM64_INS_CPYFPWT,
+	"cpyfpwtn", // ARM64_INS_CPYFPWTN,
+	"cpyfpwtrn", // ARM64_INS_CPYFPWTRN,
+	"cpyfpwtwn", // ARM64_INS_CPYFPWTWN,
+	"cpym", // ARM64_INS_CPYM,
+	"cpymn", // ARM64_INS_CPYMN,
+	"cpymrn", // ARM64_INS_CPYMRN,
+	"cpymrt", // ARM64_INS_CPYMRT,
+	"cpymrtn", // ARM64_INS_CPYMRTN,
+	"cpymrtrn", // ARM64_INS_CPYMRTRN,
+	"cpymrtwn", // ARM64_INS_CPYMRTWN,
+	"cpymt", // ARM64_INS_CPYMT,
+	"cpymtn", // ARM64_INS_CPYMTN,
+	"cpymtrn", // ARM64_INS_CPYMTRN,
+	"cpymtwn", // ARM64_INS_CPYMTWN,
+	"cpymwn", // ARM64_INS_CPYMWN,
+	"cpymwt", // ARM64_INS_CPYMWT,
+	"cpymwtn", // ARM64_INS_CPYMWTN,
+	"cpymwtrn", // ARM64_INS_CPYMWTRN,
+	"cpymwtwn", // ARM64_INS_CPYMWTWN,
+	"cpyp", // ARM64_INS_CPYP,
+	"cpypn", // ARM64_INS_CPYPN,
+	"cpyprn", // ARM64_INS_CPYPRN,
+	"cpyprt", // ARM64_INS_CPYPRT,
+	"cpyprtn", // ARM64_INS_CPYPRTN,
+	"cpyprtrn", // ARM64_INS_CPYPRTRN,
+	"cpyprtwn", // ARM64_INS_CPYPRTWN,
+	"cpypt", // ARM64_INS_CPYPT,
+	"cpyptn", // ARM64_INS_CPYPTN,
+	"cpyptrn", // ARM64_INS_CPYPTRN,
+	"cpyptwn", // ARM64_INS_CPYPTWN,
+	"cpypwn", // ARM64_INS_CPYPWN,
+	"cpypwt", // ARM64_INS_CPYPWT,
+	"cpypwtn", // ARM64_INS_CPYPWTN,
+	"cpypwtrn", // ARM64_INS_CPYPWTRN,
+	"cpypwtwn", // ARM64_INS_CPYPWTWN,
 	"crc32b", // ARM64_INS_CRC32B,
 	"crc32cb", // ARM64_INS_CRC32CB,
 	"crc32ch", // ARM64_INS_CRC32CH,
@ -155,6 +281,8 @@
 	"dech", // ARM64_INS_DECH,
 	"decp", // ARM64_INS_DECP,
 	"decw", // ARM64_INS_DECW,
+	"dfb", // ARM64_INS_DFB,
+	"dgh", // ARM64_INS_DGH,
 	"dmb", // ARM64_INS_DMB,
 	"drps", // ARM64_INS_DRPS,
 	"dsb", // ARM64_INS_DSB,
@ -163,7 +291,9 @@
 	"eon", // ARM64_INS_EON,
 	"eor", // ARM64_INS_EOR,
 	"eor3", // ARM64_INS_EOR3,
+	"eorbt", // ARM64_INS_EORBT,
 	"eors", // ARM64_INS_EORS,
+	"eortb", // ARM64_INS_EORTB,
 	"eorv", // ARM64_INS_EORV,
 	"eret", // ARM64_INS_ERET,
 	"eretaa", // ARM64_INS_ERETAA,
@ -201,16 +331,20 @@
 	"fcvtau", // ARM64_INS_FCVTAU,
 	"fcvtl", // ARM64_INS_FCVTL,
 	"fcvtl2", // ARM64_INS_FCVTL2,
+	"fcvtlt", // ARM64_INS_FCVTLT,
 	"fcvtms", // ARM64_INS_FCVTMS,
 	"fcvtmu", // ARM64_INS_FCVTMU,
 	"fcvtn", // ARM64_INS_FCVTN,
 	"fcvtn2", // ARM64_INS_FCVTN2,
 	"fcvtns", // ARM64_INS_FCVTNS,
+	"fcvtnt", // ARM64_INS_FCVTNT,
 	"fcvtnu", // ARM64_INS_FCVTNU,
 	"fcvtps", // ARM64_INS_FCVTPS,
 	"fcvtpu", // ARM64_INS_FCVTPU,
+	"fcvtx", // ARM64_INS_FCVTX,
 	"fcvtxn", // ARM64_INS_FCVTXN,
 	"fcvtxn2", // ARM64_INS_FCVTXN2,
+	"fcvtxnt", // ARM64_INS_FCVTXNT,
 	"fcvtzs", // ARM64_INS_FCVTZS,
 	"fcvtzu", // ARM64_INS_FCVTZU,
 	"fdiv", // ARM64_INS_FDIV,
@ -218,6 +352,7 @@
 	"fdup", // ARM64_INS_FDUP,
 	"fexpa", // ARM64_INS_FEXPA,
 	"fjcvtzs", // ARM64_INS_FJCVTZS,
+	"flogb", // ARM64_INS_FLOGB,
 	"fmad", // ARM64_INS_FMAD,
 	"fmadd", // ARM64_INS_FMADD,
 	"fmax", // ARM64_INS_FMAX,
@ -233,7 +368,18 @@
 	"fminp", // ARM64_INS_FMINP,
 	"fminv", // ARM64_INS_FMINV,
 	"fmla", // ARM64_INS_FMLA,
+	"fmlal", // ARM64_INS_FMLAL,
+	"fmlal2", // ARM64_INS_FMLAL2,
+	"fmlalb", // ARM64_INS_FMLALB,
+	"fmlalt", // ARM64_INS_FMLALT,
 	"fmls", // ARM64_INS_FMLS,
+	"fmlsl", // ARM64_INS_FMLSL,
+	"fmlsl2", // ARM64_INS_FMLSL2,
+	"fmlslb", // ARM64_INS_FMLSLB,
+	"fmlslt", // ARM64_INS_FMLSLT,
+	"fmmla", // ARM64_INS_FMMLA,
+	"fmopa", // ARM64_INS_FMOPA,
+	"fmops", // ARM64_INS_FMOPS,
 	"fmov", // ARM64_INS_FMOV,
 	"fmsb", // ARM64_INS_FMSB,
 	"fmsub", // ARM64_INS_FMSUB,
@ -250,6 +396,10 @@
 	"frecpe", // ARM64_INS_FRECPE,
 	"frecps", // ARM64_INS_FRECPS,
 	"frecpx", // ARM64_INS_FRECPX,
+	"frint32x", // ARM64_INS_FRINT32X,
+	"frint32z", // ARM64_INS_FRINT32Z,
+	"frint64x", // ARM64_INS_FRINT64X,
+	"frint64z", // ARM64_INS_FRINT64Z,
 	"frinta", // ARM64_INS_FRINTA,
 	"frinti", // ARM64_INS_FRINTI,
 	"frintm", // ARM64_INS_FRINTM,
@ -266,7 +416,10 @@
 	"ftmad", // ARM64_INS_FTMAD,
 	"ftsmul", // ARM64_INS_FTSMUL,
 	"ftssel", // ARM64_INS_FTSSEL,
+	"gmi", // ARM64_INS_GMI,
 	"hint", // ARM64_INS_HINT,
+	"histcnt", // ARM64_INS_HISTCNT,
+	"histseg", // ARM64_INS_HISTSEG,
 	"hlt", // ARM64_INS_HLT,
 	"hvc", // ARM64_INS_HVC,
 	"incb", // ARM64_INS_INCB,
@ -277,6 +430,7 @@
 	"index", // ARM64_INS_INDEX,
 	"ins", // ARM64_INS_INS,
 	"insr", // ARM64_INS_INSR,
+	"irg", // ARM64_INS_IRG,
 	"isb", // ARM64_INS_ISB,
 	"lasta", // ARM64_INS_LASTA,
 	"lastb", // ARM64_INS_LASTB,
@ -284,10 +438,15 @@
 	"ld1b", // ARM64_INS_LD1B,
 	"ld1d", // ARM64_INS_LD1D,
 	"ld1h", // ARM64_INS_LD1H,
+	"ld1q", // ARM64_INS_LD1Q,
 	"ld1r", // ARM64_INS_LD1R,
 	"ld1rb", // ARM64_INS_LD1RB,
 	"ld1rd", // ARM64_INS_LD1RD,
 	"ld1rh", // ARM64_INS_LD1RH,
+	"ld1rob", // ARM64_INS_LD1ROB,
+	"ld1rod", // ARM64_INS_LD1ROD,
+	"ld1roh", // ARM64_INS_LD1ROH,
+	"ld1row", // ARM64_INS_LD1ROW,
 	"ld1rqb", // ARM64_INS_LD1RQB,
 	"ld1rqd", // ARM64_INS_LD1RQD,
 	"ld1rqh", // ARM64_INS_LD1RQH,
@ -318,6 +477,7 @@
 	"ld4h", // ARM64_INS_LD4H,
 	"ld4r", // ARM64_INS_LD4R,
 	"ld4w", // ARM64_INS_LD4W,
+	"ld64b", // ARM64_INS_LD64B,
 	"ldadd", // ARM64_INS_LDADD,
 	"ldadda", // ARM64_INS_LDADDA,
 	"ldaddab", // ARM64_INS_LDADDAB,
@ -377,6 +537,8 @@
 	"ldff1sh", // ARM64_INS_LDFF1SH,
 	"ldff1sw", // ARM64_INS_LDFF1SW,
 	"ldff1w", // ARM64_INS_LDFF1W,
+	"ldg", // ARM64_INS_LDG,
+	"ldgm", // ARM64_INS_LDGM,
 	"ldlar", // ARM64_INS_LDLAR,
 	"ldlarb", // ARM64_INS_LDLARB,
 	"ldlarh", // ARM64_INS_LDLARH,
@ -391,6 +553,9 @@
 	"ldnt1b", // ARM64_INS_LDNT1B,
 	"ldnt1d", // ARM64_INS_LDNT1D,
 	"ldnt1h", // ARM64_INS_LDNT1H,
+	"ldnt1sb", // ARM64_INS_LDNT1SB,
+	"ldnt1sh", // ARM64_INS_LDNT1SH,
+	"ldnt1sw", // ARM64_INS_LDNT1SW,
 	"ldnt1w", // ARM64_INS_LDNT1W,
 	"ldp", // ARM64_INS_LDP,
 	"ldpsw", // ARM64_INS_LDPSW,
@ -486,10 +651,12 @@
 	"lsrv", // ARM64_INS_LSRV,
 	"mad", // ARM64_INS_MAD,
 	"madd", // ARM64_INS_MADD,
+	"match", // ARM64_INS_MATCH,
 	"mla", // ARM64_INS_MLA,
 	"mls", // ARM64_INS_MLS,
 	"mneg", // ARM64_INS_MNEG,
 	"mov", // ARM64_INS_MOV,
+	"mova", // ARM64_INS_MOVA,
 	"movi", // ARM64_INS_MOVI,
 	"movk", // ARM64_INS_MOVK,
 	"movn", // ARM64_INS_MOVN,
@ -505,10 +672,12 @@
 	"mvni", // ARM64_INS_MVNI,
 	"nand", // ARM64_INS_NAND,
 	"nands", // ARM64_INS_NANDS,
+	"nbsl", // ARM64_INS_NBSL,
 	"neg", // ARM64_INS_NEG,
 	"negs", // ARM64_INS_NEGS,
 	"ngc", // ARM64_INS_NGC,
 	"ngcs", // ARM64_INS_NGCS,
+	"nmatch", // ARM64_INS_NMATCH,
 	"nop", // ARM64_INS_NOP,
 	"nor", // ARM64_INS_NOR,
 	"nors", // ARM64_INS_NORS,
@ -539,6 +708,8 @@
 	"pmul", // ARM64_INS_PMUL,
 	"pmull", // ARM64_INS_PMULL,
 	"pmull2", // ARM64_INS_PMULL2,
+	"pmullb", // ARM64_INS_PMULLB,
+	"pmullt", // ARM64_INS_PMULLT,
 	"pnext", // ARM64_INS_PNEXT,
 	"prfb", // ARM64_INS_PRFB,
 	"prfd", // ARM64_INS_PRFD,
@ -547,6 +718,8 @@
 	"prfum", // ARM64_INS_PRFUM,
 	"prfw", // ARM64_INS_PRFW,
 	"psb", // ARM64_INS_PSB,
+	"psel", // ARM64_INS_PSEL,
+	"pssbb", // ARM64_INS_PSSBB,
 	"ptest", // ARM64_INS_PTEST,
 	"ptrue", // ARM64_INS_PTRUE,
 	"ptrues", // ARM64_INS_PTRUES,
@ -554,6 +727,8 @@
 	"punpklo", // ARM64_INS_PUNPKLO,
 	"raddhn", // ARM64_INS_RADDHN,
 	"raddhn2", // ARM64_INS_RADDHN2,
+	"raddhnb", // ARM64_INS_RADDHNB,
+	"raddhnt", // ARM64_INS_RADDHNT,
 	"rax1", // ARM64_INS_RAX1,
 	"rbit", // ARM64_INS_RBIT,
 	"rdffr", // ARM64_INS_RDFFR,
@ -567,6 +742,7 @@
 	"rev32", // ARM64_INS_REV32,
 	"rev64", // ARM64_INS_REV64,
 	"revb", // ARM64_INS_REVB,
+	"revd", // ARM64_INS_REVD,
 	"revh", // ARM64_INS_REVH,
 	"revw", // ARM64_INS_REVW,
 	"rmif", // ARM64_INS_RMIF,
@ -574,33 +750,74 @@
 	"rorv", // ARM64_INS_RORV,
 	"rshrn", // ARM64_INS_RSHRN,
 	"rshrn2", // ARM64_INS_RSHRN2,
+	"rshrnb", // ARM64_INS_RSHRNB,
+	"rshrnt", // ARM64_INS_RSHRNT,
 	"rsubhn", // ARM64_INS_RSUBHN,
 	"rsubhn2", // ARM64_INS_RSUBHN2,
+	"rsubhnb", // ARM64_INS_RSUBHNB,
+	"rsubhnt", // ARM64_INS_RSUBHNT,
 	"saba", // ARM64_INS_SABA,
 	"sabal", // ARM64_INS_SABAL,
 	"sabal2", // ARM64_INS_SABAL2,
+	"sabalb", // ARM64_INS_SABALB,
+	"sabalt", // ARM64_INS_SABALT,
 	"sabd", // ARM64_INS_SABD,
 	"sabdl", // ARM64_INS_SABDL,
 	"sabdl2", // ARM64_INS_SABDL2,
+	"sabdlb", // ARM64_INS_SABDLB,
+	"sabdlt", // ARM64_INS_SABDLT,
 	"sadalp", // ARM64_INS_SADALP,
 	"saddl", // ARM64_INS_SADDL,
 	"saddl2", // ARM64_INS_SADDL2,
+	"saddlb", // ARM64_INS_SADDLB,
+	"saddlbt", // ARM64_INS_SADDLBT,
 	"saddlp", // ARM64_INS_SADDLP,
+	"saddlt", // ARM64_INS_SADDLT,
 	"saddlv", // ARM64_INS_SADDLV,
 	"saddv", // ARM64_INS_SADDV,
 	"saddw", // ARM64_INS_SADDW,
 	"saddw2", // ARM64_INS_SADDW2,
+	"saddwb", // ARM64_INS_SADDWB,
+	"saddwt", // ARM64_INS_SADDWT,
+	"sb", // ARM64_INS_SB,
 	"sbc", // ARM64_INS_SBC,
+	"sbclb", // ARM64_INS_SBCLB,
+	"sbclt", // ARM64_INS_SBCLT,
 	"sbcs", // ARM64_INS_SBCS,
 	"sbfm", // ARM64_INS_SBFM,
+	"sclamp", // ARM64_INS_SCLAMP,
 	"scvtf", // ARM64_INS_SCVTF,
 	"sdiv", // ARM64_INS_SDIV,
 	"sdivr", // ARM64_INS_SDIVR,
 	"sdot", // ARM64_INS_SDOT,
 	"sel", // ARM64_INS_SEL,
+	"sete", // ARM64_INS_SETE,
+	"seten", // ARM64_INS_SETEN,
+	"setet", // ARM64_INS_SETET,
+	"setetn", // ARM64_INS_SETETN,
 	"setf16", // ARM64_INS_SETF16,
 	"setf8", // ARM64_INS_SETF8,
 	"setffr", // ARM64_INS_SETFFR,
+	"setge", // ARM64_INS_SETGE,
+	"setgen", // ARM64_INS_SETGEN,
+	"setget", // ARM64_INS_SETGET,
+	"setgetn", // ARM64_INS_SETGETN,
+	"setgm", // ARM64_INS_SETGM,
+	"setgmn", // ARM64_INS_SETGMN,
+	"setgmt", // ARM64_INS_SETGMT,
+	"setgmtn", // ARM64_INS_SETGMTN,
+	"setgp", // ARM64_INS_SETGP,
+	"setgpn", // ARM64_INS_SETGPN,
+	"setgpt", // ARM64_INS_SETGPT,
+	"setgptn", // ARM64_INS_SETGPTN,
+	"setm", // ARM64_INS_SETM,
+	"setmn", // ARM64_INS_SETMN,
+	"setmt", // ARM64_INS_SETMT,
+	"setmtn", // ARM64_INS_SETMTN,
+	"setp", // ARM64_INS_SETP,
+	"setpn", // ARM64_INS_SETPN,
+	"setpt", // ARM64_INS_SETPT,
+	"setptn", // ARM64_INS_SETPTN,
 	"sev", // ARM64_INS_SEV,
 	"sevl", // ARM64_INS_SEVL,
 	"sha1c", // ARM64_INS_SHA1C,
@ -623,7 +840,10 @@
 	"shll2", // ARM64_INS_SHLL2,
 	"shrn", // ARM64_INS_SHRN,
 	"shrn2", // ARM64_INS_SHRN2,
+	"shrnb", // ARM64_INS_SHRNB,
+	"shrnt", // ARM64_INS_SHRNT,
 	"shsub", // ARM64_INS_SHSUB,
+	"shsubr", // ARM64_INS_SHSUBR,
 	"sli", // ARM64_INS_SLI,
 	"sm3partw1", // ARM64_INS_SM3PARTW1,
 	"sm3partw2", // ARM64_INS_SM3PARTW2,
@ -644,17 +864,29 @@
 	"sminv", // ARM64_INS_SMINV,
 	"smlal", // ARM64_INS_SMLAL,
 	"smlal2", // ARM64_INS_SMLAL2,
+	"smlalb", // ARM64_INS_SMLALB,
+	"smlalt", // ARM64_INS_SMLALT,
 	"smlsl", // ARM64_INS_SMLSL,
 	"smlsl2", // ARM64_INS_SMLSL2,
+	"smlslb", // ARM64_INS_SMLSLB,
+	"smlslt", // ARM64_INS_SMLSLT,
+	"smmla", // ARM64_INS_SMMLA,
 	"smnegl", // ARM64_INS_SMNEGL,
+	"smopa", // ARM64_INS_SMOPA,
+	"smops", // ARM64_INS_SMOPS,
 	"smov", // ARM64_INS_SMOV,
+	"smstart", // ARM64_INS_SMSTART,
+	"smstop", // ARM64_INS_SMSTOP,
 	"smsubl", // ARM64_INS_SMSUBL,
 	"smulh", // ARM64_INS_SMULH,
 	"smull", // ARM64_INS_SMULL,
 	"smull2", // ARM64_INS_SMULL2,
+	"smullb", // ARM64_INS_SMULLB,
+	"smullt", // ARM64_INS_SMULLT,
 	"splice", // ARM64_INS_SPLICE,
 	"sqabs", // ARM64_INS_SQABS,
 	"sqadd", // ARM64_INS_SQADD,
+	"sqcadd", // ARM64_INS_SQCADD,
 	"sqdecb", // ARM64_INS_SQDECB,
 	"sqdecd", // ARM64_INS_SQDECD,
 	"sqdech", // ARM64_INS_SQDECH,
@ -662,58 +894,94 @@
 	"sqdecw", // ARM64_INS_SQDECW,
 	"sqdmlal", // ARM64_INS_SQDMLAL,
 	"sqdmlal2", // ARM64_INS_SQDMLAL2,
+	"sqdmlalb", // ARM64_INS_SQDMLALB,
+	"sqdmlalbt", // ARM64_INS_SQDMLALBT,
+	"sqdmlalt", // ARM64_INS_SQDMLALT,
 	"sqdmlsl", // ARM64_INS_SQDMLSL,
 	"sqdmlsl2", // ARM64_INS_SQDMLSL2,
+	"sqdmlslb", // ARM64_INS_SQDMLSLB,
+	"sqdmlslbt", // ARM64_INS_SQDMLSLBT,
+	"sqdmlslt", // ARM64_INS_SQDMLSLT,
 	"sqdmulh", // ARM64_INS_SQDMULH,
 	"sqdmull", // ARM64_INS_SQDMULL,
 	"sqdmull2", // ARM64_INS_SQDMULL2,
+	"sqdmullb", // ARM64_INS_SQDMULLB,
+	"sqdmullt", // ARM64_INS_SQDMULLT,
 	"sqincb", // ARM64_INS_SQINCB,
 	"sqincd", // ARM64_INS_SQINCD,
 	"sqinch", // ARM64_INS_SQINCH,
 	"sqincp", // ARM64_INS_SQINCP,
 	"sqincw", // ARM64_INS_SQINCW,
 	"sqneg", // ARM64_INS_SQNEG,
+	"sqrdcmlah", // ARM64_INS_SQRDCMLAH,
 	"sqrdmlah", // ARM64_INS_SQRDMLAH,
 	"sqrdmlsh", // ARM64_INS_SQRDMLSH,
 	"sqrdmulh", // ARM64_INS_SQRDMULH,
 	"sqrshl", // ARM64_INS_SQRSHL,
+	"sqrshlr", // ARM64_INS_SQRSHLR,
 	"sqrshrn", // ARM64_INS_SQRSHRN,
 	"sqrshrn2", // ARM64_INS_SQRSHRN2,
+	"sqrshrnb", // ARM64_INS_SQRSHRNB,
+	"sqrshrnt", // ARM64_INS_SQRSHRNT,
 	"sqrshrun", // ARM64_INS_SQRSHRUN,
 	"sqrshrun2", // ARM64_INS_SQRSHRUN2,
+	"sqrshrunb", // ARM64_INS_SQRSHRUNB,
+	"sqrshrunt", // ARM64_INS_SQRSHRUNT,
 	"sqshl", // ARM64_INS_SQSHL,
+	"sqshlr", // ARM64_INS_SQSHLR,
 	"sqshlu", // ARM64_INS_SQSHLU,
 	"sqshrn", // ARM64_INS_SQSHRN,
 	"sqshrn2", // ARM64_INS_SQSHRN2,
+	"sqshrnb", // ARM64_INS_SQSHRNB,
+	"sqshrnt", // ARM64_INS_SQSHRNT,
 	"sqshrun", // ARM64_INS_SQSHRUN,
 	"sqshrun2", // ARM64_INS_SQSHRUN2,
+	"sqshrunb", // ARM64_INS_SQSHRUNB,
+	"sqshrunt", // ARM64_INS_SQSHRUNT,
 	"sqsub", // ARM64_INS_SQSUB,
+	"sqsubr", // ARM64_INS_SQSUBR,
 	"sqxtn", // ARM64_INS_SQXTN,
 	"sqxtn2", // ARM64_INS_SQXTN2,
+	"sqxtnb", // ARM64_INS_SQXTNB,
+	"sqxtnt", // ARM64_INS_SQXTNT,
 	"sqxtun", // ARM64_INS_SQXTUN,
 	"sqxtun2", // ARM64_INS_SQXTUN2,
+	"sqxtunb", // ARM64_INS_SQXTUNB,
+	"sqxtunt", // ARM64_INS_SQXTUNT,
 	"srhadd", // ARM64_INS_SRHADD,
 	"sri", // ARM64_INS_SRI,
 	"srshl", // ARM64_INS_SRSHL,
+	"srshlr", // ARM64_INS_SRSHLR,
 	"srshr", // ARM64_INS_SRSHR,
 	"srsra", // ARM64_INS_SRSRA,
+	"ssbb", // ARM64_INS_SSBB,
 	"sshl", // ARM64_INS_SSHL,
 	"sshll", // ARM64_INS_SSHLL,
 	"sshll2", // ARM64_INS_SSHLL2,
+	"sshllb", // ARM64_INS_SSHLLB,
+	"sshllt", // ARM64_INS_SSHLLT,
 	"sshr", // ARM64_INS_SSHR,
 	"ssra", // ARM64_INS_SSRA,
 	"ssubl", // ARM64_INS_SSUBL,
 	"ssubl2", // ARM64_INS_SSUBL2,
+	"ssublb", // ARM64_INS_SSUBLB,
+	"ssublbt", // ARM64_INS_SSUBLBT,
+	"ssublt", // ARM64_INS_SSUBLT,
+	"ssubltb", // ARM64_INS_SSUBLTB,
 	"ssubw", // ARM64_INS_SSUBW,
 	"ssubw2", // ARM64_INS_SSUBW2,
+	"ssubwb", // ARM64_INS_SSUBWB,
+	"ssubwt", // ARM64_INS_SSUBWT,
 	"st1", // ARM64_INS_ST1,
 	"st1b", // ARM64_INS_ST1B,
 	"st1d", // ARM64_INS_ST1D,
 	"st1h", // ARM64_INS_ST1H,
+	"st1q", // ARM64_INS_ST1Q,
 	"st1w", // ARM64_INS_ST1W,
 	"st2", // ARM64_INS_ST2,
 	"st2b", // ARM64_INS_ST2B,
 	"st2d", // ARM64_INS_ST2D,
+	"st2g", // ARM64_INS_ST2G,
 	"st2h", // ARM64_INS_ST2H,
 	"st2w", // ARM64_INS_ST2W,
 	"st3", // ARM64_INS_ST3,
@ -726,6 +994,9 @@
 	"st4d", // ARM64_INS_ST4D,
 	"st4h", // ARM64_INS_ST4H,
 	"st4w", // ARM64_INS_ST4W,
+	"st64b", // ARM64_INS_ST64B,
+	"st64bv", // ARM64_INS_ST64BV,
+	"st64bv0", // ARM64_INS_ST64BV0,
 	"stadd", // ARM64_INS_STADD,
 	"staddb", // ARM64_INS_STADDB,
 	"staddh", // ARM64_INS_STADDH,
@ -744,6 +1015,9 @@
 	"steorl", // ARM64_INS_STEORL,
 	"steorlb", // ARM64_INS_STEORLB,
 	"steorlh", // ARM64_INS_STEORLH,
+	"stg", // ARM64_INS_STG,
+	"stgm", // ARM64_INS_STGM,
+	"stgp", // ARM64_INS_STGP,
 	"stllr", // ARM64_INS_STLLR,
 	"stllrb", // ARM64_INS_STLLRB,
 	"stllrh", // ARM64_INS_STLLRH,
@ -806,11 +1080,22 @@
 	"stxr", // ARM64_INS_STXR,
 	"stxrb", // ARM64_INS_STXRB,
 	"stxrh", // ARM64_INS_STXRH,
+	"stz2g", // ARM64_INS_STZ2G,
+	"stzg", // ARM64_INS_STZG,
+	"stzgm", // ARM64_INS_STZGM,
 	"sub", // ARM64_INS_SUB,
+	"subg", // ARM64_INS_SUBG,
 	"subhn", // ARM64_INS_SUBHN,
 	"subhn2", // ARM64_INS_SUBHN2,
+	"subhnb", // ARM64_INS_SUBHNB,
+	"subhnt", // ARM64_INS_SUBHNT,
+	"subp", // ARM64_INS_SUBP,
+	"subps", // ARM64_INS_SUBPS,
 	"subr", // ARM64_INS_SUBR,
 	"subs", // ARM64_INS_SUBS,
+	"sudot", // ARM64_INS_SUDOT,
+	"sumopa", // ARM64_INS_SUMOPA,
+	"sumops", // ARM64_INS_SUMOPS,
 	"sunpkhi", // ARM64_INS_SUNPKHI,
 	"sunpklo", // ARM64_INS_SUNPKLO,
 	"suqadd", // ARM64_INS_SUQADD,
@ -838,31 +1123,46 @@
 	"tbnz", // ARM64_INS_TBNZ,
 	"tbx", // ARM64_INS_TBX,
 	"tbz", // ARM64_INS_TBZ,
+	"tcancel", // ARM64_INS_TCANCEL,
+	"tcommit", // ARM64_INS_TCOMMIT,
 	"trn1", // ARM64_INS_TRN1,
 	"trn2", // ARM64_INS_TRN2,
 	"tsb", // ARM64_INS_TSB,
 	"tst", // ARM64_INS_TST,
+	"tstart", // ARM64_INS_TSTART,
+	"ttest", // ARM64_INS_TTEST,
 	"uaba", // ARM64_INS_UABA,
 	"uabal", // ARM64_INS_UABAL,
 	"uabal2", // ARM64_INS_UABAL2,
+	"uabalb", // ARM64_INS_UABALB,
+	"uabalt", // ARM64_INS_UABALT,
 	"uabd", // ARM64_INS_UABD,
 	"uabdl", // ARM64_INS_UABDL,
 	"uabdl2", // ARM64_INS_UABDL2,
+	"uabdlb", // ARM64_INS_UABDLB,
+	"uabdlt", // ARM64_INS_UABDLT,
 	"uadalp", // ARM64_INS_UADALP,
 	"uaddl", // ARM64_INS_UADDL,
 	"uaddl2", // ARM64_INS_UADDL2,
+	"uaddlb", // ARM64_INS_UADDLB,
 	"uaddlp", // ARM64_INS_UADDLP,
+	"uaddlt", // ARM64_INS_UADDLT,
 	"uaddlv", // ARM64_INS_UADDLV,
 	"uaddv", // ARM64_INS_UADDV,
 	"uaddw", // ARM64_INS_UADDW,
 	"uaddw2", // ARM64_INS_UADDW2,
+	"uaddwb", // ARM64_INS_UADDWB,
+	"uaddwt", // ARM64_INS_UADDWT,
 	"ubfm", // ARM64_INS_UBFM,
+	"uclamp", // ARM64_INS_UCLAMP,
 	"ucvtf", // ARM64_INS_UCVTF,
+	"udf", // ARM64_INS_UDF,
 	"udiv", // ARM64_INS_UDIV,
 	"udivr", // ARM64_INS_UDIVR,
 	"udot", // ARM64_INS_UDOT,
 	"uhadd", // ARM64_INS_UHADD,
 	"uhsub", // ARM64_INS_UHSUB,
+	"uhsubr", // ARM64_INS_UHSUBR,
 	"umaddl", // ARM64_INS_UMADDL,
 	"umax", // ARM64_INS_UMAX,
 	"umaxp", // ARM64_INS_UMAXP,
@ -872,14 +1172,23 @@
 	"uminv", // ARM64_INS_UMINV,
 	"umlal", // ARM64_INS_UMLAL,
 	"umlal2", // ARM64_INS_UMLAL2,
+	"umlalb", // ARM64_INS_UMLALB,
+	"umlalt", // ARM64_INS_UMLALT,
 	"umlsl", // ARM64_INS_UMLSL,
 	"umlsl2", // ARM64_INS_UMLSL2,
+	"umlslb", // ARM64_INS_UMLSLB,
+	"umlslt", // ARM64_INS_UMLSLT,
+	"ummla", // ARM64_INS_UMMLA,
 	"umnegl", // ARM64_INS_UMNEGL,
+	"umopa", // ARM64_INS_UMOPA,
+	"umops", // ARM64_INS_UMOPS,
 	"umov", // ARM64_INS_UMOV,
 	"umsubl", // ARM64_INS_UMSUBL,
 	"umulh", // ARM64_INS_UMULH,
 	"umull", // ARM64_INS_UMULL,
 	"umull2", // ARM64_INS_UMULL2,
+	"umullb", // ARM64_INS_UMULLB,
+	"umullt", // ARM64_INS_UMULLT,
 	"uqadd", // ARM64_INS_UQADD,
 	"uqdecb", // ARM64_INS_UQDECB,
 	"uqdecd", // ARM64_INS_UQDECD,
@ -892,30 +1201,50 @@
 	"uqincp", // ARM64_INS_UQINCP,
 	"uqincw", // ARM64_INS_UQINCW,
 	"uqrshl", // ARM64_INS_UQRSHL,
+	"uqrshlr", // ARM64_INS_UQRSHLR,
 	"uqrshrn", // ARM64_INS_UQRSHRN,
 	"uqrshrn2", // ARM64_INS_UQRSHRN2,
+	"uqrshrnb", // ARM64_INS_UQRSHRNB,
+	"uqrshrnt", // ARM64_INS_UQRSHRNT,
 	"uqshl", // ARM64_INS_UQSHL,
+	"uqshlr", // ARM64_INS_UQSHLR,
 	"uqshrn", // ARM64_INS_UQSHRN,
 	"uqshrn2", // ARM64_INS_UQSHRN2,
+	"uqshrnb", // ARM64_INS_UQSHRNB,
+	"uqshrnt", // ARM64_INS_UQSHRNT,
 	"uqsub", // ARM64_INS_UQSUB,
+	"uqsubr", // ARM64_INS_UQSUBR,
 	"uqxtn", // ARM64_INS_UQXTN,
 	"uqxtn2", // ARM64_INS_UQXTN2,
+	"uqxtnb", // ARM64_INS_UQXTNB,
+	"uqxtnt", // ARM64_INS_UQXTNT,
 	"urecpe", // ARM64_INS_URECPE,
 	"urhadd", // ARM64_INS_URHADD,
 	"urshl", // ARM64_INS_URSHL,
+	"urshlr", // ARM64_INS_URSHLR,
 	"urshr", // ARM64_INS_URSHR,
 	"ursqrte", // ARM64_INS_URSQRTE,
 	"ursra", // ARM64_INS_URSRA,
+	"usdot", // ARM64_INS_USDOT,
 	"ushl", // ARM64_INS_USHL,
 	"ushll", // ARM64_INS_USHLL,
 	"ushll2", // ARM64_INS_USHLL2,
+	"ushllb", // ARM64_INS_USHLLB,
+	"ushllt", // ARM64_INS_USHLLT,
 	"ushr", // ARM64_INS_USHR,
+	"usmmla", // ARM64_INS_USMMLA,
+	"usmopa", // ARM64_INS_USMOPA,
+	"usmops", // ARM64_INS_USMOPS,
 	"usqadd", // ARM64_INS_USQADD,
 	"usra", // ARM64_INS_USRA,
 	"usubl", // ARM64_INS_USUBL,
 	"usubl2", // ARM64_INS_USUBL2,
+	"usublb", // ARM64_INS_USUBLB,
+	"usublt", // ARM64_INS_USUBLT,
 	"usubw", // ARM64_INS_USUBW,
 	"usubw2", // ARM64_INS_USUBW2,
+	"usubwb", // ARM64_INS_USUBWB,
+	"usubwt", // ARM64_INS_USUBWT,
 	"uunpkhi", // ARM64_INS_UUNPKHI,
 	"uunpklo", // ARM64_INS_UUNPKLO,
 	"uxtb", // ARM64_INS_UXTB,
@ -926,12 +1255,21 @@
 	"uzp1", // ARM64_INS_UZP1,
 	"uzp2", // ARM64_INS_UZP2,
 	"wfe", // ARM64_INS_WFE,
+	"wfet", // ARM64_INS_WFET,
 	"wfi", // ARM64_INS_WFI,
+	"wfit", // ARM64_INS_WFIT,
+	"whilege", // ARM64_INS_WHILEGE,
+	"whilegt", // ARM64_INS_WHILEGT,
+	"whilehi", // ARM64_INS_WHILEHI,
+	"whilehs", // ARM64_INS_WHILEHS,
 	"whilele", // ARM64_INS_WHILELE,
 	"whilelo", // ARM64_INS_WHILELO,
 	"whilels", // ARM64_INS_WHILELS,
 	"whilelt", // ARM64_INS_WHILELT,
+	"whilerw", // ARM64_INS_WHILERW,
+	"whilewr", // ARM64_INS_WHILEWR,
 	"wrffr", // ARM64_INS_WRFFR,
+	"xaflag", // ARM64_INS_XAFLAG,
 	"xar", // ARM64_INS_XAR,
 	"xpacd", // ARM64_INS_XPACD,
 	"xpaci", // ARM64_INS_XPACI,
@ -939,5 +1277,6 @@
 	"xtn", // ARM64_INS_XTN,
 	"xtn2", // ARM64_INS_XTN2,
 	"yield", // ARM64_INS_YIELD,
+	"zero", // ARM64_INS_ZERO,
 	"zip1", // ARM64_INS_ZIP1,
 	"zip2", // ARM64_INS_ZIP2,
--- a/arch/AArch64/AArch64MappingInsnOp.inc
+++ b/arch/AArch64/AArch64MappingInsnOp.inc
--- a/arch/M68K/M68KDisassembler.c
+++ b/arch/M68K/M68KDisassembler.c
@ -1784,6 +1784,13 @@ static void d68020_cpbcc_16(m68k_info *info)
 	cs_m68k* ext;
 	LIMIT_CPU_TYPES(info, M68020_PLUS);

+	// FNOP is a special case of FBF
+	if (info->ir == 0xf280 && peek_imm_16(info) == 0) {
+		MCInst_setOpcode(info->inst, M68K_INS_FNOP);
+		info->pc += 2;
+		return;
+	}
+
 	// these are all in row with the extension so just doing a add here is fine
 	info->inst->Opcode += (info->ir & 0x2f);

@ -1803,8 +1810,6 @@ static void d68020_cpbcc_32(m68k_info *info)

 	LIMIT_CPU_TYPES(info, M68020_PLUS);

-	LIMIT_CPU_TYPES(info, M68020_PLUS);
-
 	// these are all in row with the extension so just doing a add here is fine
 	info->inst->Opcode += (info->ir & 0x2f);

--- a/arch/PowerPC/PPCInstPrinter.c
+++ b/arch/PowerPC/PPCInstPrinter.c
@ -1150,7 +1150,8 @@ static char *stripRegisterPrefix(const char *RegName)
 				char *name = cs_strdup(RegName + 2);

 				// also strip the last 2 letters
-				name[strlen(name) - 2] = '\0';
+				if(strlen(name) > 2)
+					name[strlen(name) - 2] = '\0';

 				return name;
 			}
--- a/bindings/python/setup.py
+++ b/bindings/python/setup.py
@ -223,7 +223,6 @@ setup(
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3',
    ],
-    requires=['ctypes'],
    cmdclass=cmdclass,
    zip_safe=True,
    include_package_data=True,
--- a/cs_priv.h
+++ b/cs_priv.h
@ -68,6 +68,7 @@ struct cs_struct {
 	cs_opt_value detail, imm_unsigned;
 	int syntax;	// asm syntax for simple printer such as ARM, Mips & PPC
 	bool doing_mem;	// handling memory operand in InstPrinter code
+	bool doing_SME_Index; // handling an SME instruction that has an index
 	unsigned short *insn_cache;	// index caching for mapping.c
 	GetRegisterName_t get_regname;
 	bool skipdata;	// set this to True if we skip data when disassembling
--- a/cstool/cstool_arm64.c
+++ b/cstool/cstool_arm64.c
@ -74,6 +74,22 @@ void print_insn_detail_arm64(csh handle, cs_insn *ins)
 			case ARM64_OP_BARRIER:
 				printf("\t\toperands[%u].type: BARRIER = 0x%x\n", i, op->barrier);
 				break;
+			case ARM64_OP_SVCR:
+				printf("\t\toperands[%u].type: SYS = 0x%x\n", i, op->sys);
+				if(op->svcr == ARM64_SVCR_SVCRSM)
+					printf("\t\t\toperands[%u].svcr: BIT = SM\n", i);
+				if(op->svcr == ARM64_SVCR_SVCRZA)
+					printf("\t\t\toperands[%u].svcr: BIT = ZA\n", i);
+				if(op->svcr == ARM64_SVCR_SVCRSMZA)
+					printf("\t\t\toperands[%u].svcr: BIT = SM & ZA\n", i);
+				break;
+			case ARM64_OP_SME_INDEX:
+				printf("\t\toperands[%u].type: REG = %s\n", i, cs_reg_name(handle, op->sme_index.reg));
+				if (op->sme_index.base != ARM64_REG_INVALID)
+					printf("\t\t\toperands[%u].index.base: REG = %s\n", i, cs_reg_name(handle, op->sme_index.base));
+				if (op->sme_index.disp != 0)
+					printf("\t\t\toperands[%u].index.disp: 0x%x\n", i, op->sme_index.disp);
+				break;
 		}
 		
 		access = op->access;
--- a/include/capstone/arm64.h
+++ b/include/capstone/arm64.h
--- a/suite/MC/AArch64/basic-a64-instructions.s.cs
+++ b/suite/MC/AArch64/basic-a64-instructions.s.cs
@ -1295,7 +1295,8 @@
 // 0x7f,0xf0,0x01,0xf2 = ands xzr, x3, #0xaaaaaaaaaaaaaaaa
 // 0xff,0xf3,0x00,0xf2 = ands xzr, xzr, #0x5555555555555555
 0xe3,0x8f,0x00,0x32 = mov w3, #0xf000f
-0xea,0xf3,0x01,0xb2 = orr x10, xzr, #0xaaaaaaaaaaaaaaaa
+// 0xea,0xf3,0x01,0xb2 = orr x10, xzr, #0xaaaaaaaaaaaaaaaa
+0xea,0xf3,0x01,0xb2 = mov x10, #0xaaaaaaaaaaaaaaaa
 0xec,0x02,0x15,0x0a = and w12, w23, w21
 0xf0,0x05,0x01,0x0a = and w16, w15, w1, lsl #1
 0x89,0x7c,0x0a,0x0a = and w9, w4, w10, lsl #31
@ -1349,8 +1350,11 @@
 0x5f,0x30,0x03,0xd5 = clrex #0
 0x5f,0x37,0x03,0xd5 = clrex #7
 0x5f,0x3f,0x03,0xd5 = clrex
-0x9f,0x30,0x03,0xd5 = dsb #0
-0x9f,0x3c,0x03,0xd5 = dsb #12
+// 0x9f,0x30,0x03,0xd5 = dsb #0
+0x9f,0x30,0x03,0xd5 = ssbb
+// 0x9f,0x3c,0x03,0xd5 = dsb #12
+0x9f,0x3c,0x03,0xd5 = dfb
+0x9f,0x38,0x03,0xd5 = dsb #8
 0x9f,0x3f,0x03,0xd5 = dsb sy
 0x9f,0x31,0x03,0xd5 = dsb oshld
 0x9f,0x32,0x03,0xd5 = dsb oshst
@ -1566,7 +1570,7 @@
 0x0c,0x42,0x18,0xd5 = msr spsel, x12
 0x0c,0x42,0x1b,0xd5 = msr nzcv, x12
 0x2c,0x42,0x1b,0xd5 = msr daif, x12
-0x4c,0x42,0x18,0xd5 = msr currentel, x12
+// 0x4c,0x42,0x18,0xd5 = msr currentel, x12
 0x0c,0x43,0x1c,0xd5 = msr spsr_irq, x12
 0x2c,0x43,0x1c,0xd5 = msr spsr_abt, x12
 0x4c,0x43,0x1c,0xd5 = msr spsr_und, x12
--- a/suite/MC/AArch64/gicv3-regs.s.cs
+++ b/suite/MC/AArch64/gicv3-regs.s.cs
@ -18,7 +18,6 @@
 0xd6,0xcc,0x38,0xd5 = mrs x22, icc_igrpen0_el1
 0xe5,0xcc,0x38,0xd5 = mrs x5, icc_igrpen1_el1
 0xe7,0xcc,0x3e,0xd5 = mrs x7, icc_igrpen1_el3
-0x16,0xcd,0x38,0xd5 = mrs x22, icc_seien_el1
 0x84,0xc8,0x38,0xd5 = mrs x4, icc_ap0r0_el1
 0xab,0xc8,0x38,0xd5 = mrs x11, icc_ap0r1_el1
 0xdb,0xc8,0x38,0xd5 = mrs x27, icc_ap0r2_el1
@ -38,7 +37,6 @@
 0x0a,0xcb,0x3c,0xd5 = mrs x10, ich_hcr_el2
 0x5b,0xcb,0x3c,0xd5 = mrs x27, ich_misr_el2
 0xe6,0xcb,0x3c,0xd5 = mrs x6, ich_vmcr_el2
-0x93,0xc9,0x3c,0xd5 = mrs x19, ich_vseir_el2
 0x03,0xcc,0x3c,0xd5 = mrs x3, ich_lr0_el2
 0x21,0xcc,0x3c,0xd5 = mrs x1, ich_lr1_el2
 0x56,0xcc,0x3c,0xd5 = mrs x22, ich_lr2_el2
@ -72,7 +70,6 @@
 0xd6,0xcc,0x18,0xd5 = msr icc_igrpen0_el1, x22
 0xeb,0xcc,0x18,0xd5 = msr icc_igrpen1_el1, x11
 0xe8,0xcc,0x1e,0xd5 = msr icc_igrpen1_el3, x8
-0x04,0xcd,0x18,0xd5 = msr icc_seien_el1, x4
 0x9b,0xc8,0x18,0xd5 = msr icc_ap0r0_el1, x27
 0xa5,0xc8,0x18,0xd5 = msr icc_ap0r1_el1, x5
 0xd4,0xc8,0x18,0xd5 = msr icc_ap0r2_el1, x20
@ -90,9 +87,7 @@
 0x4e,0xc9,0x1c,0xd5 = msr ich_ap1r2_el2, x14
 0x6d,0xc9,0x1c,0xd5 = msr ich_ap1r3_el2, x13
 0x01,0xcb,0x1c,0xd5 = msr ich_hcr_el2, x1
-0x4a,0xcb,0x1c,0xd5 = msr ich_misr_el2, x10
 0xf8,0xcb,0x1c,0xd5 = msr ich_vmcr_el2, x24
-0x9d,0xc9,0x1c,0xd5 = msr ich_vseir_el2, x29
 0x1a,0xcc,0x1c,0xd5 = msr ich_lr0_el2, x26
 0x29,0xcc,0x1c,0xd5 = msr ich_lr1_el2, x9
 0x52,0xcc,0x1c,0xd5 = msr ich_lr2_el2, x18
--- a/suite/MC/AArch64/trace-regs.s.cs
+++ b/suite/MC/AArch64/trace-regs.s.cs
@ -60,7 +60,7 @@
 0x9a,0x02,0x31,0xd5 = mrs x26, trcseqevr2
 0x8e,0x06,0x31,0xd5 = mrs x14, trcseqrstevr
 0x84,0x07,0x31,0xd5 = mrs x4, trcseqstr
-0x91,0x08,0x31,0xd5 = mrs x17, trcextinselr
+0x91,0x08,0x31,0xd5 = mrs x17, trcextinselr0
 0xb5,0x00,0x31,0xd5 = mrs x21, trccntrldvr0
 0xaa,0x01,0x31,0xd5 = mrs x10, trccntrldvr1
 0xb4,0x02,0x31,0xd5 = mrs x20, trccntrldvr2
@ -234,7 +234,7 @@
 0x90,0x02,0x11,0xd5 = msr trcseqevr2, x16
 0x90,0x06,0x11,0xd5 = msr trcseqrstevr, x16
 0x99,0x07,0x11,0xd5 = msr trcseqstr, x25
-0x9d,0x08,0x11,0xd5 = msr trcextinselr, x29
+0x9d,0x08,0x11,0xd5 = msr trcextinselr0, x29
 0xb4,0x00,0x11,0xd5 = msr trccntrldvr0, x20
 0xb4,0x01,0x11,0xd5 = msr trccntrldvr1, x20
 0xb6,0x02,0x11,0xd5 = msr trccntrldvr2, x22
--- a/suite/arm/test_arm_regression.c
+++ b/suite/arm/test_arm_regression.c
@ -336,6 +336,8 @@ static void test_valids()
 					valid->platform_comment, hex_str, valid_code->start_addr, 
 					valid_code->comment, valid_code->expected_out);

+			free(hex_str);
+
 			count = cs_disasm(handle,
 					valid_code->code, valid_code->size, 
 					valid_code->start_addr, 0, &insn
--- a/suite/cstest/issues.cs
+++ b/suite/cstest/issues.cs
@ -1,3 +1,11 @@
+!# issue 1912 PPC register name
+!# CS_ARCH_PPC, CS_MODE_BIG_ENDIAN, None
+0x2d,0x03,0x00,0x80 == cmpwi cr2, r3, 0x80
+
+!# issue 1912 PPC no register name
+!# CS_ARCH_PPC, CS_MODE_BIG_ENDIAN, CS_OPT_SYNTAX_NOREGNAME
+0x2d,0x03,0x00,0x80 == cmpwi 2, 3, 0x80
+
 !# issue 1902 PPC psq_st negative displacement
 !# CS_ARCH_PPC, CS_MODE_32 | CS_MODE_BIG_ENDIAN | CS_MODE_PS, CS_OPT_DETAIL
 0xf3,0xec,0x0f,0xf8 == psq_st f31, -8(r12), 0, 0 ; op_count: 4 ; operands[0].type: REG = f31 ; operands[1].type: MEM ; operands[1].mem.base: REG = r12 ; operands[1].mem.disp: 0xfffffff8 ; operands[2].type: IMM = 0x0 ; operands[3].type: IMM = 0x0
@ -136,23 +144,23 @@

 !# issue 1856 AArch64 SYS instruction operands: tlbi 1 op
 !# CS_ARCH_ARM64, CS_MODE_ARM, CS_OPT_DETAIL
-0x1f,0x83,0x08,0xd5 == tlbi vmalle1is ; op_count: 1 ; operands[0].type: SYS = 0x3
+0x1f,0x83,0x08,0xd5 == tlbi vmalle1is ; op_count: 1 ; operands[0].type: SYS = 0x9a

 !# issue 1856 AArch64 SYS instruction operands: tlbi 2 op
 !# CS_ARCH_ARM64, CS_MODE_ARM, CS_OPT_DETAIL
-0x22,0x87,0x08,0xd5 == tlbi vae1, x2 ; op_count: 2 ; operands[0].type: SYS = 0x16
+0x22,0x87,0x08,0xd5 == tlbi vae1, x2 ; op_count: 2 ; operands[0].type: SYS = 0x75

 !# issue 1856 AArch64 SYS instruction operands: at
 !# CS_ARCH_ARM64, CS_MODE_ARM, CS_OPT_DETAIL
-0xc0,0x78,0x0c,0xd5 == at s12e0r, x0 ; op_count: 2 ; operands[0].type: SYS = 0x59
+0xc0,0x78,0x0c,0xd5 == at s12e0r, x0 ; op_count: 2 ; operands[0].type: SYS = 0xaf

 !# issue 1856 AArch64 SYS instruction operands: dc
 !# CS_ARCH_ARM64, CS_MODE_ARM, CS_OPT_DETAIL
-0x22,0x7b,0x0b,0xd5 == dc cvau, x2 ; op_count: 2 ; operands[0].type: SYS = 0x62
+0x22,0x7b,0x0b,0xd5 == dc cvau, x2 ; op_count: 2 ; operands[0].type: SYS = 0xc5

 !# issue 1856 AArch64 SYS instruction operands: ic
 !# CS_ARCH_ARM64, CS_MODE_ARM, CS_OPT_DETAIL
-0x20,0x75,0x0b,0xd5 == ic ivau, x0 ; op_count: 2 ; operands[0].type: SYS = 0x68
+0x20,0x75,0x0b,0xd5 == ic ivau, x0 ; op_count: 2 ; operands[0].type: SYS = 0xd1

 !# issue 1843 AArch64 missing VAS specifiers in aliased instructions: mov 16b
 !# CS_ARCH_ARM64, CS_MODE_ARM, CS_OPT_DETAIL
@ -201,9 +209,9 @@
 !# CS_ARCH_M68K, CS_MODE_BIG_ENDIAN | CS_MODE_M68K_040, None
 0x4E,0x7A,0x00,0x02 == movec cacr, d0

-!# issue 1653 AArch64 wrong register access read/write flags on cmp instruction
-!# CS_ARCH_ARM64, CS_MODE_ARM, CS_OPT_DETAIL
-0x3F,0x00,0x02,0xEB == cmp x1, x2 ; operands[0].access: READ
+// !# issue 1653 AArch64 wrong register access read/write flags on cmp instruction
+// !# CS_ARCH_ARM64, CS_MODE_ARM, CS_OPT_DETAIL
+// 0x3F,0x00,0x02,0xEB == cmp x1, x2 ; operands[0].access: READ

 !# issue 1643 M68K incorrect read of 32-bit imm for bsr
 !# CS_ARCH_M68K, CS_MODE_BIG_ENDIAN | CS_MODE_M68K_040 , None
--- a/suite/synctools/.gitignore
+++ b/suite/synctools/.gitignore
@ -1,4 +1,5 @@
 *.inc
+insn_list.txt

 disassemblertables_reduce2
 disassemblertables2
--- a/suite/synctools/asmwriter.py
+++ b/suite/synctools/asmwriter.py
@ -138,6 +138,21 @@ for line in lines:
            print_line("static void printInstruction(MCInst *MI, SStream *O)\n{")
        else:
            print_line("static void printInstruction(MCInst *MI, SStream *O)\n{")
+    elif 'LLVM_NO_PROFILE_INSTRUMENT_FUNCTION' in line:
+        continue
+    elif 'AArch64InstPrinter::getMnemonic' in line:
+        print_line("static uint64_t getMnemonic(MCInst *MI, SStream *O, unsigned int opcode) {")
+    elif 'return {AsmStrs+(Bits' in line:
+        tmp = line.split(',')
+        prntStr = tmp[0].split('{')[1]
+        print_line("\tSStream_concat0(O, " + prntStr + ");")
+        print_line("\treturn Bits;")
+    elif 'MnemonicInfo = getMnemonic(' in line:
+        continue
+    elif 'O << MnemonicInfo' in line:
+        continue
+    elif 'uint64_t Bits = MnemonicInfo' in line:
+        print_line("\tuint64_t Bits = getMnemonic(MI, O, opcode);")
    elif 'const char *AArch64InstPrinter::' in line:
        continue
    elif 'getRegisterName(' in line:
@ -164,6 +179,14 @@ for line in lines:
            line2 = line.replace('STI.getFeatureBits()[', 'AArch64_getFeatureBits(')
        line2 = line2.replace(']', ')')
        print_line(line2)
+    elif 'lookupBTIByEncoding' in line:
+        line = line.replace('AArch64BTIHint::', '')
+        line = line.replace('MCOp.getImm()', 'MCOperand_getImm(MCOp)')
+        print_line(line)
+    elif 'lookupPSBByEncoding' in line:
+        line = line.replace('AArch64PSBHint::', '')
+        line = line.replace('MCOp.getImm()', 'MCOperand_getImm(MCOp)')
+        print_line(line)
    elif ', STI, ' in line:
        line2 = line.replace(', STI, ', ', ')

@ -222,7 +245,7 @@ for line in lines:
                line2 = line2.replace('printLogicalImm', 'printLogicalImm32')
            elif '64' in param:
                line2 = line2.replace('printLogicalImm', 'printLogicalImm64')
-        elif 'printSVERegOp' in line2 or 'printGPRSeqPairsClassOperand' in line2 or 'printTypedVectorList' in line2 or 'printPostIncOperand' in line2 or 'printImmScale' in line2 or 'printRegWithShiftExtend' in line2 or 'printUImm12Offset' in line2 or 'printExactFPImm' in line2 or 'printMemExtend' in line2 or 'printZPRasFPR' in line2:
+        elif 'printSVERegOp' in line2 or 'printGPRSeqPairsClassOperand' in line2 or 'printTypedVectorList' in line2 or 'printPostIncOperand' in line2 or 'printImmScale' in line2 or 'printRegWithShiftExtend' in line2 or 'printUImm12Offset' in line2 or 'printExactFPImm' in line2 or 'printMemExtend' in line2 or 'printZPRasFPR' in line2 or 'printMatrixTileVector' in line2 or 'printMatrix<' in line2 or 'printSImm' in line2:
            param = extract_brackets(line2)
            if param == '':
                param = '0'
@ -234,6 +257,8 @@ for line in lines:
            bracket_content = line2[line2.index('<') + 1 : line2.index('>')]
            line2 = line2.replace('<' + bracket_content + '>', '')
            line2 = line2.replace(' O);', ' O, %s);' %bracket_content)
+        elif 'printAlignedLabel' in line2 or 'printAdrpLabel' in line2:
+            line2 = line2.replace('Address, ', '')

        print_line(line2)
    elif "static const char AsmStrs[]" in line:
@ -286,12 +311,21 @@ for line in lines:

        elif '", -1"' in line2:
            print_line('    op_addImm(MI, -1);')
+        

-        if '[' in line2:
+        if '], [' in line2 or ']!, [' in line2:
+            print_line('    set_mem_access(MI, false);')
+            print_line('    set_mem_access(MI, true);')
+        
+        elif "\"[\"" in line2:
+            # Check for SME_Index specific string of only "["
+            print_line('    set_sme_index(MI, true);')
+
+        elif '[' in line2:
            if not '[]' in line2:
                print_line('    set_mem_access(MI, true);')

-        if ']' in line2:
+        elif ']' in line2:
            if not '[]' in line2:
                print_line('    set_mem_access(MI, false);')

@ -629,12 +663,100 @@ for line in lines:
    elif 'switch (PredicateIndex) {' in line:
        print_line('  int64_t Val;')
        print_line(line)
-    elif 'unsigned I = 0;' in line and in_printAliasInstr:
+    elif 'uint32_t(' in line and in_printAliasInstr:
+        line = line.replace('uint32_t(', '')
+        line = line.replace(')', '')
+        print_line(line)
+    elif '#ifndef NDEBUG' in line and in_printAliasInstr:
        print_line("""
+  char *AsmString;
+  const size_t OpToSize = sizeof(OpToPatterns) / sizeof(PatternsForOpcode);
+
+  const unsigned opcode = MCInst_getOpcode(MI);
+
+  // Check for alias
+  int OpToIndex = 0;
+  for(int i = 0; i < OpToSize; i++){
+    if(OpToPatterns[i].Opcode == opcode){
+      OpToIndex = i;
+      break;
+    }
+  }
+  // Check for match
+  if(opcode != OpToPatterns[OpToIndex].Opcode)
+    return NULL;
+
+  const PatternsForOpcode opToPat = OpToPatterns[OpToIndex];
+
+  // Try all patterns for this opcode
+  uint32_t AsmStrOffset = ~0U;
+  int patIdx = opToPat.PatternStart;
+  while(patIdx < (opToPat.PatternStart + opToPat.NumPatterns)){
+    // Check operand count first
+    if(MCInst_getNumOperands(MI) != Patterns[patIdx].NumOperands)
+      return NULL;
+    
+    // Test all conditions for this pattern
+    int condIdx = Patterns[patIdx].AliasCondStart;
+    int opIdx = 0;
+    bool allPass = true;
+    while(condIdx < (Patterns[patIdx].AliasCondStart + Patterns[patIdx].NumConds)){
+      MCOperand *opnd = MCInst_getOperand(MI, opIdx);
+      opIdx++;
+      // Not concerned with any Feature related conditions as STI is disregarded
+      switch (Conds[condIdx].Kind)
+      {
+      case AliasPatternCond_K_Ignore :
+        // Operand can be anything.
+        break;
+      case AliasPatternCond_K_Reg :
+        // Operand must be a specific register.
+        allPass = allPass && (MCOperand_isReg(opnd) && MCOperand_getReg(opnd) == Conds[condIdx].Value);
+        break;
+      case AliasPatternCond_K_TiedReg :
+        // Operand must match the register of another operand.
+        allPass = allPass && (MCOperand_isReg(opnd) && MCOperand_getReg(opnd) == 
+                  MCOperand_getReg(MCInst_getOperand(MI, Conds[condIdx].Value)));
+        break;
+      case AliasPatternCond_K_Imm :
+        // Operand must be a specific immediate.
+        allPass = allPass && (MCOperand_isImm(opnd) && MCOperand_getImm(opnd) == Conds[condIdx].Value);
+        break;
+      case AliasPatternCond_K_RegClass :
+        // Operand must be a register in this class. Value is a register class id.
+        allPass = allPass && (MCOperand_isReg(opnd) && GETREGCLASS_CONTAIN(Conds[condIdx].Value, (opIdx-1)));
+        break;
+      case AliasPatternCond_K_Custom :
+        // Operand must match some custom criteria.
+        allPass = allPass && AArch64InstPrinterValidateMCOperand(opnd, Conds[condIdx].Value);
+        break;
+      case AliasPatternCond_K_Feature :
+      case AliasPatternCond_K_NegFeature :
+      case AliasPatternCond_K_OrFeature :
+      case AliasPatternCond_K_OrNegFeature :
+      case AliasPatternCond_K_EndOrFeatures :
+      default :
+        break;
+      }
+      condIdx++;
+    }
+    if(allPass){
+      AsmStrOffset = Patterns[patIdx].AsmStrOffset;
+      break;
+    }
+    patIdx++;
+  }
+
+  // If no alias matched, don't print an alias.
+  if (AsmStrOffset == ~0U)
+    return NULL;
+
+  AsmString = cs_strdup(&AsmStrings[AsmStrOffset]);
+
  tmpString = cs_strdup(AsmString);

  while (AsmString[I] != ' ' && AsmString[I] != '\\t' &&
-         AsmString[I] != '$' && AsmString[I] != '\\0')
+        AsmString[I] != '$' && AsmString[I] != '\\0')
    ++I;

  tmpString[I] = 0;
@ -653,15 +775,20 @@ for line in lines:
          ++I;
          OpIdx = AsmString[I++] - 1;
          PrintMethodIdx = AsmString[I++] - 1;
-          printCustomAliasOperand(MI, OpIdx, PrintMethodIdx, OS);
+          printCustomAliasOperand(MI, 0, OpIdx, PrintMethodIdx, OS);
        } else
-            printOperand(MI, (unsigned)(AsmString[I++]) - 1, OS);
+          printOperand(MI, (unsigned)(AsmString[I++]) - 1, OS);
      } else {
-          SStream_concat1(OS, AsmString[I++]);
+        if (AsmString[I] == '[') {
+          set_mem_access(MI, true);
+        } else if (AsmString[I] == ']') {
+          set_mem_access(MI, false);
+        }
+        SStream_concat1(OS, AsmString[I++]);
      }
    } while (AsmString[I] != '\\0');
  }
-
+  cs_mem_free(AsmString);
  return tmpString;
 }
        """)
--- a/suite/synctools/disassemblertables-arch.py
+++ b/suite/synctools/disassemblertables-arch.py
@ -84,7 +84,7 @@ for line in lines:
    elif skip_print and 'static const uint8_t DecoderTable' in line2:
        skip_print = False

-    elif 'End llvm namespace' in line2:
+    elif 'end namespace llvm' in line2:
        # done
        break

@ -103,13 +103,13 @@ for line in lines:
            line2 = line2.replace('Bits[', 'AArch64_getFeatureBits(')
            line2 = line2.replace(']', ')')

-    elif 'static bool checkDecoderPredicate(unsigned Idx, const FeatureBitset& Bits) {' in line2:
+    elif 'static bool checkDecoderPredicate(unsigned Idx, const FeatureBitset &Bits) {' in line2:
        line2 = 'static bool checkDecoderPredicate(unsigned Idx, MCInst *MI)\n{'

    elif 'checkDecoderPredicate(PIdx, ' in line2:
        line2 = line2.replace(', Bits)', ', MI)')

-    elif 'template<typename InsnType>' in line2:
+    elif 'template <typename InsnType>' in line2:
        continue

    elif 'static DecodeStatus decodeToMCInst' in line2:
@ -122,6 +122,10 @@ for line in lines:
        line2 = line2.replace('fieldFromInstruction', 'fieldname')
        if 'InsnType FieldValue' in line2:
            line2 = line2.replace('InsnType ', '')
+        if 'insertBits(tmp,' in line2:
+            line2 = line2.replace('insertBits(', '')
+            tmpLn = line2.split(',')
+            line2 = tmpLn[0] + ' |=' + tmpLn[1] + ',' + tmpLn[2] + ',' + tmpLn[3] + ' <<' + tmpLn[4] + ';'

    elif 'DecodeComplete = true;' in line2:
        # dead code
@ -184,8 +188,8 @@ for line in lines:
        line2 = line2.replace('InsnType ', '')
    elif 'InsnType NegativeMask =' in line2:
        line2 = line2.replace('InsnType ', '')
-    elif 'uint32_t ExpectedValue' in line2:
-        line2 = line2.replace('uint32_t ', '')
+    elif 'InsnType ExpectedValue' in line2:
+        line2 = line2.replace('InsnType ', '')
    elif 'ptrdiff_t Loc = ' in line2:
        continue
    elif 'LLVM_DEBUG(' in line2:
@ -223,6 +227,10 @@ for line in lines:
            param = extract_brackets(line2)
            line2 = del_brackets(line2)
            line2 = line2.replace(', Decoder)', ', Decoder, %s)' %param)
+        elif 'DecodeMatrixTile<' in line2:
+            param = extract_brackets(line2)
+            line2 = del_brackets(line2)
+            line2 = line2.replace(', Decoder)', ', Decoder, %s)' %param)
        if 'DecodeComplete = false; ' in line2:
            line2 = line2.replace('DecodeComplete = false; ', '')
    elif 'decodeUImmOperand<' in line2 or 'decodeSImmOperand<' in line2 :
@ -235,6 +243,10 @@ for line in lines:
    elif 'MI = TmpMI;' in line2:
        line2 = ''
        #line2 = line2.replace('TmpMI', '&TmpMI')
+    elif 'using TmpType = std::conditional' in line2:
+        continue
+    elif 'TmpType tmp;' in line2:
+        line2 = line2.replace('TmpType', 'InsnType')

    line2 = line2.replace('::', '_')
    print_line(line2)
--- a/suite/synctools/genall-arch.sh
+++ b/suite/synctools/genall-arch.sh
@ -12,7 +12,7 @@ ARCH=$3
 echo "Generating ${ARCH}GenAsmWriter.inc"
 ./asmwriter.py $1/${ARCH}GenAsmWriter.inc ${ARCH}GenAsmWriter.inc ${ARCH}GenRegisterName.inc ${ARCH}

-echo "Generating ${ARCH}MappingInsnName.inc"
+echo "Generating ${ARCH}MappingInsnName.inc (Copy comments to include/capstone/<arch>.h for complete insn list.)"
 ./mapping_insn_name-arch.py $1/${ARCH}GenAsmMatcher.inc > ${ARCH}MappingInsnName.inc
 #./mapping_insn_name-arch.py tablegen/ARMGenAsmMatcher.inc

@ -47,6 +47,7 @@ case $3 in
  ./systemregister.py $1/${ARCH}GenSystemRegister.inc > ${ARCH}GenSystemRegister.inc
  ;;
  AArch64)
+  make arm64
  echo "Generating ${ARCH}GenSystemOperands.inc"
  ./systemoperand.py tablegen/AArch64GenSystemOperands.inc AArch64GenSystemOperands.inc AArch64GenSystemOperands_enum.inc
  echo "Generating instruction enum in insn_list.txt (for include/capstone/<arch>.h)"
@ -55,7 +56,6 @@ case $3 in
  ./arm64_gen_vreg > AArch64GenRegisterV.inc
  echo "Generating ${ARCH}MappingInsnOp.inc"
  ./mapping_insn_op-arch.py $1/${ARCH}GenAsmMatcher.inc $1/${ARCH}GenInstrInfo.inc  $2/${ARCH}MappingInsnOp.inc > ${ARCH}MappingInsnOp.inc 
-  make arm64
  ;;
  PowerPC)
  # PowerPC
--- a/suite/synctools/mapping_insn-arch.py
+++ b/suite/synctools/mapping_insn-arch.py
@ -92,7 +92,7 @@ def extract_matcher(filename):
                    if not first_insn:
                        arch, first_insn = _arch, insn_id

-                    if not insn_id in insn_id_list:
+                    if not insn_id in insn_id_list and mnem.upper() in insn_id:
                        # save this
                        insn_id_list[insn_id] = mnem

@ -175,7 +175,7 @@ lines = f.readlines()
 f.close()

 count = 0
-last_mnem = None
+last_mnem = ''

 # 1st enum is register enum
 for line in lines:
--- a/suite/synctools/registerinfo.py
+++ b/suite/synctools/registerinfo.py
@ -136,10 +136,10 @@ if arch.upper() == 'AARCH64':
        if len(line.strip()) == 0:
            continue
    
-        if line.strip() == 'enum {':
+        if line.strip() == 'enum {' or 'enum :' in line.strip():
            enum_count += 1
            if enum_count == 4:
-                print(line)
+                print('enum {')
            continue
    
        if enum_count == 4:
--- a/suite/synctools/systemoperand.py
+++ b/suite/synctools/systemoperand.py
@ -84,7 +84,7 @@ for line in lines:
    if len(line.strip()) == 0:
        continue

-    if line.strip() == 'const AT ATsList[] = {':
+    if line.strip() == 'constexpr AT ATsList[] = {':
        count += 1
        print_line('static const AT ATsList[] = {')
        continue
@ -148,7 +148,7 @@ for line in lines:
    if len(line.strip()) == 0:
        continue

-    if line.strip() == 'const DB DBsList[] = {':
+    if line.strip() == 'constexpr DB DBsList[] = {':
        count += 1
        print_line('static const DB DBsList[] = {')
        continue
@ -211,7 +211,7 @@ for line in lines:
    if len(line.strip()) == 0:
        continue

-    if line.strip() == 'const DC DCsList[] = {':
+    if line.strip() == 'constexpr DC DCsList[] = {':
        count += 1
        print_line('static const DC DCsList[] = {')
        continue
@ -276,7 +276,7 @@ for line in lines:
    if len(line.strip()) == 0:
        continue

-    if line.strip() == 'const IC ICsList[] = {':
+    if line.strip() == 'constexpr IC ICsList[] = {':
        count += 1
        print_line('static const IC ICsList[] = {')
        continue
@ -342,7 +342,7 @@ for line in lines:
    if len(line.strip()) == 0:
        continue

-    if line.strip() == 'const TLBI TLBIsList[] = {':
+    if line.strip() == 'constexpr TLBI TLBITable[] = {':
        count += 1
        print_line('static const TLBI TLBIsList[] = {')
        continue
@ -407,7 +407,7 @@ for line in lines:
    if len(line.strip()) == 0:
        continue

-    if line.strip() == 'const SVEPRFM SVEPRFMsList[] = {':
+    if line.strip() == 'constexpr SVEPRFM SVEPRFMsList[] = {':
        count += 1
        print_line('static const SVEPRFM SVEPRFMsList[] = {')
        continue
@ -473,7 +473,7 @@ for line in lines:
    if len(line.strip()) == 0:
        continue

-    if line.strip() == 'const PRFM PRFMsList[] = {':
+    if line.strip() == 'constexpr PRFM PRFMsList[] = {':
        count += 1
        print_line('static const PRFM PRFMsList[] = {')
        continue
@ -539,7 +539,7 @@ for line in lines:
    if len(line.strip()) == 0:
        continue

-    if line.strip() == 'const PSB PSBsList[] = {':
+    if line.strip() == 'constexpr PSB PSBsList[] = {':
        count += 1
        print_line('static const PSB PSBsList[] = {')
        continue
@ -570,7 +570,7 @@ for line in lines:

    if 'lookupPSBByEncoding' in line and '{' in line:
        count += 1
-        print_line('const PSB *AArch64PSBHint_lookupPSBByEncoding(uint16_t Encoding)\n{')
+        print_line('const PSB *lookupPSBByEncoding(uint16_t Encoding)\n{')
        print_line('  unsigned int i;')
        continue

@ -605,7 +605,7 @@ for line in lines:
    if len(line.strip()) == 0:
        continue

-    if line.strip() == 'const ISB ISBsList[] = {':
+    if line.strip() == 'constexpr ISB ISBsList[] = {':
        count += 1
        print_line('static const ISB ISBsList[] = {')
        continue
@ -671,7 +671,7 @@ for line in lines:
    if len(line.strip()) == 0:
        continue

-    if line.strip() == 'const TSB TSBsList[] = {':
+    if line.strip() == 'constexpr TSB TSBsList[] = {':
        count += 1
        print_line('static const TSB TSBsList[] = {')
        continue
@ -735,7 +735,7 @@ for line in lines:
    if len(line.strip()) == 0:
        continue

-    if line.strip() == 'const SysReg SysRegsList[] = {':
+    if line.strip() == 'constexpr SysReg SysRegsList[] = {':
        count += 1
        print_line('static const SysReg SysRegsList[] = {')
        continue
@ -752,7 +752,7 @@ for line in lines:
            #line = line.replace('{}', '{ 0 }')
            line = line.replace('{}', '')
            tmp = line.split(',')
-            print_line("  %s, %s, %s, %s }, // %u" %(tmp[0].lower(), tmp[1], tmp[2], tmp[3], c))
+            print_line("  %s, %s, %s, %s }, // %u" %(tmp[0].lower(), tmp[2], tmp[3], tmp[4], c))
            #print_line("  %s" %line.lower())
            c += 1

@ -800,7 +800,7 @@ for line in lines:
    if len(line.strip()) == 0:
        continue

-    if line.strip() == 'const PState PStatesList[] = {':
+    if line.strip() == 'constexpr PState PStatesList[] = {':
        count += 1
        print_line('static const PState PStatesList[] = {')
        continue
@ -865,7 +865,7 @@ for line in lines:
    if len(line.strip()) == 0:
        continue

-    if line.strip() == 'const SVEPREDPAT SVEPREDPATsList[] = {':
+    if line.strip() == 'constexpr SVEPREDPAT SVEPREDPATsList[] = {':
        count += 1
        print_line('static const SVEPREDPAT SVEPREDPATsList[] = {')
        continue
@ -921,6 +921,198 @@ print_line("""
 }
 """)

+# extract SVCRsList
+count = 0
+c = 0
+for line in lines:
+    line = line.rstrip()
+
+    if len(line.strip()) == 0:
+        continue
+
+    if line.strip() == 'constexpr SVCR SVCRsList[] = {':
+        count += 1
+        print_line('static const SVCR SVCRsList[] = {')
+        continue
+
+    line = line.strip()
+    if count == 1:
+        if line == '};':
+            # done with first enum
+            print_line('};\n')
+            break
+        else:
+            # skip pseudo instructions
+            line = line.replace('::', '_')
+            #line = line.replace('{}', '{ 0 }')
+            line = line.replace('{}', '')
+            tmp = line.split(',')
+            print_line("  %s, %s }, // %u" %(tmp[0].lower(), tmp[1], c))
+            c += 1
+
+# lookupSVCRByEncoding
+count = 0
+for line in lines:
+    line = line.rstrip()
+
+    if len(line.strip()) == 0:
+        continue
+
+    if 'lookupSVCRByEncoding' in line and '{' in line:
+        count += 1
+        print_line('const SVCR *lookupSVCRByEncoding(uint8_t Encoding)\n{')
+        print_line('  unsigned int i;')
+        continue
+
+    if count == 1 and 'IndexType Index[] = {' in line:
+        count += 1
+
+    if count == 2:
+        if line.strip() == '};':
+            # done with array, or this function?
+            print_line(line)
+            break
+        else:
+            # enum items
+            print_line(line)
+
+print_line("""
+  i = binsearch_IndexTypeEncoding(Index, ARR_SIZE(Index), Encoding);
+  if (i == -1)
+    return NULL;
+  else
+    return &SVCRsList[Index[i].index];
+}
+""")
+
+# extract BTIsList
+count = 0
+c = 0
+for line in lines:
+    line = line.rstrip()
+
+    if len(line.strip()) == 0:
+        continue
+
+    if line.strip() == 'constexpr BTI BTIsList[] = {':
+        count += 1
+        print_line('static const BTI BTIsList[] = {')
+        continue
+
+    line = line.strip()
+    if count == 1:
+        if line == '};':
+            # done with first enum
+            print_line('};\n')
+            break
+        else:
+            # skip pseudo instructions
+            line = line.replace('::', '_')
+            #line = line.replace('{}', '{ 0 }')
+            line = line.replace('{}', '')
+            tmp = line.split(',')
+            print_line("  %s, %s, // %u" %(tmp[0].lower(), tmp[1], c))
+            c += 1
+
+# lookupBTIByEncoding
+count = 0
+for line in lines:
+    line = line.rstrip()
+
+    if len(line.strip()) == 0:
+        continue
+
+    if 'lookupBTIByEncoding' in line and '{' in line:
+        count += 1
+        print_line('const BTI *lookupBTIByEncoding(uint8_t Encoding)\n{')
+        print_line('  unsigned int i;')
+        continue
+
+    if count == 1 and 'IndexType Index[] = {' in line:
+        count += 1
+
+    if count == 2:
+        if line.strip() == '};':
+            # done with array, or this function?
+            print_line(line)
+            break
+        else:
+            # enum items
+            print_line(line)
+
+print_line("""
+  i = binsearch_IndexTypeEncoding(Index, ARR_SIZE(Index), Encoding);
+  if (i == -1)
+    return NULL;
+  else
+    return &BTIsList[Index[i].index];
+}
+""")
+
+
+# extract DBnXSsList
+count = 0
+c = 0
+for line in lines:
+    line = line.rstrip()
+
+    if len(line.strip()) == 0:
+        continue
+
+    if line.strip() == 'constexpr DBnXS DBnXSsList[] = {':
+        count += 1
+        print_line('static const DBnXS DBnXSsList[] = {')
+        continue
+
+    line = line.strip()
+    if count == 1:
+        if line == '};':
+            # done with first enum
+            print_line('};\n')
+            break
+        else:
+            # skip pseudo instructions
+            line = line.replace('::', '_')
+            #line = line.replace('{}', '{ 0 }')
+            line = line.replace('{}', '')
+            tmp = line.split(',')
+            print_line("  %s, %s, %s}, // %u" %(tmp[0].lower(), tmp[1], tmp[2], c))
+            c += 1
+
+# lookupDBnXSByEncoding
+count = 0
+for line in lines:
+    line = line.rstrip()
+
+    if len(line.strip()) == 0:
+        continue
+
+    if 'lookupDBnXSByEncoding' in line and '{' in line:
+        count += 1
+        print_line('const DBnXS *lookupDBnXSByEncoding(uint8_t Encoding)\n{')
+        print_line('  unsigned int i;')
+        continue
+
+    if count == 1 and 'IndexType Index[] = {' in line:
+        count += 1
+
+    if count == 2:
+        if line.strip() == '};':
+            # done with array, or this function?
+            print_line(line)
+            break
+        else:
+            # enum items
+            print_line(line)
+
+print_line("""
+  i = binsearch_IndexTypeEncoding(Index, ARR_SIZE(Index), Encoding);
+  if (i == -1)
+    return NULL;
+  else
+    return &DBnXSsList[Index[i].index];
+}
+""")

 # extract ExactFPImmsList
 count = 0
@ -931,7 +1123,7 @@ for line in lines:
    if len(line.strip()) == 0:
        continue

-    if line.strip() == 'const ExactFPImm ExactFPImmsList[] = {':
+    if line.strip() == 'constexpr ExactFPImm ExactFPImmsList[] = {':
        count += 1
        print_line('static const ExactFPImm ExactFPImmsList[] = {')
        continue
--- a/suite/synctools/tablegen/AArch64/AArch64.td
+++ b/suite/synctools/tablegen/AArch64/AArch64.td
--- a/suite/synctools/tablegen/AArch64/AArch64CallingConvention.td
+++ b/suite/synctools/tablegen/AArch64/AArch64CallingConvention.td
@ -1,9 +1,8 @@
 //=- AArch64CallingConv.td - Calling Conventions for AArch64 -*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@ -11,17 +10,19 @@
 //
 //===----------------------------------------------------------------------===//

-/// CCIfAlign - Match of the original alignment of the arg
-class CCIfAlign<string Align, CCAction A> :
-  CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>;
 /// CCIfBigEndian - Match only if we're in big endian mode.
 class CCIfBigEndian<CCAction A> :
  CCIf<"State.getMachineFunction().getDataLayout().isBigEndian()", A>;

+class CCIfILP32<CCAction A> :
+  CCIf<"State.getMachineFunction().getDataLayout().getPointerSize() == 4", A>;
+
+
 //===----------------------------------------------------------------------===//
 // ARM AAPCS64 Calling Convention
 //===----------------------------------------------------------------------===//

+let Entry = 1 in
 def CC_AArch64_AAPCS : CallingConv<[
  CCIfType<[iPTR], CCBitConvertToType<i64>>,
  CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
@ -29,13 +30,29 @@ def CC_AArch64_AAPCS : CallingConv<[

  // Big endian vectors must be passed as if they were 1-element vectors so that
  // their lanes are in a consistent order.
-  CCIfBigEndian<CCIfType<[v2i32, v2f32, v4i16, v4f16, v8i8],
+  CCIfBigEndian<CCIfType<[v2i32, v2f32, v4i16, v4f16, v4bf16, v8i8],
                         CCBitConvertToType<f64>>>,
-  CCIfBigEndian<CCIfType<[v2i64, v2f64, v4i32, v4f32, v8i16, v8f16, v16i8],
+  CCIfBigEndian<CCIfType<[v2i64, v2f64, v4i32, v4f32, v8i16, v8f16, v8bf16, v16i8],
                         CCBitConvertToType<f128>>>,

-  // An SRet is passed in X8, not X0 like a normal pointer parameter.
-  CCIfSRet<CCIfType<[i64], CCAssignToRegWithShadow<[X8], [W8]>>>,
+  // In AAPCS, an SRet is passed in X8, not X0 like a normal pointer parameter.
+  // However, on windows, in some circumstances, the SRet is passed in X0 or X1
+  // instead.  The presence of the inreg attribute indicates that SRet is
+  // passed in the alternative register (X0 or X1), not X8:
+  // - X0 for non-instance methods.
+  // - X1 for instance methods.
+
+  // The "sret" attribute identifies indirect returns.
+  // The "inreg" attribute identifies non-aggregate types.
+  // The position of the "sret" attribute identifies instance/non-instance
+  // methods.
+  // "sret" on argument 0 means non-instance methods.
+  // "sret" on argument 1 means instance methods.
+
+  CCIfInReg<CCIfType<[i64],
+    CCIfSRet<CCIfType<[i64], CCAssignToReg<[X0, X1]>>>>>,
+
+  CCIfSRet<CCIfType<[i64], CCAssignToReg<[X8]>>>,

  // Put ByVal arguments directly on the stack. Minimum size and alignment of a
  // slot is 64-bit.
@ -47,18 +64,33 @@ def CC_AArch64_AAPCS : CallingConv<[
  CCIfNest<CCAssignToReg<[X18]>>,

  // Pass SwiftSelf in a callee saved register.
-  CCIfSwiftSelf<CCIfType<[i64], CCAssignToRegWithShadow<[X20], [W20]>>>,
+  CCIfSwiftSelf<CCIfType<[i64], CCAssignToReg<[X20]>>>,

  // A SwiftError is passed in X21.
-  CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X21], [W21]>>>,
+  CCIfSwiftError<CCIfType<[i64], CCAssignToReg<[X21]>>>,
+
+  // Pass SwiftAsync in an otherwise callee saved register so that it will be
+  // preserved for normal function calls.
+  CCIfSwiftAsync<CCIfType<[i64], CCAssignToReg<[X22]>>>,

  CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,

+  CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16,
+            nxv2bf16, nxv4bf16, nxv8bf16, nxv2f32, nxv4f32, nxv2f64],
+           CCAssignToReg<[Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7]>>,
+  CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16,
+            nxv2bf16, nxv4bf16, nxv8bf16, nxv2f32, nxv4f32, nxv2f64],
+           CCPassIndirect<i64>>,
+
+  CCIfType<[nxv2i1, nxv4i1, nxv8i1, nxv16i1],
+           CCAssignToReg<[P0, P1, P2, P3]>>,
+  CCIfType<[nxv2i1, nxv4i1, nxv8i1, nxv16i1],
+           CCPassIndirect<i64>>,
+
  // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
  // up to eight each of GPR and FPR.
  CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
-  CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
-                                          [X0, X1, X2, X3, X4, X5, X6, X7]>>,
+  CCIfType<[i32], CCAssignToReg<[W0, W1, W2, W3, W4, W5, W6, W7]>>,
  // i128 is split to two i64s, we can't fit half to register X7.
  CCIfType<[i64], CCIfSplit<CCAssignToRegWithShadow<[X0, X2, X4, X6],
                                                    [X0, X1, X3, X5]>>>,
@ -66,129 +98,145 @@ def CC_AArch64_AAPCS : CallingConv<[
  // i128 is split to two i64s, and its stack alignment is 16 bytes.
  CCIfType<[i64], CCIfSplit<CCAssignToStackWithShadow<8, 16, [X7]>>>,

-  CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
-                                          [W0, W1, W2, W3, W4, W5, W6, W7]>>,
-  CCIfType<[f16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7],
-                                          [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
-  CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
-                                          [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
-  CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
-                                          [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
-  CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16],
-           CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
-                                   [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
-  CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
+  CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>,
+  CCIfType<[f16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>,
+  CCIfType<[bf16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>,
+  CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>,
+  CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+  CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16, v4bf16],
+           CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+  CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16],
           CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,

  // If more than will fit in registers, pass them on the stack instead.
-  CCIfType<[i1, i8, i16, f16], CCAssignToStack<8, 8>>,
+  CCIfType<[i1, i8, i16, f16, bf16], CCAssignToStack<8, 8>>,
  CCIfType<[i32, f32], CCAssignToStack<8, 8>>,
-  CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16],
+  CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16, v4bf16],
           CCAssignToStack<8, 8>>,
-  CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
+  CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16],
           CCAssignToStack<16, 16>>
 ]>;

+let Entry = 1 in
 def RetCC_AArch64_AAPCS : CallingConv<[
  CCIfType<[iPTR], CCBitConvertToType<i64>>,
  CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
  CCIfType<[v2f64, v4f32], CCBitConvertToType<v2i64>>,

-  CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X21], [W21]>>>,
+  CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,
+  CCIfSwiftError<CCIfType<[i64], CCAssignToReg<[X21]>>>,

  // Big endian vectors must be passed as if they were 1-element vectors so that
  // their lanes are in a consistent order.
-  CCIfBigEndian<CCIfType<[v2i32, v2f32, v4i16, v4f16, v8i8],
+  CCIfBigEndian<CCIfType<[v2i32, v2f32, v4i16, v4f16, v4bf16, v8i8],
                         CCBitConvertToType<f64>>>,
-  CCIfBigEndian<CCIfType<[v2i64, v2f64, v4i32, v4f32, v8i16, v8f16, v16i8],
+  CCIfBigEndian<CCIfType<[v2i64, v2f64, v4i32, v4f32, v8i16, v8f16, v8bf16, v16i8],
                         CCBitConvertToType<f128>>>,

  CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
-  CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
-                                          [X0, X1, X2, X3, X4, X5, X6, X7]>>,
-  CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
-                                          [W0, W1, W2, W3, W4, W5, W6, W7]>>,
-  CCIfType<[f16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7],
-                                          [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
-  CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
-                                          [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
-  CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
-                                          [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
-  CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16],
-      CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
-                              [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
-  CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
-      CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>
+  CCIfType<[i32], CCAssignToReg<[W0, W1, W2, W3, W4, W5, W6, W7]>>,
+  CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>,
+  CCIfType<[f16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>,
+  CCIfType<[bf16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>,
+  CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>,
+  CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+  CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16, v4bf16],
+      CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+  CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16],
+      CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
+
+  CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16,
+            nxv2bf16, nxv4bf16, nxv8bf16, nxv2f32, nxv4f32, nxv2f64],
+           CCAssignToReg<[Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7]>>,
+
+  CCIfType<[nxv2i1, nxv4i1, nxv8i1, nxv16i1],
+           CCAssignToReg<[P0, P1, P2, P3]>>
 ]>;

 // Vararg functions on windows pass floats in integer registers
+let Entry = 1 in
 def CC_AArch64_Win64_VarArg : CallingConv<[
-  CCIfType<[f16, f32],    CCPromoteToType<f64>>,
+  CCIfType<[f16, bf16], CCBitConvertToType<i16>>,
+  CCIfType<[f32], CCBitConvertToType<i32>>,
  CCIfType<[f64], CCBitConvertToType<i64>>,
  CCDelegateTo<CC_AArch64_AAPCS>
 ]>;

+// Windows Control Flow Guard checks take a single argument (the target function
+// address) and have no return value.
+let Entry = 1 in
+def CC_AArch64_Win64_CFGuard_Check : CallingConv<[
+  CCIfType<[i64], CCAssignToReg<[X15]>>
+]>;
+

 // Darwin uses a calling convention which differs in only two ways
 // from the standard one at this level:
 //     + i128s (i.e. split i64s) don't need even registers.
 //     + Stack slots are sized as needed rather than being at least 64-bit.
+let Entry = 1 in
 def CC_AArch64_DarwinPCS : CallingConv<[
  CCIfType<[iPTR], CCBitConvertToType<i64>>,
  CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
  CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>,

  // An SRet is passed in X8, not X0 like a normal pointer parameter.
-  CCIfSRet<CCIfType<[i64], CCAssignToRegWithShadow<[X8], [W8]>>>,
+  CCIfSRet<CCIfType<[i64], CCAssignToReg<[X8]>>>,

  // Put ByVal arguments directly on the stack. Minimum size and alignment of a
  // slot is 64-bit.
  CCIfByVal<CCPassByVal<8, 8>>,

  // Pass SwiftSelf in a callee saved register.
-  CCIfSwiftSelf<CCIfType<[i64], CCAssignToRegWithShadow<[X20], [W20]>>>,
+  CCIfSwiftSelf<CCIfType<[i64], CCAssignToReg<[X20]>>>,

  // A SwiftError is passed in X21.
-  CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X21], [W21]>>>,
+  CCIfSwiftError<CCIfType<[i64], CCAssignToReg<[X21]>>>,
+
+  // Pass SwiftAsync in an otherwise callee saved register so that it will be
+  // preserved for normal function calls.
+  CCIfSwiftAsync<CCIfType<[i64], CCAssignToReg<[X22]>>>,

  CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,

  // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
  // up to eight each of GPR and FPR.
  CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
-  CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
-                                          [X0, X1, X2, X3, X4, X5, X6, X7]>>,
+  CCIfType<[i32], CCAssignToReg<[W0, W1, W2, W3, W4, W5, W6, W7]>>,
  // i128 is split to two i64s, we can't fit half to register X7.
  CCIfType<[i64],
-           CCIfSplit<CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6],
-                                             [W0, W1, W2, W3, W4, W5, W6]>>>,
+           CCIfSplit<CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6]>>>,
  // i128 is split to two i64s, and its stack alignment is 16 bytes.
  CCIfType<[i64], CCIfSplit<CCAssignToStackWithShadow<8, 16, [X7]>>>,

-  CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
-                                          [W0, W1, W2, W3, W4, W5, W6, W7]>>,
-  CCIfType<[f16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7],
-                                          [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
-  CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
-                                          [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
-  CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
-                                          [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
-  CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16],
-           CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
-                                   [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
-  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
+  CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>,
+  CCIfType<[f16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>,
+  CCIfType<[bf16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>,
+  CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>,
+  CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+  CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16, v4bf16],
+           CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16],
           CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,

  // If more than will fit in registers, pass them on the stack instead.
  CCIf<"ValVT == MVT::i1 || ValVT == MVT::i8", CCAssignToStack<1, 1>>,
-  CCIf<"ValVT == MVT::i16 || ValVT == MVT::f16", CCAssignToStack<2, 2>>,
+  CCIf<"ValVT == MVT::i16 || ValVT == MVT::f16 || ValVT == MVT::bf16",
+  CCAssignToStack<2, 2>>,
  CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
-  CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16],
+
+  // Re-demote pointers to 32-bits so we don't end up storing 64-bit
+  // values and clobbering neighbouring stack locations. Not very pretty.
+  CCIfPtr<CCIfILP32<CCTruncToType<i32>>>,
+  CCIfPtr<CCIfILP32<CCAssignToStack<4, 4>>>,
+
+  CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16, v4bf16],
           CCAssignToStack<8, 8>>,
-  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
+  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16],
           CCAssignToStack<16, 16>>
 ]>;

+let Entry = 1 in
 def CC_AArch64_DarwinPCS_VarArg : CallingConv<[
  CCIfType<[iPTR], CCBitConvertToType<i64>>,
  CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
@ -198,41 +246,62 @@ def CC_AArch64_DarwinPCS_VarArg : CallingConv<[

  // Handle all scalar types as either i64 or f64.
  CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
-  CCIfType<[f16, f32],     CCPromoteToType<f64>>,
+  CCIfType<[f16, bf16, f32], CCPromoteToType<f64>>,

  // Everything is on the stack.
  // i128 is split to two i64s, and its stack alignment is 16 bytes.
  CCIfType<[i64], CCIfSplit<CCAssignToStack<8, 16>>>,
-  CCIfType<[i64, f64, v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16],
+  CCIfType<[i64, f64, v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16, v4bf16],
           CCAssignToStack<8, 8>>,
-  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
+  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16],
           CCAssignToStack<16, 16>>
 ]>;

+// In the ILP32 world, the minimum stack slot size is 4 bytes. Otherwise the
+// same as the normal Darwin VarArgs handling.
+let Entry = 1 in
+def CC_AArch64_DarwinPCS_ILP32_VarArg : CallingConv<[
+  CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
+  CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>,
+
+  // Handle all scalar types as either i32 or f32.
+  CCIfType<[i8, i16], CCPromoteToType<i32>>,
+  CCIfType<[f16, bf16], CCPromoteToType<f32>>,
+
+  // Everything is on the stack.
+  // i128 is split to two i64s, and its stack alignment is 16 bytes.
+  CCIfPtr<CCIfILP32<CCTruncToType<i32>>>,
+  CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
+  CCIfType<[i64], CCIfSplit<CCAssignToStack<8, 16>>>,
+  CCIfType<[i64, f64, v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16, v4bf16],
+           CCAssignToStack<8, 8>>,
+  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16],
+           CCAssignToStack<16, 16>>
+]>;
+
+
 // The WebKit_JS calling convention only passes the first argument (the callee)
 // in register and the remaining arguments on stack. We allow 32bit stack slots,
 // so that WebKit can write partial values in the stack and define the other
 // 32bit quantity as undef.
+let Entry = 1 in
 def CC_AArch64_WebKit_JS : CallingConv<[
  // Handle i1, i8, i16, i32, and i64 passing in register X0 (W0).
  CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
-  CCIfType<[i32], CCAssignToRegWithShadow<[W0], [X0]>>,
-  CCIfType<[i64], CCAssignToRegWithShadow<[X0], [W0]>>,
+  CCIfType<[i32], CCAssignToReg<[W0]>>,
+  CCIfType<[i64], CCAssignToReg<[X0]>>,

  // Pass the remaining arguments on the stack instead.
  CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
  CCIfType<[i64, f64], CCAssignToStack<8, 8>>
 ]>;

+let Entry = 1 in
 def RetCC_AArch64_WebKit_JS : CallingConv<[
-  CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
-                                          [X0, X1, X2, X3, X4, X5, X6, X7]>>,
-  CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
-                                          [W0, W1, W2, W3, W4, W5, W6, W7]>>,
-  CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
-                                          [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
-  CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
-                                          [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>
+  CCIfType<[i32], CCAssignToReg<[W0, W1, W2, W3, W4, W5, W6, W7]>>,
+  CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>,
+  CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>,
+  CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>
 ]>;

 //===----------------------------------------------------------------------===//
@ -257,6 +326,7 @@ def RetCC_AArch64_WebKit_JS : CallingConv<[
 // The AArch64 register mapping is under the heading "The ARMv8/AArch64 ABI
 // register mapping".

+let Entry = 1 in
 def CC_AArch64_GHC : CallingConv<[
  CCIfType<[iPTR], CCBitConvertToType<i64>>,

@ -275,6 +345,12 @@ def CC_AArch64_GHC : CallingConv<[
  CCIfType<[i64], CCAssignToReg<[X19, X20, X21, X22, X23, X24, X25, X26, X27, X28]>>
 ]>;

+// The order of the callee-saves in this file is important, because the
+// FrameLowering code will use this order to determine the layout of the
+// callee-save area in the stack frame. As can be observed below, Darwin
+// requires the frame-record (LR, FP) to be at the top of the callee-save area,
+// whereas for other platforms they are at the bottom.
+
 // FIXME: LR is only callee-saved in the sense that *we* preserve it and are
 // presumably a callee to someone. External functions may not do so, but this
 // is currently safe since BL has LR as an implicit-def and what happens after a
@ -283,11 +359,45 @@ def CC_AArch64_GHC : CallingConv<[
 // It would be better to model its preservation semantics properly (create a
 // vreg on entry, use it in RET & tail call generation; make that vreg def if we
 // end up saving LR as part of a call frame). Watch this space...
-def CSR_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22,
-                                           X23, X24, X25, X26, X27, X28,
+def CSR_AArch64_AAPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24,
+                                           X25, X26, X27, X28, LR, FP,
                                           D8,  D9,  D10, D11,
                                           D12, D13, D14, D15)>;

+// A variant for treating X18 as callee saved, when interfacing with
+// code that needs X18 to be preserved.
+def CSR_AArch64_AAPCS_X18 : CalleeSavedRegs<(add X18, CSR_AArch64_AAPCS)>;
+
+// Win64 has unwinding codes for an (FP,LR) pair, save_fplr and save_fplr_x.
+// We put FP before LR, so that frame lowering logic generates (FP,LR) pairs,
+// and not (LR,FP) pairs.
+def CSR_Win_AArch64_AAPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24,
+                                               X25, X26, X27, X28, FP, LR,
+                                               D8, D9, D10, D11,
+                                               D12, D13, D14, D15)>;
+
+// The Control Flow Guard check call uses a custom calling convention that also
+// preserves X0-X8 and Q0-Q7.
+def CSR_Win_AArch64_CFGuard_Check : CalleeSavedRegs<(add CSR_Win_AArch64_AAPCS,
+                                               (sequence "X%u", 0, 8),
+                                               (sequence "Q%u", 0, 7))>;
+
+// AArch64 PCS for vector functions (VPCS)
+// must (additionally) preserve full Q8-Q23 registers
+def CSR_AArch64_AAVPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24,
+                                          X25, X26, X27, X28, LR, FP,
+                                          (sequence "Q%u", 8, 23))>;
+
+// Functions taking SVE arguments or returning an SVE type
+// must (additionally) preserve full Z8-Z23 and predicate registers P4-P15
+def CSR_AArch64_SVE_AAPCS : CalleeSavedRegs<(add (sequence "Z%u", 8, 23),
+                                                 (sequence "P%u", 4, 15),
+                                                 X19, X20, X21, X22, X23, X24,
+                                                 X25, X26, X27, X28, LR, FP)>;
+
+def CSR_AArch64_AAPCS_SwiftTail
+    : CalleeSavedRegs<(sub CSR_AArch64_AAPCS, X20, X22)>;
+
 // Constructors and destructors return 'this' in the iOS 64-bit C++ ABI; since
 // 'this' and the pointer return value are both passed in X0 in these cases,
 // this can be partially modelled by treating X0 as a callee-saved register;
@ -301,32 +411,6 @@ def CSR_AArch64_AAPCS_ThisReturn : CalleeSavedRegs<(add CSR_AArch64_AAPCS, X0)>;
 def CSR_AArch64_AAPCS_SwiftError
    : CalleeSavedRegs<(sub CSR_AArch64_AAPCS, X21)>;

-// The function used by Darwin to obtain the address of a thread-local variable
-// guarantees more than a normal AAPCS function. x16 and x17 are used on the
-// fast path for calculation, but other registers except X0 (argument/return)
-// and LR (it is a call, after all) are preserved.
-def CSR_AArch64_TLS_Darwin
-    : CalleeSavedRegs<(add (sub (sequence "X%u", 1, 28), X16, X17),
-                           FP,
-                           (sequence "Q%u", 0, 31))>;
-
-// We can only handle a register pair with adjacent registers, the register pair
-// should belong to the same class as well. Since the access function on the
-// fast path calls a function that follows CSR_AArch64_TLS_Darwin,
-// CSR_AArch64_CXX_TLS_Darwin should be a subset of CSR_AArch64_TLS_Darwin.
-def CSR_AArch64_CXX_TLS_Darwin
-    : CalleeSavedRegs<(add CSR_AArch64_AAPCS,
-                           (sub (sequence "X%u", 1, 28), X15, X16, X17, X18),
-                           (sequence "D%u", 0, 31))>;
-
-// CSRs that are handled by prologue, epilogue.
-def CSR_AArch64_CXX_TLS_Darwin_PE
-    : CalleeSavedRegs<(add LR, FP)>;
-
-// CSRs that are handled explicitly via copies.
-def CSR_AArch64_CXX_TLS_Darwin_ViaCopy
-    : CalleeSavedRegs<(sub CSR_AArch64_CXX_TLS_Darwin, LR, FP)>;
-
 // The ELF stub used for TLS-descriptor access saves every feasible
 // register. Only X0 and LR are clobbered.
 def CSR_AArch64_TLS_ELF
@ -350,17 +434,67 @@ def CSR_AArch64_StackProbe_Windows
                           (sequence "X%u", 18, 28), FP, SP,
                           (sequence "Q%u", 0, 31))>;

+// Darwin variants of AAPCS.
+// Darwin puts the frame-record at the top of the callee-save area.
+def CSR_Darwin_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22,
+                                                X23, X24, X25, X26, X27, X28,
+                                                D8,  D9,  D10, D11,
+                                                D12, D13, D14, D15)>;
+
+def CSR_Darwin_AArch64_AAVPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21,
+                                                 X22, X23, X24, X25, X26, X27,
+                                                 X28, (sequence "Q%u", 8, 23))>;
+def CSR_Darwin_AArch64_AAPCS_ThisReturn
+    : CalleeSavedRegs<(add CSR_Darwin_AArch64_AAPCS, X0)>;
+
+def CSR_Darwin_AArch64_AAPCS_SwiftError
+    : CalleeSavedRegs<(sub CSR_Darwin_AArch64_AAPCS, X21)>;
+
+def CSR_Darwin_AArch64_AAPCS_SwiftTail
+    : CalleeSavedRegs<(sub CSR_Darwin_AArch64_AAPCS, X20, X22)>;
+
+// The function used by Darwin to obtain the address of a thread-local variable
+// guarantees more than a normal AAPCS function. x16 and x17 are used on the
+// fast path for calculation, but other registers except X0 (argument/return)
+// and LR (it is a call, after all) are preserved.
+def CSR_Darwin_AArch64_TLS
+    : CalleeSavedRegs<(add (sub (sequence "X%u", 1, 28), X16, X17),
+                           FP,
+                           (sequence "Q%u", 0, 31))>;
+
+// We can only handle a register pair with adjacent registers, the register pair
+// should belong to the same class as well. Since the access function on the
+// fast path calls a function that follows CSR_Darwin_AArch64_TLS,
+// CSR_Darwin_AArch64_CXX_TLS should be a subset of CSR_Darwin_AArch64_TLS.
+def CSR_Darwin_AArch64_CXX_TLS
+    : CalleeSavedRegs<(add CSR_Darwin_AArch64_AAPCS,
+                           (sub (sequence "X%u", 1, 28), X9, X15, X16, X17, X18, X19),
+                           (sequence "D%u", 0, 31))>;
+
+// CSRs that are handled by prologue, epilogue.
+def CSR_Darwin_AArch64_CXX_TLS_PE
+    : CalleeSavedRegs<(add LR, FP)>;
+
+// CSRs that are handled explicitly via copies.
+def CSR_Darwin_AArch64_CXX_TLS_ViaCopy
+    : CalleeSavedRegs<(sub CSR_Darwin_AArch64_CXX_TLS, LR, FP)>;
+
+def CSR_Darwin_AArch64_RT_MostRegs
+    : CalleeSavedRegs<(add CSR_Darwin_AArch64_AAPCS, (sequence "X%u", 9, 15))>;
+
 // Variants of the standard calling conventions for shadow call stack.
 // These all preserve x18 in addition to any other registers.
 def CSR_AArch64_NoRegs_SCS
    : CalleeSavedRegs<(add CSR_AArch64_NoRegs, X18)>;
 def CSR_AArch64_AllRegs_SCS
    : CalleeSavedRegs<(add CSR_AArch64_AllRegs, X18)>;
-def CSR_AArch64_CXX_TLS_Darwin_SCS
-    : CalleeSavedRegs<(add CSR_AArch64_CXX_TLS_Darwin, X18)>;
 def CSR_AArch64_AAPCS_SwiftError_SCS
    : CalleeSavedRegs<(add CSR_AArch64_AAPCS_SwiftError, X18)>;
 def CSR_AArch64_RT_MostRegs_SCS
    : CalleeSavedRegs<(add CSR_AArch64_RT_MostRegs, X18)>;
+def CSR_AArch64_AAVPCS_SCS
+    : CalleeSavedRegs<(add CSR_AArch64_AAVPCS, X18)>;
+def CSR_AArch64_SVE_AAPCS_SCS
+    : CalleeSavedRegs<(add CSR_AArch64_SVE_AAPCS, X18)>;
 def CSR_AArch64_AAPCS_SCS
    : CalleeSavedRegs<(add CSR_AArch64_AAPCS, X18)>;
--- a/suite/synctools/tablegen/AArch64/AArch64Combine.td
+++ b/suite/synctools/tablegen/AArch64/AArch64Combine.td
@ -0,0 +1,233 @@
+//=- AArch64Combine.td - Define AArch64 Combine Rules --------*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/GlobalISel/Combine.td"
+
+def fconstant_to_constant : GICombineRule<
+  (defs root:$root),
+  (match (wip_match_opcode G_FCONSTANT):$root,
+         [{ return matchFConstantToConstant(*${root}, MRI); }]),
+  (apply [{ applyFConstantToConstant(*${root}); }])>;
+
+def icmp_redundant_trunc_matchdata : GIDefMatchData<"Register">;
+def icmp_redundant_trunc : GICombineRule<
+  (defs root:$root, icmp_redundant_trunc_matchdata:$matchinfo),
+  (match (wip_match_opcode G_ICMP):$root,
+         [{ return matchICmpRedundantTrunc(*${root}, MRI, Helper.getKnownBits(), ${matchinfo}); }]),
+  (apply [{ applyICmpRedundantTrunc(*${root}, MRI, B, Observer, ${matchinfo}); }])>;
+
+// AArch64-specific offset folding for G_GLOBAL_VALUE.
+def fold_global_offset_matchdata : GIDefMatchData<"std::pair<uint64_t, uint64_t>">;
+def fold_global_offset : GICombineRule<
+  (defs root:$root, fold_global_offset_matchdata:$matchinfo),
+  (match (wip_match_opcode G_GLOBAL_VALUE):$root,
+          [{ return matchFoldGlobalOffset(*${root}, MRI, ${matchinfo}); }]),
+  (apply [{  return applyFoldGlobalOffset(*${root}, MRI, B, Observer, ${matchinfo});}])
+>;
+
+def AArch64PreLegalizerCombinerHelper: GICombinerHelper<
+  "AArch64GenPreLegalizerCombinerHelper", [all_combines,
+                                           fconstant_to_constant,
+                                           icmp_redundant_trunc,
+                                           fold_global_offset]> {
+  let DisableRuleOption = "aarch64prelegalizercombiner-disable-rule";
+  let StateClass = "AArch64PreLegalizerCombinerHelperState";
+  let AdditionalArguments = [];
+}
+
+def AArch64O0PreLegalizerCombinerHelper: GICombinerHelper<
+  "AArch64GenO0PreLegalizerCombinerHelper", [optnone_combines]> {
+  let DisableRuleOption = "aarch64O0prelegalizercombiner-disable-rule";
+  let StateClass = "AArch64O0PreLegalizerCombinerHelperState";
+  let AdditionalArguments = [];
+}
+
+// Matchdata for combines which replace a G_SHUFFLE_VECTOR with a
+// target-specific opcode.
+def shuffle_matchdata : GIDefMatchData<"ShuffleVectorPseudo">;
+
+def rev : GICombineRule<
+  (defs root:$root, shuffle_matchdata:$matchinfo),
+  (match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
+         [{ return matchREV(*${root}, MRI, ${matchinfo}); }]),
+  (apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }])
+>;
+
+def zip : GICombineRule<
+  (defs root:$root, shuffle_matchdata:$matchinfo),
+  (match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
+         [{ return matchZip(*${root}, MRI, ${matchinfo}); }]),
+  (apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }])
+>;
+
+def uzp : GICombineRule<
+  (defs root:$root, shuffle_matchdata:$matchinfo),
+  (match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
+         [{ return matchUZP(*${root}, MRI, ${matchinfo}); }]),
+  (apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }])
+>;
+
+def dup: GICombineRule <
+  (defs root:$root, shuffle_matchdata:$matchinfo),
+  (match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
+         [{ return matchDup(*${root}, MRI, ${matchinfo}); }]),
+  (apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }])
+>;
+
+def trn : GICombineRule<
+  (defs root:$root, shuffle_matchdata:$matchinfo),
+  (match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
+         [{ return matchTRN(*${root}, MRI, ${matchinfo}); }]),
+  (apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }])
+>;
+
+def ext: GICombineRule <
+  (defs root:$root, shuffle_matchdata:$matchinfo),
+  (match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
+         [{ return matchEXT(*${root}, MRI, ${matchinfo}); }]),
+  (apply [{ applyEXT(*${root}, ${matchinfo}); }])
+>;
+
+def shuf_to_ins_matchdata : GIDefMatchData<"std::tuple<Register, int, Register, int>">;
+def shuf_to_ins: GICombineRule <
+  (defs root:$root, shuf_to_ins_matchdata:$matchinfo),
+  (match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
+         [{ return matchINS(*${root}, MRI, ${matchinfo}); }]),
+  (apply [{ return applyINS(*${root}, MRI, B, ${matchinfo}); }])
+>;
+
+def vashr_vlshr_imm_matchdata : GIDefMatchData<"int64_t">;
+def vashr_vlshr_imm : GICombineRule<
+  (defs root:$root, vashr_vlshr_imm_matchdata:$matchinfo),
+  (match (wip_match_opcode G_ASHR, G_LSHR):$root,
+          [{ return matchVAshrLshrImm(*${root}, MRI, ${matchinfo}); }]),
+  (apply [{ applyVAshrLshrImm(*${root}, MRI, ${matchinfo}); }])
+>;
+
+def form_duplane_matchdata :
+  GIDefMatchData<"std::pair<unsigned, int>">;
+def form_duplane : GICombineRule <  // Root: G_SHUFFLE_VECTOR; matchDupLane fills the pair above.
+  (defs root:$root, form_duplane_matchdata:$matchinfo),
+  (match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
+          [{ return matchDupLane(*${root}, MRI, ${matchinfo}); }]),
+  (apply [{ applyDupLane(*${root}, MRI, B, ${matchinfo}); }])
+>;
+
+def shuffle_vector_lowering : GICombineGroup<[dup, rev, ext, zip, uzp, trn,
+                                              form_duplane,
+                                              shuf_to_ins]>; // Referenced as one unit by AArch64PostLegalizerLoweringHelper.
+
+def adjust_icmp_imm_matchdata :
+  GIDefMatchData<"std::pair<uint64_t, CmpInst::Predicate>">;
+def adjust_icmp_imm : GICombineRule <  // Root: G_ICMP; match data carries a (uint64_t, predicate) pair.
+  (defs root:$root, adjust_icmp_imm_matchdata:$matchinfo),
+  (match (wip_match_opcode G_ICMP):$root,
+          [{ return matchAdjustICmpImmAndPred(*${root}, MRI, ${matchinfo}); }]),
+  (apply [{ applyAdjustICmpImmAndPred(*${root}, ${matchinfo}, B, Observer); }])
+>;
+
+def swap_icmp_operands : GICombineRule <  // Root: G_ICMP; no match data is passed between match and apply.
+  (defs root:$root),
+  (match (wip_match_opcode G_ICMP):$root,
+          [{ return trySwapICmpOperands(*${root}, MRI); }]),
+  (apply [{ applySwapICmpOperands(*${root}, Observer); }])
+>;
+
+def icmp_lowering : GICombineGroup<[adjust_icmp_imm, swap_icmp_operands]>; // Both G_ICMP rules, listed by AArch64PostLegalizerLoweringHelper.
+
+def extractvecelt_pairwise_add_matchdata : GIDefMatchData<"std::tuple<unsigned, LLT, Register>">;
+def extractvecelt_pairwise_add : GICombineRule<  // Root: G_EXTRACT_VECTOR_ELT; match data is the tuple above.
+  (defs root:$root, extractvecelt_pairwise_add_matchdata:$matchinfo),
+  (match (wip_match_opcode G_EXTRACT_VECTOR_ELT):$root,
+          [{ return matchExtractVecEltPairwiseAdd(*${root}, MRI, ${matchinfo}); }]),
+  (apply [{ applyExtractVecEltPairwiseAdd(*${root}, MRI, B, ${matchinfo}); }])
+>;
+
+def mul_const_matchdata : GIDefMatchData<"std::function<void(MachineIRBuilder&, Register)>">;
+def mul_const : GICombineRule<  // Root: G_MUL; the matcher hands a std::function callback to the apply step.
+  (defs root:$root, mul_const_matchdata:$matchinfo),
+  (match (wip_match_opcode G_MUL):$root,
+          [{ return matchAArch64MulConstCombine(*${root}, MRI, ${matchinfo}); }]),
+  (apply [{ applyAArch64MulConstCombine(*${root}, MRI, B, ${matchinfo}); }])
+>;
+
+def build_vector_to_dup : GICombineRule<  // Root: G_BUILD_VECTOR; no match data.
+  (defs root:$root),
+  (match (wip_match_opcode G_BUILD_VECTOR):$root,
+          [{ return matchBuildVectorToDup(*${root}, MRI); }]),
+  (apply [{ return applyBuildVectorToDup(*${root}, MRI, B); }])
+>; // The apply fragment forwards applyBuildVectorToDup's return value.
+
+def build_vector_lowering : GICombineGroup<[build_vector_to_dup]>; // Single-rule group listed by AArch64PostLegalizerLoweringHelper.
+
+def lower_vector_fcmp : GICombineRule<  // Root: G_FCMP.
+  (defs root:$root),
+  (match (wip_match_opcode G_FCMP):$root,
+    [{ return lowerVectorFCMP(*${root}, MRI, B); }]), // The matcher itself receives the builder B.
+  (apply [{}])>; // Apply is empty; presumably lowerVectorFCMP does the rewrite while matching -- confirm.
+
+def form_truncstore_matchdata : GIDefMatchData<"Register">;
+def form_truncstore : GICombineRule<  // Root: G_STORE; match data is a single Register.
+  (defs root:$root, form_truncstore_matchdata:$matchinfo),
+  (match (wip_match_opcode G_STORE):$root,
+          [{ return matchFormTruncstore(*${root}, MRI, ${matchinfo}); }]),
+  (apply [{ applyFormTruncstore(*${root}, MRI, B, Observer, ${matchinfo}); }])
+>;
+
+def fold_merge_to_zext : GICombineRule<  // Root: G_MERGE_VALUES (named $d here rather than $root).
+  (defs root:$d),
+  (match (wip_match_opcode G_MERGE_VALUES):$d,
+          [{ return matchFoldMergeToZext(*${d}, MRI); }]),
+  (apply [{ applyFoldMergeToZext(*${d}, MRI, B, Observer); }])
+>;
+
+def mutate_anyext_to_zext : GICombineRule<  // Root: G_ANYEXT (named $d here rather than $root).
+  (defs root:$d),
+  (match (wip_match_opcode G_ANYEXT):$d,
+          [{ return matchMutateAnyExtToZExt(*${d}, MRI); }]),
+  (apply [{ applyMutateAnyExtToZExt(*${d}, MRI, B, Observer); }])
+>;
+
+def split_store_zero_128 : GICombineRule<  // Root: G_STORE (named $d here rather than $root).
+  (defs root:$d),
+  (match (wip_match_opcode G_STORE):$d,
+          [{ return matchSplitStoreZero128(*${d}, MRI); }]),
+  (apply [{ applySplitStoreZero128(*${d}, MRI, B, Observer); }])
+>;
+
+// Post-legalization combines which should happen at all optimization levels
+// (e.g. ones that facilitate matching for the selector, such as matching
+// pseudos).
+def AArch64PostLegalizerLoweringHelper
+    : GICombinerHelper<"AArch64GenPostLegalizerLoweringHelper",
+                       [shuffle_vector_lowering, vashr_vlshr_imm,
+                        icmp_lowering, build_vector_lowering,
+                        lower_vector_fcmp, form_truncstore]> {
+  let DisableRuleOption = "aarch64postlegalizerlowering-disable-rule"; // Option name distinct from the combiner helper's.
+}
+
+// Post-legalization combines which are primarily optimizations.
+def AArch64PostLegalizerCombinerHelper
+    : GICombinerHelper<"AArch64GenPostLegalizerCombinerHelper",
+                       [copy_prop, erase_undef_store, combines_for_extload,
+                        sext_trunc_sextload, mutate_anyext_to_zext,
+                        hoist_logic_op_with_same_opcode_hands,
+                        redundant_and, xor_of_and_with_same_reg,
+                        extractvecelt_pairwise_add, redundant_or,
+                        mul_const, redundant_sext_inreg,
+                        form_bitfield_extract, rotate_out_of_range,
+                        icmp_to_true_false_known_bits, merge_unmerge,
+                        select_combines, fold_merge_to_zext,
+                        constant_fold, identity_combines,
+                        ptr_add_immed_chain, overlapping_and,
+                        split_store_zero_128]> {
+  let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule"; // Option name distinct from the lowering helper's.
+}
--- a/suite/synctools/tablegen/AArch64/AArch64GenRegisterBankInfo.def
+++ b/suite/synctools/tablegen/AArch64/AArch64GenRegisterBankInfo.def
@ -0,0 +1,275 @@
+//===- AArch64GenRegisterBankInfo.def ----------------------------*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines all the static objects used by AArch64RegisterBankInfo.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+RegisterBankInfo::PartialMapping AArch64GenRegisterBankInfo::PartMappings[]{
+    /* Each entry is {StartIdx, Length, RegBank}; users index it as PMI_* - PMI_Min. */
+    // 0: FPR 16-bit value.
+    {0, 16, AArch64::FPRRegBank},
+    // 1: FPR 32-bit value.
+    {0, 32, AArch64::FPRRegBank},
+    // 2: FPR 64-bit value.
+    {0, 64, AArch64::FPRRegBank},
+    // 3: FPR 128-bit value.
+    {0, 128, AArch64::FPRRegBank},
+    // 4: FPR 256-bit value.
+    {0, 256, AArch64::FPRRegBank},
+    // 5: FPR 512-bit value.
+    {0, 512, AArch64::FPRRegBank},
+    // 6: GPR 32-bit value.
+    {0, 32, AArch64::GPRRegBank},
+    // 7: GPR 64-bit value.
+    {0, 64, AArch64::GPRRegBank},
+    // 8: GPR 128-bit value.
+    {0, 128, AArch64::GPRRegBank},
+};
+
+// ValueMappings.
+RegisterBankInfo::ValueMapping AArch64GenRegisterBankInfo::ValMappings[]{
+    /* Each entry is {BreakDown, NumBreakDowns}; BreakDown points into PartMappings. */
+    // 0: invalid
+    {nullptr, 0},
+    // 3-operands instructions (all binary operations should end up with one of
+    // those mapping).
+    // 1: FPR 16-bit value. <-- This must match First3OpsIdx.
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1},
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1},
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1},
+    // 4: FPR 32-bit value.
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
+    // 7: FPR 64-bit value.
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
+    // 10: FPR 128-bit value.
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR128 - PMI_Min], 1},
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR128 - PMI_Min], 1},
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR128 - PMI_Min], 1},
+    // 13: FPR 256-bit value.
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR256 - PMI_Min], 1},
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR256 - PMI_Min], 1},
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR256 - PMI_Min], 1},
+    // 16: FPR 512-bit value.
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR512 - PMI_Min], 1},
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR512 - PMI_Min], 1},
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR512 - PMI_Min], 1},
+    // 19: GPR 32-bit value.
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
+    // 22: GPR 64-bit value.
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1},
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1},
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1},
+    // 25: GPR 128-bit value. <-- This must match Last3OpsIdx.
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR128 - PMI_Min], 1},
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR128 - PMI_Min], 1},
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR128 - PMI_Min], 1},
+    // Cross register bank copies.
+    // 28: FPR 16-bit value to GPR 16-bit. <-- This must match
+    //                                         FirstCrossRegCpyIdx.
+    // Note: This is the kind of copy we see with physical registers.
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1},
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
+    // 30: FPR 32-bit value to GPR 32-bit value.
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
+    // 32: FPR 64-bit value to GPR 64-bit value.
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1},
+    // 34: FPR 128-bit value to GPR 128-bit value (invalid)
+    {nullptr, 1},
+    {nullptr, 1},
+    // 36: FPR 256-bit value to GPR 256-bit value (invalid)
+    {nullptr, 1},
+    {nullptr, 1},
+    // 38: FPR 512-bit value to GPR 512-bit value (invalid)
+    {nullptr, 1},
+    {nullptr, 1},
+    // 40: GPR 32-bit value to FPR 32-bit value.
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
+    // 42: GPR 64-bit value to FPR 64-bit value. <-- This must match
+    //                                               LastCrossRegCpyIdx.
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1},
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
+    // 44: FPExt: 16 to 32. <-- This must match FPExt16To32Idx.
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1},
+    // 46: FPExt: 16 to 64. <-- This must match FPExt16To64Idx.
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1},
+    // 48: FPExt: 32 to 64. <-- This must match FPExt32To64Idx.
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
+    // 50: FPExt vector: 64 to 128. <-- This must match FPExt64To128Idx.
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR128 - PMI_Min], 1},
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
+    // 52: Shift scalar with 64 bit shift imm
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1},
+};
+
+bool AArch64GenRegisterBankInfo::checkPartialMap(unsigned Idx,
+                                                 unsigned ValStartIdx,
+                                                 unsigned ValLength,
+                                                 const RegisterBank &RB) { // Sanity check of one PartMappings entry.
+  const PartialMapping &Map = PartMappings[Idx - PartialMappingIdx::PMI_Min]; // Rebase the PMI_* value to a table index.
+  return Map.StartIdx == ValStartIdx && Map.Length == ValLength &&
+         Map.RegBank == &RB; // True only when start, length and bank all match.
+}
+
+bool AArch64GenRegisterBankInfo::checkValueMapImpl(unsigned Idx,
+                                                   unsigned FirstInBank,
+                                                   unsigned Size,
+                                                   unsigned Offset) { // Sanity check of one ValMappings entry.
+  unsigned PartialMapBaseIdx = Idx - PartialMappingIdx::PMI_Min; // Expected PartMappings index.
+  const ValueMapping &Map =
+      AArch64GenRegisterBankInfo::getValueMapping((PartialMappingIdx)FirstInBank, Size)[Offset]; // Offset-th entry after the mapping for (bank, Size).
+  return Map.BreakDown == &PartMappings[PartialMapBaseIdx] &&
+         Map.NumBreakDowns == 1; // Must be a single breakdown pointing at the expected partial mapping.
+}
+
+bool AArch64GenRegisterBankInfo::checkPartialMappingIdx(
+    PartialMappingIdx FirstAlias, PartialMappingIdx LastAlias,
+    ArrayRef<PartialMappingIdx> Order) { // Checks that Order is FirstAlias..LastAlias in consecutive steps.
+  if (Order.front() != FirstAlias) // Must start at FirstAlias.
+    return false;
+  if (Order.back() != LastAlias) // Must end at LastAlias.
+    return false;
+  if (Order.front() > Order.back())
+    return false;
+
+  PartialMappingIdx Previous = Order.front();
+  bool First = true;
+  for (const auto &Current : Order) {
+    if (First) { // Skip the first element; it is already held in Previous.
+      First = false;
+      continue;
+    }
+    if (Previous + 1 != Current) // Each element must be exactly one above its predecessor.
+      return false;
+    Previous = Current;
+  }
+  return true;
+}
+
+unsigned AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(unsigned RBIdx,
+                                                             unsigned Size) { // Maps Size to a bucket offset within bank RBIdx.
+  if (RBIdx == PMI_FirstGPR) { // GPR buckets: <=32, <=64, <=128.
+    if (Size <= 32)
+      return 0;
+    if (Size <= 64)
+      return 1;
+    if (Size <= 128)
+      return 2;
+    return -1; // Converts to the unsigned maximum; getValueMapping compares against -1u.
+  }
+  if (RBIdx == PMI_FirstFPR) { // FPR buckets: <=16, <=32, <=64, <=128, <=256, <=512.
+    if (Size <= 16)
+      return 0;
+    if (Size <= 32)
+      return 1;
+    if (Size <= 64)
+      return 2;
+    if (Size <= 128)
+      return 3;
+    if (Size <= 256)
+      return 4;
+    if (Size <= 512)
+      return 5;
+    return -1; // Size too large for this bank.
+  }
+  return -1; // RBIdx is neither PMI_FirstGPR nor PMI_FirstFPR.
+}
+
+const RegisterBankInfo::ValueMapping *
+AArch64GenRegisterBankInfo::getValueMapping(PartialMappingIdx RBIdx,
+                                            unsigned Size) { // Returns the first of the 3-operand ValMappings entries for (RBIdx, Size).
+  assert(RBIdx != PartialMappingIdx::PMI_None && "No mapping needed for that");
+  unsigned BaseIdxOffset = getRegBankBaseIdxOffset(RBIdx, Size);
+  if (BaseIdxOffset == -1u) // Unsupported bank/size: hand back the invalid entry.
+    return &ValMappings[InvalidIdx];
+
+  unsigned ValMappingIdx =
+      First3OpsIdx + (RBIdx - PartialMappingIdx::PMI_Min + BaseIdxOffset) *
+                         ValueMappingIdx::DistanceBetweenRegBanks; // One group of entries per partial mapping.
+  assert(ValMappingIdx >= First3OpsIdx && ValMappingIdx <= Last3OpsIdx &&
+         "Mapping out of bound");
+
+  return &ValMappings[ValMappingIdx];
+}
+
+AArch64GenRegisterBankInfo::PartialMappingIdx
+    AArch64GenRegisterBankInfo::BankIDToCopyMapIdx[]{ // Indexed by register bank ID in getCopyMapping.
+        PMI_None,     // CCR: no copy mapping (getCopyMapping asserts on PMI_None)
+        PMI_FirstFPR, // FPR
+        PMI_FirstGPR, // GPR
+    };
+
+const RegisterBankInfo::ValueMapping *
+AArch64GenRegisterBankInfo::getCopyMapping(unsigned DstBankID,
+                                           unsigned SrcBankID, unsigned Size) { // Returns the ValMappings entry for a Size-bit copy between two banks.
+  assert(DstBankID < AArch64::NumRegisterBanks && "Invalid bank ID");
+  assert(SrcBankID < AArch64::NumRegisterBanks && "Invalid bank ID");
+  PartialMappingIdx DstRBIdx = BankIDToCopyMapIdx[DstBankID];
+  PartialMappingIdx SrcRBIdx = BankIDToCopyMapIdx[SrcBankID];
+  assert(DstRBIdx != PMI_None && "No such mapping");
+  assert(SrcRBIdx != PMI_None && "No such mapping");
+
+  if (DstRBIdx == SrcRBIdx) // Same bank: reuse the regular value mapping.
+    return getValueMapping(DstRBIdx, Size);
+
+  assert(Size <= 64 && "GPR cannot handle that size");
+  unsigned ValMappingIdx =
+      FirstCrossRegCpyIdx +
+      (DstRBIdx - PMI_Min + getRegBankBaseIdxOffset(DstRBIdx, Size)) *
+          ValueMappingIdx::DistanceBetweenCrossRegCpy; // Index is derived from the destination bank and Size only.
+  assert(ValMappingIdx >= FirstCrossRegCpyIdx &&
+         ValMappingIdx <= LastCrossRegCpyIdx && "Mapping out of bound");
+  return &ValMappings[ValMappingIdx];
+}
+
+const RegisterBankInfo::ValueMapping *
+AArch64GenRegisterBankInfo::getFPExtMapping(unsigned DstSize,
+                                         unsigned SrcSize) {
+  // Returns the FPExt* entry of ValMappings for SrcSize -> DstSize. We support:
+  // - For Scalar:
+  //   - 16 to 32.
+  //   - 16 to 64.
+  //   - 32 to 64.
+  // => FPR 16 to FPR 32|64
+  // => FPR 32 to FPR 64
+  // - For vectors:
+  //   - v4f16 to v4f32
+  //   - v2f32 to v2f64
+  // => FPR 64 to FPR 128
+
+  // Check that we have been asked sensible sizes.
+  if (SrcSize == 16) {
+    assert((DstSize == 32 || DstSize == 64) && "Unexpected half extension");
+    if (DstSize == 32)
+      return &ValMappings[FPExt16To32Idx];
+    return &ValMappings[FPExt16To64Idx];
+  }
+
+  if (SrcSize == 32) {
+    assert(DstSize == 64 && "Unexpected float extension");
+    return &ValMappings[FPExt32To64Idx];
+  }
+  assert((SrcSize == 64 && DstSize == 128) && "Unexpected vector extension"); // Both sizes must match; the 64 -> 128 entry is the only one left.
+  return &ValMappings[FPExt64To128Idx];
+}
+} // End llvm namespace.
--- a/suite/synctools/tablegen/AArch64/AArch64InstrAtomics.td
+++ b/suite/synctools/tablegen/AArch64/AArch64InstrAtomics.td
@ -1,9 +1,8 @@
 //=- AArch64InstrAtomics.td - AArch64 Atomic codegen support -*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@ -16,9 +15,9 @@
 //===----------------------------------
 let AddedComplexity = 15, Size = 0 in
 def CompilerBarrier : Pseudo<(outs), (ins i32imm:$ordering),
-                             [(atomic_fence imm:$ordering, 0)]>, Sched<[]>;
-def : Pat<(atomic_fence (i64 4), (imm)), (DMB (i32 0x9))>;
-def : Pat<(atomic_fence (imm), (imm)), (DMB (i32 0xb))>;
+                             [(atomic_fence timm:$ordering, 0)]>, Sched<[]>; // Matches fences whose second operand is 0.
+def : Pat<(atomic_fence (i64 4), (timm)), (DMB (i32 0x9))>; // First operand 4 selects DMB with immediate 0x9.
+def : Pat<(atomic_fence (timm), (timm)), (DMB (i32 0xb))>; // Generic fence pattern: DMB with immediate 0xb.

 //===----------------------------------
 // Atomic loads
@ -103,6 +102,34 @@ def : Pat<(relaxed_load<atomic_load_64>
               (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
          (LDURXi GPR64sp:$Rn, simm9:$offset)>;

+// FP 32-bit loads: relaxed atomic i32 loads bitcast to f32 select LDRS*/LDURS directly.
+def : Pat<(f32 (bitconvert (i32 (relaxed_load<atomic_load_32> (ro_Windexed32 GPR64sp:$Rn, GPR32:$Rm,
+                                                       ro_Wextend32:$extend))))),
+          (LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend32:$extend)>;
+def : Pat<(f32 (bitconvert (i32 (relaxed_load<atomic_load_32> (ro_Xindexed32 GPR64sp:$Rn, GPR64:$Rm,
+                                                       ro_Xextend32:$extend))))),
+          (LDRSroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend)>;
+def : Pat<(f32 (bitconvert (i32 (relaxed_load<atomic_load_32> (am_indexed32 GPR64sp:$Rn,
+                                                      uimm12s4:$offset))))),
+          (LDRSui GPR64sp:$Rn, uimm12s4:$offset)>; // uimm12s4, as in the FP 32-bit store pattern (STRSui).
+def : Pat<(f32 (bitconvert (i32 (relaxed_load<atomic_load_32>
+               (am_unscaled32 GPR64sp:$Rn, simm9:$offset))))),
+          (LDURSi GPR64sp:$Rn, simm9:$offset)>;
+
+// FP 64-bit loads: relaxed atomic i64 loads bitcast to f64 select LDRD*/LDURD directly.
+def : Pat<(f64 (bitconvert (i64 (relaxed_load<atomic_load_64> (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
+                                                       ro_Wextend64:$extend))))),
+          (LDRDroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;
+def : Pat<(f64 (bitconvert (i64 (relaxed_load<atomic_load_64> (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
+                                                       ro_Xextend64:$extend))))),
+          (LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
+def : Pat<(f64 (bitconvert (i64 (relaxed_load<atomic_load_64> (am_indexed64 GPR64sp:$Rn,
+                                                      uimm12s8:$offset))))),
+          (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
+def : Pat<(f64 (bitconvert (i64 (relaxed_load<atomic_load_64>
+               (am_unscaled64 GPR64sp:$Rn, simm9:$offset))))),
+          (LDURDi GPR64sp:$Rn, simm9:$offset)>;
+
 //===----------------------------------
 // Atomic stores
 //===----------------------------------
@ -197,6 +224,38 @@ def : Pat<(relaxed_store<atomic_store_64>
               (am_unscaled64 GPR64sp:$Rn, simm9:$offset), GPR64:$val),
          (STURXi GPR64:$val, GPR64sp:$Rn, simm9:$offset)>;

+// FP 32-bit stores: relaxed atomic i32 stores of a bitcast f32 select STRS*/STURS directly.
+def : Pat<(relaxed_store<atomic_store_32> (ro_Windexed32 GPR64sp:$Rn, GPR32:$Rm,
+                                                         ro_Wextend32:$extend),
+                                          (i32 (bitconvert (f32 FPR32Op:$val)))),
+          (STRSroW FPR32Op:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend32:$extend)>;
+def : Pat<(relaxed_store<atomic_store_32> (ro_Xindexed32 GPR64sp:$Rn, GPR64:$Rm,
+                                                         ro_Xextend32:$extend),
+                                          (i32 (bitconvert (f32 FPR32Op:$val)))),
+          (STRSroX FPR32Op:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend)>;
+def : Pat<(relaxed_store<atomic_store_32>
+              (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset), (i32 (bitconvert (f32 FPR32Op:$val)))),
+          (STRSui FPR32Op:$val, GPR64sp:$Rn, uimm12s4:$offset)>;
+def : Pat<(relaxed_store<atomic_store_32>
+               (am_unscaled32 GPR64sp:$Rn, simm9:$offset), (i32 (bitconvert (f32 FPR32Op:$val)))),
+          (STURSi FPR32Op:$val, GPR64sp:$Rn, simm9:$offset)>;
+
+// FP 64-bit stores: relaxed atomic i64 stores of a bitcast f64 select STRD*/STURD directly.
+def : Pat<(relaxed_store<atomic_store_64> (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
+                                                         ro_Wextend64:$extend),
+                                          (i64 (bitconvert (f64 FPR64Op:$val)))),
+          (STRDroW FPR64Op:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;
+def : Pat<(relaxed_store<atomic_store_64> (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
+                                                         ro_Xextend64:$extend),
+                                          (i64 (bitconvert (f64 FPR64Op:$val)))),
+          (STRDroX FPR64Op:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
+def : Pat<(relaxed_store<atomic_store_64>
+              (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset), (i64 (bitconvert (f64 FPR64Op:$val)))),
+          (STRDui FPR64Op:$val, GPR64sp:$Rn, uimm12s8:$offset)>; // uimm12s8, as in the FP 64-bit load pattern (LDRDui).
+def : Pat<(relaxed_store<atomic_store_64>
+               (am_unscaled64 GPR64sp:$Rn, simm9:$offset), (i64 (bitconvert (f64 FPR64Op:$val)))),
+          (STURDi FPR64Op:$val, GPR64sp:$Rn, simm9:$offset)>;
+
 //===----------------------------------
 // Low-level exclusive operations
 //===----------------------------------
@ -205,19 +264,27 @@ def : Pat<(relaxed_store<atomic_store_64>

 def ldxr_1 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{
   return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
-}]>;
+}]> { // GlobalISel counterpart of the MVT::i8 memory-VT check above.
+  let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 1); }];
+}

 def ldxr_2 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{
   return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
-}]>;
+}]> { // GlobalISel counterpart of the MVT::i16 memory-VT check above.
+  let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 2); }];
+}

 def ldxr_4 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{
   return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
-}]>;
+}]> { // GlobalISel counterpart of the MVT::i32 memory-VT check above.
+  let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 4); }];
+}

 def ldxr_8 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{
   return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
-}]>;
+}]> { // GlobalISel counterpart of the MVT::i64 memory-VT check above.
+  let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 8); }];
+}

 def : Pat<(ldxr_1 GPR64sp:$addr),
          (SUBREG_TO_REG (i64 0), (LDXRB GPR64sp:$addr), sub_32)>;
@ -238,19 +305,27 @@ def : Pat<(and (ldxr_4 GPR64sp:$addr), 0xffffffff),

 def ldaxr_1 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{
   return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
-}]>;
+}]> { // GlobalISel counterpart of the MVT::i8 memory-VT check above.
+  let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 1); }];
+}

 def ldaxr_2 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{
   return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
-}]>;
+}]> { // GlobalISel counterpart of the MVT::i16 memory-VT check above.
+  let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 2); }];
+}

 def ldaxr_4 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{
   return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
-}]>;
+}]> { // GlobalISel counterpart of the MVT::i32 memory-VT check above.
+  let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 4); }];
+}

 def ldaxr_8 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{
   return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
-}]>;
+}]> { // GlobalISel counterpart of the MVT::i64 memory-VT check above.
+  let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 8); }];
+}

 def : Pat<(ldaxr_1 GPR64sp:$addr),
          (SUBREG_TO_REG (i64 0), (LDAXRB GPR64sp:$addr), sub_32)>;
@ -272,22 +347,30 @@ def : Pat<(and (ldaxr_4 GPR64sp:$addr), 0xffffffff),
 def stxr_1 : PatFrag<(ops node:$val, node:$ptr),
                      (int_aarch64_stxr node:$val, node:$ptr), [{
   return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
-}]>;
+}]> { // GlobalISel counterpart of the MVT::i8 memory-VT check above.
+  let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 1); }];
+}

 def stxr_2 : PatFrag<(ops node:$val, node:$ptr),
                      (int_aarch64_stxr node:$val, node:$ptr), [{
   return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
-}]>;
+}]> { // GlobalISel counterpart of the MVT::i16 memory-VT check above.
+  let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 2); }];
+}

 def stxr_4 : PatFrag<(ops node:$val, node:$ptr),
                      (int_aarch64_stxr node:$val, node:$ptr), [{
   return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
-}]>;
+}]> { // GlobalISel counterpart of the MVT::i32 memory-VT check above.
+  let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 4); }];
+}

 def stxr_8 : PatFrag<(ops node:$val, node:$ptr),
                      (int_aarch64_stxr node:$val, node:$ptr), [{
   return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
-}]>;
+}]> { // GlobalISel counterpart of the MVT::i64 memory-VT check above.
+  let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 8); }];
+}


 def : Pat<(stxr_1 GPR64:$val, GPR64sp:$addr),
@ -318,22 +401,30 @@ def : Pat<(stxr_4 (and GPR64:$val, 0xffffffff), GPR64sp:$addr),
 def stlxr_1 : PatFrag<(ops node:$val, node:$ptr),
                      (int_aarch64_stlxr node:$val, node:$ptr), [{
   return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
-}]>;
+}]> { // GlobalISel counterpart of the MVT::i8 memory-VT check above.
+  let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 1); }];
+}

 def stlxr_2 : PatFrag<(ops node:$val, node:$ptr),
                      (int_aarch64_stlxr node:$val, node:$ptr), [{
   return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
-}]>;
+}]> { // GlobalISel counterpart of the MVT::i16 memory-VT check above.
+  let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 2); }];
+}

 def stlxr_4 : PatFrag<(ops node:$val, node:$ptr),
                      (int_aarch64_stlxr node:$val, node:$ptr), [{
   return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
-}]>;
+}]> { // GlobalISel counterpart of the MVT::i32 memory-VT check above.
+  let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 4); }];
+}

 def stlxr_8 : PatFrag<(ops node:$val, node:$ptr),
                      (int_aarch64_stlxr node:$val, node:$ptr), [{
   return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
-}]>;
+}]> { // GlobalISel counterpart of the MVT::i64 memory-VT check above.
+  let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 8); }];
+}


 def : Pat<(stlxr_1 GPR64:$val, GPR64sp:$addr),
@ -398,11 +489,16 @@ def CMP_SWAP_64 : Pseudo<(outs GPR64:$Rd, GPR32:$scratch),
 }

 let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi,@earlyclobber $scratch",
-    mayLoad = 1, mayStore = 1 in
-def CMP_SWAP_128 : Pseudo<(outs GPR64:$RdLo, GPR64:$RdHi, GPR32:$scratch),
-                          (ins GPR64:$addr, GPR64:$desiredLo, GPR64:$desiredHi,
-                               GPR64:$newLo, GPR64:$newHi), []>,
-                   Sched<[WriteAtomic]>;
+    mayLoad = 1, mayStore = 1 in {
+class cmp_swap_128 : Pseudo<(outs GPR64:$RdLo, GPR64:$RdHi, GPR32common:$scratch),
+                           (ins GPR64:$addr, GPR64:$desiredLo, GPR64:$desiredHi,
+                                GPR64:$newLo, GPR64:$newHi), []>,
+                     Sched<[WriteAtomic]>; // Shared operand list; no selection pattern ([]).
+def CMP_SWAP_128 : cmp_swap_128; // The four pseudos below differ only by name.
+def CMP_SWAP_128_RELEASE : cmp_swap_128;
+def CMP_SWAP_128_ACQUIRE : cmp_swap_128;
+def CMP_SWAP_128_MONOTONIC : cmp_swap_128;
+} // End of the let block that applies the constraints and mayLoad/mayStore.

 // v8.1 Atomic instructions:
 let Predicates = [HasLSE] in {
@ -423,4 +519,3 @@ let Predicates = [HasLSE] in {
  defm : LDOPregister_patterns_mod<"LDADD", "atomic_load_sub", "SUB">;
  defm : LDOPregister_patterns_mod<"LDCLR", "atomic_load_and", "ORN">;
 }
-
--- a/suite/synctools/tablegen/AArch64/AArch64InstrFormats.td
+++ b/suite/synctools/tablegen/AArch64/AArch64InstrFormats.td
--- a/suite/synctools/tablegen/AArch64/AArch64InstrGISel.td
+++ b/suite/synctools/tablegen/AArch64/AArch64InstrGISel.td
@ -0,0 +1,287 @@
+//=----- AArch64InstrGISel.td - AArch64 GISel target pseudos -*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// AArch64 GlobalISel target pseudo instruction definitions. These are kept
+// separate from the other tablegen files for organizational purposes, but
+// share the same infrastructure.
+//
+//===----------------------------------------------------------------------===//
+
+
+class AArch64GenericInstruction : GenericInstruction { // Base class for the pseudos defined below.
+  let Namespace = "AArch64"; // Every derived def lands in the AArch64 namespace.
+}
+
+// A pseudo to represent a relocatable add instruction as part of address
+// computation.
+def G_ADD_LOW : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type1:$src, type2:$imm);
+  let hasSideEffects = 0;
+}
+
+// Pseudo for a rev16 instruction. Produced post-legalization from
+// G_SHUFFLE_VECTORs with appropriate masks.
+def G_REV16 : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src);
+  let hasSideEffects = 0;
+}
+
+// Pseudo for a rev32 instruction. Produced post-legalization from
+// G_SHUFFLE_VECTORs with appropriate masks.
+def G_REV32 : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src);
+  let hasSideEffects = 0;
+}
+
+// Pseudo for a rev64 instruction. Produced post-legalization from
+// G_SHUFFLE_VECTORs with appropriate masks.
+def G_REV64 : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src);
+  let hasSideEffects = 0;
+}
+
+// Represents an uzp1 instruction. Produced post-legalization from
+// G_SHUFFLE_VECTORs with appropriate masks.
+def G_UZP1 : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$v1, type0:$v2);
+  let hasSideEffects = 0;
+}
+
+// Represents an uzp2 instruction. Produced post-legalization from
+// G_SHUFFLE_VECTORs with appropriate masks.
+def G_UZP2 : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$v1, type0:$v2);
+  let hasSideEffects = 0;
+}
+
+// Represents a zip1 instruction. Produced post-legalization from
+// G_SHUFFLE_VECTORs with appropriate masks.
+def G_ZIP1 : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$v1, type0:$v2);
+  let hasSideEffects = 0;
+}
+
+// Represents a zip2 instruction. Produced post-legalization from
+// G_SHUFFLE_VECTORs with appropriate masks.
+def G_ZIP2 : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$v1, type0:$v2);
+  let hasSideEffects = 0;
+}
+
+// Represents a dup instruction. Produced post-legalization from
+// G_SHUFFLE_VECTORs with appropriate masks.
+def G_DUP: AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type1:$lane);
+  let hasSideEffects = 0;
+}
+
+// Represent lane duplicate operations; all four defs share one operand layout.
+def G_DUPLANE8 : AArch64GenericInstruction { // Paired with AArch64duplane8 via GINodeEquiv.
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src, type1:$lane);
+  let hasSideEffects = 0;
+}
+def G_DUPLANE16 : AArch64GenericInstruction { // Paired with AArch64duplane16 via GINodeEquiv.
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src, type1:$lane);
+  let hasSideEffects = 0;
+}
+def G_DUPLANE32 : AArch64GenericInstruction { // Paired with AArch64duplane32 via GINodeEquiv.
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src, type1:$lane);
+  let hasSideEffects = 0;
+}
+def G_DUPLANE64 : AArch64GenericInstruction { // Paired with AArch64duplane64 via GINodeEquiv.
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src, type1:$lane);
+  let hasSideEffects = 0;
+}
+
+// Represents a trn1 instruction. Produced post-legalization from
+// G_SHUFFLE_VECTORs with appropriate masks.
+def G_TRN1 : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$v1, type0:$v2);
+  let hasSideEffects = 0;
+}
+
+// Represents a trn2 instruction. Produced post-legalization from
+// G_SHUFFLE_VECTORs with appropriate masks.
+def G_TRN2 : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$v1, type0:$v2);
+  let hasSideEffects = 0;
+}
+
+// Represents an ext instruction. Produced post-legalization from
+// G_SHUFFLE_VECTORs with appropriate masks.
+def G_EXT: AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$v1, type0:$v2, untyped_imm_0:$imm);
+  let hasSideEffects = 0;
+}
+
+// Represents a vector G_ASHR with an immediate.
+def G_VASHR : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src1, untyped_imm_0:$imm);
+  let hasSideEffects = 0;
+}
+
+// Represents a vector G_LSHR with an immediate.
+def G_VLSHR : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src1, untyped_imm_0:$imm);
+  let hasSideEffects = 0;
+}
+
+// Represents an integer to FP conversion on the FPR bank.
+def G_SITOF : AArch64GenericInstruction { // Paired with AArch64sitof via GINodeEquiv.
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src);
+  let hasSideEffects = 0;
+}
+def G_UITOF : AArch64GenericInstruction { // Same layout as G_SITOF; paired with AArch64uitof.
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src);
+  let hasSideEffects = 0;
+}
+
+def G_FCMEQ : AArch64GenericInstruction { // Two-source pseudo; paired with AArch64fcmeq via GINodeEquiv.
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src1, type1:$src2);
+  let hasSideEffects = 0;
+}
+
+def G_FCMGE : AArch64GenericInstruction { // Two-source pseudo; paired with AArch64fcmge via GINodeEquiv.
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src1, type1:$src2);
+  let hasSideEffects = 0;
+}
+
+def G_FCMGT : AArch64GenericInstruction { // Two-source pseudo; paired with AArch64fcmgt via GINodeEquiv.
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src1, type1:$src2);
+  let hasSideEffects = 0;
+}
+
+def G_FCMEQZ : AArch64GenericInstruction { // Single-source pseudo; paired with AArch64fcmeqz via GINodeEquiv.
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src);
+  let hasSideEffects = 0;
+}
+
+def G_FCMGEZ : AArch64GenericInstruction { // Single-source pseudo; paired with AArch64fcmgez via GINodeEquiv.
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src);
+  let hasSideEffects = 0;
+}
+
+def G_FCMGTZ : AArch64GenericInstruction { // Single-source pseudo; paired with AArch64fcmgtz via GINodeEquiv.
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src);
+  let hasSideEffects = 0;
+}
+
+def G_FCMLEZ : AArch64GenericInstruction { // Single-source pseudo; paired with AArch64fcmlez via GINodeEquiv.
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src);
+  let hasSideEffects = 0;
+}
+
+def G_FCMLTZ : AArch64GenericInstruction { // Single-source pseudo; paired with AArch64fcmltz via GINodeEquiv.
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src);
+  let hasSideEffects = 0;
+}
+
+def : GINodeEquiv<G_REV16, AArch64rev16>;
+def : GINodeEquiv<G_REV32, AArch64rev32>;
+def : GINodeEquiv<G_REV64, AArch64rev64>;
+def : GINodeEquiv<G_UZP1, AArch64uzp1>;
+def : GINodeEquiv<G_UZP2, AArch64uzp2>;
+def : GINodeEquiv<G_ZIP1, AArch64zip1>;
+def : GINodeEquiv<G_ZIP2, AArch64zip2>;
+def : GINodeEquiv<G_DUP, AArch64dup>;
+def : GINodeEquiv<G_DUPLANE8, AArch64duplane8>;
+def : GINodeEquiv<G_DUPLANE16, AArch64duplane16>;
+def : GINodeEquiv<G_DUPLANE32, AArch64duplane32>;
+def : GINodeEquiv<G_DUPLANE64, AArch64duplane64>;
+def : GINodeEquiv<G_TRN1, AArch64trn1>;
+def : GINodeEquiv<G_TRN2, AArch64trn2>;
+def : GINodeEquiv<G_EXT, AArch64ext>;
+def : GINodeEquiv<G_VASHR, AArch64vashr>;
+def : GINodeEquiv<G_VLSHR, AArch64vlshr>;
+def : GINodeEquiv<G_SITOF, AArch64sitof>;
+def : GINodeEquiv<G_UITOF, AArch64uitof>;
+
+def : GINodeEquiv<G_FCMEQ, AArch64fcmeq>;
+def : GINodeEquiv<G_FCMGE, AArch64fcmge>;
+def : GINodeEquiv<G_FCMGT, AArch64fcmgt>;
+
+def : GINodeEquiv<G_FCMEQZ, AArch64fcmeqz>;
+def : GINodeEquiv<G_FCMGEZ, AArch64fcmgez>;
+def : GINodeEquiv<G_FCMGTZ, AArch64fcmgtz>;
+def : GINodeEquiv<G_FCMLEZ, AArch64fcmlez>;
+def : GINodeEquiv<G_FCMLTZ, AArch64fcmltz>;
+
+def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
+
+// These are patterns that we only use for GlobalISel via the importer.
+def : Pat<(f32 (fadd (vector_extract (v2f32 FPR64:$Rn), (i64 0)),
+                     (vector_extract (v2f32 FPR64:$Rn), (i64 1)))),
+           (f32 (FADDPv2i32p (v2f32 FPR64:$Rn)))>;
+
+let Predicates = [HasNEON] in {
+  def : Pat<(v2f64 (sint_to_fp v2i32:$src)),
+            (SCVTFv2f64 (SSHLLv2i32_shift V64:$src, 0))>;
+  def : Pat<(v2f64 (uint_to_fp v2i32:$src)),
+            (UCVTFv2f64 (USHLLv2i32_shift V64:$src, 0))>;
+  def : Pat<(v2f32 (sint_to_fp v2i64:$src)),
+            (FCVTNv2i32 (SCVTFv2f64 V128:$src))>;
+  def : Pat<(v2f32 (uint_to_fp v2i64:$src)),
+            (FCVTNv2i32 (UCVTFv2f64 V128:$src))>;
+
+  def : Pat<(v2i64 (fp_to_sint v2f32:$src)),
+            (FCVTZSv2f64 (FCVTLv2i32 V64:$src))>;
+  def : Pat<(v2i64 (fp_to_uint v2f32:$src)),
+            (FCVTZUv2f64 (FCVTLv2i32 V64:$src))>;
+  def : Pat<(v2i32 (fp_to_sint v2f64:$src)),
+            (XTNv2i32 (FCVTZSv2f64 V128:$src))>;
+  def : Pat<(v2i32 (fp_to_uint v2f64:$src)),
+            (XTNv2i32 (FCVTZUv2f64 V128:$src))>;
+
+}
+
+let Predicates = [HasNoLSE] in { // Under HasNoLSE only: map atomic_cmp_swap_<N> to the CMP_SWAP_<N> pseudo.
+def : Pat<(atomic_cmp_swap_8 GPR64:$addr, GPR32:$desired, GPR32:$new),
+          (CMP_SWAP_8 GPR64:$addr, GPR32:$desired, GPR32:$new)>;
+
+def : Pat<(atomic_cmp_swap_16 GPR64:$addr, GPR32:$desired, GPR32:$new),
+          (CMP_SWAP_16 GPR64:$addr, GPR32:$desired, GPR32:$new)>;
+
+def : Pat<(atomic_cmp_swap_32 GPR64:$addr, GPR32:$desired, GPR32:$new),
+          (CMP_SWAP_32 GPR64:$addr, GPR32:$desired, GPR32:$new)>;
+
+def : Pat<(atomic_cmp_swap_64 GPR64:$addr, GPR64:$desired, GPR64:$new), // Only this width uses GPR64 data operands.
+          (CMP_SWAP_64 GPR64:$addr, GPR64:$desired, GPR64:$new)>;
+}
+
+def : Pat<(int_aarch64_stlxp GPR64:$lo, GPR64:$hi, GPR64:$addr), // int_aarch64_stlxp intrinsic -> STLXPX.
+          (STLXPX GPR64:$lo, GPR64:$hi, GPR64:$addr)>;
+def : Pat<(int_aarch64_stxp GPR64:$lo, GPR64:$hi, GPR64:$addr), // int_aarch64_stxp intrinsic -> STXPX.
+          (STXPX GPR64:$lo, GPR64:$hi, GPR64:$addr)>;
--- a/suite/synctools/tablegen/AArch64/AArch64InstrInfo.td
+++ b/suite/synctools/tablegen/AArch64/AArch64InstrInfo.td
--- a/suite/synctools/tablegen/AArch64/AArch64PfmCounters.td
+++ b/suite/synctools/tablegen/AArch64/AArch64PfmCounters.td
@ -0,0 +1,18 @@
+//===-- AArch64PfmCounters.td - AArch64 Hardware Counters --*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the available hardware counters for AArch64.
+//
+//===----------------------------------------------------------------------===//
+
+def CpuCyclesPfmCounter : PfmCounter<"CPU_CYCLES">; // Counter identified by the name "CPU_CYCLES".
+
+def DefaultPfmCounters : ProcPfmCounters { // Counter set that only assigns a cycle counter.
+  let CycleCounter = CpuCyclesPfmCounter;
+}
+def : PfmCountersDefaultBinding<DefaultPfmCounters>; // Registers the set above as the default binding.
--- a/suite/synctools/tablegen/AArch64/AArch64RegisterBanks.td
+++ b/suite/synctools/tablegen/AArch64/AArch64RegisterBanks.td
@ -1,9 +1,8 @@
 //=- AArch64RegisterBank.td - Describe the AArch64 Banks -----*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@ -11,7 +10,7 @@
 //===----------------------------------------------------------------------===//

 /// General Purpose Registers: W, X.
-def GPRRegBank : RegisterBank<"GPR", [GPR64all]>;
+def GPRRegBank : RegisterBank<"GPR", [XSeqPairsClass]>;

 /// Floating Point/Vector Registers: B, H, S, D, Q.
 def FPRRegBank : RegisterBank<"FPR", [QQQQ]>;
--- a/suite/synctools/tablegen/AArch64/AArch64RegisterInfo.td
+++ b/suite/synctools/tablegen/AArch64/AArch64RegisterInfo.td
@ -1,9 +1,8 @@
 //=- AArch64RegisterInfo.td - Describe the AArch64 Registers -*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@ -25,11 +24,9 @@ let Namespace = "AArch64" in {
  def bsub : SubRegIndex<8>;
  def hsub : SubRegIndex<16>;
  def ssub : SubRegIndex<32>;
-  def dsub : SubRegIndex<32>;
+  def dsub : SubRegIndex<64>;
  def sube32 : SubRegIndex<32>;
  def subo32 : SubRegIndex<32>;
-  def qhisub : SubRegIndex<64>;
-  def qsub : SubRegIndex<64>;
  def sube64 : SubRegIndex<64>;
  def subo64 : SubRegIndex<64>;
  // SVE
@ -48,6 +45,16 @@ let Namespace = "AArch64" in {
  def qsub1 : SubRegIndex<128>;
  def qsub2 : SubRegIndex<128>;
  def qsub3 : SubRegIndex<128>;
+  // Note: Code depends on these having consecutive numbers
+  def zasubb  : SubRegIndex<2048>; // (16 x 16)/1 bytes  = 2048 bits
+  def zasubh0 : SubRegIndex<1024>; // (16 x 16)/2 bytes  = 1024 bits
+  def zasubh1 : SubRegIndex<1024>; // (16 x 16)/2 bytes  = 1024 bits
+  def zasubs0 : SubRegIndex<512>;  // (16 x 16)/4 bytes  = 512 bits
+  def zasubs1 : SubRegIndex<512>;  // (16 x 16)/4 bytes  = 512 bits
+  def zasubd0 : SubRegIndex<256>;  // (16 x 16)/8 bytes  = 256 bits
+  def zasubd1 : SubRegIndex<256>;  // (16 x 16)/8 bytes  = 256 bits
+  def zasubq0 : SubRegIndex<128>;  // (16 x 16)/16 bytes = 128 bits
+  def zasubq1 : SubRegIndex<128>;  // (16 x 16)/16 bytes = 128 bits
 }

 let Namespace = "AArch64" in {
@ -134,6 +141,9 @@ def NZCV  : AArch64Reg<0, "nzcv">;
 // First fault status register
 def FFR : AArch64Reg<0, "ffr">, DwarfRegNum<[47]>;

+// Purely virtual Vector Granule (VG) Dwarf register
+def VG : AArch64Reg<0, "vg">, DwarfRegNum<[46]>;
+
 // GPR register classes with the intersections of GPR32/GPR32sp and
 // GPR64/GPR64sp for use by the coalescer.
 def GPR32common : RegisterClass<"AArch64", [i32], 32, (sequence "W%u", 0, 30)> {
@ -188,6 +198,10 @@ def GPR64z : RegisterOperand<GPR64> {
  let GIZeroRegister = XZR;
 }

+// GPR argument registers.
+def GPR32arg : RegisterClass<"AArch64", [i32], 32, (sequence "W%u", 0, 7)>;
+def GPR64arg : RegisterClass<"AArch64", [i64], 64, (sequence "X%u", 0, 7)>;
+
 // GPR register classes which include WZR/XZR AND SP/WSP. This is not a
 // constraint used by any instructions, it is used as a common super-class.
 def GPR32all : RegisterClass<"AArch64", [i32], 32, (add GPR32common, WZR, WSP)>;
@ -200,6 +214,17 @@ def tcGPR64 : RegisterClass<"AArch64", [i64], 64, (sub GPR64common, X19, X20, X2
                                                     X22, X23, X24, X25, X26,
                                                     X27, X28, FP, LR)>;

+// Restricted set of tail call registers, for use when branch target
+// enforcement is enabled. These are the only registers which can be used to
+// indirectly branch (not call) to the "BTI c" instruction at the start of a
+// BTI-protected function.
+def rtcGPR64 : RegisterClass<"AArch64", [i64], 64, (add X16, X17)>;
+
+// Register set that excludes registers that are reserved for procedure calls.
+// This is used for pseudo-instructions that are actually implemented using a
+// procedure call.
+def GPR64noip : RegisterClass<"AArch64", [i64], 64, (sub GPR64, X16, X17, LR)>;
+
 // GPR register classes for post increment amount of vector load/store that
 // has alternate printing when Rm=31 and prints a constant immediate value
 // equal to the total number of bytes transferred.
@ -408,25 +433,35 @@ def Q31   : AArch64Reg<31, "q31", [D31], ["v31", ""]>, DwarfRegAlias<B31>;
 def FPR8  : RegisterClass<"AArch64", [untyped], 8, (sequence "B%u", 0, 31)> {
  let Size = 8;
 }
-def FPR16 : RegisterClass<"AArch64", [f16], 16, (sequence "H%u", 0, 31)> {
+def FPR16 : RegisterClass<"AArch64", [f16, bf16], 16, (sequence "H%u", 0, 31)> {
+  let Size = 16;
+}
+
+def FPR16_lo : RegisterClass<"AArch64", [f16], 16, (trunc FPR16, 16)> {
  let Size = 16;
 }
 def FPR32 : RegisterClass<"AArch64", [f32, i32], 32,(sequence "S%u", 0, 31)>;
 def FPR64 : RegisterClass<"AArch64", [f64, i64, v2f32, v1f64, v8i8, v4i16, v2i32,
-                                    v1i64, v4f16],
-                                    64, (sequence "D%u", 0, 31)>;
+                                      v1i64, v4f16, v4bf16],
+                                     64, (sequence "D%u", 0, 31)>;
+def FPR64_lo : RegisterClass<"AArch64",
+                             [v8i8, v4i16, v2i32, v1i64, v4f16, v4bf16, v2f32,
+                              v1f64],
+                             64, (trunc FPR64, 16)>;
+
 // We don't (yet) have an f128 legal type, so don't use that here. We
 // normalize 128-bit vectors to v2f64 for arg passing and such, so use
 // that here.
 def FPR128 : RegisterClass<"AArch64",
                           [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, f128,
-                            v8f16],
+                            v8f16, v8bf16],
                           128, (sequence "Q%u", 0, 31)>;

 // The lower 16 vector registers.  Some instructions can only take registers
 // in this range.
 def FPR128_lo : RegisterClass<"AArch64",
-                              [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16],
+                              [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16,
+                               v8bf16],
                              128, (trunc FPR128, 16)>;

 // Pairs, triples, and quads of 64-bit vector registers.
@ -467,7 +502,7 @@ def QQQQ : RegisterClass<"AArch64", [untyped], 128, (add QSeqQuads)> {


 // Vector operand versions of the FP registers. Alternate name printing and
-// assmebler matching.
+// assembler matching.
 def VectorReg64AsmOperand : AsmOperandClass {
  let Name = "VectorReg64";
  let PredicateMethod = "isNeonVectorReg";
@ -489,6 +524,9 @@ def VectorRegLoAsmOperand : AsmOperandClass {
  let Name = "VectorRegLo";
  let PredicateMethod = "isNeonVectorRegLo";
 }
+def V64_lo : RegisterOperand<FPR64_lo, "printVRegOperand"> { // FPR64_lo counterpart of V128_lo.
+  let ParserMatchClass = VectorRegLoAsmOperand; // Same match class as V128_lo.
+}
 def V128_lo : RegisterOperand<FPR128_lo, "printVRegOperand"> {
  let ParserMatchClass = VectorRegLoAsmOperand;
 }
@ -627,6 +665,10 @@ def FPR16Op  : RegisterOperand<FPR16, "printOperand"> {
  let ParserMatchClass = FPRAsmOperand<"FPR16">;
 }

+def FPR16Op_lo  : RegisterOperand<FPR16_lo, "printOperand"> { // Like FPR16Op, but over the FPR16_lo class.
+  let ParserMatchClass = FPRAsmOperand<"FPR16_lo">;
+}
+
 def FPR32Op  : RegisterOperand<FPR32, "printOperand"> {
  let ParserMatchClass = FPRAsmOperand<"FPR32">;
 }
@ -643,16 +685,18 @@ def FPR128Op : RegisterOperand<FPR128, "printOperand"> {
 // ARMv8.1a atomic CASP register operands


-def WSeqPairs : RegisterTuples<[sube32, subo32], 
-                               [(rotl GPR32, 0), (rotl GPR32, 1)]>;
-def XSeqPairs : RegisterTuples<[sube64, subo64], 
-                               [(rotl GPR64, 0), (rotl GPR64, 1)]>;
+def WSeqPairs : RegisterTuples<[sube32, subo32],
+                               [(decimate (rotl GPR32, 0), 2),
+                                (decimate (rotl GPR32, 1), 2)]>;
+def XSeqPairs : RegisterTuples<[sube64, subo64],
+                               [(decimate (rotl GPR64, 0), 2),
+                                (decimate (rotl GPR64, 1), 2)]>;

-def WSeqPairsClass   : RegisterClass<"AArch64", [untyped], 32, 
+def WSeqPairsClass   : RegisterClass<"AArch64", [untyped], 32,
                                     (add WSeqPairs)>{
  let Size = 64;
 }
-def XSeqPairsClass   : RegisterClass<"AArch64", [untyped], 64, 
+def XSeqPairsClass   : RegisterClass<"AArch64", [untyped], 64,
                                     (add XSeqPairs)>{
  let Size = 128;
 }
@ -675,6 +719,34 @@ def XSeqPairClassOperand :

 //===----- END: v8.1a atomic CASP register operands -----------------------===//

+//===----------------------------------------------------------------------===//
+// Armv8.7a accelerator extension register operands: 8 consecutive GPRs
+// starting with an even one
+
+let Namespace = "AArch64" in {
+  foreach i = 0-7 in
+    def "x8sub_"#i : SubRegIndex<64, !mul(64, i)>;
+}
+
+def Tuples8X : RegisterTuples<
+  !foreach(i, [0,1,2,3,4,5,6,7], !cast<SubRegIndex>("x8sub_"#i)),
+  !foreach(i, [0,1,2,3,4,5,6,7], (trunc (decimate (rotl GPR64, i), 2), 12))>;
+
+def GPR64x8Class : RegisterClass<"AArch64", [i64x8], 512, (trunc Tuples8X, 12)> {
+  let Size = 512;
+}
+def GPR64x8AsmOp : AsmOperandClass { // Asm operand class used by the GPR64x8 operand below.
+  let Name = "GPR64x8";
+  let ParserMethod = "tryParseGPR64x8";
+  let RenderMethod = "addRegOperands";
+}
+def GPR64x8 : RegisterOperand<GPR64x8Class, "printGPR64x8"> { // Register operand over GPR64x8Class.
+  let ParserMatchClass = GPR64x8AsmOp;
+  let PrintMethod = "printGPR64x8"; // NOTE(review): repeats the string passed as template argument; presumably redundant.
+}
+
+//===----- END: v8.7a accelerator extension register operands -------------===//
+
 // SVE predicate registers
 def P0    : AArch64Reg<0,   "p0">, DwarfRegNum<[48]>;
 def P1    : AArch64Reg<1,   "p1">, DwarfRegNum<[49]>;
@ -764,7 +836,7 @@ def Z30   : AArch64Reg<30, "z30", [Q30, Z30_HI]>, DwarfRegNum<[126]>;
 def Z31   : AArch64Reg<31, "z31", [Q31, Z31_HI]>, DwarfRegNum<[127]>;
 }

-// Enum descibing the element size for destructive
+// Enum describing the element size for destructive
 // operations.
 class ElementSizeEnum<bits<3> val> {
  bits<3> Value = val;
@ -829,48 +901,25 @@ def PPR32  : PPRRegOp<"s", PPRAsmOp32,  ElementSizeS,  PPR>;
 def PPR64  : PPRRegOp<"d", PPRAsmOp64,  ElementSizeD,  PPR>;

 def PPRAsmOp3bAny : PPRAsmOperand<"Predicate3bAny", "PPR_3b",  0>;
-def PPRAsmOp3b8   : PPRAsmOperand<"Predicate3bB",   "PPR_3b",  8>;
-def PPRAsmOp3b16  : PPRAsmOperand<"Predicate3bH",   "PPR_3b", 16>;
-def PPRAsmOp3b32  : PPRAsmOperand<"Predicate3bS",   "PPR_3b", 32>;
-def PPRAsmOp3b64  : PPRAsmOperand<"Predicate3bD",   "PPR_3b", 64>;

 def PPR3bAny : PPRRegOp<"",  PPRAsmOp3bAny, ElementSizeNone, PPR_3b>;
-def PPR3b8   : PPRRegOp<"b", PPRAsmOp3b8,   ElementSizeB, PPR_3b>;
-def PPR3b16  : PPRRegOp<"h", PPRAsmOp3b16,  ElementSizeH, PPR_3b>;
-def PPR3b32  : PPRRegOp<"s", PPRAsmOp3b32,  ElementSizeS, PPR_3b>;
-def PPR3b64  : PPRRegOp<"d", PPRAsmOp3b64,  ElementSizeD, PPR_3b>;

 //******************************************************************************

-// SVE vector register class
-def ZPR : RegisterClass<"AArch64",
-                        [nxv16i8, nxv8i16, nxv4i32, nxv2i64,
-                         nxv2f16, nxv4f16, nxv8f16,
-                         nxv1f32, nxv2f32, nxv4f32,
-                         nxv1f64, nxv2f64],
-                        128, (sequence "Z%u", 0, 31)> {
+// SVE vector register classes
+class ZPRClass<int lastreg> : RegisterClass<"AArch64",
+                                            [nxv16i8, nxv8i16, nxv4i32, nxv2i64,
+                                             nxv2f16, nxv4f16, nxv8f16,
+                                             nxv2bf16, nxv4bf16, nxv8bf16,
+                                             nxv2f32, nxv4f32,
+                                             nxv2f64],
+                                            128, (sequence "Z%u", 0, lastreg)> {
  let Size = 128;
 }

-// SVE restricted 4 bit scalable vector register class
-def ZPR_4b : RegisterClass<"AArch64",
-                         [nxv16i8, nxv8i16, nxv4i32, nxv2i64,
-                          nxv2f16, nxv4f16, nxv8f16,
-                          nxv1f32, nxv2f32, nxv4f32,
-                          nxv1f64, nxv2f64],
-                         128, (sequence "Z%u", 0, 15)> {
-  let Size = 128;
-}
-
-// SVE restricted 3 bit scalable vector register class
-def ZPR_3b : RegisterClass<"AArch64",
-                         [nxv16i8, nxv8i16, nxv4i32, nxv2i64,
-                          nxv2f16, nxv4f16, nxv8f16,
-                          nxv1f32, nxv2f32, nxv4f32,
-                          nxv1f64, nxv2f64],
-                         128, (sequence "Z%u", 0, 7)> {
-  let Size = 128;
-}
+def ZPR    : ZPRClass<31>;
+def ZPR_4b : ZPRClass<15>; // Restricted 4 bit SVE vector register class.
+def ZPR_3b : ZPRClass<7>;  // Restricted 3 bit SVE vector register class.

 class ZPRAsmOperand<string name, int Width, string RegClassSuffix = "">
    : AsmOperandClass {
@ -1104,10 +1153,235 @@ class GPR64ExtendRegisterOperand<string Name, int Scale, RegisterClass RegClass>
  let PrintMethod = "printRegWithShiftExtend<false, " # Scale # ", 'x', 0>";
 }

-foreach Scale = [8, 16, 32, 64] in {
+foreach Scale = [8, 16, 32, 64, 128] in {
  def GPR64shiftedAsmOpnd # Scale : GPR64ShiftExtendAsmOperand<"GPR64shifted", Scale, "GPR64">;
  def GPR64shifted # Scale : GPR64ExtendRegisterOperand<"GPR64shiftedAsmOpnd" # Scale, Scale, GPR64>;

  def GPR64NoXZRshiftedAsmOpnd # Scale : GPR64ShiftExtendAsmOperand<"GPR64NoXZRshifted", Scale, "GPR64common">;
  def GPR64NoXZRshifted # Scale : GPR64ExtendRegisterOperand<"GPR64NoXZRshiftedAsmOpnd" # Scale, Scale, GPR64common>;
 }
+
+// Accumulator array tiles.
+def ZAQ0  : AArch64Reg<0,  "za0.q">;
+def ZAQ1  : AArch64Reg<1,  "za1.q">;
+def ZAQ2  : AArch64Reg<2,  "za2.q">;
+def ZAQ3  : AArch64Reg<3,  "za3.q">;
+def ZAQ4  : AArch64Reg<4,  "za4.q">;
+def ZAQ5  : AArch64Reg<5,  "za5.q">;
+def ZAQ6  : AArch64Reg<6,  "za6.q">;
+def ZAQ7  : AArch64Reg<7,  "za7.q">;
+def ZAQ8  : AArch64Reg<8,  "za8.q">;
+def ZAQ9  : AArch64Reg<9,  "za9.q">;
+def ZAQ10 : AArch64Reg<10, "za10.q">;
+def ZAQ11 : AArch64Reg<11, "za11.q">;
+def ZAQ12 : AArch64Reg<12, "za12.q">;
+def ZAQ13 : AArch64Reg<13, "za13.q">;
+def ZAQ14 : AArch64Reg<14, "za14.q">;
+def ZAQ15 : AArch64Reg<15, "za15.q">;
+
+let SubRegIndices = [zasubq0, zasubq1] in {
+  def ZAD0 : AArch64Reg<0, "za0.d", [ZAQ0, ZAQ8]>;
+  def ZAD1 : AArch64Reg<1, "za1.d", [ZAQ1, ZAQ9]>;
+  def ZAD2 : AArch64Reg<2, "za2.d", [ZAQ2, ZAQ10]>;
+  def ZAD3 : AArch64Reg<3, "za3.d", [ZAQ3, ZAQ11]>;
+  def ZAD4 : AArch64Reg<4, "za4.d", [ZAQ4, ZAQ12]>;
+  def ZAD5 : AArch64Reg<5, "za5.d", [ZAQ5, ZAQ13]>;
+  def ZAD6 : AArch64Reg<6, "za6.d", [ZAQ6, ZAQ14]>;
+  def ZAD7 : AArch64Reg<7, "za7.d", [ZAQ7, ZAQ15]>;
+}
+
+let SubRegIndices = [zasubd0, zasubd1] in {
+  def ZAS0 : AArch64Reg<0, "za0.s", [ZAD0, ZAD4]>;
+  def ZAS1 : AArch64Reg<1, "za1.s", [ZAD1, ZAD5]>;
+  def ZAS2 : AArch64Reg<2, "za2.s", [ZAD2, ZAD6]>;
+  def ZAS3 : AArch64Reg<3, "za3.s", [ZAD3, ZAD7]>;
+}
+
+let SubRegIndices = [zasubs0, zasubs1] in {
+  def ZAH0 : AArch64Reg<0, "za0.h", [ZAS0, ZAS2]>;
+  def ZAH1 : AArch64Reg<1, "za1.h", [ZAS1, ZAS3]>;
+}
+
+let SubRegIndices = [zasubh0, zasubh1] in {
+  def ZAB0 : AArch64Reg<0, "za0.b", [ZAH0, ZAH1]>;
+}
+
+let SubRegIndices = [zasubb] in {
+  def ZA : AArch64Reg<0, "za", [ZAB0]>;
+}
+
+// SME Register Classes
+
+// Accumulator array
+def MPR : RegisterClass<"AArch64", [untyped], 2048, (add ZA)> {
+  let Size = 2048;
+}
+
+// Accumulator array as single tiles
+def MPR8    : RegisterClass<"AArch64", [untyped], 2048, (add (sequence "ZAB%u", 0, 0))> {
+  let Size = 2048;
+}
+def MPR16   : RegisterClass<"AArch64", [untyped], 1024, (add (sequence "ZAH%u", 0, 1))> {
+  let Size = 1024;
+}
+def MPR32   : RegisterClass<"AArch64", [untyped],  512, (add (sequence "ZAS%u", 0, 3))> {
+  let Size = 512;
+}
+def MPR64   : RegisterClass<"AArch64", [untyped],  256, (add (sequence "ZAD%u", 0, 7))> {
+  let Size = 256;
+}
+def MPR128  : RegisterClass<"AArch64", [untyped],  128, (add (sequence "ZAQ%u", 0, 15))> {
+  let Size = 128;
+}
+
+// SME Register Operands
+// There are three types of SME matrix register operands:
+// * Tiles:
+//
+//   These tiles make up the larger accumulator matrix. The tile representation
+//   has an element type suffix, e.g. za0.b or za15.q and can be any of the
+//   registers:
+//          ZAQ0..ZAQ15
+//          ZAD0..ZAD7
+//          ZAS0..ZAS3
+//          ZAH0..ZAH1
+//       or ZAB0
+//
+// * Tile vectors:
+//
+//   Their representation is similar to regular tiles, but they have an extra
+//   'h' or 'v' to tell how the vector at [reg+offset] is laid out in the tile,
+//   horizontally or vertically.
+//
+//   e.g. za1h.h or za15v.q, which corresponds to vectors in registers ZAH1 and
+//   ZAQ15, respectively. The horizontal/vertical is more a property of the
+//   instruction, than a property of the asm-operand itself, or its register.
+//   The distinction is required for the parsing/printing of the operand,
+//   as from a compiler's perspective, the whole tile is read/written.
+//
+// * Accumulator matrix:
+//
+//   This is the entire matrix accumulator register ZA (<=> ZAB0), printed as
+//   'za'.
+
+//
+// Tiles
+//
+
+class MatrixTileAsmOperand<string RC, int EltSize> : AsmOperandClass {
+  let Name = "MatrixTile" # EltSize;
+  let DiagnosticType = "Invalid" # Name;
+  let ParserMethod = "tryParseMatrixRegister";
+  let RenderMethod = "addMatrixOperands";
+  let PredicateMethod = "isMatrixRegOperand<"
+                          # "MatrixKind::Tile" # ", "
+                          # EltSize # ", AArch64::" # RC # "RegClassID>";
+}
+
+class MatrixTileOperand<int EltSize, int NumBitsForTile, RegisterClass RC>
+    : RegisterOperand<RC> {
+  let ParserMatchClass = MatrixTileAsmOperand<!cast<string>(RC), EltSize>;
+  let DecoderMethod = "DecodeMatrixTile<" # NumBitsForTile # ">";
+  let PrintMethod = "printMatrixTile";
+}
+
+def TileOp32  : MatrixTileOperand<32, 2, MPR32>;
+def TileOp64  : MatrixTileOperand<64, 3, MPR64>;
+
+//
+// Tile vectors (horizontal and vertical)
+//
+
+class MatrixTileVectorAsmOperand<string RC, int EltSize, int IsVertical>
+    : AsmOperandClass {
+  let Name = "MatrixTileVector" # !if(IsVertical, "V", "H") # EltSize;
+  let DiagnosticType = "Invalid" # Name;
+  let ParserMethod = "tryParseMatrixRegister";
+  let RenderMethod = "addMatrixOperands";
+  let PredicateMethod = "isMatrixRegOperand<"
+                          # "MatrixKind::"
+                          # !if(IsVertical, "Col", "Row") # ", "
+                          # EltSize # ", AArch64::" # RC # "RegClassID>";
+}
+
+class MatrixTileVectorOperand<int EltSize, int NumBitsForTile,
+                              RegisterClass RC, int IsVertical>
+    : RegisterOperand<RC> {
+  let ParserMatchClass = MatrixTileVectorAsmOperand<!cast<string>(RC), EltSize,
+                                                    IsVertical>;
+  let DecoderMethod = "DecodeMatrixTile<" # NumBitsForTile # ">";
+  let PrintMethod = "printMatrixTileVector<" # IsVertical # ">";
+}
+
+def TileVectorOpH8   : MatrixTileVectorOperand<  8, 0, MPR8,   0>;
+def TileVectorOpH16  : MatrixTileVectorOperand< 16, 1, MPR16,  0>;
+def TileVectorOpH32  : MatrixTileVectorOperand< 32, 2, MPR32,  0>;
+def TileVectorOpH64  : MatrixTileVectorOperand< 64, 3, MPR64,  0>;
+def TileVectorOpH128 : MatrixTileVectorOperand<128, 4, MPR128, 0>;
+
+def TileVectorOpV8   : MatrixTileVectorOperand<  8, 0, MPR8,   1>;
+def TileVectorOpV16  : MatrixTileVectorOperand< 16, 1, MPR16,  1>;
+def TileVectorOpV32  : MatrixTileVectorOperand< 32, 2, MPR32,  1>;
+def TileVectorOpV64  : MatrixTileVectorOperand< 64, 3, MPR64,  1>;
+def TileVectorOpV128 : MatrixTileVectorOperand<128, 4, MPR128, 1>;
+
+//
+// Accumulator matrix
+//
+
+class MatrixAsmOperand<string RC, int EltSize> : AsmOperandClass {
+  let Name = "Matrix";
+  let DiagnosticType = "Invalid" # Name;
+  let ParserMethod = "tryParseMatrixRegister";
+  let RenderMethod = "addMatrixOperands";
+  let PredicateMethod = "isMatrixRegOperand<"
+                          # "MatrixKind::Array" # ", "
+                          # EltSize # ", AArch64::" # RC # "RegClassID>";
+}
+
+class MatrixOperand<RegisterClass RC, int EltSize> : RegisterOperand<RC> {
+  let ParserMatchClass = MatrixAsmOperand<!cast<string>(RC), EltSize>;
+  let PrintMethod = "printMatrix<" # EltSize # ">";
+}
+
+def MatrixOp : MatrixOperand<MPR, 0>;
+
+class MatrixTileListAsmOperand : AsmOperandClass { // Asm operand class for the matrix tile list operand.
+  let Name = "MatrixTileList";
+  let ParserMethod = "tryParseMatrixTileList";
+  let RenderMethod = "addMatrixTileListOperands";
+  let PredicateMethod = "isMatrixTileList";
+}
+
+class MatrixTileListOperand : Operand<i8> { // i8 operand with dedicated decode/encode/print hooks.
+  let ParserMatchClass = MatrixTileListAsmOperand<>;
+  let DecoderMethod = "DecodeMatrixTileListRegisterClass";
+  let EncoderMethod = "EncodeMatrixTileListRegisterClass";
+  let PrintMethod = "printMatrixTileList";
+}
+
+def MatrixTileList : MatrixTileListOperand<>; // Concrete operand def of the class above.
+
+def MatrixIndexGPR32_12_15 : RegisterClass<"AArch64", [i32], 32, (sequence "W%u", 12, 15)> { // Contains W12..W15 only.
+  let DiagnosticType = "InvalidMatrixIndexGPR32_12_15";
+}
+def MatrixIndexGPR32Op12_15 : RegisterOperand<MatrixIndexGPR32_12_15> { // Operand over the class above.
+  let EncoderMethod = "encodeMatrixIndexGPR32";
+}
+
+def SVCROperand : AsmOperandClass { // Asm operand class parsed by tryParseSVCR.
+  let Name = "SVCR";
+  let ParserMethod = "tryParseSVCR";
+  let DiagnosticType = "Invalid" # Name;
+}
+
+def svcr_op : Operand<i32> { // i32 operand; valid only for immediates that lookupSVCRByEncoding resolves.
+  let ParserMatchClass = SVCROperand;
+  let PrintMethod = "printSVCROp";
+  let DecoderMethod = "DecodeSVCROp";
+  let MCOperandPredicate = [{
+    if (!MCOp.isImm())
+      return false;
+    return AArch64SVCR::lookupSVCRByEncoding(MCOp.getImm()) != nullptr;
+  }];
+}
--- a/suite/synctools/tablegen/AArch64/AArch64SMEInstrInfo.td
+++ b/suite/synctools/tablegen/AArch64/AArch64SMEInstrInfo.td
@ -0,0 +1,143 @@
+//=- AArch64SMEInstrInfo.td -  AArch64 SME Instructions -*- tablegen -*-----=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// AArch64 Scalable Matrix Extension (SME) Instruction definitions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Add vector elements horizontally or vertically to ZA tile.
+//===----------------------------------------------------------------------===//
+
+// The _u32 variants are gated on HasSME.
+let Predicates = [HasSME] in {
+def ADDHA_MPPZ_S : sme_add_vector_to_tile_u32<0b0, "addha">;
+def ADDVA_MPPZ_S : sme_add_vector_to_tile_u32<0b1, "addva">;
+}
+
+// The _u64 variants are gated on HasSMEI64.
+let Predicates = [HasSMEI64] in {
+def ADDHA_MPPZ_D : sme_add_vector_to_tile_u64<0b0, "addha">;
+def ADDVA_MPPZ_D : sme_add_vector_to_tile_u64<0b1, "addva">;
+}
+
+let Predicates = [HasSME] in {
+//===----------------------------------------------------------------------===//
+// Outer products
+//===----------------------------------------------------------------------===//
+
+// bf16 and fp32 outer products are gated on HasSME.
+defm BFMOPA_MPPZZ  : sme_bf16_outer_product<0b0, "bfmopa">;
+defm BFMOPS_MPPZZ  : sme_bf16_outer_product<0b1, "bfmops">;
+
+def FMOPA_MPPZZ_S : sme_outer_product_fp32<0b0, "fmopa">;
+def FMOPS_MPPZZ_S : sme_outer_product_fp32<0b1, "fmops">;
+}
+
+// fp64 outer products are gated on HasSMEF64.
+let Predicates = [HasSMEF64] in {
+def FMOPA_MPPZZ_D : sme_outer_product_fp64<0b0, "fmopa">;
+def FMOPS_MPPZZ_D : sme_outer_product_fp64<0b1, "fmops">;
+}
+
+// f16 outer products and the i32 integer outer products are gated on HasSME.
+// The f16 forms reuse the "fmopa"/"fmops" mnemonics.
+let Predicates = [HasSME] in {
+defm FMOPAL_MPPZZ  : sme_f16_outer_product<0b0, "fmopa">;
+defm FMOPSL_MPPZZ  : sme_f16_outer_product<0b1, "fmops">;
+
+def SMOPA_MPPZZ_S  : sme_int_outer_product_i32<0b000, "smopa">;
+def SMOPS_MPPZZ_S  : sme_int_outer_product_i32<0b001, "smops">;
+def UMOPA_MPPZZ_S  : sme_int_outer_product_i32<0b110, "umopa">;
+def UMOPS_MPPZZ_S  : sme_int_outer_product_i32<0b111, "umops">;
+def SUMOPA_MPPZZ_S : sme_int_outer_product_i32<0b010, "sumopa">;
+def SUMOPS_MPPZZ_S : sme_int_outer_product_i32<0b011, "sumops">;
+def USMOPA_MPPZZ_S : sme_int_outer_product_i32<0b100, "usmopa">;
+def USMOPS_MPPZZ_S : sme_int_outer_product_i32<0b101, "usmops">;
+}
+
+// The i64 integer outer products use the same 3-bit opcodes as the i32 forms
+// above and are gated on HasSMEI64.
+let Predicates = [HasSMEI64] in {
+def SMOPA_MPPZZ_D  : sme_int_outer_product_i64<0b000, "smopa">;
+def SMOPS_MPPZZ_D  : sme_int_outer_product_i64<0b001, "smops">;
+def UMOPA_MPPZZ_D  : sme_int_outer_product_i64<0b110, "umopa">;
+def UMOPS_MPPZZ_D  : sme_int_outer_product_i64<0b111, "umops">;
+def SUMOPA_MPPZZ_D : sme_int_outer_product_i64<0b010, "sumopa">;
+def SUMOPS_MPPZZ_D : sme_int_outer_product_i64<0b011, "sumops">;
+def USMOPA_MPPZZ_D : sme_int_outer_product_i64<0b100, "usmopa">;
+def USMOPS_MPPZZ_D : sme_int_outer_product_i64<0b101, "usmops">;
+}
+
+let Predicates = [HasSME] in {
+//===----------------------------------------------------------------------===//
+// Loads and stores
+//===----------------------------------------------------------------------===//
+
+defm LD1_MXIPXX : sme_mem_ld_ss<"ld1">;
+defm ST1_MXIPXX : sme_mem_st_ss<"st1">;
+
+//===----------------------------------------------------------------------===//
+// Spill + fill
+//===----------------------------------------------------------------------===//
+
+defm LDR_ZA : sme_fill<"ldr">;
+defm STR_ZA : sme_spill<"str">;
+
+//===----------------------------------------------------------------------===//
+// Move instructions
+//===----------------------------------------------------------------------===//
+
+defm INSERT_MXIPZ  : sme_vector_to_tile<"mova">;
+defm EXTRACT_ZPMXI : sme_tile_to_vector<"mova">;
+
+//===----------------------------------------------------------------------===//
+// Zero instruction
+//===----------------------------------------------------------------------===//
+
+defm ZERO_M : sme_zero<"zero">;
+
+//===----------------------------------------------------------------------===//
+// Mode selection and state access instructions
+//===----------------------------------------------------------------------===//
+
+// SME defines three pstate fields to set or clear PSTATE.SM, PSTATE.ZA, or
+// both fields:
+//
+//   MSR SVCRSM, #<imm1>
+//   MSR SVCRZA, #<imm1>
+//   MSR SVCRSMZA, #<imm1>
+//
+// It's tricky to use the existing pstate operand defined in
+// AArch64SystemOperands.td since it only encodes 5 bits including op1;op2,
+// when these fields are also encoded in CRm[3:1].
+class MSRpstatesvcrImm0_1
+  : PstateWriteSimple<(ins svcr_op:$pstatefield, imm0_1:$imm), "msr",
+                      "\t$pstatefield, $imm">,
+    Sched<[WriteSys]> {
+  bits<3> pstatefield;
+  bit imm;
+  let Inst{18-16} = 0b011; // op1
+  let Inst{11-9} = pstatefield;
+  let Inst{8} = imm;
+  let Inst{7-5} = 0b011; // op2
+}
+
+def MSRpstatesvcrImm1 : MSRpstatesvcrImm0_1;
+// Aliases: the first operand is the pstatefield value (0b011 for the bare
+// form, 0b001 for "sm", 0b010 for "za"); the second is the immediate, 1 for
+// smstart and 0 for smstop.
+def : InstAlias<"smstart",    (MSRpstatesvcrImm1 0b011, 0b1)>;
+def : InstAlias<"smstart sm", (MSRpstatesvcrImm1 0b001, 0b1)>;
+def : InstAlias<"smstart za", (MSRpstatesvcrImm1 0b010, 0b1)>;
+
+def : InstAlias<"smstop",     (MSRpstatesvcrImm1 0b011, 0b0)>;
+def : InstAlias<"smstop sm",  (MSRpstatesvcrImm1 0b001, 0b0)>;
+def : InstAlias<"smstop za",  (MSRpstatesvcrImm1 0b010, 0b0)>;
+
+//===----------------------------------------------------------------------===//
+// SVE2 instructions
+//===----------------------------------------------------------------------===//
+
+def REVD_ZPmZ : sve2_int_perm_revd<"revd">;
+
+defm SCLAMP_ZZZ : sve2_clamp<"sclamp", 0b0>;
+defm UCLAMP_ZZZ : sve2_clamp<"uclamp", 0b1>;
+
+defm PSEL_PPPRI : sve2_int_perm_sel_p<"psel">;
+
+} // End let Predicates = [HasSME]
--- a/suite/synctools/tablegen/AArch64/AArch64SVEInstrInfo.td
+++ b/suite/synctools/tablegen/AArch64/AArch64SVEInstrInfo.td
--- a/suite/synctools/tablegen/AArch64/AArch64SchedA53.td
+++ b/suite/synctools/tablegen/AArch64/AArch64SchedA53.td
@ -1,9 +1,8 @@
 //==- AArch64SchedA53.td - Cortex-A53 Scheduling Definitions -*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@ -27,7 +26,9 @@ def CortexA53Model : SchedMachineModel {
                             // v 1.0 Spreadsheet
  let CompleteModel = 1;

-  list<Predicate> UnsupportedFeatures = [HasSVE];
+  list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
+                                                    PAUnsupported.F,
+                                                    SMEUnsupported.F);
 }


@ -126,7 +127,8 @@ def : WriteRes<WriteFCmp, [A53UnitFPALU]> { let Latency = 6; }
 def : WriteRes<WriteFCvt, [A53UnitFPALU]> { let Latency = 6; }
 def : WriteRes<WriteFCopy, [A53UnitFPALU]> { let Latency = 6; }
 def : WriteRes<WriteFImm, [A53UnitFPALU]> { let Latency = 6; }
-def : WriteRes<WriteV, [A53UnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteVd, [A53UnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteVq, [A53UnitFPALU]> { let Latency = 6; }

 // FP Mul, Div, Sqrt
 def : WriteRes<WriteFMul, [A53UnitFPMDS]> { let Latency = 6; }
@ -148,6 +150,7 @@ def A53WriteFSqrtDP : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 32;
 // No forwarding for these reads.
 def : ReadAdvance<ReadExtrHi, 0>;
 def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadST, 0>;
 def : ReadAdvance<ReadVLD, 0>;

 // ALU - Most operands in the ALU pipes are not needed for two cycles. Shiftable
--- a/suite/synctools/tablegen/AArch64/AArch64SchedA55.td
+++ b/suite/synctools/tablegen/AArch64/AArch64SchedA55.td
@ -0,0 +1,361 @@
+//==- AArch64SchedA55.td - ARM Cortex-A55 Scheduling Definitions -*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for the ARM Cortex-A55 processors.
+//
+//===----------------------------------------------------------------------===//
+
+// ===---------------------------------------------------------------------===//
+// The following definitions describe the per-operand machine model.
+// This works with MachineScheduler. See MCSchedModel.h for details.
+
+// Cortex-A55 machine model for scheduling and other instruction cost heuristics.
+def CortexA55Model : SchedMachineModel {
+  let MicroOpBufferSize = 0;  // The Cortex-A55 is an in-order processor
+  let IssueWidth = 2;         // It dual-issues under most circumstances
+  let LoadLatency = 4;        // Cycles for loads to access the cache. The
+                              // optimisation guide shows that most loads have
+                              // a latency of 3, but some have a latency of 4
+                              // or 5. Setting it to 4 looked to be a good
+                              // trade-off.
+  let MispredictPenalty = 8;  // A branch direction mispredict.
+  let PostRAScheduler = 1;    // Enable PostRA scheduler pass.
+  let CompleteModel = 0;      // Covers instructions applicable to Cortex-A55.
+
+  // Instructions guarded by HasSVE are treated as unsupported by this model.
+  list<Predicate> UnsupportedFeatures = [HasSVE];
+
+  // FIXME: Remove when all errors have been fixed.
+  let FullInstRWOverlapCheck = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available.
+
+// Modeling each pipeline as a ProcResource using the BufferSize = 0 since the
+// Cortex-A55 is in-order.
+
+def CortexA55UnitALU    : ProcResource<2> { let BufferSize = 0; } // Int ALU
+def CortexA55UnitMAC    : ProcResource<1> { let BufferSize = 0; } // Int MAC, 64-bit wide
+def CortexA55UnitDiv    : ProcResource<1> { let BufferSize = 0; } // Int Division, not pipelined
+def CortexA55UnitLd     : ProcResource<1> { let BufferSize = 0; } // Load pipe
+def CortexA55UnitSt     : ProcResource<1> { let BufferSize = 0; } // Store pipe
+def CortexA55UnitB      : ProcResource<1> { let BufferSize = 0; } // Branch
+
+// The FP DIV/SQRT instructions execute totally differently from the FP ALU
+// instructions, which can mostly be dual-issued; that's why for now we model
+// them with 2 resources.
+def CortexA55UnitFPALU  : ProcResource<2> { let BufferSize = 0; } // FP ALU
+def CortexA55UnitFPMAC  : ProcResource<2> { let BufferSize = 0; } // FP MAC
+def CortexA55UnitFPDIV  : ProcResource<1> { let BufferSize = 0; } // FP Div/SQRT, 64/128
+
+//===----------------------------------------------------------------------===//
+// Subtarget-specific SchedWrite types
+
+let SchedModel = CortexA55Model in {
+
+// These latencies are modeled without taking into account forwarding paths
+// (the software optimisation guide lists latencies taking into account
+// typical forwarding paths).
+def : WriteRes<WriteImm, [CortexA55UnitALU]> { let Latency = 3; }    // MOVN, MOVZ
+def : WriteRes<WriteI, [CortexA55UnitALU]> { let Latency = 3; }      // ALU
+def : WriteRes<WriteISReg, [CortexA55UnitALU]> { let Latency = 3; }  // ALU of Shifted-Reg
+def : WriteRes<WriteIEReg, [CortexA55UnitALU]> { let Latency = 3; }  // ALU of Extended-Reg
+def : WriteRes<WriteExtr, [CortexA55UnitALU]> { let Latency = 3; }   // EXTR from a reg pair
+def : WriteRes<WriteIS, [CortexA55UnitALU]> { let Latency = 3; }     // Shift/Scale
+
+// MAC
+def : WriteRes<WriteIM32, [CortexA55UnitMAC]> { let Latency = 4; }   // 32-bit Multiply
+def : WriteRes<WriteIM64, [CortexA55UnitMAC]> { let Latency = 4; }   // 64-bit Multiply
+
+// Div
+def : WriteRes<WriteID32, [CortexA55UnitDiv]> {
+  let Latency = 8; let ResourceCycles = [8];
+}
+def : WriteRes<WriteID64, [CortexA55UnitDiv]> {
+  let Latency = 8; let ResourceCycles = [8];
+}
+
+// Load
+def : WriteRes<WriteLD, [CortexA55UnitLd]> { let Latency = 3; }
+def : WriteRes<WriteLDIdx, [CortexA55UnitLd]> { let Latency = 4; }
+def : WriteRes<WriteLDHi, [CortexA55UnitLd]> { let Latency = 5; }
+
+// Vector Load - Vector loads take 1-5 cycles to issue. For the WriteVLD
+//               below, choosing the median of 3 which makes the latency 6.
+// An extra cycle is needed to get the swizzling right.
+def : WriteRes<WriteVLD, [CortexA55UnitLd]> { let Latency = 6;
+                                           let ResourceCycles = [3]; }
+def CortexA55WriteVLD1 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 4; }
+def CortexA55WriteVLD1SI : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 4; let SingleIssue = 1; }
+def CortexA55WriteVLD2 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 5;
+                                                  let ResourceCycles = [2]; }
+def CortexA55WriteVLD3 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 6;
+                                                  let ResourceCycles = [3]; }
+def CortexA55WriteVLD4 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 7;
+                                                  let ResourceCycles = [4]; }
+def CortexA55WriteVLD5 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 8;
+                                                  let ResourceCycles = [5]; }
+def CortexA55WriteVLD6 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 9;
+                                                  let ResourceCycles = [6]; }
+def CortexA55WriteVLD7 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 10;
+                                                  let ResourceCycles = [7]; }
+def CortexA55WriteVLD8 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 11;
+                                                  let ResourceCycles = [8]; }
+
+def CortexA55WriteLDP1 : SchedWriteRes<[]> { let Latency = 4; }
+def CortexA55WriteLDP2 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 5; }
+def CortexA55WriteLDP4 : SchedWriteRes<[CortexA55UnitLd, CortexA55UnitLd, CortexA55UnitLd, CortexA55UnitLd, CortexA55UnitLd]> { let Latency = 6; }
+
+// Pre/Post Indexing - Performed as part of address generation
+def : WriteRes<WriteAdr, []> { let Latency = 0; }
+
+// Store
+let RetireOOO = 1 in {
+def : WriteRes<WriteST, [CortexA55UnitSt]> { let Latency = 1; }
+def : WriteRes<WriteSTP, [CortexA55UnitSt]> { let Latency = 1; }
+def : WriteRes<WriteSTIdx, [CortexA55UnitSt]> { let Latency = 1; }
+}
+def : WriteRes<WriteSTX, [CortexA55UnitSt]> { let Latency = 4; }
+
+// Vector Store - Similar to vector loads, can take 1-3 cycles to issue.
+def : WriteRes<WriteVST, [CortexA55UnitSt]> { let Latency = 5;
+                                          let ResourceCycles = [2];}
+def CortexA55WriteVST1 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 4; }
+def CortexA55WriteVST2 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 5;
+                                                  let ResourceCycles = [2]; }
+def CortexA55WriteVST3 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 6;
+                                                  let ResourceCycles = [3]; }
+def CortexA55WriteVST4 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 5;
+                                                  let ResourceCycles = [4]; }
+
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+
+// Branch
+def : WriteRes<WriteBr, [CortexA55UnitB]>;
+def : WriteRes<WriteBrReg, [CortexA55UnitB]>;
+def : WriteRes<WriteSys, [CortexA55UnitB]>;
+def : WriteRes<WriteBarrier, [CortexA55UnitB]>;
+def : WriteRes<WriteHint, [CortexA55UnitB]>;
+
+// FP ALU
+//   As the WriteF result is produced in F5 and can mostly be forwarded
+//   to the consumer at F1, the effective latency is set to 4.
+def : WriteRes<WriteF, [CortexA55UnitFPALU]> { let Latency = 4; }
+def : WriteRes<WriteFCmp, [CortexA55UnitFPALU]> { let Latency = 3; }
+def : WriteRes<WriteFCvt, [CortexA55UnitFPALU]> { let Latency = 4; }
+def : WriteRes<WriteFCopy, [CortexA55UnitFPALU]> { let Latency = 3; }
+def : WriteRes<WriteFImm, [CortexA55UnitFPALU]> { let Latency = 3; }
+def : WriteRes<WriteVd, [CortexA55UnitFPALU]> { let Latency = 4; }
+def : WriteRes<WriteVq, [CortexA55UnitFPALU,CortexA55UnitFPALU]> { let Latency = 4; let BeginGroup = 1; }
+
+// FP ALU specific new schedwrite definitions
+def CortexA55WriteFPALU_F2 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 2;}
+def CortexA55WriteFPALU_F3 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 3;}
+def CortexA55WriteFPALU_F4 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 4;}
+def CortexA55WriteFPALU_F5 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 5;}
+
+// FP Mul, Div, Sqrt. Div/Sqrt are not pipelined
+def : WriteRes<WriteFMul, [CortexA55UnitFPMAC]> { let Latency = 4; }
+
+let RetireOOO = 1 in {
+def : WriteRes<WriteFDiv, [CortexA55UnitFPDIV]> { let Latency = 22;
+                                            let ResourceCycles = [29]; }
+def CortexA55WriteFMAC : SchedWriteRes<[CortexA55UnitFPMAC]> { let Latency = 4; }
+def CortexA55WriteFDivHP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 8;
+                                                     let ResourceCycles = [5]; }
+def CortexA55WriteFDivSP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 13;
+                                                     let ResourceCycles = [10]; }
+def CortexA55WriteFDivDP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 22;
+                                                     let ResourceCycles = [19]; }
+def CortexA55WriteFSqrtHP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 8;
+                                                      let ResourceCycles = [5]; }
+def CortexA55WriteFSqrtSP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 12;
+                                                      let ResourceCycles = [9]; }
+def CortexA55WriteFSqrtDP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 22;
+                                                      let ResourceCycles = [19]; }
+}
+//===----------------------------------------------------------------------===//
+// Subtarget-specific SchedRead types.
+
+def : ReadAdvance<ReadVLD, 0>;
+def : ReadAdvance<ReadExtrHi, 1>;
+def : ReadAdvance<ReadAdrBase, 1>;
+def : ReadAdvance<ReadST, 1>;
+
+// ALU - ALU input operands are generally needed in EX1. An operand produced
+//       in, say, EX2 can be forwarded for consumption to ALU in EX1, thereby
+//       allowing back-to-back ALU operations such as add. If an operand requires
+//       a shift, it will, however, be required in ISS stage.
+def : ReadAdvance<ReadI, 2, [WriteImm,WriteI,
+                             WriteISReg, WriteIEReg,WriteIS,
+                             WriteID32,WriteID64,
+                             WriteIM32,WriteIM64]>;
+// Shifted operand
+def CortexA55ReadShifted : SchedReadAdvance<1, [WriteImm,WriteI,
+                                          WriteISReg, WriteIEReg,WriteIS,
+                                          WriteID32,WriteID64,
+                                          WriteIM32,WriteIM64]>;
+def CortexA55ReadNotShifted : SchedReadAdvance<2, [WriteImm,WriteI,
+                                             WriteISReg, WriteIEReg,WriteIS,
+                                             WriteID32,WriteID64,
+                                             WriteIM32,WriteIM64]>;
+def CortexA55ReadISReg : SchedReadVariant<[
+        SchedVar<RegShiftedPred, [CortexA55ReadShifted]>,
+        SchedVar<NoSchedPred, [CortexA55ReadNotShifted]>]>;
+def : SchedAlias<ReadISReg, CortexA55ReadISReg>;
+
+def CortexA55ReadIEReg : SchedReadVariant<[
+        SchedVar<RegExtendedPred, [CortexA55ReadShifted]>,
+        SchedVar<NoSchedPred, [CortexA55ReadNotShifted]>]>;
+def : SchedAlias<ReadIEReg, CortexA55ReadIEReg>;
+
+// MUL
+def : ReadAdvance<ReadIM, 1, [WriteImm,WriteI,
+                              WriteISReg, WriteIEReg,WriteIS,
+                              WriteID32,WriteID64,
+                              WriteIM32,WriteIM64]>;
+def : ReadAdvance<ReadIMA, 2, [WriteImm,WriteI,
+                               WriteISReg, WriteIEReg,WriteIS,
+                               WriteID32,WriteID64,
+                               WriteIM32,WriteIM64]>;
+
+// Div
+def : ReadAdvance<ReadID, 1, [WriteImm,WriteI,
+                              WriteISReg, WriteIEReg,WriteIS,
+                              WriteID32,WriteID64,
+                              WriteIM32,WriteIM64]>;
+
+//===----------------------------------------------------------------------===//
+// Subtarget-specific InstRWs.
+
+//---
+// Miscellaneous
+//---
+def : InstRW<[CortexA55WriteVLD1SI,CortexA55WriteLDP1], (instregex "LDPS?Wi")>;
+def : InstRW<[CortexA55WriteVLD1,CortexA55WriteLDP1], (instregex "LDPSi")>;
+def : InstRW<[CortexA55WriteVLD1,CortexA55WriteLDP2], (instregex "LDP(X|D)i")>;
+def : InstRW<[CortexA55WriteVLD1,CortexA55WriteLDP4], (instregex "LDPQi")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD1SI,CortexA55WriteLDP1], (instregex "LDPS?W(pre|post)")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD1,CortexA55WriteLDP1], (instregex "LDPS(pre|post)")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD1,CortexA55WriteLDP2], (instregex "LDP(X|D)(pre|post)")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD1,CortexA55WriteLDP4], (instregex "LDPQ(pre|post)")>;
+def : InstRW<[WriteI], (instrs COPY)>;
+//---
+// Vector Loads - 64-bit per cycle
+//---
+//   1-element structures
+def : InstRW<[CortexA55WriteVLD1], (instregex "LD1i(8|16|32|64)$")>;                // single element
+def : InstRW<[CortexA55WriteVLD1], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; // replicate
+def : InstRW<[CortexA55WriteVLD1], (instregex "LD1Onev(8b|4h|2s|1d)$")>;
+def : InstRW<[CortexA55WriteVLD2], (instregex "LD1Onev(16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA55WriteVLD2], (instregex "LD1Twov(8b|4h|2s|1d)$")>; // multiple structures
+def : InstRW<[CortexA55WriteVLD4], (instregex "LD1Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA55WriteVLD3], (instregex "LD1Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[CortexA55WriteVLD6], (instregex "LD1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA55WriteVLD4], (instregex "LD1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[CortexA55WriteVLD8], (instregex "LD1Fourv(16b|8h|4s|2d)$")>;
+
+def : InstRW<[CortexA55WriteVLD1, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>;
+def : InstRW<[CortexA55WriteVLD1, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[CortexA55WriteVLD1, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[CortexA55WriteVLD3, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[CortexA55WriteVLD6, WriteAdr], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[CortexA55WriteVLD8, WriteAdr], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>;
+
+//    2-element structures
+def : InstRW<[CortexA55WriteVLD2], (instregex "LD2i(8|16|32|64)$")>;
+def : InstRW<[CortexA55WriteVLD2], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA55WriteVLD2], (instregex "LD2Twov(8b|4h|2s)$")>;
+def : InstRW<[CortexA55WriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)$")>;
+
+// Post-index forms only: anchoring on "_POST$" keeps these entries from also
+// matching the plain opcodes that the four entries above already cover.
+def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD2i(8|16|32|64)_POST$")>;
+def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>;
+
+//    3-element structures
+def : InstRW<[CortexA55WriteVLD2], (instregex "LD3i(8|16|32|64)$")>;
+def : InstRW<[CortexA55WriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA55WriteVLD3], (instregex "LD3Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[CortexA55WriteVLD6], (instregex "LD3Threev(16b|8h|4s|2d)$")>;
+
+def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>;
+def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[CortexA55WriteVLD3, WriteAdr], (instregex "LD3Threev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[CortexA55WriteVLD6, WriteAdr], (instregex "LD3Threev(16b|8h|4s|2d)_POST$")>;
+
+//    4-element structures
+def : InstRW<[CortexA55WriteVLD2], (instregex "LD4i(8|16|32|64)$")>;                // load single 4-el structure to one lane of 4 regs.
+def : InstRW<[CortexA55WriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; // load single 4-el structure, replicate to all lanes of 4 regs.
+def : InstRW<[CortexA55WriteVLD4], (instregex "LD4Fourv(8b|4h|2s|1d)$")>;           // load multiple 4-el structures to 4 regs.
+def : InstRW<[CortexA55WriteVLD8], (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
+
+def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>;
+def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD4Fourv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[CortexA55WriteVLD8, WriteAdr], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>;
+
+//---
+// Vector Stores
+//---
+def : InstRW<[CortexA55WriteVST1], (instregex "ST1i(8|16|32|64)$")>;
+def : InstRW<[CortexA55WriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA55WriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA55WriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA55WriteVST4], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA55WriteVST1, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>;
+def : InstRW<[CortexA55WriteVST1, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[CortexA55WriteVST1, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[CortexA55WriteVST2], (instregex "ST2i(8|16|32|64)$")>;
+def : InstRW<[CortexA55WriteVST2], (instregex "ST2Twov(8b|4h|2s)$")>;
+def : InstRW<[CortexA55WriteVST4], (instregex "ST2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>;
+def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
+
+// 3-element structure stores. The post-index entries list the same vector
+// arrangements as the plain entries; there is no "4d" arrangement to match.
+def : InstRW<[CortexA55WriteVST2], (instregex "ST3i(8|16|32|64)$")>;
+def : InstRW<[CortexA55WriteVST4], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>;
+def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[CortexA55WriteVST2], (instregex "ST4i(8|16|32|64)$")>;
+def : InstRW<[CortexA55WriteVST4], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>;
+def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+//---
+// Floating Point Conversions, MAC, DIV, SQRT
+//---
+def : InstRW<[CortexA55WriteFPALU_F2], (instregex "^DUP(v2i64|v4i32|v8i16|v16i8)")>;
+def : InstRW<[CortexA55WriteFPALU_F2], (instregex "^XTN")>;
+def : InstRW<[CortexA55WriteFPALU_F3], (instregex "^FCVT[ALMNPZ][SU](S|U)?(W|X)")>;
+def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^FCVT(X)?[ALMNPXZ](S|U|N)?v")>;
+
+def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^(S|U)CVTF(S|U)(W|X)(H|S|D)")>;
+def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^(S|U)CVTF(h|s|d)")>;
+def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^(S|U)CVTFv")>;
+
+def : InstRW<[CortexA55WriteFMAC], (instregex "^FN?M(ADD|SUB).*")>;
+def : InstRW<[CortexA55WriteFMAC], (instregex "^FML(A|S).*")>;
+def : InstRW<[CortexA55WriteFDivHP], (instrs FDIVHrr)>;
+def : InstRW<[CortexA55WriteFDivSP], (instrs FDIVSrr)>;
+def : InstRW<[CortexA55WriteFDivDP], (instrs FDIVDrr)>;
+def : InstRW<[CortexA55WriteFDivHP], (instregex "^FDIVv.*16$")>;
+def : InstRW<[CortexA55WriteFDivSP], (instregex "^FDIVv.*32$")>;
+def : InstRW<[CortexA55WriteFDivDP], (instregex "^FDIVv.*64$")>;
+def : InstRW<[CortexA55WriteFSqrtHP], (instregex "^.*SQRT.*16$")>;
+def : InstRW<[CortexA55WriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
+def : InstRW<[CortexA55WriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
+
+}
--- a/suite/synctools/tablegen/AArch64/AArch64SchedA57.td
+++ b/suite/synctools/tablegen/AArch64/AArch64SchedA57.td
@ -1,9 +1,8 @@
 //=- AArch64SchedA57.td - ARM Cortex-A57 Scheduling Defs -----*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@ -32,7 +31,9 @@ def CortexA57Model : SchedMachineModel {
  let LoopMicroOpBufferSize = 16;
  let CompleteModel = 1;

-  list<Predicate> UnsupportedFeatures = [HasSVE];
+  list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
+                                                    PAUnsupported.F,
+                                                    SMEUnsupported.F);
 }

 //===----------------------------------------------------------------------===//
@ -93,9 +94,10 @@ def : SchedAlias<WriteFCmp,  A57Write_3cyc_1V>;
 def : SchedAlias<WriteFCvt,  A57Write_5cyc_1V>;
 def : SchedAlias<WriteFCopy, A57Write_5cyc_1L>;
 def : SchedAlias<WriteFImm,  A57Write_3cyc_1V>;
-def : SchedAlias<WriteFMul,  A57Write_5cyc_1V>;
+def : WriteRes<WriteFMul, [A57UnitV]> { let Latency = 5;}
 def : SchedAlias<WriteFDiv,  A57Write_17cyc_1W>;
-def : SchedAlias<WriteV,     A57Write_3cyc_1V>;
+def : SchedAlias<WriteVd,    A57Write_3cyc_1V>;
+def : SchedAlias<WriteVq,    A57Write_3cyc_1V>;
 def : SchedAlias<WriteVLD,   A57Write_5cyc_1L>;
 def : SchedAlias<WriteVST,   A57Write_1cyc_1S>;

@ -115,6 +117,7 @@ def : ReadAdvance<ReadIM,      0>;
 def : ReadAdvance<ReadIMA,     2, [WriteIM32, WriteIM64]>;
 def : ReadAdvance<ReadID,      0>;
 def : ReadAdvance<ReadExtrHi,  0>;
+def : ReadAdvance<ReadST,      0>;
 def : ReadAdvance<ReadAdrBase, 0>;
 def : ReadAdvance<ReadVLD,     0>;

@ -350,12 +353,16 @@ def : InstRW<[A57Write_8cyc_8S, WriteAdr],      (instregex "ST4Fourv(2d)_POST$")
 //   D form - v8i8_v8i16, v4i16_v4i32, v2i32_v2i64
 //   Q form - v16i8_v8i16, v8i16_v4i32, v4i32_v2i64

+// Cortex A57 Software Optimization Guide Sec 3.14
+// Advance for absolute diff accum, pairwise add and accumulate, shift accumulate
+def A57ReadIVA3 : SchedReadAdvance<3, [A57Write_4cyc_1X_NonMul_Forward, A57Write_5cyc_2X_NonMul_Forward]>;
+
 // ASIMD absolute diff accum, D-form
-def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>;
+def : InstRW<[A57Write_4cyc_1X_NonMul_Forward, A57ReadIVA3], (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>;
 // ASIMD absolute diff accum, Q-form
-def : InstRW<[A57Write_5cyc_2X], (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>;
+def : InstRW<[A57Write_5cyc_2X_NonMul_Forward, A57ReadIVA3], (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>;
 // ASIMD absolute diff accum long
-def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]ABAL")>;
+def : InstRW<[A57Write_4cyc_1X_NonMul_Forward, A57ReadIVA3], (instregex "^[SU]ABAL")>;

 // ASIMD arith, reduce, 4H/4S
 def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>;
@ -372,32 +379,41 @@ def : InstRW<[A57Write_7cyc_1V_1X], (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>
 def : InstRW<[A57Write_8cyc_2X], (instregex "^[SU](MIN|MAX)Vv16i8v$")>;

 // ASIMD multiply, D-form
-def : InstRW<[A57Write_5cyc_1W], (instregex "^(P?MUL|SQR?DMULH)(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)(_indexed)?$")>;
+// MUL
+def : InstRW<[A57Write_5cyc_1W_Mul_Forward], (instregex "^MUL(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)(_indexed)?$")>;
+// PMUL, SQDMULH, SQRDMULH
+def : InstRW<[A57Write_5cyc_1W], (instregex "^(PMUL|SQR?DMULH)(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)(_indexed)?$")>;
+
 // ASIMD multiply, Q-form
-def : InstRW<[A57Write_6cyc_2W], (instregex "^(P?MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>;
+// MUL
+def : InstRW<[A57Write_6cyc_2W_Mul_Forward], (instregex "^MUL(v16i8|v8i16|v4i32)(_indexed)?$")>;
+// PMUL, SQDMULH, SQRDMULH
+def : InstRW<[A57Write_6cyc_2W], (instregex "^(PMUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>;
+
+// Cortex A57 Software Optimization Guide Sec 3.14
+def A57ReadIVMA4   : SchedReadAdvance<4 , [A57Write_5cyc_1W_Mul_Forward, A57Write_6cyc_2W_Mul_Forward]>;
+def A57ReadIVMA3   : SchedReadAdvance<3 , [A57Write_5cyc_1W_Mul_Forward, A57Write_6cyc_2W_Mul_Forward]>;

 // ASIMD multiply accumulate, D-form
-def : InstRW<[A57Write_5cyc_1W], (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>;
+def : InstRW<[A57Write_5cyc_1W_Mul_Forward, A57ReadIVMA4], (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>;
 // ASIMD multiply accumulate, Q-form
-def : InstRW<[A57Write_6cyc_2W], (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>;
+def : InstRW<[A57Write_6cyc_2W_Mul_Forward, A57ReadIVMA4], (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>;

 // ASIMD multiply accumulate long
 // ASIMD multiply accumulate saturating long
-def A57WriteIVMA   : SchedWriteRes<[A57UnitW]> { let Latency = 5;  }
-def A57ReadIVMA4   : SchedReadAdvance<4, [A57WriteIVMA]>;
-def : InstRW<[A57WriteIVMA, A57ReadIVMA4], (instregex "^(S|U|SQD)ML[AS]L")>;
+def : InstRW<[A57Write_5cyc_1W_Mul_Forward, A57ReadIVMA4], (instregex "^(S|U)ML[AS]L")>;
+def : InstRW<[A57Write_5cyc_1W_Mul_Forward, A57ReadIVMA3], (instregex "^SQDML[AS]L")>;

 // ASIMD multiply long
-def : InstRW<[A57Write_5cyc_1W], (instregex "^(S|U|SQD)MULL")>;
+def : InstRW<[A57Write_5cyc_1W_Mul_Forward], (instregex "^(S|U)MULL")>;
+def : InstRW<[A57Write_5cyc_1W], (instregex "^SQDMULL")>;
 def : InstRW<[A57Write_5cyc_1W], (instregex "^PMULL(v8i8|v16i8)")>;
 def : InstRW<[A57Write_3cyc_1W], (instregex "^PMULL(v1i64|v2i64)")>;

 // ASIMD pairwise add and accumulate
 // ASIMD shift accumulate
-def A57WriteIVA    : SchedWriteRes<[A57UnitX]> { let Latency = 4;  }
-def A57ReadIVA3    : SchedReadAdvance<3, [A57WriteIVA]>;
-def : InstRW<[A57WriteIVA, A57ReadIVA3], (instregex "^[SU]ADALP")>;
-def : InstRW<[A57WriteIVA, A57ReadIVA3], (instregex "^(S|SR|U|UR)SRA")>;
+def : InstRW<[A57Write_4cyc_1X_NonMul_Forward, A57ReadIVA3], (instregex "^[SU]ADALP")>;
+def : InstRW<[A57Write_4cyc_1X_NonMul_Forward, A57ReadIVA3], (instregex "^(S|SR|U|UR)SRA")>;

 // ASIMD shift by immed, complex
 def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]?(Q|R){1,2}SHR")>;
@ -474,17 +490,22 @@ def : InstRW<[A57Write_9cyc_3V], (instregex "^(FMAX|FMIN)(NM)?P(v4f32|v2f64|v2i6
 def : InstRW<[A57Write_10cyc_3V], (instregex "^(FMAX|FMIN)(NM)?Vv")>;

 // ASIMD FP multiply, D-form, FZ
-def : InstRW<[A57Write_5cyc_1V], (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)")>;
+def : InstRW<[A57Write_5cyc_1V_FP_Forward], (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)")>;
 // ASIMD FP multiply, Q-form, FZ
-def : InstRW<[A57Write_5cyc_2V], (instregex "^FMULX?(v4f32|v2f64|v4i32|v2i64)")>;
+def : InstRW<[A57Write_5cyc_2V_FP_Forward], (instregex "^FMULX?(v4f32|v2f64|v4i32|v2i64)")>;

 // ASIMD FP multiply accumulate, D-form, FZ
 // ASIMD FP multiply accumulate, Q-form, FZ
 def A57WriteFPVMAD : SchedWriteRes<[A57UnitV]> { let Latency = 9;  }
 def A57WriteFPVMAQ : SchedWriteRes<[A57UnitV, A57UnitV]> { let Latency = 10;  }
-def A57ReadFPVMA5  : SchedReadAdvance<5, [A57WriteFPVMAD, A57WriteFPVMAQ]>;
+
+// Cortex A57 Software Optimization Guide Sec 3.15
+// Advances from FP mul and mul-accum to mul-accum
+def A57ReadFPVMA5  : SchedReadAdvance<5, [A57WriteFPVMAD, A57WriteFPVMAQ, A57Write_5cyc_1V_FP_Forward, A57Write_5cyc_2V_FP_Forward]>;
+def A57ReadFPVMA6  : SchedReadAdvance<6, [A57WriteFPVMAD, A57WriteFPVMAQ, A57Write_5cyc_1V_FP_Forward, A57Write_5cyc_2V_FP_Forward]>;
+
 def : InstRW<[A57WriteFPVMAD, A57ReadFPVMA5], (instregex "^FML[AS](v2f32|v1i32|v2i32|v1i64)")>;
-def : InstRW<[A57WriteFPVMAQ, A57ReadFPVMA5], (instregex "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>;
+def : InstRW<[A57WriteFPVMAQ, A57ReadFPVMA6], (instregex "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>;

 // ASIMD FP round, D-form
 def : InstRW<[A57Write_5cyc_1V], (instregex "^FRINT[AIMNPXZ](v2f32)")>;
@ -502,10 +523,10 @@ def : InstRW<[A57Write_5cyc_2V], (instregex "^FRINT[AIMNPXZ](v4f32|v2f64)")>;
 //   Q form - v16i8, v8i16, v4i32, v2i64

 // ASIMD bitwise insert, Q-form
-def : InstRW<[A57Write_3cyc_2V], (instregex "^(BIF|BIT|BSL)v16i8")>;
+def : InstRW<[A57Write_3cyc_2V], (instregex "^(BIF|BIT|BSL|BSP)v16i8")>;

 // ASIMD duplicate, gen reg, D-form and Q-form
-def : InstRW<[A57Write_8cyc_1L_1V], (instregex "^CPY")>;
+def : InstRW<[A57Write_8cyc_1L_1V], (instregex "^DUP(i8|i16|i32|i64)$")>;
 def : InstRW<[A57Write_8cyc_1L_1V], (instregex "^DUPv.+gpr")>;

 // ASIMD move, saturating
@ -547,8 +568,9 @@ def : InstRW<[A57Write_6cyc_3V], (instregex "^(UZP|ZIP)(1|2)(v16i8|v8i16|v4i32|v

 def : InstRW<[A57Write_5cyc_1V], (instregex "^F(ADD|SUB)[DS]rr")>;

+// Cortex A57 Software Optimization Guide Sec 3.10
 def A57WriteFPMA  : SchedWriteRes<[A57UnitV]> { let Latency = 9;  }
-def A57ReadFPMA5  : SchedReadAdvance<5, [A57WriteFPMA]>;
+def A57ReadFPMA5  : SchedReadAdvance<5, [A57WriteFPMA, WriteFMul]>;
 def A57ReadFPM    : SchedReadAdvance<0>;
 def : InstRW<[A57WriteFPMA, A57ReadFPM, A57ReadFPM, A57ReadFPMA5], (instregex "^FN?M(ADD|SUB)[DS]rrr")>;

--- a/suite/synctools/tablegen/AArch64/AArch64SchedA57WriteRes.td
+++ b/suite/synctools/tablegen/AArch64/AArch64SchedA57WriteRes.td
@ -1,9 +1,8 @@
 //=- AArch64SchedA57WriteRes.td - ARM Cortex-A57 Write Res ---*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@ -14,6 +13,10 @@
 //   Prefix: A57Write
 //   Latency: #cyc
 //   MicroOp Count/Types: #(B|I|M|L|S|X|W|V)
+//   Postfix (optional): (XYZ)_Forward
+//
+//   The postfix is added to differentiate SchedWriteRes that are used in
+//   subsequent SchedReadAdvances.
 //
 // e.g. A57Write_6cyc_1I_6S_4V means the total latency is 6 and there are
 //      11 micro-ops to be issued down one I pipe, six S pipes and four V pipes.
@ -26,7 +29,9 @@
 def A57Write_5cyc_1L  : SchedWriteRes<[A57UnitL]> { let Latency = 5;  }
 def A57Write_5cyc_1M  : SchedWriteRes<[A57UnitM]> { let Latency = 5;  }
 def A57Write_5cyc_1V  : SchedWriteRes<[A57UnitV]> { let Latency = 5;  }
+def A57Write_5cyc_1V_FP_Forward  : SchedWriteRes<[A57UnitV]> { let Latency = 5; }
 def A57Write_5cyc_1W  : SchedWriteRes<[A57UnitW]> { let Latency = 5;  }
+def A57Write_5cyc_1W_Mul_Forward  : SchedWriteRes<[A57UnitW]> { let Latency = 5;  }
 def A57Write_10cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 10; }
 def A57Write_17cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 17;
                                                    let ResourceCycles = [17]; }
@ -46,6 +51,7 @@ def A57Write_3cyc_1W  : SchedWriteRes<[A57UnitW]> { let Latency = 3;  }
 def A57Write_3cyc_1X  : SchedWriteRes<[A57UnitX]> { let Latency = 3;  }
 def A57Write_4cyc_1L  : SchedWriteRes<[A57UnitL]> { let Latency = 4;  }
 def A57Write_4cyc_1X  : SchedWriteRes<[A57UnitX]> { let Latency = 4;  }
+def A57Write_4cyc_1X_NonMul_Forward  : SchedWriteRes<[A57UnitX]> { let Latency = 4;  }
 def A57Write_9cyc_1V  : SchedWriteRes<[A57UnitV]> { let Latency = 9;  }
 def A57Write_6cyc_1M  : SchedWriteRes<[A57UnitM]> { let Latency = 6;  }
 def A57Write_6cyc_1V  : SchedWriteRes<[A57UnitV]> { let Latency = 6;  }
@ -94,6 +100,10 @@ def A57Write_6cyc_2W     : SchedWriteRes<[A57UnitW, A57UnitW]> {
  let Latency     = 6;
  let NumMicroOps = 2;
 }
+def A57Write_6cyc_2W_Mul_Forward     : SchedWriteRes<[A57UnitW, A57UnitW]> {
+  let Latency     = 6;
+  let NumMicroOps = 2;
+}
 def A57Write_5cyc_1I_1L  : SchedWriteRes<[A57UnitI,
                                          A57UnitL]> {
  let Latency     = 5;
@ -103,10 +113,18 @@ def A57Write_5cyc_2V     : SchedWriteRes<[A57UnitV, A57UnitV]> {
  let Latency     = 5;
  let NumMicroOps = 2;
 }
+def A57Write_5cyc_2V_FP_Forward     : SchedWriteRes<[A57UnitV, A57UnitV]> {
+  let Latency     = 5;
+  let NumMicroOps = 2;
+}
 def A57Write_5cyc_2X     : SchedWriteRes<[A57UnitX, A57UnitX]> {
  let Latency     = 5;
  let NumMicroOps = 2;
 }
+def A57Write_5cyc_2X_NonMul_Forward     : SchedWriteRes<[A57UnitX, A57UnitX]> {
+  let Latency     = 5;
+  let NumMicroOps = 2;
+}
 def A57Write_10cyc_1L_1V : SchedWriteRes<[A57UnitL,
                                          A57UnitV]> {
  let Latency     = 10;
--- a/suite/synctools/tablegen/AArch64/AArch64SchedA64FX.td
+++ b/suite/synctools/tablegen/AArch64/AArch64SchedA64FX.td
--- a/suite/synctools/tablegen/AArch64/AArch64SchedAmpere1.td
+++ b/suite/synctools/tablegen/AArch64/AArch64SchedAmpere1.td
--- a/suite/synctools/tablegen/AArch64/AArch64SchedCyclone.td
+++ b/suite/synctools/tablegen/AArch64/AArch64SchedCyclone.td
@ -1,9 +1,8 @@
 //=- AArch64SchedCyclone.td - Cyclone Scheduling Definitions -*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@ -19,7 +18,9 @@ def CycloneModel : SchedMachineModel {
  let MispredictPenalty = 16; // 14-19 cycles are typical.
  let CompleteModel = 1;

-  list<Predicate> UnsupportedFeatures = [HasSVE];
+  list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
+                                                    PAUnsupported.F,
+                                                    SMEUnsupported.F);
 }

 //===----------------------------------------------------------------------===//
@ -257,6 +258,7 @@ def CyReadAdrBase : SchedReadVariant<[
  SchedVar<ScaledIdxPred, [ReadBaseRS]>, // Read base reg after shifting offset.
  SchedVar<NoSchedPred,   [ReadDefault]>]>;   // Read base reg with no shift.
 def : SchedAlias<ReadAdrBase, CyReadAdrBase>; // Map AArch64->Cyclone type.
+def : ReadAdvance<ReadST, 0>;

 //---
 // 7.8.9,7.8.11. Load/Store, paired
@ -302,7 +304,8 @@ def : WriteRes<WriteSys, []> {let Latency = -1;}
 // 7.9 Vector Unit Instructions

 // Simple vector operations take 2 cycles.
-def : WriteRes<WriteV, [CyUnitV]> {let Latency = 2;}
+def : WriteRes<WriteVd, [CyUnitV]> {let Latency = 2;}
+def : WriteRes<WriteVq, [CyUnitV]> {let Latency = 2;}

 // Define some longer latency vector op types for Cyclone.
 def CyWriteV3 : SchedWriteRes<[CyUnitV]> {let Latency = 3;}
@ -333,7 +336,7 @@ def : WriteRes<WriteFImm, [CyUnitV]> {let Latency = 2;}
 // COPY is handled above in the WriteMov Variant.
 def WriteVMov    : SchedWriteVariant<[
                     SchedVar<WriteVMovPred, [WriteX]>,
-                     SchedVar<NoSchedPred,   [WriteV]>]>;
+                     SchedVar<NoSchedPred,   [WriteVq]>]>;
 def : InstRW<[WriteVMov], (instrs ORRv16i8)>;

 // FMOVSr,FMOVDr are WriteF.
@ -353,7 +356,7 @@ def : WriteRes<WriteFCopy, [CyUnitLS]> {
 def : InstRW<[WriteLD], (instrs FMOVSWr,FMOVDXr,FMOVDXHighr)>;

 // INS V[x],R
-def CyWriteCopyToFPR : WriteSequence<[WriteVLD, WriteV]>;
+def CyWriteCopyToFPR : WriteSequence<[WriteVLD, WriteVq]>;
 def : InstRW<[CyWriteCopyToFPR], (instregex "INSv")>;

 // SMOV,UMOV R,V[x]
@ -495,7 +498,7 @@ def : InstRW<[CyWriteV3], (instregex "SQRSHLv","UQRSHLv")>;
 // WriteV includes:
 // SHLL,SSHLL,USHLL
 // SLI,SRI
-// BIF,BIT,BSL
+// BIF,BIT,BSL,BSP
 // EXT
 // CLS,CLZ,CNT,RBIT,REV16,REV32,REV64,XTN
 // XTN2
@ -569,7 +572,7 @@ def : InstRW<[WriteFRSQRTS], (instregex "FRSQRTSv")>;
 //---

 // FCVT lengthen f16/s32
-def : InstRW<[WriteV], (instrs FCVTSHr,FCVTDHr,FCVTDSr)>;
+def : InstRW<[WriteVq], (instrs FCVTSHr,FCVTDHr,FCVTDSr)>;

 // FCVT,FCVTN,FCVTXN
 // SCVTF,UCVTF V,V
@ -679,61 +682,61 @@ def : InstRW<[WriteVLDShuffle],
 def : InstRW<[WriteVLDShuffle, WriteAdr],
             (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

-def : InstRW<[WriteVLDShuffle, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteVq],
             (instregex "LD2Twov(8b|4h|2s)$")>;
-def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVq],
             (instregex "LD2Twov(8b|4h|2s)_POST$")>;
 def : InstRW<[WriteVLDShuffle, WriteVLDShuffle],
             (instregex "LD2Twov(16b|8h|4s|2d)$")>;
 def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle],
             (instregex "LD2Twov(16b|8h|4s|2d)_POST")>;

-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVq],
             (instregex "LD2i(8|16|32)$")>;
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVq],
             (instregex "LD2i(8|16|32)_POST")>;
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVq],
             (instregex "LD2i64$")>;
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVq],
             (instregex "LD2i64_POST")>;

-def : InstRW<[WriteVLDShuffle, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteVq],
             (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVq],
             (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST")>;

-def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVq],
             (instregex "LD3Threev(8b|4h|2s)$")>;
-def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVq],
             (instregex "LD3Threev(8b|4h|2s)_POST")>;
 def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVLDShuffle],
             (instregex "LD3Threev(16b|8h|4s|2d)$")>;
 def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVLDShuffle],
             (instregex "LD3Threev(16b|8h|4s|2d)_POST")>;

-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVq, WriteVq],
             (instregex "LD3i(8|16|32)$")>;
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVq, WriteVq],
             (instregex "LD3i(8|16|32)_POST")>;

-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteVq],
             (instregex "LD3i64$")>;
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteVq],
             (instregex "LD3i64_POST")>;

-def : InstRW<[WriteVLDShuffle, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteVq, WriteVq],
             (instregex "LD3Rv(8b|4h|2s|16b|8h|4s)$")>;
-def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVq, WriteVq],
             (instregex "LD3Rv(8b|4h|2s|16b|8h|4s)_POST")>;

-def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVq],
             (instrs LD3Rv1d,LD3Rv2d)>;
-def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVq],
             (instrs LD3Rv1d_POST,LD3Rv2d_POST)>;

-def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVq, WriteVq],
             (instregex "LD4Fourv(8b|4h|2s)$")>;
-def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVq, WriteVq],
             (instregex "LD4Fourv(8b|4h|2s)_POST")>;
 def : InstRW<[WriteVLDPairShuffle, WriteVLDPairShuffle,
              WriteVLDPairShuffle, WriteVLDPairShuffle],
@ -742,25 +745,25 @@ def : InstRW<[WriteVLDPairShuffle, WriteAdr, WriteVLDPairShuffle,
              WriteVLDPairShuffle, WriteVLDPairShuffle],
             (instregex "LD4Fourv(16b|8h|4s|2d)_POST")>;

-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVq, WriteVq, WriteVq],
             (instregex "LD4i(8|16|32)$")>;
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVq, WriteVq, WriteVq],
             (instregex "LD4i(8|16|32)_POST")>;


-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteVq, WriteVq],
             (instrs LD4i64)>;
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteVq],
             (instrs LD4i64_POST)>;

-def : InstRW<[WriteVLDShuffle, WriteV, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteVq, WriteVq, WriteVq],
             (instregex "LD4Rv(8b|4h|2s|16b|8h|4s)$")>;
-def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVq, WriteVq, WriteVq],
             (instregex "LD4Rv(8b|4h|2s|16b|8h|4s)_POST")>;

-def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVq, WriteVq],
             (instrs LD4Rv1d,LD4Rv2d)>;
-def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVq, WriteVq],
             (instrs LD4Rv1d_POST,LD4Rv2d_POST)>;

 //---
--- a/suite/synctools/tablegen/AArch64/AArch64SchedExynosM1.td
+++ b/suite/synctools/tablegen/AArch64/AArch64SchedExynosM1.td
@ -1,847 +0,0 @@
-//=- AArch64SchedExynosM1.td - Samsung Exynos M1 Sched Defs --*- tablegen -*-=//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the machine model for the Samsung Exynos M1 to support
-// instruction scheduling and other instruction cost heuristics.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// The Exynos-M1 is a traditional superscalar microprocessor with a
-// 4-wide in-order stage for decode and dispatch and a wider issue stage.
-// The execution units and loads and stores are out-of-order.
-
-def ExynosM1Model : SchedMachineModel {
-  let IssueWidth            =  4; // Up to 4 uops per cycle.
-  let MicroOpBufferSize     = 96; // ROB size.
-  let LoopMicroOpBufferSize = 24; // Based on the instruction queue size.
-  let LoadLatency           =  4; // Optimistic load cases.
-  let MispredictPenalty     = 14; // Minimum branch misprediction penalty.
-  let CompleteModel         =  1; // Use the default model otherwise.
-
-  list<Predicate> UnsupportedFeatures = [HasSVE];
-}
-
-//===----------------------------------------------------------------------===//
-// Define each kind of processor resource and number available on the Exynos-M1,
-// which has 9 pipelines, each with its own queue with out-of-order dispatch.
-
-let SchedModel = ExynosM1Model in {
-
-def M1UnitA  : ProcResource<2>; // Simple integer
-def M1UnitC  : ProcResource<1>; // Simple and complex integer
-def M1UnitD  : ProcResource<1>; // Integer division (inside C, serialized)
-def M1UnitB  : ProcResource<2>; // Branch
-def M1UnitL  : ProcResource<1>; // Load
-def M1UnitS  : ProcResource<1>; // Store
-def M1PipeF0 : ProcResource<1>; // FP #0
-let Super = M1PipeF0 in {
-  def M1UnitFMAC   : ProcResource<1>; // FP multiplication
-  def M1UnitNAL0   : ProcResource<1>; // Simple vector
-  def M1UnitNMISC  : ProcResource<1>; // Miscellanea
-  def M1UnitFCVT   : ProcResource<1>; // FP conversion
-  def M1UnitNCRYPT : ProcResource<1>; // Cryptographic
-}
-def M1PipeF1 : ProcResource<1>; // FP #1
-let Super = M1PipeF1 in {
-  def M1UnitFADD : ProcResource<1>; // Simple FP
-  def M1UnitNAL1 : ProcResource<1>; // Simple vector
-  def M1UnitFVAR : ProcResource<1>; // FP division & square root (serialized)
-  def M1UnitFST  : ProcResource<1>; // FP store
-}
-
-def M1UnitALU  : ProcResGroup<[M1UnitA,
-                               M1UnitC]>;    // All integer
-def M1UnitNALU : ProcResGroup<[M1UnitNAL0,
-                               M1UnitNAL1]>; // All simple vector
-
-//===----------------------------------------------------------------------===//
-// Predicates.
-
-def M1BranchLinkFastPred : SchedPredicate<[{MI->getOpcode() == AArch64::BLR &&
-                                            MI->getOperand(0).getReg() != AArch64::LR}]>;
-def M1ShiftLeftFastPred  : SchedPredicate<[{TII->isExynosShiftLeftFast(*MI)}]>;
-
-//===----------------------------------------------------------------------===//
-// Coarse scheduling model.
-
-def M1WriteA1 : SchedWriteRes<[M1UnitALU]> { let Latency = 1; }
-def M1WriteA2 : SchedWriteRes<[M1UnitALU]> { let Latency = 2; }
-def M1WriteAA : SchedWriteRes<[M1UnitALU]> { let Latency = 2;
-                                             let ResourceCycles = [2]; }
-def M1WriteAB : SchedWriteRes<[M1UnitALU,
-                               M1UnitC]>   { let Latency = 1;
-                                             let NumMicroOps = 2; }
-def M1WriteAC : SchedWriteRes<[M1UnitALU,
-                               M1UnitALU,
-                               M1UnitC]>   { let Latency = 2;
-                                             let NumMicroOps = 3; }
-def M1WriteAD : SchedWriteRes<[M1UnitALU,
-                               M1UnitC]>   { let Latency = 2;
-                                             let NumMicroOps = 2; }
-def M1WriteAX : SchedWriteVariant<[SchedVar<M1ShiftLeftFastPred, [M1WriteA1]>,
-                                   SchedVar<NoSchedPred,         [M1WriteAA]>]>;
-def M1WriteC1 : SchedWriteRes<[M1UnitC]>   { let Latency = 1; }
-def M1WriteC2 : SchedWriteRes<[M1UnitC]>   { let Latency = 2; }
-
-def M1WriteB1 : SchedWriteRes<[M1UnitB]> { let Latency = 1; }
-def M1WriteBX : SchedWriteVariant<[SchedVar<M1BranchLinkFastPred, [M1WriteAB]>,
-                                   SchedVar<NoSchedPred,          [M1WriteAC]>]>;
-
-def M1WriteL5 : SchedWriteRes<[M1UnitL]> { let Latency = 5; }
-def M1WriteL6 : SchedWriteRes<[M1UnitL]> { let Latency = 6; }
-def M1WriteLA : SchedWriteRes<[M1UnitL]> { let Latency = 6;
-                                           let ResourceCycles = [2]; }
-def M1WriteLB : SchedWriteRes<[M1UnitL,
-                               M1UnitA]> { let Latency = 4;
-                                           let NumMicroOps = 2; }
-def M1WriteLC : SchedWriteRes<[M1UnitL,
-                               M1UnitA]> { let Latency = 5;
-                                           let NumMicroOps = 2; }
-def M1WriteLD : SchedWriteRes<[M1UnitL,
-                               M1UnitA]> { let Latency = 6;
-                                           let NumMicroOps = 2;
-                                           let ResourceCycles = [2, 1]; }
-def M1WriteLH : SchedWriteRes<[]>        { let Latency = 5;
-                                           let NumMicroOps = 0; }
-def M1WriteLX : SchedWriteVariant<[SchedVar<M1ShiftLeftFastPred, [M1WriteL5]>,
-                                   SchedVar<NoSchedPred,         [M1WriteLC]>]>;
-def M1WriteLY : SchedWriteVariant<[SchedVar<M1ShiftLeftFastPred, [M1WriteL5]>,
-                                   SchedVar<NoSchedPred,         [M1WriteLD]>]>;
-
-def M1WriteS1 : SchedWriteRes<[M1UnitS]>   { let Latency = 1; }
-def M1WriteS3 : SchedWriteRes<[M1UnitS]>   { let Latency = 3; }
-def M1WriteS4 : SchedWriteRes<[M1UnitS]>   { let Latency = 4; }
-def M1WriteSA : SchedWriteRes<[M1UnitS,
-                               M1UnitFST,
-                               M1UnitS,
-                               M1UnitFST]> { let Latency = 1;
-                                             let NumMicroOps = 2; }
-def M1WriteSB : SchedWriteRes<[M1UnitS,
-                               M1UnitFST,
-                               M1UnitA]>   { let Latency = 3;
-                                             let NumMicroOps = 2; }
-def M1WriteSC : SchedWriteRes<[M1UnitS,
-                               M1UnitFST,
-                               M1UnitS,
-                               M1UnitFST,
-                               M1UnitA]>   { let Latency = 3;
-                                             let NumMicroOps = 3; }
-def M1WriteSD : SchedWriteRes<[M1UnitS,
-                               M1UnitFST,
-                               M1UnitA]>   { let Latency = 1;
-                                             let NumMicroOps = 2; }
-def M1WriteSE : SchedWriteRes<[M1UnitS,
-                               M1UnitA]>   { let Latency = 2;
-                                             let NumMicroOps = 2; }
-def M1WriteSX : SchedWriteVariant<[SchedVar<M1ShiftLeftFastPred, [M1WriteS1]>,
-                                   SchedVar<NoSchedPred,         [M1WriteSE]>]>;
-def M1WriteSY : SchedWriteVariant<[SchedVar<M1ShiftLeftFastPred, [M1WriteS1]>,
-                                   SchedVar<NoSchedPred,         [M1WriteSB]>]>;
-
-def M1ReadAdrBase : SchedReadVariant<[SchedVar<ScaledIdxPred, [ReadDefault]>,
-                                      SchedVar<NoSchedPred,   [ReadDefault]>]>;
-
-// Branch instructions.
-def : WriteRes<WriteBr,    []>        { let Latency = 0; }
-def : WriteRes<WriteBrReg, [M1UnitC]> { let Latency = 1; }
-
-// Arithmetic and logical integer instructions.
-def : WriteRes<WriteI,     [M1UnitALU]> { let Latency = 1; }
-def : WriteRes<WriteISReg, [M1UnitALU]> { let Latency = 1; }
-def : WriteRes<WriteIEReg, [M1UnitALU]> { let Latency = 1; }
-def : WriteRes<WriteIS,    [M1UnitALU]> { let Latency = 1; }
-
-// Move instructions.
-def : WriteRes<WriteImm, [M1UnitALU]> { let Latency = 1; }
-
-// Divide and multiply instructions.
-def : WriteRes<WriteID32, [M1UnitC,
-                           M1UnitD]> { let Latency = 13;
-                                       let ResourceCycles = [1, 13]; }
-def : WriteRes<WriteID64, [M1UnitC,
-                           M1UnitD]> { let Latency = 21;
-                                       let ResourceCycles = [1, 21]; }
-// TODO: Long multiplication take 5 cycles and also the ALU.
-def : WriteRes<WriteIM32, [M1UnitC]> { let Latency = 3; }
-def : WriteRes<WriteIM64, [M1UnitC]> { let Latency = 4;
-                                       let ResourceCycles = [2]; }
-
-// Miscellaneous instructions.
-def : WriteRes<WriteExtr, [M1UnitALU,
-                           M1UnitALU]> { let Latency = 2;
-                                         let NumMicroOps = 2; }
-
-// Addressing modes.
-def : WriteRes<WriteAdr, []> { let Latency = 1;
-                               let NumMicroOps = 0; }
-def : SchedAlias<ReadAdrBase, M1ReadAdrBase>;
-
-// Load instructions.
-def : WriteRes<WriteLD,    [M1UnitL]>   { let Latency = 4; }
-def : WriteRes<WriteLDHi,  []>          { let Latency = 4;
-                                          let NumMicroOps = 0; }
-def : SchedAlias<WriteLDIdx, M1WriteLX>;
-
-// Store instructions.
-def : WriteRes<WriteST,    [M1UnitS]> { let Latency = 1; }
-def : WriteRes<WriteSTP,   [M1UnitS]> { let Latency = 1; }
-def : WriteRes<WriteSTX,   [M1UnitS]> { let Latency = 1; }
-def : SchedAlias<WriteSTIdx, M1WriteSX>;
-
-// FP data instructions.
-def : WriteRes<WriteF,    [M1UnitFADD]>  { let Latency = 3; }
-def : WriteRes<WriteFCmp, [M1UnitNMISC]> { let Latency = 4; }
-def : WriteRes<WriteFDiv, [M1UnitFVAR]>  { let Latency = 15;
-                                           let ResourceCycles = [15]; }
-def : WriteRes<WriteFMul, [M1UnitFMAC]>  { let Latency = 4; }
-
-// FP miscellaneous instructions.
-def : WriteRes<WriteFCvt,  [M1UnitFCVT]> { let Latency = 3; }
-def : WriteRes<WriteFImm,  [M1UnitNALU]> { let Latency = 1; }
-def : WriteRes<WriteFCopy, [M1UnitS]>    { let Latency = 4; }
-
-// FP load instructions.
-def : WriteRes<WriteVLD,   [M1UnitL]> { let Latency = 5; }
-
-// FP store instructions.
-def : WriteRes<WriteVST, [M1UnitS,
-                          M1UnitFST]> { let Latency = 1;
-                                        let NumMicroOps = 1; }
-
-// ASIMD FP instructions.
-def : WriteRes<WriteV, [M1UnitFADD]> { let Latency = 3; }
-
-// Other miscellaneous instructions.
-def : WriteRes<WriteAtomic,  []> { let Unsupported = 1; }
-def : WriteRes<WriteBarrier, []> { let Latency = 1; }
-def : WriteRes<WriteHint,    []> { let Latency = 1; }
-def : WriteRes<WriteSys,     []> { let Latency = 1; }
-
-//===----------------------------------------------------------------------===//
-// Fast forwarding.
-
-// TODO: Add FP register forwarding rules.
-def : ReadAdvance<ReadI,       0>;
-def : ReadAdvance<ReadISReg,   0>;
-def : ReadAdvance<ReadIEReg,   0>;
-def : ReadAdvance<ReadIM,      0>;
-// TODO: The forwarding for WriteIM32 saves actually 2 cycles.
-def : ReadAdvance<ReadIMA,     3, [WriteIM32, WriteIM64]>;
-def : ReadAdvance<ReadID,      0>;
-def : ReadAdvance<ReadExtrHi,  0>;
-def : ReadAdvance<ReadAdrBase, 0>;
-def : ReadAdvance<ReadVLD,     0>;
-
-//===----------------------------------------------------------------------===//
-// Finer scheduling model.
-
-def M1WriteNEONA   : SchedWriteRes<[M1UnitNALU,
-                                    M1UnitNALU,
-                                    M1UnitFADD]>   { let Latency = 9;
-                                                     let NumMicroOps = 3; }
-def M1WriteNEONB   : SchedWriteRes<[M1UnitNALU,
-                                    M1UnitFST]>    { let Latency = 5;
-                                                     let NumMicroOps = 2;}
-def M1WriteNEONC   : SchedWriteRes<[M1UnitNALU,
-                                    M1UnitFST]>    { let Latency = 6;
-                                                     let NumMicroOps = 2; }
-def M1WriteNEOND   : SchedWriteRes<[M1UnitNALU,
-                                    M1UnitFST,
-                                    M1UnitL]>      { let Latency = 10;
-                                                     let NumMicroOps = 3; }
-def M1WriteNEONE   : SchedWriteRes<[M1UnitFCVT,
-                                    M1UnitFST]>    { let Latency = 8;
-                                                     let NumMicroOps = 2; }
-def M1WriteNEONF   : SchedWriteRes<[M1UnitFCVT,
-                                    M1UnitFST,
-                                    M1UnitL]>      { let Latency = 13;
-                                                     let NumMicroOps = 3; }
-def M1WriteNEONG   : SchedWriteRes<[M1UnitNMISC,
-                                    M1UnitFST]>    { let Latency = 6;
-                                                     let NumMicroOps = 2; }
-def M1WriteNEONH   : SchedWriteRes<[M1UnitNALU,
-                                    M1UnitFST]>    { let Latency = 3;
-                                                     let NumMicroOps = 2; }
-def M1WriteNEONI   : SchedWriteRes<[M1UnitFST,
-                                    M1UnitL]>      { let Latency = 9;
-                                                     let NumMicroOps = 2; }
-def M1WriteNEONJ   : SchedWriteRes<[M1UnitNMISC,
-                                    M1UnitFMAC]>   { let Latency = 6;
-                                                     let NumMicroOps = 2; }
-def M1WriteNEONK   : SchedWriteRes<[M1UnitNMISC,
-                                    M1UnitFMAC]>   { let Latency = 7;
-                                                     let NumMicroOps = 2; }
-def M1WriteNEONL   : SchedWriteRes<[M1UnitNALU]>   { let Latency = 2;
-                                                     let ResourceCycles = [2]; }
-def M1WriteFADD3   : SchedWriteRes<[M1UnitFADD]>   { let Latency = 3; }
-def M1WriteFCVT3   : SchedWriteRes<[M1UnitFCVT]>   { let Latency = 3; }
-def M1WriteFCVT4   : SchedWriteRes<[M1UnitFCVT]>   { let Latency = 4; }
-def M1WriteFMAC4   : SchedWriteRes<[M1UnitFMAC]>   { let Latency = 4; }
-def M1WriteFMAC5   : SchedWriteRes<[M1UnitFMAC]>   { let Latency = 5; }
-// TODO
-def M1WriteFVAR15  : SchedWriteRes<[M1UnitFVAR]>   { let Latency = 15;
-                                                     let ResourceCycles = [15]; }
-def M1WriteFVAR23  : SchedWriteRes<[M1UnitFVAR]>   { let Latency = 23;
-                                                     let ResourceCycles = [23]; }
-def M1WriteNALU1   : SchedWriteRes<[M1UnitNALU]>   { let Latency = 1; }
-def M1WriteNALU2   : SchedWriteRes<[M1UnitNALU]>   { let Latency = 2; }
-def M1WriteNAL11   : SchedWriteRes<[M1UnitNAL1]>   { let Latency = 1; }
-def M1WriteNAL12   : SchedWriteRes<[M1UnitNAL1]>   { let Latency = 2; }
-def M1WriteNAL13   : SchedWriteRes<[M1UnitNAL1]>   { let Latency = 3; }
-def M1WriteNCRYPT1 : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 1; }
-def M1WriteNCRYPT5 : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 5; }
-def M1WriteNMISC1  : SchedWriteRes<[M1UnitNMISC]>  { let Latency = 1; }
-def M1WriteNMISC2  : SchedWriteRes<[M1UnitNMISC]>  { let Latency = 2; }
-def M1WriteNMISC3  : SchedWriteRes<[M1UnitNMISC]>  { let Latency = 3; }
-def M1WriteNMISC4  : SchedWriteRes<[M1UnitNMISC]>  { let Latency = 4; }
-def M1WriteTB      : SchedWriteRes<[M1UnitC,
-                                    M1UnitALU]>    { let Latency = 2;
-                                                     let NumMicroOps = 2; }
-def M1WriteVLDA    : SchedWriteRes<[M1UnitL,
-                                    M1UnitL]>      { let Latency = 6;
-                                                     let NumMicroOps = 2; }
-def M1WriteVLDB    : SchedWriteRes<[M1UnitL,
-                                    M1UnitL,
-                                    M1UnitL]>      { let Latency = 7;
-                                                     let NumMicroOps = 3; }
-def M1WriteVLDC    : SchedWriteRes<[M1UnitL,
-                                    M1UnitL,
-                                    M1UnitL,
-                                    M1UnitL]>      { let Latency = 8;
-                                                     let NumMicroOps = 4; }
-def M1WriteVLDD    : SchedWriteRes<[M1UnitL,
-                                    M1UnitNALU]>   { let Latency = 7;
-                                                     let NumMicroOps = 2;
-                                                     let ResourceCycles = [2, 1]; }
-def M1WriteVLDE    : SchedWriteRes<[M1UnitL,
-                                    M1UnitNALU]>   { let Latency = 6;
-                                                     let NumMicroOps = 2; }
-def M1WriteVLDF    : SchedWriteRes<[M1UnitL,
-                                    M1UnitL]>      { let Latency = 10;
-                                                     let NumMicroOps = 2;
-                                                     let ResourceCycles = [1, 1]; }
-def M1WriteVLDG    : SchedWriteRes<[M1UnitL,
-                                    M1UnitNALU,
-                                    M1UnitNALU]>   { let Latency = 7;
-                                                     let NumMicroOps = 3;
-                                                     let ResourceCycles = [2, 1, 1]; }
-def M1WriteVLDH    : SchedWriteRes<[M1UnitL,
-                                    M1UnitNALU,
-                                    M1UnitNALU]>   { let Latency = 6;
-                                                     let NumMicroOps = 3; }
-def M1WriteVLDI    : SchedWriteRes<[M1UnitL,
-                                    M1UnitL,
-                                    M1UnitL]>      { let Latency = 12;
-                                                     let NumMicroOps = 3;
-                                                     let ResourceCycles = [2, 2, 2]; }
-def M1WriteVLDJ    : SchedWriteRes<[M1UnitL,
-                                    M1UnitNALU,
-                                    M1UnitNALU,
-                                    M1UnitNALU]>   { let Latency = 9;
-                                                     let NumMicroOps = 4;
-                                                     let ResourceCycles = [2, 1, 1, 1]; }
-def M1WriteVLDK    : SchedWriteRes<[M1UnitL,
-                                    M1UnitNALU,
-                                    M1UnitNALU,
-                                    M1UnitNALU,
-                                    M1UnitNALU]>   { let Latency = 9;
-                                                     let NumMicroOps = 5;
-                                                     let ResourceCycles = [2, 1, 1, 1, 1]; }
-def M1WriteVLDL    : SchedWriteRes<[M1UnitL,
-                                    M1UnitNALU,
-                                    M1UnitNALU,
-                                    M1UnitL,
-                                    M1UnitNALU]>   { let Latency = 7;
-                                                     let NumMicroOps = 5;
-                                                     let ResourceCycles = [1, 1, 1, 1, 1]; }
-def M1WriteVLDM    : SchedWriteRes<[M1UnitL,
-                                    M1UnitNALU,
-                                    M1UnitNALU,
-                                    M1UnitL,
-                                    M1UnitNALU,
-                                    M1UnitNALU]>   { let Latency = 7;
-                                                     let NumMicroOps = 6;
-                                                     let ResourceCycles = [1, 1, 1, 1, 1, 1]; }
-def M1WriteVLDN    : SchedWriteRes<[M1UnitL,
-                                    M1UnitL,
-                                    M1UnitL,
-                                    M1UnitL]>      { let Latency = 14;
-                                                     let NumMicroOps = 4;
-                                                     let ResourceCycles = [2, 1, 2, 1]; }
-def M1WriteVSTA    : WriteSequence<[WriteVST], 2>;
-def M1WriteVSTB    : WriteSequence<[WriteVST], 3>;
-def M1WriteVSTC    : WriteSequence<[WriteVST], 4>;
-def M1WriteVSTD    : SchedWriteRes<[M1UnitS,
-                                    M1UnitFST,
-                                    M1UnitFST]>    { let Latency = 7;
-                                                     let NumMicroOps = 2;
-                                                     let ResourceCycles = [7, 1, 1]; }
-def M1WriteVSTE    : SchedWriteRes<[M1UnitS,
-                                    M1UnitFST,
-                                    M1UnitS,
-                                    M1UnitFST,
-                                    M1UnitFST]>    { let Latency = 8;
-                                                     let NumMicroOps = 3;
-                                                     let ResourceCycles = [7, 1, 1, 1, 1]; }
-def M1WriteVSTF    : SchedWriteRes<[M1UnitNALU,
-                                    M1UnitS,
-                                    M1UnitFST,
-                                    M1UnitS,
-                                    M1UnitFST,
-                                    M1UnitFST,
-                                    M1UnitFST]>     { let Latency = 15;
-                                                      let NumMicroOps = 5;
-                                                      let ResourceCycles = [1, 7, 1, 7, 1, 1, 1]; }
-def M1WriteVSTG    : SchedWriteRes<[M1UnitNALU,
-                                    M1UnitS,
-                                    M1UnitFST,
-                                    M1UnitS,
-                                    M1UnitFST,
-                                    M1UnitS,
-                                    M1UnitFST,
-                                    M1UnitFST,
-                                    M1UnitFST]>     { let Latency = 16;
-                                                      let NumMicroOps = 6;
-                                                      let ResourceCycles = [1, 7, 1, 7, 1, 1, 1, 1, 1]; }
-def M1WriteVSTH    : SchedWriteRes<[M1UnitNALU,
-                                    M1UnitS,
-                                    M1UnitFST,
-                                    M1UnitFST,
-                                    M1UnitFST]>      { let Latency = 14;
-                                                       let NumMicroOps = 4;
-                                                       let ResourceCycles = [1, 7, 1, 7, 1]; }
-def M1WriteVSTI    : SchedWriteRes<[M1UnitNALU,
-                                    M1UnitS,
-                                    M1UnitFST,
-                                    M1UnitS,
-                                    M1UnitFST,
-                                    M1UnitS,
-                                    M1UnitFST,
-                                    M1UnitS,
-                                    M1UnitFST,
-                                    M1UnitFST,
-                                    M1UnitFST]>      { let Latency = 17;
-                                                       let NumMicroOps = 7;
-                                                       let ResourceCycles = [1, 7, 1, 7, 1, 1, 1, 1, 1, 1, 1]; }
-
-// Branch instructions
-def : InstRW<[M1WriteB1], (instrs Bcc)>;
-def : InstRW<[M1WriteA1], (instrs BL)>;
-def : InstRW<[M1WriteBX], (instrs BLR)>;
-def : InstRW<[M1WriteC1], (instregex "^CBN?Z[WX]")>;
-def : InstRW<[M1WriteAD], (instregex "^TBN?Z[WX]")>;
-
-// Arithmetic and logical integer instructions.
-def : InstRW<[M1WriteA1], (instrs COPY)>;
-def : InstRW<[M1WriteAX], (instregex ".+r[sx](64)?$")>;
-
-// Divide and multiply instructions.
-
-// Miscellaneous instructions.
-
-// Load instructions.
-def : InstRW<[M1WriteLB,
-              WriteLDHi,
-              WriteAdr],    (instregex "^LDP(SW|W|X)(post|pre)")>;
-def : InstRW<[M1WriteLX,
-              ReadAdrBase], (instregex "^PRFMro[WX]")>;
-
-// Store instructions.
-
-// FP data instructions.
-def : InstRW<[M1WriteNALU1],  (instregex "^F(ABS|NEG)[DS]r")>;
-def : InstRW<[M1WriteFADD3],  (instregex "^F(ADD|SUB)[DS]rr")>;
-def : InstRW<[M1WriteNEONG],  (instregex "^FCCMPE?[DS]rr")>;
-def : InstRW<[M1WriteNMISC4], (instregex "^FCMPE?[DS]r")>;
-def : InstRW<[M1WriteFVAR15], (instrs FDIVSrr)>;
-def : InstRW<[M1WriteFVAR23], (instrs FDIVDrr)>;
-def : InstRW<[M1WriteNMISC2], (instregex "^F(MAX|MIN).+rr")>;
-def : InstRW<[M1WriteFMAC4],  (instregex "^FN?MUL[DS]rr")>;
-def : InstRW<[M1WriteFMAC5],  (instregex "^FN?M(ADD|SUB)[DS]rrr")>;
-def : InstRW<[M1WriteFCVT3],  (instregex "^FRINT.+r")>;
-def : InstRW<[M1WriteNEONH],  (instregex "^FCSEL[DS]rrr")>;
-def : InstRW<[M1WriteFVAR15], (instrs FSQRTSr)>;
-def : InstRW<[M1WriteFVAR23], (instrs FSQRTDr)>;
-
-// FP miscellaneous instructions.
-def : InstRW<[M1WriteFCVT3],  (instregex "^FCVT[DS][DS]r")>;
-def : InstRW<[M1WriteNEONF],  (instregex "^[FSU]CVT[AMNPZ][SU](_Int)?[SU]?[XW]?[DS]?[rds]i?")>;
-def : InstRW<[M1WriteNEONE],  (instregex "^[SU]CVTF[SU]")>;
-def : InstRW<[M1WriteNALU1],  (instregex "^FMOV[DS][ir]")>;
-def : InstRW<[M1WriteFCVT4],  (instregex "^[FU](RECP|RSQRT)Ev1")>;
-def : InstRW<[M1WriteNMISC1], (instregex "^FRECPXv1")>;
-def : InstRW<[M1WriteFMAC5],  (instregex "^F(RECP|RSQRT)S(16|32|64)")>;
-def : InstRW<[M1WriteS4],     (instregex "^FMOV[WX][DS](High)?r")>;
-def : InstRW<[M1WriteNEONI],  (instregex "^FMOV[DS][WX](High)?r")>;
-
-// FP load instructions.
-def : InstRW<[WriteVLD],    (instregex "^LDR[DSQ]l")>;
-def : InstRW<[WriteVLD],    (instregex "^LDUR[BDHSQ]i")>;
-def : InstRW<[WriteVLD,
-              WriteAdr],    (instregex "^LDR[BDHSQ](post|pre)")>;
-def : InstRW<[WriteVLD],    (instregex "^LDR[BDHSQ]ui")>;
-def : InstRW<[M1WriteLY,
-              ReadAdrBase], (instregex "^LDR[BDHS]ro[WX]")>;
-def : InstRW<[M1WriteLD,
-              ReadAdrBase], (instregex "^LDRQro[WX]")>;
-def : InstRW<[WriteVLD,
-              M1WriteLH],   (instregex "^LDN?P[DS]i")>;
-def : InstRW<[M1WriteLA,
-              M1WriteLH],   (instregex "^LDN?PQi")>;
-def : InstRW<[M1WriteLC,
-              M1WriteLH,
-              WriteAdr],    (instregex "^LDP[DS](post|pre)")>;
-def : InstRW<[M1WriteLD,
-              M1WriteLH,
-              WriteAdr],    (instregex "^LDPQ(post|pre)")>;
-
-// FP store instructions.
-def : InstRW<[WriteVST],    (instregex "^STUR[BDHSQ]i")>;
-def : InstRW<[WriteVST,
-              WriteAdr],    (instregex "^STR[BDHSQ](post|pre)")>;
-def : InstRW<[WriteVST],    (instregex "^STR[BDHSQ]ui")>;
-def : InstRW<[M1WriteSY,
-              ReadAdrBase], (instregex "^STR[BDHS]ro[WX]")>;
-def : InstRW<[M1WriteSB,
-              ReadAdrBase], (instregex "^STRQro[WX]")>;
-def : InstRW<[WriteVST],    (instregex "^STN?P[DSQ]i")>;
-def : InstRW<[WriteVST,
-              WriteAdr],    (instregex "^STP[DS](post|pre)")>;
-def : InstRW<[M1WriteSC,
-              WriteAdr],    (instregex "^STPQ(post|pre)")>;
-
-// ASIMD instructions.
-def : InstRW<[M1WriteNMISC3], (instregex "^[SU]ABAL?v")>;
-def : InstRW<[M1WriteNMISC1], (instregex "^[SU]ABDL?v")>;
-def : InstRW<[M1WriteNMISC1], (instregex "^(SQ)?ABSv")>;
-def : InstRW<[M1WriteNMISC1], (instregex "^SQNEGv")>;
-def : InstRW<[M1WriteNALU1],  (instregex "^(ADD|NEG|SUB)v")>;
-def : InstRW<[M1WriteNMISC3], (instregex "^[SU]?H(ADD|SUB)v")>;
-def : InstRW<[M1WriteNMISC3], (instregex "^[SU]?AD[AD](L|LP|P|W)V?2?v")>;
-def : InstRW<[M1WriteNMISC3], (instregex "^[SU]?SUB[LW]2?v")>;
-def : InstRW<[M1WriteNMISC3], (instregex "^R?(ADD|SUB)HN?2?v")>;
-def : InstRW<[M1WriteNMISC3], (instregex "^[SU]+Q(ADD|SUB)v")>;
-def : InstRW<[M1WriteNMISC3], (instregex "^[SU]RHADDv")>;
-def : InstRW<[M1WriteNMISC1], (instregex "^CM(EQ|GE|GT|HI|HS|LE|LT)v")>;
-def : InstRW<[M1WriteNALU1],  (instregex "^CMTSTv")>;
-def : InstRW<[M1WriteNALU1],  (instregex "^(AND|BIC|EOR|MVNI|NOT|ORN|ORR)v")>;
-def : InstRW<[M1WriteNMISC1], (instregex "^[SU](MIN|MAX)v")>;
-def : InstRW<[M1WriteNMISC2], (instregex "^[SU](MIN|MAX)Pv")>;
-def : InstRW<[M1WriteNMISC3], (instregex "^[SU](MIN|MAX)Vv")>;
-def : InstRW<[M1WriteNMISC4], (instregex "^(MUL|SQR?DMULH)v")>;
-def : InstRW<[M1WriteNMISC4], (instregex "^ML[AS]v")>;
-def : InstRW<[M1WriteNMISC4], (instregex "^(S|U|SQD|SQRD)ML[AS][HL]v")>;
-def : InstRW<[M1WriteNMISC4], (instregex "^(S|U|SQD)MULLv")>;
-def : InstRW<[M1WriteNAL13],  (instregex "^(S|SR|U|UR)SRAv")>;
-def : InstRW<[M1WriteNALU1],  (instregex "^SHL[dv]")>;
-def : InstRW<[M1WriteNALU1],  (instregex "^[SU]SH[LR][dv]")>;
-def : InstRW<[M1WriteNALU1],  (instregex "^S[RS]I[dv]")>;
-def : InstRW<[M1WriteNAL13],  (instregex "^(([SU]Q)?R)?SHRU?N[bhsv]")>;
-def : InstRW<[M1WriteNAL13],  (instregex "^[SU]RSH[LR][dv]")>;
-def : InstRW<[M1WriteNAL13],  (instregex "^[SU]QR?SHLU?[bdhsv]")>;
-
-// ASIMD FP instructions.
-def : InstRW<[M1WriteNALU1],  (instregex "^F(ABS|NEG)v")>;
-def : InstRW<[M1WriteNMISC3], (instregex "^F(ABD|ADD|SUB)v")>;
-def : InstRW<[M1WriteNEONA],  (instregex "^FADDP")>;
-def : InstRW<[M1WriteNMISC1], (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v[^1]")>;
-def : InstRW<[M1WriteFCVT3],  (instregex "^[FVSU]CVTX?[AFLMNPZ][SU]?(_Int)?v")>;
-def : InstRW<[M1WriteFVAR15], (instregex "FDIVv.f32")>;
-def : InstRW<[M1WriteFVAR23], (instregex "FDIVv2f64")>;
-def : InstRW<[M1WriteFVAR15], (instregex "FSQRTv.f32")>;
-def : InstRW<[M1WriteFVAR23], (instregex "FSQRTv2f64")>;
-def : InstRW<[M1WriteNMISC1], (instregex "^F(MAX|MIN)(NM)?V?v")>;
-def : InstRW<[M1WriteNMISC2], (instregex "^F(MAX|MIN)(NM)?Pv")>;
-def : InstRW<[M1WriteNEONJ],  (instregex "^FMULX?v.i")>;
-def : InstRW<[M1WriteFMAC4],  (instregex "^FMULX?v.f")>;
-def : InstRW<[M1WriteNEONK],  (instregex "^FML[AS]v.i")>;
-def : InstRW<[M1WriteFMAC5],  (instregex "^FML[AS]v.f")>;
-def : InstRW<[M1WriteFCVT3],  (instregex "^FRINT[AIMNPXZ]v")>;
-
-// ASIMD miscellaneous instructions.
-def : InstRW<[M1WriteNALU1],  (instregex "^RBITv")>;
-def : InstRW<[M1WriteNAL11],  (instregex "^(BIF|BIT|BSL)v")>;
-def : InstRW<[M1WriteNEONB],  (instregex "^DUPv.+gpr")>;
-def : InstRW<[M1WriteNALU1],  (instregex "^DUPv.+lane")>;
-def : InstRW<[M1WriteNALU1],  (instregex "^EXTv8")>;
-def : InstRW<[M1WriteNEONL],  (instregex "^EXTv16")>;
-def : InstRW<[M1WriteNAL13],  (instregex "^[SU]?Q?XTU?Nv")>;
-def : InstRW<[M1WriteNALU1],  (instregex "^CPY")>;
-def : InstRW<[M1WriteNALU1],  (instregex "^INSv.+lane")>;
-def : InstRW<[M1WriteNALU1],  (instregex "^MOVI[Dv]")>;
-def : InstRW<[M1WriteNALU1],  (instregex "^FMOVv")>;
-def : InstRW<[M1WriteFCVT4],  (instregex "^[FU](RECP|RSQRT)Ev[248]")>;
-def : InstRW<[M1WriteFMAC5],  (instregex "^F(RECP|RSQRT)Sv")>;
-def : InstRW<[M1WriteNALU1],  (instregex "^REV(16|32|64)v")>;
-def : InstRW<[M1WriteNAL11],  (instregex "^TB[LX]v8i8One")>;
-def : InstRW<[WriteSequence<[M1WriteNAL11], 2>],
-                              (instregex "^TB[LX]v8i8Two")>;
-def : InstRW<[WriteSequence<[M1WriteNAL11], 3>],
-                              (instregex "^TB[LX]v8i8Three")>;
-def : InstRW<[WriteSequence<[M1WriteNAL11], 4>],
-                              (instregex "^TB[LX]v8i8Four")>;
-def : InstRW<[M1WriteNAL12],  (instregex "^TB[LX]v16i8One")>;
-def : InstRW<[WriteSequence<[M1WriteNAL12], 2>],
-                              (instregex "^TB[LX]v16i8Two")>;
-def : InstRW<[WriteSequence<[M1WriteNAL12], 3>],
-                              (instregex "^TB[LX]v16i8Three")>;
-def : InstRW<[WriteSequence<[M1WriteNAL12], 4>],
-                              (instregex "^TB[LX]v16i8Four")>;
-def : InstRW<[M1WriteNEOND],  (instregex "^[SU]MOVv")>;
-def : InstRW<[M1WriteNEONC],  (instregex "^INSv.+gpr")>;
-def : InstRW<[M1WriteNALU1],  (instregex "^(TRN|UZP)[12](v8i8|v4i16|v2i32)")>;
-def : InstRW<[M1WriteNALU2],  (instregex "^(TRN|UZP)[12](v16i8|v8i16|v4i32|v2i64)")>;
-def : InstRW<[M1WriteNALU1],  (instregex "^ZIP[12]v")>;
-
-// ASIMD load instructions.
-def : InstRW<[M1WriteVLDD],   (instregex "LD1i(8|16|32)$")>;
-def : InstRW<[M1WriteVLDD,
-              WriteAdr],      (instregex "LD1i(8|16|32)_POST$")>;
-def : InstRW<[M1WriteVLDE],   (instregex "LD1i(64)$")>;
-def : InstRW<[M1WriteVLDE,
-              WriteAdr],      (instregex "LD1i(64)_POST$")>;
-
-def : InstRW<[M1WriteL5],     (instregex "LD1Rv(8b|4h|2s)$")>;
-def : InstRW<[M1WriteL5,
-              WriteAdr],      (instregex "LD1Rv(8b|4h|2s)_POST$")>;
-def : InstRW<[M1WriteL5],     (instregex "LD1Rv(1d)$")>;
-def : InstRW<[M1WriteL5,
-              WriteAdr],      (instregex "LD1Rv(1d)_POST$")>;
-def : InstRW<[M1WriteL5],     (instregex "LD1Rv(16b|8h|4s|2d)$")>;
-def : InstRW<[M1WriteL5,
-              WriteAdr],      (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>;
-
-def : InstRW<[M1WriteL5],     (instregex "LD1Onev(8b|4h|2s|1d)$")>;
-def : InstRW<[M1WriteL5,
-              WriteAdr],      (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[M1WriteL5],     (instregex "LD1Onev(16b|8h|4s|2d)$")>;
-def : InstRW<[M1WriteL5,
-              WriteAdr],      (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>;
-def : InstRW<[M1WriteVLDA],   (instregex "LD1Twov(8b|4h|2s|1d)$")>;
-def : InstRW<[M1WriteVLDA,
-              WriteAdr],      (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[M1WriteVLDA],   (instregex "LD1Twov(16b|8h|4s|2d)$")>;
-def : InstRW<[M1WriteVLDA,
-              WriteAdr],      (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>;
-def : InstRW<[M1WriteVLDB],   (instregex "LD1Threev(8b|4h|2s|1d)$")>;
-def : InstRW<[M1WriteVLDB,
-              WriteAdr],      (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[M1WriteVLDB],   (instregex "LD1Threev(16b|8h|4s|2d)$")>;
-def : InstRW<[M1WriteVLDB,
-              WriteAdr],      (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>;
-def : InstRW<[M1WriteVLDC],   (instregex "LD1Fourv(8b|4h|2s|1d)$")>;
-def : InstRW<[M1WriteVLDC,
-              WriteAdr],      (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[M1WriteVLDC],   (instregex "LD1Fourv(16b|8h|4s|2d)$")>;
-def : InstRW<[M1WriteVLDC,
-              WriteAdr],      (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>;
-
-def : InstRW<[M1WriteVLDG],   (instregex "LD2i(8|16)$")>;
-def : InstRW<[M1WriteVLDG,
-              WriteAdr],      (instregex "LD2i(8|16)_POST$")>;
-def : InstRW<[M1WriteVLDG],   (instregex "LD2i(32)$")>;
-def : InstRW<[M1WriteVLDG,
-              WriteAdr],      (instregex "LD2i(32)_POST$")>;
-def : InstRW<[M1WriteVLDH],   (instregex "LD2i(64)$")>;
-def : InstRW<[M1WriteVLDH,
-              WriteAdr],      (instregex "LD2i(64)_POST$")>;
-
-def : InstRW<[M1WriteVLDA],   (instregex "LD2Rv(8b|4h|2s)$")>;
-def : InstRW<[M1WriteVLDA,
-              WriteAdr],      (instregex "LD2Rv(8b|4h|2s)_POST$")>;
-def : InstRW<[M1WriteVLDA],   (instregex "LD2Rv(1d)$")>;
-def : InstRW<[M1WriteVLDA,
-              WriteAdr],      (instregex "LD2Rv(1d)_POST$")>;
-def : InstRW<[M1WriteVLDA],   (instregex "LD2Rv(16b|8h|4s|2d)$")>;
-def : InstRW<[M1WriteVLDA,
-              WriteAdr],      (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>;
-
-def : InstRW<[M1WriteVLDF],   (instregex "LD2Twov(8b|4h|2s)$")>;
-def : InstRW<[M1WriteVLDF,
-              WriteAdr],      (instregex "LD2Twov(8b|4h|2s)_POST$")>;
-def : InstRW<[M1WriteVLDF],   (instregex "LD2Twov(16b|8h|4s)$")>;
-def : InstRW<[M1WriteVLDF,
-              WriteAdr],      (instregex "LD2Twov(16b|8h|4s)_POST$")>;
-def : InstRW<[M1WriteVLDF],   (instregex "LD2Twov(2d)$")>;
-def : InstRW<[M1WriteVLDF,
-              WriteAdr],      (instregex "LD2Twov(2d)_POST$")>;
-
-def : InstRW<[M1WriteVLDJ],   (instregex "LD3i(8|16)$")>;
-def : InstRW<[M1WriteVLDJ,
-              WriteAdr],      (instregex "LD3i(8|16)_POST$")>;
-def : InstRW<[M1WriteVLDJ],   (instregex "LD3i(32)$")>;
-def : InstRW<[M1WriteVLDJ,
-              WriteAdr],      (instregex "LD3i(32)_POST$")>;
-def : InstRW<[M1WriteVLDL],   (instregex "LD3i(64)$")>;
-def : InstRW<[M1WriteVLDL,
-              WriteAdr],      (instregex "LD3i(64)_POST$")>;
-
-def : InstRW<[M1WriteVLDB],   (instregex "LD3Rv(8b|4h|2s)$")>;
-def : InstRW<[M1WriteVLDB,
-              WriteAdr],      (instregex "LD3Rv(8b|4h|2s)_POST$")>;
-def : InstRW<[M1WriteVLDB],   (instregex "LD3Rv(1d)$")>;
-def : InstRW<[M1WriteVLDB,
-              WriteAdr],      (instregex "LD3Rv(1d)_POST$")>;
-def : InstRW<[M1WriteVLDB],   (instregex "LD3Rv(16b|8h|4s)$")>;
-def : InstRW<[M1WriteVLDB,
-              WriteAdr],      (instregex "LD3Rv(16b|8h|4s)_POST$")>;
-def : InstRW<[M1WriteVLDB],   (instregex "LD3Rv(2d)$")>;
-def : InstRW<[M1WriteVLDB,
-              WriteAdr],      (instregex "LD3Rv(2d)_POST$")>;
-
-def : InstRW<[M1WriteVLDI],   (instregex "LD3Threev(8b|4h|2s)$")>;
-def : InstRW<[M1WriteVLDI,
-              WriteAdr],      (instregex "LD3Threev(8b|4h|2s)_POST$")>;
-def : InstRW<[M1WriteVLDI],   (instregex "LD3Threev(16b|8h|4s)$")>;
-def : InstRW<[M1WriteVLDI,
-              WriteAdr],      (instregex "LD3Threev(16b|8h|4s)_POST$")>;
-def : InstRW<[M1WriteVLDI],   (instregex "LD3Threev(2d)$")>;
-def : InstRW<[M1WriteVLDI,
-              WriteAdr],      (instregex "LD3Threev(2d)_POST$")>;
-
-def : InstRW<[M1WriteVLDK],   (instregex "LD4i(8|16)$")>;
-def : InstRW<[M1WriteVLDK,
-              WriteAdr],      (instregex "LD4i(8|16)_POST$")>;
-def : InstRW<[M1WriteVLDK],   (instregex "LD4i(32)$")>;
-def : InstRW<[M1WriteVLDK,
-              WriteAdr],      (instregex "LD4i(32)_POST$")>;
-def : InstRW<[M1WriteVLDM],   (instregex "LD4i(64)$")>;
-def : InstRW<[M1WriteVLDM,
-              WriteAdr],      (instregex "LD4i(64)_POST$")>;
-
-def : InstRW<[M1WriteVLDC],   (instregex "LD4Rv(8b|4h|2s)$")>;
-def : InstRW<[M1WriteVLDC,
-              WriteAdr],      (instregex "LD4Rv(8b|4h|2s)_POST$")>;
-def : InstRW<[M1WriteVLDC],   (instregex "LD4Rv(1d)$")>;
-def : InstRW<[M1WriteVLDC,
-              WriteAdr],      (instregex "LD4Rv(1d)_POST$")>;
-def : InstRW<[M1WriteVLDC],   (instregex "LD4Rv(16b|8h|4s)$")>;
-def : InstRW<[M1WriteVLDC,
-              WriteAdr],      (instregex "LD4Rv(16b|8h|4s)_POST$")>;
-def : InstRW<[M1WriteVLDC],   (instregex "LD4Rv(2d)$")>;
-def : InstRW<[M1WriteVLDC,
-              WriteAdr],      (instregex "LD4Rv(2d)_POST$")>;
-
-def : InstRW<[M1WriteVLDN],   (instregex "LD4Fourv(8b|4h|2s)$")>;
-def : InstRW<[M1WriteVLDN,
-              WriteAdr],      (instregex "LD4Fourv(8b|4h|2s)_POST$")>;
-def : InstRW<[M1WriteVLDN],   (instregex "LD4Fourv(16b|8h|4s)$")>;
-def : InstRW<[M1WriteVLDN,
-              WriteAdr],      (instregex "LD4Fourv(16b|8h|4s)_POST$")>;
-def : InstRW<[M1WriteVLDN],   (instregex "LD4Fourv(2d)$")>;
-def : InstRW<[M1WriteVLDN,
-              WriteAdr],      (instregex "LD4Fourv(2d)_POST$")>;
-
-// ASIMD store instructions.
-def : InstRW<[M1WriteVSTD],   (instregex "ST1i(8|16|32)$")>;
-def : InstRW<[M1WriteVSTD,
-              WriteAdr],      (instregex "ST1i(8|16|32)_POST$")>;
-def : InstRW<[M1WriteVSTD],   (instregex "ST1i(64)$")>;
-def : InstRW<[M1WriteVSTD,
-              WriteAdr],      (instregex "ST1i(64)_POST$")>;
-
-def : InstRW<[WriteVST],      (instregex "ST1Onev(8b|4h|2s|1d)$")>;
-def : InstRW<[WriteVST,
-              WriteAdr],      (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[WriteVST],      (instregex "ST1Onev(16b|8h|4s|2d)$")>;
-def : InstRW<[WriteVST,
-              WriteAdr],      (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>;
-def : InstRW<[M1WriteVSTA],   (instregex "ST1Twov(8b|4h|2s|1d)$")>;
-def : InstRW<[M1WriteVSTA,
-              WriteAdr],      (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[M1WriteVSTA],   (instregex "ST1Twov(16b|8h|4s|2d)$")>;
-def : InstRW<[M1WriteVSTA,
-              WriteAdr],      (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>;
-def : InstRW<[M1WriteVSTB],   (instregex "ST1Threev(8b|4h|2s|1d)$")>;
-def : InstRW<[M1WriteVSTB,
-              WriteAdr],      (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[M1WriteVSTB],   (instregex "ST1Threev(16b|8h|4s|2d)$")>;
-def : InstRW<[M1WriteVSTB,
-              WriteAdr],      (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>;
-def : InstRW<[M1WriteVSTC],   (instregex "ST1Fourv(8b|4h|2s|1d)$")>;
-def : InstRW<[M1WriteVSTC,
-              WriteAdr],      (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[M1WriteVSTC],   (instregex "ST1Fourv(16b|8h|4s|2d)$")>;
-def : InstRW<[M1WriteVSTC,
-              WriteAdr], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>;
-
-def : InstRW<[M1WriteVSTD],   (instregex "ST2i(8|16|32)$")>;
-def : InstRW<[M1WriteVSTD,
-              WriteAdr],      (instregex "ST2i(8|16|32)_POST$")>;
-def : InstRW<[M1WriteVSTD],   (instregex "ST2i(64)$")>;
-def : InstRW<[M1WriteVSTD,
-              WriteAdr],      (instregex "ST2i(64)_POST$")>;
-
-def : InstRW<[M1WriteVSTD],   (instregex "ST2Twov(8b|4h|2s)$")>;
-def : InstRW<[M1WriteVSTD,
-              WriteAdr],      (instregex "ST2Twov(8b|4h|2s)_POST$")>;
-def : InstRW<[M1WriteVSTE],   (instregex "ST2Twov(16b|8h|4s)$")>;
-def : InstRW<[M1WriteVSTE,
-              WriteAdr],      (instregex "ST2Twov(16b|8h|4s)_POST$")>;
-def : InstRW<[M1WriteVSTE],   (instregex "ST2Twov(2d)$")>;
-def : InstRW<[M1WriteVSTE,
-              WriteAdr],      (instregex "ST2Twov(2d)_POST$")>;
-
-def : InstRW<[M1WriteVSTH],   (instregex "ST3i(8|16)$")>;
-def : InstRW<[M1WriteVSTH,
-              WriteAdr],      (instregex "ST3i(8|16)_POST$")>;
-def : InstRW<[M1WriteVSTH],   (instregex "ST3i(32)$")>;
-def : InstRW<[M1WriteVSTH,
-              WriteAdr],      (instregex "ST3i(32)_POST$")>;
-def : InstRW<[M1WriteVSTF],   (instregex "ST3i(64)$")>;
-def : InstRW<[M1WriteVSTF,
-              WriteAdr],      (instregex "ST3i(64)_POST$")>;
-
-def : InstRW<[M1WriteVSTF],   (instregex "ST3Threev(8b|4h|2s)$")>;
-def : InstRW<[M1WriteVSTF,
-              WriteAdr],      (instregex "ST3Threev(8b|4h|2s)_POST$")>;
-def : InstRW<[M1WriteVSTG],   (instregex "ST3Threev(16b|8h|4s)$")>;
-def : InstRW<[M1WriteVSTG,
-              WriteAdr],      (instregex "ST3Threev(16b|8h|4s)_POST$")>;
-def : InstRW<[M1WriteVSTG],   (instregex "ST3Threev(2d)$")>;
-def : InstRW<[M1WriteVSTG,
-              WriteAdr],      (instregex "ST3Threev(2d)_POST$")>;
-
-def : InstRW<[M1WriteVSTH],   (instregex "ST4i(8|16)$")>;
-def : InstRW<[M1WriteVSTH,
-              WriteAdr],      (instregex "ST4i(8|16)_POST$")>;
-def : InstRW<[M1WriteVSTH],   (instregex "ST4i(32)$")>;
-def : InstRW<[M1WriteVSTH,
-              WriteAdr],      (instregex "ST4i(32)_POST$")>;
-def : InstRW<[M1WriteVSTF],   (instregex "ST4i(64)$")>;
-def : InstRW<[M1WriteVSTF,
-              WriteAdr],      (instregex "ST4i(64)_POST$")>;
-
-def : InstRW<[M1WriteVSTF],   (instregex "ST4Fourv(8b|4h|2s)$")>;
-def : InstRW<[M1WriteVSTF,
-              WriteAdr],      (instregex "ST4Fourv(8b|4h|2s)_POST$")>;
-def : InstRW<[M1WriteVSTI],   (instregex "ST4Fourv(16b|8h|4s)$")>;
-def : InstRW<[M1WriteVSTI,
-              WriteAdr],      (instregex "ST4Fourv(16b|8h|4s)_POST$")>;
-def : InstRW<[M1WriteVSTI],   (instregex "ST4Fourv(2d)$")>;
-def : InstRW<[M1WriteVSTI,
-              WriteAdr],      (instregex "ST4Fourv(2d)_POST$")>;
-
-// Cryptography instructions.
-def M1WriteAES : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 1; }
-def M1ReadAES  : SchedReadAdvance<1, [M1WriteAES]>;
-def : InstRW<[M1WriteAES], (instregex "^AES[DE]")>;
-def : InstRW<[M1WriteAES, M1ReadAES], (instregex "^AESI?MC")>;
-
-def : InstRW<[M1WriteNCRYPT1], (instregex "^PMUL")>;
-def : InstRW<[M1WriteNCRYPT1], (instregex "^SHA1(H|SU)")>;
-def : InstRW<[M1WriteNCRYPT5], (instregex "^SHA1[CMP]")>;
-def : InstRW<[M1WriteNCRYPT1], (instregex "^SHA256SU0")>;
-def : InstRW<[M1WriteNCRYPT5], (instregex "^SHA256(H|SU1)")>;
-
-// CRC instructions.
-def : InstRW<[M1WriteC2], (instregex "^CRC32")>;
-
-} // SchedModel = ExynosM1Model
--- a/suite/synctools/tablegen/AArch64/AArch64SchedExynosM3.td
+++ b/suite/synctools/tablegen/AArch64/AArch64SchedExynosM3.td
@ -1,9 +1,8 @@
 //=- AArch64SchedExynosM3.td - Samsung Exynos M3 Sched Defs --*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@ -25,10 +24,9 @@ def ExynosM3Model : SchedMachineModel {
  let MispredictPenalty     =  16; // Minimum branch misprediction penalty.
  let CompleteModel         =   1; // Use the default model otherwise.

-  list<Predicate> UnsupportedFeatures = [HasSVE];
-
-  // FIXME: Remove when all errors have been fixed.
-  let FullInstRWOverlapCheck = 0;
+  list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
+                                                    PAUnsupported.F,
+                                                    SMEUnsupported.F);
 }

 //===----------------------------------------------------------------------===//
@ -106,24 +104,13 @@ def M3UnitNSHF : ProcResGroup<[M3UnitNSHF0,
                               M3UnitNSHF1,
                               M3UnitNSHF2]>;

-//===----------------------------------------------------------------------===//
-// Predicates.
-
-def M3BranchLinkFastPred  : SchedPredicate<[{MI->getOpcode() == AArch64::BLR &&
-                                             MI->getOperand(0).isReg() &&
-                                             MI->getOperand(0).getReg() != AArch64::LR}]>;
-def M3ResetFastPred       : SchedPredicate<[{TII->isExynosResetFast(*MI)}]>;
-def M3RotateRightFastPred : SchedPredicate<[{(MI->getOpcode() == AArch64::EXTRWrri ||
-                                              MI->getOpcode() == AArch64::EXTRXrri) &&
-                                             MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
-                                             MI->getOperand(1).getReg() == MI->getOperand(2).getReg()}]>;
-def M3ShiftLeftFastPred   : SchedPredicate<[{TII->isExynosShiftLeftFast(*MI)}]>;
-
 //===----------------------------------------------------------------------===//
 // Coarse scheduling model.

 def M3WriteZ0 : SchedWriteRes<[]> { let Latency = 0;
                                    let NumMicroOps = 1; }
+def M3WriteZ1 : SchedWriteRes<[]> { let Latency = 1;
+                                    let NumMicroOps = 0; }

 def M3WriteA1 : SchedWriteRes<[M3UnitALU]> { let Latency = 1; }
 def M3WriteAA : SchedWriteRes<[M3UnitALU]> { let Latency = 2;
@ -140,15 +127,25 @@ def M3WriteAD : SchedWriteRes<[M3UnitALU,
                                             let NumMicroOps = 2; }
 def M3WriteC1 : SchedWriteRes<[M3UnitC]>   { let Latency = 1; }
 def M3WriteC2 : SchedWriteRes<[M3UnitC]>   { let Latency = 2; }
-def M3WriteAX : SchedWriteVariant<[SchedVar<M3ResetFastPred,     [M3WriteZ0]>,
-                                   SchedVar<M3ShiftLeftFastPred, [M3WriteA1]>,
-                                   SchedVar<NoSchedPred,         [M3WriteAA]>]>;
-def M3WriteAY : SchedWriteVariant<[SchedVar<M3RotateRightFastPred, [M3WriteA1]>,
-                                   SchedVar<NoSchedPred,           [M3WriteAA]>]>;
+def M3WriteAU : SchedWriteVariant<[SchedVar<IsCopyIdiomPred, [M3WriteZ0]>,
+                                   SchedVar<ExynosArithPred, [M3WriteA1]>,
+                                   SchedVar<ExynosLogicPred, [M3WriteA1]>,
+                                   SchedVar<NoSchedPred,     [M3WriteAA]>]>;
+def M3WriteAV : SchedWriteVariant<[SchedVar<IsCopyIdiomPred, [M3WriteZ0]>,
+                                   SchedVar<ExynosArithPred, [M3WriteA1]>,
+                                   SchedVar<NoSchedPred,     [M3WriteAA]>]>;
+def M3WriteAW : SchedWriteVariant<[SchedVar<IsZeroIdiomPred, [M3WriteZ0]>,
+                                   SchedVar<ExynosLogicPred, [M3WriteA1]>,
+                                   SchedVar<NoSchedPred,     [M3WriteAA]>]>;
+def M3WriteAX : SchedWriteVariant<[SchedVar<ExynosArithPred, [M3WriteA1]>,
+                                   SchedVar<ExynosLogicPred, [M3WriteA1]>,
+                                   SchedVar<NoSchedPred,     [M3WriteAA]>]>;
+def M3WriteAY : SchedWriteVariant<[SchedVar<ExynosRotateRightImmPred, [M3WriteA1]>,
+                                   SchedVar<NoSchedPred,              [M3WriteAA]>]>;

 def M3WriteB1 : SchedWriteRes<[M3UnitB]> { let Latency = 1; }
-def M3WriteBX : SchedWriteVariant<[SchedVar<M3BranchLinkFastPred, [M3WriteAB]>,
-                                   SchedVar<NoSchedPred,          [M3WriteAC]>]>;
+def M3WriteBX : SchedWriteVariant<[SchedVar<ExynosBranchLinkLRPred, [M3WriteAC]>,
+                                   SchedVar<NoSchedPred,            [M3WriteAB]>]>;

 def M3WriteL4 : SchedWriteRes<[M3UnitL]> { let Latency = 4; }
 def M3WriteL5 : SchedWriteRes<[M3UnitL]> { let Latency = 5; }
@ -165,44 +162,46 @@ def M3WriteLC : SchedWriteRes<[M3UnitA,
 def M3WriteLD : SchedWriteRes<[M3UnitA,
                               M3UnitL]> { let Latency = 4;
                                           let NumMicroOps = 2; }
+def M3WriteLE : SchedWriteRes<[M3UnitA,
+                               M3UnitL]> { let Latency = 6;
+                                           let NumMicroOps = 2; }
 def M3WriteLH : SchedWriteRes<[]>        { let Latency = 5;
                                           let NumMicroOps = 0; }
-
-def M3WriteLX : SchedWriteVariant<[SchedVar<M3ShiftLeftFastPred, [M3WriteL5]>,
-                                   SchedVar<NoSchedPred,         [M3WriteLB]>]>;
+def M3WriteLX : SchedWriteVariant<[SchedVar<ExynosScaledIdxPred, [M3WriteL5]>,
+                                   SchedVar<NoSchedPred,         [M3WriteL4]>]>;
+def M3WriteLY : SchedWriteVariant<[SchedVar<ExynosScaledIdxPred, [M3WriteLE]>,
+                                   SchedVar<NoSchedPred,         [M3WriteL5]>]>;

 def M3WriteS1 : SchedWriteRes<[M3UnitS]>   { let Latency = 1; }
 def M3WriteSA : SchedWriteRes<[M3UnitA,
                               M3UnitS,
-                               M3UnitFST]> { let Latency = 2;
+                               M3UnitFST]> { let Latency = 3;
                                             let NumMicroOps = 2; }
 def M3WriteSB : SchedWriteRes<[M3UnitA,
-                               M3UnitS]>   { let Latency = 1;
-                                             let NumMicroOps = 2; }
-def M3WriteSC : SchedWriteRes<[M3UnitA,
                               M3UnitS]>   { let Latency = 2;
                                             let NumMicroOps = 2; }
+def M3WriteSC : SchedWriteRes<[M3UnitA,
+                               M3UnitS,
+                               M3UnitFST]> { let Latency = 1;
+                                             let NumMicroOps = 2; }
+def M3WriteSY : SchedWriteVariant<[SchedVar<ExynosScaledIdxPred, [M3WriteSA]>,
+                                   SchedVar<NoSchedPred,         [WriteVST]>]>;

-def M3WriteSX : SchedWriteVariant<[SchedVar<M3ShiftLeftFastPred, [M3WriteS1]>,
-                                   SchedVar<NoSchedPred,         [M3WriteSB]>]>;
-def M3WriteSY : SchedWriteVariant<[SchedVar<M3ShiftLeftFastPred, [M3WriteS1]>,
-                                   SchedVar<NoSchedPred,         [M3WriteSC]>]>;
-
-def M3ReadAdrBase : SchedReadVariant<[SchedVar<ScaledIdxPred, [ReadDefault]>,
-                                      SchedVar<NoSchedPred,   [ReadDefault]>]>;
+def M3ReadAdrBase : SchedReadVariant<[SchedVar<ExynosScaledIdxPred, [ReadDefault]>,
+                                      SchedVar<NoSchedPred,         [ReadDefault]>]>;

 // Branch instructions.
 def : SchedAlias<WriteBr, M3WriteZ0>;
-def : WriteRes<WriteBrReg, [M3UnitC]> { let Latency = 1; }
+def : SchedAlias<WriteBrReg, M3WriteC1>;

 // Arithmetic and logical integer instructions.
-def : WriteRes<WriteI,     [M3UnitALU]> { let Latency = 1; }
-def : WriteRes<WriteISReg, [M3UnitALU]> { let Latency = 1; }
-def : WriteRes<WriteIEReg, [M3UnitALU]> { let Latency = 1; }
-def : WriteRes<WriteIS,    [M3UnitALU]> { let Latency = 1; }
+def : SchedAlias<WriteI,     M3WriteA1>;
+def : SchedAlias<WriteISReg, M3WriteA1>;
+def : SchedAlias<WriteIEReg, M3WriteA1>;
+def : SchedAlias<WriteIS,    M3WriteA1>;

 // Move instructions.
-def : WriteRes<WriteImm, [M3UnitALU]> { let Latency = 1; }
+def : SchedAlias<WriteImm, M3WriteA1>;

 // Divide and multiply instructions.
 def : WriteRes<WriteID32, [M3UnitC,
@ -216,26 +215,23 @@ def : WriteRes<WriteIM64, [M3UnitC]>  { let Latency = 4;
                                        let ResourceCycles = [2]; }

 // Miscellaneous instructions.
-def : WriteRes<WriteExtr, [M3UnitALU,
-                           M3UnitALU]> { let Latency = 1;
-                                         let NumMicroOps = 2; }
+def : SchedAlias<WriteExtr, M3WriteAY>;

 // Addressing modes.
-def : WriteRes<WriteAdr, []> { let Latency = 1;
-                               let NumMicroOps = 0; }
+def : SchedAlias<WriteAdr,    M3WriteZ1>;
 def : SchedAlias<ReadAdrBase, M3ReadAdrBase>;

 // Load instructions.
 def : SchedAlias<WriteLD, M3WriteL4>;
 def : WriteRes<WriteLDHi, []> { let Latency = 4;
                                let NumMicroOps = 0; }
-def : SchedAlias<WriteLDIdx, M3WriteLX>;
+def : SchedAlias<WriteLDIdx, M3WriteLB>;

 // Store instructions.
 def : SchedAlias<WriteST,    M3WriteS1>;
 def : SchedAlias<WriteSTP,   M3WriteS1>;
 def : SchedAlias<WriteSTX,   M3WriteS1>;
-def : SchedAlias<WriteSTIdx, M3WriteSX>;
+def : SchedAlias<WriteSTIdx, M3WriteSB>;

 // FP data instructions.
 def : WriteRes<WriteF,    [M3UnitFADD]>  { let Latency = 2; }
@ -245,7 +241,6 @@ def : WriteRes<WriteFDiv, [M3UnitFDIV]>  { let Latency = 12;
 def : WriteRes<WriteFMul, [M3UnitFMAC]>  { let Latency = 4; }

 // FP miscellaneous instructions.
-// TODO: Conversion between register files is much different.
 def : WriteRes<WriteFCvt,  [M3UnitFCVT]> { let Latency = 3; }
 def : WriteRes<WriteFImm,  [M3UnitNALU]> { let Latency = 1; }
 def : WriteRes<WriteFCopy, [M3UnitNALU]> { let Latency = 1; }
@ -259,7 +254,8 @@ def : WriteRes<WriteVST, [M3UnitS,
                                        let NumMicroOps = 1; }

 // ASIMD FP instructions.
-def : WriteRes<WriteV, [M3UnitNALU]> { let Latency = 3; }
+def : WriteRes<WriteVd, [M3UnitNALU]> { let Latency = 3; }
+def : WriteRes<WriteVq, [M3UnitNALU]> { let Latency = 3; }

 // Other miscellaneous instructions.
 def : WriteRes<WriteAtomic,  []> { let Unsupported = 1; }
@ -282,6 +278,7 @@ def : ReadAdvance<ReadID,      0>;
 def : ReadAdvance<ReadExtrHi,  0>;
 def : ReadAdvance<ReadAdrBase, 0>;
 def : ReadAdvance<ReadVLD,     0>;
+def : ReadAdvance<ReadST,      0>;

 //===----------------------------------------------------------------------===//
 // Finer scheduling model.
@ -481,11 +478,15 @@ def M3WriteVSTI    : SchedWriteRes<[M3UnitNALU,

 // Special cases.
 def M3WriteAES     : SchedWriteRes<[M3UnitNCRY]>  { let Latency = 1; }
+def M3WriteCOPY    : SchedWriteVariant<[SchedVar<ExynosFPPred, [M3WriteNALU1]>,
+                                        SchedVar<NoSchedPred,  [M3WriteZ0]>]>;
+def M3WriteMOVI    : SchedWriteVariant<[SchedVar<IsZeroFPIdiomPred, [M3WriteZ0]>,
+                                        SchedVar<NoSchedPred,       [M3WriteNALU1]>]>;
+
+// Fast forwarding.
 def M3ReadAES      : SchedReadAdvance<1, [M3WriteAES]>;
 def M3ReadFMAC     : SchedReadAdvance<1, [M3WriteFMAC4,
                                          M3WriteFMAC5]>;
-def M3WriteMOVI    : SchedWriteVariant<[SchedVar<M3ResetFastPred, [M3WriteZ0]>,
-                                        SchedVar<NoSchedPred,     [M3WriteNALU1]>]>;
 def M3ReadNMUL     : SchedReadAdvance<1, [M3WriteNMUL3]>;

 // Branch instructions
@ -496,29 +497,40 @@ def : InstRW<[M3WriteC1], (instregex "^CBN?Z[WX]")>;
 def : InstRW<[M3WriteAD], (instregex "^TBN?Z[WX]")>;

 // Arithmetic and logical integer instructions.
-def : InstRW<[M3WriteA1], (instrs COPY)>;
-def : InstRW<[M3WriteAX], (instregex "^(ADD|SUB)S?Xrx64")>;
-def : InstRW<[M3WriteAX], (instregex "^(ADD|AND|BIC|EON|EOR|ORN|ORR|SUB)[WX]r[sx]$")>;
-def : InstRW<[M3WriteAX], (instregex "^(ADD|BIC|SUB)S[WX]r[sx]$")>;
-def : InstRW<[M3WriteAX], (instregex "^(ADD|AND|EOR|ORR|SUB)[WX]ri")>;
+def : InstRW<[M3WriteAX], (instregex "^(ADD|AND|BIC|EON|EOR|ORN|SUB)[WX]rs$")>;
+def : InstRW<[M3WriteAU], (instrs ORRWrs, ORRXrs)>;
+def : InstRW<[M3WriteAX], (instregex "^(ADD|SUB)S?[WX]rx(64)?$")>;
+def : InstRW<[M3WriteAX], (instregex "^(ADD|AND|BIC|SUB)S[WX]rs$")>;
+def : InstRW<[M3WriteAV], (instrs ADDWri, ADDXri)>;
+def : InstRW<[M3WriteAW], (instrs ORRWri, ORRXri)>;

 // Move instructions.
-def : InstRW<[M3WriteZ0], (instrs ADR, ADRP)>;
-def : InstRW<[M3WriteZ0], (instregex "^MOV[NZ][WX]i")>;
+def : InstRW<[M3WriteCOPY], (instrs COPY)>;
+def : InstRW<[M3WriteZ0],   (instrs ADR, ADRP)>;
+def : InstRW<[M3WriteZ0],   (instregex "^MOV[NZ][WX]i")>;

 // Divide and multiply instructions.

 // Miscellaneous instructions.
-def : InstRW<[M3WriteAY], (instrs EXTRWrri, EXTRXrri)>;

 // Load instructions.
 def : InstRW<[M3WriteLD,
              WriteLDHi,
              WriteAdr],    (instregex "^LDP(SW|W|X)(post|pre)")>;
+def : InstRW<[M3WriteLB,
+              ReadAdrBase], (instregex "^LDR(BB|SBW|SBX|HH|SHW|SHX|SW|W|X)roW")>;
 def : InstRW<[M3WriteLX,
-              ReadAdrBase], (instregex "^PRFMro[WX]")>;
+              ReadAdrBase], (instregex "^LDR(BB|SBW|SBX|HH|SHW|SHX|SW|W|X)roX")>;
+def : InstRW<[M3WriteLB,
+              ReadAdrBase], (instrs PRFMroW)>;
+def : InstRW<[M3WriteLX,
+              ReadAdrBase], (instrs PRFMroX)>;

 // Store instructions.
+def : InstRW<[M3WriteSB,
+              ReadAdrBase], (instregex "^STR(BB|HH|W|X)roW")>;
+def : InstRW<[WriteST,
+              ReadAdrBase], (instregex "^STR(BB|HH|W|X)roX")>;

 // FP data instructions.
 def : InstRW<[M3WriteNSHF1],  (instregex "^FABS[DS]r")>;
@ -555,9 +567,11 @@ def : InstRW<[WriteVLD],    (instregex "^LDUR[BDHSQ]i")>;
 def : InstRW<[WriteVLD,
              WriteAdr],    (instregex "^LDR[BDHSQ](post|pre)")>;
 def : InstRW<[WriteVLD],    (instregex "^LDR[BDHSQ]ui")>;
-def : InstRW<[M3WriteLX,
-              ReadAdrBase], (instregex "^LDR[BDHS]ro[WX]")>;
-def : InstRW<[M3WriteLB,
+def : InstRW<[M3WriteLE,
+              ReadAdrBase], (instregex "^LDR[BDHS]roW")>;
+def : InstRW<[WriteVLD,
+              ReadAdrBase], (instregex "^LDR[BDHS]roX")>;
+def : InstRW<[M3WriteLY,
              ReadAdrBase], (instregex "^LDRQro[WX]")>;
 def : InstRW<[WriteVLD,
              M3WriteLH],   (instregex "^LDN?P[DS]i")>;
@ -575,20 +589,24 @@ def : InstRW<[WriteVST],    (instregex "^STUR[BDHSQ]i")>;
 def : InstRW<[WriteVST,
              WriteAdr],    (instregex "^STR[BDHSQ](post|pre)")>;
 def : InstRW<[WriteVST],    (instregex "^STR[BDHSQ]ui")>;
-def : InstRW<[M3WriteSY,
-              ReadAdrBase], (instregex "^STR[BDHS]ro[WX]")>;
 def : InstRW<[M3WriteSA,
-              ReadAdrBase], (instregex "^STRQro[WX]")>;
+              ReadAdrBase], (instregex "^STR[BDHS]roW")>;
+def : InstRW<[M3WriteSA,
+              ReadAdrBase], (instregex "^STRQroW")>;
+def : InstRW<[WriteVST,
+              ReadAdrBase], (instregex "^STR[BDHS]roX")>;
+def : InstRW<[M3WriteSY,
+              ReadAdrBase], (instregex "^STRQroX")>;
 def : InstRW<[WriteVST],    (instregex "^STN?P[DSQ]i")>;
 def : InstRW<[WriteVST,
              WriteAdr],    (instregex "^STP[DS](post|pre)")>;
-def : InstRW<[M3WriteSA,
+def : InstRW<[M3WriteSC,
              WriteAdr],    (instregex "^STPQ(post|pre)")>;

 // ASIMD instructions.
 def : InstRW<[M3WriteNMSC3], (instregex "^[SU]ABAL?v")>;
 def : InstRW<[M3WriteNMSC1], (instregex "^[SU]ABDL?v")>;
-def : InstRW<[M3WriteNMSC1], (instregex "^(SQ)?(ABS|NEG)v")>;
+def : InstRW<[M3WriteNMSC1], (instregex "^((SQ)?ABS|SQNEG)v")>;
 def : InstRW<[M3WriteNALU1], (instregex "^(ADD|NEG|SUB)v")>;
 def : InstRW<[M3WriteNMSC3], (instregex "^[SU]?ADDL?Pv")>;
 def : InstRW<[M3WriteNMSC3], (instregex "^[SU]H(ADD|SUB)v")>;
@ -597,7 +615,6 @@ def : InstRW<[M3WriteNMSC3], (instregex "^R?(ADD|SUB)HN2?v")>;
 def : InstRW<[M3WriteNMSC3], (instregex "^[SU]Q(ADD|SUB)v")>;
 def : InstRW<[M3WriteNMSC3], (instregex "^(SU|US)QADDv")>;
 def : InstRW<[M3WriteNMSC3], (instregex "^[SU]RHADDv")>;
-def : InstRW<[M3WriteNMSC3], (instregex "^[SU]?ADDL?Vv")>;
 def : InstRW<[M3WriteNMSC1], (instregex "^CM(EQ|GE|GT|HI|HS|LE|LT)v")>;
 def : InstRW<[M3WriteNALU1], (instregex "^CMTSTv")>;
 def : InstRW<[M3WriteNALU1], (instregex "^(AND|BIC|EOR|MVNI|NOT|ORN|ORR)v")>;
@ -647,12 +664,12 @@ def : InstRW<[M3WriteNEONY],  (instrs FSQRTv2f64)>;

 // ASIMD miscellaneous instructions.
 def : InstRW<[M3WriteNALU1], (instregex "^RBITv")>;
-def : InstRW<[M3WriteNALU1], (instregex "^(BIF|BIT|BSL)v")>;
+def : InstRW<[M3WriteNALU1], (instregex "^(BIF|BIT|BSL|BSP)v")>;
 def : InstRW<[M3WriteNEONB], (instregex "^DUPv.+gpr")>;
 def : InstRW<[M3WriteNSHF1], (instregex "^DUPv.+lane")>;
 def : InstRW<[M3WriteNSHF1], (instregex "^EXTv")>;
 def : InstRW<[M3WriteNSHF1], (instregex "^[SU]?Q?XTU?Nv")>;
-def : InstRW<[M3WriteNSHF1], (instregex "^CPY")>;
+def : InstRW<[M3WriteNSHF1], (instregex "^DUP(i8|i16|i32|i64)$")>;
 def : InstRW<[M3WriteNSHF1], (instregex "^INSv.+lane")>;
 def : InstRW<[M3WriteMOVI],  (instregex "^MOVI")>;
 def : InstRW<[M3WriteNALU1], (instregex "^FMOVv")>;
@ -668,108 +685,108 @@ def : InstRW<[M3WriteNSHF1], (instregex "^(TRN|UZP|ZIP)[12]v")>;
 // ASIMD load instructions.
 def : InstRW<[M3WriteL5],   (instregex "LD1Onev(8b|4h|2s|1d)$")>;
 def : InstRW<[M3WriteL5,
-              WriteAdr],    (instregex "LD1Onev(8b|4h|2s|1d)_POST")>;
+              M3WriteA1],   (instregex "LD1Onev(8b|4h|2s|1d)_POST")>;
 def : InstRW<[M3WriteL5],   (instregex "LD1Onev(16b|8h|4s|2d)$")>;
 def : InstRW<[M3WriteL5,
-              WriteAdr],    (instregex "LD1Onev(16b|8h|4s|2d)_POST")>;
+              M3WriteA1],   (instregex "LD1Onev(16b|8h|4s|2d)_POST")>;

 def : InstRW<[M3WriteVLDA], (instregex "LD1Twov(8b|4h|2s|1d)$")>;
 def : InstRW<[M3WriteVLDA,
-              WriteAdr],    (instregex "LD1Twov(8b|4h|2s|1d)_POST")>;
+              M3WriteA1],   (instregex "LD1Twov(8b|4h|2s|1d)_POST")>;
 def : InstRW<[M3WriteVLDA], (instregex "LD1Twov(16b|8h|4s|2d)$")>;
 def : InstRW<[M3WriteVLDA,
-              WriteAdr],    (instregex "LD1Twov(16b|8h|4s|2d)_POST")>;
+              M3WriteA1],   (instregex "LD1Twov(16b|8h|4s|2d)_POST")>;

 def : InstRW<[M3WriteVLDB], (instregex "LD1Threev(8b|4h|2s|1d)$")>;
 def : InstRW<[M3WriteVLDB,
-              WriteAdr],    (instregex "LD1Threev(8b|4h|2s|1d)_POST")>;
+              M3WriteA1],   (instregex "LD1Threev(8b|4h|2s|1d)_POST")>;
 def : InstRW<[M3WriteVLDB], (instregex "LD1Threev(16b|8h|4s|2d)$")>;
 def : InstRW<[M3WriteVLDB,
-              WriteAdr],    (instregex "LD1Threev(16b|8h|4s|2d)_POST")>;
+              M3WriteA1],   (instregex "LD1Threev(16b|8h|4s|2d)_POST")>;

 def : InstRW<[M3WriteVLDC], (instregex "LD1Fourv(8b|4h|2s|1d)$")>;
 def : InstRW<[M3WriteVLDC,
-              WriteAdr],    (instregex "LD1Fourv(8b|4h|2s|1d)_POST")>;
+              M3WriteA1],   (instregex "LD1Fourv(8b|4h|2s|1d)_POST")>;
 def : InstRW<[M3WriteVLDC], (instregex "LD1Fourv(16b|8h|4s|2d)$")>;
 def : InstRW<[M3WriteVLDC,
-              WriteAdr],    (instregex "LD1Fourv(16b|8h|4s|2d)_POST")>;
+              M3WriteA1],   (instregex "LD1Fourv(16b|8h|4s|2d)_POST")>;

 def : InstRW<[M3WriteVLDD], (instregex "LD1i(8|16|32)$")>;
 def : InstRW<[M3WriteVLDD,
-              WriteAdr],    (instregex "LD1i(8|16|32)_POST")>;
+              M3WriteA1],   (instregex "LD1i(8|16|32)_POST")>;
 def : InstRW<[M3WriteVLDE], (instregex "LD1i(64)$")>;
 def : InstRW<[M3WriteVLDE,
-              WriteAdr],    (instregex "LD1i(64)_POST")>;
+              M3WriteA1],   (instregex "LD1i(64)_POST")>;

 def : InstRW<[M3WriteL5],   (instregex "LD1Rv(8b|4h|2s|1d)$")>;
 def : InstRW<[M3WriteL5,
-              WriteAdr],    (instregex "LD1Rv(8b|4h|2s|1d)_POST")>;
+              M3WriteA1],   (instregex "LD1Rv(8b|4h|2s|1d)_POST")>;
 def : InstRW<[M3WriteL5],   (instregex "LD1Rv(16b|8h|4s|2d)$")>;
 def : InstRW<[M3WriteL5,
-              WriteAdr],    (instregex "LD1Rv(16b|8h|4s|2d)_POST")>;
+              M3WriteA1],   (instregex "LD1Rv(16b|8h|4s|2d)_POST")>;

 def : InstRW<[M3WriteVLDF], (instregex "LD2Twov(8b|4h|2s)$")>;
 def : InstRW<[M3WriteVLDF,
-              WriteAdr],    (instregex "LD2Twov(8b|4h|2s)_POST")>;
+              M3WriteA1],   (instregex "LD2Twov(8b|4h|2s)_POST")>;
 def : InstRW<[M3WriteVLDF], (instregex "LD2Twov(16b|8h|4s|2d)$")>;
 def : InstRW<[M3WriteVLDF,
-              WriteAdr],    (instregex "LD2Twov(16b|8h|4s|2d)_POST")>;
+              M3WriteA1],   (instregex "LD2Twov(16b|8h|4s|2d)_POST")>;

 def : InstRW<[M3WriteVLDG], (instregex "LD2i(8|16|32)$")>;
 def : InstRW<[M3WriteVLDG,
-              WriteAdr],    (instregex "LD2i(8|16|32)_POST")>;
+              M3WriteA1],   (instregex "LD2i(8|16|32)_POST")>;
 def : InstRW<[M3WriteVLDH], (instregex "LD2i(64)$")>;
 def : InstRW<[M3WriteVLDH,
-              WriteAdr],    (instregex "LD2i(64)_POST")>;
+              M3WriteA1],   (instregex "LD2i(64)_POST")>;

 def : InstRW<[M3WriteVLDA], (instregex "LD2Rv(8b|4h|2s|1d)$")>;
 def : InstRW<[M3WriteVLDA,
-              WriteAdr],    (instregex "LD2Rv(8b|4h|2s|1d)_POST")>;
+              M3WriteA1],   (instregex "LD2Rv(8b|4h|2s|1d)_POST")>;
 def : InstRW<[M3WriteVLDA], (instregex "LD2Rv(16b|8h|4s|2d)$")>;
 def : InstRW<[M3WriteVLDA,
-              WriteAdr],    (instregex "LD2Rv(16b|8h|4s|2d)_POST")>;
+              M3WriteA1],   (instregex "LD2Rv(16b|8h|4s|2d)_POST")>;

 def : InstRW<[M3WriteVLDI], (instregex "LD3Threev(8b|4h|2s)$")>;
 def : InstRW<[M3WriteVLDI,
-              WriteAdr],    (instregex "LD3Threev(8b|4h|2s)_POST")>;
+              M3WriteA1],   (instregex "LD3Threev(8b|4h|2s)_POST")>;
 def : InstRW<[M3WriteVLDI], (instregex "LD3Threev(16b|8h|4s|2d)$")>;
 def : InstRW<[M3WriteVLDI,
-              WriteAdr],    (instregex "LD3Threev(16b|8h|4s|2d)_POST")>;
+              M3WriteA1],   (instregex "LD3Threev(16b|8h|4s|2d)_POST")>;

 def : InstRW<[M3WriteVLDJ], (instregex "LD3i(8|16|32)$")>;
 def : InstRW<[M3WriteVLDJ,
-              WriteAdr],    (instregex "LD3i(8|16|32)_POST")>;
+              M3WriteA1],   (instregex "LD3i(8|16|32)_POST")>;
 def : InstRW<[M3WriteVLDL], (instregex "LD3i(64)$")>;
 def : InstRW<[M3WriteVLDL,
-              WriteAdr],    (instregex "LD3i(64)_POST")>;
+              M3WriteA1],   (instregex "LD3i(64)_POST")>;

 def : InstRW<[M3WriteVLDB], (instregex "LD3Rv(8b|4h|2s|1d)$")>;
 def : InstRW<[M3WriteVLDB,
-              WriteAdr],    (instregex "LD3Rv(8b|4h|2s|1d)_POST")>;
+              M3WriteA1],   (instregex "LD3Rv(8b|4h|2s|1d)_POST")>;
 def : InstRW<[M3WriteVLDB], (instregex "LD3Rv(16b|8h|4s|2d)$")>;
 def : InstRW<[M3WriteVLDB,
-              WriteAdr],    (instregex "LD3Rv(16b|8h|4s|2d)_POST")>;
+              M3WriteA1],   (instregex "LD3Rv(16b|8h|4s|2d)_POST")>;

 def : InstRW<[M3WriteVLDN], (instregex "LD4Fourv(8b|4h|2s)$")>;
 def : InstRW<[M3WriteVLDN,
-              WriteAdr],    (instregex "LD4Fourv(8b|4h|2s)_POST")>;
+              M3WriteA1],   (instregex "LD4Fourv(8b|4h|2s)_POST")>;
 def : InstRW<[M3WriteVLDN], (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
 def : InstRW<[M3WriteVLDN,
-              WriteAdr],    (instregex "LD4Fourv(16b|8h|4s|2d)_POST")>;
+              M3WriteA1],   (instregex "LD4Fourv(16b|8h|4s|2d)_POST")>;

 def : InstRW<[M3WriteVLDK], (instregex "LD4i(8|16|32)$")>;
 def : InstRW<[M3WriteVLDK,
-              WriteAdr],    (instregex "LD4i(8|16|32)_POST")>;
+              M3WriteA1],   (instregex "LD4i(8|16|32)_POST")>;
 def : InstRW<[M3WriteVLDM], (instregex "LD4i(64)$")>;
 def : InstRW<[M3WriteVLDM,
-              WriteAdr],    (instregex "LD4i(64)_POST")>;
+              M3WriteA1],   (instregex "LD4i(64)_POST")>;

 def : InstRW<[M3WriteVLDC], (instregex "LD4Rv(8b|4h|2s|1d)$")>;
 def : InstRW<[M3WriteVLDC,
-              WriteAdr],    (instregex "LD4Rv(8b|4h|2s|1d)_POST")>;
+              M3WriteA1],   (instregex "LD4Rv(8b|4h|2s|1d)_POST")>;
 def : InstRW<[M3WriteVLDC], (instregex "LD4Rv(16b|8h|4s|2d)$")>;
 def : InstRW<[M3WriteVLDC,
-              WriteAdr],    (instregex "LD4Rv(16b|8h|4s|2d)_POST")>;
+              M3WriteA1],   (instregex "LD4Rv(16b|8h|4s|2d)_POST")>;

 // ASIMD store instructions.
 def : InstRW<[WriteVST],    (instregex "ST1Onev(8b|4h|2s|1d)$")>;
--- a/suite/synctools/tablegen/AArch64/AArch64SchedExynosM4.td
+++ b/suite/synctools/tablegen/AArch64/AArch64SchedExynosM4.td
--- a/suite/synctools/tablegen/AArch64/AArch64SchedExynosM5.td
+++ b/suite/synctools/tablegen/AArch64/AArch64SchedExynosM5.td
--- a/suite/synctools/tablegen/AArch64/AArch64SchedFalkor.td
+++ b/suite/synctools/tablegen/AArch64/AArch64SchedFalkor.td
@ -1,9 +1,8 @@
 //==- AArch64SchedFalkor.td - Falkor Scheduling Definitions -*- tablegen -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@ -24,8 +23,9 @@ def FalkorModel : SchedMachineModel {
  let MispredictPenalty = 11;  // Minimum branch misprediction penalty.
  let CompleteModel = 1;

-  list<Predicate> UnsupportedFeatures = [HasSVE];
-
+  list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
+                                                    PAUnsupported.F,
+                                                    SMEUnsupported.F);
  // FIXME: Remove when all errors have been fixed.
  let FullInstRWOverlapCheck = 0;
 }
@ -92,7 +92,8 @@ def : WriteRes<WriteFCopy, []>   { let Unsupported = 1; }
 def : WriteRes<WriteFImm, []>    { let Unsupported = 1; }
 def : WriteRes<WriteFMul, []>    { let Unsupported = 1; }
 def : WriteRes<WriteFDiv, []>    { let Unsupported = 1; }
-def : WriteRes<WriteV, []>       { let Unsupported = 1; }
+def : WriteRes<WriteVd, []>      { let Unsupported = 1; }
+def : WriteRes<WriteVq, []>      { let Unsupported = 1; }
 def : WriteRes<WriteVLD, []>     { let Unsupported = 1; }
 def : WriteRes<WriteVST, []>     { let Unsupported = 1; }
 def : WriteRes<WriteSys, []>     { let Unsupported = 1; }
@ -111,6 +112,7 @@ def : ReadAdvance<ReadID,      0>;
 def : ReadAdvance<ReadExtrHi,  0>;
 def : ReadAdvance<ReadAdrBase, 0>;
 def : ReadAdvance<ReadVLD,     0>;
+def : ReadAdvance<ReadST,      0>;

 // Detailed Refinements
 // -----------------------------------------------------------------------------
--- a/suite/synctools/tablegen/AArch64/AArch64SchedFalkorDetails.td
+++ b/suite/synctools/tablegen/AArch64/AArch64SchedFalkorDetails.td
@ -1,9 +1,8 @@
 //==- AArch64SchedFalkorDetails.td - Falkor Scheduling Defs -*- tablegen -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@ -909,10 +908,10 @@ def : InstRW<[FalkorWr_ADDSUBsx],     (instregex "^SUB(S)?(W|X)r(s|x|x64)$")>;
 // -----------------------------------------------------------------------------
 def : InstRW<[FalkorWr_1GTOV_1cyc],   (instregex "^DUP(v8i8|v4i16|v2i32)(gpr|lane)$")>;
 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^DUP(v16i8|v8i16)(gpr|lane)$")>;
-def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^CPY(i8|i16|i32|i64)$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^DUP(i8|i16|i32|i64)$")>;
 def : InstRW<[FalkorWr_1GTOV_1cyc],   (instregex "^INSv(i8|i16)(gpr|lane)$")>;
 def : InstRW<[FalkorWr_1VTOG_1cyc],   (instregex "^(S|U)MOVv.*$")>;
-def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^(BIF|BIT|BSL)v8i8$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^(BIF|BIT|BSL|BSP)v8i8$")>;
 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instrs EXTv8i8)>;
 def : InstRW<[FalkorWr_1VXVY_0cyc],   (instregex "(MOVI|MVNI)(D|v8b_ns|v2i32|v4i16|v2s_msl)$")>; // imm fwd
 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instrs TBLv8i8One)>;
@ -936,7 +935,7 @@ def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc],
 def : InstRW<[FalkorWr_1GTOV_1VXVY_2cyc],
                                      (instregex "^INSv(i32|i64)(gpr|lane)$")>;
 def : InstRW<[FalkorWr_2GTOV_1cyc],   (instregex "^DUP(v4i32|v2i64)(gpr|lane)$")>;
-def : InstRW<[FalkorWr_2VXVY_1cyc],   (instregex "^(BIF|BIT|BSL)v16i8$")>;
+def : InstRW<[FalkorWr_2VXVY_1cyc],   (instregex "^(BIF|BIT|BSL|BSP)v16i8$")>;
 def : InstRW<[FalkorWr_2VXVY_1cyc],   (instrs EXTv16i8)>;
 def : InstRW<[FalkorWr_2VXVY_0cyc],   (instregex "(MOVI|MVNI)(v2d_ns|v16b_ns|v4i32|v8i16|v4s_msl)$")>; // imm fwd
 def : InstRW<[FalkorWr_2VXVY_1cyc],   (instrs NOTv16i8)>;
--- a/suite/synctools/tablegen/AArch64/AArch64SchedKryo.td
+++ b/suite/synctools/tablegen/AArch64/AArch64SchedKryo.td
@ -1,9 +1,8 @@
 //==- AArch64SchedKryo.td - Qualcomm Kryo Scheduling Defs ---*- tablegen -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@ -28,8 +27,9 @@ def KryoModel : SchedMachineModel {
  let LoopMicroOpBufferSize = 16;
  let CompleteModel = 1;

-  list<Predicate> UnsupportedFeatures = [HasSVE];
-
+  list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
+                                                    PAUnsupported.F,
+                                                    SMEUnsupported.F);
  // FIXME: Remove when all errors have been fixed.
  let FullInstRWOverlapCheck = 0;
 }
@ -95,7 +95,8 @@ def : WriteRes<WriteFMul,  [KryoUnitX, KryoUnitX]>
      { let Latency = 6; let NumMicroOps = 2; }
 def : WriteRes<WriteFDiv,  [KryoUnitXA, KryoUnitY]>
      { let Latency = 12; let NumMicroOps = 2; } // Fragent -1 / NoRSV +1
-def : WriteRes<WriteV,     [KryoUnitXY]> { let Latency = 6; }
+def : WriteRes<WriteVd,    [KryoUnitXY]> { let Latency = 6; }
+def : WriteRes<WriteVq,    [KryoUnitXY]> { let Latency = 6; }
 def : WriteRes<WriteVLD,   [KryoUnitLS]> { let Latency = 4; }
 def : WriteRes<WriteVST,   [KryoUnitLS]> { let Latency = 4; }

@ -117,6 +118,7 @@ def : ReadAdvance<ReadID,      0>;
 def : ReadAdvance<ReadExtrHi,  0>;
 def : ReadAdvance<ReadAdrBase, 0>;
 def : ReadAdvance<ReadVLD,     0>;
+def : ReadAdvance<ReadST,      0>;


 //===----------------------------------------------------------------------===//
--- a/suite/synctools/tablegen/AArch64/AArch64SchedKryoDetails.td
+++ b/suite/synctools/tablegen/AArch64/AArch64SchedKryoDetails.td
@ -1,9 +1,8 @@
 //=- AArch64SchedKryoDetails.td - QC Kryo Scheduling Defs ----*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@ -463,13 +462,13 @@ def KryoWrite_1cyc_X_noRSV_74ln :
 	let Latency = 1; let NumMicroOps = 2;
 }
 def : InstRW<[KryoWrite_1cyc_X_noRSV_74ln],
-	(instrs BIFv8i8, BITv8i8, BSLv8i8)>;
+	(instrs BIFv8i8, BITv8i8, BSLv8i8, BSPv8i8)>;
 def KryoWrite_1cyc_X_X_75ln :
 	SchedWriteRes<[KryoUnitX, KryoUnitX]> {
 	let Latency = 1; let NumMicroOps = 2;
 }
 def : InstRW<[KryoWrite_1cyc_X_X_75ln],
-	(instrs BIFv16i8, BITv16i8, BSLv16i8)>;
+	(instrs BIFv16i8, BITv16i8, BSLv16i8, BSPv16i8)>;
 def KryoWrite_0cyc_noRSV_11ln :
 	SchedWriteRes<[]> {
 	let Latency = 0; let NumMicroOps = 1;
--- a/suite/synctools/tablegen/AArch64/AArch64SchedPredAmpere.td
+++ b/suite/synctools/tablegen/AArch64/AArch64SchedPredAmpere.td
@ -0,0 +1,25 @@
+//===- AArch64SchedPredAmpere.td - AArch64 Sched Preds -----*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines scheduling predicate definitions that are used by the
+// AArch64 Ampere Computing processors.
+//
+//===----------------------------------------------------------------------===//
+
+// Auxiliary predicates.
+
+// Scheduling predicate: the shift amount is 0, or the shift is an LSL by 1 to 4.
+def AmpereCheapLSL : MCSchedPredicate<
+                                CheckAny<[CheckShiftBy0,
+                                 CheckAll<
+                                   [CheckShiftLSL,
+                                    CheckAny<
+                                      [CheckShiftBy1,
+                                       CheckShiftBy2,
+                                       CheckShiftBy3,
+                                       CheckShiftBy4]>]>]>>;
--- a/suite/synctools/tablegen/AArch64/AArch64SchedPredExynos.td
+++ b/suite/synctools/tablegen/AArch64/AArch64SchedPredExynos.td
@ -0,0 +1,157 @@
+//===- AArch64SchedPredExynos.td - AArch64 Sched Preds -----*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines scheduling predicate definitions that are used by the
+// AArch64 Exynos processors.
+//
+//===----------------------------------------------------------------------===//
+
+// Auxiliary predicates.
+
+// Shift check for arithmetic and logic instructions: shift amount 0, or LSL by 1 to 3.
+def ExynosCheckShift : CheckAny<[CheckShiftBy0,
+                                 CheckAll<
+                                   [CheckShiftLSL,
+                                    CheckAny<
+                                      [CheckShiftBy1,
+                                       CheckShiftBy2,
+                                       CheckShiftBy3]>]>]>;
+
+// Exynos predicates.
+
+// Identify a BLR whose operand 0, the indirect target register, is the LR register.
+def ExynosBranchLinkLRPred : MCSchedPredicate<
+                               CheckAll<[CheckOpcode<[BLR]>,
+                                         CheckRegOperand<0, LR>]>>;
+
+// "isExynosArithFast": arithmetic instructions with no, or only a limited, extension or shift.
+def ExynosArithFn   : TIIPredicate<
+                        "isExynosArithFast",
+                        MCOpcodeSwitchStatement<
+                          [MCOpcodeSwitchCase<
+                             IsArithExtOp.ValidOpcodes, // extend by 0, or UXTW/UXTX by 1 to 3
+                             MCReturnStatement<
+                               CheckAny<[CheckExtBy0,
+                                         CheckAll<
+                                           [CheckAny<
+                                             [CheckExtUXTW,
+                                              CheckExtUXTX]>,
+                                            CheckAny<
+                                              [CheckExtBy1,
+                                               CheckExtBy2,
+                                               CheckExtBy3]>]>]>>>,
+                           MCOpcodeSwitchCase<
+                             IsArithShiftOp.ValidOpcodes, // shifted forms: decided by ExynosCheckShift
+                             MCReturnStatement<ExynosCheckShift>>,
+                           MCOpcodeSwitchCase<
+                             IsArithUnshiftOp.ValidOpcodes, // unshifted forms: always true
+                             MCReturnStatement<TruePred>>,
+                           MCOpcodeSwitchCase<
+                             IsArithImmOp.ValidOpcodes, // immediate forms: always true
+                             MCReturnStatement<TruePred>>],
+                          MCReturnStatement<FalsePred>>>; // any other opcode: false
+def ExynosArithPred : MCSchedPredicate<ExynosArithFn>; // scheduling-predicate wrapper
+
+// "isExynosLogicFast": logic instructions with no shift or only a limited one.
+def ExynosLogicFn   : TIIPredicate<
+                        "isExynosLogicFast",
+                        MCOpcodeSwitchStatement<
+                          [MCOpcodeSwitchCase<
+                             IsLogicShiftOp.ValidOpcodes, // shifted forms: decided by ExynosCheckShift
+                             MCReturnStatement<ExynosCheckShift>>,
+                           MCOpcodeSwitchCase<
+                             IsLogicUnshiftOp.ValidOpcodes, // unshifted forms: always true
+                             MCReturnStatement<TruePred>>,
+                           MCOpcodeSwitchCase<
+                             IsLogicImmOp.ValidOpcodes, // immediate forms: always true
+                             MCReturnStatement<TruePred>>],
+                          MCReturnStatement<FalsePred>>>; // any other opcode: false
+def ExynosLogicPred : MCSchedPredicate<ExynosLogicFn>; // scheduling-predicate wrapper
+
+// "isExynosLogicExFast": like ExynosLogicFn, but shifted forms also accept an LSL by 8.
+def ExynosLogicExFn   : TIIPredicate<
+                          "isExynosLogicExFast",
+                          MCOpcodeSwitchStatement<
+                            [MCOpcodeSwitchCase<
+                               IsLogicShiftOp.ValidOpcodes, // ExynosCheckShift, or LSL by 8
+                               MCReturnStatement<
+                                 CheckAny<
+                                   [ExynosCheckShift,
+                                    CheckAll<
+                                     [CheckShiftLSL,
+                                      CheckShiftBy8]>]>>>,
+                             MCOpcodeSwitchCase<
+                               IsLogicUnshiftOp.ValidOpcodes, // unshifted forms: always true
+                               MCReturnStatement<TruePred>>,
+                             MCOpcodeSwitchCase<
+                               IsLogicImmOp.ValidOpcodes, // immediate forms: always true
+                               MCReturnStatement<TruePred>>],
+                            MCReturnStatement<FalsePred>>>; // any other opcode: false
+def ExynosLogicExPred : MCSchedPredicate<ExynosLogicExFn>; // scheduling-predicate wrapper
+
+// Identify a load or store using the register offset addressing mode
+// whose offset register is SXTW-extended, UXTW-extended, or scaled.
+def ExynosScaledIdxFn   : TIIPredicate<"isExynosScaledAddr",
+                                       MCOpcodeSwitchStatement<
+                                         [MCOpcodeSwitchCase<
+                                            IsLoadStoreRegOffsetOp.ValidOpcodes,
+                                            MCReturnStatement<
+                                              CheckAny<
+                                                [CheckMemExtSXTW,
+                                                 CheckMemExtUXTW,
+                                                 CheckMemScaled]>>>],
+                                         MCReturnStatement<FalsePred>>>; // any other opcode: false
+def ExynosScaledIdxPred : MCSchedPredicate<ExynosScaledIdxFn>; // scheduling-predicate wrapper
+
+// Identify FP instructions: those matching the H, S, D or Q form check.
+def ExynosFPPred : MCSchedPredicate<CheckAny<[CheckHForm,
+                                              CheckSForm,
+                                              CheckDForm,
+                                              CheckQForm]>>;
+
+// Identify 128-bit NEON instructions, i.e. those that satisfy CheckQForm.
+def ExynosQFormPred : MCSchedPredicate<CheckQForm>;
+
+// "isExynosResetFast": identify instructions that reset a register efficiently.
+def ExynosResetFn   : TIIPredicate<
+                        "isExynosResetFast",
+                        MCOpcodeSwitchStatement<
+                          [MCOpcodeSwitchCase<
+                             [ADR, ADRP,
+                              MOVNWi, MOVNXi,
+                              MOVZWi, MOVZXi],
+                             MCReturnStatement<TruePred>>, // these opcodes always qualify
+                           MCOpcodeSwitchCase<
+                             [ORRWri, ORRXri], // only when operand 1 is the WZR or XZR register
+                             MCReturnStatement<
+                               CheckAll<
+                                 [CheckIsRegOperand<1>,
+                                  CheckAny<
+                                    [CheckRegOperand<1, WZR>,
+                                     CheckRegOperand<1, XZR>]>]>>>],
+                          MCReturnStatement< // any other opcode: copy idiom or zero-FP idiom
+                            CheckAny<
+                              [IsCopyIdiomFn,
+                               IsZeroFPIdiomFn]>>>>;
+def ExynosResetPred : MCSchedPredicate<ExynosResetFn>; // scheduling-predicate wrapper
+
+// Identify EXTR used as the alias for ROR (immediate): operands 1 and 2 are the same register.
+def ExynosRotateRightImmPred : MCSchedPredicate<
+                                 CheckAll<[CheckOpcode<[EXTRWrri, EXTRXrri]>,
+                                           CheckSameRegOperand<1, 2>]>>;
+
+// "isExynosCheapAsMove": arithmetic/logic immediates, else the arith, reset or logic fast checks.
+def ExynosCheapFn : TIIPredicate<
+                      "isExynosCheapAsMove",
+                      MCOpcodeSwitchStatement<
+                        [MCOpcodeSwitchCase<
+                           IsArithLogicImmOp.ValidOpcodes, // immediate forms: always true
+                           MCReturnStatement<TruePred>>],
+                        MCReturnStatement<
+                          CheckAny<
+                            [ExynosArithFn, ExynosResetFn, ExynosLogicFn]>>>>;
--- a/suite/synctools/tablegen/AArch64/AArch64SchedPredicates.td
+++ b/suite/synctools/tablegen/AArch64/AArch64SchedPredicates.td
@ -0,0 +1,441 @@
+//===- AArch64SchedPredicates.td - AArch64 Sched Preds -----*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines scheduling predicate definitions that are used by the
+// AArch64 subtargets.
+//
+//===----------------------------------------------------------------------===//
+
+// Function mappers.
+
+// Check the extension type in arithmetic instructions (operand 3 via getArithExtendType).
+let FunctionMapper = "AArch64_AM::getArithExtendType" in {
+  def CheckExtUXTB                      : CheckImmOperand_s<3, "AArch64_AM::UXTB">;
+  def CheckExtUXTH                      : CheckImmOperand_s<3, "AArch64_AM::UXTH">;
+  def CheckExtUXTW                      : CheckImmOperand_s<3, "AArch64_AM::UXTW">;
+  def CheckExtUXTX                      : CheckImmOperand_s<3, "AArch64_AM::UXTX">;
+  def CheckExtSXTB                      : CheckImmOperand_s<3, "AArch64_AM::SXTB">;
+  def CheckExtSXTH                      : CheckImmOperand_s<3, "AArch64_AM::SXTH">;
+  def CheckExtSXTW                      : CheckImmOperand_s<3, "AArch64_AM::SXTW">;
+  def CheckExtSXTX                      : CheckImmOperand_s<3, "AArch64_AM::SXTX">;
+}
+
+// Define CheckExtBy0 to CheckExtBy3: the shift in an extended arithmetic instruction equals I.
+foreach I = {0-3} in {
+  let FunctionMapper = "AArch64_AM::getArithShiftValue" in
+  def CheckExtBy#I                      : CheckImmOperand<3, I>;
+}
+
+// Check the extension type in the register offset addressing mode (LSL is compared as UXTX).
+let FunctionMapper = "AArch64_AM::getMemExtendType" in {
+  def CheckMemExtUXTW                   : CheckImmOperand_s<3, "AArch64_AM::UXTW">;
+  def CheckMemExtLSL                    : CheckImmOperand_s<3, "AArch64_AM::UXTX">;
+  def CheckMemExtSXTW                   : CheckImmOperand_s<3, "AArch64_AM::SXTW">;
+  def CheckMemExtSXTX                   : CheckImmOperand_s<3, "AArch64_AM::SXTX">;
+}
+
+// Check for scaling in the register offset addressing mode (operand 4 via getMemDoShift).
+let FunctionMapper = "AArch64_AM::getMemDoShift" in
+def CheckMemScaled                      : CheckImmOperandSimple<4>;
+
+// Check the shifting type in arithmetic and logic instructions (operand 3 via getShiftType).
+let FunctionMapper = "AArch64_AM::getShiftType" in {
+  def CheckShiftLSL                : CheckImmOperand_s<3, "AArch64_AM::LSL">;
+  def CheckShiftLSR                : CheckImmOperand_s<3, "AArch64_AM::LSR">;
+  def CheckShiftASR                : CheckImmOperand_s<3, "AArch64_AM::ASR">;
+  def CheckShiftROR                : CheckImmOperand_s<3, "AArch64_AM::ROR">;
+  def CheckShiftMSL                : CheckImmOperand_s<3, "AArch64_AM::MSL">;
+}
+
+// Define CheckShiftBy0 to CheckShiftBy4 and CheckShiftBy8: the shift amount equals I.
+foreach I = {0-4, 8} in {
+  let FunctionMapper = "AArch64_AM::getShiftValue" in
+  def CheckShiftBy#I        : CheckImmOperand<3, I>;
+}
+
+// Generic predicates.
+
+// Identify the 16-bit NEON form by the result: operand 0 is a register, one of H0 to H31.
+def CheckHForm             : CheckAll<[CheckIsRegOperand<0>,
+                                       CheckAny<[CheckRegOperand<0, H0>,
+                                                 CheckRegOperand<0, H1>,
+                                                 CheckRegOperand<0, H2>,
+                                                 CheckRegOperand<0, H3>,
+                                                 CheckRegOperand<0, H4>,
+                                                 CheckRegOperand<0, H5>,
+                                                 CheckRegOperand<0, H6>,
+                                                 CheckRegOperand<0, H7>,
+                                                 CheckRegOperand<0, H8>,
+                                                 CheckRegOperand<0, H9>,
+                                                 CheckRegOperand<0, H10>,
+                                                 CheckRegOperand<0, H11>,
+                                                 CheckRegOperand<0, H12>,
+                                                 CheckRegOperand<0, H13>,
+                                                 CheckRegOperand<0, H14>,
+                                                 CheckRegOperand<0, H15>,
+                                                 CheckRegOperand<0, H16>,
+                                                 CheckRegOperand<0, H17>,
+                                                 CheckRegOperand<0, H18>,
+                                                 CheckRegOperand<0, H19>,
+                                                 CheckRegOperand<0, H20>,
+                                                 CheckRegOperand<0, H21>,
+                                                 CheckRegOperand<0, H22>,
+                                                 CheckRegOperand<0, H23>,
+                                                 CheckRegOperand<0, H24>,
+                                                 CheckRegOperand<0, H25>,
+                                                 CheckRegOperand<0, H26>,
+                                                 CheckRegOperand<0, H27>,
+                                                 CheckRegOperand<0, H28>,
+                                                 CheckRegOperand<0, H29>,
+                                                 CheckRegOperand<0, H30>,
+                                                 CheckRegOperand<0, H31>]>]>;
+
+// Identify the 32-bit NEON form by the result: operand 0 is a register, one of S0 to S31.
+def CheckSForm             : CheckAll<[CheckIsRegOperand<0>,
+                                       CheckAny<[CheckRegOperand<0, S0>,
+                                                 CheckRegOperand<0, S1>,
+                                                 CheckRegOperand<0, S2>,
+                                                 CheckRegOperand<0, S3>,
+                                                 CheckRegOperand<0, S4>,
+                                                 CheckRegOperand<0, S5>,
+                                                 CheckRegOperand<0, S6>,
+                                                 CheckRegOperand<0, S7>,
+                                                 CheckRegOperand<0, S8>,
+                                                 CheckRegOperand<0, S9>,
+                                                 CheckRegOperand<0, S10>,
+                                                 CheckRegOperand<0, S11>,
+                                                 CheckRegOperand<0, S12>,
+                                                 CheckRegOperand<0, S13>,
+                                                 CheckRegOperand<0, S14>,
+                                                 CheckRegOperand<0, S15>,
+                                                 CheckRegOperand<0, S16>,
+                                                 CheckRegOperand<0, S17>,
+                                                 CheckRegOperand<0, S18>,
+                                                 CheckRegOperand<0, S19>,
+                                                 CheckRegOperand<0, S20>,
+                                                 CheckRegOperand<0, S21>,
+                                                 CheckRegOperand<0, S22>,
+                                                 CheckRegOperand<0, S23>,
+                                                 CheckRegOperand<0, S24>,
+                                                 CheckRegOperand<0, S25>,
+                                                 CheckRegOperand<0, S26>,
+                                                 CheckRegOperand<0, S27>,
+                                                 CheckRegOperand<0, S28>,
+                                                 CheckRegOperand<0, S29>,
+                                                 CheckRegOperand<0, S30>,
+                                                 CheckRegOperand<0, S31>]>]>;
+
+// Identify the 64-bit NEON form by the result: operand 0 is a register, one of D0 to D31.
+def CheckDForm             : CheckAll<[CheckIsRegOperand<0>,
+                                       CheckAny<[CheckRegOperand<0, D0>,
+                                                 CheckRegOperand<0, D1>,
+                                                 CheckRegOperand<0, D2>,
+                                                 CheckRegOperand<0, D3>,
+                                                 CheckRegOperand<0, D4>,
+                                                 CheckRegOperand<0, D5>,
+                                                 CheckRegOperand<0, D6>,
+                                                 CheckRegOperand<0, D7>,
+                                                 CheckRegOperand<0, D8>,
+                                                 CheckRegOperand<0, D9>,
+                                                 CheckRegOperand<0, D10>,
+                                                 CheckRegOperand<0, D11>,
+                                                 CheckRegOperand<0, D12>,
+                                                 CheckRegOperand<0, D13>,
+                                                 CheckRegOperand<0, D14>,
+                                                 CheckRegOperand<0, D15>,
+                                                 CheckRegOperand<0, D16>,
+                                                 CheckRegOperand<0, D17>,
+                                                 CheckRegOperand<0, D18>,
+                                                 CheckRegOperand<0, D19>,
+                                                 CheckRegOperand<0, D20>,
+                                                 CheckRegOperand<0, D21>,
+                                                 CheckRegOperand<0, D22>,
+                                                 CheckRegOperand<0, D23>,
+                                                 CheckRegOperand<0, D24>,
+                                                 CheckRegOperand<0, D25>,
+                                                 CheckRegOperand<0, D26>,
+                                                 CheckRegOperand<0, D27>,
+                                                 CheckRegOperand<0, D28>,
+                                                 CheckRegOperand<0, D29>,
+                                                 CheckRegOperand<0, D30>,
+                                                 CheckRegOperand<0, D31>]>]>;
+
+// Identify the 128-bit NEON form by the result: operand 0 is a register, one of Q0 to Q31.
+def CheckQForm             : CheckAll<[CheckIsRegOperand<0>,
+                                       CheckAny<[CheckRegOperand<0, Q0>,
+                                                 CheckRegOperand<0, Q1>,
+                                                 CheckRegOperand<0, Q2>,
+                                                 CheckRegOperand<0, Q3>,
+                                                 CheckRegOperand<0, Q4>,
+                                                 CheckRegOperand<0, Q5>,
+                                                 CheckRegOperand<0, Q6>,
+                                                 CheckRegOperand<0, Q7>,
+                                                 CheckRegOperand<0, Q8>,
+                                                 CheckRegOperand<0, Q9>,
+                                                 CheckRegOperand<0, Q10>,
+                                                 CheckRegOperand<0, Q11>,
+                                                 CheckRegOperand<0, Q12>,
+                                                 CheckRegOperand<0, Q13>,
+                                                 CheckRegOperand<0, Q14>,
+                                                 CheckRegOperand<0, Q15>,
+                                                 CheckRegOperand<0, Q16>,
+                                                 CheckRegOperand<0, Q17>,
+                                                 CheckRegOperand<0, Q18>,
+                                                 CheckRegOperand<0, Q19>,
+                                                 CheckRegOperand<0, Q20>,
+                                                 CheckRegOperand<0, Q21>,
+                                                 CheckRegOperand<0, Q22>,
+                                                 CheckRegOperand<0, Q23>,
+                                                 CheckRegOperand<0, Q24>,
+                                                 CheckRegOperand<0, Q25>,
+                                                 CheckRegOperand<0, Q26>,
+                                                 CheckRegOperand<0, Q27>,
+                                                 CheckRegOperand<0, Q28>,
+                                                 CheckRegOperand<0, Q29>,
+                                                 CheckRegOperand<0, Q30>,
+                                                 CheckRegOperand<0, Q31>]>]>;
+
+// Identify arithmetic instructions with extend (the ADD/ADDS/SUB/SUBS rx and rx64 opcodes).
+def IsArithExtOp           : CheckOpcode<[ADDWrx, ADDXrx, ADDSWrx, ADDSXrx,
+                                          SUBWrx, SUBXrx, SUBSWrx, SUBSXrx,
+                                          ADDXrx64, ADDSXrx64,
+                                          SUBXrx64, SUBSXrx64]>;
+
+// Identify arithmetic immediate instructions (the ADD/ADDS/SUB/SUBS ri opcodes).
+def IsArithImmOp           : CheckOpcode<[ADDWri, ADDXri, ADDSWri, ADDSXri,
+                                          SUBWri, SUBXri, SUBSWri, SUBSXri]>;
+
+// Identify arithmetic instructions with shift (the ADD/ADDS/SUB/SUBS rs opcodes).
+def IsArithShiftOp         : CheckOpcode<[ADDWrs, ADDXrs, ADDSWrs, ADDSXrs,
+                                          SUBWrs, SUBXrs, SUBSWrs, SUBSXrs]>;
+
+// Identify arithmetic instructions without shift (the ADD/ADDS/SUB/SUBS rr opcodes).
+def IsArithUnshiftOp       : CheckOpcode<[ADDWrr, ADDXrr, ADDSWrr, ADDSXrr,
+                                          SUBWrr, SUBXrr, SUBSWrr, SUBSXrr]>;
+
+// Identify logic immediate instructions (the AND/EOR/ORR ri opcodes).
+def IsLogicImmOp           : CheckOpcode<[ANDWri, ANDXri,
+                                          EORWri, EORXri,
+                                          ORRWri, ORRXri]>;
+
+// Identify logic instructions with shift (AND/ANDS, BIC/BICS, EON, EOR, ORN, ORR rs opcodes).
+def IsLogicShiftOp         : CheckOpcode<[ANDWrs, ANDXrs, ANDSWrs, ANDSXrs,
+                                          BICWrs, BICXrs, BICSWrs, BICSXrs,
+                                          EONWrs, EONXrs,
+                                          EORWrs, EORXrs,
+                                          ORNWrs, ORNXrs,
+                                          ORRWrs, ORRXrs]>;
+
+// Identify logic instructions without shift (AND/ANDS, BIC/BICS, EON, EOR, ORN, ORR rr opcodes).
+def IsLogicUnshiftOp       : CheckOpcode<[ANDWrr, ANDXrr, ANDSWrr, ANDSXrr,
+                                          BICWrr, BICXrr, BICSWrr, BICSXrr,
+                                          EONWrr, EONXrr,
+                                          EORWrr, EORXrr,
+                                          ORNWrr, ORNXrr,
+                                          ORRWrr, ORRXrr]>;
+
+// Identify arithmetic and logic immediate instructions (IsArithImmOp plus IsLogicImmOp opcodes).
+def IsArithLogicImmOp      : CheckOpcode<!listconcat(IsArithImmOp.ValidOpcodes,
+                                                     IsLogicImmOp.ValidOpcodes)>;
+
+// Identify arithmetic and logic instructions with shift (IsArithShiftOp plus IsLogicShiftOp opcodes).
+def IsArithLogicShiftOp    : CheckOpcode<!listconcat(IsArithShiftOp.ValidOpcodes,
+                                                     IsLogicShiftOp.ValidOpcodes)>;
+
+// Identify arithmetic and logic instructions without shift (IsArithUnshiftOp plus IsLogicUnshiftOp opcodes).
+def IsArithLogicUnshiftOp  : CheckOpcode<!listconcat(IsArithUnshiftOp.ValidOpcodes,
+                                                     IsLogicUnshiftOp.ValidOpcodes)>;
+
+// Identify whether an instruction is an ASIMD load using the post index
+// addressing mode: the LD1 to LD4 _POST opcodes, including the i* and R* forms.
+def IsLoadASIMDPostOp      : CheckOpcode<[LD1Onev8b_POST, LD1Onev4h_POST, LD1Onev2s_POST, LD1Onev1d_POST,
+                                          LD1Onev16b_POST, LD1Onev8h_POST, LD1Onev4s_POST, LD1Onev2d_POST,
+                                          LD1Twov8b_POST, LD1Twov4h_POST, LD1Twov2s_POST, LD1Twov1d_POST,
+                                          LD1Twov16b_POST, LD1Twov8h_POST, LD1Twov4s_POST, LD1Twov2d_POST,
+                                          LD1Threev8b_POST, LD1Threev4h_POST, LD1Threev2s_POST, LD1Threev1d_POST,
+                                          LD1Threev16b_POST, LD1Threev8h_POST, LD1Threev4s_POST, LD1Threev2d_POST,
+                                          LD1Fourv8b_POST, LD1Fourv4h_POST, LD1Fourv2s_POST, LD1Fourv1d_POST,
+                                          LD1Fourv16b_POST, LD1Fourv8h_POST, LD1Fourv4s_POST, LD1Fourv2d_POST,
+                                          LD1i8_POST, LD1i16_POST, LD1i32_POST, LD1i64_POST,
+                                          LD1Rv8b_POST, LD1Rv4h_POST, LD1Rv2s_POST, LD1Rv1d_POST,
+                                          LD1Rv16b_POST, LD1Rv8h_POST, LD1Rv4s_POST, LD1Rv2d_POST,
+                                          LD2Twov8b_POST, LD2Twov4h_POST, LD2Twov2s_POST,
+                                          LD2Twov16b_POST, LD2Twov8h_POST, LD2Twov4s_POST, LD2Twov2d_POST,
+                                          LD2i8_POST, LD2i16_POST, LD2i32_POST, LD2i64_POST,
+                                          LD2Rv8b_POST, LD2Rv4h_POST, LD2Rv2s_POST, LD2Rv1d_POST,
+                                          LD2Rv16b_POST, LD2Rv8h_POST, LD2Rv4s_POST, LD2Rv2d_POST,
+                                          LD3Threev8b_POST, LD3Threev4h_POST, LD3Threev2s_POST,
+                                          LD3Threev16b_POST, LD3Threev8h_POST, LD3Threev4s_POST, LD3Threev2d_POST,
+                                          LD3i8_POST, LD3i16_POST, LD3i32_POST, LD3i64_POST,
+                                          LD3Rv8b_POST, LD3Rv4h_POST, LD3Rv2s_POST, LD3Rv1d_POST,
+                                          LD3Rv16b_POST, LD3Rv8h_POST, LD3Rv4s_POST, LD3Rv2d_POST,
+                                          LD4Fourv8b_POST, LD4Fourv4h_POST, LD4Fourv2s_POST,
+                                          LD4Fourv16b_POST, LD4Fourv8h_POST, LD4Fourv4s_POST, LD4Fourv2d_POST,
+                                          LD4i8_POST, LD4i16_POST, LD4i32_POST, LD4i64_POST,
+                                          LD4Rv8b_POST, LD4Rv4h_POST, LD4Rv2s_POST, LD4Rv1d_POST,
+                                          LD4Rv16b_POST, LD4Rv8h_POST, LD4Rv4s_POST, LD4Rv2d_POST]>;
+
+// Identify whether an instruction is an ASIMD store using the post index
+// addressing mode: the ST1 to ST4 _POST opcodes, including the i* forms.
+def IsStoreASIMDPostOp     : CheckOpcode<[ST1Onev8b_POST, ST1Onev4h_POST, ST1Onev2s_POST, ST1Onev1d_POST,
+                                          ST1Onev16b_POST, ST1Onev8h_POST, ST1Onev4s_POST, ST1Onev2d_POST,
+                                          ST1Twov8b_POST, ST1Twov4h_POST, ST1Twov2s_POST, ST1Twov1d_POST,
+                                          ST1Twov16b_POST, ST1Twov8h_POST, ST1Twov4s_POST, ST1Twov2d_POST,
+                                          ST1Threev8b_POST, ST1Threev4h_POST, ST1Threev2s_POST, ST1Threev1d_POST,
+                                          ST1Threev16b_POST, ST1Threev8h_POST, ST1Threev4s_POST, ST1Threev2d_POST,
+                                          ST1Fourv8b_POST, ST1Fourv4h_POST, ST1Fourv2s_POST, ST1Fourv1d_POST,
+                                          ST1Fourv16b_POST, ST1Fourv8h_POST, ST1Fourv4s_POST, ST1Fourv2d_POST,
+                                          ST1i8_POST, ST1i16_POST, ST1i32_POST, ST1i64_POST,
+                                          ST2Twov8b_POST, ST2Twov4h_POST, ST2Twov2s_POST,
+                                          ST2Twov16b_POST, ST2Twov8h_POST, ST2Twov4s_POST, ST2Twov2d_POST,
+                                          ST2i8_POST, ST2i16_POST, ST2i32_POST, ST2i64_POST,
+                                          ST3Threev8b_POST, ST3Threev4h_POST, ST3Threev2s_POST,
+                                          ST3Threev16b_POST, ST3Threev8h_POST, ST3Threev4s_POST, ST3Threev2d_POST,
+                                          ST3i8_POST, ST3i16_POST, ST3i32_POST, ST3i64_POST,
+                                          ST4Fourv8b_POST, ST4Fourv4h_POST, ST4Fourv2s_POST,
+                                          ST4Fourv16b_POST, ST4Fourv8h_POST, ST4Fourv4s_POST, ST4Fourv2d_POST,
+                                          ST4i8_POST, ST4i16_POST, ST4i32_POST, ST4i64_POST]>;
+
+// Identify whether an instruction is an ASIMD load or store using the post
+// index addressing mode (IsLoadASIMDPostOp plus IsStoreASIMDPostOp opcodes).
+def IsLoadStoreASIMDPostOp : CheckOpcode<!listconcat(IsLoadASIMDPostOp.ValidOpcodes,
+                                                     IsStoreASIMDPostOp.ValidOpcodes)>;
+
+// Identify whether an instruction is a load using the register offset
+// addressing mode; the PRFMroW and PRFMroX opcodes are listed here as well.
+def IsLoadRegOffsetOp      : CheckOpcode<[PRFMroW, PRFMroX,
+                                          LDRBBroW, LDRBBroX,
+                                          LDRSBWroW, LDRSBWroX, LDRSBXroW, LDRSBXroX,
+                                          LDRHHroW, LDRHHroX,
+                                          LDRSHWroW, LDRSHWroX, LDRSHXroW, LDRSHXroX,
+                                          LDRWroW, LDRWroX,
+                                          LDRSWroW, LDRSWroX,
+                                          LDRXroW, LDRXroX,
+                                          LDRBroW, LDRBroX,
+                                          LDRHroW, LDRHroX,
+                                          LDRSroW, LDRSroX,
+                                          LDRDroW, LDRDroX,
+                                          LDRQroW, LDRQroX]>;
+
+// Identify whether an instruction is a store using the register offset
+// addressing mode (the STR roW and roX opcodes).
+def IsStoreRegOffsetOp     : CheckOpcode<[STRBBroW, STRBBroX,
+                                          STRHHroW, STRHHroX,
+                                          STRWroW, STRWroX,
+                                          STRXroW, STRXroX,
+                                          STRBroW, STRBroX,
+                                          STRHroW, STRHroX,
+                                          STRSroW, STRSroX,
+                                          STRDroW, STRDroX,
+                                          STRQroW, STRQroX]>;
+
+// Identify whether an instruction is a load or store using the register
+// offset addressing mode (IsLoadRegOffsetOp plus IsStoreRegOffsetOp opcodes).
+def IsLoadStoreRegOffsetOp : CheckOpcode<!listconcat(IsLoadRegOffsetOp.ValidOpcodes,
+                                                     IsStoreRegOffsetOp.ValidOpcodes)>;
+
+// Target predicates.
+
+// Identify a MOV idiom: an ADD or ORR form that effectively copies one register to another.
+def IsCopyIdiomFn     : TIIPredicate<"isCopyIdiom",
+                                     MCOpcodeSwitchStatement<
+                                       [// MOV {Rd, SP}, {SP, Rn} =>
+                                        // ADD {Rd, SP}, {SP, Rn}, #0
+                                        MCOpcodeSwitchCase<
+                                          [ADDWri, ADDXri],
+                                          MCReturnStatement<
+                                            CheckAll<
+                                              [CheckIsRegOperand<0>,
+                                               CheckIsRegOperand<1>,
+                                               CheckAny< // operand 0 or operand 1 is WSP/SP
+                                                 [CheckRegOperand<0, WSP>,
+                                                  CheckRegOperand<0, SP>,
+                                                  CheckRegOperand<1, WSP>,
+                                                  CheckRegOperand<1, SP>]>,
+                                               CheckZeroOperand<2>]>>>, // operand 2 (the immediate) is zero
+                                        // MOV Rd, Rm =>
+                                        // ORR Rd, ZR, Rm, LSL #0
+                                        MCOpcodeSwitchCase<
+                                          [ORRWrs, ORRXrs],
+                                          MCReturnStatement<
+                                            CheckAll<
+                                              [CheckIsRegOperand<1>,
+                                               CheckIsRegOperand<2>,
+                                               CheckAny< // operand 1 is WZR or XZR
+                                                 [CheckRegOperand<1, WZR>,
+                                                  CheckRegOperand<1, XZR>]>,
+                                               CheckShiftBy0]>>>], // the LSL #0 part of the idiom
+                                       MCReturnStatement<FalsePred>>>; // any other opcode: false
+def IsCopyIdiomPred   : MCSchedPredicate<IsCopyIdiomFn>; // MCSchedPredicate wrapper around IsCopyIdiomFn
+
+// Matches arithmetic instructions with an extended register: true for the
+// IsArithExtOp opcodes when operand 3 is non-zero, false for anything else.
+def RegExtendedFn
+    : TIIPredicate<"hasExtendedReg",
+                   MCOpcodeSwitchStatement<
+                     [MCOpcodeSwitchCase<
+                        IsArithExtOp.ValidOpcodes,
+                        MCReturnStatement<CheckNot<CheckZeroOperand<3>>>>],
+                     MCReturnStatement<FalsePred>>>;
+def RegExtendedPred : MCSchedPredicate<RegExtendedFn>;
+
+// Matches arithmetic and logic instructions with a shifted register: true
+// for the IsArithLogicShiftOp opcodes when operand 3 is non-zero, false for
+// anything else.
+def RegShiftedFn
+    : TIIPredicate<"hasShiftedReg",
+                   MCOpcodeSwitchStatement<
+                     [MCOpcodeSwitchCase<
+                        IsArithLogicShiftOp.ValidOpcodes,
+                        MCReturnStatement<CheckNot<CheckZeroOperand<3>>>>],
+                     MCReturnStatement<FalsePred>>>;
+def RegShiftedPred : MCSchedPredicate<RegShiftedFn>;
+
+// Matches a register-offset load or store whose index register is extended
+// or scaled: true for the IsLoadStoreRegOffsetOp opcodes when either
+// CheckMemExtLSL fails or CheckMemScaled holds, false for anything else.
+def ScaledIdxFn
+    : TIIPredicate<"isScaledAddr",
+                   MCOpcodeSwitchStatement<
+                     [MCOpcodeSwitchCase<
+                        IsLoadStoreRegOffsetOp.ValidOpcodes,
+                        MCReturnStatement<
+                          CheckAny<[CheckNot<CheckMemExtLSL>,
+                                    CheckMemScaled]>>>],
+                     MCReturnStatement<FalsePred>>>;
+def ScaledIdxPred : MCSchedPredicate<ScaledIdxFn>;
+
+// Identify a MOVI form that effectively resets a FP register to zero.
+def IsZeroFPIdiomFn   : TIIPredicate<"isZeroFPIdiom",
+                                     MCOpcodeSwitchStatement<
+                                       [// MOVI Vd, #0
+                                        MCOpcodeSwitchCase<
+                                          [MOVIv8b_ns, MOVIv16b_ns,
+                                           MOVID, MOVIv2d_ns],
+                                          MCReturnStatement<CheckZeroOperand<1>>>, // operand 1 (the immediate) is zero
+                                        // MOVI Vd, #0, LSL #0
+                                        MCOpcodeSwitchCase<
+                                          [MOVIv4i16, MOVIv8i16,
+                                           MOVIv2i32, MOVIv4i32],
+                                          MCReturnStatement<
+                                            CheckAll< // operands 1 and 2 must both be zero
+                                              [CheckZeroOperand<1>,
+                                               CheckZeroOperand<2>]>>>],
+                                       MCReturnStatement<FalsePred>>>; // any other opcode: false
+def IsZeroFPIdiomPred : MCSchedPredicate<IsZeroFPIdiomFn>; // MCSchedPredicate wrapper around IsZeroFPIdiomFn
+
+// Identify an ORR-immediate form that effectively resets a GP register to zero.
+def IsZeroIdiomFn     : TIIPredicate<"isZeroIdiom",
+                                    MCOpcodeSwitchStatement<
+                                      [// ORR Rd, ZR, #0
+                                       MCOpcodeSwitchCase<
+                                         [ORRWri, ORRXri],
+                                         MCReturnStatement<
+                                           CheckAll<
+                                             [CheckIsRegOperand<1>,
+                                              CheckAny< // operand 1 is WZR or XZR
+                                                [CheckRegOperand<1, WZR>,
+                                                 CheckRegOperand<1, XZR>]>,
+                                              CheckZeroOperand<2>]>>>], // operand 2 (the immediate) is zero
+                                      MCReturnStatement<FalsePred>>>; // any other opcode: false
+def IsZeroIdiomPred   : MCSchedPredicate<IsZeroIdiomFn>; // MCSchedPredicate wrapper around IsZeroIdiomFn
--- a/suite/synctools/tablegen/AArch64/AArch64SchedTSV110.td
+++ b/suite/synctools/tablegen/AArch64/AArch64SchedTSV110.td
@ -0,0 +1,747 @@
+//==- AArch64SchedTSV110.td - Huawei TSV110 Scheduling Definitions -*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for Huawei TSV110 to support
+// instruction scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+// ===---------------------------------------------------------------------===//
+// The following definitions describe the simpler per-operand machine model.
+// This works with MachineScheduler. See llvm/MC/MCSchedule.h for details.
+
+// Huawei TSV110 scheduling machine model (top-level SchedMachineModel parameters).
+def TSV110Model : SchedMachineModel {
+  let IssueWidth            =   4; // 4 micro-ops dispatched per cycle.
+  let MicroOpBufferSize     = 128; // 128 micro-op re-order buffer
+  let LoopMicroOpBufferSize =  16; // NOTE(review): presumably a 16 micro-op loop buffer; confirm against TargetSchedule.td
+  let LoadLatency           =   4; // Optimistic load latency.
+  let MispredictPenalty     =  14; // Fetch + Decode/Rename/Dispatch + Branch
+  let CompleteModel         =   1; // NOTE(review): per TargetSchedule.td this requires scheduling info for every instruction; confirm
+
+  list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, // concatenation of the SVEUnsupported
+                                                    PAUnsupported.F); // and PAUnsupported predicate lists
+}
+
+// Define each kind of processor resource and number available on the TSV110,
+// which has 8 pipelines, each with its own queue where micro-ops wait for
+// their operands and issue out-of-order to one of eight execution pipelines.
+let SchedModel = TSV110Model in {
+  def TSV110UnitALU  : ProcResource<1>; // 1 unit:  Int ALU
+  def TSV110UnitAB   : ProcResource<2>; // 2 units: Int ALU/BRU
+  def TSV110UnitMDU  : ProcResource<1>; // 1 unit:  Multi-Cycle
+  def TSV110UnitFSU1 : ProcResource<1>; // 1 unit:  FP/ASIMD
+  def TSV110UnitFSU2 : ProcResource<1>; // 1 unit:  FP/ASIMD
+  def TSV110UnitLdSt : ProcResource<2>; // 2 units: Load/Store
+
+  def TSV110UnitF     : ProcResGroup<[TSV110UnitFSU1, TSV110UnitFSU2]>; // group: either FP/ASIMD unit
+  def TSV110UnitALUAB : ProcResGroup<[TSV110UnitALU, TSV110UnitAB]>; // group: the ALU unit or an AB unit
+  def TSV110UnitFLdSt : ProcResGroup<[TSV110UnitFSU1, TSV110UnitFSU2, TSV110UnitLdSt]>; // group: FP/ASIMD or Load/Store units
+}
+
+let SchedModel = TSV110Model in {
+
+//===----------------------------------------------------------------------===//
+// Map the target-defined scheduler read/write resources and latency for 
+// TSV110
+
+// Integer ALU
+// Single-cycle writes that use the ALU/AB resource group.
+let Latency = 1 in {
+  def : WriteRes<WriteImm,  [TSV110UnitALUAB]>;
+  def : WriteRes<WriteI,    [TSV110UnitALUAB]>;
+  def : WriteRes<WriteExtr, [TSV110UnitALUAB]>;
+  def : WriteRes<WriteIS,   [TSV110UnitALUAB]>;
+}
+// WriteISReg and WriteIEReg take two cycles on the MDU unit.
+let Latency = 2 in {
+  def : WriteRes<WriteISReg, [TSV110UnitMDU]>;
+  def : WriteRes<WriteIEReg, [TSV110UnitMDU]>;
+}
+
+// Integer Mul/MAC/Div
+// Divides hold the MDU unit for their whole latency (ResourceCycles == Latency).
+let Latency = 12, ResourceCycles = [12] in
+def : WriteRes<WriteID32, [TSV110UnitMDU]>;
+let Latency = 20, ResourceCycles = [20] in
+def : WriteRes<WriteID64, [TSV110UnitMDU]>;
+let Latency = 3 in
+def : WriteRes<WriteIM32, [TSV110UnitMDU]>;
+let Latency = 4 in
+def : WriteRes<WriteIM64, [TSV110UnitMDU]>;
+
+// Load
+let Latency = 4 in {
+  def : WriteRes<WriteLD,    [TSV110UnitLdSt]>;
+  def : WriteRes<WriteLDIdx, [TSV110UnitLdSt]>;
+  def : WriteRes<WriteLDHi,  []>;
+}
+
+// Pre/Post Indexing
+let Latency = 1 in
+def : WriteRes<WriteAdr, [TSV110UnitALUAB]>;
+
+// Store
+let Latency = 1 in {
+  def : WriteRes<WriteST,    [TSV110UnitLdSt]>;
+  def : WriteRes<WriteSTP,   [TSV110UnitLdSt]>;
+  def : WriteRes<WriteSTIdx, [TSV110UnitLdSt]>;
+}
+
+// FP
+let Latency = 2 in {
+  def : WriteRes<WriteF,     [TSV110UnitF]>;
+  def : WriteRes<WriteFCopy, [TSV110UnitF]>;
+  def : WriteRes<WriteFImm,  [TSV110UnitF]>;
+}
+let Latency = 3 in {
+  def : WriteRes<WriteFCmp, [TSV110UnitF]>;
+  def : WriteRes<WriteFCvt, [TSV110UnitF]>;
+}
+let Latency = 5 in
+def : WriteRes<WriteFMul, [TSV110UnitF]>;
+
+// FP Div, Sqrt
+let Latency = 18 in
+def : WriteRes<WriteFDiv, [TSV110UnitFSU1]>;
+
+// WriteVd / WriteVq / WriteVLD / WriteVST
+let Latency = 4 in {
+  def : WriteRes<WriteVd, [TSV110UnitF]>;
+  def : WriteRes<WriteVq, [TSV110UnitF]>;
+}
+let Latency = 5 in
+def : WriteRes<WriteVLD, [TSV110UnitFLdSt]>;
+let Latency = 1 in
+def : WriteRes<WriteVST, [TSV110UnitF]>;
+
+// Branch
+let Latency = 1 in {
+  def : WriteRes<WriteBr,    [TSV110UnitAB]>;
+  def : WriteRes<WriteBrReg, [TSV110UnitAB]>;
+
+  // No processor resource is attached to these write types.
+  def : WriteRes<WriteSys,     []>;
+  def : WriteRes<WriteBarrier, []>;
+  def : WriteRes<WriteHint,    []>;
+}
+
+// WriteAtomic is marked unsupported in this model.
+let Unsupported = 1 in
+def : WriteRes<WriteAtomic, []>;
+
+// Forwarding logic is modeled only for multiply and accumulate: every read below has advance 0 except ReadIMA.
+def : ReadAdvance<ReadI,       0>;
+def : ReadAdvance<ReadISReg,   0>;
+def : ReadAdvance<ReadIEReg,   0>;
+def : ReadAdvance<ReadIM,      0>;
+def : ReadAdvance<ReadIMA,     2, [WriteIM32, WriteIM64]>; // advance of 2, limited to WriteIM32/WriteIM64
+def : ReadAdvance<ReadID,      0>;
+def : ReadAdvance<ReadExtrHi,  0>;
+def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadVLD,     0>;
+def : ReadAdvance<ReadST,      0>;
+
+def : InstRW<[WriteI], (instrs COPY)>; // COPY is given the WriteI write type
+
+// Detailed Refinements
+//===----------------------------------------------------------------------===//
+
+// Contains all of the TSV110 specific SchedWriteRes types. The approach
+// below is to define a generic SchedWriteRes for every combination of
+// latency and microOps. The naming convention is to use a prefix, one field
+// for latency, and one or more microOp count/type designators.
+//   Prefix: TSV110Wr
+//       Latency: #cyc
+//   MicroOp Count/Types: #(ALU|AB|MDU|FSU1|FSU2|LdSt|ALUAB|F|FLdSt)
+//
+// e.g. TSV110Wr_6cyc_1ALU_6MDU_4LdSt means the total latency is 6 cycles and
+//      1 micro-op issues to the ALU pipe, 6 to MDU pipes and 4 to LdSt pipes.
+//
+
+//===----------------------------------------------------------------------===//
+// Define Generic 1 micro-op types
+
+// Single micro-op write types, grouped by latency. Each one names the
+// single resource it uses.
+let Latency = 1 in {
+  def TSV110Wr_1cyc_1AB    : SchedWriteRes<[TSV110UnitAB]>;
+  def TSV110Wr_1cyc_1ALU   : SchedWriteRes<[TSV110UnitALU]>;
+  def TSV110Wr_1cyc_1ALUAB : SchedWriteRes<[TSV110UnitALUAB]>;
+  def TSV110Wr_1cyc_1LdSt  : SchedWriteRes<[TSV110UnitLdSt]>;
+}
+
+let Latency = 2 in {
+  def TSV110Wr_2cyc_1AB   : SchedWriteRes<[TSV110UnitAB]>;
+  def TSV110Wr_2cyc_1ALU  : SchedWriteRes<[TSV110UnitALU]>;
+  def TSV110Wr_2cyc_1LdSt : SchedWriteRes<[TSV110UnitLdSt]>;
+  def TSV110Wr_2cyc_1MDU  : SchedWriteRes<[TSV110UnitMDU]>;
+  def TSV110Wr_2cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]>;
+  def TSV110Wr_2cyc_1F    : SchedWriteRes<[TSV110UnitF]>;
+}
+
+let Latency = 3 in {
+  def TSV110Wr_3cyc_1F    : SchedWriteRes<[TSV110UnitF]>;
+  def TSV110Wr_3cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]>;
+  def TSV110Wr_3cyc_1MDU  : SchedWriteRes<[TSV110UnitMDU]>;
+}
+
+let Latency = 4 in {
+  def TSV110Wr_4cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]>;
+  def TSV110Wr_4cyc_1F    : SchedWriteRes<[TSV110UnitF]>;
+  def TSV110Wr_4cyc_1LdSt : SchedWriteRes<[TSV110UnitLdSt]>;
+  def TSV110Wr_4cyc_1MDU  : SchedWriteRes<[TSV110UnitMDU]>;
+}
+
+let Latency = 5 in {
+  def TSV110Wr_5cyc_1F    : SchedWriteRes<[TSV110UnitF]>;
+  def TSV110Wr_5cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]>;
+  def TSV110Wr_5cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]>;
+  def TSV110Wr_5cyc_1LdSt : SchedWriteRes<[TSV110UnitLdSt]>;
+}
+
+// Latencies that have a single write type each.
+let Latency = 6 in
+def TSV110Wr_6cyc_1F     : SchedWriteRes<[TSV110UnitF]>;
+let Latency = 7 in
+def TSV110Wr_7cyc_1F     : SchedWriteRes<[TSV110UnitF]>;
+let Latency = 8 in
+def TSV110Wr_8cyc_1F     : SchedWriteRes<[TSV110UnitF]>;
+let Latency = 11 in
+def TSV110Wr_11cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]>;
+let Latency = 12 in
+def TSV110Wr_12cyc_1MDU  : SchedWriteRes<[TSV110UnitMDU]>;
+let Latency = 17 in
+def TSV110Wr_17cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]>;
+let Latency = 18 in
+def TSV110Wr_18cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]>;
+let Latency = 20 in
+def TSV110Wr_20cyc_1MDU  : SchedWriteRes<[TSV110UnitMDU]>;
+let Latency = 24 in
+def TSV110Wr_24cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]>;
+let Latency = 31 in
+def TSV110Wr_31cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]>;
+let Latency = 36 in
+def TSV110Wr_36cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]>;
+let Latency = 38 in
+def TSV110Wr_38cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]>;
+let Latency = 64 in
+def TSV110Wr_64cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]>;
+
+//===----------------------------------------------------------------------===//
+// Define Generic 2 micro-op types
+
+// Two micro-op write types, grouped by latency. Each one lists the two
+// resources it uses.
+let NumMicroOps = 2 in {
+  let Latency = 1 in
+  def TSV110Wr_1cyc_1LdSt_1ALUAB
+      : SchedWriteRes<[TSV110UnitLdSt, TSV110UnitALUAB]>;
+
+  let Latency = 2 in {
+    def TSV110Wr_2cyc_1LdSt_1ALUAB
+        : SchedWriteRes<[TSV110UnitLdSt, TSV110UnitALUAB]>;
+    def TSV110Wr_2cyc_2LdSt
+        : SchedWriteRes<[TSV110UnitLdSt, TSV110UnitLdSt]>;
+    def TSV110Wr_2cyc_2F
+        : SchedWriteRes<[TSV110UnitF, TSV110UnitF]>;
+    def TSV110Wr_2cyc_1FSU1_1FSU2
+        : SchedWriteRes<[TSV110UnitFSU1, TSV110UnitFSU2]>;
+  }
+
+  let Latency = 4 in {
+    def TSV110Wr_4cyc_2F
+        : SchedWriteRes<[TSV110UnitF, TSV110UnitF]>;
+    def TSV110Wr_4cyc_1FSU1_1FSU2
+        : SchedWriteRes<[TSV110UnitFSU1, TSV110UnitFSU2]>;
+    def TSV110Wr_4cyc_1LdSt_1ALUAB
+        : SchedWriteRes<[TSV110UnitLdSt, TSV110UnitALUAB]>;
+  }
+
+  let Latency = 5 in
+  def TSV110Wr_5cyc_1ALU_1F
+      : SchedWriteRes<[TSV110UnitALU, TSV110UnitF]>;
+
+  let Latency = 6 in {
+    def TSV110Wr_6cyc_2LdSt
+        : SchedWriteRes<[TSV110UnitLdSt, TSV110UnitLdSt]>;
+    def TSV110Wr_6cyc_1LdSt_1ALUAB
+        : SchedWriteRes<[TSV110UnitLdSt, TSV110UnitALUAB]>;
+  }
+
+  let Latency = 7 in
+  def TSV110Wr_7cyc_1F_1LdSt
+      : SchedWriteRes<[TSV110UnitF, TSV110UnitLdSt]>;
+
+  let Latency = 8 in {
+    def TSV110Wr_8cyc_2FSU1
+        : SchedWriteRes<[TSV110UnitFSU1, TSV110UnitFSU1]>;
+    def TSV110Wr_8cyc_1FSU1_1FSU2
+        : SchedWriteRes<[TSV110UnitFSU1, TSV110UnitFSU2]>;
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// Define Generic 3 micro-op types
+
+// Three micro-op write types.
+let NumMicroOps = 3 in {
+  let Latency = 6 in {
+    def TSV110Wr_6cyc_3F
+        : SchedWriteRes<[TSV110UnitF, TSV110UnitF, TSV110UnitF]>;
+    def TSV110Wr_6cyc_3LdSt
+        : SchedWriteRes<[TSV110UnitLdSt, TSV110UnitLdSt, TSV110UnitLdSt]>;
+  }
+
+  let Latency = 7 in
+  def TSV110Wr_7cyc_2F_1LdSt
+      : SchedWriteRes<[TSV110UnitF, TSV110UnitF, TSV110UnitLdSt]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Define Generic 4 micro-op types
+
+// Four micro-op write types; both have an 8-cycle latency.
+let Latency = 8, NumMicroOps = 4 in {
+  def TSV110Wr_8cyc_4F
+      : SchedWriteRes<[TSV110UnitF, TSV110UnitF, TSV110UnitF, TSV110UnitF]>;
+  def TSV110Wr_8cyc_3F_1LdSt
+      : SchedWriteRes<[TSV110UnitF, TSV110UnitF, TSV110UnitF,
+                       TSV110UnitLdSt]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Define Generic 5 micro-op types
+
+// Five micro-ops: three on the F group and two on LdSt, 8-cycle latency.
+let Latency = 8, NumMicroOps = 5 in
+def TSV110Wr_8cyc_3F_2LdSt
+    : SchedWriteRes<[TSV110UnitF, TSV110UnitF, TSV110UnitF,
+                     TSV110UnitLdSt, TSV110UnitLdSt]>;
+
+//===----------------------------------------------------------------------===//
+// Define Generic 8 micro-op types
+
+// Eight micro-ops: four on the F group and four on LdSt, 10-cycle latency.
+let Latency = 10, NumMicroOps = 8 in
+def TSV110Wr_10cyc_4F_4LdSt
+    : SchedWriteRes<[TSV110UnitF, TSV110UnitF, TSV110UnitF, TSV110UnitF,
+                     TSV110UnitLdSt, TSV110UnitLdSt, TSV110UnitLdSt,
+                     TSV110UnitLdSt]>;
+
+
+// Branch Instructions
+// -----------------------------------------------------------------------------
+
+// All branches listed here take one cycle on an AB unit.
+def : InstRW<[TSV110Wr_1cyc_1AB], (instrs B, BL, BLR)>;
+def : InstRW<[TSV110Wr_1cyc_1AB],
+             (instregex "^(BR|RET|(CBZ|CBNZ|TBZ|TBNZ))$")>;
+
+
+// Cryptography Extensions
+// -----------------------------------------------------------------------------
+
+// AES and SHA: one InstRW per write type, listing every pattern that uses it.
+def : InstRW<[TSV110Wr_2cyc_1FSU1], (instregex "^SHA1SU1", "^SHA256SU0")>;
+def : InstRW<[TSV110Wr_2cyc_2F],    (instregex "^SHA1(H|SU0)")>;
+def : InstRW<[TSV110Wr_3cyc_1FSU1],
+             (instregex "^AES[DE]", "^AESI?MC", "^SHA256SU1")>;
+def : InstRW<[TSV110Wr_5cyc_1FSU1],
+             (instregex "^SHA1[CMP]", "^SHA256(H|H2)")>;
+
+// CRC32: two cycles on the MDU unit, with a 1-cycle read advance from
+// TSV110Wr_2cyc_1MDU producers.
+def TSV110ReadCRC : SchedReadAdvance<1, [TSV110Wr_2cyc_1MDU]>;
+def : InstRW<[TSV110Wr_2cyc_1MDU, TSV110ReadCRC],
+             (instregex "^CRC32.*$")>;
+
+
+// Arithmetic and Logical Instructions
+// -----------------------------------------------------------------------------
+
+// One cycle on the ALU/AB resource group.
+def : InstRW<[TSV110Wr_1cyc_1ALUAB],
+             (instregex "(BIC|EON|ORN)[WX]rr",
+                        "(ADD|AND|EOR|ORR|SUB)[WX]r(r|i)",
+                        "^(ADC|SBC|BIC)[WX]r$",
+                        "^(CSEL|CSINC|CSINV|CSNEG)(W|X)r$")>;
+
+// One cycle on an AB unit.
+def : InstRW<[TSV110Wr_1cyc_1AB],
+             (instregex "(BIC)S[WX]rr",
+                        "(ADD|AND|EOR|ORR|SUB)S[WX]r(r|i)",
+                        "^(ADC|SBC)S[WX]r$",
+                        "^(CCMN|CCMP)(W|X)(r|i)$")>;
+
+// Two cycles on the MDU unit: the rs / rx / rx64 forms.
+def : InstRW<[TSV110Wr_2cyc_1MDU],
+             (instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$",
+                        "^(ADD|SUB)[WX]r(s|x|x64)$")>;
+
+// Two cycles on an AB unit: the S-suffixed rs / rx / rx64 forms.
+def : InstRW<[TSV110Wr_2cyc_1AB],
+             (instregex "^(AND|BIC|EON|EOR|ORN|ORR)S[WX]rs$",
+                        "^(ADD|SUB)S[WX]r(s|x|x64)$")>;
+
+
+// Move and Shift Instructions
+// -----------------------------------------------------------------------------
+
+// Everything in this group takes one cycle on the ALU/AB resource group.
+def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instrs ADR, ADRP)>;
+def : InstRW<[TSV110Wr_1cyc_1ALUAB],
+             (instregex "^MOV[NZK][WX]i",
+                        "(LSLV|LSRV|ASRV|RORV)(W|X)r")>;
+
+
+// Divide and Multiply Instructions
+// -----------------------------------------------------------------------------
+
+def : InstRW<[TSV110Wr_12cyc_1MDU],  (instregex "^(S|U)DIVWr$")>; // W-register divide: 12 cycles on MDU
+def : InstRW<[TSV110Wr_20cyc_1MDU],  (instregex "^(S|U)DIVXr$")>; // X-register divide: 20 cycles on MDU
+
+def TSV110ReadMAW : SchedReadAdvance<2, [TSV110Wr_3cyc_1MDU]>; // 2-cycle read advance from TSV110Wr_3cyc_1MDU producers
+def : InstRW<[TSV110Wr_3cyc_1MDU, TSV110ReadMAW], (instrs MADDWrrr, MSUBWrrr)>;
+def TSV110ReadMAQ : SchedReadAdvance<3, [TSV110Wr_4cyc_1MDU]>; // 3-cycle read advance from TSV110Wr_4cyc_1MDU producers
+def : InstRW<[TSV110Wr_4cyc_1MDU, TSV110ReadMAQ], (instrs MADDXrrr, MSUBXrrr)>;
+def : InstRW<[TSV110Wr_3cyc_1MDU, TSV110ReadMAW], (instregex "(S|U)(MADDL|MSUBL)rrr")>;
+def : InstRW<[TSV110Wr_4cyc_1MDU], (instregex "^(S|U)MULHrr$")>;
+
+
+// Miscellaneous Data-Processing Instructions
+// -----------------------------------------------------------------------------
+
+// Everything in this group takes one cycle on the ALU/AB resource group.
+def : InstRW<[TSV110Wr_1cyc_1ALUAB],
+             (instregex "^EXTR(W|X)rri$",
+                        "^(S|U)?BFM(W|X)ri$",
+                        "^(CLS|CLZ|RBIT|REV(16|32)?)(W|X)r$")>;
+
+
+// Load Instructions
+// -----------------------------------------------------------------------------
+
+// LDR*l forms.
+def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDR(W|X)l$")>;
+def : InstRW<[TSV110Wr_4cyc_1LdSt], (instrs LDRSWl)>;
+
+// LDR*ui forms.
+def : InstRW<[TSV110Wr_4cyc_1LdSt],
+             (instregex "^LDR(BB|HH|W|X)ui$",
+                        "^LDRS(BW|BX|HW|HX|W)ui$")>;
+
+// Pre/post-indexed forms add WriteAdr.
+def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteAdr],
+             (instregex "^LDR(BB|HH|W|X)(post|pre)$",
+                        "^LDRS(BW|BX|HW|HX|W)(post|pre)$")>;
+
+// LDTR* / LDUR* forms.
+def : InstRW<[TSV110Wr_4cyc_1LdSt],
+             (instregex "^LDTR(B|H|W|X)i$",
+                        "^LDUR(BB|HH|W|X)i$",
+                        "^LDTRS(BW|BX|HW|HX|W)i$",
+                        "^LDURS(BW|BX|HW|HX|W)i$")>;
+
+// Pair loads add WriteLDHi; their pre/post-indexed forms also add WriteAdr.
+def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi],
+             (instregex "^LDNP(W|X)i$", "^LDP(W|X)i$")>;
+def : InstRW<[TSV110Wr_4cyc_1LdSt_1ALUAB, WriteLDHi, WriteAdr],
+             (instregex "^LDP(W|X)(post|pre)$")>;
+
+def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi], (instrs LDPSWi)>;
+def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi, WriteAdr],
+             (instrs LDPSWpost, LDPSWpre)>;
+
+// PRFM / PRFUM forms use the same write type as the loads above.
+def : InstRW<[TSV110Wr_4cyc_1LdSt], (instrs PRFMl, PRFUMi)>;
+def : InstRW<[TSV110Wr_4cyc_1LdSt],
+             (instregex "^PRFMui$", "^PRFMro(W|X)$")>;
+
+
+// Store Instructions
+// -----------------------------------------------------------------------------
+
+// One cycle on a LdSt unit.
+def : InstRW<[TSV110Wr_1cyc_1LdSt],
+             (instregex "^STN?P(W|X)i$",
+                        "^STUR(BB|HH|W|X)i$",
+                        "^STTR(B|H|W|X)i$",
+                        "^STR(BB|HH|W|X)ui$")>;
+
+// Same write type plus WriteAdr: pre/post-indexed and ro(W|X) forms.
+def : InstRW<[TSV110Wr_1cyc_1LdSt, WriteAdr],
+             (instregex "^STP(W|X)(post|pre)$",
+                        "^STR(BB|HH|W|X)(post|pre)$",
+                        "^STR(BB|HH|W|X)ro(W|X)$")>;
+
+
+// FP Data Processing Instructions
+// -----------------------------------------------------------------------------
+
+// FABS / FNEG.
+def : InstRW<[TSV110Wr_2cyc_1F], (instregex "F(ABS|NEG)(D|S)r")>;
+
+// FCCMP / FCMP / FCSEL.
+def : InstRW<[TSV110Wr_3cyc_1F],
+             (instregex "^FCCMP(E)?(S|D)rr$",
+                        "^FCMP(E)?(S|D)r(r|i)$",
+                        "^FCSEL(S|D)rrr$")>;
+
+// FDIV uses FSU1, FSQRT uses FSU2.
+def : InstRW<[TSV110Wr_11cyc_1FSU1], (instrs FDIVSrr)>;
+def : InstRW<[TSV110Wr_18cyc_1FSU1], (instrs FDIVDrr)>;
+def : InstRW<[TSV110Wr_17cyc_1FSU2], (instrs FSQRTSr)>;
+def : InstRW<[TSV110Wr_31cyc_1FSU2], (instrs FSQRTDr)>;
+
+// FMAX / FMIN.
+def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^F(MAX|MIN).+rr")>;
+
+// F(N)MADD / F(N)MSUB, by H / S / D suffix.
+def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^FN?M(ADD|SUB)Hrrr")>;
+def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^FN?M(ADD|SUB)Srrr")>;
+def : InstRW<[TSV110Wr_7cyc_1F], (instregex "^FN?M(ADD|SUB)Drrr")>;
+
+// FADD / FSUB: the H and D forms share one write type.
+def : InstRW<[TSV110Wr_4cyc_1F],
+             (instregex "^F(ADD|SUB)Hrr", "^F(ADD|SUB)Drr")>;
+def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^F(ADD|SUB)Srr")>;
+
+// F(N)MUL: the S and D forms share one write type.
+def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^F(N)?MULHrr$")>;
+def : InstRW<[TSV110Wr_5cyc_1F],
+             (instregex "^F(N)?MULSrr$", "^F(N)?MULDrr$")>;
+
+// FRINT*.
+def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FRINT.+r")>;
+
+
+// FP Miscellaneous Instructions
+// -----------------------------------------------------------------------------
+
+// [SU]CVTF, FCVT* and FCVT between FP sizes.
+def : InstRW<[TSV110Wr_5cyc_1ALU_1F],
+             (instregex "^[SU]CVTF[SU][WX][SD]ri")>;
+def : InstRW<[TSV110Wr_4cyc_1FSU1],
+             (instregex "^FCVT(A|M|N|P|Z)(S|U)U(W|X)(S|D)r$")>;
+def : InstRW<[TSV110Wr_3cyc_1F],
+             (instregex "^FCVT[HSD][HSD]r")>;
+
+// FMOV.
+def : InstRW<[TSV110Wr_2cyc_1FSU1],
+             (instregex "^FMOV(DX|WS|XD|SW|DXHigh|XDHigh)r$")>;
+def : InstRW<[TSV110Wr_2cyc_1F],
+             (instregex "^FMOV[SD][ir]$")>;
+
+
+// FP Load Instructions
+// -----------------------------------------------------------------------------
+
+// Five cycles on a LdSt unit: l, LDUR and ui forms.
+def : InstRW<[TSV110Wr_5cyc_1LdSt],
+             (instregex "^LDR[DSQ]l", "^LDUR[BDHSQ]i", "^LDR[BDHSQ]ui")>;
+// Pre/post-indexed forms add WriteAdr.
+def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteAdr],
+             (instregex "^LDR[BDHSQ](post|pre)")>;
+// ro(W|X) forms.
+def : InstRW<[TSV110Wr_6cyc_1LdSt_1ALUAB, ReadAdrBase],
+             (instregex "^LDR(Q|D|H|S|B)ro(W|X)$")>;
+// Pair loads add WriteLDHi.
+def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteLDHi],
+             (instregex "^LDN?P[DQS]i")>;
+def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteLDHi, WriteAdr],
+             (instregex "^LDP[DQS](post|pre)")>;
+
+
+// FP Store Instructions
+// -----------------------------------------------------------------------------
+
+// One cycle on a LdSt unit: STUR and ui forms.
+def : InstRW<[TSV110Wr_1cyc_1LdSt],
+             (instregex "^STUR[BHSDQ]i", "^STR[BHSDQ]ui")>;
+// Pre/post-indexed and ro[WX] forms.
+def : InstRW<[TSV110Wr_1cyc_1LdSt_1ALUAB, ReadAdrBase],
+             (instregex "^STR[BHSDQ](post|pre)")>;
+def : InstRW<[TSV110Wr_2cyc_1LdSt_1ALUAB, ReadAdrBase],
+             (instregex "^STR[BHSDQ]ro[WX]")>;
+// Pair stores.
+def : InstRW<[TSV110Wr_2cyc_2LdSt],
+             (instregex "^STN?P[SDQ]i")>;
+def : InstRW<[TSV110Wr_2cyc_2LdSt, WriteAdr],
+             (instregex "^STP[SDQ](post|pre)")>;
+
+
+// ASIMD Integer Instructions
+// -----------------------------------------------------------------------------
+
+// Reference for forms in this group
+//   D form - v8i8, v4i16, v2i32
+//   Q form - v16i8, v8i16, v4i32
+//   D form - v1i8, v1i16, v1i32, v1i64
+//   Q form - v16i8, v8i16, v4i32, v2i64
+//   D form - v8i8_v8i16, v4i16_v4i32, v2i32_v2i64
+//   Q form - v16i8_v8i16, v8i16_v4i32, v4i32_v2i64
+
+// ASIMD simple arithmetic
+def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2],
+             (instregex "^(ABS|ADD(P)?|NEG|SUB)v",
+                        "^[SU](ADD(L|LP|W)|SUB(L|W))v")>;
+
+// ASIMD complex arithmetic
+def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2],
+             (instregex "^[SU]H(ADD|SUB)v",
+                        "^R?(ADD|SUB)HN2?v",
+                        "^[SU]Q(ADD|SUB)v",
+                        "^(SU|US)QADDv",
+                        "^[SU]RHADDv",
+                        "^[SU]ABAL?v",
+                        "^[SU]ABDL?v",
+                        "^[SU]ADALPv",
+                        "^((SQ)(ABS|NEG))v")>;
+
+// ASIMD compare, max/min and logical: all share one write type.
+def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2],
+             (instregex "^CM(EQ|GE|GT|HI|HS|LE|LT|TST)v",
+                        "^[SU](MIN|MAX)P?v",
+                        "^(AND|BIC|BIF|BIT|BSL|EOR|MVN|NOT|ORN|ORR)v")>;
+
+// ASIMD multiply accumulate, D-form
+def : InstRW<[TSV110Wr_4cyc_1FSU1],
+             (instregex "^(MUL|ML[AS]|SQR?D(MULH))(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)")>;
+// ASIMD multiply accumulate, Q-form
+def : InstRW<[TSV110Wr_8cyc_2FSU1],
+             (instregex "^(MUL|ML[AS]|SQR?D(MULH))(v16i8|v8i16|v4i32)")>;
+
+// ASIMD multiply accumulate long, and PMULL
+def : InstRW<[TSV110Wr_4cyc_1FSU1],
+             (instregex "(S|U|SQD)(MLAL|MLSL|MULL)v.*")>;
+def : InstRW<[TSV110Wr_2cyc_1FSU1],
+             (instregex "^PMULL(v8i8|v16i8)", "^PMULL(v1i64|v2i64)")>;
+
+// ASIMD shift
+// ASIMD shift accumulate
+def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^(S|SR|U|UR)SRA")>;
+// ASIMD shift by immed, basic
+def : InstRW<[TSV110Wr_4cyc_1FSU1],
+             (instregex "SHLv", "SLIv", "SRIv", "SHRNv",
+                        "SQXTNv", "SQXTUNv", "UQXTNv")>;
+// ASIMD shift by immed, complex
+def : InstRW<[TSV110Wr_4cyc_1FSU1],
+             (instregex "^[SU]?(Q|R){1,2}SHR", "^SQSHLU")>;
+// ASIMD shift by register, basic, Q-form
+def : InstRW<[TSV110Wr_4cyc_1FSU1],
+             (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>;
+// ASIMD shift by register, complex, D-form and Q-form
+def : InstRW<[TSV110Wr_4cyc_1FSU1],
+             (instregex "^[SU][QR]{1,2}SHL(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|b|d|h|s)",
+                        "^[SU][QR]{1,2}SHL(v16i8|v8i16|v4i32|v2i64)")>;
+
+// ASIMD reduction
+// ASIMD arith, reduce, 4H/4S -- NOTE(review): the pattern lists v8i8|v4i16|v2i32, whereas the max/min 4H/4S entry uses v4i16|v4i32; confirm intent
+def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>;
+// ASIMD arith, reduce, 8B/8H -- NOTE(review): the pattern lists v8i16|v4i32, whereas the max/min 8B/8H entry uses v8i8|v8i16; confirm intent
+def : InstRW<[TSV110Wr_8cyc_1FSU1_1FSU2], (instregex "^[SU]?ADDL?V(v8i16|v4i32)v$")>;
+// ASIMD arith, reduce, 16B
+def : InstRW<[TSV110Wr_8cyc_1FSU1_1FSU2], (instregex "^[SU]?ADDL?Vv16i8v$")>;
+
+// ASIMD max/min, reduce, 4H/4S
+def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v$")>;
+// ASIMD max/min, reduce, 8B/8H
+def : InstRW<[TSV110Wr_8cyc_1FSU1_1FSU2], (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>;
+// ASIMD max/min, reduce, 16B
+def : InstRW<[TSV110Wr_8cyc_1FSU1_1FSU2], (instregex "^[SU](MIN|MAX)Vv16i8v$")>;
+
+
+// Vector - Floating Point
+// -----------------------------------------------------------------------------
+
+// Reference for forms in this group
+//   D form - v2f32
+//   Q form - v4f32, v2f64
+//   D form - 32, 64
+//   D form - v1i32, v1i64
+//   D form - v2i32
+//   Q form - v4i32, v2i64
+
+// ASIMD FP sign manipulation
+def : InstRW<[TSV110Wr_2cyc_1F],  (instregex "^FABSv")>;
+def : InstRW<[TSV110Wr_2cyc_1F],  (instregex "^FNEGv")>;
+
+// ASIMD FP compare
+def : InstRW<[TSV110Wr_2cyc_1F],  (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v")>;
+
+// ASIMD FP convert
+def : InstRW<[TSV110Wr_2cyc_1F],  (instregex "^FCVT[AMNPZ][SU]v")>;
+def : InstRW<[TSV110Wr_3cyc_1F],  (instregex "^FCVT(L)v")>;
+def : InstRW<[TSV110Wr_5cyc_1F],  (instregex "^FCVT(N|XN)v")>;
+
+// ASIMD FP divide, D-form, F32
+def : InstRW<[TSV110Wr_11cyc_1FSU1], (instregex "FDIVv2f32")>;
+// ASIMD FP divide, Q-form, F32
+def : InstRW<[TSV110Wr_24cyc_1FSU1], (instregex "FDIVv4f32")>;
+// ASIMD FP divide, Q-form, F64
+def : InstRW<[TSV110Wr_38cyc_1FSU1], (instregex "FDIVv2f64")>;
+
+// ASIMD FP SQRT
+def : InstRW<[TSV110Wr_17cyc_1FSU2], (instrs FSQRTv2f32)>;
+def : InstRW<[TSV110Wr_36cyc_1FSU2], (instrs FSQRTv4f32)>;
+def : InstRW<[TSV110Wr_64cyc_1FSU2], (instrs FSQRTv2f64)>;
+
+// ASIMD FP max,min
+def : InstRW<[TSV110Wr_2cyc_1F],  (instregex "^F(MAX|MIN)(NM)?v")>;
+def : InstRW<[TSV110Wr_2cyc_1F],  (instregex "^F(MAX|MIN)(NM)?Pv")>;
+def : InstRW<[TSV110Wr_4cyc_1F],  (instregex "^F(MAX|MIN)(NM)?Vv")>;
+
+// ASIMD FP add
+def : InstRW<[TSV110Wr_5cyc_1F],  (instregex "^F(ADD|ADDP|SUB)v")>;
+
+// ASIMD FP multiply
+def : InstRW<[TSV110Wr_5cyc_1F],  (instregex "^FMULX?v")>;
+
+
+// ASIMD Miscellaneous Instructions
+// -----------------------------------------------------------------------------
+
+def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^(CLS|CLZ|CNT)v")>;
+def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^(DUP|INS)v.+lane")>;
+def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^REV(16|32|64)v")>;
+def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^(UZP|ZIP)[12]v")>;
+
+def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^EXTv")>;
+def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^XTNv")>;
+def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^RBITv")>;
+
+def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^(INS|DUP)v.+gpr")>;
+
+def : InstRW<[TSV110Wr_3cyc_1FSU1], (instregex "^[SU]MOVv")>;
+
+// ASIMD table lookup, D-form
+def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^TB[LX]v8i8One")>;
+def : InstRW<[TSV110Wr_4cyc_2F], (instregex "^TB[LX]v8i8Two")>;
+def : InstRW<[TSV110Wr_6cyc_3F], (instregex "^TB[LX]v8i8Three")>;
+def : InstRW<[TSV110Wr_8cyc_4F], (instregex "^TB[LX]v8i8Four")>;
+// ASIMD table lookup, Q-form
+def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^TB[LX]v16i8One")>;
+def : InstRW<[TSV110Wr_4cyc_2F], (instregex "^TB[LX]v16i8Two")>;
+def : InstRW<[TSV110Wr_6cyc_3F], (instregex "^TB[LX]v16i8Three")>;
+def : InstRW<[TSV110Wr_8cyc_4F], (instregex "^TB[LX]v16i8Four")>;
+
+def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^FMOVv")>;
+
+def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FRINT[AIMNPXZ]v")>;
+def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^[SU]CVTFv")>;
+def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^[FU](RECP|RSQRT)(E|X)v")>;
+
+
+// ASIMD Load Instructions
+// -----------------------------------------------------------------------------
+// Every opcode appears twice: the plain form, and a _POST form whose write list also carries WriteAdr.
+def : InstRW<[TSV110Wr_7cyc_1F_1LdSt],            (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_7cyc_1F_1LdSt, WriteAdr],  (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[TSV110Wr_7cyc_2F_1LdSt],            (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_7cyc_2F_1LdSt, WriteAdr],  (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[TSV110Wr_8cyc_3F_1LdSt],            (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_8cyc_3F_1LdSt, WriteAdr],  (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[TSV110Wr_8cyc_3F_2LdSt],            (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_8cyc_3F_2LdSt, WriteAdr],  (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+// NOTE(review): the LD<n>i patterns below have no leading '^', unlike the other patterns in this section; confirm that is intended.
+def  : InstRW<[TSV110Wr_7cyc_1F_1LdSt],           (instregex "LD1i(8|16|32|64)$")>;
+def  : InstRW<[TSV110Wr_7cyc_1F_1LdSt, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>;
+def  : InstRW<[TSV110Wr_7cyc_2F_1LdSt],           (instregex "LD2i(8|16|32|64)$")>;
+def  : InstRW<[TSV110Wr_7cyc_2F_1LdSt, WriteAdr], (instregex "LD2i(8|16|32|64)_POST$")>;
+def  : InstRW<[TSV110Wr_8cyc_3F_1LdSt],           (instregex "LD3i(8|16|32|64)$")>;
+def  : InstRW<[TSV110Wr_8cyc_3F_1LdSt, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>;
+def  : InstRW<[TSV110Wr_8cyc_3F_2LdSt],           (instregex "LD4i(8|16|32|64)$")>;
+def  : InstRW<[TSV110Wr_8cyc_3F_2LdSt, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>;
+
+def : InstRW<[TSV110Wr_5cyc_1LdSt],               (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteAdr],     (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[TSV110Wr_5cyc_1LdSt],               (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteAdr],     (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[TSV110Wr_6cyc_3LdSt],               (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_6cyc_3LdSt, WriteAdr],     (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[TSV110Wr_6cyc_2LdSt],               (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_6cyc_2LdSt, WriteAdr],     (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[TSV110Wr_7cyc_2F_1LdSt],            (instregex "^LD2Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_7cyc_2F_1LdSt, WriteAdr],  (instregex "^LD2Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[TSV110Wr_8cyc_3F_1LdSt],            (instregex "^LD3Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_8cyc_3F_1LdSt, WriteAdr],  (instregex "^LD3Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[TSV110Wr_10cyc_4F_4LdSt],           (instregex "^LD4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_10cyc_4F_4LdSt, WriteAdr], (instregex "^LD4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+
+// ASIMD Store Instructions
+// -----------------------------------------------------------------------------
+// Every opcode appears twice: the plain form, and a _POST form whose write list also carries WriteAdr.
+def  : InstRW<[TSV110Wr_3cyc_1F],             (instregex "ST1i(8|16|32|64)$")>;
+def  : InstRW<[TSV110Wr_3cyc_1F, WriteAdr],   (instregex "ST1i(8|16|32|64)_POST$")>;
+def  : InstRW<[TSV110Wr_4cyc_1F],             (instregex "ST2i(8|16|32|64)$")>;
+def  : InstRW<[TSV110Wr_4cyc_1F, WriteAdr],   (instregex "ST2i(8|16|32|64)_POST$")>;
+def  : InstRW<[TSV110Wr_5cyc_1F],             (instregex "ST3i(8|16|32|64)$")>;
+def  : InstRW<[TSV110Wr_5cyc_1F, WriteAdr],   (instregex "ST3i(8|16|32|64)_POST$")>;
+def  : InstRW<[TSV110Wr_6cyc_1F],             (instregex "ST4i(8|16|32|64)$")>;
+def  : InstRW<[TSV110Wr_6cyc_1F, WriteAdr],   (instregex "ST4i(8|16|32|64)_POST$")>;
+// NOTE(review): the ST<n>i patterns above have no leading '^', unlike the patterns that follow; confirm that is intended.
+def : InstRW<[TSV110Wr_3cyc_1F],              (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_3cyc_1F, WriteAdr],    (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[TSV110Wr_4cyc_1F],              (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_4cyc_1F, WriteAdr],    (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[TSV110Wr_5cyc_1F],              (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_5cyc_1F, WriteAdr],    (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[TSV110Wr_6cyc_1F],              (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_6cyc_1F, WriteAdr],    (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[TSV110Wr_4cyc_1F],              (instregex "^ST2Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_4cyc_1F, WriteAdr],    (instregex "^ST2Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[TSV110Wr_5cyc_1F],              (instregex "^ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_5cyc_1F, WriteAdr],    (instregex "^ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[TSV110Wr_8cyc_1F],              (instregex "^ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_8cyc_1F, WriteAdr],    (instregex "^ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+} // SchedModel = TSV110Model
--- a/suite/synctools/tablegen/AArch64/AArch64SchedThunderX.td
+++ b/suite/synctools/tablegen/AArch64/AArch64SchedThunderX.td
@ -1,9 +1,8 @@
 //==- AArch64SchedThunderX.td - Cavium ThunderX T8X Scheduling Definitions -*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@ -26,8 +25,9 @@ def ThunderXT8XModel : SchedMachineModel {
  let PostRAScheduler = 1;    // Use PostRA scheduler.
  let CompleteModel = 1;

-  list<Predicate> UnsupportedFeatures = [HasSVE];
-
+  list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
+                                                    PAUnsupported.F,
+                                                    SMEUnsupported.F);
  // FIXME: Remove when all errors have been fixed.
  let FullInstRWOverlapCheck = 0;
 }
@ -154,7 +154,8 @@ def : WriteRes<WriteFCmp, [THXT8XUnitFPALU]> { let Latency = 6; }
 def : WriteRes<WriteFCvt, [THXT8XUnitFPALU]> { let Latency = 6; }
 def : WriteRes<WriteFCopy, [THXT8XUnitFPALU]> { let Latency = 6; }
 def : WriteRes<WriteFImm, [THXT8XUnitFPALU]> { let Latency = 6; }
-def : WriteRes<WriteV, [THXT8XUnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteVd, [THXT8XUnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteVq, [THXT8XUnitFPALU]> { let Latency = 6; }

 // FP Mul, Div, Sqrt
 def : WriteRes<WriteFMul, [THXT8XUnitFPMDS]> { let Latency = 6; }
@ -192,6 +193,7 @@ def THXT8XWriteFSqrtDP : SchedWriteRes<[THXT8XUnitFPMDS]> {
 def : ReadAdvance<ReadExtrHi, 1>;
 def : ReadAdvance<ReadAdrBase, 2>;
 def : ReadAdvance<ReadVLD, 2>;
+def : ReadAdvance<ReadST, 2>;

 // FIXME: This needs more targeted benchmarking.
 // ALU - Most operands in the ALU pipes are not needed for two cycles. Shiftable
--- a/suite/synctools/tablegen/AArch64/AArch64SchedThunderX2T99.td
+++ b/suite/synctools/tablegen/AArch64/AArch64SchedThunderX2T99.td
@ -1,9 +1,8 @@
 //=- AArch64SchedThunderX2T99.td - Cavium ThunderX T99 ---*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@ -26,8 +25,9 @@ def ThunderX2T99Model : SchedMachineModel {
  let PostRAScheduler       =   1; // Using PostRA sched.
  let CompleteModel         =   1;

-  list<Predicate> UnsupportedFeatures = [HasSVE];
-
+  list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
+                                                    PAUnsupported.F,
+                                                    SMEUnsupported.F);
  // FIXME: Remove when all errors have been fixed.
  let FullInstRWOverlapCheck = 0;
 }
@ -362,6 +362,7 @@ def : ReadAdvance<ReadID,      0>;
 def : ReadAdvance<ReadExtrHi,  0>;
 def : ReadAdvance<ReadAdrBase, 0>;
 def : ReadAdvance<ReadVLD,     0>;
+def : ReadAdvance<ReadST,      0>;

 //===----------------------------------------------------------------------===//
 // 3. Instruction Tables.
@ -1249,7 +1250,12 @@ def : InstRW<[THX2T99Write_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>;
 // ASIMD shift by register, basic, Q-form
 // ASIMD shift by register, complex, D-form
 // ASIMD shift by register, complex, Q-form
-def : WriteRes<WriteV, [THX2T99F01]> {
+def : WriteRes<WriteVd, [THX2T99F01]> {
+  let Latency = 7;
+  let NumMicroOps = 4;
+  let ResourceCycles = [4];
+}
+def : WriteRes<WriteVq, [THX2T99F01]> {
  let Latency = 7;
  let NumMicroOps = 4;
  let ResourceCycles = [4];
@ -1483,7 +1489,7 @@ def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^RBITv")>;
 // ASIMD bitwise insert, D-form
 // ASIMD bitwise insert, Q-form
 def : InstRW<[THX2T99Write_5Cyc_F01],
-            (instregex "^BIFv", "^BITv", "^BSLv")>;
+            (instregex "^BIFv", "^BITv", "^BSLv", "^BSPv")>;

 // ASIMD count, D-form
 // ASIMD count, Q-form
@ -1493,7 +1499,7 @@ def : InstRW<[THX2T99Write_5Cyc_F01],
 // ASIMD duplicate, gen reg
 // ASIMD duplicate, element
 def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^DUPv")>;
-def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^CPY")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^DUP(i8|i16|i32|i64)$")>;
 def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^DUPv.+gpr")>;

 // ASIMD extract
@ -1518,25 +1524,6 @@ def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^MOVIv")>;
 // ASIMD move, FP immed
 def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMOVv")>;

-// ASIMD table lookup, D-form
-def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8One")>;
-def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8Two")>;
-def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8Three")>;
-def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8Four")>;
-
-// ASIMD table lookup, Q-form
-def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8One")>;
-def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8Two")>;
-def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8Three")>;
-def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8Four")>;
-
-// ASIMD transpose
-def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^TRN1", "^TRN2")>;
-
-// ASIMD unzip/zip
-def : InstRW<[THX2T99Write_5Cyc_F01],
-            (instregex "^UZP1", "^UZP2", "^ZIP1", "^ZIP2")>;
-
 // ASIMD reciprocal estimate, D-form
 // ASIMD reciprocal estimate, Q-form
 def : InstRW<[THX2T99Write_5Cyc_F01],
--- a/suite/synctools/tablegen/AArch64/AArch64SchedThunderX3T110.td
+++ b/suite/synctools/tablegen/AArch64/AArch64SchedThunderX3T110.td
--- a/suite/synctools/tablegen/AArch64/AArch64Schedule.td
+++ b/suite/synctools/tablegen/AArch64/AArch64Schedule.td
@ -1,9 +1,8 @@
 //==-- AArch64Schedule.td - AArch64 Scheduling Definitions -*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//

@ -48,19 +47,9 @@ def WriteAdr       : SchedWrite; // Address pre/post increment.

 def WriteLDIdx : SchedWrite; // Load from a register index (maybe scaled).
 def WriteSTIdx : SchedWrite; // Store to a register index (maybe scaled).
+def ReadST     : SchedRead;  // Read the stored value.
 def ReadAdrBase : SchedRead; // Read the base resister of a reg-offset LD/ST.

-// Predicate for determining when a shiftable register is shifted.
-def RegShiftedPred : SchedPredicate<[{TII->hasShiftedReg(*MI)}]>;
-
-// Predicate for determining when a extendedable register is extended.
-def RegExtendedPred : SchedPredicate<[{TII->hasExtendedReg(*MI)}]>;
-
-// ScaledIdxPred is true if a WriteLDIdx operand will be
-// scaled. Subtargets can use this to dynamically select resources and
-// latency for WriteLDIdx and ReadAdrBase.
-def ScaledIdxPred : SchedPredicate<[{TII->isScaledAddr(*MI)}]>;
-
 // Serialized two-level address load.
 // EXAMPLE: LOADGot
 def WriteLDAdr : WriteSequence<[WriteAdr, WriteLD]>;
@ -88,7 +77,8 @@ def WriteFImm    : SchedWrite; // Floating-point immediate.
 def WriteFMul    : SchedWrite; // Floating-point multiply.
 def WriteFDiv    : SchedWrite; // Floating-point division.

-def WriteV   : SchedWrite; // Vector ops.
+def WriteVd  : SchedWrite; // 64bit Vector D ops.
+def WriteVq  : SchedWrite; // 128bit Vector Q ops.
 def WriteVLD : SchedWrite; // Vector loads.
 def WriteVST : SchedWrite; // Vector stores.

@ -98,9 +88,9 @@ def WriteAtomic : SchedWrite; // Atomic memory operations (CAS, Swap, LDOP)
 def ReadVLD : SchedRead;

 // Sequential vector load and shuffle.
-def WriteVLDShuffle     : WriteSequence<[WriteVLD, WriteV]>;
-def WriteVLDPairShuffle : WriteSequence<[WriteVLD, WriteV, WriteV]>;
+def WriteVLDShuffle     : WriteSequence<[WriteVLD, WriteVq]>;
+def WriteVLDPairShuffle : WriteSequence<[WriteVLD, WriteVq, WriteVq]>;

 // Store a shuffled vector.
-def WriteVSTShuffle : WriteSequence<[WriteV, WriteVST]>;
-def WriteVSTPairShuffle : WriteSequence<[WriteV, WriteV, WriteVST]>;
+def WriteVSTShuffle : WriteSequence<[WriteVq, WriteVST]>;
+def WriteVSTPairShuffle : WriteSequence<[WriteVq, WriteVq, WriteVST]>;
--- a/suite/synctools/tablegen/AArch64/AArch64SystemOperands.td
+++ b/suite/synctools/tablegen/AArch64/AArch64SystemOperands.td
@ -1,9 +1,8 @@
 //===- AArch64SystemOperands.td ----------------------------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@ -14,6 +13,30 @@

 include "llvm/TableGen/SearchableTable.td"

+//===----------------------------------------------------------------------===//
+// Features that, for the compiler, only enable system operands and PStates
+//===----------------------------------------------------------------------===//
+// Each def pairs a Subtarget->has*() Predicate with an AssemblerPredicate on the matching Feature* plus a description string.
+def HasCCPP    : Predicate<"Subtarget->hasCCPP()">,
+                 AssemblerPredicate<(all_of FeatureCCPP), "ccpp">;
+
+def HasPAN     : Predicate<"Subtarget->hasPAN()">,
+                 AssemblerPredicate<(all_of FeaturePAN),
+                 "ARM v8.1  Privileged Access-Never extension">;
+
+def HasPsUAO   : Predicate<"Subtarget->hasPsUAO()">,
+                 AssemblerPredicate<(all_of FeaturePsUAO),
+                 "ARM v8.2 UAO PState extension (psuao)">;
+
+def HasPAN_RWV : Predicate<"Subtarget->hasPAN_RWV()">,
+                 AssemblerPredicate<(all_of FeaturePAN_RWV),
+                 "ARM v8.2 PAN AT S1E1R and AT S1E1W Variation">;
+
+def HasCONTEXTIDREL2
+               : Predicate<"Subtarget->hasCONTEXTIDREL2()">,
+                 AssemblerPredicate<(all_of FeatureCONTEXTIDREL2),
+                 "Target contains CONTEXTIDR_EL2 RW operand">;
+
 //===----------------------------------------------------------------------===//
 // AT (address translate) instruction options.
 //===----------------------------------------------------------------------===//
@ -45,7 +68,7 @@ def : AT<"S12E1W", 0b100, 0b0111, 0b1000, 0b101>;
 def : AT<"S12E0R", 0b100, 0b0111, 0b1000, 0b110>;
 def : AT<"S12E0W", 0b100, 0b0111, 0b1000, 0b111>;

-let Requires = [{ {AArch64::HasV8_2aOps} }] in {
+let Requires = [{ {AArch64::FeaturePAN_RWV} }] in {
 def : AT<"S1E1RP", 0b000, 0b0111, 0b1001, 0b000>;
 def : AT<"S1E1WP", 0b000, 0b0111, 0b1001, 0b001>;
 }
@ -75,6 +98,21 @@ def : DB<"ld",    0xd>;
 def : DB<"st",    0xe>;
 def : DB<"sy",    0xf>;

+class DBnXS<string name, bits<4> encoding, bits<5> immValue> : SearchableTable { // table entry: name, 4-bit encoding, 5-bit immediate
+  let SearchableFields = ["Name", "Encoding", "ImmValue"]; // all three fields are listed as searchable
+  let EnumValueField = "Encoding";
+
+  string Name = name;
+  bits<4> Encoding = encoding;
+  bits<5> ImmValue = immValue;
+  code Requires = [{ {AArch64::FeatureXS} }]; // fixed in the class, so every entry is gated on FeatureXS
+}
+// The "nxs"-suffixed option names, each with its encoding and immediate value.
+def : DBnXS<"oshnxs", 0x3, 0x10>;
+def : DBnXS<"nshnxs", 0x7, 0x14>;
+def : DBnXS<"ishnxs", 0xb, 0x18>;
+def : DBnXS<"synxs",  0xf, 0x1c>;
+
 //===----------------------------------------------------------------------===//
 // DC (data cache maintenance) instruction options.
 //===----------------------------------------------------------------------===//
@ -102,9 +140,33 @@ def : DC<"CVAU",  0b011, 0b0111, 0b1011, 0b001>;
 def : DC<"CIVAC", 0b011, 0b0111, 0b1110, 0b001>;
 def : DC<"CISW",  0b000, 0b0111, 0b1110, 0b010>;

-let Requires = [{ {AArch64::HasV8_2aOps} }] in
+let Requires = [{ {AArch64::FeatureCCPP} }] in
 def : DC<"CVAP",  0b011, 0b0111, 0b1100, 0b001>;

+let Requires = [{ {AArch64::FeatureCacheDeepPersist} }] in
+def : DC<"CVADP",  0b011, 0b0111, 0b1101, 0b001>;
+// DC options gated on FeatureMTE: the braced let applies to every def up to the closing brace.
+let Requires = [{ {AArch64::FeatureMTE} }] in {
+def : DC<"IGVAC",   0b000, 0b0111, 0b0110, 0b011>;
+def : DC<"IGSW",    0b000, 0b0111, 0b0110, 0b100>;
+def : DC<"CGSW",    0b000, 0b0111, 0b1010, 0b100>;
+def : DC<"CIGSW",   0b000, 0b0111, 0b1110, 0b100>;
+def : DC<"CGVAC",   0b011, 0b0111, 0b1010, 0b011>;
+def : DC<"CGVAP",   0b011, 0b0111, 0b1100, 0b011>;
+def : DC<"CGVADP",  0b011, 0b0111, 0b1101, 0b011>;
+def : DC<"CIGVAC",  0b011, 0b0111, 0b1110, 0b011>;
+def : DC<"GVA",     0b011, 0b0111, 0b0100, 0b011>;
+def : DC<"IGDVAC",  0b000, 0b0111, 0b0110, 0b101>;
+def : DC<"IGDSW",   0b000, 0b0111, 0b0110, 0b110>;
+def : DC<"CGDSW",   0b000, 0b0111, 0b1010, 0b110>;
+def : DC<"CIGDSW",  0b000, 0b0111, 0b1110, 0b110>;
+def : DC<"CGDVAC",  0b011, 0b0111, 0b1010, 0b101>;
+def : DC<"CGDVAP",  0b011, 0b0111, 0b1100, 0b101>;
+def : DC<"CGDVADP", 0b011, 0b0111, 0b1101, 0b101>;
+def : DC<"CIGDVAC", 0b011, 0b0111, 0b1110, 0b101>;
+def : DC<"GZVA",    0b011, 0b0111, 0b0100, 0b100>;
+} // Requires = FeatureMTE
+
 //===----------------------------------------------------------------------===//
 // IC (instruction cache maintenance) instruction options.
 //===----------------------------------------------------------------------===//
@ -154,7 +216,7 @@ class TSB<string name, bits<4> encoding> : SearchableTable{
  bits<4> Encoding;
  let Encoding = encoding;

-  code Requires = [{ {AArch64::HasV8_4aOps} }];
+  code Requires = [{ {AArch64::FeatureTRACEV8_4} }];
 }

 def : TSB<"csync", 0>;
@ -290,14 +352,41 @@ def : PState<"SPSel",   0b00101>;
 def : PState<"DAIFSet", 0b11110>;
 def : PState<"DAIFClr", 0b11111>;
 // v8.1a "Privileged Access Never" extension-specific PStates
-let Requires = [{ {AArch64::HasV8_1aOps} }] in
+let Requires = [{ {AArch64::FeaturePAN} }] in
 def : PState<"PAN",     0b00100>;
+
 // v8.2a "User Access Override" extension-specific PStates
-let Requires = [{ {AArch64::HasV8_2aOps} }] in
+let Requires = [{ {AArch64::FeaturePsUAO} }] in
 def : PState<"UAO",     0b00011>;
-// v8.4a timining insensitivity of data processing instructions
-let Requires = [{ {AArch64::HasV8_4aOps} }] in
+// v8.4a timing insensitivity of data processing instructions
+let Requires = [{ {AArch64::FeatureDIT} }] in
 def : PState<"DIT",     0b11010>;
+// v8.5a Spectre Mitigation
+let Requires = [{ {AArch64::FeatureSSBS} }] in
+def : PState<"SSBS",    0b11001>;
+// v8.5a Memory Tagging Extension
+let Requires = [{ {AArch64::FeatureMTE} }] in
+def : PState<"TCO",     0b11100>;
+
+//===----------------------------------------------------------------------===//
+// SVCR instruction options.
+//===----------------------------------------------------------------------===//
+// One table entry per named option: a name plus its 3-bit encoding.
+class SVCR<string name, bits<3> encoding> : SearchableTable {
+  let SearchableFields = ["Name", "Encoding"];
+  let EnumValueField = "Encoding";
+
+  string Name = name;
+  bits<3> Encoding;
+  let Encoding = encoding;
+  code Requires = [{ {} }]; // empty by default; the defs below set it through an enclosing let
+}
+// All three entries are gated on FeatureSME.
+let Requires = [{ {AArch64::FeatureSME} }] in {
+def : SVCR<"SVCRSM",   0b001>;
+def : SVCR<"SVCRZA",   0b010>;
+def : SVCR<"SVCRSMZA", 0b011>;
+} // Requires = FeatureSME

 //===----------------------------------------------------------------------===//
 // PSB instruction options.
@ -315,14 +404,28 @@ class PSB<string name, bits<5> encoding> : SearchableTable {
 def : PSB<"csync", 0x11>;

 //===----------------------------------------------------------------------===//
-// TLBI (translation lookaside buffer invalidate) instruction options.
+// BTI instruction options.
 //===----------------------------------------------------------------------===//

-class TLBI<string name, bits<3> op1, bits<4> crn, bits<4> crm,
-             bits<3> op2, bit needsreg = 1> : SearchableTable {
+class BTI<string name, bits<3> encoding> : SearchableTable { // table entry: operand name plus 3-bit encoding
  let SearchableFields = ["Name", "Encoding"];
  let EnumValueField = "Encoding";

+  string Name = name;
+  bits<3> Encoding;
+  let Encoding = encoding;
+} // no Requires field is declared for BTI entries
+// Named BTI operands and their 3-bit encodings.
+def : BTI<"c",  0b010>;
+def : BTI<"j",  0b100>;
+def : BTI<"jc", 0b110>;
+
+//===----------------------------------------------------------------------===//
+// TLBI (translation lookaside buffer invalidate) instruction options.
+//===----------------------------------------------------------------------===//
+
+class TLBIEntry<string name, bits<3> op1, bits<4> crn, bits<4> crm,
+             bits<3> op2, bit needsreg> {
  string Name = name;
  bits<14> Encoding;
  let Encoding{13-11} = op1;
@ -330,94 +433,147 @@ class TLBI<string name, bits<3> op1, bits<4> crn, bits<4> crm,
  let Encoding{6-3} = crm;
  let Encoding{2-0} = op2;
  bit NeedsReg = needsreg;
-  code Requires = [{ {} }];
+  list<string> Requires = [];
+  list<string> ExtraRequires = [];
+  code RequiresStr = [{ { }] # !interleave(Requires # ExtraRequires, [{, }]) # [{ } }];
 }

-def : TLBI<"IPAS2E1IS",    0b100, 0b1000, 0b0000, 0b001>;
-def : TLBI<"IPAS2LE1IS",   0b100, 0b1000, 0b0000, 0b101>;
-def : TLBI<"VMALLE1IS",    0b000, 0b1000, 0b0011, 0b000, 0>;
-def : TLBI<"ALLE2IS",      0b100, 0b1000, 0b0011, 0b000, 0>;
-def : TLBI<"ALLE3IS",      0b110, 0b1000, 0b0011, 0b000, 0>;
-def : TLBI<"VAE1IS",       0b000, 0b1000, 0b0011, 0b001>;
-def : TLBI<"VAE2IS",       0b100, 0b1000, 0b0011, 0b001>;
-def : TLBI<"VAE3IS",       0b110, 0b1000, 0b0011, 0b001>;
-def : TLBI<"ASIDE1IS",     0b000, 0b1000, 0b0011, 0b010>;
-def : TLBI<"VAAE1IS",      0b000, 0b1000, 0b0011, 0b011>;
-def : TLBI<"ALLE1IS",      0b100, 0b1000, 0b0011, 0b100, 0>;
-def : TLBI<"VALE1IS",      0b000, 0b1000, 0b0011, 0b101>;
-def : TLBI<"VALE2IS",      0b100, 0b1000, 0b0011, 0b101>;
-def : TLBI<"VALE3IS",      0b110, 0b1000, 0b0011, 0b101>;
-def : TLBI<"VMALLS12E1IS", 0b100, 0b1000, 0b0011, 0b110, 0>;
-def : TLBI<"VAALE1IS",     0b000, 0b1000, 0b0011, 0b111>;
-def : TLBI<"IPAS2E1",      0b100, 0b1000, 0b0100, 0b001>;
-def : TLBI<"IPAS2LE1",     0b100, 0b1000, 0b0100, 0b101>;
-def : TLBI<"VMALLE1",      0b000, 0b1000, 0b0111, 0b000, 0>;
-def : TLBI<"ALLE2",        0b100, 0b1000, 0b0111, 0b000, 0>;
-def : TLBI<"ALLE3",        0b110, 0b1000, 0b0111, 0b000, 0>;
-def : TLBI<"VAE1",         0b000, 0b1000, 0b0111, 0b001>;
-def : TLBI<"VAE2",         0b100, 0b1000, 0b0111, 0b001>;
-def : TLBI<"VAE3",         0b110, 0b1000, 0b0111, 0b001>;
-def : TLBI<"ASIDE1",       0b000, 0b1000, 0b0111, 0b010>;
-def : TLBI<"VAAE1",        0b000, 0b1000, 0b0111, 0b011>;
-def : TLBI<"ALLE1",        0b100, 0b1000, 0b0111, 0b100, 0>;
-def : TLBI<"VALE1",        0b000, 0b1000, 0b0111, 0b101>;
-def : TLBI<"VALE2",        0b100, 0b1000, 0b0111, 0b101>;
-def : TLBI<"VALE3",        0b110, 0b1000, 0b0111, 0b101>;
-def : TLBI<"VMALLS12E1",   0b100, 0b1000, 0b0111, 0b110, 0>;
-def : TLBI<"VAALE1",       0b000, 0b1000, 0b0111, 0b111>;
+// Generic table collecting every record of class TLBIEntry. The emitted C++
+// type is named "TLBI" and carries the four fields listed below.
+def TLBITable : GenericTable {
+  let FilterClass = "TLBIEntry";
+  let CppTypeName = "TLBI";
+  let Fields = ["Name", "Encoding", "NeedsReg", "RequiresStr"];
+}

+// Search index over TLBITable keyed on the "Name" field.
+def lookupTLBIByName : SearchIndex {
+  let Table = TLBITable;
+  let Key = ["Name"];
+}
+
+// Search index over TLBITable keyed on the "Encoding" field.
+def lookupTLBIByEncoding : SearchIndex {
+  let Table = TLBITable;
+  let Key = ["Encoding"];
+}
+
+// Each `defm : TLBI<...>` emits two TLBIEntry records from the same operands:
+//   1. the entry named `name`, and
+//   2. an entry named `name` # "nXS" with Encoding bit 7 set, which also
+//      adds AArch64::FeatureXS through ExtraRequires.
+// `needsreg` defaults to 1 when the caller omits it.
+multiclass TLBI<string name, bits<3> op1, bits<4> crn, bits<4> crm,
+             bits<3> op2, bit needsreg = 1> {
+  def : TLBIEntry<name, op1, crn, crm, op2, needsreg>;
+  def : TLBIEntry<!strconcat(name, "nXS"), op1, crn, crm, op2, needsreg> {
+    let Encoding{7} = 1;
+    let ExtraRequires = ["AArch64::FeatureXS"];
+  }
+}
+
+defm : TLBI<"IPAS2E1IS",    0b100, 0b1000, 0b0000, 0b001>;
+defm : TLBI<"IPAS2LE1IS",   0b100, 0b1000, 0b0000, 0b101>;
+defm : TLBI<"VMALLE1IS",    0b000, 0b1000, 0b0011, 0b000, 0>;
+defm : TLBI<"ALLE2IS",      0b100, 0b1000, 0b0011, 0b000, 0>;
+defm : TLBI<"ALLE3IS",      0b110, 0b1000, 0b0011, 0b000, 0>;
+defm : TLBI<"VAE1IS",       0b000, 0b1000, 0b0011, 0b001>;
+defm : TLBI<"VAE2IS",       0b100, 0b1000, 0b0011, 0b001>;
+defm : TLBI<"VAE3IS",       0b110, 0b1000, 0b0011, 0b001>;
+defm : TLBI<"ASIDE1IS",     0b000, 0b1000, 0b0011, 0b010>;
+defm : TLBI<"VAAE1IS",      0b000, 0b1000, 0b0011, 0b011>;
+defm : TLBI<"ALLE1IS",      0b100, 0b1000, 0b0011, 0b100, 0>;
+defm : TLBI<"VALE1IS",      0b000, 0b1000, 0b0011, 0b101>;
+defm : TLBI<"VALE2IS",      0b100, 0b1000, 0b0011, 0b101>;
+defm : TLBI<"VALE3IS",      0b110, 0b1000, 0b0011, 0b101>;
+defm : TLBI<"VMALLS12E1IS", 0b100, 0b1000, 0b0011, 0b110, 0>;
+defm : TLBI<"VAALE1IS",     0b000, 0b1000, 0b0011, 0b111>;
+defm : TLBI<"IPAS2E1",      0b100, 0b1000, 0b0100, 0b001>;
+defm : TLBI<"IPAS2LE1",     0b100, 0b1000, 0b0100, 0b101>;
+defm : TLBI<"VMALLE1",      0b000, 0b1000, 0b0111, 0b000, 0>;
+defm : TLBI<"ALLE2",        0b100, 0b1000, 0b0111, 0b000, 0>;
+defm : TLBI<"ALLE3",        0b110, 0b1000, 0b0111, 0b000, 0>;
+defm : TLBI<"VAE1",         0b000, 0b1000, 0b0111, 0b001>;
+defm : TLBI<"VAE2",         0b100, 0b1000, 0b0111, 0b001>;
+defm : TLBI<"VAE3",         0b110, 0b1000, 0b0111, 0b001>;
+defm : TLBI<"ASIDE1",       0b000, 0b1000, 0b0111, 0b010>;
+defm : TLBI<"VAAE1",        0b000, 0b1000, 0b0111, 0b011>;
+defm : TLBI<"ALLE1",        0b100, 0b1000, 0b0111, 0b100, 0>;
+defm : TLBI<"VALE1",        0b000, 0b1000, 0b0111, 0b101>;
+defm : TLBI<"VALE2",        0b100, 0b1000, 0b0111, 0b101>;
+defm : TLBI<"VALE3",        0b110, 0b1000, 0b0111, 0b101>;
+defm : TLBI<"VMALLS12E1",   0b100, 0b1000, 0b0111, 0b110, 0>;
+defm : TLBI<"VAALE1",       0b000, 0b1000, 0b0111, 0b111>;
+
+// Armv8.4-A Translation Lookaside Buffer Instructions (TLBI)
+let Requires = ["AArch64::FeatureTLB_RMI"] in {
 // Armv8.4-A Outer Sharable TLB Maintenance instructions:
-let Requires = [{ {AArch64::HasV8_4aOps} }] in {
 //                         op1    CRn     CRm     op2
-def : TLBI<"VMALLE1OS",    0b000, 0b1000, 0b0001, 0b000, 0>;
-def : TLBI<"VAE1OS",       0b000, 0b1000, 0b0001, 0b001>;
-def : TLBI<"ASIDE1OS",     0b000, 0b1000, 0b0001, 0b010>;
-def : TLBI<"VAAE1OS",      0b000, 0b1000, 0b0001, 0b011>;
-def : TLBI<"VALE1OS",      0b000, 0b1000, 0b0001, 0b101>;
-def : TLBI<"VAALE1OS",     0b000, 0b1000, 0b0001, 0b111>;
-def : TLBI<"IPAS2E1OS",    0b100, 0b1000, 0b0100, 0b000>;
-def : TLBI<"IPAS2LE1OS",   0b100, 0b1000, 0b0100, 0b100>;
-def : TLBI<"VAE2OS",       0b100, 0b1000, 0b0001, 0b001>;
-def : TLBI<"VALE2OS",      0b100, 0b1000, 0b0001, 0b101>;
-def : TLBI<"VMALLS12E1OS", 0b100, 0b1000, 0b0001, 0b110, 0>;
-def : TLBI<"VAE3OS",       0b110, 0b1000, 0b0001, 0b001>;
-def : TLBI<"VALE3OS",      0b110, 0b1000, 0b0001, 0b101>;
-def : TLBI<"ALLE2OS",      0b100, 0b1000, 0b0001, 0b000, 0>;
-def : TLBI<"ALLE1OS",      0b100, 0b1000, 0b0001, 0b100, 0>;
-def : TLBI<"ALLE3OS",      0b110, 0b1000, 0b0001, 0b000, 0>;
+defm : TLBI<"VMALLE1OS",    0b000, 0b1000, 0b0001, 0b000, 0>;
+defm : TLBI<"VAE1OS",       0b000, 0b1000, 0b0001, 0b001>;
+defm : TLBI<"ASIDE1OS",     0b000, 0b1000, 0b0001, 0b010>;
+defm : TLBI<"VAAE1OS",      0b000, 0b1000, 0b0001, 0b011>;
+defm : TLBI<"VALE1OS",      0b000, 0b1000, 0b0001, 0b101>;
+defm : TLBI<"VAALE1OS",     0b000, 0b1000, 0b0001, 0b111>;
+defm : TLBI<"IPAS2E1OS",    0b100, 0b1000, 0b0100, 0b000>;
+defm : TLBI<"IPAS2LE1OS",   0b100, 0b1000, 0b0100, 0b100>;
+defm : TLBI<"VAE2OS",       0b100, 0b1000, 0b0001, 0b001>;
+defm : TLBI<"VALE2OS",      0b100, 0b1000, 0b0001, 0b101>;
+defm : TLBI<"VMALLS12E1OS", 0b100, 0b1000, 0b0001, 0b110, 0>;
+defm : TLBI<"VAE3OS",       0b110, 0b1000, 0b0001, 0b001>;
+defm : TLBI<"VALE3OS",      0b110, 0b1000, 0b0001, 0b101>;
+defm : TLBI<"ALLE2OS",      0b100, 0b1000, 0b0001, 0b000, 0>;
+defm : TLBI<"ALLE1OS",      0b100, 0b1000, 0b0001, 0b100, 0>;
+defm : TLBI<"ALLE3OS",      0b110, 0b1000, 0b0001, 0b000, 0>;

 // Armv8.4-A TLB Range Maintenance instructions:
 //                         op1    CRn     CRm     op2
-def : TLBI<"RVAE1",        0b000, 0b1000, 0b0110, 0b001>;
-def : TLBI<"RVAAE1",       0b000, 0b1000, 0b0110, 0b011>;
-def : TLBI<"RVALE1",       0b000, 0b1000, 0b0110, 0b101>;
-def : TLBI<"RVAALE1",      0b000, 0b1000, 0b0110, 0b111>;
-def : TLBI<"RVAE1IS",      0b000, 0b1000, 0b0010, 0b001>;
-def : TLBI<"RVAAE1IS",     0b000, 0b1000, 0b0010, 0b011>;
-def : TLBI<"RVALE1IS",     0b000, 0b1000, 0b0010, 0b101>;
-def : TLBI<"RVAALE1IS",    0b000, 0b1000, 0b0010, 0b111>;
-def : TLBI<"RVAE1OS",      0b000, 0b1000, 0b0101, 0b001>;
-def : TLBI<"RVAAE1OS",     0b000, 0b1000, 0b0101, 0b011>;
-def : TLBI<"RVALE1OS",     0b000, 0b1000, 0b0101, 0b101>;
-def : TLBI<"RVAALE1OS",    0b000, 0b1000, 0b0101, 0b111>;
-def : TLBI<"RIPAS2E1IS",   0b100, 0b1000, 0b0000, 0b010>;
-def : TLBI<"RIPAS2LE1IS",  0b100, 0b1000, 0b0000, 0b110>;
-def : TLBI<"RIPAS2E1",     0b100, 0b1000, 0b0100, 0b010>;
-def : TLBI<"RIPAS2LE1",    0b100, 0b1000, 0b0100, 0b110>;
-def : TLBI<"RIPAS2E1OS",   0b100, 0b1000, 0b0100, 0b011>;
-def : TLBI<"RIPAS2LE1OS",  0b100, 0b1000, 0b0100, 0b111>;
-def : TLBI<"RVAE2",        0b100, 0b1000, 0b0110, 0b001>;
-def : TLBI<"RVALE2",       0b100, 0b1000, 0b0110, 0b101>;
-def : TLBI<"RVAE2IS",      0b100, 0b1000, 0b0010, 0b001>;
-def : TLBI<"RVALE2IS",     0b100, 0b1000, 0b0010, 0b101>;
-def : TLBI<"RVAE2OS",      0b100, 0b1000, 0b0101, 0b001>;
-def : TLBI<"RVALE2OS",     0b100, 0b1000, 0b0101, 0b101>;
-def : TLBI<"RVAE3",        0b110, 0b1000, 0b0110, 0b001>;
-def : TLBI<"RVALE3",       0b110, 0b1000, 0b0110, 0b101>;
-def : TLBI<"RVAE3IS",      0b110, 0b1000, 0b0010, 0b001>;
-def : TLBI<"RVALE3IS",     0b110, 0b1000, 0b0010, 0b101>;
-def : TLBI<"RVAE3OS",      0b110, 0b1000, 0b0101, 0b001>;
-def : TLBI<"RVALE3OS",     0b110, 0b1000, 0b0101, 0b101>;
+defm : TLBI<"RVAE1",        0b000, 0b1000, 0b0110, 0b001>;
+defm : TLBI<"RVAAE1",       0b000, 0b1000, 0b0110, 0b011>;
+defm : TLBI<"RVALE1",       0b000, 0b1000, 0b0110, 0b101>;
+defm : TLBI<"RVAALE1",      0b000, 0b1000, 0b0110, 0b111>;
+defm : TLBI<"RVAE1IS",      0b000, 0b1000, 0b0010, 0b001>;
+defm : TLBI<"RVAAE1IS",     0b000, 0b1000, 0b0010, 0b011>;
+defm : TLBI<"RVALE1IS",     0b000, 0b1000, 0b0010, 0b101>;
+defm : TLBI<"RVAALE1IS",    0b000, 0b1000, 0b0010, 0b111>;
+defm : TLBI<"RVAE1OS",      0b000, 0b1000, 0b0101, 0b001>;
+defm : TLBI<"RVAAE1OS",     0b000, 0b1000, 0b0101, 0b011>;
+defm : TLBI<"RVALE1OS",     0b000, 0b1000, 0b0101, 0b101>;
+defm : TLBI<"RVAALE1OS",    0b000, 0b1000, 0b0101, 0b111>;
+defm : TLBI<"RIPAS2E1IS",   0b100, 0b1000, 0b0000, 0b010>;
+defm : TLBI<"RIPAS2LE1IS",  0b100, 0b1000, 0b0000, 0b110>;
+defm : TLBI<"RIPAS2E1",     0b100, 0b1000, 0b0100, 0b010>;
+defm : TLBI<"RIPAS2LE1",    0b100, 0b1000, 0b0100, 0b110>;
+defm : TLBI<"RIPAS2E1OS",   0b100, 0b1000, 0b0100, 0b011>;
+defm : TLBI<"RIPAS2LE1OS",  0b100, 0b1000, 0b0100, 0b111>;
+defm : TLBI<"RVAE2",        0b100, 0b1000, 0b0110, 0b001>;
+defm : TLBI<"RVALE2",       0b100, 0b1000, 0b0110, 0b101>;
+defm : TLBI<"RVAE2IS",      0b100, 0b1000, 0b0010, 0b001>;
+defm : TLBI<"RVALE2IS",     0b100, 0b1000, 0b0010, 0b101>;
+defm : TLBI<"RVAE2OS",      0b100, 0b1000, 0b0101, 0b001>;
+defm : TLBI<"RVALE2OS",     0b100, 0b1000, 0b0101, 0b101>;
+defm : TLBI<"RVAE3",        0b110, 0b1000, 0b0110, 0b001>;
+defm : TLBI<"RVALE3",       0b110, 0b1000, 0b0110, 0b101>;
+defm : TLBI<"RVAE3IS",      0b110, 0b1000, 0b0010, 0b001>;
+defm : TLBI<"RVALE3IS",     0b110, 0b1000, 0b0010, 0b101>;
+defm : TLBI<"RVAE3OS",      0b110, 0b1000, 0b0101, 0b001>;
+defm : TLBI<"RVALE3OS",     0b110, 0b1000, 0b0101, 0b101>;
+} //FeatureTLB_RMI
+
+// Armv9-A Realm Management Extension TLBI Instructions
+let Requires = ["AArch64::FeatureRME"] in {
+defm : TLBI<"RPAOS",        0b110, 0b1000, 0b0100, 0b011>;
+defm : TLBI<"RPALOS",       0b110, 0b1000, 0b0100, 0b111>;
+defm : TLBI<"PAALLOS",      0b110, 0b1000, 0b0001, 0b100, 0>;
+defm : TLBI<"PAALL",        0b110, 0b1000, 0b0111, 0b100, 0>;
+}
+
+// Armv8.5-A Prediction Restriction by Context instruction options:
+// One PRCTX operand: a name plus an 11-bit encoding whose upper seven bits
+// are fixed and whose low four bits come from `crm`.
+class PRCTX<string name, bits<4> crm> : SearchableTable {
+  let SearchableFields = ["Name", "Encoding"];
+  let EnumValueField = "Encoding";
+
+  string Name = name;
+  bits<11> Encoding;
+  // Bits 10-4 hold the same constant for every entry; only bits 3-0 vary.
+  let Encoding{10-4} = 0b0110111;
+  let Encoding{3-0} = crm;
+  // Set unconditionally for every entry of this class.
+  bit NeedsReg = 1;
+  // Empty by default; definitions supply the feature list via `let Requires`.
+  code Requires = [{ {} }];
+}
+
+let Requires = [{ {AArch64::FeaturePredRes} }] in {
+def : PRCTX<"RCTX", 0b0011>;
 }

 //===----------------------------------------------------------------------===//
@ -430,6 +586,7 @@ class SysReg<string name, bits<2> op0, bits<3> op1, bits<4> crn, bits<4> crm,
  let EnumValueField = "Encoding";

  string Name = name;
+  string AltName = name;
  bits<16> Encoding;
  let Encoding{15-14} = op0;
  let Encoding{13-11} = op1;
@ -476,8 +633,10 @@ def : ROSysReg<"PMCEID0_EL0",        0b11, 0b011, 0b1001, 0b1100, 0b110>;
 def : ROSysReg<"PMCEID1_EL0",        0b11, 0b011, 0b1001, 0b1100, 0b111>;
 def : ROSysReg<"MIDR_EL1",           0b11, 0b000, 0b0000, 0b0000, 0b000>;
 def : ROSysReg<"CCSIDR_EL1",         0b11, 0b001, 0b0000, 0b0000, 0b000>;
+
+//v8.3 CCIDX - extending the CCSIDR number of sets
 def : ROSysReg<"CCSIDR2_EL1",        0b11, 0b001, 0b0000, 0b0000, 0b010> {
-  let Requires = [{ {AArch64::HasV8_3aOps} }];
+  let Requires = [{ {AArch64::FeatureCCIDX} }];
 }
 def : ROSysReg<"CLIDR_EL1",          0b11, 0b001, 0b0000, 0b0000, 0b001>;
 def : ROSysReg<"CTR_EL0",            0b11, 0b011, 0b0000, 0b0000, 0b001>;
@ -487,6 +646,9 @@ def : ROSysReg<"AIDR_EL1",           0b11, 0b001, 0b0000, 0b0000, 0b111>;
 def : ROSysReg<"DCZID_EL0",          0b11, 0b011, 0b0000, 0b0000, 0b111>;
 def : ROSysReg<"ID_PFR0_EL1",        0b11, 0b000, 0b0000, 0b0001, 0b000>;
 def : ROSysReg<"ID_PFR1_EL1",        0b11, 0b000, 0b0000, 0b0001, 0b001>;
+def : ROSysReg<"ID_PFR2_EL1",        0b11, 0b000, 0b0000, 0b0011, 0b100> {
+    let Requires = [{ {AArch64::FeatureSpecRestrict} }];
+}
 def : ROSysReg<"ID_DFR0_EL1",        0b11, 0b000, 0b0000, 0b0001, 0b010>;
 def : ROSysReg<"ID_AFR0_EL1",        0b11, 0b000, 0b0000, 0b0001, 0b011>;
 def : ROSysReg<"ID_MMFR0_EL1",       0b11, 0b000, 0b0000, 0b0001, 0b100>;
@ -510,11 +672,10 @@ def : ROSysReg<"ID_AA64AFR0_EL1",     0b11, 0b000, 0b0000, 0b0101, 0b100>;
 def : ROSysReg<"ID_AA64AFR1_EL1",     0b11, 0b000, 0b0000, 0b0101, 0b101>;
 def : ROSysReg<"ID_AA64ISAR0_EL1",    0b11, 0b000, 0b0000, 0b0110, 0b000>;
 def : ROSysReg<"ID_AA64ISAR1_EL1",    0b11, 0b000, 0b0000, 0b0110, 0b001>;
+def : ROSysReg<"ID_AA64ISAR2_EL1",    0b11, 0b000, 0b0000, 0b0110, 0b010>;
 def : ROSysReg<"ID_AA64MMFR0_EL1",    0b11, 0b000, 0b0000, 0b0111, 0b000>;
 def : ROSysReg<"ID_AA64MMFR1_EL1",    0b11, 0b000, 0b0000, 0b0111, 0b001>;
-def : ROSysReg<"ID_AA64MMFR2_EL1",    0b11, 0b000, 0b0000, 0b0111, 0b010> {
-  let Requires = [{ {AArch64::HasV8_2aOps} }];
-}
+def : ROSysReg<"ID_AA64MMFR2_EL1",    0b11, 0b000, 0b0000, 0b0111, 0b010>;
 def : ROSysReg<"MVFR0_EL1",          0b11, 0b000, 0b0000, 0b0011, 0b000>;
 def : ROSysReg<"MVFR1_EL1",          0b11, 0b000, 0b0000, 0b0011, 0b001>;
 def : ROSysReg<"MVFR2_EL1",          0b11, 0b000, 0b0000, 0b0011, 0b010>;
@ -525,6 +686,7 @@ def : ROSysReg<"ISR_EL1",            0b11, 0b000, 0b1100, 0b0001, 0b000>;
 def : ROSysReg<"CNTPCT_EL0",         0b11, 0b011, 0b1110, 0b0000, 0b001>;
 def : ROSysReg<"CNTVCT_EL0",         0b11, 0b011, 0b1110, 0b0000, 0b010>;
 def : ROSysReg<"ID_MMFR4_EL1",       0b11, 0b000, 0b0000, 0b0010, 0b110>;
+def : ROSysReg<"ID_MMFR5_EL1",       0b11, 0b000, 0b0000, 0b0011, 0b110>;

 // Trace registers
 //                                 Op0    Op1     CRn     CRm    Op2
@ -584,7 +746,7 @@ def : ROSysReg<"ID_AA64ZFR0_EL1",    0b11, 0b000, 0b0000, 0b0100, 0b100>;

 // v8.1a "Limited Ordering Regions" extension-specific system register
 //                         Op0    Op1     CRn     CRm    Op2
-let Requires = [{ {AArch64::HasV8_1aOps} }] in
+let Requires = [{ {AArch64::FeatureLOR} }] in
 def : ROSysReg<"LORID_EL1",  0b11, 0b000, 0b1010, 0b0100, 0b111>;

 // v8.2a "RAS extension" registers
@ -594,6 +756,35 @@ def : ROSysReg<"ERRIDR_EL1", 0b11, 0b000, 0b0101, 0b0011, 0b000>;
 def : ROSysReg<"ERXFR_EL1",  0b11, 0b000, 0b0101, 0b0100, 0b000>;
 }

+// v8.5a "random number" registers
+//                       Op0   Op1    CRn     CRm     Op2
+let Requires = [{ {AArch64::FeatureRandGen} }] in {
+def : ROSysReg<"RNDR",   0b11, 0b011, 0b0010, 0b0100, 0b000>;
+def : ROSysReg<"RNDRRS", 0b11, 0b011, 0b0010, 0b0100, 0b001>;
+}
+
+// v8.5a Software Context Number registers
+let Requires = [{ {AArch64::FeatureSpecRestrict} }] in {
+def : RWSysReg<"SCXTNUM_EL0", 0b11, 0b011, 0b1101, 0b0000, 0b111>;
+def : RWSysReg<"SCXTNUM_EL1", 0b11, 0b000, 0b1101, 0b0000, 0b111>;
+def : RWSysReg<"SCXTNUM_EL2", 0b11, 0b100, 0b1101, 0b0000, 0b111>;
+def : RWSysReg<"SCXTNUM_EL3", 0b11, 0b110, 0b1101, 0b0000, 0b111>;
+def : RWSysReg<"SCXTNUM_EL12", 0b11, 0b101, 0b1101, 0b0000, 0b111>;
+}
+
+// v9a Realm Management Extension registers
+let Requires = [{ {AArch64::FeatureRME} }] in {
+def : RWSysReg<"MFAR_EL3",  0b11, 0b110, 0b0110, 0b0000, 0b101>;
+def : RWSysReg<"GPCCR_EL3", 0b11, 0b110, 0b0010, 0b0001, 0b110>;
+def : RWSysReg<"GPTBR_EL3", 0b11, 0b110, 0b0010, 0b0001, 0b100>;
+}
+
+// v9-a Scalable Matrix Extension (SME) registers
+//                                 Op0   Op1    CRn     CRm     Op2
+let Requires = [{ {AArch64::FeatureSME} }] in {
+def : ROSysReg<"ID_AA64SMFR0_EL1", 0b11, 0b000, 0b0000, 0b0100, 0b101>;
+}
+
 //===----------------------
 // Write-only regs
 //===----------------------
@ -710,6 +901,9 @@ def : RWSysReg<"ACTLR_EL1",          0b11, 0b000, 0b0001, 0b0000, 0b001>;
 def : RWSysReg<"ACTLR_EL2",          0b11, 0b100, 0b0001, 0b0000, 0b001>;
 def : RWSysReg<"ACTLR_EL3",          0b11, 0b110, 0b0001, 0b0000, 0b001>;
 def : RWSysReg<"HCR_EL2",            0b11, 0b100, 0b0001, 0b0001, 0b000>;
+def : RWSysReg<"HCRX_EL2",           0b11, 0b100, 0b0001, 0b0010, 0b010> {
+  let Requires = [{ {AArch64::FeatureHCX} }];
+}
 def : RWSysReg<"SCR_EL3",            0b11, 0b110, 0b0001, 0b0001, 0b000>;
 def : RWSysReg<"MDCR_EL2",           0b11, 0b100, 0b0001, 0b0001, 0b001>;
 def : RWSysReg<"SDER32_EL3",         0b11, 0b110, 0b0001, 0b0001, 0b001>;
@ -719,13 +913,19 @@ def : RWSysReg<"HSTR_EL2",           0b11, 0b100, 0b0001, 0b0001, 0b011>;
 def : RWSysReg<"HACR_EL2",           0b11, 0b100, 0b0001, 0b0001, 0b111>;
 def : RWSysReg<"MDCR_EL3",           0b11, 0b110, 0b0001, 0b0011, 0b001>;
 def : RWSysReg<"TTBR0_EL1",          0b11, 0b000, 0b0010, 0b0000, 0b000>;
-def : RWSysReg<"TTBR0_EL2",          0b11, 0b100, 0b0010, 0b0000, 0b000>;
 def : RWSysReg<"TTBR0_EL3",          0b11, 0b110, 0b0010, 0b0000, 0b000>;
+
+let Requires = [{ {AArch64::FeatureEL2VMSA} }] in {
+def : RWSysReg<"TTBR0_EL2",          0b11, 0b100, 0b0010, 0b0000, 0b000> {
+  let AltName = "VSCTLR_EL2";
+}
+def : RWSysReg<"VTTBR_EL2",          0b11, 0b100, 0b0010, 0b0001, 0b000>;
+}
+
 def : RWSysReg<"TTBR1_EL1",          0b11, 0b000, 0b0010, 0b0000, 0b001>;
 def : RWSysReg<"TCR_EL1",            0b11, 0b000, 0b0010, 0b0000, 0b010>;
 def : RWSysReg<"TCR_EL2",            0b11, 0b100, 0b0010, 0b0000, 0b010>;
 def : RWSysReg<"TCR_EL3",            0b11, 0b110, 0b0010, 0b0000, 0b010>;
-def : RWSysReg<"VTTBR_EL2",          0b11, 0b100, 0b0010, 0b0001, 0b000>;
 def : RWSysReg<"VTCR_EL2",           0b11, 0b100, 0b0010, 0b0001, 0b010>;
 def : RWSysReg<"DACR32_EL2",         0b11, 0b100, 0b0011, 0b0000, 0b000>;
 def : RWSysReg<"SPSR_EL1",           0b11, 0b000, 0b0100, 0b0000, 0b000>;
@ -740,7 +940,7 @@ def : RWSysReg<"SP_EL2",             0b11, 0b110, 0b0100, 0b0001, 0b000>;
 def : RWSysReg<"SPSel",              0b11, 0b000, 0b0100, 0b0010, 0b000>;
 def : RWSysReg<"NZCV",               0b11, 0b011, 0b0100, 0b0010, 0b000>;
 def : RWSysReg<"DAIF",               0b11, 0b011, 0b0100, 0b0010, 0b001>;
-def : RWSysReg<"CurrentEL",          0b11, 0b000, 0b0100, 0b0010, 0b010>;
+def : ROSysReg<"CurrentEL",          0b11, 0b000, 0b0100, 0b0010, 0b010>;
 def : RWSysReg<"SPSR_irq",           0b11, 0b100, 0b0100, 0b0011, 0b000>;
 def : RWSysReg<"SPSR_abt",           0b11, 0b100, 0b0100, 0b0011, 0b001>;
 def : RWSysReg<"SPSR_und",           0b11, 0b100, 0b0100, 0b0011, 0b010>;
@ -777,6 +977,7 @@ def : RWSysReg<"PMUSERENR_EL0",      0b11, 0b011, 0b1001, 0b1110, 0b000>;
 def : RWSysReg<"PMINTENSET_EL1",     0b11, 0b000, 0b1001, 0b1110, 0b001>;
 def : RWSysReg<"PMINTENCLR_EL1",     0b11, 0b000, 0b1001, 0b1110, 0b010>;
 def : RWSysReg<"PMOVSSET_EL0",       0b11, 0b011, 0b1001, 0b1110, 0b011>;
+def : RWSysReg<"PMMIR_EL1",          0b11, 0b000, 0b1001, 0b1110, 0b110>;
 def : RWSysReg<"MAIR_EL1",           0b11, 0b000, 0b1010, 0b0010, 0b000>;
 def : RWSysReg<"MAIR_EL2",           0b11, 0b100, 0b1010, 0b0010, 0b000>;
 def : RWSysReg<"MAIR_EL3",           0b11, 0b110, 0b1010, 0b0010, 0b000>;
@ -1063,7 +1264,6 @@ def : RWSysReg<"ICC_SRE_EL3",        0b11, 0b110, 0b1100, 0b1100, 0b101>;
 def : RWSysReg<"ICC_IGRPEN0_EL1",    0b11, 0b000, 0b1100, 0b1100, 0b110>;
 def : RWSysReg<"ICC_IGRPEN1_EL1",    0b11, 0b000, 0b1100, 0b1100, 0b111>;
 def : RWSysReg<"ICC_IGRPEN1_EL3",    0b11, 0b110, 0b1100, 0b1100, 0b111>;
-def : RWSysReg<"ICC_SEIEN_EL1",      0b11, 0b000, 0b1100, 0b1101, 0b000>;
 def : RWSysReg<"ICC_AP0R0_EL1",      0b11, 0b000, 0b1100, 0b1000, 0b100>;
 def : RWSysReg<"ICC_AP0R1_EL1",      0b11, 0b000, 0b1100, 0b1000, 0b101>;
 def : RWSysReg<"ICC_AP0R2_EL1",      0b11, 0b000, 0b1100, 0b1000, 0b110>;
@ -1081,9 +1281,8 @@ def : RWSysReg<"ICH_AP1R1_EL2",      0b11, 0b100, 0b1100, 0b1001, 0b001>;
 def : RWSysReg<"ICH_AP1R2_EL2",      0b11, 0b100, 0b1100, 0b1001, 0b010>;
 def : RWSysReg<"ICH_AP1R3_EL2",      0b11, 0b100, 0b1100, 0b1001, 0b011>;
 def : RWSysReg<"ICH_HCR_EL2",        0b11, 0b100, 0b1100, 0b1011, 0b000>;
-def : RWSysReg<"ICH_MISR_EL2",       0b11, 0b100, 0b1100, 0b1011, 0b010>;
+def : ROSysReg<"ICH_MISR_EL2",       0b11, 0b100, 0b1100, 0b1011, 0b010>;
 def : RWSysReg<"ICH_VMCR_EL2",       0b11, 0b100, 0b1100, 0b1011, 0b111>;
-def : RWSysReg<"ICH_VSEIR_EL2",      0b11, 0b100, 0b1100, 0b1001, 0b100>;
 def : RWSysReg<"ICH_LR0_EL2",        0b11, 0b100, 0b1100, 0b1100, 0b000>;
 def : RWSysReg<"ICH_LR1_EL2",        0b11, 0b100, 0b1100, 0b1100, 0b001>;
 def : RWSysReg<"ICH_LR2_EL2",        0b11, 0b100, 0b1100, 0b1100, 0b010>;
@ -1101,24 +1300,74 @@ def : RWSysReg<"ICH_LR13_EL2",       0b11, 0b100, 0b1100, 0b1101, 0b101>;
 def : RWSysReg<"ICH_LR14_EL2",       0b11, 0b100, 0b1100, 0b1101, 0b110>;
 def : RWSysReg<"ICH_LR15_EL2",       0b11, 0b100, 0b1100, 0b1101, 0b111>;

+// v8r system registers
+let Requires = [{ {AArch64::HasV8_0rOps} }] in {
+//Virtualization System Control Register
+//                                 Op0   Op1    CRn     CRm     Op2
+def : RWSysReg<"VSCTLR_EL2",       0b11, 0b100, 0b0010, 0b0000, 0b000> {
+  let AltName = "TTBR0_EL2";
+}
+
+//MPU Type Register
+//                                 Op0   Op1    CRn     CRm     Op2
+def : RWSysReg<"MPUIR_EL1",        0b11, 0b000, 0b0000, 0b0000, 0b100>;
+def : RWSysReg<"MPUIR_EL2",        0b11, 0b100, 0b0000, 0b0000, 0b100>;
+
+//Protection Region Enable Register
+//                                 Op0   Op1    CRn     CRm     Op2
+def : RWSysReg<"PRENR_EL1",        0b11, 0b000, 0b0110, 0b0001, 0b001>;
+def : RWSysReg<"PRENR_EL2",        0b11, 0b100, 0b0110, 0b0001, 0b001>;
+
+//Protection Region Selection Register
+//                                 Op0   Op1    CRn     CRm     Op2
+def : RWSysReg<"PRSELR_EL1",       0b11, 0b000, 0b0110, 0b0010, 0b001>;
+def : RWSysReg<"PRSELR_EL2",       0b11, 0b100, 0b0110, 0b0010, 0b001>;
+
+//Protection Region Base Address Register
+//                                 Op0   Op1    CRn     CRm     Op2
+def : RWSysReg<"PRBAR_EL1",        0b11, 0b000, 0b0110, 0b1000, 0b000>;
+def : RWSysReg<"PRBAR_EL2",        0b11, 0b100, 0b0110, 0b1000, 0b000>;
+
+//Protection Region Limit Address Register
+//                                 Op0   Op1    CRn     CRm     Op2
+def : RWSysReg<"PRLAR_EL1",        0b11, 0b000, 0b0110, 0b1000, 0b001>;
+def : RWSysReg<"PRLAR_EL2",        0b11, 0b100, 0b0110, 0b1000, 0b001>;
+
+// Generates PRBAR<n>_EL<x> and PRLAR<n>_EL<x> for n = 1..15 and x = 1..2.
+// Every record starts from the same template operands and then overrides two
+// bit ranges of Encoding: bits 5-2 take the region number n, and bit 13 (the
+// top bit of the op1 field, Encoding{13-11}) takes x - 1, so x = 1 leaves it
+// clear and x = 2 sets it.
+foreach n = 1-15 in {
+foreach x = 1-2 in {
+//Direct access to Protection Region Base Address Register for n-th MPU region
+  def : RWSysReg<!strconcat("PRBAR"#n, "_EL"#x),
+    0b11, 0b000, 0b0110, 0b1000, 0b000>{
+    let Encoding{5-2} = n;
+    let Encoding{13} = !add(x,-1);
+  }
+
+//Direct access to Protection Region Limit Address Register for n-th MPU region
+  def : RWSysReg<!strconcat("PRLAR"#n, "_EL"#x),
+    0b11, 0b000, 0b0110, 0b1000, 0b001>{
+    let Encoding{5-2} = n;
+    let Encoding{13} = !add(x,-1);
+  }
+} //foreach x = 1-2 in
+} //foreach n = 1-15 in
+} //let Requires = [{ {AArch64::HasV8_0rOps} }] in
+
 // v8.1a "Privileged Access Never" extension-specific system registers
-let Requires = [{ {AArch64::HasV8_1aOps} }] in
+let Requires = [{ {AArch64::FeaturePAN} }] in
 def : RWSysReg<"PAN", 0b11, 0b000, 0b0100, 0b0010, 0b011>;

 // v8.1a "Limited Ordering Regions" extension-specific system registers
 //                         Op0    Op1     CRn     CRm    Op2
-let Requires = [{ {AArch64::HasV8_1aOps} }] in {
+let Requires = [{ {AArch64::FeatureLOR} }] in {
 def : RWSysReg<"LORSA_EL1",  0b11, 0b000, 0b1010, 0b0100, 0b000>;
 def : RWSysReg<"LOREA_EL1",  0b11, 0b000, 0b1010, 0b0100, 0b001>;
 def : RWSysReg<"LORN_EL1",   0b11, 0b000, 0b1010, 0b0100, 0b010>;
 def : RWSysReg<"LORC_EL1",   0b11, 0b000, 0b1010, 0b0100, 0b011>;
 }

-// v8.1a "Virtualization hos extensions" system registers
+// v8.1a "Virtualization Host extensions" system registers
 //                              Op0    Op1     CRn     CRm    Op2
-let Requires = [{ {AArch64::HasV8_1aOps} }] in {
+let Requires = [{ {AArch64::FeatureVH} }] in {
 def : RWSysReg<"TTBR1_EL2",       0b11, 0b100, 0b0010, 0b0000, 0b001>;
-def : RWSysReg<"CONTEXTIDR_EL2",  0b11, 0b100, 0b1101, 0b0000, 0b001>;
 def : RWSysReg<"CNTHV_TVAL_EL2",  0b11, 0b100, 0b1110, 0b0011, 0b000>;
 def : RWSysReg<"CNTHV_CVAL_EL2",  0b11, 0b100, 0b1110, 0b0011, 0b010>;
 def : RWSysReg<"CNTHV_CTL_EL2",   0b11, 0b100, 0b1110, 0b0011, 0b001>;
@ -1144,10 +1393,13 @@ def : RWSysReg<"CNTV_CTL_EL02",   0b11, 0b101, 0b1110, 0b0011, 0b001>;
 def : RWSysReg<"CNTV_CVAL_EL02",  0b11, 0b101, 0b1110, 0b0011, 0b010>;
 def : RWSysReg<"SPSR_EL12",       0b11, 0b101, 0b0100, 0b0000, 0b000>;
 def : RWSysReg<"ELR_EL12",        0b11, 0b101, 0b0100, 0b0000, 0b001>;
+let Requires = [{ {AArch64::FeatureCONTEXTIDREL2} }] in {
+  def : RWSysReg<"CONTEXTIDR_EL2",  0b11, 0b100, 0b1101, 0b0000, 0b001>;
+}
 }
 // v8.2a registers
 //                  Op0    Op1     CRn     CRm    Op2
-let Requires = [{ {AArch64::HasV8_2aOps} }] in
+let Requires = [{ {AArch64::FeaturePsUAO} }] in
 def : RWSysReg<"UAO", 0b11, 0b000, 0b0100, 0b0010, 0b100>;

 // v8.2a "Statistical Profiling extension" registers
@ -1156,7 +1408,7 @@ let Requires = [{ {AArch64::FeatureSPE} }] in {
 def : RWSysReg<"PMBLIMITR_EL1", 0b11, 0b000, 0b1001, 0b1010, 0b000>;
 def : RWSysReg<"PMBPTR_EL1",    0b11, 0b000, 0b1001, 0b1010, 0b001>;
 def : RWSysReg<"PMBSR_EL1",     0b11, 0b000, 0b1001, 0b1010, 0b011>;
-def : RWSysReg<"PMBIDR_EL1",    0b11, 0b000, 0b1001, 0b1010, 0b111>;
+def : ROSysReg<"PMBIDR_EL1",    0b11, 0b000, 0b1001, 0b1010, 0b111>;
 def : RWSysReg<"PMSCR_EL2",     0b11, 0b100, 0b1001, 0b1001, 0b000>;
 def : RWSysReg<"PMSCR_EL12",    0b11, 0b101, 0b1001, 0b1001, 0b000>;
 def : RWSysReg<"PMSCR_EL1",     0b11, 0b000, 0b1001, 0b1001, 0b000>;
@ -1165,7 +1417,7 @@ def : RWSysReg<"PMSIRR_EL1",    0b11, 0b000, 0b1001, 0b1001, 0b011>;
 def : RWSysReg<"PMSFCR_EL1",    0b11, 0b000, 0b1001, 0b1001, 0b100>;
 def : RWSysReg<"PMSEVFR_EL1",   0b11, 0b000, 0b1001, 0b1001, 0b101>;
 def : RWSysReg<"PMSLATFR_EL1",  0b11, 0b000, 0b1001, 0b1001, 0b110>;
-def : RWSysReg<"PMSIDR_EL1",    0b11, 0b000, 0b1001, 0b1001, 0b111>;
+def : ROSysReg<"PMSIDR_EL1",    0b11, 0b000, 0b1001, 0b1001, 0b111>;
 }

 // v8.2a "RAS extension" registers
@ -1184,7 +1436,7 @@ def : RWSysReg<"VSESR_EL2",     0b11, 0b100, 0b0101, 0b0010, 0b011>;

 // v8.3a "Pointer authentication extension" registers
 //                              Op0    Op1     CRn     CRm    Op2
-let Requires = [{ {AArch64::HasV8_3aOps} }] in {
+let Requires = [{ {AArch64::FeaturePAuth} }] in {
 def : RWSysReg<"APIAKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b000>;
 def : RWSysReg<"APIAKeyHi_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b001>;
 def : RWSysReg<"APIBKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b010>;
@ -1197,12 +1449,14 @@ def : RWSysReg<"APGAKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0011, 0b000>;
 def : RWSysReg<"APGAKeyHi_EL1", 0b11, 0b000, 0b0010, 0b0011, 0b001>;
 }

-let Requires = [{ {AArch64::HasV8_4aOps} }] in {
-
+// v8.4 "Secure Exception Level 2 extension"
+let Requires = [{ {AArch64::FeatureSEL2} }] in {
 // v8.4a "Virtualization secure second stage translation" registers
 //                           Op0   Op1    CRn     CRm     Op2
 def : RWSysReg<"VSTCR_EL2" , 0b11, 0b100, 0b0010, 0b0110, 0b010>;
-def : RWSysReg<"VSTTBR_EL2", 0b11, 0b100, 0b0010, 0b0110, 0b000>;
+def : RWSysReg<"VSTTBR_EL2", 0b11, 0b100, 0b0010, 0b0110, 0b000> {
+  let Requires = [{ {AArch64::HasV8_0aOps} }];
+}

 // v8.4a "Virtualization timer" registers
 //                                Op0   Op1    CRn     CRm     Op2
@ -1216,18 +1470,19 @@ def : RWSysReg<"CNTHPS_CTL_EL2",  0b11, 0b100, 0b1110, 0b0101, 0b001>;
 // v8.4a "Virtualization debug state" registers
 //                           Op0   Op1    CRn     CRm     Op2
 def : RWSysReg<"SDER32_EL2", 0b11, 0b100, 0b0001, 0b0011, 0b001>;
+} // FeatureSEL2

 // v8.4a RAS registers
-//                              Op0   Op1    CRn     CRm    Op2
+//                              Op0   Op1    CRn     CRm     Op2
 def : RWSysReg<"ERXPFGCTL_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b101>;
 def : RWSysReg<"ERXPFGCDN_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b110>;
-def : RWSysReg<"ERXTS_EL1",     0b11, 0b000, 0b0101, 0b0101, 0b111>;
 def : RWSysReg<"ERXMISC2_EL1",  0b11, 0b000, 0b0101, 0b0101, 0b010>;
 def : RWSysReg<"ERXMISC3_EL1",  0b11, 0b000, 0b0101, 0b0101, 0b011>;
 def : ROSysReg<"ERXPFGF_EL1",   0b11, 0b000, 0b0101, 0b0100, 0b100>;

 // v8.4a MPAM registers
 //                             Op0   Op1    CRn     CRm     Op2
+let Requires = [{ {AArch64::FeatureMPAM} }] in {
 def : RWSysReg<"MPAM0_EL1",    0b11, 0b000, 0b1010, 0b0101, 0b001>;
 def : RWSysReg<"MPAM1_EL1",    0b11, 0b000, 0b1010, 0b0101, 0b000>;
 def : RWSysReg<"MPAM2_EL2",    0b11, 0b100, 0b1010, 0b0101, 0b000>;
@ -1244,9 +1499,11 @@ def : RWSysReg<"MPAMVPM5_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b101>;
 def : RWSysReg<"MPAMVPM6_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b110>;
 def : RWSysReg<"MPAMVPM7_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b111>;
 def : ROSysReg<"MPAMIDR_EL1",  0b11, 0b000, 0b1010, 0b0100, 0b100>;
+} //FeatureMPAM

-// v8.4a Activitiy monitor registers
+// v8.4a Activity Monitor registers
 //                                 Op0   Op1    CRn     CRm     Op2
+let Requires = [{ {AArch64::FeatureAM} }] in {
 def : RWSysReg<"AMCR_EL0",         0b11, 0b011, 0b1101, 0b0010, 0b000>;
 def : ROSysReg<"AMCFGR_EL0",       0b11, 0b011, 0b1101, 0b0010, 0b001>;
 def : ROSysReg<"AMCGCR_EL0",       0b11, 0b011, 0b1101, 0b0010, 0b010>;
@ -1295,6 +1552,7 @@ def : RWSysReg<"AMEVTYPER112_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b100>;
 def : RWSysReg<"AMEVTYPER113_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b101>;
 def : RWSysReg<"AMEVTYPER114_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b110>;
 def : RWSysReg<"AMEVTYPER115_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b111>;
+} //FeatureAM

 // v8.4a Trace Extension registers
 //
@ -1303,19 +1561,24 @@ def : RWSysReg<"AMEVTYPER115_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b111>;
 // but they are already defined above.
 //
 //                                 Op0   Op1    CRn     CRm     Op2
+let Requires = [{ {AArch64::FeatureTRACEV8_4} }] in {
 def : RWSysReg<"TRFCR_EL1",        0b11, 0b000, 0b0001, 0b0010, 0b001>;
 def : RWSysReg<"TRFCR_EL2",        0b11, 0b100, 0b0001, 0b0010, 0b001>;
 def : RWSysReg<"TRFCR_EL12",       0b11, 0b101, 0b0001, 0b0010, 0b001>;
+} //FeatureTRACEV8_4

-// v8.4a Timining insensitivity of data processing instructions
+// v8.4a Timing insensitivity of data processing instructions
+// DIT: Data Independent Timing instructions
 //                                 Op0   Op1    CRn     CRm     Op2
+let Requires = [{ {AArch64::FeatureDIT} }] in {
 def : RWSysReg<"DIT",              0b11, 0b011, 0b0100, 0b0010, 0b101>;
+} //FeatureDIT

 // v8.4a Enhanced Support for Nested Virtualization
 //                                 Op0   Op1    CRn     CRm     Op2
+let Requires = [{ {AArch64::FeatureNV} }] in {
 def : RWSysReg<"VNCR_EL2",         0b11, 0b100, 0b0010, 0b0010, 0b000>;
-
-} // HasV8_4aOps
+} //FeatureNV

 // SVE control registers
 //                                 Op0   Op1    CRn     CRm     Op2
@ -1326,7 +1589,131 @@ def : RWSysReg<"ZCR_EL3",          0b11, 0b110, 0b0001, 0b0010, 0b000>;
 def : RWSysReg<"ZCR_EL12",         0b11, 0b101, 0b0001, 0b0010, 0b000>;
 }

+// V8.5a Spectre mitigation SSBS register
+//                     Op0   Op1    CRn     CRm     Op2
+let Requires = [{ {AArch64::FeatureSSBS} }] in
+def : RWSysReg<"SSBS", 0b11, 0b011, 0b0100, 0b0010, 0b110>;
+
+// v8.5a Memory Tagging Extension
+//                                 Op0   Op1    CRn     CRm     Op2
+let Requires = [{ {AArch64::FeatureMTE} }] in {
+def : RWSysReg<"TCO",              0b11, 0b011, 0b0100, 0b0010, 0b111>;
+def : RWSysReg<"GCR_EL1",          0b11, 0b000, 0b0001, 0b0000, 0b110>;
+def : RWSysReg<"RGSR_EL1",         0b11, 0b000, 0b0001, 0b0000, 0b101>;
+def : RWSysReg<"TFSR_EL1",         0b11, 0b000, 0b0101, 0b0110, 0b000>;
+def : RWSysReg<"TFSR_EL2",         0b11, 0b100, 0b0101, 0b0110, 0b000>;
+def : RWSysReg<"TFSR_EL3",         0b11, 0b110, 0b0101, 0b0110, 0b000>;
+def : RWSysReg<"TFSR_EL12",        0b11, 0b101, 0b0101, 0b0110, 0b000>;
+def : RWSysReg<"TFSRE0_EL1",       0b11, 0b000, 0b0101, 0b0110, 0b001>;
+def : ROSysReg<"GMID_EL1",         0b11, 0b001, 0b0000, 0b0000, 0b100>;
+} // FeatureMTE
+
+// Embedded Trace Extension R/W System registers
+let Requires = [{ {AArch64::FeatureETE} }] in {
+//              Name            Op0   Op1    CRn     CRm     Op2
+def : RWSysReg<"TRCRSR",        0b10, 0b001, 0b0000, 0b1010, 0b000>;
+//  TRCEXTINSELR0 has the same encoding as ETM TRCEXTINSELR
+def : RWSysReg<"TRCEXTINSELR0", 0b10, 0b001, 0b0000, 0b1000, 0b100>;
+def : RWSysReg<"TRCEXTINSELR1", 0b10, 0b001, 0b0000, 0b1001, 0b100>;
+def : RWSysReg<"TRCEXTINSELR2", 0b10, 0b001, 0b0000, 0b1010, 0b100>;
+def : RWSysReg<"TRCEXTINSELR3", 0b10, 0b001, 0b0000, 0b1011, 0b100>;
+} // FeatureETE
+
+// Trace Buffer Extension System registers
+let Requires = [{ {AArch64::FeatureTRBE} }] in {
+//                   Name       Op0   Op1    CRn     CRm     Op2
+def : RWSysReg<"TRBLIMITR_EL1", 0b11, 0b000, 0b1001, 0b1011, 0b000>;
+def : RWSysReg<"TRBPTR_EL1",    0b11, 0b000, 0b1001, 0b1011, 0b001>;
+def : RWSysReg<"TRBBASER_EL1",  0b11, 0b000, 0b1001, 0b1011, 0b010>;
+def : RWSysReg<"TRBSR_EL1",     0b11, 0b000, 0b1001, 0b1011, 0b011>;
+def : RWSysReg<"TRBMAR_EL1",    0b11, 0b000, 0b1001, 0b1011, 0b100>;
+def : RWSysReg<"TRBTRG_EL1",    0b11, 0b000, 0b1001, 0b1011, 0b110>;
+def : ROSysReg<"TRBIDR_EL1",    0b11, 0b000, 0b1001, 0b1011, 0b111>;
+} // FeatureTRBE
+
+
+// v8.6a Activity Monitors Virtualization Support
+// Generates AMEVCNTVOFF<x><n>_EL2 for x in {0,1} and n in 0..15 (32 registers).
+// Every def starts from the same Op0/Op1/CRn/CRm/Op2 tuple; the body then
+// overwrites Encoding bit 4 with x and Encoding bits 3-0 with n, so each
+// register ends up with its own encoding. (Encoding itself is declared by the
+// SysReg class, which is outside this part of the file.)
+let Requires = [{ {AArch64::FeatureAMVS} }] in {
+foreach n = 0-15 in {
+  foreach x = 0-1 in {
+  def : RWSysReg<"AMEVCNTVOFF"#x#n#"_EL2",
+    0b11, 0b100, 0b1101, 0b1000, 0b000>{
+      let Encoding{4} = x;
+      let Encoding{3-0} = n;
+    }
+  }
+}
+} // FeatureAMVS
+
+// v8.6a Fine Grained Virtualization Traps
+//                                 Op0   Op1    CRn     CRm     Op2
+let Requires = [{ {AArch64::FeatureFineGrainedTraps} }] in {
+def : RWSysReg<"HFGRTR_EL2",       0b11, 0b100, 0b0001, 0b0001, 0b100>;
+def : RWSysReg<"HFGWTR_EL2",       0b11, 0b100, 0b0001, 0b0001, 0b101>;
+def : RWSysReg<"HFGITR_EL2",       0b11, 0b100, 0b0001, 0b0001, 0b110>;
+def : RWSysReg<"HDFGRTR_EL2",      0b11, 0b100, 0b0011, 0b0001, 0b100>;
+def : RWSysReg<"HDFGWTR_EL2",      0b11, 0b100, 0b0011, 0b0001, 0b101>;
+} // FeatureFineGrainedTraps
+
+// v8.6a Enhanced Counter Virtualization
+// NOTE(review): CNTPCTSS_EL0 and CNTVCTSS_EL0 are declared read/write here;
+// confirm against the Arm ARM whether they should be ROSysReg instead.
+//                                 Op0   Op1    CRn     CRm     Op2
+let Requires = [{ {AArch64::FeatureEnhancedCounterVirtualization} }] in {
+def : RWSysReg<"CNTSCALE_EL2",     0b11, 0b100, 0b1110, 0b0000, 0b100>;
+def : RWSysReg<"CNTISCALE_EL2",    0b11, 0b100, 0b1110, 0b0000, 0b101>;
+def : RWSysReg<"CNTPOFF_EL2",      0b11, 0b100, 0b1110, 0b0000, 0b110>;
+def : RWSysReg<"CNTVFRQ_EL2",      0b11, 0b100, 0b1110, 0b0000, 0b111>;
+def : RWSysReg<"CNTPCTSS_EL0",     0b11, 0b011, 0b1110, 0b0000, 0b101>;
+def : RWSysReg<"CNTVCTSS_EL0",     0b11, 0b011, 0b1110, 0b0000, 0b110>;
+} // FeatureEnhancedCounterVirtualization
+
+// v8.7a LD64B/ST64B Accelerator Extension system register
+//                                  Op0   Op1    CRn     CRm     Op2
+let Requires = [{ {AArch64::FeatureLS64} }] in
+def : RWSysReg<"ACCDATA_EL1",       0b11, 0b000, 0b1101, 0b0000, 0b101>;
+
+// Branch Record Buffer system registers
+//                                  Op0   Op1    CRn     CRm     Op2
+let Requires = [{ {AArch64::FeatureBRBE} }] in {
+def : RWSysReg<"BRBCR_EL1",         0b10, 0b001, 0b1001, 0b0000, 0b000>;
+def : RWSysReg<"BRBCR_EL12",        0b10, 0b101, 0b1001, 0b0000, 0b000>;
+def : RWSysReg<"BRBCR_EL2",         0b10, 0b100, 0b1001, 0b0000, 0b000>;
+def : RWSysReg<"BRBFCR_EL1",        0b10, 0b001, 0b1001, 0b0000, 0b001>;
+def : ROSysReg<"BRBIDR0_EL1",       0b10, 0b001, 0b1001, 0b0010, 0b000>;
+def : RWSysReg<"BRBINFINJ_EL1",     0b10, 0b001, 0b1001, 0b0001, 0b000>;
+def : RWSysReg<"BRBSRCINJ_EL1",     0b10, 0b001, 0b1001, 0b0001, 0b001>;
+def : RWSysReg<"BRBTGTINJ_EL1",     0b10, 0b001, 0b1001, 0b0001, 0b010>;
+def : RWSysReg<"BRBTS_EL1",         0b10, 0b001, 0b1001, 0b0000, 0b010>;
+// Per-record registers BRB{INF,SRC,TGT}<n>_EL1 for n in 0..31. n is viewed as
+// a 5-bit value: its low four bits are passed as CRm and its top bit becomes
+// the high bit of Op2; the low two Op2 bits select INF (00), SRC (01) or
+// TGT (10).
+foreach n = 0-31 in {
+  defvar nb = !cast<bits<5>>(n);
+  def : ROSysReg<"BRBINF"#n#"_EL1", 0b10, 0b001, 0b1000, nb{3-0}, {nb{4},0b00}>;
+  def : ROSysReg<"BRBSRC"#n#"_EL1", 0b10, 0b001, 0b1000, nb{3-0}, {nb{4},0b01}>;
+  def : ROSysReg<"BRBTGT"#n#"_EL1", 0b10, 0b001, 0b1000, nb{3-0}, {nb{4},0b10}>;
+}
+} // FeatureBRBE
+
+// Statistical Profiling Extension system register
+//                                  Op0   Op1    CRn     CRm     Op2
+let Requires = [{ {AArch64::FeatureSPE_EEF} }] in
+def : RWSysReg<"PMSNEVFR_EL1",      0b11, 0b000, 0b1001, 0b1001, 0b001>;
+
 // Cyclone specific system registers
 //                                 Op0    Op1     CRn     CRm    Op2
-let Requires = [{ {AArch64::ProcCyclone} }] in
+let Requires = [{ {AArch64::FeatureAppleA7SysReg} }] in
 def : RWSysReg<"CPM_IOACC_CTL_EL3", 0b11, 0b111, 0b1111, 0b0010, 0b000>;
+
+// Scalable Matrix Extension (SME)
+//                                 Op0   Op1    CRn     CRm     Op2
+let Requires = [{ {AArch64::FeatureSME} }] in {
+def : RWSysReg<"SMCR_EL1",         0b11, 0b000, 0b0001, 0b0010, 0b110>;
+def : RWSysReg<"SMCR_EL2",         0b11, 0b100, 0b0001, 0b0010, 0b110>;
+def : RWSysReg<"SMCR_EL3",         0b11, 0b110, 0b0001, 0b0010, 0b110>;
+def : RWSysReg<"SMCR_EL12",        0b11, 0b101, 0b0001, 0b0010, 0b110>;
+def : RWSysReg<"SVCR",             0b11, 0b011, 0b0100, 0b0010, 0b010>;
+def : RWSysReg<"SMPRI_EL1",        0b11, 0b000, 0b0001, 0b0010, 0b100>;
+def : RWSysReg<"SMPRIMAP_EL2",     0b11, 0b100, 0b0001, 0b0010, 0b101>;
+def : ROSysReg<"SMIDR_EL1",        0b11, 0b001, 0b0000, 0b0000, 0b110>;
+def : RWSysReg<"TPIDR2_EL0",       0b11, 0b011, 0b1101, 0b0000, 0b101>;
+} // FeatureSME
+
+// v8.4a MPAM and SME registers
+// The Requires list names both features, so this register is gated on
+// FeatureMPAM and FeatureSME together.
+//                              Op0   Op1    CRn     CRm     Op2
+let Requires = [{ {AArch64::FeatureMPAM, AArch64::FeatureSME} }] in {
+def : RWSysReg<"MPAMSM_EL1",    0b11, 0b000, 0b1010, 0b0101, 0b011>;
+} // FeatureMPAM, FeatureSME
--- a/suite/synctools/tablegen/AArch64/SMEInstrFormats.td
+++ b/suite/synctools/tablegen/AArch64/SMEInstrFormats.td
@ -0,0 +1,726 @@
+//=-- SMEInstrFormats.td -  AArch64 SME Instruction classes -*- tablegen -*--=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// AArch64 Scalable Matrix Extension (SME) Instruction Class Definitions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SME Outer Products
+//===----------------------------------------------------------------------===//
+
+// Shared encoding for the SME floating-point outer-product instructions.
+// Output is the tile $ZAda; inputs are two predicates ($Pn, $Pm, both printed
+// with the /m qualifier) and two source vectors ($Zn, $Zm).
+//   S      - value placed in Inst{4} (presumably accumulate vs. subtract;
+//            confirm against the defs that instantiate this class).
+//   sz     - value placed in Inst{22}.
+//   za_ty  - tile operand type of the destination.
+//   zpr_ty - vector register operand type of both sources.
+// Inst{2-0} is not assigned here; the element-size subclasses fill it with the
+// tile number.
+class sme_fp_outer_product_inst<bit S, bit sz, MatrixTileOperand za_ty,
+                                ZPRRegOp zpr_ty, string mnemonic>
+    : I<(outs za_ty:$ZAda),
+        (ins PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm),
+        mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm",
+        "", []>,
+      Sched<[]> {
+  bits<5> Zm;
+  bits<3> Pm;
+  bits<3> Pn;
+  bits<5> Zn;
+  let Inst{31-23} = 0b100000001;
+  let Inst{22}    = sz;
+  let Inst{21}    = 0b0;
+  let Inst{20-16} = Zm;
+  let Inst{15-13} = Pm;
+  let Inst{12-10} = Pn;
+  let Inst{9-5}   = Zn;
+  let Inst{4}     = S;
+  let Inst{3}     = 0b0;
+}
+
+// FP outer product on 32-bit elements: sz = 0, TileOp32 destination, ZPR32
+// sources. The tile number is 2 bits wide (Inst{1-0}); Inst{2} is fixed to 0.
+class sme_outer_product_fp32<bit S, string mnemonic>
+    : sme_fp_outer_product_inst<S, 0b0, TileOp32, ZPR32, mnemonic> {
+  bits<2> ZAda;
+  let Inst{1-0} = ZAda;
+  let Inst{2}   = 0b0;
+}
+
+// FP outer product on 64-bit elements: sz = 1, TileOp64 destination, ZPR64
+// sources. The tile number is 3 bits wide and occupies all of Inst{2-0}.
+class sme_outer_product_fp64<bit S, string mnemonic>
+    : sme_fp_outer_product_inst<S, 0b1, TileOp64, ZPR64, mnemonic> {
+  bits<3> ZAda;
+  let Inst{2-0} = ZAda;
+}
+
+// Shared encoding for the SME integer outer-product instructions.
+// Operands and assembly layout match the floating-point variant: tile $ZAda,
+// predicates $Pn/$Pm (printed with /m), vectors $Zn/$Zm.
+//   u0 - value placed in Inst{24}.
+//   u1 - value placed in Inst{21}.
+//   S  - value placed in Inst{4}.
+//   sz - value placed in Inst{22}.
+// NOTE(review): u0/u1 presumably select signedness of the two sources and S
+// accumulate vs. subtract; confirm against the instantiating defs.
+// Inst{2-0} is left for the subclasses (tile number).
+class sme_int_outer_product_inst<bit u0, bit u1, bit S, bit sz,
+                                 MatrixTileOperand za_ty, ZPRRegOp zpr_ty,
+                                 string mnemonic>
+    : I<(outs za_ty:$ZAda),
+        (ins PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm),
+        mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm",
+        "", []>,
+      Sched<[]> {
+  bits<5> Zm;
+  bits<3> Pm;
+  bits<3> Pn;
+  bits<5> Zn;
+  let Inst{31-25} = 0b1010000;
+  let Inst{24}    = u0;
+  let Inst{23}    = 0b1;
+  let Inst{22}    = sz;
+  let Inst{21}    = u1;
+  let Inst{20-16} = Zm;
+  let Inst{15-13} = Pm;
+  let Inst{12-10} = Pn;
+  let Inst{9-5}   = Zn;
+  let Inst{4}     = S;
+  let Inst{3}     = 0b0;
+}
+
+// Integer outer product into a 32-bit tile from 8-bit source elements.
+// The 3-bit opc is unpacked as u0 = opc{2}, u1 = opc{1}, S = opc{0}; sz = 0.
+// The tile number is 2 bits wide (Inst{1-0}); Inst{2} is fixed to 0.
+class sme_int_outer_product_i32<bits<3> opc, string mnemonic>
+    : sme_int_outer_product_inst<opc{2}, opc{1}, opc{0}, 0b0, TileOp32, ZPR8,
+                                 mnemonic> {
+  bits<2> ZAda;
+  let Inst{1-0} = ZAda;
+  let Inst{2}   = 0b0;
+}
+
+// Integer outer product into a 64-bit tile from 16-bit source elements.
+// The 3-bit opc is unpacked as u0 = opc{2}, u1 = opc{1}, S = opc{0}; sz = 1.
+// The tile number is 3 bits wide and occupies all of Inst{2-0}.
+class sme_int_outer_product_i64<bits<3> opc, string mnemonic>
+    : sme_int_outer_product_inst<opc{2}, opc{1}, opc{0}, 0b1, TileOp64, ZPR16,
+                                 mnemonic> {
+  bits<3> ZAda;
+  let Inst{2-0} = ZAda;
+}
+
+// Widening outer product: 16-bit source vectors (ZPR16) accumulate into a
+// 32-bit tile (TileOp32).
+//   op - value placed in Inst{21}; the bf16 multiclass passes 0, the f16
+//        multiclass passes 1.
+//   S  - value placed in Inst{4}.
+// The tile number is 2 bits wide (Inst{1-0}); Inst{3-2} are fixed to 0.
+class sme_outer_product_widening_inst<bit op, bit S, string mnemonic>
+    : I<(outs TileOp32:$ZAda),
+        (ins PPR3bAny:$Pn, PPR3bAny:$Pm, ZPR16:$Zn, ZPR16:$Zm),
+        mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm",
+        "", []>,
+      Sched<[]> {
+  bits<5> Zm;
+  bits<3> Pm;
+  bits<3> Pn;
+  bits<5> Zn;
+  bits<2> ZAda;
+  let Inst{31-22} = 0b1000000110;
+  let Inst{21}    = op;
+  let Inst{20-16} = Zm;
+  let Inst{15-13} = Pm;
+  let Inst{12-10} = Pn;
+  let Inst{9-5}   = Zn;
+  let Inst{4}     = S;
+  let Inst{3-2}   = 0b00;
+  let Inst{1-0}   = ZAda;
+}
+
+// Instantiates the widening outer product with op = 0 (bf16 form, going by
+// the multiclass name).
+multiclass sme_bf16_outer_product<bit S, string mnemonic> {
+  def : sme_outer_product_widening_inst<0b0, S, mnemonic>;
+}
+
+// Instantiates the widening outer product with op = 1 (f16 form, going by
+// the multiclass name).
+multiclass sme_f16_outer_product<bit S, string mnemonic> {
+  def : sme_outer_product_widening_inst<0b1, S, mnemonic>;
+}
+
+//===----------------------------------------------------------------------===//
+// SME Add Vector to Tile
+//===----------------------------------------------------------------------===//
+
+// Shared encoding for adding a single vector $Zn to the tile $ZAda under the
+// two predicates $Pn and $Pm (both printed with /m).
+//   op - value placed in Inst{22} (0 in the u32 subclass, 1 in the u64 one).
+//   V  - value placed in Inst{16} (presumably horizontal vs. vertical; confirm
+//        against the instantiating defs).
+// Inst{2-0} is left for the subclasses (tile number).
+class sme_add_vector_to_tile_inst<bit op, bit V, MatrixTileOperand tile_ty,
+                                  ZPRRegOp zpr_ty, string mnemonic>
+    : I<(outs tile_ty:$ZAda),
+        (ins PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn),
+        mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn",
+        "", []>, Sched<[]> {
+  bits<3> Pm;
+  bits<3> Pn;
+  bits<5> Zn;
+  let Inst{31-23} = 0b110000001;
+  let Inst{22}    = op;
+  let Inst{21-17} = 0b01000;
+  let Inst{16}    = V;
+  let Inst{15-13} = Pm;
+  let Inst{12-10} = Pn;
+  let Inst{9-5}   = Zn;
+  let Inst{4-3}   = 0b00;
+}
+
+// Add-vector-to-tile on 32-bit elements: op = 0, TileOp32 / ZPR32.
+// The tile number is 2 bits wide (Inst{1-0}); Inst{2} is fixed to 0.
+class sme_add_vector_to_tile_u32<bit V, string mnemonic>
+    : sme_add_vector_to_tile_inst<0b0, V, TileOp32, ZPR32, mnemonic> {
+  bits<2> ZAda;
+  let Inst{2}   = 0b0;
+  let Inst{1-0} = ZAda;
+}
+
+// Add-vector-to-tile on 64-bit elements: op = 1, TileOp64 / ZPR64.
+// The tile number is 3 bits wide and occupies all of Inst{2-0}.
+class sme_add_vector_to_tile_u64<bit V, string mnemonic>
+    : sme_add_vector_to_tile_inst<0b1, V, TileOp64, ZPR64, mnemonic> {
+  bits<3> ZAda;
+  let Inst{2-0} = ZAda;
+}
+
+//===----------------------------------------------------------------------===//
+// SME Contiguous Loads
+//===----------------------------------------------------------------------===//
+
+// Shared encoding for the SME contiguous loads.
+//   Q   - value placed in Inst{24} (set only by the _Q variants).
+//   V   - value placed in Inst{15}; callers pass their is_col flag here.
+//   msz - value placed in Inst{23-22}.
+// Inst{21} is fixed to 0 here; the store base class fixes it to 1.
+// Inst{3-0} is not assigned here: each element-size def splits it between the
+// tile number and the slice index.
+class sme_mem_ld_ss_base<bit Q, bit V, bits<2> msz, dag outs, dag ins,
+                         string mnemonic, string argstr>
+    : I<outs, ins, mnemonic, argstr, "", []>, Sched<[]> {
+  bits<5> Rm;
+  bits<2> Rv;
+  bits<3> Pg;
+  bits<5> Rn;
+  let Inst{31-25} = 0b1110000;
+  let Inst{24}    = Q;
+  let Inst{23-22} = msz;
+  let Inst{21}    = 0b0;
+  let Inst{20-16} = Rm;
+  let Inst{15}    = V;
+  let Inst{14-13} = Rv;
+  let Inst{12-10} = Pg;
+  let Inst{9-5}   = Rn;
+  let Inst{4}     = 0b0;
+
+  let mayLoad = 1;
+}
+
+// Binds the operand list and assembly string for one contiguous load:
+//   {<tile slice>[<Rv>, <imm>]}, <Pg>/z, [<Rn>, <Rm>]
+// The tile slice $ZAt is the only output; is_col is forwarded as the base
+// class's V bit.
+class sme_mem_ld_ss_inst<bit Q, bits<2> msz, string mnemonic,
+                         MatrixTileVectorOperand tile_ty, bit is_col,
+                         Operand imm_ty, RegisterOperand gpr_ty>
+    : sme_mem_ld_ss_base<
+        Q, is_col, msz, (outs tile_ty:$ZAt),
+        (ins MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn,
+             gpr_ty:$Rm),
+        mnemonic, "\t\\{$ZAt[$Rv, $imm]\\}, $Pg/z, [$Rn, $Rm]">;
+
+// Assembly aliases shared by the contiguous loads and stores of one element
+// size. Three spellings are accepted in addition to the canonical one:
+//   1. tile slice written without the surrounding braces, explicit $Rm;
+//   2. braces kept, offset register omitted (XZR is substituted);
+//   3. no braces and no offset register (XZR is substituted).
+// pg_suffix is appended to the predicate ("/z" for loads, empty for stores).
+// NOTE(review): the trailing 0/1 argument is presumably InstAlias' emit
+// priority (0 = accepted on input only); confirm in Target.td.
+multiclass sme_mem_ss_aliases_base<string mnemonic, Instruction inst,
+                                   MatrixTileVectorOperand tile_ty,
+                                   Operand imm_ty, RegisterOperand gpr_ty,
+                                   string pg_suffix=""> {
+  def : InstAlias<mnemonic # "\t$ZAt[$Rv, $imm], $Pg" # pg_suffix # ", [$Rn, $Rm]",
+                  (inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn, gpr_ty:$Rm), 0>;
+  // Default XZR offset aliases
+  def : InstAlias<mnemonic # "\t\\{$ZAt[$Rv, $imm]\\}, $Pg" # pg_suffix # ", [$Rn]",
+                  (inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 1>;
+  def : InstAlias<mnemonic # "\t$ZAt[$Rv, $imm], $Pg" # pg_suffix # ", [$Rn]",
+                  (inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 0>;
+}
+
+// Emits the alias set for all five element sizes of a load/store family.
+// For each size the instruction record is looked up by name (inst # _B, _H,
+// _S, _D, _Q), the mnemonic gets the matching suffix (b, h, w, d, q), and
+// is_col picks the V (is_col = 1) or H (is_col = 0) tile-vector operand.
+// The index operand range and the shifted GPR class follow the element size.
+multiclass sme_mem_ss_aliases<string mnemonic, string inst, bit is_col,
+                              string pg_suffix=""> {
+  defm : sme_mem_ss_aliases_base<mnemonic # "b", !cast<Instruction>(inst # _B),
+                                 !if(is_col, TileVectorOpV8, TileVectorOpH8),
+                                 sme_elm_idx0_15, GPR64shifted8, pg_suffix>;
+  defm : sme_mem_ss_aliases_base<mnemonic # "h", !cast<Instruction>(inst # _H),
+                                 !if(is_col, TileVectorOpV16, TileVectorOpH16),
+                                 sme_elm_idx0_7, GPR64shifted16, pg_suffix>;
+  defm : sme_mem_ss_aliases_base<mnemonic # "w", !cast<Instruction>(inst # _S),
+                                 !if(is_col, TileVectorOpV32, TileVectorOpH32),
+                                 sme_elm_idx0_3, GPR64shifted32, pg_suffix>;
+  defm : sme_mem_ss_aliases_base<mnemonic # "d", !cast<Instruction>(inst # _D),
+                                 !if(is_col, TileVectorOpV64, TileVectorOpH64),
+                                 sme_elm_idx0_1, GPR64shifted64, pg_suffix>;
+  defm : sme_mem_ss_aliases_base<mnemonic # "q", !cast<Instruction>(inst # _Q),
+                                 !if(is_col, TileVectorOpV128, TileVectorOpH128),
+                                 sme_elm_idx0_0, GPR64shifted128, pg_suffix>;
+}
+
+// Load flavour of the alias set: base mnemonic "ld1", predicate printed
+// with the "/z" suffix.
+multiclass sme_mem_ld_ss_aliases<string inst, bit is_col> {
+  defm NAME : sme_mem_ss_aliases<"ld1", inst, is_col, "/z">;
+}
+
+// Defines the five element-size loads (_B, _H, _S, _D, _Q) for one slice
+// direction and then their aliases.
+// Inst{3-0} is shared between the tile number (high bits) and the slice index
+// (low bits); as elements get wider the tile field grows and the index field
+// shrinks:
+//   _B: 0 tile bits, 4 index bits      _D: 3 tile bits, 1 index bit
+//   _H: 1 tile bit,  3 index bits      _Q: 4 tile bits, 0 index bits
+//   _S: 2 tile bits, 2 index bits
+// _B declares no ZAt field at all, and _Q is the only variant with Q = 1.
+multiclass sme_mem_ld_v_ss<string mnemonic, bit is_col> {
+  def _B : sme_mem_ld_ss_inst<0b0, 0b00, mnemonic # "b",
+                              !if(is_col, TileVectorOpV8, TileVectorOpH8),
+                              is_col, sme_elm_idx0_15, GPR64shifted8> {
+    bits<4> imm;
+    let Inst{3-0} = imm;
+  }
+  def _H : sme_mem_ld_ss_inst<0b0, 0b01, mnemonic # "h",
+                              !if(is_col, TileVectorOpV16, TileVectorOpH16),
+                              is_col, sme_elm_idx0_7, GPR64shifted16> {
+    bits<1> ZAt;
+    bits<3> imm;
+    let Inst{3}   = ZAt;
+    let Inst{2-0} = imm;
+  }
+  def _S : sme_mem_ld_ss_inst<0b0, 0b10, mnemonic # "w",
+                              !if(is_col, TileVectorOpV32, TileVectorOpH32),
+                              is_col, sme_elm_idx0_3, GPR64shifted32> {
+    bits<2> ZAt;
+    bits<2> imm;
+    let Inst{3-2} = ZAt;
+    let Inst{1-0} = imm;
+  }
+  def _D : sme_mem_ld_ss_inst<0b0, 0b11, mnemonic # "d",
+                              !if(is_col, TileVectorOpV64, TileVectorOpH64),
+                              is_col, sme_elm_idx0_1, GPR64shifted64> {
+    bits<3> ZAt;
+    bits<1> imm;
+    let Inst{3-1} = ZAt;
+    let Inst{0}   = imm;
+  }
+  def _Q : sme_mem_ld_ss_inst<0b1, 0b11, mnemonic # "q",
+                              !if(is_col, TileVectorOpV128, TileVectorOpH128),
+                              is_col, sme_elm_idx0_0, GPR64shifted128> {
+    bits<4> ZAt;
+    let Inst{3-0} = ZAt;
+  }
+
+  defm : sme_mem_ld_ss_aliases<NAME, is_col>;
+}
+
+// Top-level load multiclass: expands the per-size loads twice, once with
+// is_col = 0 (suffix _H) and once with is_col = 1 (suffix _V).
+multiclass sme_mem_ld_ss<string mnemonic> {
+  defm _H : sme_mem_ld_v_ss<mnemonic, /*is_col=*/0b0>;
+  defm _V : sme_mem_ld_v_ss<mnemonic, /*is_col=*/0b1>;
+}
+
+//===----------------------------------------------------------------------===//
+// SME Contiguous Stores
+//===----------------------------------------------------------------------===//
+
+// Shared encoding for the SME contiguous stores. The layout mirrors the load
+// base class except that Inst{21} is fixed to 1 and there are no outputs.
+//   Q   - value placed in Inst{24} (set only by the _Q variants).
+//   V   - value placed in Inst{15}; callers pass their is_col flag here.
+//   msz - value placed in Inst{23-22}.
+// Inst{3-0} is left for the element-size defs (tile number / slice index).
+// Unlike the load base class, this one also sets hasSideEffects.
+class sme_mem_st_ss_base<bit Q, bit V, bits<2> msz, dag ins,
+                         string mnemonic, string argstr>
+    : I<(outs), ins, mnemonic, argstr, "", []>, Sched<[]> {
+  bits<5> Rm;
+  bits<2> Rv;
+  bits<3> Pg;
+  bits<5> Rn;
+  let Inst{31-25} = 0b1110000;
+  let Inst{24}    = Q;
+  let Inst{23-22} = msz;
+  let Inst{21}    = 0b1;
+  let Inst{20-16} = Rm;
+  let Inst{15}    = V;
+  let Inst{14-13} = Rv;
+  let Inst{12-10} = Pg;
+  let Inst{9-5}   = Rn;
+  let Inst{4}     = 0b0;
+
+  let mayStore = 1;
+  let hasSideEffects = 1;
+}
+
+// Binds the operand list and assembly string for one contiguous store:
+//   {<tile slice>[<Rv>, <imm>]}, <Pg>, [<Rn>, <Rm>]
+// The tile slice $ZAt is an input here, and the predicate is printed without
+// a qualifier. is_col is forwarded as the base class's V bit.
+class sme_mem_st_ss_inst<bit Q, bits<2> msz, string mnemonic,
+                         MatrixTileVectorOperand tile_ty, bit is_col,
+                         Operand imm_ty, RegisterOperand gpr_ty>
+    : sme_mem_st_ss_base<
+        Q, is_col, msz,
+        (ins tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg,
+             GPR64sp:$Rn, gpr_ty:$Rm),
+        mnemonic, "\t\\{$ZAt[$Rv, $imm]\\}, $Pg, [$Rn, $Rm]">;
+
+// Store flavour of the alias set: base mnemonic "st1", no predicate suffix.
+multiclass sme_mem_st_ss_aliases<string inst, bit is_col> {
+  defm NAME : sme_mem_ss_aliases<"st1", inst, is_col>;
+}
+
+// Defines the five element-size stores (_B, _H, _S, _D, _Q) for one slice
+// direction and then their aliases.
+// Inst{3-0} is shared between the tile number (high bits) and the slice index
+// (low bits), exactly as in the load multiclass:
+//   _B: 0 tile bits, 4 index bits      _D: 3 tile bits, 1 index bit
+//   _H: 1 tile bit,  3 index bits      _Q: 4 tile bits, 0 index bits
+//   _S: 2 tile bits, 2 index bits
+// _Q is the only variant with Q = 1.
+multiclass sme_mem_st_v_ss<string mnemonic, bit is_col> {
+  def _B : sme_mem_st_ss_inst<0b0, 0b00, mnemonic # "b",
+                              !if(is_col, TileVectorOpV8, TileVectorOpH8),
+                              is_col, sme_elm_idx0_15, GPR64shifted8> {
+    bits<4> imm;
+    let Inst{3-0} = imm;
+  }
+  def _H : sme_mem_st_ss_inst<0b0, 0b01, mnemonic # "h",
+                              !if(is_col, TileVectorOpV16, TileVectorOpH16),
+                              is_col, sme_elm_idx0_7, GPR64shifted16> {
+    bits<1> ZAt;
+    bits<3> imm;
+    let Inst{3}   = ZAt;
+    let Inst{2-0} = imm;
+  }
+  def _S : sme_mem_st_ss_inst<0b0, 0b10, mnemonic # "w",
+                              !if(is_col, TileVectorOpV32, TileVectorOpH32),
+                              is_col, sme_elm_idx0_3, GPR64shifted32> {
+    bits<2> ZAt;
+    bits<2> imm;
+    let Inst{3-2} = ZAt;
+    let Inst{1-0} = imm;
+  }
+  def _D : sme_mem_st_ss_inst<0b0, 0b11, mnemonic # "d",
+                              !if(is_col, TileVectorOpV64, TileVectorOpH64),
+                              is_col, sme_elm_idx0_1, GPR64shifted64> {
+    bits<3> ZAt;
+    bits<1> imm;
+    let Inst{3-1} = ZAt;
+    let Inst{0}   = imm;
+  }
+  def _Q : sme_mem_st_ss_inst<0b1, 0b11, mnemonic # "q",
+                              !if(is_col, TileVectorOpV128, TileVectorOpH128),
+                              is_col, sme_elm_idx0_0, GPR64shifted128> {
+    bits<4> ZAt;
+    let Inst{3-0} = ZAt;
+  }
+
+  defm : sme_mem_st_ss_aliases<NAME, is_col>;
+}
+
+// Top-level store multiclass: expands the per-size stores twice, once with
+// is_col = 0 (suffix _H) and once with is_col = 1 (suffix _V).
+multiclass sme_mem_st_ss<string mnemonic> {
+  defm _H : sme_mem_st_v_ss<mnemonic, /*is_col=*/0b0>;
+  defm _V : sme_mem_st_v_ss<mnemonic, /*is_col=*/0b1>;
+}
+
+//===----------------------------------------------------------------------===//
+// SME Save and Restore Array
+//===----------------------------------------------------------------------===//
+
+// Encoding shared by the ZA array save (store) and restore (load)
+// instructions. isStore goes into Inst{21} and also drives mayLoad/mayStore
+// (mayLoad is its negation).
+// Only Rv, Rn and imm4 have encoding fields. $offset appears in the assembly
+// string but has no field of its own in this class.
+// NOTE(review): presumably the assembler requires $offset to equal $imm4,
+// since both are printed but only imm4 is encoded; confirm in the asm parser.
+class sme_spill_fill_inst<bit isStore, dag outs, dag ins, string opcodestr>
+    : I<outs, ins, opcodestr, "\t$ZAt[$Rv, $imm4], [$Rn, $offset, mul vl]", "",
+        []>,
+      Sched<[]> {
+  bits<2> Rv;
+  bits<5> Rn;
+  bits<4> imm4;
+  let Inst{31-22} = 0b1110000100;
+  let Inst{21}    = isStore;
+  let Inst{20-15} = 0b000000;
+  let Inst{14-13} = Rv;
+  let Inst{12-10} = 0b000;
+  let Inst{9-5}   = Rn;
+  let Inst{4}     = 0b0;
+  let Inst{3-0}   = imm4;
+
+  let mayLoad = !not(isStore);
+  let mayStore = isStore;
+}
+
+// Defines the spill/fill instruction itself plus a short alias that omits the
+// ", $offset, mul vl" part of the address; the alias passes 0 for the
+// instruction's last operand.
+multiclass sme_spill_fill<bit isStore, dag outs, dag ins, string opcodestr> {
+  def NAME : sme_spill_fill_inst<isStore, outs, ins, opcodestr>;
+
+  def : InstAlias<opcodestr # "\t$ZAt[$Rv, $imm4], [$Rn]",
+                  (!cast<Instruction>(NAME) MatrixOp:$ZAt,
+                   MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_15:$imm4, GPR64sp:$Rn, 0), 1>;
+}
+
+// Store direction (isStore = 1): the matrix operand $ZAt is an input and the
+// instruction has no outputs.
+multiclass sme_spill<string opcodestr> {
+  defm NAME : sme_spill_fill<0b1, (outs),
+                             (ins MatrixOp:$ZAt, MatrixIndexGPR32Op12_15:$Rv,
+                                  sme_elm_idx0_15:$imm4, GPR64sp:$Rn,
+                                  imm0_15:$offset),
+                             opcodestr>;
+}
+
+// Load direction (isStore = 0): the matrix operand $ZAt is the output.
+multiclass sme_fill<string opcodestr> {
+  defm NAME : sme_spill_fill<0b0, (outs MatrixOp:$ZAt),
+                             (ins MatrixIndexGPR32Op12_15:$Rv,
+                                  sme_elm_idx0_15:$imm4, GPR64sp:$Rn,
+                                  imm0_15:$offset),
+                             opcodestr>;
+}
+
+//===----------------------------------------------------------------------===//
+// Move instructions
+//===----------------------------------------------------------------------===//
+
+// Shared encoding for moving a vector register into a tile slice.
+//   Q  - value placed in Inst{16} (set only by the _Q variants).
+//   V  - value placed in Inst{15}; callers pass their is_col flag here.
+//   sz - value placed in Inst{23-22}.
+// Inst{21-17} is 0b00000 here; the tile-to-vector base class uses 0b00001.
+// Inst{3-0} is left for the element-size defs (tile number / slice index).
+class sme_vector_to_tile_base<bit Q, bit V, bits<2> sz, dag outs, dag ins,
+                              string mnemonic, string argstr>
+    : I<outs, ins, mnemonic, argstr, "", []>, Sched<[]> {
+  bits<2> Rv;
+  bits<3> Pg;
+  bits<5> Zn;
+  let Inst{31-24} = 0b11000000;
+  let Inst{23-22} = sz;
+  let Inst{21-17} = 0b00000;
+  let Inst{16}    = Q;
+  let Inst{15}    = V;
+  let Inst{14-13} = Rv;
+  let Inst{12-10} = Pg;
+  let Inst{9-5}   = Zn;
+  let Inst{4}     = 0b0;
+}
+
+// Binds operands and assembly for one vector-to-tile move:
+//   <tile slice>[<Rv>, <imm>], <Pg>/m, <Zn>
+// The tile slice $ZAd is the output; is_col is forwarded as the V bit.
+class sme_vector_to_tile_inst<bit Q, bits<2> sz, MatrixTileVectorOperand tile_ty,
+                              bit is_col, Operand imm_ty, ZPRRegOp zpr_ty,
+                              string mnemonic>
+    : sme_vector_to_tile_base<Q, is_col, sz, (outs tile_ty:$ZAd),
+        (ins MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, zpr_ty:$Zn),
+        mnemonic, "\t$ZAd[$Rv, $imm], $Pg/m, $Zn">;
+
+// Adds a "mov" spelling for a vector-to-tile instruction, with the same
+// operand order as the underlying instruction.
+multiclass sme_vector_to_tile_aliases<Instruction inst,
+                                      MatrixTileVectorOperand tile_ty,
+                                      ZPRRegOp zpr_ty, Operand imm_ty> {
+  def : InstAlias<"mov\t$ZAd[$Rv, $imm], $Pg/m, $Zn",
+                  (inst tile_ty:$ZAd, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, zpr_ty:$Zn), 1>;
+}
+
+// Defines the five element-size vector-to-tile moves (_B, _H, _S, _D, _Q) for
+// one slice direction, followed by a "mov" alias for each.
+// Inst{3-0} is shared between the tile number (high bits) and the slice index
+// (low bits):
+//   _B: 0 tile bits, 4 index bits      _D: 3 tile bits, 1 index bit
+//   _H: 1 tile bit,  3 index bits      _Q: 4 tile bits, 0 index bits
+//   _S: 2 tile bits, 2 index bits
+// NOTE(review): _Q declares "bits<1> imm" but never places it in Inst; the
+// matching tile-to-vector _Q def has no such field. Looks vestigial - confirm
+// before removing.
+multiclass sme_vector_v_to_tile<string mnemonic, bit is_col> {
+  def _B : sme_vector_to_tile_inst<0b0, 0b00, !if(is_col, TileVectorOpV8,
+                                                          TileVectorOpH8),
+                                   is_col, sme_elm_idx0_15, ZPR8, mnemonic> {
+    bits<4> imm;
+    let Inst{3-0} = imm;
+  }
+  def _H : sme_vector_to_tile_inst<0b0, 0b01, !if(is_col, TileVectorOpV16,
+                                                          TileVectorOpH16),
+                                   is_col, sme_elm_idx0_7, ZPR16, mnemonic> {
+    bits<1> ZAd;
+    bits<3> imm;
+    let Inst{3}   = ZAd;
+    let Inst{2-0} = imm;
+  }
+  def _S : sme_vector_to_tile_inst<0b0, 0b10, !if(is_col, TileVectorOpV32,
+                                                          TileVectorOpH32),
+                                   is_col, sme_elm_idx0_3, ZPR32, mnemonic> {
+    bits<2> ZAd;
+    bits<2> imm;
+    let Inst{3-2} = ZAd;
+    let Inst{1-0} = imm;
+  }
+  def _D : sme_vector_to_tile_inst<0b0, 0b11, !if(is_col, TileVectorOpV64,
+                                                          TileVectorOpH64),
+                                   is_col, sme_elm_idx0_1, ZPR64, mnemonic> {
+    bits<3> ZAd;
+    bits<1> imm;
+    let Inst{3-1} = ZAd;
+    let Inst{0}   = imm;
+  }
+  def _Q : sme_vector_to_tile_inst<0b1, 0b11, !if(is_col, TileVectorOpV128,
+                                                          TileVectorOpH128),
+                                   is_col, sme_elm_idx0_0, ZPR128, mnemonic> {
+    bits<4> ZAd;
+    bits<1> imm;
+    let Inst{3-0} = ZAd;
+  }
+
+  defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _B),
+                                    !if(is_col, TileVectorOpV8,
+                                                TileVectorOpH8),
+                                    ZPR8, sme_elm_idx0_15>;
+  defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _H),
+                                    !if(is_col, TileVectorOpV16,
+                                                TileVectorOpH16),
+                                    ZPR16, sme_elm_idx0_7>;
+  defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _S),
+                                    !if(is_col, TileVectorOpV32,
+                                                TileVectorOpH32),
+                                    ZPR32, sme_elm_idx0_3>;
+  defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _D),
+                                    !if(is_col, TileVectorOpV64,
+                                                TileVectorOpH64),
+                                    ZPR64, sme_elm_idx0_1>;
+  defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _Q),
+                                    !if(is_col, TileVectorOpV128,
+                                                TileVectorOpH128),
+                                    ZPR128, sme_elm_idx0_0>;
+}
+
+// Top-level vector-to-tile multiclass: expands the per-size moves twice, once
+// with is_col = 0 (suffix _H) and once with is_col = 1 (suffix _V).
+multiclass sme_vector_to_tile<string mnemonic> {
+  defm _H : sme_vector_v_to_tile<mnemonic, /*is_col=*/0b0>;
+  defm _V : sme_vector_v_to_tile<mnemonic, /*is_col=*/0b1>;
+}
+
+// Shared encoding for moving a tile slice into a vector register.
+//   Q  - value placed in Inst{16} (set only by the _Q variants).
+//   V  - value placed in Inst{15}; callers pass their is_col flag here.
+//   sz - value placed in Inst{23-22}.
+// Inst{21-17} is 0b00001 here; the vector-to-tile base class uses 0b00000.
+// The destination vector Zd sits in Inst{4-0}. Inst{8-5} is left for the
+// element-size defs (tile number / slice index).
+class sme_tile_to_vector_base<bit Q, bit V, bits<2> sz, dag outs, dag ins,
+                              string mnemonic, string argstr>
+    : I<outs, ins, mnemonic, argstr, "", []>, Sched<[]> {
+  bits<2> Rv;
+  bits<3> Pg;
+  bits<5> Zd;
+  let Inst{31-24} = 0b11000000;
+  let Inst{23-22} = sz;
+  let Inst{21-17} = 0b00001;
+  let Inst{16}    = Q;
+  let Inst{15}    = V;
+  let Inst{14-13} = Rv;
+  let Inst{12-10} = Pg;
+  let Inst{9}     = 0b0;
+  let Inst{4-0}   = Zd;
+}
+
+// Binds operands and assembly for one tile-to-vector move:
+//   <Zd>, <Pg>/m, <tile slice>[<Rv>, <imm>]
+// The vector $Zd is the output; is_col is forwarded as the V bit.
+class sme_tile_to_vector_inst<bit Q, bits<2> sz, ZPRRegOp zpr_ty,
+                              MatrixTileVectorOperand tile_ty,
+                              bit is_col, Operand imm_ty, string mnemonic>
+    : sme_tile_to_vector_base<Q, is_col, sz, (outs zpr_ty:$Zd),
+        (ins PPR3bAny:$Pg, tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm),
+        mnemonic, "\t$Zd, $Pg/m, $ZAn[$Rv, $imm]">;
+
+// Adds a "mov" spelling for a tile-to-vector instruction, with the same
+// operand order as the underlying instruction.
+multiclass sme_tile_to_vector_aliases<Instruction inst, ZPRRegOp zpr_ty,
+                                      MatrixTileVectorOperand tile_ty,
+                                      Operand imm_ty > {
+  def : InstAlias<"mov\t$Zd, $Pg/m, $ZAn[$Rv, $imm]",
+                  (inst zpr_ty:$Zd, PPR3bAny:$Pg, tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm), 1>;
+}
+
+// Defines the five element-size tile-to-vector moves (_B, _H, _S, _D, _Q) for
+// one slice direction, followed by a "mov" alias for each.
+// Here the tile number and slice index live in Inst{8-5} (the source side),
+// with the tile number in the high bits and the index in the low bits:
+//   _B: 0 tile bits, 4 index bits      _D: 3 tile bits, 1 index bit
+//   _H: 1 tile bit,  3 index bits      _Q: 4 tile bits, 0 index bits
+//   _S: 2 tile bits, 2 index bits
+multiclass sme_tile_to_vector_v<string mnemonic, bit is_col> {
+  def _B : sme_tile_to_vector_inst<0b0, 0b00, ZPR8, !if(is_col, TileVectorOpV8,
+                                                                TileVectorOpH8),
+                                   is_col, sme_elm_idx0_15, mnemonic> {
+    bits<4> imm;
+    let Inst{8-5} = imm;
+  }
+  def _H : sme_tile_to_vector_inst<0b0, 0b01, ZPR16, !if(is_col, TileVectorOpV16,
+                                                                 TileVectorOpH16),
+                                   is_col, sme_elm_idx0_7, mnemonic> {
+    bits<1> ZAn;
+    bits<3> imm;
+    let Inst{8}   = ZAn;
+    let Inst{7-5} = imm;
+  }
+  def _S : sme_tile_to_vector_inst<0b0, 0b10, ZPR32, !if(is_col, TileVectorOpV32,
+                                                                 TileVectorOpH32),
+                                   is_col, sme_elm_idx0_3, mnemonic> {
+    bits<2> ZAn;
+    bits<2> imm;
+    let Inst{8-7} = ZAn;
+    let Inst{6-5} = imm;
+  }
+  def _D : sme_tile_to_vector_inst<0b0, 0b11, ZPR64, !if(is_col, TileVectorOpV64,
+                                                                 TileVectorOpH64),
+                                   is_col, sme_elm_idx0_1, mnemonic> {
+    bits<3> ZAn;
+    bits<1> imm;
+    let Inst{8-6} = ZAn;
+    let Inst{5}   = imm;
+  }
+  def _Q : sme_tile_to_vector_inst<0b1, 0b11, ZPR128, !if(is_col, TileVectorOpV128,
+                                                                  TileVectorOpH128),
+                                   is_col, sme_elm_idx0_0, mnemonic> {
+    bits<4> ZAn;
+    let Inst{8-5} = ZAn;
+  }
+
+  defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _B), ZPR8,
+                                    !if(is_col, TileVectorOpV8,
+                                                TileVectorOpH8), sme_elm_idx0_15>;
+  defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _H), ZPR16,
+                                    !if(is_col, TileVectorOpV16,
+                                                TileVectorOpH16), sme_elm_idx0_7>;
+  defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _S), ZPR32,
+                                    !if(is_col, TileVectorOpV32,
+                                                TileVectorOpH32), sme_elm_idx0_3>;
+  defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _D), ZPR64,
+                                    !if(is_col, TileVectorOpV64,
+                                                TileVectorOpH64), sme_elm_idx0_1>;
+  defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _Q), ZPR128,
+                                    !if(is_col, TileVectorOpV128,
+                                                TileVectorOpH128), sme_elm_idx0_0>;
+}
+
+// Top-level tile-to-vector multiclass: expands the per-size moves twice, once
+// with is_col = 0 (suffix _H) and once with is_col = 1 (suffix _V).
+multiclass sme_tile_to_vector<string mnemonic> {
+  defm _H : sme_tile_to_vector_v<mnemonic, /*is_col=*/0b0>;
+  defm _V : sme_tile_to_vector_v<mnemonic, /*is_col=*/0b1>;
+}
+
+//===----------------------------------------------------------------------===//
+// SME Zero
+//===----------------------------------------------------------------------===//
+
+// Encoding of the tile-zeroing instruction. Its single operand is a
+// MatrixTileList, declared as an output and encoded as an 8-bit mask in
+// Inst{7-0}; all other bits are fixed.
+class sme_zero_inst<string mnemonic>
+    : I<(outs MatrixTileList:$imm), (ins),
+        mnemonic, "\t$imm", "", []>, Sched<[]> {
+  bits<8> imm;
+  let Inst{31-8} = 0b110000000000100000000000;
+  let Inst{7-0}  = imm;
+}
+
+// Defines the zero instruction and aliases that map named tile lists onto
+// its 8-bit mask:
+//   {za}           -> all eight mask bits set
+//   {za0.h}/{za1.h} -> the even / odd mask bits
+//   {zaN.s}        -> mask bits N and N+4
+//   multi-tile .s lists -> the OR of the individual .s masks
+// The alias strings spell "zero" literally instead of using the mnemonic
+// parameter.
+multiclass sme_zero<string mnemonic> {
+  def NAME : sme_zero_inst<mnemonic>;
+
+  def : InstAlias<"zero\t\\{za\\}", (!cast<Instruction>(NAME) 0b11111111), 1>;
+  def : InstAlias<"zero\t\\{za0.h\\}", (!cast<Instruction>(NAME) 0b01010101), 1>;
+  def : InstAlias<"zero\t\\{za1.h\\}", (!cast<Instruction>(NAME) 0b10101010), 1>;
+  def : InstAlias<"zero\t\\{za0.s\\}", (!cast<Instruction>(NAME) 0b00010001), 1>;
+  def : InstAlias<"zero\t\\{za1.s\\}", (!cast<Instruction>(NAME) 0b00100010), 1>;
+  def : InstAlias<"zero\t\\{za2.s\\}", (!cast<Instruction>(NAME) 0b01000100), 1>;
+  def : InstAlias<"zero\t\\{za3.s\\}", (!cast<Instruction>(NAME) 0b10001000), 1>;
+  def : InstAlias<"zero\t\\{za0.s,za1.s\\}", (!cast<Instruction>(NAME) 0b00110011), 1>;
+  def : InstAlias<"zero\t\\{za0.s,za3.s\\}", (!cast<Instruction>(NAME) 0b10011001), 1>;
+  def : InstAlias<"zero\t\\{za1.s,za2.s\\}", (!cast<Instruction>(NAME) 0b01100110), 1>;
+  def : InstAlias<"zero\t\\{za2.s,za3.s\\}", (!cast<Instruction>(NAME) 0b11001100), 1>;
+  def : InstAlias<"zero\t\\{za0.s,za1.s,za2.s\\}", (!cast<Instruction>(NAME) 0b01110111), 1>;
+  def : InstAlias<"zero\t\\{za0.s,za1.s,za3.s\\}", (!cast<Instruction>(NAME) 0b10111011), 1>;
+  def : InstAlias<"zero\t\\{za0.s,za2.s,za3.s\\}", (!cast<Instruction>(NAME) 0b11011101), 1>;
+  def : InstAlias<"zero\t\\{za1.s,za2.s,za3.s\\}", (!cast<Instruction>(NAME) 0b11101110), 1>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE2 Instructions
+//===----------------------------------------------------------------------===//
+
+// Predicated unary permute on 128-bit elements (ZPR128), printed as
+//   <Zd>, <Pg>/m, <Zn>
+// The destination is tied to the extra input $_Zd through the Constraints
+// string, and the instruction is tagged DestructiveUnary.
+class sve2_int_perm_revd<string asm>
+    : I<(outs ZPR128:$Zd), (ins ZPR128:$_Zd, PPR3bAny:$Pg, ZPR128:$Zn),
+        asm, "\t$Zd, $Pg/m, $Zn", "", []>,
+      Sched<[]> {
+  bits<5> Zd;
+  bits<3> Pg;
+  bits<5> Zn;
+  let Inst{31-24} = 0b00000101;
+  let Inst{23-22} = 0b00; // size
+  let Inst{21-13} = 0b101110100;
+  let Inst{12-10} = Pg;
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zd;
+
+  let Constraints = "$Zd = $_Zd";
+  let DestructiveInstType = DestructiveUnary;
+  let ElementSize = ZPR128.ElementSize;
+}
+
+// Unpredicated three-register clamp, printed as <Zd>, <Zn>, <Zm>.
+//   sz - element size, placed in Inst{23-22}.
+//   U  - value placed in Inst{10} (presumably signed vs. unsigned; confirm
+//        against the instantiating defs).
+// The destination is tied to the extra input $_Zd through the Constraints
+// string, and the instruction is tagged DestructiveOther.
+class sve2_clamp<string asm, bits<2> sz, bit U, ZPRRegOp zpr_ty>
+    : I<(outs zpr_ty:$Zd), (ins zpr_ty:$Zn, zpr_ty:$Zm, zpr_ty:$_Zd),
+        asm, "\t$Zd, $Zn, $Zm", "", []>,
+      Sched<[]> {
+  bits<5> Zm;
+  bits<5> Zn;
+  bits<5> Zd;
+  let Inst{31-24} = 0b01000100;
+  let Inst{23-22} = sz;
+  let Inst{21}    = 0b0;
+  let Inst{20-16} = Zm;
+  let Inst{15-11} = 0b11000;
+  let Inst{10}    = U;
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zd;
+
+  let Constraints = "$Zd = $_Zd";
+  let DestructiveInstType = DestructiveOther;
+  let ElementSize = zpr_ty.ElementSize;
+}
+
+// Instantiates the clamp class for the four element sizes: sz counts up from
+// 0b00 (_B, ZPR8) to 0b11 (_D, ZPR64).
+multiclass sve2_clamp<string asm, bit U> {
+  def _B : sve2_clamp<asm, 0b00, U, ZPR8>;
+  def _H : sve2_clamp<asm, 0b01, U, ZPR16>;
+  def _S : sve2_clamp<asm, 0b10, U, ZPR32>;
+  def _D : sve2_clamp<asm, 0b11, U, ZPR64>;
+}
+
+// Shared encoding for the indexed predicate select, printed as
+//   <Pd>, <Pn>, <Pm>[<Rv>, <imm>]
+// Rv goes into Inst{17-16} and the three predicate registers are 4 bits each.
+// Inst{23-22} and Inst{20-18} are not assigned here; the element-size defs in
+// the multiclass of the same name fill them with the index and a size tag.
+class sve2_int_perm_sel_p<string asm, PPRRegOp ppr_ty, Operand imm_ty>
+    : I<(outs PPRAny:$Pd), (ins PPRAny:$Pn, ppr_ty:$Pm,
+                            MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm),
+        asm, "\t$Pd, $Pn, $Pm[$Rv, $imm]", "", []>,
+      Sched<[]> {
+  bits<2> Rv;
+  bits<4> Pn;
+  bits<4> Pm;
+  bits<4> Pd;
+  let Inst{31-24} = 0b00100101;
+  let Inst{21}    = 0b1;
+  let Inst{17-16} = Rv;
+  let Inst{15-14} = 0b01;
+  let Inst{13-10} = Pn;
+  let Inst{9}     = 0b0;
+  let Inst{8-5}   = Pm;
+  let Inst{4}     = 0b0;
+  let Inst{3-0}   = Pd;
+}
+
+// Instantiates the indexed predicate select for the four element sizes.
+// The index and a size tag share Inst{23-22} and Inst{20-18}; as the index
+// narrows, the freed low bits are filled with a fixed pattern:
+//   _B: 4-bit index -> Inst{23-22,20-19};  Inst{18}    = 1
+//   _H: 3-bit index -> Inst{23-22,20};     Inst{19-18} = 10
+//   _S: 2-bit index -> Inst{23-22};        Inst{20-18} = 100
+//   _D: 1-bit index -> Inst{23};           Inst{22} = 1, Inst{20-18} = 000
+multiclass sve2_int_perm_sel_p<string asm> {
+  def _B : sve2_int_perm_sel_p<asm, PPR8, sme_elm_idx0_15> {
+    bits<4> imm;
+    let Inst{23-22} = imm{3-2};
+    let Inst{20-19} = imm{1-0};
+    let Inst{18}    = 0b1;
+  }
+  def _H : sve2_int_perm_sel_p<asm, PPR16, sme_elm_idx0_7> {
+    bits<3> imm;
+    let Inst{23-22} = imm{2-1};
+    let Inst{20}    = imm{0};
+    let Inst{19-18} = 0b10;
+  }
+  def _S : sve2_int_perm_sel_p<asm, PPR32, sme_elm_idx0_3> {
+    bits<2> imm;
+    let Inst{23-22} = imm{1-0};
+    let Inst{20-18} = 0b100;
+  }
+  def _D : sve2_int_perm_sel_p<asm, PPR64, sme_elm_idx0_1> {
+    bits<1> imm;
+    let Inst{23}    = imm;
+    let Inst{22}    = 0b1;
+    let Inst{20-18} = 0b000;
+  }
+}
--- a/suite/synctools/tablegen/AArch64/SVEInstrFormats.td
+++ b/suite/synctools/tablegen/AArch64/SVEInstrFormats.td
--- a/suite/synctools/tablegen/include/llvm/CodeGen/AccelTable.h
+++ b/suite/synctools/tablegen/include/llvm/CodeGen/AccelTable.h
@ -0,0 +1,408 @@
+//==- include/llvm/CodeGen/AccelTable.h - Accelerator Tables -----*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file contains support for writing accelerator tables.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_ACCELTABLE_H
+#define LLVM_CODEGEN_ACCELTABLE_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/DIE.h"
+#include "llvm/CodeGen/DwarfStringPoolEntry.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/DJB.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstddef>
+#include <cstdint>
+#include <vector>
+
+/// \file
+/// The DWARF and Apple accelerator tables are an indirect hash table optimized
+/// for null lookup rather than access to known data. The Apple accelerator
+/// tables are a precursor of the newer DWARF v5 accelerator tables. Both
+/// formats share common design ideas.
+///
+/// The Apple accelerator tables are output into an on-disk format that looks
+/// like this:
+///
+/// .------------------.
+/// |  HEADER          |
+/// |------------------|
+/// |  BUCKETS         |
+/// |------------------|
+/// |  HASHES          |
+/// |------------------|
+/// |  OFFSETS         |
+/// |------------------|
+/// |  DATA            |
+/// `------------------'
+///
+/// The header contains a magic number, version, type of hash function,
+/// the number of buckets, total number of hashes, and room for a special struct
+/// of data and the length of that struct.
+///
+/// The buckets contain an index (e.g. 6) into the hashes array. The hashes
+/// section contains all of the 32-bit hash values in contiguous memory, and the
+/// offsets contain the offset into the data area for the particular hash.
+///
+/// For a lookup example, we could hash a function name and take it modulo the
+/// number of buckets giving us our bucket. From there we take the bucket value
+/// as an index into the hashes table and look at each successive hash as long
+/// as the hash value is still the same modulo result (bucket value) as earlier.
+/// If we have a match we look at that same entry in the offsets table and grab
+/// the offset in the data for our final match.
+///
+/// The DWARF v5 accelerator table consists of zero or more name indices that
+/// are output into an on-disk format that looks like this:
+///
+/// .------------------.
+/// |  HEADER          |
+/// |------------------|
+/// |  CU LIST         |
+/// |------------------|
+/// |  LOCAL TU LIST   |
+/// |------------------|
+/// |  FOREIGN TU LIST |
+/// |------------------|
+/// |  HASH TABLE      |
+/// |------------------|
+/// |  NAME TABLE      |
+/// |------------------|
+/// |  ABBREV TABLE    |
+/// |------------------|
+/// |  ENTRY POOL      |
+/// `------------------'
+///
+/// For the full documentation please refer to the DWARF 5 standard.
+///
+///
+/// This file defines the class template AccelTable, which represents an
+/// abstract view of an Accelerator table, without any notion of an on-disk
+/// layout. This class is parameterized by an entry type, which should derive
+/// from AccelTableData. This is the type of individual entries in the table,
+/// and it should store the data necessary to emit them. AppleAccelTableData is
+/// the base class for Apple Accelerator Table entries, which have a uniform
+/// structure based on a sequence of Atoms. There are different sub-classes
+/// derived from AppleAccelTableData, which differ in the set of Atoms and how
+/// they obtain their values.
+///
+/// An Apple Accelerator Table can be serialized by calling emitAppleAccelTable
+/// function.
+
+namespace llvm {
+
+class AsmPrinter;
+class DwarfCompileUnit;
+class DwarfDebug;
+
+/// Interface which the different types of accelerator table data have to
+/// conform. It serves as a base class for different values of the template
+/// argument of the AccelTable class template.
+class AccelTableData {
+public:
+  virtual ~AccelTableData() = default;
+
+  /// Entries compare by the key returned from order().
+  bool operator<(const AccelTableData &Other) const {
+    return order() < Other.order();
+  }
+
+  // Subclasses should implement:
+  // static uint32_t hash(StringRef Name);
+
+#ifndef NDEBUG
+  /// Debug-only printer; declared only when NDEBUG is not defined.
+  virtual void print(raw_ostream &OS) const = 0;
+#endif
+protected:
+  /// Sort key consumed by operator<; each concrete entry type supplies it.
+  virtual uint64_t order() const = 0;
+};
+
+/// A base class holding non-template-dependent functionality of the AccelTable
+/// class. Clients should not use this class directly but rather instantiate
+/// AccelTable with a type derived from AccelTableData.
+class AccelTableBase {
+public:
+  /// Signature of the name-hashing function supplied by the entry type.
+  using HashFn = uint32_t(StringRef);
+
+  /// Represents a group of entries with identical name (and hence, hash value).
+  struct HashData {
+    DwarfStringPoolEntryRef Name;
+    uint32_t HashValue;
+    std::vector<AccelTableData *> Values;
+    /// The constructor does not assign this member, so it starts out null
+    /// instead of holding an indeterminate pointer.
+    MCSymbol *Sym = nullptr;
+
+    /// Stores \p Name and eagerly computes HashValue by applying \p Hash to
+    /// the name's string.
+    HashData(DwarfStringPoolEntryRef Name, HashFn *Hash)
+        : Name(Name), HashValue(Hash(Name.getString())) {}
+
+#ifndef NDEBUG
+    void print(raw_ostream &OS) const;
+    void dump() const { print(dbgs()); }
+#endif
+  };
+  using HashList = std::vector<HashData *>;
+  using BucketList = std::vector<HashList>;
+
+protected:
+  /// Allocator for HashData and Values.
+  BumpPtrAllocator Allocator;
+
+  /// Name -> HashData map; constructed on top of Allocator.
+  using StringEntries = StringMap<HashData, BumpPtrAllocator &>;
+  StringEntries Entries;
+
+  HashFn *Hash;
+  /// The counters start at zero so that getBucketCount() and
+  /// getUniqueHashCount() return a defined value on a freshly constructed
+  /// table. NOTE(review): they are presumably filled in by
+  /// computeBucketCount()/finalize(), whose definitions are not in this
+  /// header - confirm.
+  uint32_t BucketCount = 0;
+  uint32_t UniqueHashCount = 0;
+
+  HashList Hashes;
+  BucketList Buckets;
+
+  void computeBucketCount();
+
+  /// \param Hash function used to hash entry names; stored and handed to every
+  /// HashData created by the derived AccelTable.
+  AccelTableBase(HashFn *Hash) : Entries(Allocator), Hash(Hash) {}
+
+public:
+  void finalize(AsmPrinter *Asm, StringRef Prefix);
+  ArrayRef<HashList> getBuckets() const { return Buckets; }
+  uint32_t getBucketCount() const { return BucketCount; }
+  uint32_t getUniqueHashCount() const { return UniqueHashCount; }
+  /// Number of distinct names, i.e. the size of the Entries map.
+  uint32_t getUniqueNameCount() const { return Entries.size(); }
+
+#ifndef NDEBUG
+  void print(raw_ostream &OS) const;
+  void dump() const { print(dbgs()); }
+#endif
+
+  // Non-copyable.
+  AccelTableBase(const AccelTableBase &) = delete;
+  void operator=(const AccelTableBase &) = delete;
+};
+
+/// This class holds an abstract representation of an Accelerator Table,
+/// consisting of a sequence of buckets, each bucket containing a sequence of
+/// HashData entries. The class is parameterized by the type of entries it
+/// holds. The type template parameter also defines the hash function to use for
+/// hashing names.
+template <typename DataT> class AccelTable : public AccelTableBase {
+public:
+  /// Uses DataT::hash as the name-hashing function of the table.
+  AccelTable() : AccelTableBase(DataT::hash) {}
+
+  /// Adds an entry for \p Name; \p Args are perfectly forwarded to the
+  /// constructor of the entry type.
+  template <typename... Types>
+  void addName(DwarfStringPoolEntryRef Name, Types &&... Args);
+};
+
+/// Out-of-line definition of AccelTable::addName. Asserts that Buckets is
+/// still empty ("Already finalized!" otherwise).
+template <typename AccelTableDataT>
+template <typename... Types>
+void AccelTable<AccelTableDataT>::addName(DwarfStringPoolEntryRef Name,
+                                          Types &&... Args) {
+  assert(Buckets.empty() && "Already finalized!");
+  // If the string is in the list already then add this die to the list
+  // otherwise add a new one.
+  auto Iter = Entries.try_emplace(Name.getString(), Name, Hash).first;
+  assert(Iter->second.Name == Name);
+  // The entry object is placement-new'ed on the table's Allocator; only the
+  // raw pointer is stored in Values.
+  Iter->second.Values.push_back(
+      new (Allocator) AccelTableDataT(std::forward<Types>(Args)...));
+}
+
+/// A base class for different implementations of Data classes for Apple
+/// Accelerator Tables. The columns in the table are defined by the static Atoms
+/// variable defined on the subclasses.
+class AppleAccelTableData : public AccelTableData {
+public:
+  /// An Atom defines the form of the data in an Apple accelerator table.
+  /// Conceptually it is a column in the accelerator consisting of a type and a
+  /// specification of the form of its data.
+  struct Atom {
+    /// Atom Type.
+    const uint16_t Type;
+    /// DWARF Form.
+    const uint16_t Form;
+
+    /// constexpr so that subclasses can build their static Atoms arrays from
+    /// Atom values at compile time.
+    constexpr Atom(uint16_t Type, uint16_t Form) : Type(Type), Form(Form) {}
+
+#ifndef NDEBUG
+    void print(raw_ostream &OS) const;
+    void dump() const { print(dbgs()); }
+#endif
+  };
+  // Subclasses should define:
+  // static constexpr Atom Atoms[];
+
+  /// Emits this entry through \p Asm; implemented by each concrete entry type.
+  virtual void emit(AsmPrinter *Asm) const = 0;
+
+  /// Name hash used for Apple tables: forwards to djbHash.
+  static uint32_t hash(StringRef Buffer) { return djbHash(Buffer); }
+};
+
+/// The Data class implementation for DWARF v5 accelerator table. Unlike the
+/// Apple Data classes, this class is just a DIE wrapper, and does not know how
+/// to serialize itself. The complete serialization logic is in the
+/// emitDWARF5AccelTable function.
+class DWARF5AccelTableData : public AccelTableData {
+public:
+  /// Name hash used for DWARF v5 tables: forwards to caseFoldingDjbHash.
+  static uint32_t hash(StringRef Name) { return caseFoldingDjbHash(Name); }
+
+  /// Wraps \p Die. The DIE is held by reference, not copied.
+  DWARF5AccelTableData(const DIE &Die) : Die(Die) {}
+
+#ifndef NDEBUG
+  void print(raw_ostream &OS) const override;
+#endif
+
+  const DIE &getDie() const { return Die; }
+  uint64_t getDieOffset() const { return Die.getOffset(); }
+  unsigned getDieTag() const { return Die.getTag(); }
+
+protected:
+  const DIE &Die;
+
+  /// Entries are ordered by the offset of the wrapped DIE.
+  uint64_t order() const override { return Die.getOffset(); }
+};
+
+/// DWARF v5 accelerator table entry that stores the DIE offset, DIE tag and
+/// CU index by value instead of referring to a DIE object.
+class DWARF5AccelTableStaticData : public AccelTableData {
+public:
+  /// Name hash used for DWARF v5 tables: forwards to caseFoldingDjbHash.
+  static uint32_t hash(StringRef Name) { return caseFoldingDjbHash(Name); }
+
+  DWARF5AccelTableStaticData(uint64_t DieOffset, unsigned DieTag,
+                             unsigned CUIndex)
+      : DieOffset(DieOffset), DieTag(DieTag), CUIndex(CUIndex) {}
+
+#ifndef NDEBUG
+  void print(raw_ostream &OS) const override;
+#endif
+
+  uint64_t getDieOffset() const { return DieOffset; }
+  unsigned getDieTag() const { return DieTag; }
+  unsigned getCUIndex() const { return CUIndex; }
+
+protected:
+  uint64_t DieOffset;
+  unsigned DieTag;
+  unsigned CUIndex;
+
+  /// Entries are ordered by the stored DIE offset.
+  uint64_t order() const override { return DieOffset; }
+};
+
+void emitAppleAccelTableImpl(AsmPrinter *Asm, AccelTableBase &Contents,
+                             StringRef Prefix, const MCSymbol *SecBegin,
+                             ArrayRef<AppleAccelTableData::Atom> Atoms);
+
+/// Emit an Apple Accelerator Table consisting of entries in the specified
+/// AccelTable. The DataT template parameter should be derived from
+/// AppleAccelTableData.
+///
+/// Forwards to emitAppleAccelTableImpl, passing DataT::Atoms as the column
+/// description of the table.
+template <typename DataT>
+void emitAppleAccelTable(AsmPrinter *Asm, AccelTable<DataT> &Contents,
+                         StringRef Prefix, const MCSymbol *SecBegin) {
+  // Reject unrelated entry types at compile time with a readable diagnostic
+  // instead of an empty message.
+  static_assert(std::is_convertible<DataT *, AppleAccelTableData *>::value,
+                "DataT must be derived from AppleAccelTableData");
+  emitAppleAccelTableImpl(Asm, Contents, Prefix, SecBegin, DataT::Atoms);
+}
+
+void emitDWARF5AccelTable(AsmPrinter *Asm,
+                          AccelTable<DWARF5AccelTableData> &Contents,
+                          const DwarfDebug &DD,
+                          ArrayRef<std::unique_ptr<DwarfCompileUnit>> CUs);
+
+void emitDWARF5AccelTable(
+    AsmPrinter *Asm, AccelTable<DWARF5AccelTableStaticData> &Contents,
+    ArrayRef<MCSymbol *> CUs,
+    llvm::function_ref<unsigned(const DWARF5AccelTableStaticData &)>
+        getCUIndexForEntry);
+
+/// Accelerator table data implementation for simple Apple accelerator tables
+/// with just a DIE reference.
+class AppleAccelTableOffsetData : public AppleAccelTableData {
+public:
+  /// Wraps \p D. The DIE is held by reference, not copied.
+  AppleAccelTableOffsetData(const DIE &D) : Die(D) {}
+
+  void emit(AsmPrinter *Asm) const override;
+
+  /// Single column: the DIE offset, in DW_FORM_data4.
+  static constexpr Atom Atoms[] = {
+      Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)};
+
+#ifndef NDEBUG
+  void print(raw_ostream &OS) const override;
+#endif
+protected:
+  /// Entries are ordered by the offset of the wrapped DIE.
+  uint64_t order() const override { return Die.getOffset(); }
+
+  const DIE &Die;
+};
+
+/// Accelerator table data implementation for Apple type accelerator tables.
+class AppleAccelTableTypeData : public AppleAccelTableOffsetData {
+public:
+  AppleAccelTableTypeData(const DIE &D) : AppleAccelTableOffsetData(D) {}
+
+  void emit(AsmPrinter *Asm) const override;
+
+  /// Hides the single-column Atoms of AppleAccelTableOffsetData with a
+  /// three-column layout: DIE offset (data4), DIE tag (data2) and type flags
+  /// (data1).
+  static constexpr Atom Atoms[] = {
+      Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4),
+      Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2),
+      Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)};
+
+#ifndef NDEBUG
+  void print(raw_ostream &OS) const override;
+#endif
+};
+
+/// Accelerator table data implementation for simple Apple accelerator tables
+/// with a DIE offset but no actual DIE pointer.
+class AppleAccelTableStaticOffsetData : public AppleAccelTableData {
+public:
+  /// Stores \p Offset by value.
+  AppleAccelTableStaticOffsetData(uint32_t Offset) : Offset(Offset) {}
+
+  void emit(AsmPrinter *Asm) const override;
+
+  /// Single column: the DIE offset, in DW_FORM_data4.
+  static constexpr Atom Atoms[] = {
+      Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)};
+
+#ifndef NDEBUG
+  void print(raw_ostream &OS) const override;
+#endif
+protected:
+  /// Entries are ordered by the stored offset.
+  uint64_t order() const override { return Offset; }
+
+  uint32_t Offset;
+};
+
+/// Accelerator table data implementation for type accelerator tables with
+/// a DIE offset but no actual DIE pointer.
+class AppleAccelTableStaticTypeData : public AppleAccelTableStaticOffsetData {
+public:
+  /// The offset goes to the base class; the remaining values are stored here.
+  /// The initializer order matches the member declaration order below.
+  AppleAccelTableStaticTypeData(uint32_t Offset, uint16_t Tag,
+                                bool ObjCClassIsImplementation,
+                                uint32_t QualifiedNameHash)
+      : AppleAccelTableStaticOffsetData(Offset),
+        QualifiedNameHash(QualifiedNameHash), Tag(Tag),
+        ObjCClassIsImplementation(ObjCClassIsImplementation) {}
+
+  void emit(AsmPrinter *Asm) const override;
+
+  /// Four columns: DIE offset (data4), DIE tag (data2), plus two atoms given
+  /// by raw type ids 5 (data1) and 6 (data4).
+  /// NOTE(review): 5 and 6 presumably correspond to named DW_ATOM_* constants
+  /// for the type-flags and qualified-name-hash extensions - confirm against
+  /// llvm/BinaryFormat/Dwarf.h before replacing the literals.
+  static constexpr Atom Atoms[] = {
+      Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4),
+      Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2),
+      Atom(5, dwarf::DW_FORM_data1), Atom(6, dwarf::DW_FORM_data4)};
+
+#ifndef NDEBUG
+  void print(raw_ostream &OS) const override;
+#endif
+protected:
+  /// Returns the same value as the inherited implementation (Offset).
+  uint64_t order() const override { return Offset; }
+
+  uint32_t QualifiedNameHash;
+  uint16_t Tag;
+  bool ObjCClassIsImplementation;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_ACCELTABLE_H
--- a/suite/synctools/tablegen/include/llvm/CodeGen/Analysis.h
+++ b/suite/synctools/tablegen/include/llvm/CodeGen/Analysis.h
@ -0,0 +1,144 @@
+//===- CodeGen/Analysis.h - CodeGen LLVM IR Analysis Utilities --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares several CodeGen-specific LLVM IR analysis utilities.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_ANALYSIS_H
+#define LLVM_CODEGEN_ANALYSIS_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/CodeGen.h"
+
+namespace llvm {
+class GlobalValue;
+class LLT;
+class MachineBasicBlock;
+class MachineFunction;
+class TargetLoweringBase;
+class TargetLowering;
+class TargetMachine;
+struct EVT;
+
+/// Compute the linearized index of a member in a nested
+/// aggregate/struct/array.
+///
+/// Given an LLVM IR aggregate type and a sequence of insertvalue or
+/// extractvalue indices that identify a member, return the linearized index of
+/// the start of the member, i.e. the number of elements in memory before the
+/// sought one. This is disconnected from the number of bytes.
+///
+/// \param Ty is the type indexed by \p Indices.
+/// \param Indices is an optional pointer in the indices list to the current
+/// index.
+/// \param IndicesEnd is the end of the indices list.
+/// \param CurIndex is the current index in the recursion.
+///
+/// \returns \p CurIndex plus the linear index in \p Ty of the member
+/// identified by the indices list.
+unsigned ComputeLinearIndex(Type *Ty,
+                            const unsigned *Indices,
+                            const unsigned *IndicesEnd,
+                            unsigned CurIndex = 0);
+
+/// Convenience overload taking the indices as an ArrayRef. Forwards to the
+/// pointer-range overload with Indices.begin() and Indices.end().
+inline unsigned ComputeLinearIndex(Type *Ty,
+                                   ArrayRef<unsigned> Indices,
+                                   unsigned CurIndex = 0) {
+  return ComputeLinearIndex(Ty, Indices.begin(), Indices.end(), CurIndex);
+}
+
+/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of
+/// EVTs that represent all the individual underlying
+/// non-aggregate types that comprise it.
+///
+/// If Offsets is non-null, it points to a vector to be filled in
+/// with the in-memory offsets of each of the individual values.
+///
+void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty,
+                     SmallVectorImpl<EVT> &ValueVTs,
+                     SmallVectorImpl<uint64_t> *Offsets = nullptr,
+                     uint64_t StartingOffset = 0);
+
+/// Variant of ComputeValueVTs that also produces the memory VTs.
+void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty,
+                     SmallVectorImpl<EVT> &ValueVTs,
+                     SmallVectorImpl<EVT> *MemVTs,
+                     SmallVectorImpl<uint64_t> *Offsets = nullptr,
+                     uint64_t StartingOffset = 0);
+
+/// computeValueLLTs - Given an LLVM IR type, compute a sequence of
+/// LLTs that represent all the individual underlying
+/// non-aggregate types that comprise it.
+///
+/// If Offsets is non-null, it points to a vector to be filled in
+/// with the in-memory offsets of each of the individual values.
+///
+void computeValueLLTs(const DataLayout &DL, Type &Ty,
+                      SmallVectorImpl<LLT> &ValueTys,
+                      SmallVectorImpl<uint64_t> *Offsets = nullptr,
+                      uint64_t StartingOffset = 0);
+
+/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
+GlobalValue *ExtractTypeInfo(Value *V);
+
+/// getFCmpCondCode - Return the ISD condition code corresponding to
+/// the given LLVM IR floating-point condition code.  This includes
+/// consideration of global floating-point math flags.
+///
+ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred);
+
+/// getFCmpCodeWithoutNaN - Given an ISD condition code comparing floats,
+/// return the equivalent code if we're allowed to assume that NaNs won't occur.
+ISD::CondCode getFCmpCodeWithoutNaN(ISD::CondCode CC);
+
+/// getICmpCondCode - Return the ISD condition code corresponding to
+/// the given LLVM IR integer condition code.
+ISD::CondCode getICmpCondCode(ICmpInst::Predicate Pred);
+
+/// getICmpCondCode - Return the LLVM IR integer condition code
+/// corresponding to the given ISD integer condition code.
+ICmpInst::Predicate getICmpCondCode(ISD::CondCode Pred);
+
+/// Test if the given instruction is in a position to be optimized
+/// with a tail-call. This roughly means that it's in a block with
+/// a return and there's nothing that needs to be scheduled
+/// between it and the return.
+///
+/// This function only tests target-independent requirements.
+bool isInTailCallPosition(const CallBase &Call, const TargetMachine &TM);
+
+/// Test if given that the input instruction is in the tail call position, if
+/// there is an attribute mismatch between the caller and the callee that will
+/// inhibit tail call optimizations.
+/// \p AllowDifferingSizes is an output parameter which, if forming a tail call
+/// is permitted, determines whether it's permitted only if the size of the
+/// caller's and callee's return types match exactly.
+bool attributesPermitTailCall(const Function *F, const Instruction *I,
+                              const ReturnInst *Ret,
+                              const TargetLoweringBase &TLI,
+                              bool *AllowDifferingSizes = nullptr);
+
+/// Test if given that the input instruction is in the tail call position if the
+/// return type or any attributes of the function will inhibit tail call
+/// optimization.
+bool returnTypeIsEligibleForTailCall(const Function *F, const Instruction *I,
+                                     const ReturnInst *Ret,
+                                     const TargetLoweringBase &TLI);
+
+DenseMap<const MachineBasicBlock *, int>
+getEHScopeMembership(const MachineFunction &MF);
+
+} // End llvm namespace
+
+#endif
--- a/suite/synctools/tablegen/include/llvm/CodeGen/AntiDepBreaker.h
+++ b/suite/synctools/tablegen/include/llvm/CodeGen/AntiDepBreaker.h
@ -0,0 +1,103 @@
+//===- llvm/CodeGen/AntiDepBreaker.h - Anti-Dependence Breaking -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AntiDepBreaker class, which implements
+// anti-dependence breaking heuristics for post-register-allocation scheduling.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_ANTIDEPBREAKER_H
+#define LLVM_CODEGEN_ANTIDEPBREAKER_H
+
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Support/Compiler.h"
+#include <cassert>
+#include <utility>
+#include <vector>
+
+namespace llvm {
+
+class RegisterClassInfo;
+
+/// This class works in conjunction with the post-RA scheduler to rename
+/// registers to break register anti-dependencies (WAR hazards).
+class AntiDepBreaker {
+public:
+  /// Pairs of machine instructions. UpdateDbgValues treats the first element
+  /// as the debug instruction and the second as the instruction it is
+  /// associated with.
+  using DbgValueVector =
+      std::vector<std::pair<MachineInstr *, MachineInstr *>>;
+
+  virtual ~AntiDepBreaker();
+
+  /// Initialize anti-dep breaking for a new basic block.
+  virtual void StartBlock(MachineBasicBlock *BB) = 0;
+
+  /// Identify anti-dependencies within a basic-block region and break them by
+  /// renaming registers. Return the number of anti-dependencies broken.
+  virtual unsigned BreakAntiDependencies(const std::vector<SUnit> &SUnits,
+                                         MachineBasicBlock::iterator Begin,
+                                         MachineBasicBlock::iterator End,
+                                         unsigned InsertPosIndex,
+                                         DbgValueVector &DbgValues) = 0;
+
+  /// Update liveness information to account for the current
+  /// instruction, which will not be scheduled.
+  virtual void Observe(MachineInstr &MI, unsigned Count,
+                       unsigned InsertPosIndex) = 0;
+
+  /// Finish anti-dep breaking for a basic block.
+  virtual void FinishBlock() = 0;
+
+  /// Update DBG_VALUE or DBG_PHI if dependency breaker is updating
+  /// other machine instruction to use NewReg.
+  ///
+  /// Only operand 0 is inspected: the debug operand for a DBG_VALUE, the
+  /// plain operand for a DBG_PHI. It is rewritten only when it is a register
+  /// equal to \p OldReg. Any other kind of instruction is a programming error
+  /// (llvm_unreachable).
+  void UpdateDbgValue(MachineInstr &MI, unsigned OldReg, unsigned NewReg) {
+    if (MI.isDebugValue()) {
+      if (MI.getDebugOperand(0).isReg() &&
+          MI.getDebugOperand(0).getReg() == OldReg)
+        MI.getDebugOperand(0).setReg(NewReg);
+    } else if (MI.isDebugPHI()) {
+      if (MI.getOperand(0).isReg() &&
+          MI.getOperand(0).getReg() == OldReg)
+        MI.getOperand(0).setReg(NewReg);
+    } else {
+      llvm_unreachable("MI is not DBG_VALUE / DBG_PHI!");
+    }
+  }
+
+  /// Update all DBG_VALUE instructions that may be affected by the dependency
+  /// breaker's update of ParentMI to use NewReg.
+  void UpdateDbgValues(const DbgValueVector &DbgValues, MachineInstr *ParentMI,
+                       unsigned OldReg, unsigned NewReg) {
+    // The following code is dependent on the order in which the DbgValues are
+    // constructed in ScheduleDAGInstrs::buildSchedGraph.
+    MachineInstr *PrevDbgMI = nullptr;
+    // Walk the list back to front. A pair matches when its second element is
+    // ParentMI itself or the debug instruction updated in the previous match,
+    // so a chain of debug instructions hanging off ParentMI is followed.
+    for (const auto &DV : make_range(DbgValues.crbegin(), DbgValues.crend())) {
+      MachineInstr *PrevMI = DV.second;
+      if ((PrevMI == ParentMI) || (PrevMI == PrevDbgMI)) {
+        MachineInstr *DbgMI = DV.first;
+        UpdateDbgValue(*DbgMI, OldReg, NewReg);
+        PrevDbgMI = DbgMI;
+      } else if (PrevDbgMI) {
+        break; // If no match and already found a DBG_VALUE, we're done.
+      }
+    }
+  }
+};
+
+AntiDepBreaker *createAggressiveAntiDepBreaker(
+    MachineFunction &MFi, const RegisterClassInfo &RCI,
+    TargetSubtargetInfo::RegClassVector &CriticalPathRCs);
+
+AntiDepBreaker *createCriticalAntiDepBreaker(MachineFunction &MFi,
+                                             const RegisterClassInfo &RCI);
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_ANTIDEPBREAKER_H
--- a/suite/synctools/tablegen/include/llvm/CodeGen/AsmPrinter.h
+++ b/suite/synctools/tablegen/include/llvm/CodeGen/AsmPrinter.h
@ -0,0 +1,816 @@
+//===- llvm/CodeGen/AsmPrinter.h - AsmPrinter Framework ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a class to be used as the base class for target specific
+// asm writers.  This class primarily handles common functionality used by
+// all asm writers.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_ASMPRINTER_H
+#define LLVM_CODEGEN_ASMPRINTER_H
+
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/AsmPrinterHandler.h"
+#include "llvm/CodeGen/DwarfStringPoolEntry.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/SourceMgr.h"
+#include <cstdint>
+#include <memory>
+#include <utility>
+#include <vector>
+
+namespace llvm {
+
+class BasicBlock;
+class BlockAddress;
+class Constant;
+class ConstantArray;
+class DataLayout;
+class DIE;
+class DIEAbbrev;
+class DwarfDebug;
+class GCMetadataPrinter;
+class GCStrategy;
+class GlobalObject;
+class GlobalValue;
+class GlobalVariable;
+class MachineBasicBlock;
+class MachineConstantPoolValue;
+class MachineDominatorTree;
+class MachineFunction;
+class MachineInstr;
+class MachineJumpTableInfo;
+class MachineLoopInfo;
+class MachineModuleInfo;
+class MachineOptimizationRemarkEmitter;
+class MCAsmInfo;
+class MCCFIInstruction;
+class MCContext;
+class MCExpr;
+class MCInst;
+class MCSection;
+class MCStreamer;
+class MCSubtargetInfo;
+class MCSymbol;
+class MCTargetOptions;
+class MDNode;
+class Module;
+class PseudoProbeHandler;
+class raw_ostream;
+class StackMaps;
+class StringRef;
+class TargetLoweringObjectFile;
+class TargetMachine;
+class Twine;
+
+namespace remarks {
+class RemarkStreamer;
+}
+
+/// This class is intended to be used as a driving class for all asm writers.
+class AsmPrinter : public MachineFunctionPass {
+public:
+  /// Target machine description.
+  TargetMachine &TM;
+
+  /// Target Asm Printer information.
+  const MCAsmInfo *MAI;
+
+  /// This is the context for the output file that we are streaming. This owns
+  /// all of the global MC-related objects for the generated translation unit.
+  MCContext &OutContext;
+
+  /// This is the MCStreamer object for the file we are generating. This
+  /// contains the transient state for the current translation unit that we are
+  /// generating (such as the current section etc).
+  std::unique_ptr<MCStreamer> OutStreamer;
+
+  /// The current machine function.
+  MachineFunction *MF = nullptr;
+
+  /// This is a pointer to the current MachineModuleInfo.
+  MachineModuleInfo *MMI = nullptr;
+
+  /// This is a pointer to the current MachineDominatorTree.
+  MachineDominatorTree *MDT = nullptr;
+
+  /// This is a pointer to the current MachineLoopInfo.
+  MachineLoopInfo *MLI = nullptr;
+
+  /// Optimization remark emitter.
+  MachineOptimizationRemarkEmitter *ORE;
+
+  /// The symbol for the entry in __patchable_function_entries.
+  MCSymbol *CurrentPatchableFunctionEntrySym = nullptr;
+
+  /// The symbol for the current function. This is recalculated at the beginning
+  /// of each call to runOnMachineFunction().
+  MCSymbol *CurrentFnSym = nullptr;
+
+  /// The symbol for the current function descriptor on AIX. This is created
+  /// at the beginning of each call to SetupMachineFunction().
+  MCSymbol *CurrentFnDescSym = nullptr;
+
+  /// The symbol used to represent the start of the current function for the
+  /// purpose of calculating its size (e.g. using the .size directive). By
+  /// default, this is equal to CurrentFnSym.
+  MCSymbol *CurrentFnSymForSize = nullptr;
+
+  /// Map a basic block section ID to the begin and end symbols of that section
+  ///  which determine the section's range.
+  struct MBBSectionRange {
+    MCSymbol *BeginLabel, *EndLabel;
+  };
+
+  MapVector<unsigned, MBBSectionRange> MBBSectionRanges;
+
+  /// Map global GOT equivalent MCSymbols to GlobalVariables and keep track of
+  /// its number of uses by other globals.
+  using GOTEquivUsePair = std::pair<const GlobalVariable *, unsigned>;
+  MapVector<const MCSymbol *, GOTEquivUsePair> GlobalGOTEquivs;
+
+  /// struct HandlerInfo and Handlers permit users or target extended
+  /// AsmPrinter to add their own handlers.
+  struct HandlerInfo {
+    std::unique_ptr<AsmPrinterHandler> Handler;
+    StringRef TimerName;
+    StringRef TimerDescription;
+    StringRef TimerGroupName;
+    StringRef TimerGroupDescription;
+
+    HandlerInfo(std::unique_ptr<AsmPrinterHandler> Handler, StringRef TimerName,
+                StringRef TimerDescription, StringRef TimerGroupName,
+                StringRef TimerGroupDescription)
+        : Handler(std::move(Handler)), TimerName(TimerName),
+          TimerDescription(TimerDescription), TimerGroupName(TimerGroupName),
+          TimerGroupDescription(TimerGroupDescription) {}
+  };
+
+  // Flags representing which CFI section is required for a function/module.
+  enum class CFISection : unsigned {
+    None = 0, ///< Do not emit either .eh_frame or .debug_frame
+    EH = 1,   ///< Emit .eh_frame
+    Debug = 2 ///< Emit .debug_frame
+  };
+
+private:
+  MCSymbol *CurrentFnEnd = nullptr;
+
+  /// Map a basic block section ID to the exception symbol associated with that
+  /// section. Map entries are assigned and looked up via
+  /// AsmPrinter::getMBBExceptionSym.
+  DenseMap<unsigned, MCSymbol *> MBBSectionExceptionSyms;
+
+  // The symbol used to represent the start of the current BB section of the
+  // function. This is used to calculate the size of the BB section.
+  MCSymbol *CurrentSectionBeginSym = nullptr;
+
+  // The garbage collection metadata printer table.
+  void *GCMetadataPrinters = nullptr; // Really a DenseMap.
+
+  /// Emit comments in assembly output if this is true.
+  bool VerboseAsm;
+
+  /// Output stream for the stack usage file (i.e., .su file).
+  std::unique_ptr<raw_fd_ostream> StackUsageStream;
+
+  static char ID;
+
+protected:
+  MCSymbol *CurrentFnBegin = nullptr;
+
+  /// A vector of all debug/EH info emitters we should use. This vector
+  /// maintains ownership of the emitters.
+  std::vector<HandlerInfo> Handlers;
+  size_t NumUserHandlers = 0;
+
+private:
+  /// If generated on the fly, this owns the instance.
+  std::unique_ptr<MachineDominatorTree> OwnedMDT;
+
+  /// If generated on the fly, this owns the instance.
+  std::unique_ptr<MachineLoopInfo> OwnedMLI;
+
+  /// If the target supports dwarf debug info, this pointer is non-null.
+  DwarfDebug *DD = nullptr;
+
+  /// A handler that supports pseudo probe emission with embedded inline
+  /// context.
+  PseudoProbeHandler *PP = nullptr;
+
+  /// CFISection type the module needs i.e. either .eh_frame or .debug_frame.
+  CFISection ModuleCFISection = CFISection::None;
+
+protected:
+  explicit AsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer);
+
+public:
+  ~AsmPrinter() override;
+
+  DwarfDebug *getDwarfDebug() { return DD; }
+  DwarfDebug *getDwarfDebug() const { return DD; }
+
+  uint16_t getDwarfVersion() const;
+  void setDwarfVersion(uint16_t Version);
+
+  bool isDwarf64() const;
+
+  /// Returns 4 for DWARF32 and 8 for DWARF64.
+  unsigned int getDwarfOffsetByteSize() const;
+
+  /// Returns 4 for DWARF32 and 12 for DWARF64.
+  unsigned int getUnitLengthFieldByteSize() const;
+
+  /// Returns information about the byte size of DW_FORM values.
+  dwarf::FormParams getDwarfFormParams() const;
+
+  bool isPositionIndependent() const;
+
+  /// Return true if assembly output should contain comments.
+  bool isVerbose() const { return VerboseAsm; }
+
+  /// Return a unique ID for the current function.
+  unsigned getFunctionNumber() const;
+
+  /// Return the symbol for the function pseudo stack if the stack frame is not
+  /// register based.
+  virtual const MCSymbol *getFunctionFrameSymbol() const { return nullptr; }
+
+  MCSymbol *getFunctionBegin() const { return CurrentFnBegin; }
+  MCSymbol *getFunctionEnd() const { return CurrentFnEnd; }
+
+  // Return the exception symbol associated with the MBB section containing a
+  // given basic block.
+  MCSymbol *getMBBExceptionSym(const MachineBasicBlock &MBB);
+
+  /// Return information about object file lowering.
+  const TargetLoweringObjectFile &getObjFileLowering() const;
+
+  /// Return information about data layout.
+  const DataLayout &getDataLayout() const;
+
+  /// Return the pointer size from the TargetMachine
+  unsigned getPointerSize() const;
+
+  /// Return information about subtarget.
+  const MCSubtargetInfo &getSubtargetInfo() const;
+
+  void EmitToStreamer(MCStreamer &S, const MCInst &Inst);
+
+  /// Emits initial debug location directive.
+  void emitInitialRawDwarfLocDirective(const MachineFunction &MF);
+
+  /// Return the current section we are emitting to.
+  const MCSection *getCurrentSection() const;
+
+  void getNameWithPrefix(SmallVectorImpl<char> &Name,
+                         const GlobalValue *GV) const;
+
+  MCSymbol *getSymbol(const GlobalValue *GV) const;
+
+  /// Similar to getSymbol() but preferred for references. On ELF, this uses a
+  /// local symbol if a reference to GV is guaranteed to be resolved to the
+  /// definition in the same module.
+  MCSymbol *getSymbolPreferLocal(const GlobalValue &GV) const;
+
+  //===------------------------------------------------------------------===//
+  // XRay instrumentation implementation.
+  //===------------------------------------------------------------------===//
+public:
+  // This describes the kind of sled we're storing in the XRay table.
+  enum class SledKind : uint8_t {
+    FUNCTION_ENTER = 0,
+    FUNCTION_EXIT = 1,
+    TAIL_CALL = 2,
+    LOG_ARGS_ENTER = 3,
+    CUSTOM_EVENT = 4,
+    TYPED_EVENT = 5,
+  };
+
+  // The table will contain these structs that point to the sled, the function
+  // containing the sled, and what kind of sled (and whether they should always
+  // be instrumented). We also use a version identifier that the runtime can use
+  // to decide what to do with the sled, depending on the version of the sled.
+  struct XRayFunctionEntry {
+    const MCSymbol *Sled;
+    const MCSymbol *Function;
+    SledKind Kind;
+    bool AlwaysInstrument;
+    const class Function *Fn;
+    uint8_t Version;
+
+    void emit(int, MCStreamer *) const;
+  };
+
+  // All the sleds to be emitted.
+  SmallVector<XRayFunctionEntry, 4> Sleds;
+
+  // Helper function to record a given XRay sled.
+  void recordSled(MCSymbol *Sled, const MachineInstr &MI, SledKind Kind,
+                  uint8_t Version = 0);
+
+  /// Emit a table with all XRay instrumentation points.
+  void emitXRayTable();
+
+  void emitPatchableFunctionEntries();
+
+  //===------------------------------------------------------------------===//
+  // MachineFunctionPass Implementation.
+  //===------------------------------------------------------------------===//
+
+  /// Record analysis usage.
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+  /// Set up the AsmPrinter when we are working on a new module. If your pass
+  /// overrides this, it must make sure to explicitly call this implementation.
+  bool doInitialization(Module &M) override;
+
+  /// Shut down the asmprinter. If you override this in your pass, you must make
+  /// sure to call it explicitly.
+  bool doFinalization(Module &M) override;
+
+  /// Emit the specified function out to the OutStreamer; always returns false.
+  bool runOnMachineFunction(MachineFunction &MF) override {
+    SetupMachineFunction(MF);
+    emitFunctionBody();
+    return false;
+  }
+
+  //===------------------------------------------------------------------===//
+  // Coarse grained IR lowering routines.
+  //===------------------------------------------------------------------===//
+
+  /// This should be called when a new MachineFunction is being processed from
+  /// runOnMachineFunction.
+  virtual void SetupMachineFunction(MachineFunction &MF);
+
+  /// This method emits the body and trailer for a function.
+  void emitFunctionBody();
+
+  void emitCFIInstruction(const MachineInstr &MI);
+
+  void emitFrameAlloc(const MachineInstr &MI);
+
+  void emitStackSizeSection(const MachineFunction &MF);
+
+  void emitStackUsage(const MachineFunction &MF);
+
+  void emitBBAddrMapSection(const MachineFunction &MF);
+
+  void emitPseudoProbe(const MachineInstr &MI);
+
+  void emitRemarksSection(remarks::RemarkStreamer &RS);
+
+  /// Get the CFISection type for a function.
+  CFISection getFunctionCFISectionType(const Function &F) const;
+
+  /// Get the CFISection type for a machine function.
+  CFISection getFunctionCFISectionType(const MachineFunction &MF) const;
+
+  /// Get the CFISection type for the module.
+  CFISection getModuleCFISectionType() const { return ModuleCFISection; }
+
+  bool needsSEHMoves();
+
+  /// Since emitting CFI unwind information is entangled with supporting the
+  /// exceptions, this returns true for platforms which use CFI unwind
+  /// information for debugging purpose when
+  /// `MCAsmInfo::ExceptionsType == ExceptionHandling::None`.
+  bool needsCFIForDebug() const;
+
+  /// Print to the current output stream assembly representations of the
+  /// constants in the constant pool MCP. This is used to print out constants
+  /// which have been "spilled to memory" by the code generator.
+  virtual void emitConstantPool();
+
+  /// Print assembly representations of the jump tables used by the current
+  /// function to the current output stream.
+  virtual void emitJumpTableInfo();
+
+  /// Emit the specified global variable to the .s file.
+  virtual void emitGlobalVariable(const GlobalVariable *GV);
+
+  /// Check to see if the specified global is a special global used by LLVM. If
+  /// so, emit it and return true, otherwise do nothing and return false.
+  bool emitSpecialLLVMGlobal(const GlobalVariable *GV);
+
+  /// `llvm.global_ctors` and `llvm.global_dtors` are arrays of Structor
+  /// structs.
+  ///
+  /// Priority - init priority
+  /// Func - global initialization or global clean-up function
+  /// ComdatKey - associated data
+  struct Structor {
+    int Priority = 0;
+    Constant *Func = nullptr;
+    GlobalValue *ComdatKey = nullptr;
+
+    Structor() = default;
+  };
+
+  /// This method gathers an array of Structors and then sorts them out by
+  /// Priority.
+  /// @param List The initializer of `llvm.global_ctors` or `llvm.global_dtors`
+  /// array.
+  /// @param[out] Structors Sorted Structor structs by Priority.
+  void preprocessXXStructorList(const DataLayout &DL, const Constant *List,
+                                SmallVector<Structor, 8> &Structors);
+
+  /// This method emits `llvm.global_ctors` or `llvm.global_dtors` list.
+  virtual void emitXXStructorList(const DataLayout &DL, const Constant *List,
+                                  bool IsCtor);
+
+  /// Emit an alignment directive to the specified power of two boundary. If a
+  /// global value is specified, and if that global has an explicit alignment
+  /// requested, it will override the alignment request if required for
+  /// correctness.
+  void emitAlignment(Align Alignment, const GlobalObject *GV = nullptr,
+                     unsigned MaxBytesToEmit = 0) const;
+
+  /// Lower the specified LLVM Constant to an MCExpr.
+  virtual const MCExpr *lowerConstant(const Constant *CV);
+
+  /// Print a general LLVM constant to the .s file.
+  void emitGlobalConstant(const DataLayout &DL, const Constant *CV);
+
+  /// Unnamed constant global variables solely containing a pointer to
+  /// another global variable act like a global variable "proxy", or GOT
+  /// equivalents, i.e., it's only used to hold the address of the latter. One
+  /// optimization is to replace accesses to these proxies by using the GOT
+  /// entry for the final global instead. Hence, we select GOT equivalent
+  /// candidates among all the module global variables, avoid emitting them
+  /// unnecessarily and finally replace references to them by pc relative
+  /// accesses to GOT entries.
+  void computeGlobalGOTEquivs(Module &M);
+
+  /// Constant expressions using GOT equivalent globals may not be
+  /// eligible for PC relative GOT entry conversion, in such cases we need to
+  /// emit the proxies we previously omitted in EmitGlobalVariable.
+  void emitGlobalGOTEquivs();
+
+  /// Emit the stack maps.
+  void emitStackMaps(StackMaps &SM);
+
+  //===------------------------------------------------------------------===//
+  // Overridable Hooks
+  //===------------------------------------------------------------------===//
+
+  void addAsmPrinterHandler(HandlerInfo Handler) {
+    Handlers.insert(Handlers.begin(), std::move(Handler));
+    NumUserHandlers++;
+  }
+
+  // Targets can, or in the case of EmitInstruction, must implement these to
+  // customize output.
+
+  /// This virtual method can be overridden by targets that want to emit
+  /// something at the start of their file.
+  virtual void emitStartOfAsmFile(Module &) {}
+
+  /// This virtual method can be overridden by targets that want to emit
+  /// something at the end of their file.
+  virtual void emitEndOfAsmFile(Module &) {}
+
+  /// Targets can override this to emit stuff before the first basic block in
+  /// the function.
+  virtual void emitFunctionBodyStart() {}
+
+  /// Targets can override this to emit stuff after the last basic block in the
+  /// function.
+  virtual void emitFunctionBodyEnd() {}
+
+  /// Targets can override this to emit stuff at the start of a basic block.
+  /// By default, this method prints the label for the specified
+  /// MachineBasicBlock, an alignment (if present) and a comment describing it
+  /// if appropriate.
+  virtual void emitBasicBlockStart(const MachineBasicBlock &MBB);
+
+  /// Targets can override this to emit stuff at the end of a basic block.
+  virtual void emitBasicBlockEnd(const MachineBasicBlock &MBB);
+
+  /// Targets should implement this to emit instructions.
+  virtual void emitInstruction(const MachineInstr *) {
+    llvm_unreachable("EmitInstruction not implemented");
+  }
+
+  /// Return the symbol for the specified constant pool entry.
+  virtual MCSymbol *GetCPISymbol(unsigned CPID) const;
+
+  virtual void emitFunctionEntryLabel();
+
+  virtual void emitFunctionDescriptor() {
+    llvm_unreachable("Function descriptor is target-specific.");
+  }
+
+  virtual void emitMachineConstantPoolValue(MachineConstantPoolValue *MCPV);
+
+  /// Targets can override this to change how global constants that are part of
+  /// a C++ static/global constructor list are emitted.
+  virtual void emitXXStructor(const DataLayout &DL, const Constant *CV) {
+    emitGlobalConstant(DL, CV);
+  }
+
+  /// Return true if the basic block has exactly one predecessor and the control
+  /// transfer mechanism between the predecessor and this block is a
+  /// fall-through.
+  virtual bool
+  isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const;
+
+  /// Targets can override this to customize the output of IMPLICIT_DEF
+  /// instructions in verbose mode.
+  virtual void emitImplicitDef(const MachineInstr *MI) const;
+
+  /// Emit N NOP instructions.
+  void emitNops(unsigned N);
+
+  //===------------------------------------------------------------------===//
+  // Symbol Lowering Routines.
+  //===------------------------------------------------------------------===//
+
+  MCSymbol *createTempSymbol(const Twine &Name) const;
+
+  /// Return the MCSymbol for a private symbol with global value name as its
+  /// base, with the specified suffix.
+  MCSymbol *getSymbolWithGlobalValueBase(const GlobalValue *GV,
+                                         StringRef Suffix) const;
+
+  /// Return the MCSymbol for the specified ExternalSymbol.
+  MCSymbol *GetExternalSymbolSymbol(StringRef Sym) const;
+
+  /// Return the symbol for the specified jump table entry.
+  MCSymbol *GetJTISymbol(unsigned JTID, bool isLinkerPrivate = false) const;
+
+  /// Return the symbol for the specified jump table .set
+  /// FIXME: privatize to AsmPrinter.
+  MCSymbol *GetJTSetSymbol(unsigned UID, unsigned MBBID) const;
+
+  /// Return the MCSymbol used to satisfy BlockAddress uses of the specified
+  /// basic block.
+  MCSymbol *GetBlockAddressSymbol(const BlockAddress *BA) const;
+  MCSymbol *GetBlockAddressSymbol(const BasicBlock *BB) const;
+
+  //===------------------------------------------------------------------===//
+  // Emission Helper Routines.
+  //===------------------------------------------------------------------===//
+
+  /// This is just a convenient handler for printing offsets.
+  void printOffset(int64_t Offset, raw_ostream &OS) const;
+
+  /// Emit a byte directive and value.
+  void emitInt8(int Value) const;
+
+  /// Emit a short directive and value.
+  void emitInt16(int Value) const;
+
+  /// Emit a long directive and value.
+  void emitInt32(int Value) const;
+
+  /// Emit a long long directive and value.
+  void emitInt64(uint64_t Value) const;
+
+  /// Emit something like ".long Hi-Lo" where the size in bytes of the directive
+  /// is specified by Size and Hi/Lo specify the labels.  This implicitly uses
+  /// .set if it is available.
+  void emitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
+                           unsigned Size) const;
+
+  /// Emit something like ".uleb128 Hi-Lo".
+  void emitLabelDifferenceAsULEB128(const MCSymbol *Hi,
+                                    const MCSymbol *Lo) const;
+
+  /// Emit something like ".long Label+Offset" where the size in bytes of the
+  /// directive is specified by Size and Label specifies the label.  This
+  /// implicitly uses .set if it is available.
+  void emitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
+                           unsigned Size, bool IsSectionRelative = false) const;
+
+  /// Emit something like ".long Label" where the size in bytes of the directive
+  /// is specified by Size and Label specifies the label.
+  void emitLabelReference(const MCSymbol *Label, unsigned Size,
+                          bool IsSectionRelative = false) const {
+    emitLabelPlusOffset(Label, 0, Size, IsSectionRelative);
+  }
+
+  //===------------------------------------------------------------------===//
+  // Dwarf Emission Helper Routines
+  //===------------------------------------------------------------------===//
+
+  /// Emit the specified signed leb128 value.
+  void emitSLEB128(int64_t Value, const char *Desc = nullptr) const;
+
+  /// Emit the specified unsigned leb128 value.
+  void emitULEB128(uint64_t Value, const char *Desc = nullptr,
+                   unsigned PadTo = 0) const;
+
+  /// Emit a .byte 42 directive that corresponds to an encoding.  If verbose
+  /// assembly output is enabled, we output comments describing the encoding.
+  /// Desc is a string saying what the encoding is specifying (e.g. "LSDA").
+  void emitEncodingByte(unsigned Val, const char *Desc = nullptr) const;
+
+  /// Return the size of the encoding in bytes.
+  unsigned GetSizeOfEncodedValue(unsigned Encoding) const;
+
+  /// Emit reference to a ttype global with a specified encoding.
+  virtual void emitTTypeReference(const GlobalValue *GV, unsigned Encoding);
+
+  /// Emit a reference to a symbol for use in dwarf. Different object formats
+  /// represent this in different ways. Some use a relocation; others encode
+  /// the label offset in its section.
+  void emitDwarfSymbolReference(const MCSymbol *Label,
+                                bool ForceOffset = false) const;
+
+  /// Emit the 4- or 8-byte offset of a string from the start of its section.
+  ///
+  /// When possible, emit a DwarfStringPool section offset without any
+  /// relocations, and without using the symbol.  Otherwise, defers to \a
+  /// emitDwarfSymbolReference().
+  ///
+  /// The length of the emitted value depends on the DWARF format.
+  void emitDwarfStringOffset(DwarfStringPoolEntry S) const;
+
+  /// Emit the 4- or 8-byte offset of a string from the start of its section.
+  void emitDwarfStringOffset(DwarfStringPoolEntryRef S) const {
+    emitDwarfStringOffset(S.getEntry());
+  }
+
+  /// Emit something like ".long Label + Offset" or ".quad Label + Offset"
+  /// depending on the DWARF format.
+  void emitDwarfOffset(const MCSymbol *Label, uint64_t Offset) const;
+
+  /// Emit 32- or 64-bit value depending on the DWARF format.
+  void emitDwarfLengthOrOffset(uint64_t Value) const;
+
+  /// Emit a unit length field. The actual format, DWARF32 or DWARF64, is chosen
+  /// according to the settings.
+  void emitDwarfUnitLength(uint64_t Length, const Twine &Comment) const;
+
+  /// Emit a unit length field. The actual format, DWARF32 or DWARF64, is chosen
+  /// according to the settings.
+  /// Return the end symbol generated inside, the caller needs to emit it.
+  MCSymbol *emitDwarfUnitLength(const Twine &Prefix,
+                                const Twine &Comment) const;
+
+  /// Emit reference to a call site with a specified encoding
+  void emitCallSiteOffset(const MCSymbol *Hi, const MCSymbol *Lo,
+                          unsigned Encoding) const;
+  /// Emit an integer value corresponding to the call site encoding
+  void emitCallSiteValue(uint64_t Value, unsigned Encoding) const;
+
+  /// Get the value for DW_AT_APPLE_isa. Zero if no isa encoding specified.
+  virtual unsigned getISAEncoding() { return 0; }
+
+  /// Emit the directive and value for debug thread local expression
+  ///
+  /// \p Value - The value to emit.
+  /// \p Size - The size of the integer (in bytes) to emit.
+  virtual void emitDebugValue(const MCExpr *Value, unsigned Size) const;
+
+  //===------------------------------------------------------------------===//
+  // Dwarf Lowering Routines
+  //===------------------------------------------------------------------===//
+
+  /// Emit frame instruction to describe the layout of the frame.
+  void emitCFIInstruction(const MCCFIInstruction &Inst) const;
+
+  /// Emit every entry of the Dwarf abbreviation table, then its terminator.
+  template <typename T> void emitDwarfAbbrevs(const T &Abbrevs) const {
+    // Each element is dereferenced once and handed to emitDwarfAbbrev.
+    for (const auto &Entry : Abbrevs) {
+      emitDwarfAbbrev(*Entry);
+    }
+    // A zero ULEB128 closes the table.
+    emitULEB128(0, "EOM(3)");
+  }
+
+  void emitDwarfAbbrev(const DIEAbbrev &Abbrev) const;
+
+  /// Recursively emit Dwarf DIE tree.
+  void emitDwarfDIE(const DIE &Die) const;
+
+  //===------------------------------------------------------------------===//
+  // Inline Asm Support
+  //===------------------------------------------------------------------===//
+
+  // These are hooks that targets can override to implement inline asm
+  // support.  These should probably be moved out of AsmPrinter someday.
+
+  /// Print information related to the specified machine instr that is
+  /// independent of the operand, and may be independent of the instr itself.
+  /// This can be useful for portably encoding the comment character or other
+  /// bits of target-specific knowledge into the asmstrings.  The syntax used is
+  /// ${:comment}.  Targets can override this to add support for their own
+  /// strange codes.
+  virtual void PrintSpecial(const MachineInstr *MI, raw_ostream &OS,
+                            StringRef Code) const;
+
+  /// Print the MachineOperand as a symbol. Targets with complex handling of
+  /// symbol references should override the base implementation.
+  virtual void PrintSymbolOperand(const MachineOperand &MO, raw_ostream &OS);
+
+  /// Print the specified operand of MI, an INLINEASM instruction, using the
+  /// specified assembler variant.  Targets should override this to format as
+  /// appropriate.  This method can return true if the operand is erroneous.
+  virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                               const char *ExtraCode, raw_ostream &OS);
+
+  /// Print the specified operand of MI, an INLINEASM instruction, using the
+  /// specified assembler variant as an address. Targets should override this to
+  /// format as appropriate.  This method can return true if the operand is
+  /// erroneous.
+  virtual bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+                                     const char *ExtraCode, raw_ostream &OS);
+
+  /// Let the target do anything it needs to do before emitting inlineasm.
+  /// NOTE: this hook takes no arguments; no subtarget info is passed to it.
+  virtual void emitInlineAsmStart() const;
+
+  /// Let the target do anything it needs to do after emitting inlineasm.
+  /// This callback can be used to restore the original mode in case the
+  /// inlineasm contains directives to switch modes.
+  /// \p StartInfo - the original subtarget info before inline asm
+  /// \p EndInfo   - the final subtarget info after parsing the inline asm,
+  ///                or NULL if the value is unknown.
+  virtual void emitInlineAsmEnd(const MCSubtargetInfo &StartInfo,
+                                const MCSubtargetInfo *EndInfo) const;
+
+  /// This emits visibility information about symbol, if this is supported by
+  /// the target.
+  void emitVisibility(MCSymbol *Sym, unsigned Visibility,
+                      bool IsDefinition = true) const;
+
+  /// This emits linkage information about \p GVSym based on \p GV, if this is
+  /// supported by the target.
+  virtual void emitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const;
+
+  /// Return the alignment for the specified \p GV.
+  static Align getGVAlignment(const GlobalObject *GV, const DataLayout &DL,
+                              Align InAlign = Align(1));
+
+private:
+  /// Private state for PrintSpecial()
+  // Assign a unique ID to this machine instruction.
+  mutable const MachineInstr *LastMI = nullptr;
+  mutable unsigned LastFn = 0;
+  mutable unsigned Counter = ~0U;
+
+  /// This method emits the header for the current function.
+  virtual void emitFunctionHeader();
+
+  /// This method emits a comment next to header for the current function.
+  virtual void emitFunctionHeaderComment();
+
+  /// Emit a blob of inline asm to the output streamer.
+  void
+  emitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
+                const MCTargetOptions &MCOptions,
+                const MDNode *LocMDNode = nullptr,
+                InlineAsm::AsmDialect AsmDialect = InlineAsm::AD_ATT) const;
+
+  /// This method formats and emits the specified machine instruction that is an
+  /// inline asm.
+  void emitInlineAsm(const MachineInstr *MI) const;
+
+  /// Add inline assembly info to the diagnostics machinery, so we can
+  /// emit file and position info. Returns SrcMgr memory buffer position.
+  unsigned addInlineAsmDiagBuffer(StringRef AsmStr,
+                                  const MDNode *LocMDNode) const;
+
+  //===------------------------------------------------------------------===//
+  // Internal Implementation Details
+  //===------------------------------------------------------------------===//
+
+  void emitJumpTableEntry(const MachineJumpTableInfo *MJTI,
+                          const MachineBasicBlock *MBB, unsigned uid) const;
+  void emitLLVMUsedList(const ConstantArray *InitList);
+  /// Emit llvm.ident metadata in an '.ident' directive.
+  void emitModuleIdents(Module &M);
+  /// Emit bytes for llvm.commandline metadata.
+  void emitModuleCommandLines(Module &M);
+
+  GCMetadataPrinter *GetOrCreateGCPrinter(GCStrategy &S);
+  void emitGlobalAlias(Module &M, const GlobalAlias &GA);
+  void emitGlobalIFunc(Module &M, const GlobalIFunc &GI);
+
+  /// This method decides whether the specified basic block requires a label.
+  bool shouldEmitLabelForBasicBlock(const MachineBasicBlock &MBB) const;
+
+protected:
+  virtual bool shouldEmitWeakSwiftAsyncExtendedFramePointerFlags() const {
+    return false;
+  }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_ASMPRINTER_H
--- a/suite/synctools/tablegen/include/llvm/CodeGen/AsmPrinterHandler.h
+++ b/suite/synctools/tablegen/include/llvm/CodeGen/AsmPrinterHandler.h
@ -0,0 +1,84 @@
+//===-- llvm/CodeGen/AsmPrinterHandler.h -----------------------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a generic interface for AsmPrinter handlers,
+// like debug and EH info emitters.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_ASMPRINTERHANDLER_H
+#define LLVM_CODEGEN_ASMPRINTERHANDLER_H
+
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+class AsmPrinter;
+class MachineBasicBlock;
+class MachineFunction;
+class MachineInstr;
+class MCSymbol;
+class Module;
+
+typedef MCSymbol *ExceptionSymbolProvider(AsmPrinter *Asm,
+                                          const MachineBasicBlock *MBB);
+
+/// Collects and handles AsmPrinter objects required to build debug
+/// or EH information.
+class AsmPrinterHandler {
+public:
+  virtual ~AsmPrinterHandler();
+
+  /// For symbols that have a size designated (e.g. common symbols),
+  /// this tracks that size.
+  virtual void setSymbolSize(const MCSymbol *Sym, uint64_t Size) = 0;
+
+  virtual void beginModule(Module *M) {}
+
+  /// Emit all sections that should come after the content.
+  virtual void endModule() = 0;
+
+  /// Gather pre-function debug information.
+  /// Every beginFunction(MF) call should be followed by an endFunction(MF)
+  /// call.
+  virtual void beginFunction(const MachineFunction *MF) = 0;
+
+  /// Emit any end-of-function marker (like .cfi_endproc). This is called
+  /// before endFunction and cannot switch sections.
+  virtual void markFunctionEnd();
+
+  /// Gather post-function debug information.
+  /// Please note that some AsmPrinter implementations may not call
+  /// beginFunction at all.
+  virtual void endFunction(const MachineFunction *MF) = 0;
+
+  virtual void beginFragment(const MachineBasicBlock *MBB,
+                             ExceptionSymbolProvider ESP) {}
+  virtual void endFragment() {}
+
+  /// Emit target-specific EH funclet machinery.
+  virtual void beginFunclet(const MachineBasicBlock &MBB,
+                            MCSymbol *Sym = nullptr) {}
+  virtual void endFunclet() {}
+
+  /// Process beginning of an instruction.
+  virtual void beginInstruction(const MachineInstr *MI) = 0;
+
+  /// Process end of an instruction.
+  virtual void endInstruction() = 0;
+
+  /// Process beginning of a basic block during basic block sections.
+  virtual void beginBasicBlock(const MachineBasicBlock &MBB) {}
+
+  /// Process end of a basic block during basic block sections.
+  virtual void endBasicBlock(const MachineBasicBlock &MBB) {}
+};
+
+} // End of namespace llvm
+
+#endif
--- a/suite/synctools/tablegen/include/llvm/CodeGen/AtomicExpandUtils.h
+++ b/suite/synctools/tablegen/include/llvm/CodeGen/AtomicExpandUtils.h
@ -0,0 +1,64 @@
+//===- AtomicExpandUtils.h - Utilities for expanding atomic instructions --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_ATOMICEXPANDUTILS_H
+#define LLVM_CODEGEN_ATOMICEXPANDUTILS_H
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/Support/AtomicOrdering.h"
+
+namespace llvm {
+
+class AtomicRMWInst;
+class Value;
+
+/// Parameters (see the expansion example below):
+/// (the builder, %addr, %loaded, %new_val, alignment, ordering, sync scope,
+///  /* OUT */ %success, /* OUT */ %new_loaded)
+using CreateCmpXchgInstFun =
+    function_ref<void(IRBuilder<> &, Value *, Value *, Value *, Align,
+                      AtomicOrdering, SyncScope::ID, Value *&, Value *&)>;
+
+/// Expand an atomic RMW instruction into a loop utilizing
+/// cmpxchg. You'll want to make sure your target machine likes cmpxchg
+/// instructions in the first place and that there isn't another, better,
+/// transformation available (for example AArch32/AArch64 have linked loads).
+///
+/// This is useful in passes which can't rewrite the more exotic RMW
+/// instructions directly into a platform specific intrinsics (because, say,
+/// those intrinsics don't exist). If such a pass is able to expand cmpxchg
+/// instructions directly however, then, with this function, it could avoid two
+/// extra module passes (avoiding passes by `-atomic-expand` and itself). A
+/// specific example would be PNaCl's `RewriteAtomics` pass.
+///
+/// Given: atomicrmw some_op iN* %addr, iN %incr ordering
+///
+/// The standard expansion we produce is:
+///     [...]
+///     %init_loaded = load atomic iN* %addr
+///     br label %loop
+/// loop:
+///     %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
+///     %new = some_op iN %loaded, %incr
+/// ; This is what -atomic-expand will produce using this function on i686
+/// targets:
+///     %pair = cmpxchg iN* %addr, iN %loaded, iN %new_val
+///     %new_loaded = extractvalue { iN, i1 } %pair, 0
+///     %success = extractvalue { iN, i1 } %pair, 1
+/// ; End callback produced IR
+///     br i1 %success, label %atomicrmw.end, label %loop
+/// atomicrmw.end:
+///     [...]
+///
+/// Returns true if the containing function was modified.
+bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, CreateCmpXchgInstFun CreateCmpXchg);
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_ATOMICEXPANDUTILS_H
--- a/suite/synctools/tablegen/include/llvm/CodeGen/BasicBlockSectionUtils.h
+++ b/suite/synctools/tablegen/include/llvm/CodeGen/BasicBlockSectionUtils.h
@ -0,0 +1,30 @@
+//===- BasicBlockSectionUtils.h - Utilities for basic block sections     --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_BASICBLOCKSECTIONUTILS_H
+#define LLVM_CODEGEN_BASICBLOCKSECTIONUTILS_H
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CommandLine.h"
+
+namespace llvm {
+
+extern cl::opt<std::string> BBSectionsColdTextPrefix;
+
+class MachineFunction;
+class MachineBasicBlock;
+
+using MachineBasicBlockComparator =
+    function_ref<bool(const MachineBasicBlock &, const MachineBasicBlock &)>;
+
+void sortBasicBlocksAndUpdateBranches(MachineFunction &MF,
+                                      MachineBasicBlockComparator MBBCmp);
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_BASICBLOCKSECTIONUTILS_H
--- a/suite/synctools/tablegen/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/suite/synctools/tablegen/include/llvm/CodeGen/BasicTTIImpl.h
--- a/suite/synctools/tablegen/include/llvm/CodeGen/CSEConfigBase.h
+++ b/suite/synctools/tablegen/include/llvm/CodeGen/CSEConfigBase.h
@ -0,0 +1,28 @@
+//===- CSEConfigBase.h - A CSEConfig interface ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_CSECONFIGBASE_H
+#define LLVM_CODEGEN_CSECONFIGBASE_H
+
+namespace llvm {
+// Class representing some configuration that can be done during GlobalISel's
+// CSEInfo analysis. We define it here because TargetPassConfig can't depend on
+// the GlobalISel library, and so we use this in the interface between them
+// so that the derived classes in GISel can reference generic opcodes.
+class CSEConfigBase {
+public:
+  virtual ~CSEConfigBase() = default;
+  // Hook for defining which Generic instructions should be CSEd.
+  // GISelCSEInfo currently only calls this hook when dealing with generic
+  // opcodes.
+  virtual bool shouldCSEOpc(unsigned Opc) { return false; }
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_CSECONFIGBASE_H
--- a/suite/synctools/tablegen/include/llvm/CodeGen/CalcSpillWeights.h
+++ b/suite/synctools/tablegen/include/llvm/CodeGen/CalcSpillWeights.h
@ -0,0 +1,119 @@
+//===- lib/CodeGen/CalcSpillWeights.h ---------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_CALCSPILLWEIGHTS_H
+#define LLVM_CODEGEN_CALCSPILLWEIGHTS_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+
+namespace llvm {
+
+class LiveInterval;
+class LiveIntervals;
+class MachineBlockFrequencyInfo;
+class MachineFunction;
+class MachineLoopInfo;
+class VirtRegMap;
+
+  /// Normalize the spill weight of a live interval
+  ///
+  /// The spill weight of a live interval is computed as:
+  ///
+  ///   (sum(use freq) + sum(def freq)) / (K + size)
+  ///
+  /// @param UseDefFreq Expected number of executed use and def instructions
+  ///                   per function call. Derived from block frequencies.
+  /// @param Size       Size of live interval as returned by getSize()
+  /// @param NumInstr   Number of instructions using this live interval
+  static inline float normalizeSpillWeight(float UseDefFreq, unsigned Size,
+                                           unsigned NumInstr) {
+    // Pad the interval size by 25 instructions' worth of slot distance so
+    // that accidental SlotIndex gaps do not dominate small intervals: small
+    // intervals end up weighted roughly by their number of uses, large ones
+    // by something closer to a use density. NumInstr is not consulted here.
+    const auto PaddedSize = Size + 25 * SlotIndex::InstrDist;
+    return UseDefFreq / PaddedSize;
+  }
+
+  /// Calculate auxiliary information for a virtual register such as its
+  /// spill weight and allocation hint.
+  class VirtRegAuxInfo {
+    MachineFunction &MF;
+    LiveIntervals &LIS;
+    const VirtRegMap &VRM;
+    const MachineLoopInfo &Loops;
+    const MachineBlockFrequencyInfo &MBFI;
+
+    /// Returns true if Reg of live interval LI is used in instruction with many
+    /// operands like STATEPOINT.
+    bool isLiveAtStatepointVarArg(LiveInterval &LI);
+
+  public:
+    VirtRegAuxInfo(MachineFunction &MF, LiveIntervals &LIS,
+                   const VirtRegMap &VRM, const MachineLoopInfo &Loops,
+                   const MachineBlockFrequencyInfo &MBFI)
+        : MF(MF), LIS(LIS), VRM(VRM), Loops(Loops), MBFI(MBFI) {}
+
+    virtual ~VirtRegAuxInfo() = default;
+
+    /// (re)compute li's spill weight and allocation hint.
+    void calculateSpillWeightAndHint(LiveInterval &LI);
+
+    /// Compute future expected spill weight of a split artifact of LI
+    /// that will span between start and end slot indexes.
+    /// \param LI     The live interval to be split.
+    /// \param Start  The expected beginning of the split artifact. Instructions
+    ///               before start will not affect the weight.
+    /// \param End    The expected end of the split artifact. Instructions
+    ///               after end will not affect the weight.
+    /// \return The expected spill weight of the split artifact. Returns
+    /// negative weight for unspillable LI.
+    float futureWeight(LiveInterval &LI, SlotIndex Start, SlotIndex End);
+
+    /// Compute spill weights and allocation hints for all virtual register
+    /// live intervals.
+    void calculateSpillWeightsAndHints();
+
+    /// Return the preferred allocation register for reg, given a COPY
+    /// instruction.
+    static Register copyHint(const MachineInstr *MI, unsigned Reg,
+                             const TargetRegisterInfo &TRI,
+                             const MachineRegisterInfo &MRI);
+
+    /// Determine if all values in LI are rematerializable.
+    static bool isRematerializable(const LiveInterval &LI,
+                                   const LiveIntervals &LIS,
+                                   const VirtRegMap &VRM,
+                                   const TargetInstrInfo &TII);
+
+  protected:
+    /// Helper function for weight calculations.
+    /// (Re)compute LI's spill weight and allocation hint, or, for non null
+    /// start and end - compute future expected spill weight of a split
+    /// artifact of LI that will span between start and end slot indexes.
+    /// \param LI     The live interval for which to compute the weight.
+    /// \param Start  The expected beginning of the split artifact. Instructions
+    ///               before start will not affect the weight. Relevant for
+    ///               weight calculation of future split artifact.
+    /// \param End    The expected end of the split artifact. Instructions
+    ///               after end will not affect the weight. Relevant for
+    ///               weight calculation of future split artifact.
+    /// \return The spill weight. Returns negative weight for unspillable LI.
+    float weightCalcHelper(LiveInterval &LI, SlotIndex *Start = nullptr,
+                           SlotIndex *End = nullptr);
+
+    /// Weight normalization function.
+    virtual float normalize(float UseDefFreq, unsigned Size,
+                            unsigned NumInstr) {
+      return normalizeSpillWeight(UseDefFreq, Size, NumInstr);
+    }
+  };
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_CALCSPILLWEIGHTS_H
--- a/suite/synctools/tablegen/include/llvm/CodeGen/CallingConvLower.h
+++ b/suite/synctools/tablegen/include/llvm/CodeGen/CallingConvLower.h
@ -0,0 +1,568 @@
+//===- llvm/CallingConvLower.h - Calling Conventions ------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the CCState and CCValAssign classes, used for lowering
+// and implementing calling conventions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_CALLINGCONVLOWER_H
+#define LLVM_CODEGEN_CALLINGCONVLOWER_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/Register.h"
+#include "llvm/CodeGen/TargetCallingConv.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Alignment.h"
+
+namespace llvm {
+
+class CCState;
+class MachineFunction;
+class MVT;
+class TargetRegisterInfo;
+
+/// CCValAssign - Represent assignment of one arg/retval to a location.
+class CCValAssign {
+public:
+  enum LocInfo {
+    Full,      // The value fills the full location.
+    SExt,      // The value is sign extended in the location.
+    ZExt,      // The value is zero extended in the location.
+    AExt,      // The value is extended with undefined upper bits.
+    SExtUpper, // The value is in the upper bits of the location and should be
+               // sign extended when retrieved.
+    ZExtUpper, // The value is in the upper bits of the location and should be
+               // zero extended when retrieved.
+    AExtUpper, // The value is in the upper bits of the location and should be
+               // extended with undefined upper bits when retrieved.
+    BCvt,      // The value is bit-converted in the location.
+    Trunc,     // The value is truncated in the location.
+    VExt,      // The value is vector-widened in the location.
+               // FIXME: Not implemented yet. Code that uses AExt to mean
+               // vector-widen should be fixed to use VExt instead.
+    FPExt,     // The floating-point value is fp-extended in the location.
+    Indirect   // The location contains pointer to the value.
+    // TODO: a subset of the value is in the location.
+  };
+
+private:
+  /// ValNo - This is the value number being assigned (e.g. an argument number).
+  unsigned ValNo;
+
+  /// Loc is either a stack offset or a register number.
+  unsigned Loc;
+
+  /// isMem - True if this is a memory loc, false if it is a register loc.
+  unsigned isMem : 1;
+
+  /// isCustom - True if this arg/retval requires special handling.
+  unsigned isCustom : 1;
+
+  /// Information about how the value is assigned.
+  LocInfo HTP : 6;
+
+  /// ValVT - The type of the value being assigned.
+  MVT ValVT;
+
+  /// LocVT - The type of the location being assigned to.
+  MVT LocVT;
+public:
+
+  static CCValAssign getReg(unsigned ValNo, MVT ValVT,
+                            unsigned RegNo, MVT LocVT,
+                            LocInfo HTP) {
+    CCValAssign Ret;
+    Ret.ValNo = ValNo;
+    Ret.Loc = RegNo;
+    Ret.isMem = false;
+    Ret.isCustom = false;
+    Ret.HTP = HTP;
+    Ret.ValVT = ValVT;
+    Ret.LocVT = LocVT;
+    return Ret;
+  }
+
+  static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT,
+                                  unsigned RegNo, MVT LocVT,
+                                  LocInfo HTP) {
+    // Build a plain register assignment, then flag it as custom.
+    CCValAssign Custom = getReg(ValNo, ValVT, RegNo, LocVT, HTP);
+    Custom.isCustom = true;
+    return Custom;
+  }
+
+  static CCValAssign getMem(unsigned ValNo, MVT ValVT,
+                            unsigned Offset, MVT LocVT,
+                            LocInfo HTP) {
+    CCValAssign Ret;
+    Ret.ValNo = ValNo;
+    Ret.Loc = Offset;
+    Ret.isMem = true;
+    Ret.isCustom = false;
+    Ret.HTP = HTP;
+    Ret.ValVT = ValVT;
+    Ret.LocVT = LocVT;
+    return Ret;
+  }
+
+  static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT,
+                                  unsigned Offset, MVT LocVT,
+                                  LocInfo HTP) {
+    // Build a plain memory assignment, then flag it as custom.
+    CCValAssign Custom = getMem(ValNo, ValVT, Offset, LocVT, HTP);
+    Custom.isCustom = true;
+    return Custom;
+  }
+
+  // There is no need to differentiate between a pending CCValAssign and other
+  // kinds, as they are stored in a different list.
+  static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT,
+                                LocInfo HTP, unsigned ExtraInfo = 0) {
+    return getReg(ValNo, ValVT, ExtraInfo, LocVT, HTP);
+  }
+
+  void convertToReg(unsigned RegNo) {
+    Loc = RegNo;
+    isMem = false;
+  }
+
+  void convertToMem(unsigned Offset) {
+    Loc = Offset;
+    isMem = true;
+  }
+
+  unsigned getValNo() const { return ValNo; }
+  MVT getValVT() const { return ValVT; }
+
+  bool isRegLoc() const { return !isMem; }
+  bool isMemLoc() const { return isMem; }
+
+  bool needsCustom() const { return isCustom; }
+
+  Register getLocReg() const { assert(isRegLoc()); return Loc; }
+  unsigned getLocMemOffset() const { assert(isMemLoc()); return Loc; }
+  unsigned getExtraInfo() const { return Loc; }
+  MVT getLocVT() const { return LocVT; }
+
+  LocInfo getLocInfo() const { return HTP; }
+  bool isExtInLoc() const {
+    return (HTP == AExt || HTP == SExt || HTP == ZExt);
+  }
+
+  bool isUpperBitsInLoc() const {
+    return HTP == AExtUpper || HTP == SExtUpper || HTP == ZExtUpper;
+  }
+};
+
+/// Describes a register that needs to be forwarded from the prologue to a
+/// musttail call.
+struct ForwardedRegister {
+  ForwardedRegister(Register VReg, MCPhysReg PReg, MVT VT)
+      : VReg(VReg), PReg(PReg), VT(VT) {}
+  Register VReg;
+  MCPhysReg PReg;
+  MVT VT;
+};
+
+/// CCAssignFn - This function assigns a location for Val, updating State to
+/// reflect the change.  It returns 'true' if it failed to handle Val.
+typedef bool CCAssignFn(unsigned ValNo, MVT ValVT,
+                        MVT LocVT, CCValAssign::LocInfo LocInfo,
+                        ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+/// CCCustomFn - This function assigns a location for Val, possibly updating
+/// all args to reflect changes and indicates if it handled it. It must set
+/// isCustom if it handles the arg and returns true.
+typedef bool CCCustomFn(unsigned &ValNo, MVT &ValVT,
+                        MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+                        ISD::ArgFlagsTy &ArgFlags, CCState &State);
+
+/// CCState - This class holds information needed while lowering arguments and
+/// return values.  It captures which registers are already assigned and which
+/// stack slots are used.  It provides accessors to allocate these values.
+class CCState {
+private:
+  CallingConv::ID CallingConv;
+  bool IsVarArg;
+  bool AnalyzingMustTailForwardedRegs = false;
+  MachineFunction &MF;
+  const TargetRegisterInfo &TRI;
+  SmallVectorImpl<CCValAssign> &Locs;
+  LLVMContext &Context;
+
+  unsigned StackOffset;
+  Align MaxStackArgAlign;
+  SmallVector<uint32_t, 16> UsedRegs;
+  SmallVector<CCValAssign, 4> PendingLocs;
+  SmallVector<ISD::ArgFlagsTy, 4> PendingArgFlags;
+
+  // ByValInfo and SmallVector<ByValInfo, 4> ByValRegs:
+  //
+  // Vector of ByValInfo instances (ByValRegs) is introduced for byval registers
+  // tracking.
+  // Or, in other words, it tracks byval parameters that are stored in
+  // general purpose registers.
+  //
+  // For 4 byte stack alignment,
+  // instance index means byval parameter number in formal
+  // arguments set. Assume, we have some "struct_type" with size = 4 bytes,
+  // then, for function "foo":
+  //
+  // i32 foo(i32 %p, %struct_type* %r, i32 %s, %struct_type* %t)
+  //
+  // ByValRegs[0] describes how "%r" is stored (Begin == r1, End == r2)
+  // ByValRegs[1] describes how "%t" is stored (Begin == r3, End == r4).
+  //
+  // In case of 8 bytes stack alignment,
+  // In function shown above, r3 would be wasted according to AAPCS rules.
+  // ByValRegs vector size still would be 2,
+  // while "%t" goes to the stack: it wouldn't be described in ByValRegs.
+  //
+  // Supposed use-case for this collection:
+  // 1. Initially ByValRegs is empty, InRegsParamsProcessed is 0.
+  // 2. HandleByVal fills up ByValRegs.
+  // 3. Argument analysis (LowerFormalArguments, for example). After
+  // some byval argument was analyzed, InRegsParamsProcessed is increased.
+  struct ByValInfo {
+    ByValInfo(unsigned B, unsigned E) : Begin(B), End(E) {}
+
+    // First register allocated for current parameter.
+    unsigned Begin;
+
+    // First after last register allocated for current parameter.
+    unsigned End;
+  };
+  SmallVector<ByValInfo, 4 > ByValRegs;
+
+  // InRegsParamsProcessed - shows how many ByValRegs entries have been
+  // processed during argument analysis.
+  unsigned InRegsParamsProcessed;
+
+public:
+  CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
+          SmallVectorImpl<CCValAssign> &locs, LLVMContext &C);
+
+  void addLoc(const CCValAssign &V) {
+    Locs.push_back(V);
+  }
+
+  LLVMContext &getContext() const { return Context; }
+  MachineFunction &getMachineFunction() const { return MF; }
+  CallingConv::ID getCallingConv() const { return CallingConv; }
+  bool isVarArg() const { return IsVarArg; }
+
+  /// getNextStackOffset - Return the next stack offset such that all stack
+  /// slots satisfy their alignment requirements.
+  unsigned getNextStackOffset() const {
+    return StackOffset;
+  }
+
+  /// getAlignedCallFrameSize - Return the size of the call frame needed to
+  /// be able to store all arguments and such that the alignment requirement
+  /// of each of the arguments is satisfied.
+  unsigned getAlignedCallFrameSize() const {
+    return alignTo(StackOffset, MaxStackArgAlign);
+  }
+
+  /// isAllocated - Return true if the specified register (or an alias) is
+  /// allocated. UsedRegs is a bit set of 32-bit words, so the mask is unsigned.
+  bool isAllocated(MCRegister Reg) const {
+    return UsedRegs[Reg / 32] & (1u << (Reg & 31));
+  }
+
+  /// AnalyzeFormalArguments - Analyze an array of argument values,
+  /// incorporating info about the formals into this state.
+  void AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
+                              CCAssignFn Fn);
+
+  /// The function will invoke AnalyzeFormalArguments.
+  void AnalyzeArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
+                        CCAssignFn Fn) {
+    AnalyzeFormalArguments(Ins, Fn);
+  }
+
+  /// AnalyzeReturn - Analyze the returned values of a return,
+  /// incorporating info about the result values into this state.
+  void AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
+                     CCAssignFn Fn);
+
+  /// CheckReturn - Analyze the return values of a function, returning
+  /// true if the return can be performed without sret-demotion, and
+  /// false otherwise.
+  bool CheckReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
+                   CCAssignFn Fn);
+
+  /// AnalyzeCallOperands - Analyze the outgoing arguments to a call,
+  /// incorporating info about the passed values into this state.
+  void AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
+                           CCAssignFn Fn);
+
+  /// AnalyzeCallOperands - Same as above except it takes vectors of types
+  /// and argument flags.
+  void AnalyzeCallOperands(SmallVectorImpl<MVT> &ArgVTs,
+                           SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
+                           CCAssignFn Fn);
+
+  /// The function will invoke AnalyzeCallOperands.
+  void AnalyzeArguments(const SmallVectorImpl<ISD::OutputArg> &Outs,
+                        CCAssignFn Fn) {
+    AnalyzeCallOperands(Outs, Fn);
+  }
+
+  /// AnalyzeCallResult - Analyze the return values of a call,
+  /// incorporating info about the passed values into this state.
+  void AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
+                         CCAssignFn Fn);
+
+  /// A shadow allocated register is a register that was allocated
+  /// but wasn't added to the location list (Locs).
+  /// \returns true if the register was allocated as shadow or false otherwise.
+  bool IsShadowAllocatedReg(MCRegister Reg) const;
+
+  /// AnalyzeCallResult - Same as the InputArg-list overload above, except it's
+  /// specialized for calls which produce a single value.
+  void AnalyzeCallResult(MVT VT, CCAssignFn Fn);
+
+  /// getFirstUnallocated - Return the index of the first unallocated register
+  /// in the set, or Regs.size() if they are all allocated.
+  unsigned getFirstUnallocated(ArrayRef<MCPhysReg> Regs) const {
+    unsigned Idx = 0;
+    while (Idx < Regs.size() && isAllocated(Regs[Idx]))
+      ++Idx; // Skip registers that are already taken.
+    return Idx;
+  }
+
+  void DeallocateReg(MCPhysReg Reg) {
+    assert(isAllocated(Reg) && "Trying to deallocate an unallocated register");
+    MarkUnallocated(Reg);
+  }
+
+  /// AllocateReg - Attempt to allocate one register.  If it is not available,
+  /// return zero.  Otherwise, return the register, marking it and any aliases
+  /// as allocated.
+  MCRegister AllocateReg(MCPhysReg Reg) {
+    if (isAllocated(Reg))
+      return MCRegister();
+    MarkAllocated(Reg);
+    return Reg;
+  }
+
+  /// Version of AllocateReg with extra register to be shadowed.
+  MCRegister AllocateReg(MCPhysReg Reg, MCPhysReg ShadowReg) {
+    if (isAllocated(Reg))
+      return MCRegister();
+    MarkAllocated(Reg);
+    MarkAllocated(ShadowReg);
+    return Reg;
+  }
+
+  /// AllocateReg - Attempt to allocate one of the specified registers.  If none
+  /// are available, return zero.  Otherwise, return the first one available,
+  /// marking it and any aliases as allocated.
+  MCPhysReg AllocateReg(ArrayRef<MCPhysReg> Regs) {
+    unsigned FirstUnalloc = getFirstUnallocated(Regs);
+    if (FirstUnalloc == Regs.size())
+      return MCRegister();    // Didn't find the reg.
+
+    // Mark the register and any aliases as allocated.
+    MCPhysReg Reg = Regs[FirstUnalloc];
+    MarkAllocated(Reg);
+    return Reg;
+  }
+
+  /// AllocateRegBlock - Attempt to allocate a block of RegsRequired consecutive
+  /// registers. If this is not possible, return zero. Otherwise, return the
+  /// first register of the block, marking the entire block as allocated.
+  MCPhysReg AllocateRegBlock(ArrayRef<MCPhysReg> Regs, unsigned RegsRequired) {
+    if (RegsRequired > Regs.size())
+      return 0;
+
+    // Slide a window of RegsRequired entries over Regs and claim the first
+    // window in which no register is allocated yet.
+    const auto LastStart = Regs.size() - RegsRequired;
+    for (unsigned First = 0; First <= LastStart; ++First) {
+      // Count the free registers at the front of this window, stopping at the
+      // first one that is already allocated.
+      unsigned NumFree = 0;
+      while (NumFree < RegsRequired && !isAllocated(Regs[First + NumFree]))
+        ++NumFree;
+      if (NumFree != RegsRequired)
+        continue;
+
+      // The whole window is free: mark every register in it as allocated.
+      for (unsigned Off = 0; Off < RegsRequired; ++Off)
+        MarkAllocated(Regs[First + Off]);
+      return Regs[First];
+    }
+
+    // No block was available
+    return 0;
+  }
+
+  /// Version of AllocateReg with list of registers to be shadowed.
+  MCRegister AllocateReg(ArrayRef<MCPhysReg> Regs, const MCPhysReg *ShadowRegs) {
+    unsigned FirstUnalloc = getFirstUnallocated(Regs);
+    if (FirstUnalloc == Regs.size())
+      return MCRegister();    // Didn't find the reg.
+
+    // Mark the register and any aliases as allocated.
+    MCRegister Reg = Regs[FirstUnalloc], ShadowReg = ShadowRegs[FirstUnalloc];
+    MarkAllocated(Reg);
+    MarkAllocated(ShadowReg);
+    return Reg;
+  }
+
+  /// AllocateStack - Allocate a chunk of stack space with the specified size
+  /// and alignment.
+  unsigned AllocateStack(unsigned Size, Align Alignment) {
+    StackOffset = alignTo(StackOffset, Alignment);
+    unsigned Result = StackOffset;
+    StackOffset += Size;
+    MaxStackArgAlign = std::max(Alignment, MaxStackArgAlign);
+    ensureMaxAlignment(Alignment);
+    return Result;
+  }
+
+  void ensureMaxAlignment(Align Alignment);
+
+  /// Version of AllocateStack with list of extra registers to be shadowed.
+  /// Note that, unlike AllocateReg, this shadows ALL of the shadow registers.
+  unsigned AllocateStack(unsigned Size, Align Alignment,
+                         ArrayRef<MCPhysReg> ShadowRegs) {
+    for (MCPhysReg Shadow : ShadowRegs)
+      MarkAllocated(Shadow);
+    return AllocateStack(Size, Alignment);
+  }
+
+  // HandleByVal - Allocate a stack slot large enough to pass an argument by
+  // value. The size and alignment information of the argument is encoded in its
+  // parameter attribute.
+  void HandleByVal(unsigned ValNo, MVT ValVT, MVT LocVT,
+                   CCValAssign::LocInfo LocInfo, int MinSize, Align MinAlign,
+                   ISD::ArgFlagsTy ArgFlags);
+
+  // Returns count of byval arguments that are to be stored (even partly)
+  // in registers.
+  unsigned getInRegsParamsCount() const { return ByValRegs.size(); }
+
+  // Returns count of byval in-regs arguments processed.
+  unsigned getInRegsParamsProcessed() const { return InRegsParamsProcessed; }
+
+  // Get information about the N-th byval parameter that is stored in registers.
+  // Here "InRegsParamRecordIndex" is N.
+  void getInRegsParamInfo(unsigned InRegsParamRecordIndex,
+                          unsigned& BeginReg, unsigned& EndReg) const {
+    assert(InRegsParamRecordIndex < ByValRegs.size() &&
+           "Wrong ByVal parameter index");
+
+    const ByValInfo& info = ByValRegs[InRegsParamRecordIndex];
+    BeginReg = info.Begin;
+    EndReg = info.End;
+  }
+
+  // Add information about parameter that is kept in registers.
+  void addInRegsParamInfo(unsigned RegBegin, unsigned RegEnd) {
+    ByValRegs.push_back(ByValInfo(RegBegin, RegEnd));
+  }
+
+  // Goes either to next byval parameter (excluding "waste" record), or
+  // to the end of collection.
+  // Returns false, if end is reached.
+  bool nextInRegsParam() {
+    const unsigned NumRecords = ByValRegs.size();
+    if (InRegsParamsProcessed >= NumRecords)
+      return false; // Already at the end; leave the counter untouched.
+    return ++InRegsParamsProcessed < NumRecords;
+  }
+
+  // Clear byval registers tracking info.
+  void clearByValRegsInfo() {
+    InRegsParamsProcessed = 0;
+    ByValRegs.clear();
+  }
+
+  // Rewind byval registers tracking info.
+  void rewindByValRegsInfo() {
+    InRegsParamsProcessed = 0;
+  }
+
+  // Get list of pending assignments
+  SmallVectorImpl<CCValAssign> &getPendingLocs() {
+    return PendingLocs;
+  }
+
+  // Get a list of argflags for pending assignments.
+  SmallVectorImpl<ISD::ArgFlagsTy> &getPendingArgFlags() {
+    return PendingArgFlags;
+  }
+
+  /// Compute the remaining unused register parameters that would be used for
+  /// the given value type. This is useful when varargs are passed in the
+  /// registers that normal prototyped parameters would be passed in, or for
+  /// implementing perfect forwarding.
+  void getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs, MVT VT,
+                                   CCAssignFn Fn);
+
+  /// Compute the set of registers that need to be preserved and forwarded to
+  /// any musttail calls.
+  void analyzeMustTailForwardedRegisters(
+      SmallVectorImpl<ForwardedRegister> &Forwards, ArrayRef<MVT> RegParmTypes,
+      CCAssignFn Fn);
+
+  /// Returns true if the results of the two calling conventions are compatible.
+  /// This is usually part of the check for tailcall eligibility.
+  static bool resultsCompatible(CallingConv::ID CalleeCC,
+                                CallingConv::ID CallerCC, MachineFunction &MF,
+                                LLVMContext &C,
+                                const SmallVectorImpl<ISD::InputArg> &Ins,
+                                CCAssignFn CalleeFn, CCAssignFn CallerFn);
+
+  /// The function runs an additional analysis pass over function arguments.
+  /// It will mark each argument with the attribute flag SecArgPass.
+  /// After running, it will sort the locs list.
+  template <class T>
+  void AnalyzeArgumentsSecondPass(const SmallVectorImpl<T> &Args,
+                                  CCAssignFn Fn) {
+    unsigned NumFirstPassLocs = Locs.size();
+
+    /// Creates similar argument list to \p Args in which each argument is
+    /// marked using SecArgPass flag.
+    SmallVector<T, 16> SecPassArg;
+    // SmallVector<ISD::InputArg, 16> SecPassArg;
+    for (auto Arg : Args) {
+      Arg.Flags.setSecArgPass();
+      SecPassArg.push_back(Arg);
+    }
+
+    // Run the second argument pass
+    AnalyzeArguments(SecPassArg, Fn);
+
+    // Sort the locations of the arguments according to their original position.
+    SmallVector<CCValAssign, 16> TmpArgLocs;
+    TmpArgLocs.swap(Locs);
+    auto B = TmpArgLocs.begin(), E = TmpArgLocs.end();
+    std::merge(B, B + NumFirstPassLocs, B + NumFirstPassLocs, E,
+               std::back_inserter(Locs),
+               [](const CCValAssign &A, const CCValAssign &B) -> bool {
+                 return A.getValNo() < B.getValNo();
+               });
+  }
+
+private:
+  /// MarkAllocated - Mark a register and all of its aliases as allocated.
+  void MarkAllocated(MCPhysReg Reg);
+
+  void MarkUnallocated(MCPhysReg Reg);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_CALLINGCONVLOWER_H
--- a/suite/synctools/tablegen/include/llvm/CodeGen/CodeGenCommonISel.h
+++ b/suite/synctools/tablegen/include/llvm/CodeGen/CodeGenCommonISel.h
@ -0,0 +1,219 @@
+//===- CodeGenCommonISel.h - Common code between ISels ---------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares common utilities that are shared between SelectionDAG and
+// GlobalISel frameworks.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_CODEGENCOMMONISEL_H
+#define LLVM_CODEGEN_CODEGENCOMMONISEL_H
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include <cassert>
+namespace llvm {
+
+class BasicBlock;
+class MachineBasicBlock;
+/// Encapsulates all of the information needed to generate a stack protector
+/// check, and signals to isel when initialized that one needs to be generated.
+///
+/// *NOTE* The following is a high level documentation of SelectionDAG Stack
+/// Protector Generation. This is now also ported to be shared with GlobalISel,
+/// but without any significant changes.
+///
+/// High Level Overview of ISel Stack Protector Generation:
+///
+/// Previously, the "stack protector" IR pass handled stack protector
+/// generation. This necessitated splitting basic blocks at the IR level to
+/// create the success/failure basic blocks in the tail of the basic block in
+/// question. As a result of this, calls that would have qualified for the
+/// sibling call optimization were no longer eligible for optimization since
+/// said calls were no longer right in the "tail position" (i.e. the immediate
+/// predecessor of a ReturnInst instruction).
+///
+/// Since the sibling call optimization causes the callee to reuse the caller's
+/// stack, if we could delay the generation of the stack protector check until
+/// later in CodeGen after the sibling call decision was made, we get both the
+/// tail call optimization and the stack protector check!
+///
+/// A few goals in solving this problem were:
+///
+///   1. Preserve the architecture independence of stack protector generation.
+///
+///   2. Preserve the normal IR level stack protector check for platforms like
+///      OpenBSD for which we support platform-specific stack protector
+///      generation.
+///
+/// The main problem that guided the present solution is that one can not
+/// solve this problem in an architecture independent manner at the IR level
+/// only. This is because:
+///
+///   1. The decision on whether or not to perform a sibling call on certain
+///      platforms (for instance i386) requires lower level information
+///      related to available registers that can not be known at the IR level.
+///
+///   2. Even if the previous point were not true, the decision on whether to
+///      perform a tail call is done in LowerCallTo in SelectionDAG (or
+///      CallLowering in GlobalISel) which occurs after the Stack Protector
+///      Pass. As a result, one would need to put the relevant callinst into the
+///      stack protector check success basic block (where the return inst is
+///      placed) and then move it back later at ISel/MI time before the
+///      stack protector check if the tail call optimization failed. The MI
+///      level option was nixed immediately since it would require
+///      platform-specific pattern matching. The ISel level option was
+///      nixed because SelectionDAG only processes one IR level basic block at a
+///      time implying one could not create a DAG Combine to move the callinst.
+///
+/// To get around this problem:
+///
+///   1. Although SelectionDAG can only process one block at a time, we can
+///      generate multiple machine basic blocks for one IR level basic block.
+///      This is how we handle bit tests and switches.
+///
+///   2. At the MI level, tail calls are represented via a special return
+///      MIInst called "tcreturn". Thus if we know the basic block in which we
+///      wish to insert the stack protector check, we get the correct behavior
+///      by always inserting the stack protector check right before the return
+///      statement. This is a "magical transformation" since no matter where
+///      the stack protector check intrinsic is, we always insert the stack
+///      protector check code at the end of the BB.
+///
+/// Given the aforementioned constraints, the following solution was devised:
+///
+///   1. On platforms that do not support ISel stack protector check
+///      generation, allow for the normal IR level stack protector check
+///      generation to continue.
+///
+///   2. On platforms that do support ISel stack protector check
+///      generation:
+///
+///     a. Use the IR level stack protector pass to decide if a stack
+///        protector is required/which BB we insert the stack protector check
+///        in by reusing the logic already therein.
+///
+///     b. After we finish selecting the basic block, we produce the validation
+///        code with one of these techniques:
+///          1) with a call to a guard check function
+///          2) with inlined instrumentation
+///
+///        1) We insert a call to the check function before the terminator.
+///
+///        2) We first find a splice point in the parent basic block
+///        before the terminator and then splice the terminator of said basic
+///        block into the success basic block. Then we code-gen a new tail for
+///        the parent basic block consisting of the two loads, the comparison,
+///        and finally two branches to the success/failure basic blocks. We
+///        conclude by code-gening the failure basic block if we have not
+///        code-gened it already (all stack protector checks we generate in
+///        the same function, use the same failure basic block).
+class StackProtectorDescriptor {
+public:
+  StackProtectorDescriptor() = default;
+
+  /// Returns true if all fields of the stack protector descriptor are
+  /// initialized implying that we should/are ready to emit a stack protector.
+  bool shouldEmitStackProtector() const {
+    return ParentMBB && SuccessMBB && FailureMBB;
+  }
+
+  bool shouldEmitFunctionBasedCheckStackProtector() const {
+    return ParentMBB && !SuccessMBB && !FailureMBB;
+  }
+
+  /// Initialize the stack protector descriptor structure for a new basic
+  /// block.
+  void initialize(const BasicBlock *BB, MachineBasicBlock *MBB,
+                  bool FunctionBasedInstrumentation) {
+    // Make sure we are not already initialized for an inlined check.
+    assert(!shouldEmitStackProtector() && "Stack Protector Descriptor is "
+                                          "already initialized!");
+    ParentMBB = MBB;
+    if (!FunctionBasedInstrumentation) {
+      SuccessMBB = addSuccessorMBB(BB, MBB, /* IsLikely */ true);
+      FailureMBB = addSuccessorMBB(BB, MBB, /* IsLikely */ false, FailureMBB);
+    }
+  }
+
+  /// Reset state that changes when we handle different basic blocks.
+  ///
+  /// This currently includes:
+  ///
+  /// 1. The specific basic block we are generating a
+  /// stack protector for (ParentMBB).
+  ///
+  /// 2. The successor machine basic block that will contain the tail of
+  /// parent mbb after we create the stack protector check (SuccessMBB). This
+  /// BB is visited only on stack protector check success.
+  void resetPerBBState() {
+    ParentMBB = nullptr;
+    SuccessMBB = nullptr;
+  }
+
+  /// Reset state that only changes when we switch functions.
+  ///
+  /// This currently includes:
+  ///
+  /// 1. FailureMBB since we reuse the failure code path for all stack
+  /// protector checks created in an individual function.
+  ///
+  /// 2. The guard variable, since it is always the same. NOTE(review): no
+  /// guard variable is stored here; only FailureMBB is reset; confirm.
+  void resetPerFunctionState() { FailureMBB = nullptr; }
+
+  MachineBasicBlock *getParentMBB() { return ParentMBB; }
+  MachineBasicBlock *getSuccessMBB() { return SuccessMBB; }
+  MachineBasicBlock *getFailureMBB() { return FailureMBB; }
+
+private:
+  /// The basic block for which we are generating the stack protector.
+  ///
+  /// As a result of stack protector generation, we will splice the
+  /// terminators of this basic block into the successor mbb SuccessMBB and
+  /// replace it with a compare/branch to the successor mbbs
+  /// SuccessMBB/FailureMBB depending on whether or not the stack protector
+  /// was violated.
+  MachineBasicBlock *ParentMBB = nullptr;
+
+  /// A basic block visited on stack protector check success that contains the
+  /// terminators of ParentMBB.
+  MachineBasicBlock *SuccessMBB = nullptr;
+
+  /// This basic block visited on stack protector check failure that will
+  /// contain a call to __stack_chk_fail().
+  MachineBasicBlock *FailureMBB = nullptr;
+
+  /// Add a successor machine basic block to ParentMBB. If the successor mbb
+  /// has not been created yet (i.e. if SuccMBB = 0), then the machine basic
+  /// block will be created. Assign a large weight if IsLikely is true.
+  MachineBasicBlock *addSuccessorMBB(const BasicBlock *BB,
+                                     MachineBasicBlock *ParentMBB,
+                                     bool IsLikely,
+                                     MachineBasicBlock *SuccMBB = nullptr);
+};
+
+/// Find the split point at which to splice the end of BB into its success stack
+/// protector check machine basic block.
+///
+/// On many platforms, due to ABI constraints, terminators, even before register
+/// allocation, use physical registers. This creates an issue for us since
+/// physical registers at this point can not travel across basic
+/// blocks. Luckily, SelectionDAG always moves physical registers into vregs
+/// when they enter functions and moves them through a sequence of copies back
+/// into the physical registers right before the terminator creating a
+/// ``Terminator Sequence''. This function is searching for the beginning of the
+/// terminator sequence so that we can ensure that we splice off not just the
+/// terminator, but additionally the copies that move the vregs into the
+/// physical registers.
+MachineBasicBlock::iterator
+findSplitPointForStackProtector(MachineBasicBlock *BB,
+                                const TargetInstrInfo &TII);
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_CODEGENCOMMONISEL_H
--- a/suite/synctools/tablegen/include/llvm/CodeGen/CodeGenPassBuilder.h
+++ b/suite/synctools/tablegen/include/llvm/CodeGen/CodeGenPassBuilder.h
--- a/suite/synctools/tablegen/include/llvm/CodeGen/CommandFlags.h
+++ b/suite/synctools/tablegen/include/llvm/CodeGen/CommandFlags.h
@ -0,0 +1,181 @@
+//===-- CommandFlags.h - Command Line Flags Interface -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains codegen-specific flags that are shared between different
+// command line tools. The tools "llc" and "opt" both use this file to prevent
+// flag duplication.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_COMMANDFLAGS_H
+#define LLVM_CODEGEN_COMMANDFLAGS_H
+
+#include "llvm/ADT/FloatingPointMode.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/MC/MCTargetOptionsCommandFlags.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Target/TargetOptions.h"
+#include <string>
+#include <vector>
+
+namespace llvm {
+
+class Module;
+
+namespace codegen {
+
+std::string getMArch();
+
+std::string getMCPU();
+
+std::vector<std::string> getMAttrs();
+
+Reloc::Model getRelocModel();
+Optional<Reloc::Model> getExplicitRelocModel();
+
+ThreadModel::Model getThreadModel();
+
+CodeModel::Model getCodeModel();
+Optional<CodeModel::Model> getExplicitCodeModel();
+
+llvm::ExceptionHandling getExceptionModel();
+
+Optional<CodeGenFileType> getExplicitFileType();
+
+CodeGenFileType getFileType();
+
+FramePointerKind getFramePointerUsage();
+
+bool getEnableUnsafeFPMath();
+
+bool getEnableNoInfsFPMath();
+
+bool getEnableNoNaNsFPMath();
+
+bool getEnableNoSignedZerosFPMath();
+
+bool getEnableNoTrappingFPMath();
+
+DenormalMode::DenormalModeKind getDenormalFPMath();
+DenormalMode::DenormalModeKind getDenormalFP32Math();
+
+bool getEnableHonorSignDependentRoundingFPMath();
+
+llvm::FloatABI::ABIType getFloatABIForCalls();
+
+llvm::FPOpFusion::FPOpFusionMode getFuseFPOps();
+
+SwiftAsyncFramePointerMode getSwiftAsyncFramePointer();
+
+bool getDontPlaceZerosInBSS();
+
+bool getEnableGuaranteedTailCallOpt();
+
+bool getEnableAIXExtendedAltivecABI();
+
+bool getDisableTailCalls();
+
+bool getStackSymbolOrdering();
+
+unsigned getOverrideStackAlignment();
+
+bool getStackRealign();
+
+std::string getTrapFuncName();
+
+bool getUseCtors();
+
+bool getRelaxELFRelocations();
+
+bool getDataSections();
+Optional<bool> getExplicitDataSections();
+
+bool getFunctionSections();
+Optional<bool> getExplicitFunctionSections();
+
+bool getIgnoreXCOFFVisibility();
+
+bool getXCOFFTracebackTable();
+
+std::string getBBSections();
+
+unsigned getTLSSize();
+
+bool getEmulatedTLS();
+
+bool getUniqueSectionNames();
+
+bool getUniqueBasicBlockSectionNames();
+
+llvm::EABI getEABIVersion();
+
+llvm::DebuggerKind getDebuggerTuningOpt();
+
+bool getEnableStackSizeSection();
+
+bool getEnableAddrsig();
+
+bool getEmitCallSiteInfo();
+
+bool getEnableMachineFunctionSplitter();
+
+bool getEnableDebugEntryValues();
+
+bool getValueTrackingVariableLocations();
+Optional<bool> getExplicitValueTrackingVariableLocations();
+
+bool getForceDwarfFrameSection();
+
+bool getXRayOmitFunctionIndex();
+
+bool getDebugStrictDwarf();
+
+unsigned getAlignLoops();
+
+/// Create this object with static storage to register codegen-related command
+/// line options.
+struct RegisterCodeGenFlags {
+  RegisterCodeGenFlags();
+};
+
+llvm::BasicBlockSection getBBSectionsMode(llvm::TargetOptions &Options);
+
+/// Common utility function tightly tied to the options listed here. Initializes
+/// a TargetOptions object with CodeGen flags and returns it.
+/// \p TheTriple is used to determine the default value for options that
+///    are not explicitly specified. If those triple-dependent option values
+///    have no effect for your component, a default Triple() can be passed
+///    in.
+TargetOptions InitTargetOptionsFromCodeGenFlags(const llvm::Triple &TheTriple);
+
+std::string getCPUStr();
+
+std::string getFeaturesStr();
+
+std::vector<std::string> getFeatureList();
+
+void renderBoolStringAttr(AttrBuilder &B, StringRef Name, bool Val);
+
+/// Set function attributes of function \p F based on CPU, Features, and command
+/// line flags.
+void setFunctionAttributes(StringRef CPU, StringRef Features, Function &F);
+
+/// Set function attributes of functions in Module M based on CPU,
+/// Features, and command line flags.
+void setFunctionAttributes(StringRef CPU, StringRef Features, Module &M);
+
+/// Should value-tracking variable locations / instruction referencing be
+/// enabled by default for this triple?
+bool getDefaultValueTrackingVariableLocations(const llvm::Triple &T);
+} // namespace codegen
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_COMMANDFLAGS_H
--- a/suite/synctools/tablegen/include/llvm/CodeGen/CostTable.h
+++ b/suite/synctools/tablegen/include/llvm/CodeGen/CostTable.h
@ -0,0 +1,89 @@
+//===-- CostTable.h - Instruction Cost Table handling -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Cost tables and simple lookup functions
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_COSTTABLE_H_
+#define LLVM_CODEGEN_COSTTABLE_H_
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/MachineValueType.h"
+
+namespace llvm {
+
+/// Cost table entry; CostTableLookup matches on the (ISD, Type) pair.
+template <typename CostType>
+struct CostTblEntryT {
+  int ISD;
+  MVT::SimpleValueType Type;
+  CostType Cost;
+};
+using CostTblEntry = CostTblEntryT<unsigned>;
+
+/// Return the first entry of \p Tbl whose ISD and Type equal \p ISD and \p Ty,
+/// or nullptr when no entry matches.
+template <class CostType>
+inline const CostTblEntryT<CostType> *
+CostTableLookup(ArrayRef<CostTblEntryT<CostType>> Tbl, int ISD, MVT Ty) {
+  const auto IsMatch = [=](const CostTblEntryT<CostType> &Row) {
+    return ISD == Row.ISD && Ty == Row.Type;
+  };
+  const auto Found = find_if(Tbl, IsMatch);
+
+  // A null result tells the caller that no entry matched.
+  return Found == Tbl.end() ? nullptr : Found;
+}
+
+template <size_t N, class CostType>
+inline const CostTblEntryT<CostType> *
+CostTableLookup(const CostTblEntryT<CostType> (&Table)[N], int ISD, MVT Ty) {
+  // Array overload; naming CostType explicitly avoids deduction failures.
+  return CostTableLookup<CostType>(makeArrayRef(Table), ISD, Ty);
+}
+
+/// Type conversion cost table entry; lookups match on (ISD, Dst, Src).
+template <typename CostType>
+struct TypeConversionCostTblEntryT {
+  int ISD;
+  MVT::SimpleValueType Dst;
+  MVT::SimpleValueType Src;
+  CostType Cost;
+};
+using TypeConversionCostTblEntry = TypeConversionCostTblEntryT<unsigned>;
+
+/// Return the first entry of \p Tbl matching \p ISD, \p Dst and \p Src, or
+/// nullptr when no entry matches.
+template <class CostType>
+inline const TypeConversionCostTblEntryT<CostType> *
+ConvertCostTableLookup(ArrayRef<TypeConversionCostTblEntryT<CostType>> Tbl,
+                       int ISD, MVT Dst, MVT Src) {
+  using EntryT = TypeConversionCostTblEntryT<CostType>;
+  const auto IsMatch = [=](const EntryT &Row) {
+    return ISD == Row.ISD && Src == Row.Src && Dst == Row.Dst;
+  };
+  const auto Found = find_if(Tbl, IsMatch);
+
+  // A null result tells the caller that no entry matched.
+  return Found == Tbl.end() ? nullptr : Found;
+}
+
+template <size_t N, class CostType>
+inline const TypeConversionCostTblEntryT<CostType> *
+ConvertCostTableLookup(const TypeConversionCostTblEntryT<CostType> (&Table)[N],
+                       int ISD, MVT Dst, MVT Src) {
+  // Array overload; naming CostType explicitly avoids deduction failures.
+  return ConvertCostTableLookup<CostType>(makeArrayRef(Table), ISD, Dst, Src);
+}
+
+} // namespace llvm
+
+#endif /* LLVM_CODEGEN_COSTTABLE_H_ */
--- a/suite/synctools/tablegen/include/llvm/CodeGen/DAGCombine.h
+++ b/suite/synctools/tablegen/include/llvm/CodeGen/DAGCombine.h
@ -0,0 +1,24 @@
+//===-- llvm/CodeGen/DAGCombine.h  ------- SelectionDAG Nodes ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+
+#ifndef LLVM_CODEGEN_DAGCOMBINE_H
+#define LLVM_CODEGEN_DAGCOMBINE_H
+
+namespace llvm {
+/// Combine stage relative to the legalization steps named by each enumerator.
+enum CombineLevel {
+  BeforeLegalizeTypes,
+  AfterLegalizeTypes,
+  AfterLegalizeVectorOps,
+  AfterLegalizeDAG
+};
+
+} // end llvm namespace
+
+#endif
--- a/suite/synctools/tablegen/include/llvm/CodeGen/DFAPacketizer.h
+++ b/suite/synctools/tablegen/include/llvm/CodeGen/DFAPacketizer.h
@ -0,0 +1,200 @@
+//===- llvm/CodeGen/DFAPacketizer.h - DFA Packetizer for VLIW ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// This class implements a deterministic finite automaton (DFA) based
+// packetizing mechanism for VLIW architectures. It provides APIs to
+// determine whether there exists a legal mapping of instructions to
+// functional unit assignments in a packet. The DFA is auto-generated from
+// the target's Schedule.td file.
+//
+// A DFA consists of 3 major elements: states, inputs, and transitions. For
+// the packetizing mechanism, the input is the set of instruction classes for
+// a target. The state models all possible combinations of functional unit
+// consumption for a given set of instructions in a packet. A transition
+// models the addition of an instruction to a packet. In the DFA constructed
+// by this class, if an instruction can be added to a packet, then a valid
+// transition exists from the corresponding state. Invalid transitions
+// indicate that the instruction cannot be added to the current packet.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_DFAPACKETIZER_H
+#define LLVM_CODEGEN_DFAPACKETIZER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/ScheduleDAGMutation.h"
+#include "llvm/Support/Automaton.h"
+#include <cstdint>
+#include <map>
+#include <memory>
+#include <utility>
+#include <vector>
+
+namespace llvm {
+
+class DefaultVLIWScheduler;
+class InstrItineraryData;
+class MachineFunction;
+class MachineInstr;
+class MachineLoopInfo;
+class MCInstrDesc;
+class SUnit;
+class TargetInstrInfo;
+
+class DFAPacketizer {
+private:
+  const InstrItineraryData *InstrItins;
+  Automaton<uint64_t> A;
+  /// For every itinerary, an "action" to apply to the automaton. This removes
+  /// the redundancy in actions between itinerary classes.
+  ArrayRef<unsigned> ItinActions;
+
+public:
+  DFAPacketizer(const InstrItineraryData *InstrItins, Automaton<uint64_t> a,
+                ArrayRef<unsigned> ItinActions)
+      : InstrItins(InstrItins), A(std::move(a)), ItinActions(ItinActions) {
+    // Start off with resource tracking disabled.
+    A.enableTranscription(false);
+  }
+
+  // Reset the current state to make all resources available.
+  void clearResources() {
+    A.reset();
+  }
+
+  // Set whether this packetizer should track not just whether instructions
+  // can be packetized, but also which functional units each instruction ends up
+  // using after packetization.
+  void setTrackResources(bool Track) {
+    A.enableTranscription(Track);
+  }
+
+  // Check if the resources occupied by a MCInstrDesc are available in
+  // the current state.
+  bool canReserveResources(const MCInstrDesc *MID);
+
+  // Reserve the resources occupied by a MCInstrDesc and change the current
+  // state to reflect that change.
+  void reserveResources(const MCInstrDesc *MID);
+
+  // Check if the resources occupied by a machine instruction are available
+  // in the current state.
+  bool canReserveResources(MachineInstr &MI);
+
+  // Reserve the resources occupied by a machine instruction and change the
+  // current state to reflect that change.
+  void reserveResources(MachineInstr &MI);
+
+  // Return the resources used by the InstIdx'th instruction added to this
+  // packet. The resources are returned as a bitvector of functional units.
+  //
+  // Note that a bundle may be packed in multiple valid ways. This function
+  // returns one arbitrary valid packing.
+  //
+  // Requires setTrackResources(true) to have been called.
+  unsigned getUsedResources(unsigned InstIdx);
+
+  const InstrItineraryData *getInstrItins() const { return InstrItins; }
+};
+
+// VLIWPacketizerList implements a simple VLIW packetizer using DFA. The
+// packetizer works on machine basic blocks. For each instruction I in BB,
+// the packetizer consults the DFA to see if machine resources are available
+// to execute I. If so, the packetizer checks if I depends on any instruction
+// in the current packet. If no dependency is found, I is added to current
+// packet and the machine resource is marked as taken. If any dependency is
+// found, a target API call is made to prune the dependence.
+class VLIWPacketizerList {
+protected:
+  MachineFunction &MF;
+  const TargetInstrInfo *TII;
+  AAResults *AA;
+
+  // The VLIW Scheduler.
+  DefaultVLIWScheduler *VLIWScheduler;
+  // Vector of instructions assigned to the current packet.
+  std::vector<MachineInstr*> CurrentPacketMIs;
+  // DFA resource tracker.
+  DFAPacketizer *ResourceTracker;
+  // Map: MI -> SU.
+  std::map<MachineInstr*, SUnit*> MIToSUnit;
+
+public:
+  // The AAResults parameter can be nullptr.
+  VLIWPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI,
+                     AAResults *AA);
+
+  virtual ~VLIWPacketizerList();
+
+  // Implement this API in the backend to bundle instructions.
+  void PacketizeMIs(MachineBasicBlock *MBB,
+                    MachineBasicBlock::iterator BeginItr,
+                    MachineBasicBlock::iterator EndItr);
+
+  // Return the ResourceTracker.
+  DFAPacketizer *getResourceTracker() {return ResourceTracker;}
+
+  // addToPacket - Add MI to the current packet.
+  virtual MachineBasicBlock::iterator addToPacket(MachineInstr &MI) {
+    CurrentPacketMIs.push_back(&MI);
+    ResourceTracker->reserveResources(MI);
+    return MI;
+  }
+
+  // End the current packet and reset the state of the packetizer.
+  // Overriding this function allows the target-specific packetizer
+  // to perform custom finalization.
+  virtual void endPacket(MachineBasicBlock *MBB,
+                         MachineBasicBlock::iterator MI);
+
+  // Perform initialization before packetizing an instruction. This
+  // function is supposed to be overridden by the target dependent packetizer.
+  virtual void initPacketizerState() {}
+
+  // Check if the given instruction I should be ignored by the packetizer.
+  virtual bool ignorePseudoInstruction(const MachineInstr &I,
+                                       const MachineBasicBlock *MBB) {
+    return false;
+  }
+
+  // Return true if instruction MI can not be packetized with any other
+  // instruction, which means that MI itself is a packet.
+  virtual bool isSoloInstruction(const MachineInstr &MI) { return true; }
+
+  // Check if the packetizer should try to add the given instruction to
+  // the current packet. One reason for which it may not be desirable
+  // to include an instruction in the current packet could be that it
+  // would cause a stall.
+  // If this function returns "false", the current packet will be ended,
+  // and the instruction will be added to the next packet.
+  virtual bool shouldAddToPacket(const MachineInstr &MI) { return true; }
+
+  // Check if it is legal to packetize SUI and SUJ together.
+  virtual bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
+    return false;
+  }
+
+  // Check if it is legal to prune dependence between SUI and SUJ.
+  virtual bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) {
+    return false;
+  }
+
+  // Add a DAG mutation to be done before the packetization begins.
+  void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation);
+
+  bool alias(const MachineInstr &MI1, const MachineInstr &MI2,
+             bool UseTBAA = true) const;
+
+private:
+  bool alias(const MachineMemOperand &Op1, const MachineMemOperand &Op2,
+             bool UseTBAA = true) const;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_DFAPACKETIZER_H
--- a/suite/synctools/tablegen/include/llvm/CodeGen/DIE.h
+++ b/suite/synctools/tablegen/include/llvm/CodeGen/DIE.h
@ -0,0 +1,998 @@
+//===- lib/CodeGen/DIE.h - DWARF Info Entries -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Data structures for DWARF info entries.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_DIE_H
+#define LLVM_CODEGEN_DIE_H
+
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/DwarfStringPoolEntry.h"
+#include "llvm/Support/AlignOf.h"
+#include "llvm/Support/Allocator.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <new>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+namespace llvm {
+
+class AsmPrinter;
+class DIE;
+class DIEUnit;
+class DwarfCompileUnit;
+class MCExpr;
+class MCSection;
+class MCSymbol;
+class raw_ostream;
+
+//===--------------------------------------------------------------------===//
+/// One (attribute, form) entry of a Dwarf abbreviation, plus the constant
+/// carried by entries that use DW_FORM_implicit_const.
+class DIEAbbrevData {
+  /// Dwarf attribute code of this entry.
+  dwarf::Attribute Attribute;
+
+  /// Dwarf form code of this entry.
+  dwarf::Form Form;
+
+  /// Constant stored for DW_FORM_implicit_const entries; stays 0 when the
+  /// entry is built from an explicit form.
+  int64_t Value = 0;
+
+public:
+  /// Build an entry with an explicit form; Value keeps its default of 0.
+  DIEAbbrevData(dwarf::Attribute Attr, dwarf::Form TheForm)
+      : Attribute(Attr), Form(TheForm) {}
+
+  /// Build a DW_FORM_implicit_const entry holding \p ImplicitValue.
+  DIEAbbrevData(dwarf::Attribute Attr, int64_t ImplicitValue)
+      : Attribute(Attr), Form(dwarf::DW_FORM_implicit_const),
+        Value(ImplicitValue) {}
+
+  /// Read-only access to the three stored fields.
+  /// @{
+  dwarf::Attribute getAttribute() const { return Attribute; }
+  dwarf::Form getForm() const { return Form; }
+  int64_t getValue() const { return Value; }
+  /// @}
+
+  /// Used to gather unique data for the abbreviation folding set.
+  void Profile(FoldingSetNodeID &ID) const;
+};
+
+//===--------------------------------------------------------------------===//
+/// Dwarf abbreviation: records the tag, the has-children flag and the ordered
+/// attribute entries that describe how a debug information object is laid
+/// out.
+class DIEAbbrev : public FoldingSetNode {
+  /// Number identifying this abbreviation; 0 until setNumber() is called.
+  unsigned Number = 0;
+
+  /// Dwarf tag code.
+  dwarf::Tag Tag;
+
+  /// True when the described node has children.
+  ///
+  /// A bool is enough here because the values in the standard are 0 and 1
+  /// for no children and children respectively.
+  bool Children;
+
+  /// Attribute entries, in the order they were appended.
+  SmallVector<DIEAbbrevData, 12> Data;
+
+public:
+  DIEAbbrev(dwarf::Tag TheTag, bool HasChildren)
+      : Tag(TheTag), Children(HasChildren) {}
+
+  /// Read accessors.
+  /// @{
+  unsigned getNumber() const { return Number; }
+  dwarf::Tag getTag() const { return Tag; }
+  bool hasChildren() const { return Children; }
+  const SmallVectorImpl<DIEAbbrevData> &getData() const { return Data; }
+  /// @}
+
+  /// Mutators.
+  /// @{
+  void setNumber(unsigned N) { Number = N; }
+  void setChildrenFlag(bool hasChild) { Children = hasChild; }
+  /// @}
+
+  /// Append an (attribute, form) entry to the abbreviation.
+  void AddAttribute(dwarf::Attribute Attribute, dwarf::Form Form) {
+    const DIEAbbrevData Entry(Attribute, Form);
+    Data.push_back(Entry);
+  }
+
+  /// Append a DW_FORM_implicit_const entry carrying \p Value.
+  void AddImplicitConstAttribute(dwarf::Attribute Attribute, int64_t Value) {
+    const DIEAbbrevData Entry(Attribute, Value);
+    Data.push_back(Entry);
+  }
+
+  /// Used to gather unique data for the abbreviation folding set.
+  void Profile(FoldingSetNodeID &ID) const;
+
+  /// Print the abbreviation using the specified asm printer.
+  void Emit(const AsmPrinter *AP) const;
+
+  void print(raw_ostream &O) const;
+  void dump() const;
+};
+
+//===--------------------------------------------------------------------===//
+/// Uniques DIEAbbrev objects and hands out abbreviation numbers.
+///
+/// For each llvm::DIE passed to uniqueAbbreviation() the set returns the one
+/// shared DIEAbbrev describing it, and each distinct DIEAbbrev gets its own
+/// abbreviation number. The collected DIEAbbrev objects can then be emitted
+/// into the .debug_abbrev section.
+class DIEAbbrevSet {
+  /// Bump allocator used when creating DIEAbbrev objects for the uniqued
+  /// storage container. Held by reference.
+  BumpPtrAllocator &Alloc;
+  /// FoldingSet that performs the uniquing.
+  FoldingSet<DIEAbbrev> AbbreviationsSet;
+  /// Every unique abbreviation currently in use.
+  std::vector<DIEAbbrev *> Abbreviations;
+
+public:
+  /// Bind the set to the allocator \p Storage; the reference is kept.
+  DIEAbbrevSet(BumpPtrAllocator &Storage) : Alloc(Storage) {}
+  ~DIEAbbrevSet();
+
+  /// Generate the abbreviation declaration for a DIE and return the uniqued
+  /// instance.
+  ///
+  /// \param Die the debug info entry to generate the abbreviation for.
+  /// \returns A reference to the uniqued abbreviation declaration that is
+  /// owned by this class.
+  DIEAbbrev &uniqueAbbreviation(DIE &Die);
+
+  /// Print all abbreviations using the specified asm printer.
+  void Emit(const AsmPrinter *AP, MCSection *Section) const;
+};
+
+//===--------------------------------------------------------------------===//
+/// An integer value DIE.
+///
+/// Wraps a single 64-bit integer and knows how to pick the data form that
+/// can hold it.
+class DIEInteger {
+  uint64_t Integer;
+
+public:
+  explicit DIEInteger(uint64_t I) : Integer(I) {}
+
+  /// Choose the best form for integer.
+  ///
+  /// \param IsSigned when true, \p Int is interpreted as a two's complement
+  /// signed value and a form is picked only if the value survives a round
+  /// trip through the signed type of that width; otherwise the unsigned type
+  /// of that width is used.
+  /// \param Int the value to encode.
+  /// \returns DW_FORM_data1, DW_FORM_data2 or DW_FORM_data4 for the first
+  /// width that round-trips the value, DW_FORM_data8 otherwise.
+  static dwarf::Form BestForm(bool IsSigned, uint64_t Int) {
+    // Fixed-width types are used on purpose: the signedness of plain 'char'
+    // is implementation-defined, so casting through it would make the chosen
+    // form for signed values depend on the host the tool was built on.
+    if (IsSigned) {
+      const int64_t SignedInt = Int;
+      if (static_cast<int8_t>(Int) == SignedInt)
+        return dwarf::DW_FORM_data1;
+      if (static_cast<int16_t>(Int) == SignedInt)
+        return dwarf::DW_FORM_data2;
+      if (static_cast<int32_t>(Int) == SignedInt)
+        return dwarf::DW_FORM_data4;
+    } else {
+      if (static_cast<uint8_t>(Int) == Int)
+        return dwarf::DW_FORM_data1;
+      if (static_cast<uint16_t>(Int) == Int)
+        return dwarf::DW_FORM_data2;
+      if (static_cast<uint32_t>(Int) == Int)
+        return dwarf::DW_FORM_data4;
+    }
+    return dwarf::DW_FORM_data8;
+  }
+
+  /// Read or replace the stored integer.
+  uint64_t getValue() const { return Integer; }
+  void setValue(uint64_t Val) { Integer = Val; }
+
+  void emitValue(const AsmPrinter *Asm, dwarf::Form Form) const;
+  unsigned sizeOf(const dwarf::FormParams &FormParams, dwarf::Form Form) const;
+
+  void print(raw_ostream &O) const;
+};
+
+//===--------------------------------------------------------------------===//
+/// An expression DIE: holds a pointer to an MCExpr. The pointer is stored
+/// as given; this class never dereferences it in the code shown here.
+class DIEExpr {
+  const MCExpr *Expr;
+
+public:
+  explicit DIEExpr(const MCExpr *Expression) : Expr(Expression) {}
+
+  /// Return the wrapped MCExpr pointer.
+  const MCExpr *getValue() const { return Expr; }
+
+  void emitValue(const AsmPrinter *AP, dwarf::Form Form) const;
+  unsigned sizeOf(const dwarf::FormParams &FormParams, dwarf::Form Form) const;
+
+  void print(raw_ostream &O) const;
+};
+
+//===--------------------------------------------------------------------===//
+/// A label DIE: holds a pointer to the MCSymbol that names the label.
+class DIELabel {
+  const MCSymbol *Label;
+
+public:
+  explicit DIELabel(const MCSymbol *Symbol) : Label(Symbol) {}
+
+  /// Return the wrapped MCSymbol pointer.
+  const MCSymbol *getValue() const { return Label; }
+
+  void emitValue(const AsmPrinter *AP, dwarf::Form Form) const;
+  unsigned sizeOf(const dwarf::FormParams &FormParams, dwarf::Form Form) const;
+
+  void print(raw_ostream &O) const;
+};
+
+//===--------------------------------------------------------------------===//
+/// A BaseTypeRef DIE: stores a compile unit pointer together with an index.
+/// NOTE(review): the index presumably selects a base type within that unit;
+/// confirm against the out-of-line emitValue().
+class DIEBaseTypeRef {
+  const DwarfCompileUnit *CU;
+  const uint64_t Index;
+  static constexpr unsigned ULEB128PadSize = 4;
+
+public:
+  explicit DIEBaseTypeRef(const DwarfCompileUnit *Unit, uint64_t TypeIndex)
+    : CU(Unit), Index(TypeIndex) {}
+
+  /// Return the index given at construction; it cannot change afterwards
+  /// because the member is const.
+  uint64_t getIndex() const { return Index; }
+
+  /// EmitValue - Emit base type reference.
+  void emitValue(const AsmPrinter *AP, dwarf::Form Form) const;
+  /// sizeOf - Determine size of the base type reference in bytes.
+  unsigned sizeOf(const dwarf::FormParams &, dwarf::Form) const;
+
+  void print(raw_ostream &O) const;
+};
+
+//===--------------------------------------------------------------------===//
+/// A simple label difference DIE: remembers a "high" and a "low" symbol.
+///
+class DIEDelta {
+  const MCSymbol *LabelHi;
+  const MCSymbol *LabelLo;
+
+public:
+  DIEDelta(const MCSymbol *High, const MCSymbol *Low)
+      : LabelHi(High), LabelLo(Low) {}
+
+  void emitValue(const AsmPrinter *AP, dwarf::Form Form) const;
+  unsigned sizeOf(const dwarf::FormParams &FormParams, dwarf::Form Form) const;
+
+  void print(raw_ostream &O) const;
+};
+
+//===--------------------------------------------------------------------===//
+/// Holds a reference to a string pool entry.
+///
+/// This class is used with the DW_FORM_strp and DW_FORM_GNU_str_index forms.
+class DIEString {
+  DwarfStringPoolEntryRef S;
+
+public:
+  /// Wrap the pool entry \p PoolEntry (implicit conversion is intentional
+  /// in the original interface and kept as is).
+  DIEString(DwarfStringPoolEntryRef PoolEntry) : S(PoolEntry) {}
+
+  /// Return the string of the referenced pool entry.
+  StringRef getString() const { return S.getString(); }
+
+  void emitValue(const AsmPrinter *AP, dwarf::Form Form) const;
+  unsigned sizeOf(const dwarf::FormParams &FormParams, dwarf::Form Form) const;
+
+  void print(raw_ostream &O) const;
+};
+
+//===--------------------------------------------------------------------===//
+/// Holds a string value that is stored inline.
+///
+/// This class is used with the DW_FORM_string form.
+class DIEInlineString {
+  StringRef S;
+
+public:
+  /// Store the result of copying \p Text with the allocator \p Alloc
+  /// (via StringRef::copy), rather than referencing \p Text itself.
+  template <typename Allocator>
+  explicit DIEInlineString(StringRef Text, Allocator &Alloc)
+      : S(Text.copy(Alloc)) {}
+
+  ~DIEInlineString() = default;
+
+  /// Return the stored string.
+  StringRef getString() const { return S; }
+
+  void emitValue(const AsmPrinter *AP, dwarf::Form Form) const;
+  unsigned sizeOf(const dwarf::FormParams &, dwarf::Form) const;
+
+  void print(raw_ostream &O) const;
+};
+
+//===--------------------------------------------------------------------===//
+/// A pointer to another debug information entry.  An instance of this class
+/// can also be used as a proxy for a debug information entry not yet defined
+/// (i.e. types).
+class DIEEntry {
+  DIE *Entry;
+
+public:
+  /// There is no empty DIEEntry: a target DIE is always required.
+  DIEEntry() = delete;
+  explicit DIEEntry(DIE &Target) : Entry(&Target) {}
+
+  /// Return the referenced DIE.
+  DIE &getEntry() const { return *Entry; }
+
+  void emitValue(const AsmPrinter *AP, dwarf::Form Form) const;
+  unsigned sizeOf(const dwarf::FormParams &FormParams, dwarf::Form Form) const;
+
+  void print(raw_ostream &O) const;
+};
+
+//===--------------------------------------------------------------------===//
+/// Represents a pointer to a location list in the debug_loc
+/// section, stored as an index.
+class DIELocList {
+  /// Index into the .debug_loc vector.
+  size_t Index;
+
+public:
+  DIELocList(size_t ListIndex) : Index(ListIndex) {}
+
+  /// Return the stored index.
+  size_t getValue() const { return Index; }
+
+  void emitValue(const AsmPrinter *AP, dwarf::Form Form) const;
+  unsigned sizeOf(const dwarf::FormParams &FormParams, dwarf::Form Form) const;
+
+  void print(raw_ostream &O) const;
+};
+
+//===--------------------------------------------------------------------===//
+/// An address-plus-offset DIE: pairs an integer (Addr) with a label
+/// difference (Offset).
+class DIEAddrOffset {
+  DIEInteger Addr;
+  DIEDelta Offset;
+
+public:
+  /// \p AddrIdx initialises the integer part; \p High and \p Low are handed
+  /// to the DIEDelta that forms the offset part.
+  explicit DIEAddrOffset(uint64_t AddrIdx, const MCSymbol *High,
+                         const MCSymbol *Low)
+      : Addr(AddrIdx), Offset(High, Low) {}
+
+  void emitValue(const AsmPrinter *AP, dwarf::Form Form) const;
+  unsigned sizeOf(const dwarf::FormParams &FormParams, dwarf::Form Form) const;
+
+  void print(raw_ostream &O) const;
+};
+
+//===--------------------------------------------------------------------===//
+/// A debug information entry value. Some of these roughly correlate
+/// to DWARF attribute classes.
+///
+/// A DIEValue is a small tagged union: \a Ty says which kind of value is
+/// stored in \a Val, and \a Attribute / \a Form record the DWARF attribute
+/// and form the value belongs to. The set of kinds comes from
+/// llvm/CodeGen/DIEValue.def, which is included several times below with
+/// different HANDLE_DIEVALUE* macro definitions.
+class DIEBlock;
+class DIELoc;
+class DIEValue {
+public:
+  /// Discriminator for the stored value. isNone is the first enumerator (so
+  /// it has value 0) and marks an empty DIEValue; the remaining enumerators
+  /// are generated as is<T> for every entry of DIEValue.def.
+  enum Type {
+    isNone,
+#define HANDLE_DIEVALUE(T) is##T,
+#include "llvm/CodeGen/DIEValue.def"
+  };
+
+private:
+  /// Type of data stored in the value.
+  Type Ty = isNone;
+  /// Attribute and form this value is associated with; both default to 0.
+  dwarf::Attribute Attribute = (dwarf::Attribute)0;
+  dwarf::Form Form = (dwarf::Form)0;
+
+  /// Storage for the value.
+  ///
+  /// All values that aren't standard layout (or are larger than 8 bytes)
+  /// should be stored by reference instead of by value.
+  ///
+  /// NOTE(review): DIEValue.def also declares InlineString as a "large"
+  /// value, but no DIEInlineString pointer is listed here. Other pointer
+  /// types are listed, so the storage size is presumably unaffected; confirm.
+  using ValTy =
+      AlignedCharArrayUnion<DIEInteger, DIEString, DIEExpr, DIELabel,
+                            DIEDelta *, DIEEntry, DIEBlock *, DIELoc *,
+                            DIELocList, DIEBaseTypeRef *, DIEAddrOffset *>;
+
+  // The storage must not exceed 8 bytes or one pointer, whichever test
+  // passes; anything bigger has to be held through a pointer.
+  static_assert(sizeof(ValTy) <= sizeof(uint64_t) ||
+                    sizeof(ValTy) <= sizeof(void *),
+                "Expected all large types to be stored via pointer");
+
+  /// Underlying stored value.
+  ValTy Val;
+
+  /// Placement-construct a copy of \p V of type T inside \a Val. T must be
+  /// standard layout or a pointer.
+  template <class T> void construct(T V) {
+    static_assert(std::is_standard_layout<T>::value ||
+                      std::is_pointer<T>::value,
+                  "Expected standard layout or pointer");
+    new (reinterpret_cast<void *>(&Val)) T(V);
+  }
+
+  /// View the raw storage as a T. The caller is responsible for T matching
+  /// what was constructed there.
+  template <class T> T *get() { return reinterpret_cast<T *>(&Val); }
+  template <class T> const T *get() const {
+    return reinterpret_cast<const T *>(&Val);
+  }
+  /// Run T's destructor on the storage.
+  template <class T> void destruct() { get<T>()->~T(); }
+
+  /// Destroy the underlying value.
+  ///
+  /// This should get optimized down to a no-op.  We could skip it if we could
+  /// add a static assert on \a std::is_trivially_copyable(), but we currently
+  /// support versions of GCC that don't understand that.
+  ///
+  /// Small kinds are destroyed as DIE<T>, large kinds as the stored
+  /// 'const DIE<T> *' (the pointee is not touched). \a Ty is left unchanged.
+  void destroyVal() {
+    switch (Ty) {
+    case isNone:
+      return;
+#define HANDLE_DIEVALUE_SMALL(T)                                               \
+  case is##T:                                                                  \
+    destruct<DIE##T>();                                                        \
+    return;
+#define HANDLE_DIEVALUE_LARGE(T)                                               \
+  case is##T:                                                                  \
+    destruct<const DIE##T *>();                                                \
+    return;
+#include "llvm/CodeGen/DIEValue.def"
+    }
+  }
+
+  /// Copy the underlying value.
+  ///
+  /// This should get optimized down to a simple copy.  We need to actually
+  /// construct the value, rather than calling memcpy, to satisfy strict
+  /// aliasing rules.
+  ///
+  /// The switch is on this object's \a Ty, so callers must set Ty to X.Ty
+  /// before calling (both callers below do).
+  void copyVal(const DIEValue &X) {
+    switch (Ty) {
+    case isNone:
+      return;
+#define HANDLE_DIEVALUE_SMALL(T)                                               \
+  case is##T:                                                                  \
+    construct<DIE##T>(*X.get<DIE##T>());                                       \
+    return;
+#define HANDLE_DIEVALUE_LARGE(T)                                               \
+  case is##T:                                                                  \
+    construct<const DIE##T *>(*X.get<const DIE##T *>());                       \
+    return;
+#include "llvm/CodeGen/DIEValue.def"
+    }
+  }
+
+public:
+  /// Create an empty value (Ty == isNone).
+  DIEValue() = default;
+
+  /// Copy the tag fields first, then copy-construct the payload that the
+  /// tag selects.
+  DIEValue(const DIEValue &X) : Ty(X.Ty), Attribute(X.Attribute), Form(X.Form) {
+    copyVal(X);
+  }
+
+  /// Destroy the current payload, take over X's tag fields, then
+  /// copy-construct X's payload. There is no explicit self-assignment check.
+  DIEValue &operator=(const DIEValue &X) {
+    destroyVal();
+    Ty = X.Ty;
+    Attribute = X.Attribute;
+    Form = X.Form;
+    copyVal(X);
+    return *this;
+  }
+
+  ~DIEValue() { destroyVal(); }
+
+  // One constructor per kind listed in DIEValue.def. Small kinds are taken
+  // by const reference and copied into the storage; large kinds are taken by
+  // pointer (asserted non-null) and only the pointer is stored.
+#define HANDLE_DIEVALUE_SMALL(T)                                               \
+  DIEValue(dwarf::Attribute Attribute, dwarf::Form Form, const DIE##T &V)      \
+      : Ty(is##T), Attribute(Attribute), Form(Form) {                          \
+    construct<DIE##T>(V);                                                      \
+  }
+#define HANDLE_DIEVALUE_LARGE(T)                                               \
+  DIEValue(dwarf::Attribute Attribute, dwarf::Form Form, const DIE##T *V)      \
+      : Ty(is##T), Attribute(Attribute), Form(Form) {                          \
+    assert(V && "Expected valid value");                                       \
+    construct<const DIE##T *>(V);                                              \
+  }
+#include "llvm/CodeGen/DIEValue.def"
+
+  /// Accessors.
+  ///
+  /// The bool conversion is true exactly when the value is not isNone
+  /// (isNone is enumerator 0).
+  /// @{
+  Type getType() const { return Ty; }
+  dwarf::Attribute getAttribute() const { return Attribute; }
+  dwarf::Form getForm() const { return Form; }
+  explicit operator bool() const { return Ty; }
+  /// @}
+
+  // One typed getter getDIE<T>() per kind. Each asserts that the stored kind
+  // matches; large kinds dereference the stored pointer so every getter
+  // returns a const reference.
+#define HANDLE_DIEVALUE_SMALL(T)                                               \
+  const DIE##T &getDIE##T() const {                                            \
+    assert(getType() == is##T && "Expected " #T);                              \
+    return *get<DIE##T>();                                                     \
+  }
+#define HANDLE_DIEVALUE_LARGE(T)                                               \
+  const DIE##T &getDIE##T() const {                                            \
+    assert(getType() == is##T && "Expected " #T);                              \
+    return **get<const DIE##T *>();                                            \
+  }
+#include "llvm/CodeGen/DIEValue.def"
+
+  /// Emit value via the Dwarf writer.
+  void emitValue(const AsmPrinter *AP) const;
+
+  /// Return the size of a value in bytes.
+  unsigned sizeOf(const dwarf::FormParams &FormParams) const;
+
+  void print(raw_ostream &O) const;
+  void dump() const;
+};
+
+/// Link field for nodes of an intrusive list.
+///
+/// Next packs a node pointer with a one-bit flag. A freshly constructed node
+/// points at itself with the flag set; getNext() treats a set flag as "no
+/// successor".
+struct IntrusiveBackListNode {
+  PointerIntPair<IntrusiveBackListNode *, 1> Next;
+
+  IntrusiveBackListNode() : Next(this, true) {}
+
+  /// Return the following node, or nullptr when the flag bit is set.
+  IntrusiveBackListNode *getNext() const {
+    if (Next.getInt())
+      return nullptr;
+    return Next.getPointer();
+  }
+};
+
+/// Untyped core of the intrusive list: only a pointer to the last node is
+/// kept. The last node's Next holds the first node with the flag bit set;
+/// every other node's Next holds its successor with the flag bit clear.
+struct IntrusiveBackListBase {
+  using Node = IntrusiveBackListNode;
+
+  Node *Last = nullptr;
+
+  /// The list is empty exactly when there is no last node.
+  bool empty() const { return !Last; }
+
+  /// Append the unlinked node \p N after the current last node.
+  void push_back(Node &N) {
+    assert(N.Next.getPointer() == &N && "Expected unlinked node");
+    assert(N.Next.getInt() == true && "Expected unlinked node");
+
+    if (Node *Tail = Last) {
+      // N inherits the old tail's (first, flag-set) link, then the old tail
+      // is pointed at N as an ordinary successor.
+      N.Next = Tail->Next;
+      Tail->Next.setPointerAndInt(&N, false);
+    }
+    Last = &N;
+  }
+
+  /// Insert the unlinked node \p N before the current first node.
+  void push_front(Node &N) {
+    assert(N.Next.getPointer() == &N && "Expected unlinked node");
+    assert(N.Next.getInt() == true && "Expected unlinked node");
+
+    if (!Last) {
+      // First node of the list: it is also the last one, and its
+      // self-pointing link is already in the right state.
+      Last = &N;
+      return;
+    }
+
+    // N's successor is the old first node; the last node now wraps to N.
+    N.Next.setPointerAndInt(Last->Next.getPointer(), false);
+    Last->Next.setPointerAndInt(&N, true);
+  }
+};
+
+/// Typed view over IntrusiveBackListBase for node type T.
+///
+/// The base is inherited privately; only empty() is re-exported. Nodes are
+/// linked through their IntrusiveBackListNode part and cast back to T with
+/// static_cast. The list stores only the last node, whose Next pointer leads
+/// back to the first node.
+template <class T> class IntrusiveBackList : IntrusiveBackListBase {
+public:
+  using IntrusiveBackListBase::empty;
+
+  void push_back(T &N) { IntrusiveBackListBase::push_back(N); }
+  void push_front(T &N) { IntrusiveBackListBase::push_front(N); }
+  // back() dereferences Last directly, and front() dereferences a null
+  // pointer when Last is null, so neither may be called on an empty list.
+  T &back() { return *static_cast<T *>(Last); }
+  const T &back() const { return *static_cast<T *>(Last); }
+  T &front() {
+    return *static_cast<T *>(Last ? Last->Next.getPointer() : nullptr);
+  }
+  const T &front() const {
+    return *static_cast<T *>(Last ? Last->Next.getPointer() : nullptr);
+  }
+
+  /// Move every node of \p Other to the back of this list, preserving their
+  /// order, and leave \p Other empty. Does nothing when \p Other is empty.
+  void takeNodes(IntrusiveBackList<T> &Other) {
+    if (Other.empty())
+      return;
+
+    // Start at Other's first node (the last node's Next pointer).
+    T *FirstNode = static_cast<T *>(Other.Last->Next.getPointer());
+    T *IterNode = FirstNode;
+    do {
+      // Keep a pointer to the node and increment the iterator.
+      // getPointer() (not getNext()) is used so the walk follows the
+      // last-to-first link and the loop can detect the wrap-around.
+      T *TmpNode = IterNode;
+      IterNode = static_cast<T *>(IterNode->Next.getPointer());
+
+      // Unlink the node and push it back to this list.
+      TmpNode->Next.setPointerAndInt(TmpNode, true);
+      push_back(*TmpNode);
+    } while (IterNode != FirstNode);
+
+    Other.Last = nullptr;
+  }
+
+  class const_iterator;
+  /// Forward iterator over the nodes. A default-constructed iterator holds
+  /// a null node and serves as the end iterator.
+  class iterator
+      : public iterator_facade_base<iterator, std::forward_iterator_tag, T> {
+    friend class const_iterator;
+
+    Node *N = nullptr;
+
+  public:
+    iterator() = default;
+    explicit iterator(T *N) : N(N) {}
+
+    // getNext() yields nullptr after the last node, which turns the
+    // iterator into the end iterator.
+    iterator &operator++() {
+      N = N->getNext();
+      return *this;
+    }
+
+    // True while the iterator refers to a node.
+    explicit operator bool() const { return N; }
+    T &operator*() const { return *static_cast<T *>(N); }
+
+    bool operator==(const iterator &X) const { return N == X.N; }
+  };
+
+  /// Const counterpart of \a iterator; implicitly constructible from it.
+  class const_iterator
+      : public iterator_facade_base<const_iterator, std::forward_iterator_tag,
+                                    const T> {
+    const Node *N = nullptr;
+
+  public:
+    const_iterator() = default;
+    // Placate MSVC by explicitly scoping 'iterator'.
+    const_iterator(typename IntrusiveBackList<T>::iterator X) : N(X.N) {}
+    explicit const_iterator(const T *N) : N(N) {}
+
+    const_iterator &operator++() {
+      N = N->getNext();
+      return *this;
+    }
+
+    explicit operator bool() const { return N; }
+    const T &operator*() const { return *static_cast<const T *>(N); }
+
+    bool operator==(const const_iterator &X) const { return N == X.N; }
+  };
+
+  // begin() starts at the first node (reached through Last->Next) or equals
+  // end() for an empty list.
+  iterator begin() {
+    return Last ? iterator(static_cast<T *>(Last->Next.getPointer())) : end();
+  }
+  // The const overload reuses the non-const begin() and converts the result
+  // to a const_iterator.
+  const_iterator begin() const {
+    return const_cast<IntrusiveBackList *>(this)->begin();
+  }
+  iterator end() { return iterator(); }
+  const_iterator end() const { return const_iterator(); }
+
+  /// Build an iterator that refers to the node \p N.
+  static iterator toIterator(T &N) { return iterator(&N); }
+  static const_iterator toIterator(const T &N) { return const_iterator(&N); }
+};
+
+/// A list of DIE values.
+///
+/// This is a singly-linked list, but instead of reversing the order of
+/// insertion, we keep a pointer to the back of the list so we can push in
+/// order.
+///
+/// There are two main reasons to choose a linked list over a customized
+/// vector-like data structure.
+///
+///  1. For teardown efficiency, we want DIEs to be BumpPtrAllocated.  Using a
+///     linked list here makes this way easier to accomplish.
+///  2. Carrying an extra pointer per \a DIEValue isn't expensive.  45% of DIEs
+///     have 2 or fewer values, and 90% have 5 or fewer.  A vector would be
+///     over-allocated by 50% on average anyway, the same cost as the
+///     linked-list node.
+class DIEValueList {
+  /// List node: the intrusive link plus the DIEValue payload.
+  struct Node : IntrusiveBackListNode {
+    DIEValue V;
+
+    explicit Node(DIEValue V) : V(V) {}
+  };
+
+  using ListTy = IntrusiveBackList<Node>;
+
+  ListTy List;
+
+public:
+  class const_value_iterator;
+  /// Iterator that wraps a node iterator and dereferences to the node's
+  /// DIEValue instead of the node.
+  class value_iterator
+      : public iterator_adaptor_base<value_iterator, ListTy::iterator,
+                                     std::forward_iterator_tag, DIEValue> {
+    friend class const_value_iterator;
+
+    using iterator_adaptor =
+        iterator_adaptor_base<value_iterator, ListTy::iterator,
+                              std::forward_iterator_tag, DIEValue>;
+
+  public:
+    value_iterator() = default;
+    explicit value_iterator(ListTy::iterator X) : iterator_adaptor(X) {}
+
+    // Forwards to the wrapped node iterator's bool conversion.
+    explicit operator bool() const { return bool(wrapped()); }
+    DIEValue &operator*() const { return wrapped()->V; }
+  };
+
+  /// Const counterpart of \a value_iterator; implicitly constructible from
+  /// it.
+  class const_value_iterator : public iterator_adaptor_base<
+                                   const_value_iterator, ListTy::const_iterator,
+                                   std::forward_iterator_tag, const DIEValue> {
+    using iterator_adaptor =
+        iterator_adaptor_base<const_value_iterator, ListTy::const_iterator,
+                              std::forward_iterator_tag, const DIEValue>;
+
+  public:
+    const_value_iterator() = default;
+    const_value_iterator(DIEValueList::value_iterator X)
+        : iterator_adaptor(X.wrapped()) {}
+    explicit const_value_iterator(ListTy::const_iterator X)
+        : iterator_adaptor(X) {}
+
+    explicit operator bool() const { return bool(wrapped()); }
+    const DIEValue &operator*() const { return wrapped()->V; }
+  };
+
+  using value_range = iterator_range<value_iterator>;
+  using const_value_range = iterator_range<const_value_iterator>;
+
+  /// Append a copy of \p V to the list. The node is created with placement
+  /// new on \p Alloc. Returns an iterator to the newly added value.
+  value_iterator addValue(BumpPtrAllocator &Alloc, const DIEValue &V) {
+    List.push_back(*new (Alloc) Node(V));
+    return value_iterator(ListTy::toIterator(List.back()));
+  }
+  /// Convenience overload: build the DIEValue from \p Attribute, \p Form and
+  /// \p Value, then append it as above.
+  template <class T>
+  value_iterator addValue(BumpPtrAllocator &Alloc, dwarf::Attribute Attribute,
+                    dwarf::Form Form, T &&Value) {
+    return addValue(Alloc, DIEValue(Attribute, Form, std::forward<T>(Value)));
+  }
+
+  /// Take ownership of the nodes in \p Other, and append them to the back of
+  /// the list.
+  void takeValues(DIEValueList &Other) { List.takeNodes(Other.List); }
+
+  /// Range over all values, in list order.
+  value_range values() {
+    return make_range(value_iterator(List.begin()), value_iterator(List.end()));
+  }
+  const_value_range values() const {
+    return make_range(const_value_iterator(List.begin()),
+                      const_value_iterator(List.end()));
+  }
+};
+
+//===--------------------------------------------------------------------===//
+/// A structured debug information entry.  Has an abbreviation which
+/// describes its organization.
+///
+/// A DIE is both a list node (so it can be linked into its parent's child
+/// list) and a DIEValueList holding its own attribute values. The only
+/// constructor is private: DIEs are created through \a DIE::get(), or by the
+/// friend class DIEUnit.
+class DIE : IntrusiveBackListNode, public DIEValueList {
+  friend class IntrusiveBackList<DIE>;
+  friend class DIEUnit;
+
+  /// Dwarf unit relative offset.
+  unsigned Offset = 0;
+  /// Size of instance + children.
+  unsigned Size = 0;
+  /// Abbreviation number; ~0u until \a setAbbrevNumber() is called.
+  unsigned AbbrevNumber = ~0u;
+  /// Dwarf tag code.
+  dwarf::Tag Tag = (dwarf::Tag)0;
+  /// Set to true to force a DIE to emit an abbreviation that says it has
+  /// children even when it doesn't. This is used for unit testing purposes.
+  bool ForceChildren = false;
+  /// Children DIEs.
+  IntrusiveBackList<DIE> Children;
+
+  /// The owner is either the parent DIE for children of other DIEs, or a
+  /// DIEUnit which contains this DIE as its unit DIE.
+  PointerUnion<DIE *, DIEUnit *> Owner;
+
+  explicit DIE(dwarf::Tag Tag) : Tag(Tag) {}
+
+public:
+  // DIEs are neither default-constructible, copyable nor movable.
+  DIE() = delete;
+  DIE(const DIE &RHS) = delete;
+  DIE(DIE &&RHS) = delete;
+  DIE &operator=(const DIE &RHS) = delete;
+  DIE &operator=(const DIE &&RHS) = delete;
+
+  /// Create a DIE with tag \p Tag using placement new on \p Alloc.
+  static DIE *get(BumpPtrAllocator &Alloc, dwarf::Tag Tag) {
+    return new (Alloc) DIE(Tag);
+  }
+
+  // Accessors.
+  unsigned getAbbrevNumber() const { return AbbrevNumber; }
+  dwarf::Tag getTag() const { return Tag; }
+  /// Get the compile/type unit relative offset of this DIE.
+  unsigned getOffset() const {
+    // A real Offset can't be zero because the unit headers are at offset zero.
+    assert(Offset && "Offset being queried before it's been computed.");
+    return Offset;
+  }
+  /// Get the size of this DIE including its children.
+  unsigned getSize() const {
+    // A real Size can't be zero because it includes the non-empty abbrev code.
+    assert(Size && "Size being queried before it's been computed.");
+    return Size;
+  }
+  /// True when the DIE has at least one child or ForceChildren is set.
+  bool hasChildren() const { return ForceChildren || !Children.empty(); }
+  void setForceChildren(bool B) { ForceChildren = B; }
+
+  using child_iterator = IntrusiveBackList<DIE>::iterator;
+  using const_child_iterator = IntrusiveBackList<DIE>::const_iterator;
+  using child_range = iterator_range<child_iterator>;
+  using const_child_range = iterator_range<const_child_iterator>;
+
+  /// Range over the child DIEs, in list order.
+  child_range children() {
+    return make_range(Children.begin(), Children.end());
+  }
+  const_child_range children() const {
+    return make_range(Children.begin(), Children.end());
+  }
+
+  DIE *getParent() const;
+
+  /// Generate the abbreviation for this DIE.
+  ///
+  /// Calculate the abbreviation for this, which should be uniqued and
+  /// eventually used to call \a setAbbrevNumber().
+  DIEAbbrev generateAbbrev() const;
+
+  /// Set the abbreviation number for this DIE.
+  void setAbbrevNumber(unsigned I) { AbbrevNumber = I; }
+
+  /// Get the absolute offset within the .debug_info or .debug_types section
+  /// for this DIE.
+  uint64_t getDebugSectionOffset() const;
+
+  /// Compute the offset of this DIE and all its children.
+  ///
+  /// This function gets called just before we are going to generate the debug
+  /// information and gives each DIE a chance to figure out its CU relative DIE
+  /// offset, unique its abbreviation and fill in the abbreviation code, and
+  /// return the unit offset that points to where the next DIE will be emitted
+  /// within the debug unit section. After this function has been called for all
+  /// DIE objects, the DWARF can be generated since all DIEs will be able to
+  /// properly refer to other DIE objects since all DIEs have calculated their
+  /// offsets.
+  ///
+  /// \param FormParams Used when calculating sizes.
+  /// \param AbbrevSet the abbreviation used to unique DIE abbreviations.
+  /// \param CUOffset the compile/type unit relative offset in bytes.
+  /// \returns the offset for the DIE that follows this DIE within the
+  /// current compile/type unit.
+  unsigned computeOffsetsAndAbbrevs(const dwarf::FormParams &FormParams,
+                                    DIEAbbrevSet &AbbrevSet, unsigned CUOffset);
+
+  /// Climb up the parent chain to get the compile unit or type unit DIE that
+  /// this DIE belongs to.
+  ///
+  /// \returns the compile or type unit DIE that owns this DIE, or NULL if
+  /// this DIE hasn't been added to a unit DIE.
+  const DIE *getUnitDie() const;
+
+  /// Climb up the parent chain to get the compile unit or type unit that this
+  /// DIE belongs to.
+  ///
+  /// \returns the DIEUnit that represents the compile or type unit that owns
+  /// this DIE, or NULL if this DIE hasn't been added to a unit DIE.
+  DIEUnit *getUnit() const;
+
+  void setOffset(unsigned O) { Offset = O; }
+  void setSize(unsigned S) { Size = S; }
+
+  /// Add a child to the back of the child list.
+  ///
+  /// \p Child must not already have a parent (asserted). Its owner is set to
+  /// this DIE. \returns a reference to the added child.
+  DIE &addChild(DIE *Child) {
+    assert(!Child->getParent() && "Child should be orphaned");
+    Child->Owner = this;
+    Children.push_back(*Child);
+    return Children.back();
+  }
+
+  /// Add a child to the front of the child list; otherwise identical to
+  /// \a addChild().
+  DIE &addChildFront(DIE *Child) {
+    assert(!Child->getParent() && "Child should be orphaned");
+    Child->Owner = this;
+    Children.push_front(*Child);
+    return Children.front();
+  }
+
+  /// Find a value in the DIE with the attribute given.
+  ///
+  /// Returns a default-constructed DIEValue (where \a DIEValue::getType()
+  /// gives \a DIEValue::isNone) if no such attribute exists.
+  DIEValue findAttribute(dwarf::Attribute Attribute) const;
+
+  void print(raw_ostream &O, unsigned IndentCount = 0) const;
+  void dump() const;
+};
+
+//===--------------------------------------------------------------------===//
+/// Represents a compile or type unit.
+class DIEUnit {
+  /// The unit DIE (compile unit or type unit). It is embedded by value so
+  /// that the DIEUnit can be found from any DIE: walk the parent backchain up
+  /// to the unit DIE and from there get to the enclosing DIEUnit. That way no
+  /// DIE needs to carry its own pointer to the DIEUnit.
+  DIE Die;
+  /// Section this unit will be emitted in. Whether it is ever set depends on
+  /// the client that is emitting DWARF.
+  MCSection *Section = nullptr;
+  /// .debug_info or .debug_types absolute section offset.
+  uint64_t Offset = 0;
+protected:
+  virtual ~DIEUnit() = default;
+
+public:
+  explicit DIEUnit(dwarf::Tag UnitTag);
+
+  // Units are neither copyable nor movable.
+  DIEUnit(const DIEUnit &RHS) = delete;
+  DIEUnit(DIEUnit &&RHS) = delete;
+  void operator=(const DIEUnit &RHS) = delete;
+  void operator=(const DIEUnit &&RHS) = delete;
+
+  /// Set the section that this DIEUnit will be emitted into.
+  ///
+  /// Only some clients use the section variable. The section may be set at
+  /// most once (asserted).
+  void setSection(MCSection *NewSection) {
+    assert(!Section);
+    Section = NewSection;
+  }
+
+  /// Base implementation returns nullptr; subclasses may override.
+  virtual const MCSymbol *getCrossSectionRelativeBaseAddress() const {
+    return nullptr;
+  }
+
+  /// Return the section that this DIEUnit will be emitted into.
+  ///
+  /// \returns Section pointer which can be NULL.
+  MCSection *getSection() const { return Section; }
+
+  /// Store / read back the absolute section offset of the unit.
+  void setDebugSectionOffset(uint64_t O) { Offset = O; }
+  uint64_t getDebugSectionOffset() const { return Offset; }
+
+  /// Access the embedded unit DIE.
+  DIE &getUnitDie() { return Die; }
+  const DIE &getUnitDie() const { return Die; }
+};
+
+/// Concrete, final DIEUnit that adds nothing beyond forwarding the unit tag
+/// to the base class constructor.
+struct BasicDIEUnit final : DIEUnit {
+  explicit BasicDIEUnit(dwarf::Tag Tag) : DIEUnit(Tag) {}
+};
+
+//===--------------------------------------------------------------------===//
+/// DIELoc - Represents an expression location.
+//
+class DIELoc : public DIEValueList {
+  mutable unsigned Size = 0; // Size in bytes excluding size header.
+
+public:
+  DIELoc() = default;
+
+  /// Calculate the size of the location expression.
+  unsigned computeSize(const dwarf::FormParams &FormParams) const;
+
+  // TODO: move setSize() and Size to DIEValueList.
+  void setSize(unsigned size) { Size = size; }
+
+  /// BestForm - Choose the best form for data.
+  ///
+  /// DWARF versions above 3 always get DW_FORM_exprloc. Older versions get
+  /// the narrowest block form whose length type can represent Size.
+  dwarf::Form BestForm(unsigned DwarfVersion) const {
+    if (DwarfVersion > 3)
+      return dwarf::DW_FORM_exprloc;
+
+    // Pre-DWARF4 location expressions were blocks and not exprloc.
+    const bool FitsChar = static_cast<unsigned char>(Size) == Size;
+    const bool FitsShort = static_cast<unsigned short>(Size) == Size;
+    const bool FitsInt = static_cast<unsigned int>(Size) == Size;
+
+    if (FitsChar)
+      return dwarf::DW_FORM_block1;
+    if (FitsShort)
+      return dwarf::DW_FORM_block2;
+    if (FitsInt)
+      return dwarf::DW_FORM_block4;
+    return dwarf::DW_FORM_block;
+  }
+
+  void emitValue(const AsmPrinter *Asm, dwarf::Form Form) const;
+  unsigned sizeOf(const dwarf::FormParams &, dwarf::Form Form) const;
+
+  void print(raw_ostream &O) const;
+};
+
+//===--------------------------------------------------------------------===//
+/// DIEBlock - Represents a block of values.
+//
+class DIEBlock : public DIEValueList {
+  mutable unsigned Size = 0; // Size in bytes excluding size header.
+
+public:
+  DIEBlock() = default;
+
+  /// Calculate the size of the block.
+  unsigned computeSize(const dwarf::FormParams &FormParams) const;
+
+  // TODO: move setSize() and Size to DIEValueList.
+  void setSize(unsigned size) { Size = size; }
+
+  /// BestForm - Choose the best form for data.
+  ///
+  /// Picks the narrowest block form whose length type can represent Size.
+  dwarf::Form BestForm() const {
+    const bool FitsChar = static_cast<unsigned char>(Size) == Size;
+    const bool FitsShort = static_cast<unsigned short>(Size) == Size;
+    const bool FitsInt = static_cast<unsigned int>(Size) == Size;
+
+    if (FitsChar)
+      return dwarf::DW_FORM_block1;
+    if (FitsShort)
+      return dwarf::DW_FORM_block2;
+    if (FitsInt)
+      return dwarf::DW_FORM_block4;
+    return dwarf::DW_FORM_block;
+  }
+
+  void emitValue(const AsmPrinter *Asm, dwarf::Form Form) const;
+  unsigned sizeOf(const dwarf::FormParams &, dwarf::Form Form) const;
+
+  void print(raw_ostream &O) const;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_DIE_H
--- a/suite/synctools/tablegen/include/llvm/CodeGen/DIEValue.def
+++ b/suite/synctools/tablegen/include/llvm/CodeGen/DIEValue.def
@ -0,0 +1,48 @@
+//===- llvm/CodeGen/DIEValue.def - DIEValue types ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Macros for running through all types of DIEValue.
+//
+//===----------------------------------------------------------------------===//
+
+// The includer must define at least one of the three handler macros before
+// including this file.
+#if !defined(HANDLE_DIEVALUE) && !defined(HANDLE_DIEVALUE_SMALL) &&           \
+    !defined(HANDLE_DIEVALUE_LARGE)
+#error "Missing macro definition of HANDLE_DIEVALUE"
+#endif
+
+// Catch-all handler; expands to nothing when the includer did not supply it.
+#if !defined(HANDLE_DIEVALUE)
+#define HANDLE_DIEVALUE(T)
+#endif
+
+// Handler for entries listed as small; falls back to the catch-all handler.
+#if !defined(HANDLE_DIEVALUE_SMALL)
+#define HANDLE_DIEVALUE_SMALL(T) HANDLE_DIEVALUE(T)
+#endif
+
+// Handler for entries listed as large; falls back to the catch-all handler.
+#if !defined(HANDLE_DIEVALUE_LARGE)
+#define HANDLE_DIEVALUE_LARGE(T) HANDLE_DIEVALUE(T)
+#endif
+
+// One invocation per DIEValue type. The order of this list is preserved
+// exactly, since includers may depend on it.
+HANDLE_DIEVALUE_SMALL(Integer)
+HANDLE_DIEVALUE_SMALL(String)
+HANDLE_DIEVALUE_SMALL(Expr)
+HANDLE_DIEVALUE_SMALL(Label)
+HANDLE_DIEVALUE_LARGE(BaseTypeRef)
+HANDLE_DIEVALUE_LARGE(Delta)
+HANDLE_DIEVALUE_SMALL(Entry)
+HANDLE_DIEVALUE_LARGE(Block)
+HANDLE_DIEVALUE_LARGE(Loc)
+HANDLE_DIEVALUE_SMALL(LocList)
+HANDLE_DIEVALUE_LARGE(InlineString)
+HANDLE_DIEVALUE_LARGE(AddrOffset)
+
+// Leave no handler macro defined after the include.
+#undef HANDLE_DIEVALUE
+#undef HANDLE_DIEVALUE_SMALL
+#undef HANDLE_DIEVALUE_LARGE
--- a/suite/synctools/tablegen/include/llvm/CodeGen/DbgEntityHistoryCalculator.h
+++ b/suite/synctools/tablegen/include/llvm/CodeGen/DbgEntityHistoryCalculator.h
@ -0,0 +1,156 @@
+//===- llvm/CodeGen/DbgEntityHistoryCalculator.h ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_DBGENTITYHISTORYCALCULATOR_H
+#define LLVM_CODEGEN_DBGENTITYHISTORYCALCULATOR_H
+
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/LexicalScopes.h"
+#include <limits>
+#include <utility>
+
+namespace llvm {
+
+class DILocation;
+class DINode;
+class MachineFunction;
+class MachineInstr;
+class TargetRegisterInfo;
+
+/// Record instruction ordering so we can query their relative positions within
+/// a function. Meta instructions are given the same ordinal as the preceding
+/// non-meta instruction. Class state is invalid if MF is modified after
+/// calling initialize.
+class InstructionOrdering {
+public:
+  /// Declared here, defined out of line. isBefore() below expects its
+  /// arguments to belong to the \p MF passed to this call.
+  void initialize(const MachineFunction &MF);
+
+  /// Empty the instruction-number map.
+  void clear() {
+    InstNumberMap.clear();
+  }
+
+  /// Check if instruction \p A comes before \p B, where \p A and \p B both
+  /// belong to the MachineFunction passed to initialize().
+  bool isBefore(const MachineInstr *A, const MachineInstr *B) const;
+
+private:
+  /// Order number assigned to each instruction, keyed by instruction pointer.
+  DenseMap<const MachineInstr *, unsigned> InstNumberMap;
+};
+
+/// For each user variable, keep a list of instruction ranges where this
+/// variable is accessible. The variables are listed in order of appearance.
+class DbgValueHistoryMap {
+public:
+  /// Index in the entry vector.
+  using EntryIndex = size_t;
+
+  /// Sentinel index (the largest EntryIndex value) meaning that an entry is
+  /// valid until the end of the function.
+  static const EntryIndex NoEntry = std::numeric_limits<EntryIndex>::max();
+
+  /// Specifies a change in a variable's debug value history.
+  ///
+  /// There exist two types of entries:
+  ///
+  /// * Debug value entry:
+  ///
+  ///   A new debug value becomes live. If the entry's \p EndIndex is \p NoEntry,
+  ///   the value is valid until the end of the function. For other values, the
+  ///   index points to the entry in the entry vector that ends this debug
+  ///   value. The ending entry can either be an overlapping debug value, or
+  ///   an instruction that clobbers the value.
+  ///
+  /// * Clobbering entry:
+  ///
+  ///   This entry's instruction clobbers one or more preceding
+  ///   register-described debug values that have their end index
+  ///   set to this entry's position in the entry vector.
+  class Entry {
+    friend DbgValueHistoryMap;
+
+  public:
+    enum EntryKind { DbgValue, Clobber };
+
+    /// A new entry starts out open: its end index is NoEntry.
+    Entry(const MachineInstr *Instr, EntryKind Kind)
+        : Instr(Instr, Kind), EndIndex(NoEntry) {}
+
+    const MachineInstr *getInstr() const {
+      return Instr.getPointer();
+    }
+    EntryIndex getEndIndex() const {
+      return EndIndex;
+    }
+    EntryKind getEntryKind() const {
+      return Instr.getInt();
+    }
+
+    bool isClobber() const {
+      return getEntryKind() == Clobber;
+    }
+    bool isDbgValue() const {
+      return getEntryKind() == DbgValue;
+    }
+    /// True once the end index has been moved away from NoEntry.
+    bool isClosed() const {
+      return EndIndex != NoEntry;
+    }
+
+    void endEntry(EntryIndex EndIndex);
+
+  private:
+    /// Instruction pointer with the entry kind packed into one spare bit.
+    PointerIntPair<const MachineInstr *, 1, EntryKind> Instr;
+    EntryIndex EndIndex;
+  };
+  using Entries = SmallVector<Entry, 4>;
+  using InlinedEntity = std::pair<const DINode *, const DILocation *>;
+  using EntriesMap = MapVector<InlinedEntity, Entries>;
+
+private:
+  EntriesMap VarEntries;
+
+public:
+  bool startDbgValue(InlinedEntity Var, const MachineInstr &MI,
+                     EntryIndex &NewIndex);
+  EntryIndex startClobber(InlinedEntity Var, const MachineInstr &MI);
+
+  /// Return the entry at position \p Index in the entry vector stored for
+  /// \p Var.
+  Entry &getEntry(InlinedEntity Var, EntryIndex Index) {
+    return VarEntries[Var][Index];
+  }
+
+  /// Test whether a vector of entries features any non-empty locations. It
+  /// could have no entries, or only DBG_VALUE $noreg entries.
+  bool hasNonEmptyLocation(const Entries &Entries) const;
+
+  /// Drop location ranges which exist entirely outside each variable's scope.
+  void trimLocationRanges(const MachineFunction &MF, LexicalScopes &LScopes,
+                          const InstructionOrdering &Ordering);
+
+  bool empty() const {
+    return VarEntries.empty();
+  }
+  void clear() {
+    VarEntries.clear();
+  }
+  EntriesMap::const_iterator begin() const {
+    return VarEntries.begin();
+  }
+  EntriesMap::const_iterator end() const {
+    return VarEntries.end();
+  }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  LLVM_DUMP_METHOD void dump() const;
+#endif
+};
+
+/// For each inlined instance of a source-level label, keep the corresponding
+/// DBG_LABEL instruction. The DBG_LABEL instruction could be used to generate
+/// a temporary (assembler) label before it.
+class DbgLabelInstrMap {
+public:
+  using InlinedEntity = std::pair<const DINode *, const DILocation *>;
+  using InstrMap = MapVector<InlinedEntity, const MachineInstr *>;
+
+private:
+  /// One instruction pointer per inlined label entity.
+  InstrMap LabelInstr;
+
+public:
+  /// Declared here, defined out of line.
+  void addInstr(InlinedEntity Label, const MachineInstr &MI);
+
+  bool empty() const {
+    return LabelInstr.empty();
+  }
+  void clear() {
+    LabelInstr.clear();
+  }
+  InstrMap::const_iterator begin() const {
+    return LabelInstr.begin();
+  }
+  InstrMap::const_iterator end() const {
+    return LabelInstr.end();
+  }
+};
+
+/// Declaration only; the definition is not in this header.
+/// NOTE(review): \p DbgValues and \p DbgLabels are taken by non-const
+/// reference, so they are presumably outputs filled from \p MF — confirm
+/// against the implementation.
+void calculateDbgEntityHistory(const MachineFunction *MF,
+                               const TargetRegisterInfo *TRI,
+                               DbgValueHistoryMap &DbgValues,
+                               DbgLabelInstrMap &DbgLabels);
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_DBGENTITYHISTORYCALCULATOR_H
--- a/suite/synctools/tablegen/include/llvm/CodeGen/DebugHandlerBase.h
+++ b/suite/synctools/tablegen/include/llvm/CodeGen/DebugHandlerBase.h
@ -0,0 +1,146 @@
+//===-- llvm/CodeGen/DebugHandlerBase.h -----------------------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Common functionality for different debug information format backends.
+// LLVM currently supports DWARF and CodeView.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_DEBUGHANDLERBASE_H
+#define LLVM_CODEGEN_DEBUGHANDLERBASE_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/CodeGen/AsmPrinterHandler.h"
+#include "llvm/CodeGen/DbgEntityHistoryCalculator.h"
+#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+
+namespace llvm {
+
+class AsmPrinter;
+class MachineInstr;
+class MachineModuleInfo;
+
+/// Represents the location at which a variable is stored.
+struct DbgVariableLocation {
+  /// Base register. No in-class initializer is given, so a
+  /// default-initialized DbgVariableLocation leaves this field unset.
+  unsigned Register;
+
+  /// Chain of offsetted loads necessary to load the value if it lives in
+  /// memory. Every load except for the last is pointer-sized.
+  SmallVector<int64_t, 1> LoadChain;
+
+  /// Present if the location is part of a larger variable.
+  llvm::Optional<llvm::DIExpression::FragmentInfo> FragmentInfo;
+
+  /// Extract a VariableLocation from a MachineInstr.
+  /// This will only work if Instruction is a debug value instruction
+  /// and the associated DIExpression is in one of the supported forms.
+  /// If these requirements are not met, the returned Optional will not
+  /// have a value.
+  /// Declared here; the definition is out of line.
+  static Optional<DbgVariableLocation>
+  extractFromMachineInstruction(const MachineInstr &Instruction);
+};
+
+/// Base class for debug information backends. Common functionality related to
+/// tracking which variables and scopes are alive at a given PC live here.
+class DebugHandlerBase : public AsmPrinterHandler {
+protected:
+  /// Protected constructor: only derived classes can create instances.
+  DebugHandlerBase(AsmPrinter *A);
+
+  /// Target of debug info emission.
+  AsmPrinter *Asm;
+
+  /// Collected machine module information.
+  MachineModuleInfo *MMI;
+
+  /// Previous instruction's location information. This is used to
+  /// determine label location to indicate scope boundaries in debug info.
+  /// We track the previous instruction's source location (if not line 0),
+  /// whether it was a label, and its parent BB.
+  DebugLoc PrevInstLoc;
+  MCSymbol *PrevLabel = nullptr;
+  const MachineBasicBlock *PrevInstBB = nullptr;
+
+  /// This location indicates end of function prologue and beginning of
+  /// function body.
+  DebugLoc PrologEndLoc;
+
+  /// If nonnull, stores the current machine instruction we're processing.
+  const MachineInstr *CurMI = nullptr;
+
+  LexicalScopes LScopes;
+
+  /// History of DBG_VALUE and clobber instructions for each user
+  /// variable.  Variables are listed in order of appearance.
+  DbgValueHistoryMap DbgValues;
+
+  /// Mapping of inlined labels and DBG_LABEL machine instruction.
+  DbgLabelInstrMap DbgLabels;
+
+  /// Maps instruction with label emitted before instruction.
+  /// FIXME: Make this private from DwarfDebug, we have the necessary accessors
+  /// for it.
+  DenseMap<const MachineInstr *, MCSymbol *> LabelsBeforeInsn;
+
+  /// Maps instruction with label emitted after instruction.
+  DenseMap<const MachineInstr *, MCSymbol *> LabelsAfterInsn;
+
+  /// Identify instructions that are marking the beginning of or
+  /// ending of a scope.
+  void identifyScopeMarkers();
+
+  /// Ensure that a label will be emitted before MI.
+  /// Registers \p MI in LabelsBeforeInsn with a null symbol as placeholder.
+  void requestLabelBeforeInsn(const MachineInstr *MI) {
+    LabelsBeforeInsn.insert(std::make_pair(MI, nullptr));
+  }
+
+  /// Ensure that a label will be emitted after MI.
+  /// Registers \p MI in LabelsAfterInsn with a null symbol as placeholder.
+  void requestLabelAfterInsn(const MachineInstr *MI) {
+    LabelsAfterInsn.insert(std::make_pair(MI, nullptr));
+  }
+
+  // Hooks for derived classes: the first two are pure virtual and must be
+  // implemented; skippedNonDebugFunction() defaults to doing nothing.
+  virtual void beginFunctionImpl(const MachineFunction *MF) = 0;
+  virtual void endFunctionImpl(const MachineFunction *MF) = 0;
+  virtual void skippedNonDebugFunction() {}
+
+private:
+  /// Exposed read-only through getInstOrdering().
+  InstructionOrdering InstOrdering;
+
+  // AsmPrinterHandler overrides.
+public:
+  void beginModule(Module *M) override;
+
+  void beginInstruction(const MachineInstr *MI) override;
+  void endInstruction() override;
+
+  void beginFunction(const MachineFunction *MF) override;
+  void endFunction(const MachineFunction *MF) override;
+
+  void beginBasicBlock(const MachineBasicBlock &MBB) override;
+  void endBasicBlock(const MachineBasicBlock &MBB) override;
+
+  /// Return Label preceding the instruction.
+  MCSymbol *getLabelBeforeInsn(const MachineInstr *MI);
+
+  /// Return Label immediately following the instruction.
+  MCSymbol *getLabelAfterInsn(const MachineInstr *MI);
+
+  /// If this type is derived from a base type then return base type size.
+  static uint64_t getBaseTypeSize(const DIType *Ty);
+
+  /// Return true if type encoding is unsigned.
+  static bool isUnsignedDIType(const DIType *Ty);
+
+  /// Read-only access to the private InstOrdering member.
+  const InstructionOrdering &getInstOrdering() const { return InstOrdering; }
+};
+
+} // namespace llvm
+
+#endif
--- a/suite/synctools/tablegen/include/llvm/CodeGen/DwarfStringPoolEntry.h
+++ b/suite/synctools/tablegen/include/llvm/CodeGen/DwarfStringPoolEntry.h
@ -0,0 +1,71 @@
+//===- llvm/CodeGen/DwarfStringPoolEntry.h - String pool entry --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_DWARFSTRINGPOOLENTRY_H
+#define LLVM_CODEGEN_DWARFSTRINGPOOLENTRY_H
+
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/StringMap.h"
+
+namespace llvm {
+
+class MCSymbol;
+
+/// Data for a string pool entry.
+struct DwarfStringPoolEntry {
+  /// Sentinel Index value; -1 converts to the largest unsigned value.
+  static constexpr unsigned NotIndexed = -1;
+
+  MCSymbol *Symbol;
+  uint64_t Offset;
+  unsigned Index;
+
+  /// True when Index holds anything other than the NotIndexed sentinel.
+  bool isIndexed() const {
+    const bool HasIndex = Index != NotIndexed;
+    return HasIndex;
+  }
+};
+
+/// String pool entry reference.
+class DwarfStringPoolEntryRef {
+  /// Pointer to the referenced map entry, with the "indexed" flag packed
+  /// into one spare bit.
+  PointerIntPair<const StringMapEntry<DwarfStringPoolEntry> *, 1, bool>
+      MapEntryAndIndexed;
+
+  const StringMapEntry<DwarfStringPoolEntry> *getMapEntry() const {
+    return MapEntryAndIndexed.getPointer();
+  }
+
+public:
+  DwarfStringPoolEntryRef() = default;
+  DwarfStringPoolEntryRef(const StringMapEntry<DwarfStringPoolEntry> &Entry,
+                          bool Indexed)
+      : MapEntryAndIndexed(&Entry, Indexed) {}
+
+  /// True when this reference points at a map entry.
+  explicit operator bool() const {
+    return getMapEntry() != nullptr;
+  }
+
+  /// Return the entry's symbol; asserts that one is present.
+  MCSymbol *getSymbol() const {
+    const auto *Entry = getMapEntry();
+    assert(Entry->second.Symbol && "No symbol available!");
+    return Entry->second.Symbol;
+  }
+
+  uint64_t getOffset() const {
+    return getMapEntry()->second.Offset;
+  }
+
+  /// Value of the flag stored alongside the pointer.
+  bool isIndexed() const {
+    return MapEntryAndIndexed.getInt();
+  }
+
+  /// Return the entry's index; asserts that both this reference and the
+  /// entry itself are marked as indexed.
+  unsigned getIndex() const {
+    assert(isIndexed());
+    const auto *Entry = getMapEntry();
+    assert(Entry->getValue().isIndexed());
+    return Entry->second.Index;
+  }
+
+  StringRef getString() const {
+    return getMapEntry()->first();
+  }
+
+  /// Return the entire string pool entry for convenience.
+  DwarfStringPoolEntry getEntry() const {
+    return getMapEntry()->getValue();
+  }
+
+  /// Two references are equal when they point at the same map entry; the
+  /// indexed flag is not compared.
+  bool operator==(const DwarfStringPoolEntryRef &X) const {
+    return getMapEntry() == X.getMapEntry();
+  }
+  bool operator!=(const DwarfStringPoolEntryRef &X) const {
+    return !(*this == X);
+  }
+};
+
+} // end namespace llvm
+
+#endif
--- a/suite/synctools/tablegen/include/llvm/CodeGen/EdgeBundles.h
+++ b/suite/synctools/tablegen/include/llvm/CodeGen/EdgeBundles.h
@ -0,0 +1,62 @@
+//===-------- EdgeBundles.h - Bundles of CFG edges --------------*- c++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The EdgeBundles analysis forms equivalence classes of CFG edges such that all
+// edges leaving a machine basic block are in the same bundle, and all edges
+// entering a machine basic block are in the same bundle.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_EDGEBUNDLES_H
+#define LLVM_CODEGEN_EDGEBUNDLES_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/IntEqClasses.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+namespace llvm {
+
+class EdgeBundles : public MachineFunctionPass {
+  /// Machine function most recently stored in this pass. Starts out null so
+  /// that getMachineFunction() never hands back an indeterminate pointer.
+  const MachineFunction *MF = nullptr;
+
+  /// EC - Each edge bundle is an equivalence class. The keys are:
+  ///   2*BB->getNumber()   -> Ingoing bundle.
+  ///   2*BB->getNumber()+1 -> Outgoing bundle.
+  IntEqClasses EC;
+
+  /// Blocks - Map each bundle to a list of basic block numbers.
+  SmallVector<SmallVector<unsigned, 8>, 4> Blocks;
+
+public:
+  static char ID;
+  EdgeBundles() : MachineFunctionPass(ID) {}
+
+  /// getBundle - Return the ingoing (Out = false) or outgoing (Out = true)
+  /// bundle number for basic block #N. The lookup key is 2 * N + Out, which
+  /// matches the key layout documented on EC.
+  unsigned getBundle(unsigned N, bool Out) const { return EC[2 * N + Out]; }
+
+  /// getNumBundles - Return the total number of bundles in the CFG.
+  unsigned getNumBundles() const { return EC.getNumClasses(); }
+
+  /// getBlocks - Return an array of blocks that are connected to Bundle.
+  /// \p Bundle is used directly as an index into Blocks.
+  ArrayRef<unsigned> getBlocks(unsigned Bundle) const { return Blocks[Bundle]; }
+
+  /// getMachineFunction - Return the last machine function computed, or
+  /// nullptr while MF still holds its initial value.
+  const MachineFunction *getMachineFunction() const { return MF; }
+
+  /// view - Visualize the annotated bipartite CFG with Graphviz.
+  void view() const;
+
+private:
+  // MachineFunctionPass overrides; defined out of line.
+  bool runOnMachineFunction(MachineFunction&) override;
+  void getAnalysisUsage(AnalysisUsage&) const override;
+};
+
+} // end namespace llvm
+
+#endif
--- a/Show More
+++ b/Show More