From ed6f89bcdc29f769ceef37955ed25716c34631bc Mon Sep 17 00:00:00 2001
From: Artem Tamazov <artem.tamazov@amd.com>
Date: Wed, 27 Apr 2016 16:20:23 +0000
Subject: [PATCH] [AMDGPU][llvm-mc] Add support of TTMP quads. Rework M0
 exclusion for SMRD.

Added support of TTMP quads.
Reworked M0 exclusion machinery for SMRD and similar instructions
to enable usage of TTMP registers in those instructions as destinations.
Tests added.

Differential Revision: http://reviews.llvm.org/D19342

llvm-svn: 267733
---
 .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp      |  3 +-
 .../Disassembler/AMDGPUDisassembler.cpp       |  6 ++
 .../AMDGPU/Disassembler/AMDGPUDisassembler.h  |  1 +
 .../AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp  |  5 +-
 lib/Target/AMDGPU/SIInstructions.td           | 14 ++--
 lib/Target/AMDGPU/SIRegisterInfo.td           | 23 +++++--
 test/MC/AMDGPU/mubuf.s                        | 67 +++++++++++++++++++
 test/MC/AMDGPU/reg-syntax-extra.s             | 28 ++++++++
 test/MC/AMDGPU/smrd.s                         | 12 ++++
 9 files changed, 146 insertions(+), 13 deletions(-)

diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index af85e64db20..72b0c5afc72 100644
--- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -611,13 +611,14 @@ static int getRegClass(RegisterKind Is, unsigned RegWidth) {
       default: return -1;
       case 1: return AMDGPU::TTMP_32RegClassID;
       case 2: return AMDGPU::TTMP_64RegClassID;
+      case 4: return AMDGPU::TTMP_128RegClassID;
     }
   } else if (Is == IS_SGPR) {
     switch (RegWidth) {
       default: return -1;
       case 1: return AMDGPU::SGPR_32RegClassID;
       case 2: return AMDGPU::SGPR_64RegClassID;
-      case 4: return AMDGPU::SReg_128RegClassID;
+      case 4: return AMDGPU::SGPR_128RegClassID;
       case 8: return AMDGPU::SReg_256RegClassID;
       case 16: return AMDGPU::SReg_512RegClassID;
     }
diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 2990b570f53..bbec73fda91 100644
--- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -68,6 +68,7 @@ DECODE_OPERAND(VReg_128)
 
 DECODE_OPERAND(SGPR_32)
 DECODE_OPERAND(SReg_32)
+DECODE_OPERAND(SReg_32_XM0)
 DECODE_OPERAND(SReg_64)
 DECODE_OPERAND(SReg_128)
 DECODE_OPERAND(SReg_256)
@@ -248,6 +249,11 @@ MCOperand AMDGPUDisassembler::decodeOperand_SReg_32(unsigned Val) const {
   return decodeSrcOp(OP32, Val);
 }
 
+MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XM0(unsigned Val) const {
+  // SReg_32_XM0 is SReg_32 without M0
+  return decodeOperand_SReg_32(Val);
+}
+
 MCOperand AMDGPUDisassembler::decodeOperand_SReg_64(unsigned Val) const {
   // see decodeOperand_SReg_32 comment
   return decodeSrcOp(OP64, Val);
diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index f1ba30e7bf5..680ed3068a1 100644
--- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -64,6 +64,7 @@ namespace llvm {
 
     MCOperand decodeOperand_SGPR_32(unsigned Val) const;
     MCOperand decodeOperand_SReg_32(unsigned Val) const;
+    MCOperand decodeOperand_SReg_32_XM0(unsigned Val) const;
     MCOperand decodeOperand_SReg_64(unsigned Val) const;
     MCOperand decodeOperand_SReg_128(unsigned Val) const;
     MCOperand decodeOperand_SReg_256(unsigned Val) const;
diff --git a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
index 1ea8c77be69..03252db84b7 100644
--- a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
@@ -240,9 +240,12 @@ void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O,
   } else if (MRI.getRegClass(AMDGPU::VReg_128RegClassID).contains(reg)) {
     Type = "v";
     NumRegs = 4;
-  } else  if (MRI.getRegClass(AMDGPU::SReg_128RegClassID).contains(reg)) {
+  } else  if (MRI.getRegClass(AMDGPU::SGPR_128RegClassID).contains(reg)) {
     Type = "s";
     NumRegs = 4;
+  } else  if (MRI.getRegClass(AMDGPU::TTMP_128RegClassID).contains(reg)) {
+    Type = "ttmp";
+    NumRegs = 4;
   } else if (MRI.getRegClass(AMDGPU::VReg_96RegClassID).contains(reg)) {
     Type = "v";
     NumRegs = 3;
diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td
index 8b730107241..dfd6eb61e1a 100644
--- a/lib/Target/AMDGPU/SIInstructions.td
+++ b/lib/Target/AMDGPU/SIInstructions.td
@@ -60,17 +60,17 @@ defm EXP : EXP_m;
 // SMRD Instructions
 //===----------------------------------------------------------------------===//
 
-// We are using the SGPR_32 and not the SReg_32 register class for 32-bit
-// SMRD instructions, because the SGPR_32 register class does not include M0
+// We are using the SReg_32_XM0 and not the SReg_32 register class for 32-bit
+// SMRD instructions, because the SReg_32_XM0 register class does not include M0
 // and writing to M0 from an SMRD instruction will hang the GPU.
-defm S_LOAD_DWORD : SMRD_Helper <smrd<0x00>, "s_load_dword", SReg_64, SGPR_32>;
+defm S_LOAD_DWORD : SMRD_Helper <smrd<0x00>, "s_load_dword", SReg_64, SReg_32_XM0>;
 defm S_LOAD_DWORDX2 : SMRD_Helper <smrd<0x01>, "s_load_dwordx2", SReg_64, SReg_64>;
 defm S_LOAD_DWORDX4 : SMRD_Helper <smrd<0x02>, "s_load_dwordx4", SReg_64, SReg_128>;
 defm S_LOAD_DWORDX8 : SMRD_Helper <smrd<0x03>, "s_load_dwordx8", SReg_64, SReg_256>;
 defm S_LOAD_DWORDX16 : SMRD_Helper <smrd<0x04>, "s_load_dwordx16", SReg_64, SReg_512>;
 
 defm S_BUFFER_LOAD_DWORD : SMRD_Helper <
-  smrd<0x08>, "s_buffer_load_dword", SReg_128, SGPR_32
+  smrd<0x08>, "s_buffer_load_dword", SReg_128, SReg_32_XM0
 >;
 
 defm S_BUFFER_LOAD_DWORDX2 : SMRD_Helper <
@@ -2087,9 +2087,9 @@ multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
 }
 
 // It's unclear whether you can use M0 as the output of v_readlane_b32
-// instructions, so use SGPR_32 register class for spills to prevent
+// instructions, so use SReg_32_XM0 register class for spills to prevent
 // this from happening.
-defm SI_SPILL_S32  : SI_SPILL_SGPR <SGPR_32>;
+defm SI_SPILL_S32  : SI_SPILL_SGPR <SReg_32_XM0>;
 defm SI_SPILL_S64  : SI_SPILL_SGPR <SReg_64>;
 defm SI_SPILL_S128 : SI_SPILL_SGPR <SReg_128>;
 defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>;
@@ -3431,7 +3431,7 @@ def : ZExt_i64_i1_Pat<anyext>;
 def : Pat <
   (i64 (sext i32:$src)),
     (REG_SEQUENCE SReg_64, $src, sub0,
-    (i32 (COPY_TO_REGCLASS (S_ASHR_I32 $src, 31), SGPR_32)), sub1)
+    (i32 (COPY_TO_REGCLASS (S_ASHR_I32 $src, 31), SReg_32_XM0)), sub1)
 >;
 
 def : Pat <
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.td b/lib/Target/AMDGPU/SIRegisterInfo.td
index 384b7617b25..6c6fa3c5f9a 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -132,7 +132,7 @@ def SGPR_64Regs : RegisterTuples<[sub0, sub1],
                               (add (decimate (shl SGPR_32, 1), 2))]>;
 
 // SGPR 128-bit registers
-def SGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
+def SGPR_128Regs : RegisterTuples<[sub0, sub1, sub2, sub3],
                               [(add (decimate SGPR_32, 4)),
                                (add (decimate (shl SGPR_32, 1), 4)),
                                (add (decimate (shl SGPR_32, 2), 4)),
@@ -255,6 +255,13 @@ def SReg_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
    TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI)
 >;
 
+// Subset of SReg_32 without M0 for SMRD instructions and alike.
+// See comments in SIInstructions.td for more info.
+def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32], 32,
+  (add SGPR_32, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI,
+   TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI)
+>;
+
 def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 32, (add SGPR_64Regs)>;
 
 def TTMP_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 32, (add TTMP_64Regs)> {
@@ -265,11 +272,19 @@ def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1], 32,
   (add SGPR_64, VCC, EXEC, FLAT_SCR, TTMP_64, TBA, TMA)
 >;
 
-def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add SGPR_128)> {
-  // Requires 2 s_mov_b64 to copy
-  let CopyCost = 2;
+// Requires 2 s_mov_b64 to copy
+let CopyCost = 2 in {
+
+def SGPR_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add SGPR_128Regs)>;
+
+def TTMP_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add TTMP_128Regs)> {
+  let isAllocatable = 0;
 }
 
+def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add SGPR_128, TTMP_128)>;
+
+} // End CopyCost = 2
+
 def SReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256)> {
   // Requires 4 s_mov_b64 to copy
   let CopyCost = 4;
diff --git a/test/MC/AMDGPU/mubuf.s b/test/MC/AMDGPU/mubuf.s
index 84953319c53..3eb794a31c5 100644
--- a/test/MC/AMDGPU/mubuf.s
+++ b/test/MC/AMDGPU/mubuf.s
@@ -18,6 +18,10 @@ buffer_load_dword v1, s[4:7], s1
 // SICI: buffer_load_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x01,0x01]
 // VI:   buffer_load_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x50,0xe0,0x00,0x01,0x01,0x01]
 
+buffer_load_dword v1, ttmp[4:7], s1
+// SICI: buffer_load_dword v1, ttmp[4:7], s1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x1d,0x01]
+// VI:   buffer_load_dword v1, ttmp[4:7], s1 ; encoding: [0x00,0x00,0x50,0xe0,0x00,0x01,0x1d,0x01]
+
 buffer_load_dword v1, s[4:7], s1 offset:4
 // SICI: buffer_load_dword v1, s[4:7], s1 offset:4 ; encoding: [0x04,0x00,0x30,0xe0,0x00,0x01,0x01,0x01]
 // VI:   buffer_load_dword v1, s[4:7], s1 offset:4 ; encoding: [0x04,0x00,0x50,0xe0,0x00,0x01,0x01,0x01]
@@ -42,6 +46,9 @@ buffer_load_dword v1, s[4:7], s1 offset:4 glc slc tfe
 // SICI: buffer_load_dword v1, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x30,0xe0,0x00,0x01,0xc1,0x01]
 // VI:   buffer_load_dword v1, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x52,0xe0,0x00,0x01,0x81,0x01]
 
+buffer_load_dword v1, ttmp[4:7], s1 offset:4 glc slc tfe
+// SICI: buffer_load_dword v1, ttmp[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x30,0xe0,0x00,0x01,0xdd,0x01]
+// VI:   buffer_load_dword v1, ttmp[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x52,0xe0,0x00,0x01,0x9d,0x01]
 
 //===----------------------------------------------------------------------===//
 // load - vgpr offset
@@ -75,6 +82,10 @@ buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 glc slc tfe
 // SICI: buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x30,0xe0,0x02,0x01,0xc1,0x01]
 // VI:   buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x52,0xe0,0x02,0x01,0x81,0x01]
 
+buffer_load_dword v1, v2, ttmp[4:7], s1 offen offset:4 glc slc tfe
+// SICI: buffer_load_dword v1, v2, ttmp[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x30,0xe0,0x02,0x01,0xdd,0x01]
+// VI:   buffer_load_dword v1, v2, ttmp[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x52,0xe0,0x02,0x01,0x9d,0x01]
+
 //===----------------------------------------------------------------------===//
 // load - vgpr index
 //===----------------------------------------------------------------------===//
@@ -107,6 +118,10 @@ buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 glc slc tfe
 // SICI: buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x30,0xe0,0x02,0x01,0xc1,0x01]
 // VI:   buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x52,0xe0,0x02,0x01,0x81,0x01]
 
+buffer_load_dword v1, v2, ttmp[4:7], s1 idxen offset:4 glc slc tfe
+// SICI: buffer_load_dword v1, v2, ttmp[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x30,0xe0,0x02,0x01,0xdd,0x01]
+// VI:   buffer_load_dword v1, v2, ttmp[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x52,0xe0,0x02,0x01,0x9d,0x01]
+
 //===----------------------------------------------------------------------===//
 // load - vgpr index and offset
 //===----------------------------------------------------------------------===//
@@ -139,6 +154,10 @@ buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe
 // SICI: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x30,0xe0,0x02,0x01,0xc1,0x01]
 // VI:   buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x52,0xe0,0x02,0x01,0x81,0x01]
 
+buffer_load_dword v1, v[2:3], ttmp[4:7], ttmp1 idxen offen offset:4 glc slc tfe
+// SICI: buffer_load_dword v1, v[2:3], ttmp[4:7], ttmp1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x30,0xe0,0x02,0x01,0xdd,0x71]
+// VI:   buffer_load_dword v1, v[2:3], ttmp[4:7], ttmp1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x52,0xe0,0x02,0x01,0x9d,0x71]
+
 //===----------------------------------------------------------------------===//
 // load - addr64
 //===----------------------------------------------------------------------===//
@@ -171,6 +190,10 @@ buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc slc tfe
 // SICI: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc slc tfe ; encoding: [0x04,0xc0,0x30,0xe0,0x02,0x01,0xc1,0x01]
 // NOVI: error: instruction not supported on this GPU
 
+buffer_load_dword v1, v[2:3], ttmp[4:7], ttmp1 addr64 offset:4 glc slc tfe
+// SICI: buffer_load_dword v1, v[2:3], ttmp[4:7], ttmp1 addr64 offset:4 glc slc tfe ; encoding: [0x04,0xc0,0x30,0xe0,0x02,0x01,0xdd,0x71]
+// NOVI: error: instruction not supported on this GPU
+
 //===----------------------------------------------------------------------===//
 // store - immediate offset only
 //===----------------------------------------------------------------------===//
@@ -203,6 +226,10 @@ buffer_store_dword v1, s[4:7], s1 offset:4 glc slc tfe
 // SICI: buffer_store_dword v1, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x70,0xe0,0x00,0x01,0xc1,0x01]
 // VI:   buffer_store_dword v1, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x72,0xe0,0x00,0x01,0x81,0x01]
 
+buffer_store_dword v1, ttmp[4:7], ttmp1 offset:4 glc slc tfe
+// SICI: buffer_store_dword v1, ttmp[4:7], ttmp1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x70,0xe0,0x00,0x01,0xdd,0x71]
+// VI:   buffer_store_dword v1, ttmp[4:7], ttmp1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x72,0xe0,0x00,0x01,0x9d,0x71]
+
 //===----------------------------------------------------------------------===//
 // store - vgpr offset
 //===----------------------------------------------------------------------===//
@@ -235,6 +262,10 @@ buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 glc slc tfe
 // SICI: buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x70,0xe0,0x02,0x01,0xc1,0x01]
 // VI:   buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x72,0xe0,0x02,0x01,0x81,0x01]
 
+buffer_store_dword v1, v2, ttmp[4:7], ttmp1 offen offset:4 glc slc tfe
+// SICI: buffer_store_dword v1, v2, ttmp[4:7], ttmp1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x70,0xe0,0x02,0x01,0xdd,0x71]
+// VI:   buffer_store_dword v1, v2, ttmp[4:7], ttmp1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x72,0xe0,0x02,0x01,0x9d,0x71]
+
 //===----------------------------------------------------------------------===//
 // store - vgpr index
 //===----------------------------------------------------------------------===//
@@ -267,6 +298,10 @@ buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 glc slc tfe
 // SICI: buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x70,0xe0,0x02,0x01,0xc1,0x01]
 // VI:   buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x72,0xe0,0x02,0x01,0x81,0x01]
 
+buffer_store_dword v1, v2, ttmp[4:7], ttmp1 idxen offset:4 glc slc tfe
+// SICI: buffer_store_dword v1, v2, ttmp[4:7], ttmp1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x70,0xe0,0x02,0x01,0xdd,0x71]
+// VI:   buffer_store_dword v1, v2, ttmp[4:7], ttmp1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x72,0xe0,0x02,0x01,0x9d,0x71]
+
 //===----------------------------------------------------------------------===//
 // store - vgpr index and offset
 //===----------------------------------------------------------------------===//
@@ -299,6 +334,10 @@ buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe
 // SICI: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x70,0xe0,0x02,0x01,0xc1,0x01]
 // VI:   buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x72,0xe0,0x02,0x01,0x81,0x01]
 
+buffer_store_dword v1, v[2:3], ttmp[4:7], ttmp1 idxen offen offset:4 glc slc tfe
+// SICI: buffer_store_dword v1, v[2:3], ttmp[4:7], ttmp1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x70,0xe0,0x02,0x01,0xdd,0x71]
+// VI:   buffer_store_dword v1, v[2:3], ttmp[4:7], ttmp1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x72,0xe0,0x02,0x01,0x9d,0x71]
+
 //===----------------------------------------------------------------------===//
 // store - addr64
 //===----------------------------------------------------------------------===//
@@ -331,6 +370,10 @@ buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc slc tfe
 // SICI: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc slc tfe ; encoding: [0x04,0xc0,0x70,0xe0,0x02,0x01,0xc1,0x01]
 // NOVI: error: instruction not supported on this GPU
 
+buffer_store_dword v1, v[2:3], ttmp[4:7], ttmp1 addr64 offset:4 glc slc tfe
+// SICI: buffer_store_dword v1, v[2:3], ttmp[4:7], ttmp1 addr64 offset:4 glc slc tfe ; encoding: [0x04,0xc0,0x70,0xe0,0x02,0x01,0xdd,0x71]
+// NOVI: error: instruction not supported on this GPU
+
 //===----------------------------------------------------------------------===//
 // Instructions
 //===----------------------------------------------------------------------===//
@@ -367,10 +410,18 @@ buffer_store_format_xyzw v[1:4], s[4:7], s1
 // SICI: buffer_store_format_xyzw v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x01,0x01]
 // VI:   buffer_store_format_xyzw v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x01,0x01]
 
+buffer_store_format_xyzw v[1:4], ttmp[4:7], ttmp1
+// SICI: buffer_store_format_xyzw v[1:4], ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x1d,0x71]
+// VI:   buffer_store_format_xyzw v[1:4], ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x1d,0x71]
+
 buffer_load_ubyte v1, s[4:7], s1
 // SICI: buffer_load_ubyte v1, s[4:7], s1 ; encoding: [0x00,0x00,0x20,0xe0,0x00,0x01,0x01,0x01]
 // VI:   buffer_load_ubyte v1, s[4:7], s1 ; encoding: [0x00,0x00,0x40,0xe0,0x00,0x01,0x01,0x01]
 
+buffer_load_ubyte v1, ttmp[4:7], ttmp1
+// SICI: buffer_load_ubyte v1, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x20,0xe0,0x00,0x01,0x1d,0x71]
+// VI:   buffer_load_ubyte v1, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x40,0xe0,0x00,0x01,0x1d,0x71]
+
 buffer_load_sbyte v1, s[4:7], s1
 // SICI: buffer_load_sbyte v1, s[4:7], s1 ; encoding: [0x00,0x00,0x24,0xe0,0x00,0x01,0x01,0x01]
 // VI:   buffer_load_sbyte v1, s[4:7], s1 ; encoding: [0x00,0x00,0x44,0xe0,0x00,0x01,0x01,0x01]
@@ -387,6 +438,10 @@ buffer_load_dword v1, s[4:7], s1
 // SICI: buffer_load_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x01,0x01]
 // VI:   buffer_load_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x50,0xe0,0x00,0x01,0x01,0x01]
 
+buffer_load_dword v1, ttmp[4:7], ttmp1
+// SICI: buffer_load_dword v1, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x1d,0x71]
+// VI:   buffer_load_dword v1, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x50,0xe0,0x00,0x01,0x1d,0x71]
+
 buffer_load_dwordx2 v[1:2], s[4:7], s1
 // SICI: buffer_load_dwordx2 v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x34,0xe0,0x00,0x01,0x01,0x01]
 // VI:   buffer_load_dwordx2 v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x54,0xe0,0x00,0x01,0x01,0x01]
@@ -395,10 +450,18 @@ buffer_load_dwordx4 v[1:4], s[4:7], s1
 // SICI: buffer_load_dwordx4 v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x38,0xe0,0x00,0x01,0x01,0x01]
 // VI:   buffer_load_dwordx4 v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x5c,0xe0,0x00,0x01,0x01,0x01]
 
+buffer_load_dwordx4 v[1:4], ttmp[4:7], ttmp1
+// SICI: buffer_load_dwordx4 v[1:4], ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x38,0xe0,0x00,0x01,0x1d,0x71]
+// VI:   buffer_load_dwordx4 v[1:4], ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x5c,0xe0,0x00,0x01,0x1d,0x71]
+
 buffer_store_byte v1, s[4:7], s1
 // SICI: buffer_store_byte v1, s[4:7], s1 ; encoding: [0x00,0x00,0x60,0xe0,0x00,0x01,0x01,0x01]
 // VI:   buffer_store_byte v1, s[4:7], s1 ; encoding: [0x00,0x00,0x60,0xe0,0x00,0x01,0x01,0x01]
 
+buffer_store_byte v1, ttmp[4:7], ttmp1
+// SICI: buffer_store_byte v1, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x60,0xe0,0x00,0x01,0x1d,0x71]
+// VI:   buffer_store_byte v1, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x60,0xe0,0x00,0x01,0x1d,0x71]
+
 buffer_store_short v1, s[4:7], s1
 // SICI: buffer_store_short v1, s[4:7], s1 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x01,0x01,0x01]
 // VI:   buffer_store_short v1, s[4:7], s1 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x01,0x01,0x01]
@@ -415,6 +478,10 @@ buffer_store_dwordx4 v[1:4], s[4:7], s1
 // SICI: buffer_store_dwordx4 v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x78,0xe0,0x00,0x01,0x01,0x01]
 // VI:   buffer_store_dwordx4 v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x7c,0xe0,0x00,0x01,0x01,0x01]
 
+buffer_store_dwordx4 v[1:4], ttmp[4:7], ttmp1
+// SICI: buffer_store_dwordx4 v[1:4], ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x78,0xe0,0x00,0x01,0x1d,0x71]
+// VI:   buffer_store_dwordx4 v[1:4], ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x7c,0xe0,0x00,0x01,0x1d,0x71]
+
 //===----------------------------------------------------------------------===//
 // Cache invalidation
 //===----------------------------------------------------------------------===//
diff --git a/test/MC/AMDGPU/reg-syntax-extra.s b/test/MC/AMDGPU/reg-syntax-extra.s
index fb7778f0585..e6e537e84ed 100644
--- a/test/MC/AMDGPU/reg-syntax-extra.s
+++ b/test/MC/AMDGPU/reg-syntax-extra.s
@@ -53,3 +53,31 @@ v_rcp_f64 [v1,v2], [v2,v3]
 buffer_load_dwordx4 [v1,v2,v3,v4], [s4,s5,s6,s7], s1
 // SICI: buffer_load_dwordx4 v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x38,0xe0,0x00,0x01,0x01,0x01]
 // VI:   buffer_load_dwordx4 v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x5c,0xe0,0x00,0x01,0x01,0x01]
+
+buffer_load_dword v1, [ttmp4,ttmp5,ttmp6,ttmp7], s1
+// SICI: buffer_load_dword v1, ttmp[4:7], s1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x1d,0x01]
+// VI:   buffer_load_dword v1, ttmp[4:7], s1 ; encoding: [0x00,0x00,0x50,0xe0,0x00,0x01,0x1d,0x01]
+
+buffer_store_format_xyzw v[1:4], [ttmp4,ttmp5,ttmp6,ttmp7], ttmp1
+// SICI: buffer_store_format_xyzw v[1:4], ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x1d,0x71]
+// VI:   buffer_store_format_xyzw v[1:4], ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x1d,0x71]
+
+buffer_load_ubyte v1, [ttmp4,ttmp5,ttmp6,ttmp7], ttmp1
+// SICI: buffer_load_ubyte v1, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x20,0xe0,0x00,0x01,0x1d,0x71]
+// VI:   buffer_load_ubyte v1, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x40,0xe0,0x00,0x01,0x1d,0x71]
+
+buffer_store_dwordx4 v[1:4], [ttmp4,ttmp5,ttmp6,ttmp7], ttmp1
+// SICI: buffer_store_dwordx4 v[1:4], ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x78,0xe0,0x00,0x01,0x1d,0x71]
+// VI:   buffer_store_dwordx4 v[1:4], ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x7c,0xe0,0x00,0x01,0x1d,0x71]
+
+s_load_dwordx4 [ttmp4,ttmp5,ttmp6,ttmp7], [ttmp2,ttmp3], ttmp4
+// SICI: s_load_dwordx4 ttmp[4:7], ttmp[2:3], ttmp4 ; encoding: [0x74,0x72,0xba,0xc0]
+// VI:	 s_load_dwordx4 ttmp[4:7], ttmp[2:3], ttmp4 ; encoding: [0x39,0x1d,0x08,0xc0,0x74,0x00,0x00,0x00]
+
+s_buffer_load_dword ttmp1, [ttmp4,ttmp5,ttmp6,ttmp7], ttmp4
+// SICI: s_buffer_load_dword ttmp1, ttmp[4:7], ttmp4 ; encoding: [0x74,0xf4,0x38,0xc2]
+// VI:	 s_buffer_load_dword ttmp1, ttmp[4:7], ttmp4 ; encoding: [0x7a,0x1c,0x20,0xc0,0x74,0x00,0x00,0x00]
+
+s_buffer_load_dwordx4 [ttmp8,ttmp9,ttmp10,ttmp11], [ttmp4,ttmp5,ttmp6,ttmp7], ttmp4
+// SICI: s_buffer_load_dwordx4 ttmp[8:11], ttmp[4:7], ttmp4 ; encoding: [0x74,0x74,0xbc,0xc2]
+// VI:   s_buffer_load_dwordx4 ttmp[8:11], ttmp[4:7], ttmp4 ; encoding: [0x3a,0x1e,0x28,0xc0,0x74,0x00,0x00,0x00]
diff --git a/test/MC/AMDGPU/smrd.s b/test/MC/AMDGPU/smrd.s
index 8ac4f916cb3..28c9471376a 100644
--- a/test/MC/AMDGPU/smrd.s
+++ b/test/MC/AMDGPU/smrd.s
@@ -52,6 +52,10 @@ s_load_dwordx4 s[4:7], s[2:3], s4
 // GCN: s_load_dwordx4 s[4:7], s[2:3], s4 ; encoding: [0x04,0x02,0x82,0xc0]
 // VI:	s_load_dwordx4 s[4:7], s[2:3], s4 ; encoding: [0x01,0x01,0x08,0xc0,0x04,0x00,0x00,0x00]
 
+s_load_dwordx4 ttmp[4:7], ttmp[2:3], ttmp4
+// GCN: s_load_dwordx4 ttmp[4:7], ttmp[2:3], ttmp4 ; encoding: [0x74,0x72,0xba,0xc0]
+// VI:	s_load_dwordx4 ttmp[4:7], ttmp[2:3], ttmp4 ; encoding: [0x39,0x1d,0x08,0xc0,0x74,0x00,0x00,0x00]
+
 s_load_dwordx4 s[100:103], s[2:3], s4
 // GCN: s_load_dwordx4 s[100:103], s[2:3], s4 ; encoding: [0x04,0x02,0xb2,0xc0]
 // NOVI: error: invalid operand for instruction
@@ -88,6 +92,10 @@ s_buffer_load_dword s1, s[4:7], s4
 // GCN: s_buffer_load_dword s1, s[4:7], s4 ; encoding: [0x04,0x84,0x00,0xc2]
 // VI:	s_buffer_load_dword s1, s[4:7], s4     ; encoding: [0x42,0x00,0x20,0xc0,0x04,0x00,0x00,0x00]
 
+s_buffer_load_dword ttmp1, ttmp[4:7], ttmp4
+// GCN: s_buffer_load_dword ttmp1, ttmp[4:7], ttmp4 ; encoding: [0x74,0xf4,0x38,0xc2]
+// VI:	s_buffer_load_dword ttmp1, ttmp[4:7], ttmp4 ; encoding: [0x7a,0x1c,0x20,0xc0,0x74,0x00,0x00,0x00]
+
 s_buffer_load_dwordx2 s[8:9], s[4:7], 1
 // GCN: s_buffer_load_dwordx2 s[8:9], s[4:7], 0x1 ; encoding: [0x01,0x05,0x44,0xc2]
 // VI:	s_buffer_load_dwordx2 s[8:9], s[4:7], 0x1 ; encoding: [0x02,0x02,0x26,0xc0,0x01,0x00,0x00,0x00]
@@ -104,6 +112,10 @@ s_buffer_load_dwordx4 s[8:11], s[4:7], s4
 // GCN: s_buffer_load_dwordx4 s[8:11], s[4:7], s4 ; encoding: [0x04,0x04,0x84,0xc2]
 // VI:	s_buffer_load_dwordx4 s[8:11], s[4:7], s4 ; encoding: [0x02,0x02,0x28,0xc0,0x04,0x00,0x00,0x00]
 
+s_buffer_load_dwordx4 ttmp[8:11], ttmp[4:7], ttmp4
+// GCN: s_buffer_load_dwordx4 ttmp[8:11], ttmp[4:7], ttmp4 ; encoding: [0x74,0x74,0xbc,0xc2]
+// VI:	s_buffer_load_dwordx4 ttmp[8:11], ttmp[4:7], ttmp4 ; encoding: [0x3a,0x1e,0x28,0xc0,0x74,0x00,0x00,0x00]
+
 s_buffer_load_dwordx4 s[100:103], s[4:7], s4
 // GCN: s_buffer_load_dwordx4 s[100:103], s[4:7], s4 ; encoding: [0x04,0x04,0xb2,0xc2]
 // NOVI: error: invalid operand for instruction