[AMDGPU][llvm-mc] Add support of TTMP quads. Rework M0 exclusion for SMRD.

Added support of TTMP quads. Reworked M0 exclusion machinery for SMRD and similar instructions to enable usage of TTMP registers in those instructions as destinations. Tests added. Differential Revision: http://reviews.llvm.org/D19342 llvm-svn: 267733
2024-11-26 04:40:38 +00:00 · 2016-04-27 16:20:23 +00:00 · 2016-04-27 16:20:23 +00:00 · ed6f89bcdc
commit ed6f89bcdc
parent d8e7f2f768
9 changed files with 146 additions and 13 deletions
--- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@ -611,13 +611,14 @@ static int getRegClass(RegisterKind Is, unsigned RegWidth) {
      default: return -1;
      case 1: return AMDGPU::TTMP_32RegClassID;
      case 2: return AMDGPU::TTMP_64RegClassID;
+      case 4: return AMDGPU::TTMP_128RegClassID;
    }
  } else if (Is == IS_SGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::SGPR_32RegClassID;
      case 2: return AMDGPU::SGPR_64RegClassID;
-      case 4: return AMDGPU::SReg_128RegClassID;
+      case 4: return AMDGPU::SGPR_128RegClassID;
      case 8: return AMDGPU::SReg_256RegClassID;
      case 16: return AMDGPU::SReg_512RegClassID;
    }
--- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@ -68,6 +68,7 @@ DECODE_OPERAND(VReg_128)

 DECODE_OPERAND(SGPR_32)
 DECODE_OPERAND(SReg_32)
+DECODE_OPERAND(SReg_32_XM0)
 DECODE_OPERAND(SReg_64)
 DECODE_OPERAND(SReg_128)
 DECODE_OPERAND(SReg_256)
@ -248,6 +249,11 @@ MCOperand AMDGPUDisassembler::decodeOperand_SReg_32(unsigned Val) const {
  return decodeSrcOp(OP32, Val);
 }

+MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XM0(unsigned Val) const {
+  // SReg_32_XM0 is SReg_32 without M0
+  return decodeOperand_SReg_32(Val);
+}
+
 MCOperand AMDGPUDisassembler::decodeOperand_SReg_64(unsigned Val) const {
  // see decodeOperand_SReg_32 comment
  return decodeSrcOp(OP64, Val);
--- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@ -64,6 +64,7 @@ namespace llvm {

    MCOperand decodeOperand_SGPR_32(unsigned Val) const;
    MCOperand decodeOperand_SReg_32(unsigned Val) const;
+    MCOperand decodeOperand_SReg_32_XM0(unsigned Val) const;
    MCOperand decodeOperand_SReg_64(unsigned Val) const;
    MCOperand decodeOperand_SReg_128(unsigned Val) const;
    MCOperand decodeOperand_SReg_256(unsigned Val) const;
--- a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
@ -240,9 +240,12 @@ void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O,
  } else if (MRI.getRegClass(AMDGPU::VReg_128RegClassID).contains(reg)) {
    Type = "v";
    NumRegs = 4;
-  } else  if (MRI.getRegClass(AMDGPU::SReg_128RegClassID).contains(reg)) {
+  } else  if (MRI.getRegClass(AMDGPU::SGPR_128RegClassID).contains(reg)) {
    Type = "s";
    NumRegs = 4;
+  } else  if (MRI.getRegClass(AMDGPU::TTMP_128RegClassID).contains(reg)) {
+    Type = "ttmp";
+    NumRegs = 4;
  } else if (MRI.getRegClass(AMDGPU::VReg_96RegClassID).contains(reg)) {
    Type = "v";
    NumRegs = 3;
--- a/lib/Target/AMDGPU/SIInstructions.td
+++ b/lib/Target/AMDGPU/SIInstructions.td
@ -60,17 +60,17 @@ defm EXP : EXP_m;
 // SMRD Instructions
 //===----------------------------------------------------------------------===//

-// We are using the SGPR_32 and not the SReg_32 register class for 32-bit
-// SMRD instructions, because the SGPR_32 register class does not include M0
+// We are using the SReg_32_XM0 and not the SReg_32 register class for 32-bit
+// SMRD instructions, because the SReg_32_XM0 register class does not include M0
 // and writing to M0 from an SMRD instruction will hang the GPU.
-defm S_LOAD_DWORD : SMRD_Helper <smrd<0x00>, "s_load_dword", SReg_64, SGPR_32>;
+defm S_LOAD_DWORD : SMRD_Helper <smrd<0x00>, "s_load_dword", SReg_64, SReg_32_XM0>;
 defm S_LOAD_DWORDX2 : SMRD_Helper <smrd<0x01>, "s_load_dwordx2", SReg_64, SReg_64>;
 defm S_LOAD_DWORDX4 : SMRD_Helper <smrd<0x02>, "s_load_dwordx4", SReg_64, SReg_128>;
 defm S_LOAD_DWORDX8 : SMRD_Helper <smrd<0x03>, "s_load_dwordx8", SReg_64, SReg_256>;
 defm S_LOAD_DWORDX16 : SMRD_Helper <smrd<0x04>, "s_load_dwordx16", SReg_64, SReg_512>;

 defm S_BUFFER_LOAD_DWORD : SMRD_Helper <
-  smrd<0x08>, "s_buffer_load_dword", SReg_128, SGPR_32
+  smrd<0x08>, "s_buffer_load_dword", SReg_128, SReg_32_XM0
 >;

 defm S_BUFFER_LOAD_DWORDX2 : SMRD_Helper <
@ -2087,9 +2087,9 @@ multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
 }

 // It's unclear whether you can use M0 as the output of v_readlane_b32
-// instructions, so use SGPR_32 register class for spills to prevent
+// instructions, so use SReg_32_XM0 register class for spills to prevent
 // this from happening.
-defm SI_SPILL_S32  : SI_SPILL_SGPR <SGPR_32>;
+defm SI_SPILL_S32  : SI_SPILL_SGPR <SReg_32_XM0>;
 defm SI_SPILL_S64  : SI_SPILL_SGPR <SReg_64>;
 defm SI_SPILL_S128 : SI_SPILL_SGPR <SReg_128>;
 defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>;
@ -3431,7 +3431,7 @@ def : ZExt_i64_i1_Pat<anyext>;
 def : Pat <
  (i64 (sext i32:$src)),
    (REG_SEQUENCE SReg_64, $src, sub0,
-    (i32 (COPY_TO_REGCLASS (S_ASHR_I32 $src, 31), SGPR_32)), sub1)
+    (i32 (COPY_TO_REGCLASS (S_ASHR_I32 $src, 31), SReg_32_XM0)), sub1)
 >;

 def : Pat <
--- a/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/lib/Target/AMDGPU/SIRegisterInfo.td
@ -132,7 +132,7 @@ def SGPR_64Regs : RegisterTuples<[sub0, sub1],
                              (add (decimate (shl SGPR_32, 1), 2))]>;

 // SGPR 128-bit registers
-def SGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
+def SGPR_128Regs : RegisterTuples<[sub0, sub1, sub2, sub3],
                              [(add (decimate SGPR_32, 4)),
                               (add (decimate (shl SGPR_32, 1), 4)),
                               (add (decimate (shl SGPR_32, 2), 4)),
@ -255,6 +255,13 @@ def SReg_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
   TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI)
 >;

+// Subset of SReg_32 without M0 for SMRD instructions and alike.
+// See comments in SIInstructions.td for more info.
+def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32], 32,
+  (add SGPR_32, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI,
+   TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI)
+>;
+
 def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 32, (add SGPR_64Regs)>;

 def TTMP_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 32, (add TTMP_64Regs)> {
@ -265,11 +272,19 @@ def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1], 32,
  (add SGPR_64, VCC, EXEC, FLAT_SCR, TTMP_64, TBA, TMA)
 >;

-def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add SGPR_128)> {
-  // Requires 2 s_mov_b64 to copy
-  let CopyCost = 2;
+// Requires 2 s_mov_b64 to copy
+let CopyCost = 2 in {
+
+def SGPR_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add SGPR_128Regs)>;
+
+def TTMP_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add TTMP_128Regs)> {
+  let isAllocatable = 0;
 }

+def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add SGPR_128, TTMP_128)>;
+
+} // End CopyCost = 2
+
 def SReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256)> {
  // Requires 4 s_mov_b64 to copy
  let CopyCost = 4;
--- a/test/MC/AMDGPU/mubuf.s
+++ b/test/MC/AMDGPU/mubuf.s
@ -18,6 +18,10 @@ buffer_load_dword v1, s[4:7], s1
 // SICI: buffer_load_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x01,0x01]
 // VI:   buffer_load_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x50,0xe0,0x00,0x01,0x01,0x01]

+buffer_load_dword v1, ttmp[4:7], s1
+// SICI: buffer_load_dword v1, ttmp[4:7], s1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x1d,0x01]
+// VI:   buffer_load_dword v1, ttmp[4:7], s1 ; encoding: [0x00,0x00,0x50,0xe0,0x00,0x01,0x1d,0x01]
+
 buffer_load_dword v1, s[4:7], s1 offset:4
 // SICI: buffer_load_dword v1, s[4:7], s1 offset:4 ; encoding: [0x04,0x00,0x30,0xe0,0x00,0x01,0x01,0x01]
 // VI:   buffer_load_dword v1, s[4:7], s1 offset:4 ; encoding: [0x04,0x00,0x50,0xe0,0x00,0x01,0x01,0x01]
@ -42,6 +46,9 @@ buffer_load_dword v1, s[4:7], s1 offset:4 glc slc tfe
 // SICI: buffer_load_dword v1, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x30,0xe0,0x00,0x01,0xc1,0x01]
 // VI:   buffer_load_dword v1, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x52,0xe0,0x00,0x01,0x81,0x01]

+buffer_load_dword v1, ttmp[4:7], s1 offset:4 glc slc tfe
+// SICI: buffer_load_dword v1, ttmp[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x30,0xe0,0x00,0x01,0xdd,0x01]
+// VI:   buffer_load_dword v1, ttmp[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x52,0xe0,0x00,0x01,0x9d,0x01]

 //===----------------------------------------------------------------------===//
 // load - vgpr offset
@ -75,6 +82,10 @@ buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 glc slc tfe
 // SICI: buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x30,0xe0,0x02,0x01,0xc1,0x01]
 // VI:   buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x52,0xe0,0x02,0x01,0x81,0x01]

+buffer_load_dword v1, v2, ttmp[4:7], s1 offen offset:4 glc slc tfe
+// SICI: buffer_load_dword v1, v2, ttmp[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x30,0xe0,0x02,0x01,0xdd,0x01]
+// VI:   buffer_load_dword v1, v2, ttmp[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x52,0xe0,0x02,0x01,0x9d,0x01]
+
 //===----------------------------------------------------------------------===//
 // load - vgpr index
 //===----------------------------------------------------------------------===//
@ -107,6 +118,10 @@ buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 glc slc tfe
 // SICI: buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x30,0xe0,0x02,0x01,0xc1,0x01]
 // VI:   buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x52,0xe0,0x02,0x01,0x81,0x01]

+buffer_load_dword v1, v2, ttmp[4:7], s1 idxen offset:4 glc slc tfe
+// SICI: buffer_load_dword v1, v2, ttmp[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x30,0xe0,0x02,0x01,0xdd,0x01]
+// VI:   buffer_load_dword v1, v2, ttmp[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x52,0xe0,0x02,0x01,0x9d,0x01]
+
 //===----------------------------------------------------------------------===//
 // load - vgpr index and offset
 //===----------------------------------------------------------------------===//
@ -139,6 +154,10 @@ buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe
 // SICI: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x30,0xe0,0x02,0x01,0xc1,0x01]
 // VI:   buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x52,0xe0,0x02,0x01,0x81,0x01]

+buffer_load_dword v1, v[2:3], ttmp[4:7], ttmp1 idxen offen offset:4 glc slc tfe
+// SICI: buffer_load_dword v1, v[2:3], ttmp[4:7], ttmp1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x30,0xe0,0x02,0x01,0xdd,0x71]
+// VI:   buffer_load_dword v1, v[2:3], ttmp[4:7], ttmp1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x52,0xe0,0x02,0x01,0x9d,0x71]
+
 //===----------------------------------------------------------------------===//
 // load - addr64
 //===----------------------------------------------------------------------===//
@ -171,6 +190,10 @@ buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc slc tfe
 // SICI: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc slc tfe ; encoding: [0x04,0xc0,0x30,0xe0,0x02,0x01,0xc1,0x01]
 // NOVI: error: instruction not supported on this GPU

+buffer_load_dword v1, v[2:3], ttmp[4:7], ttmp1 addr64 offset:4 glc slc tfe
+// SICI: buffer_load_dword v1, v[2:3], ttmp[4:7], ttmp1 addr64 offset:4 glc slc tfe ; encoding: [0x04,0xc0,0x30,0xe0,0x02,0x01,0xdd,0x71]
+// NOVI: error: instruction not supported on this GPU
+
 //===----------------------------------------------------------------------===//
 // store - immediate offset only
 //===----------------------------------------------------------------------===//
@ -203,6 +226,10 @@ buffer_store_dword v1, s[4:7], s1 offset:4 glc slc tfe
 // SICI: buffer_store_dword v1, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x70,0xe0,0x00,0x01,0xc1,0x01]
 // VI:   buffer_store_dword v1, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x72,0xe0,0x00,0x01,0x81,0x01]

+buffer_store_dword v1, ttmp[4:7], ttmp1 offset:4 glc slc tfe
+// SICI: buffer_store_dword v1, ttmp[4:7], ttmp1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x70,0xe0,0x00,0x01,0xdd,0x71]
+// VI:   buffer_store_dword v1, ttmp[4:7], ttmp1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x72,0xe0,0x00,0x01,0x9d,0x71]
+
 //===----------------------------------------------------------------------===//
 // store - vgpr offset
 //===----------------------------------------------------------------------===//
@ -235,6 +262,10 @@ buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 glc slc tfe
 // SICI: buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x70,0xe0,0x02,0x01,0xc1,0x01]
 // VI:   buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x72,0xe0,0x02,0x01,0x81,0x01]

+buffer_store_dword v1, v2, ttmp[4:7], ttmp1 offen offset:4 glc slc tfe
+// SICI: buffer_store_dword v1, v2, ttmp[4:7], ttmp1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x70,0xe0,0x02,0x01,0xdd,0x71]
+// VI:   buffer_store_dword v1, v2, ttmp[4:7], ttmp1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x72,0xe0,0x02,0x01,0x9d,0x71]
+
 //===----------------------------------------------------------------------===//
 // store - vgpr index
 //===----------------------------------------------------------------------===//
@ -267,6 +298,10 @@ buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 glc slc tfe
 // SICI: buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x70,0xe0,0x02,0x01,0xc1,0x01]
 // VI:   buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x72,0xe0,0x02,0x01,0x81,0x01]

+buffer_store_dword v1, v2, ttmp[4:7], ttmp1 idxen offset:4 glc slc tfe
+// SICI: buffer_store_dword v1, v2, ttmp[4:7], ttmp1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x70,0xe0,0x02,0x01,0xdd,0x71]
+// VI:   buffer_store_dword v1, v2, ttmp[4:7], ttmp1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x72,0xe0,0x02,0x01,0x9d,0x71]
+
 //===----------------------------------------------------------------------===//
 // store - vgpr index and offset
 //===----------------------------------------------------------------------===//
@ -299,6 +334,10 @@ buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe
 // SICI: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x70,0xe0,0x02,0x01,0xc1,0x01]
 // VI:   buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x72,0xe0,0x02,0x01,0x81,0x01]

+buffer_store_dword v1, v[2:3], ttmp[4:7], ttmp1 idxen offen offset:4 glc slc tfe
+// SICI: buffer_store_dword v1, v[2:3], ttmp[4:7], ttmp1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x70,0xe0,0x02,0x01,0xdd,0x71]
+// VI:   buffer_store_dword v1, v[2:3], ttmp[4:7], ttmp1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x72,0xe0,0x02,0x01,0x9d,0x71]
+
 //===----------------------------------------------------------------------===//
 // store - addr64
 //===----------------------------------------------------------------------===//
@ -331,6 +370,10 @@ buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc slc tfe
 // SICI: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc slc tfe ; encoding: [0x04,0xc0,0x70,0xe0,0x02,0x01,0xc1,0x01]
 // NOVI: error: instruction not supported on this GPU

+buffer_store_dword v1, v[2:3], ttmp[4:7], ttmp1 addr64 offset:4 glc slc tfe
+// SICI: buffer_store_dword v1, v[2:3], ttmp[4:7], ttmp1 addr64 offset:4 glc slc tfe ; encoding: [0x04,0xc0,0x70,0xe0,0x02,0x01,0xdd,0x71]
+// NOVI: error: instruction not supported on this GPU
+
 //===----------------------------------------------------------------------===//
 // Instructions
 //===----------------------------------------------------------------------===//
@ -367,10 +410,18 @@ buffer_store_format_xyzw v[1:4], s[4:7], s1
 // SICI: buffer_store_format_xyzw v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x01,0x01]
 // VI:   buffer_store_format_xyzw v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x01,0x01]

+buffer_store_format_xyzw v[1:4], ttmp[4:7], ttmp1
+// SICI: buffer_store_format_xyzw v[1:4], ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x1d,0x71]
+// VI:   buffer_store_format_xyzw v[1:4], ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x1d,0x71]
+
 buffer_load_ubyte v1, s[4:7], s1
 // SICI: buffer_load_ubyte v1, s[4:7], s1 ; encoding: [0x00,0x00,0x20,0xe0,0x00,0x01,0x01,0x01]
 // VI:   buffer_load_ubyte v1, s[4:7], s1 ; encoding: [0x00,0x00,0x40,0xe0,0x00,0x01,0x01,0x01]

+buffer_load_ubyte v1, ttmp[4:7], ttmp1
+// SICI: buffer_load_ubyte v1, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x20,0xe0,0x00,0x01,0x1d,0x71]
+// VI:   buffer_load_ubyte v1, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x40,0xe0,0x00,0x01,0x1d,0x71]
+
 buffer_load_sbyte v1, s[4:7], s1
 // SICI: buffer_load_sbyte v1, s[4:7], s1 ; encoding: [0x00,0x00,0x24,0xe0,0x00,0x01,0x01,0x01]
 // VI:   buffer_load_sbyte v1, s[4:7], s1 ; encoding: [0x00,0x00,0x44,0xe0,0x00,0x01,0x01,0x01]
@ -387,6 +438,10 @@ buffer_load_dword v1, s[4:7], s1
 // SICI: buffer_load_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x01,0x01]
 // VI:   buffer_load_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x50,0xe0,0x00,0x01,0x01,0x01]

+buffer_load_dword v1, ttmp[4:7], ttmp1
+// SICI: buffer_load_dword v1, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x1d,0x71]
+// VI:   buffer_load_dword v1, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x50,0xe0,0x00,0x01,0x1d,0x71]
+
 buffer_load_dwordx2 v[1:2], s[4:7], s1
 // SICI: buffer_load_dwordx2 v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x34,0xe0,0x00,0x01,0x01,0x01]
 // VI:   buffer_load_dwordx2 v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x54,0xe0,0x00,0x01,0x01,0x01]
@ -395,10 +450,18 @@ buffer_load_dwordx4 v[1:4], s[4:7], s1
 // SICI: buffer_load_dwordx4 v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x38,0xe0,0x00,0x01,0x01,0x01]
 // VI:   buffer_load_dwordx4 v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x5c,0xe0,0x00,0x01,0x01,0x01]

+buffer_load_dwordx4 v[1:4], ttmp[4:7], ttmp1
+// SICI: buffer_load_dwordx4 v[1:4], ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x38,0xe0,0x00,0x01,0x1d,0x71]
+// VI:   buffer_load_dwordx4 v[1:4], ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x5c,0xe0,0x00,0x01,0x1d,0x71]
+
 buffer_store_byte v1, s[4:7], s1
 // SICI: buffer_store_byte v1, s[4:7], s1 ; encoding: [0x00,0x00,0x60,0xe0,0x00,0x01,0x01,0x01]
 // VI:   buffer_store_byte v1, s[4:7], s1 ; encoding: [0x00,0x00,0x60,0xe0,0x00,0x01,0x01,0x01]

+buffer_store_byte v1, ttmp[4:7], ttmp1
+// SICI: buffer_store_byte v1, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x60,0xe0,0x00,0x01,0x1d,0x71]
+// VI:   buffer_store_byte v1, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x60,0xe0,0x00,0x01,0x1d,0x71]
+
 buffer_store_short v1, s[4:7], s1
 // SICI: buffer_store_short v1, s[4:7], s1 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x01,0x01,0x01]
 // VI:   buffer_store_short v1, s[4:7], s1 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x01,0x01,0x01]
@ -415,6 +478,10 @@ buffer_store_dwordx4 v[1:4], s[4:7], s1
 // SICI: buffer_store_dwordx4 v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x78,0xe0,0x00,0x01,0x01,0x01]
 // VI:   buffer_store_dwordx4 v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x7c,0xe0,0x00,0x01,0x01,0x01]

+buffer_store_dwordx4 v[1:4], ttmp[4:7], ttmp1
+// SICI: buffer_store_dwordx4 v[1:4], ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x78,0xe0,0x00,0x01,0x1d,0x71]
+// VI:   buffer_store_dwordx4 v[1:4], ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x7c,0xe0,0x00,0x01,0x1d,0x71]
+
 //===----------------------------------------------------------------------===//
 // Cache invalidation
 //===----------------------------------------------------------------------===//
--- a/test/MC/AMDGPU/reg-syntax-extra.s
+++ b/test/MC/AMDGPU/reg-syntax-extra.s
@ -53,3 +53,31 @@ v_rcp_f64 [v1,v2], [v2,v3]
 buffer_load_dwordx4 [v1,v2,v3,v4], [s4,s5,s6,s7], s1
 // SICI: buffer_load_dwordx4 v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x38,0xe0,0x00,0x01,0x01,0x01]
 // VI:   buffer_load_dwordx4 v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x5c,0xe0,0x00,0x01,0x01,0x01]
+
+buffer_load_dword v1, [ttmp4,ttmp5,ttmp6,ttmp7], s1
+// SICI: buffer_load_dword v1, ttmp[4:7], s1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x1d,0x01]
+// VI:   buffer_load_dword v1, ttmp[4:7], s1 ; encoding: [0x00,0x00,0x50,0xe0,0x00,0x01,0x1d,0x01]
+
+buffer_store_format_xyzw v[1:4], [ttmp4,ttmp5,ttmp6,ttmp7], ttmp1
+// SICI: buffer_store_format_xyzw v[1:4], ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x1d,0x71]
+// VI:   buffer_store_format_xyzw v[1:4], ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x1d,0x71]
+
+buffer_load_ubyte v1, [ttmp4,ttmp5,ttmp6,ttmp7], ttmp1
+// SICI: buffer_load_ubyte v1, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x20,0xe0,0x00,0x01,0x1d,0x71]
+// VI:   buffer_load_ubyte v1, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x40,0xe0,0x00,0x01,0x1d,0x71]
+
+buffer_store_dwordx4 v[1:4], [ttmp4,ttmp5,ttmp6,ttmp7], ttmp1
+// SICI: buffer_store_dwordx4 v[1:4], ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x78,0xe0,0x00,0x01,0x1d,0x71]
+// VI:   buffer_store_dwordx4 v[1:4], ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x7c,0xe0,0x00,0x01,0x1d,0x71]
+
+s_load_dwordx4 [ttmp4,ttmp5,ttmp6,ttmp7], [ttmp2,ttmp3], ttmp4
+// SICI: s_load_dwordx4 ttmp[4:7], ttmp[2:3], ttmp4 ; encoding: [0x74,0x72,0xba,0xc0]
+// VI:	 s_load_dwordx4 ttmp[4:7], ttmp[2:3], ttmp4 ; encoding: [0x39,0x1d,0x08,0xc0,0x74,0x00,0x00,0x00]
+
+s_buffer_load_dword ttmp1, [ttmp4,ttmp5,ttmp6,ttmp7], ttmp4
+// SICI: s_buffer_load_dword ttmp1, ttmp[4:7], ttmp4 ; encoding: [0x74,0xf4,0x38,0xc2]
+// VI:	 s_buffer_load_dword ttmp1, ttmp[4:7], ttmp4 ; encoding: [0x7a,0x1c,0x20,0xc0,0x74,0x00,0x00,0x00]
+
+s_buffer_load_dwordx4 [ttmp8,ttmp9,ttmp10,ttmp11], [ttmp4,ttmp5,ttmp6,ttmp7], ttmp4
+// SICI: s_buffer_load_dwordx4 ttmp[8:11], ttmp[4:7], ttmp4 ; encoding: [0x74,0x74,0xbc,0xc2]
+// VI:   s_buffer_load_dwordx4 ttmp[8:11], ttmp[4:7], ttmp4 ; encoding: [0x3a,0x1e,0x28,0xc0,0x74,0x00,0x00,0x00]
--- a/test/MC/AMDGPU/smrd.s
+++ b/test/MC/AMDGPU/smrd.s
@ -52,6 +52,10 @@ s_load_dwordx4 s[4:7], s[2:3], s4
 // GCN: s_load_dwordx4 s[4:7], s[2:3], s4 ; encoding: [0x04,0x02,0x82,0xc0]
 // VI:	s_load_dwordx4 s[4:7], s[2:3], s4 ; encoding: [0x01,0x01,0x08,0xc0,0x04,0x00,0x00,0x00]

+s_load_dwordx4 ttmp[4:7], ttmp[2:3], ttmp4
+// GCN: s_load_dwordx4 ttmp[4:7], ttmp[2:3], ttmp4 ; encoding: [0x74,0x72,0xba,0xc0]
+// VI:	s_load_dwordx4 ttmp[4:7], ttmp[2:3], ttmp4 ; encoding: [0x39,0x1d,0x08,0xc0,0x74,0x00,0x00,0x00]
+
 s_load_dwordx4 s[100:103], s[2:3], s4
 // GCN: s_load_dwordx4 s[100:103], s[2:3], s4 ; encoding: [0x04,0x02,0xb2,0xc0]
 // NOVI: error: invalid operand for instruction
@ -88,6 +92,10 @@ s_buffer_load_dword s1, s[4:7], s4
 // GCN: s_buffer_load_dword s1, s[4:7], s4 ; encoding: [0x04,0x84,0x00,0xc2]
 // VI:	s_buffer_load_dword s1, s[4:7], s4     ; encoding: [0x42,0x00,0x20,0xc0,0x04,0x00,0x00,0x00]

+s_buffer_load_dword ttmp1, ttmp[4:7], ttmp4
+// GCN: s_buffer_load_dword ttmp1, ttmp[4:7], ttmp4 ; encoding: [0x74,0xf4,0x38,0xc2]
+// VI:	s_buffer_load_dword ttmp1, ttmp[4:7], ttmp4 ; encoding: [0x7a,0x1c,0x20,0xc0,0x74,0x00,0x00,0x00]
+
 s_buffer_load_dwordx2 s[8:9], s[4:7], 1
 // GCN: s_buffer_load_dwordx2 s[8:9], s[4:7], 0x1 ; encoding: [0x01,0x05,0x44,0xc2]
 // VI:	s_buffer_load_dwordx2 s[8:9], s[4:7], 0x1 ; encoding: [0x02,0x02,0x26,0xc0,0x01,0x00,0x00,0x00]
@ -104,6 +112,10 @@ s_buffer_load_dwordx4 s[8:11], s[4:7], s4
 // GCN: s_buffer_load_dwordx4 s[8:11], s[4:7], s4 ; encoding: [0x04,0x04,0x84,0xc2]
 // VI:	s_buffer_load_dwordx4 s[8:11], s[4:7], s4 ; encoding: [0x02,0x02,0x28,0xc0,0x04,0x00,0x00,0x00]

+s_buffer_load_dwordx4 ttmp[8:11], ttmp[4:7], ttmp4
+// GCN: s_buffer_load_dwordx4 ttmp[8:11], ttmp[4:7], ttmp4 ; encoding: [0x74,0x74,0xbc,0xc2]
+// VI:	s_buffer_load_dwordx4 ttmp[8:11], ttmp[4:7], ttmp4 ; encoding: [0x3a,0x1e,0x28,0xc0,0x74,0x00,0x00,0x00]
+
 s_buffer_load_dwordx4 s[100:103], s[4:7], s4
 // GCN: s_buffer_load_dwordx4 s[100:103], s[4:7], s4 ; encoding: [0x04,0x04,0xb2,0xc2]
 // NOVI: error: invalid operand for instruction