[AMDGPU] Add two TSFlags: IsAtomicNoRtn and IsAtomicRtn

We are using the AtomicNoRet map in multiple places to determine
whether an instruction is atomic and, if so, whether it is the rtn
or nortn variant. This method does not always work, since some
instructions have only a rtn or only a nortn version.

One such instruction is ds_wrxchg_rtn_b32, which does not have a
nortn version. This has caused changes in the memory legalizer
tests.

Differential Revision: https://reviews.llvm.org/D96639
This commit is contained in:
Stanislav Mekhanoshin 2021-02-12 14:19:10 -08:00
parent c465429f28
commit 5cf9292ce3
14 changed files with 95 additions and 74 deletions

View File

@ -696,6 +696,7 @@ class MUBUF_AtomicNoRet_Pseudo<string opName, int addrKind,
let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
let glc_value = 0;
let dlc_value = 0;
let IsAtomicNoRet = 1;
let AsmMatchConverter = "cvtMubufAtomic";
}
@ -714,6 +715,7 @@ class MUBUF_AtomicRet_Pseudo<string opName, int addrKind,
let PseudoInstr = opName # "_rtn_" # getAddrName<addrKindCopy>.ret;
let glc_value = 1;
let dlc_value = 0;
let IsAtomicRet = 1;
let Constraints = "$vdata = $vdata_in";
let DisableEncoding = "$vdata_in";
let AsmMatchConverter = "cvtMubufAtomicReturn";

View File

@ -102,6 +102,7 @@ class DS_1A1D_NORET<string opName, RegisterClass rc = VGPR_32>
let has_data1 = 0;
let has_vdst = 0;
let IsAtomicNoRet = 1;
}
multiclass DS_1A1D_NORET_mc<string opName, RegisterClass rc = VGPR_32> {
@ -121,6 +122,7 @@ class DS_1A2D_NORET<string opName, RegisterClass rc = VGPR_32>
" $addr, $data0, $data1$offset$gds"> {
let has_vdst = 0;
let IsAtomicNoRet = 1;
}
multiclass DS_1A2D_NORET_mc<string opName, RegisterClass rc = VGPR_32> {
@ -161,6 +163,7 @@ class DS_1A1D_RET <string opName, RegisterClass rc = VGPR_32>
let hasPostISelHook = 1;
let has_data1 = 0;
let IsAtomicRet = 1;
}
multiclass DS_1A1D_RET_mc <string opName, RegisterClass rc = VGPR_32,
@ -184,6 +187,7 @@ class DS_1A2D_RET<string opName,
" $vdst, $addr, $data0, $data1$offset$gds"> {
let hasPostISelHook = 1;
let IsAtomicRet = 1;
}
multiclass DS_1A2D_RET_mc<string opName,

View File

@ -345,6 +345,7 @@ class FLAT_AtomicNoRet_Pseudo<string opName, dag outs, dag ins,
let dlcValue = 0;
let has_vdst = 0;
let maybeAtomic = 1;
let IsAtomicNoRet = 1;
}
class FLAT_AtomicRet_Pseudo<string opName, dag outs, dag ins,
@ -354,6 +355,8 @@ class FLAT_AtomicRet_Pseudo<string opName, dag outs, dag ins,
let has_vdst = 1;
let glcValue = 1;
let dlcValue = 0;
let IsAtomicNoRet = 0;
let IsAtomicRet = 1;
let PseudoInstr = NAME # "_RTN";
}

View File

@ -39,6 +39,7 @@ class MIMGBaseOpcode : PredicateControl {
bit Coordinates = 1;
bit LodOrClampOrMip = 0;
bit HasD16 = 0;
bit IsAtomicRet = 0;
}
def MIMGBaseOpcode : GenericEnum {
@ -556,20 +557,22 @@ multiclass MIMG_Atomic_Addr_Helper_m <mimgopc op, string asm,
}
multiclass MIMG_Atomic <mimgopc op, string asm, bit isCmpSwap = 0, bit isFP = 0> { // 64-bit atomics
def "" : MIMGBaseOpcode {
let Atomic = 1;
let AtomicX2 = isCmpSwap;
}
let IsAtomicRet = 1 in {
def "" : MIMGBaseOpcode {
let Atomic = 1;
let AtomicX2 = isCmpSwap;
}
let BaseOpcode = !cast<MIMGBaseOpcode>(NAME) in {
// _V* variants have different dst size, but the size is encoded implicitly,
// using dmask and tfe. Only 32-bit variant is registered with disassembler.
// Other variants are reconstructed by disassembler using dmask and tfe.
let VDataDwords = !if(isCmpSwap, 2, 1) in
defm _V1 : MIMG_Atomic_Addr_Helper_m <op, asm, !if(isCmpSwap, VReg_64, VGPR_32), 1, isFP>;
let VDataDwords = !if(isCmpSwap, 4, 2) in
defm _V2 : MIMG_Atomic_Addr_Helper_m <op, asm, !if(isCmpSwap, VReg_128, VReg_64), 0, isFP>;
}
let BaseOpcode = !cast<MIMGBaseOpcode>(NAME) in {
// _V* variants have different dst size, but the size is encoded implicitly,
// using dmask and tfe. Only 32-bit variant is registered with disassembler.
// Other variants are reconstructed by disassembler using dmask and tfe.
let VDataDwords = !if(isCmpSwap, 2, 1) in
defm _V1 : MIMG_Atomic_Addr_Helper_m <op, asm, !if(isCmpSwap, VReg_64, VGPR_32), 1, isFP>;
let VDataDwords = !if(isCmpSwap, 4, 2) in
defm _V2 : MIMG_Atomic_Addr_Helper_m <op, asm, !if(isCmpSwap, VReg_128, VReg_64), 0, isFP>;
}
} // End IsAtomicRet = 1
}
class MIMG_Sampler_Helper <mimgopc op, string asm, RegisterClass dst_rc,

View File

@ -106,7 +106,13 @@ enum : uint64_t {
IsDOT = UINT64_C(1) << 55,
// FLAT instruction accesses FLAT_SCRATCH segment.
IsFlatScratch = UINT64_C(1) << 56
IsFlatScratch = UINT64_C(1) << 56,
// Atomic without return.
IsAtomicNoRet = UINT64_C(1) << 57,
// Atomic with return.
IsAtomicRet = UINT64_C(1) << 58
};
// v_cmp_class_* etc. use a 10-bit mask for what operation is checked.

View File

@ -112,8 +112,7 @@ static bool isValidClauseInst(const MachineInstr &MI, bool IsVMEMClause) {
return false;
if (!MI.mayLoad() || MI.mayStore())
return false;
if (AMDGPU::getAtomicNoRetOp(MI.getOpcode()) != -1 ||
AMDGPU::getAtomicRetOp(MI.getOpcode()) != -1)
if (SIInstrInfo::isAtomic(MI))
return false;
if (IsVMEMClause && !isVMEMClauseInst(MI))
return false;

View File

@ -538,7 +538,7 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
AMDGPU::OpName::data1),
CurrScore);
}
} else if (AMDGPU::getAtomicNoRetOp(Inst.getOpcode()) != -1 &&
} else if (SIInstrInfo::isAtomicRet(Inst) &&
Inst.getOpcode() != AMDGPU::DS_GWS_INIT &&
Inst.getOpcode() != AMDGPU::DS_GWS_SEMA_V &&
Inst.getOpcode() != AMDGPU::DS_GWS_SEMA_BR &&
@ -560,7 +560,7 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
&Inst, TII, TRI, MRI,
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data),
CurrScore);
} else if (AMDGPU::getAtomicNoRetOp(Inst.getOpcode()) != -1) {
} else if (SIInstrInfo::isAtomicRet(Inst)) {
setExpScore(
&Inst, TII, TRI, MRI,
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data),
@ -569,7 +569,7 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
} else if (TII->isMIMG(Inst)) {
if (Inst.mayStore()) {
setExpScore(&Inst, TII, TRI, MRI, 0, CurrScore);
} else if (AMDGPU::getAtomicNoRetOp(Inst.getOpcode()) != -1) {
} else if (SIInstrInfo::isAtomicRet(Inst)) {
setExpScore(
&Inst, TII, TRI, MRI,
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data),
@ -582,7 +582,7 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
} else if (TII->isMUBUF(Inst)) {
if (Inst.mayStore()) {
setExpScore(&Inst, TII, TRI, MRI, 0, CurrScore);
} else if (AMDGPU::getAtomicNoRetOp(Inst.getOpcode()) != -1) {
} else if (SIInstrInfo::isAtomicRet(Inst)) {
setExpScore(
&Inst, TII, TRI, MRI,
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data),
@ -1246,8 +1246,7 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
++FlatASCount;
if (!ST->hasVscnt())
ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_ACCESS, Inst);
else if (Inst.mayLoad() &&
AMDGPU::getAtomicRetOp(Inst.getOpcode()) == -1)
else if (Inst.mayLoad() && !SIInstrInfo::isAtomicNoRet(Inst))
ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_READ_ACCESS, Inst);
else
ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_WRITE_ACCESS, Inst);
@ -1275,8 +1274,7 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
Inst.getOpcode() != AMDGPU::BUFFER_GL1_INV) {
if (!ST->hasVscnt())
ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_ACCESS, Inst);
else if ((Inst.mayLoad() &&
AMDGPU::getAtomicRetOp(Inst.getOpcode()) == -1) ||
else if ((Inst.mayLoad() && !SIInstrInfo::isAtomicNoRet(Inst)) ||
/* IMAGE_GET_RESINFO / IMAGE_GET_LOD */
(TII->isMIMG(Inst) && !Inst.mayLoad() && !Inst.mayStore()))
ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_READ_ACCESS, Inst);
@ -1284,7 +1282,7 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_WRITE_ACCESS, Inst);
if (ST->vmemWriteNeedsExpWaitcnt() &&
(Inst.mayStore() || AMDGPU::getAtomicNoRetOp(Inst.getOpcode()) != -1)) {
(Inst.mayStore() || SIInstrInfo::isAtomicRet(Inst))) {
ScoreBrackets->updateByEvent(TII, TRI, MRI, VMW_GPR_LOCK, Inst);
}
} else if (TII->isSMRD(Inst)) {

View File

@ -135,6 +135,12 @@ class InstSI <dag outs, dag ins, string asm = "",
// Must be 0 for non-FLAT instructions.
field bit IsFlatScratch = 0;
// Atomic without a return.
field bit IsAtomicNoRet = 0;
// Atomic with return.
field bit IsAtomicRet = 0;
// These need to be kept in sync with the enum in SIInstrFlags.
let TSFlags{0} = SALU;
let TSFlags{1} = VALU;
@ -205,6 +211,10 @@ class InstSI <dag outs, dag ins, string asm = "",
let TSFlags{56} = IsFlatScratch;
let TSFlags{57} = IsAtomicNoRet;
let TSFlags{58} = IsAtomicRet;
let SchedRW = [Write32Bit];
let AsmVariantName = AMDGPUAsmVariants.Default;

View File

@ -538,6 +538,32 @@ public:
return get(Opcode).TSFlags & SIInstrFlags::EXP;
}
static bool isAtomicNoRet(const MachineInstr &MI) {
return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicNoRet;
}
bool isAtomicNoRet(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::IsAtomicNoRet;
}
static bool isAtomicRet(const MachineInstr &MI) {
return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicRet;
}
bool isAtomicRet(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::IsAtomicRet;
}
static bool isAtomic(const MachineInstr &MI) {
return MI.getDesc().TSFlags & (SIInstrFlags::IsAtomicRet |
SIInstrFlags::IsAtomicNoRet);
}
bool isAtomic(uint16_t Opcode) const {
return get(Opcode).TSFlags & (SIInstrFlags::IsAtomicRet |
SIInstrFlags::IsAtomicNoRet);
}
static bool isWQM(const MachineInstr &MI) {
return MI.getDesc().TSFlags & SIInstrFlags::WQM;
}
@ -1165,9 +1191,6 @@ namespace AMDGPU {
LLVM_READONLY
int getMUBUFNoLdsInst(uint16_t Opcode);
LLVM_READONLY
int getAtomicRetOp(uint16_t Opcode);
LLVM_READONLY
int getAtomicNoRetOp(uint16_t Opcode);

View File

@ -2408,15 +2408,6 @@ def getMUBUFNoLdsInst : InstrMapping {
let ValueCols = [["0"]];
}
// Maps an atomic opcode to its version with a return value.
def getAtomicRetOp : InstrMapping {
let FilterClass = "AtomicNoRet";
let RowFields = ["NoRetOp"];
let ColFields = ["IsRet"];
let KeyCol = ["0"];
let ValueCols = [["1"]];
}
// Maps an atomic opcode to its returnless version.
def getAtomicNoRetOp : InstrMapping {
let FilterClass = "AtomicNoRet";

View File

@ -455,7 +455,7 @@ private:
/// Return true iff instruction \p MI is an atomic instruction that
/// returns a result.
bool isAtomicRet(const MachineInstr &MI) const {
return AMDGPU::getAtomicNoRetOp(MI.getOpcode()) != -1;
return SIInstrInfo::isAtomicRet(MI);
}
/// Removes all processed atomic pseudo instructions from the current

View File

@ -595,8 +595,7 @@ define amdgpu_kernel void @local_agent_acquire_atomicrmw(
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: s_endpgm
;
@ -722,8 +721,7 @@ define amdgpu_kernel void @local_agent_acq_rel_atomicrmw(
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: s_endpgm
;
@ -790,8 +788,7 @@ define amdgpu_kernel void @local_agent_seq_cst_atomicrmw(
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: s_endpgm
;
@ -856,8 +853,7 @@ define amdgpu_kernel void @local_agent_acquire_ret_atomicrmw(
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
@ -928,8 +924,7 @@ define amdgpu_kernel void @local_agent_acq_rel_ret_atomicrmw(
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
@ -1002,8 +997,7 @@ define amdgpu_kernel void @local_agent_seq_cst_ret_atomicrmw(
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm

View File

@ -595,8 +595,7 @@ define amdgpu_kernel void @local_system_acquire_atomicrmw(
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: s_endpgm
;
@ -722,8 +721,7 @@ define amdgpu_kernel void @local_system_acq_rel_atomicrmw(
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: s_endpgm
;
@ -790,8 +788,7 @@ define amdgpu_kernel void @local_system_seq_cst_atomicrmw(
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: s_endpgm
;
@ -856,8 +853,7 @@ define amdgpu_kernel void @local_system_acquire_ret_atomicrmw(
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
@ -928,8 +924,7 @@ define amdgpu_kernel void @local_system_acq_rel_ret_atomicrmw(
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
@ -1002,8 +997,7 @@ define amdgpu_kernel void @local_system_seq_cst_ret_atomicrmw(
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm

View File

@ -595,8 +595,7 @@ define amdgpu_kernel void @local_workgroup_acquire_atomicrmw(
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: s_endpgm
;
@ -722,8 +721,7 @@ define amdgpu_kernel void @local_workgroup_acq_rel_atomicrmw(
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: s_endpgm
;
@ -790,8 +788,7 @@ define amdgpu_kernel void @local_workgroup_seq_cst_atomicrmw(
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: s_endpgm
;
@ -856,8 +853,7 @@ define amdgpu_kernel void @local_workgroup_acquire_ret_atomicrmw(
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
@ -928,8 +924,7 @@ define amdgpu_kernel void @local_workgroup_acq_rel_ret_atomicrmw(
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
@ -1002,8 +997,7 @@ define amdgpu_kernel void @local_workgroup_seq_cst_ret_atomicrmw(
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm