Mirror of https://github.com/capstone-engine/llvm-capstone.git, synced 2024-11-29 00:21:14 +00:00
[AMDGPU] Add two TSFlags: IsAtomicNoRtn and IsAtomicRtn
We have been using the AtomicNoRet map in several places to determine whether an instruction is an atomic and whether it is the returning (rtn) or non-returning (nortn) form. That approach does not always work, because some instructions exist only in a rtn or only in a nortn form; ds_wrxchg_rtn_b32, for example, has no nortn version. Classifying such instructions correctly changes the output of the memory legalizer tests.

Differential Revision: https://reviews.llvm.org/D96639
Parent: c465429f28
Commit: 5cf9292ce3
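To make the change concrete, here is a minimal sketch of the query pattern this commit replaces. It uses only names that appear in the diff below; the two wrapper functions are hypothetical illustrations, not part of the patch, and the snippet assumes it is built inside the AMDGPU backend where SIInstrInfo.h is available.

// Sketch only: classifying a returning atomic before and after this patch.
#include "SIInstrInfo.h"

using namespace llvm;

// Before: infer "atomic with return" from the AtomicNoRet instruction map.
// ds_wrxchg_rtn_b32 has no nortn counterpart, so the lookup returns -1 and
// the instruction is misclassified as a non-returning access.
static bool isAtomicRetOld(const MachineInstr &MI) {
  return AMDGPU::getAtomicNoRetOp(MI.getOpcode()) != -1;
}

// After: read the dedicated TSFlags bits (57 = IsAtomicNoRet, 58 = IsAtomicRet)
// that every atomic pseudo now sets in its TableGen definition, whether or not
// a counterpart opcode exists.
static bool isAtomicRetNew(const MachineInstr &MI) {
  return SIInstrInfo::isAtomicRet(MI);
}

The hunks below apply this substitution at each former use of the map accessors and drop the now-unused getAtomicRetOp mapping.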
@@ -696,6 +696,7 @@ class MUBUF_AtomicNoRet_Pseudo<string opName, int addrKind,
   let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
   let glc_value = 0;
   let dlc_value = 0;
+  let IsAtomicNoRet = 1;
   let AsmMatchConverter = "cvtMubufAtomic";
 }
 
@@ -714,6 +715,7 @@ class MUBUF_AtomicRet_Pseudo<string opName, int addrKind,
   let PseudoInstr = opName # "_rtn_" # getAddrName<addrKindCopy>.ret;
   let glc_value = 1;
   let dlc_value = 0;
+  let IsAtomicRet = 1;
   let Constraints = "$vdata = $vdata_in";
   let DisableEncoding = "$vdata_in";
   let AsmMatchConverter = "cvtMubufAtomicReturn";
@@ -102,6 +102,7 @@ class DS_1A1D_NORET<string opName, RegisterClass rc = VGPR_32>
 
   let has_data1 = 0;
   let has_vdst = 0;
+  let IsAtomicNoRet = 1;
 }
 
 multiclass DS_1A1D_NORET_mc<string opName, RegisterClass rc = VGPR_32> {
@@ -121,6 +122,7 @@ class DS_1A2D_NORET<string opName, RegisterClass rc = VGPR_32>
                  " $addr, $data0, $data1$offset$gds"> {
 
   let has_vdst = 0;
+  let IsAtomicNoRet = 1;
 }
 
 multiclass DS_1A2D_NORET_mc<string opName, RegisterClass rc = VGPR_32> {
@@ -161,6 +163,7 @@ class DS_1A1D_RET <string opName, RegisterClass rc = VGPR_32>
 
   let hasPostISelHook = 1;
   let has_data1 = 0;
+  let IsAtomicRet = 1;
 }
 
 multiclass DS_1A1D_RET_mc <string opName, RegisterClass rc = VGPR_32,
@@ -184,6 +187,7 @@ class DS_1A2D_RET<string opName,
                  " $vdst, $addr, $data0, $data1$offset$gds"> {
 
   let hasPostISelHook = 1;
+  let IsAtomicRet = 1;
 }
 
 multiclass DS_1A2D_RET_mc<string opName,
@@ -345,6 +345,7 @@ class FLAT_AtomicNoRet_Pseudo<string opName, dag outs, dag ins,
   let dlcValue = 0;
   let has_vdst = 0;
   let maybeAtomic = 1;
+  let IsAtomicNoRet = 1;
 }
 
 class FLAT_AtomicRet_Pseudo<string opName, dag outs, dag ins,
@@ -354,6 +355,8 @@ class FLAT_AtomicRet_Pseudo<string opName, dag outs, dag ins,
   let has_vdst = 1;
   let glcValue = 1;
   let dlcValue = 0;
+  let IsAtomicNoRet = 0;
+  let IsAtomicRet = 1;
   let PseudoInstr = NAME # "_RTN";
 }
 
@@ -39,6 +39,7 @@ class MIMGBaseOpcode : PredicateControl {
   bit Coordinates = 1;
   bit LodOrClampOrMip = 0;
   bit HasD16 = 0;
+  bit IsAtomicRet = 0;
 }
 
 def MIMGBaseOpcode : GenericEnum {
@@ -556,20 +557,22 @@ multiclass MIMG_Atomic_Addr_Helper_m <mimgopc op, string asm,
 }
 
 multiclass MIMG_Atomic <mimgopc op, string asm, bit isCmpSwap = 0, bit isFP = 0> { // 64-bit atomics
-  def "" : MIMGBaseOpcode {
-    let Atomic = 1;
-    let AtomicX2 = isCmpSwap;
-  }
+  let IsAtomicRet = 1 in {
+    def "" : MIMGBaseOpcode {
+      let Atomic = 1;
+      let AtomicX2 = isCmpSwap;
+    }
 
-  let BaseOpcode = !cast<MIMGBaseOpcode>(NAME) in {
-    // _V* variants have different dst size, but the size is encoded implicitly,
-    // using dmask and tfe. Only 32-bit variant is registered with disassembler.
-    // Other variants are reconstructed by disassembler using dmask and tfe.
-    let VDataDwords = !if(isCmpSwap, 2, 1) in
-      defm _V1 : MIMG_Atomic_Addr_Helper_m <op, asm, !if(isCmpSwap, VReg_64, VGPR_32), 1, isFP>;
-    let VDataDwords = !if(isCmpSwap, 4, 2) in
-      defm _V2 : MIMG_Atomic_Addr_Helper_m <op, asm, !if(isCmpSwap, VReg_128, VReg_64), 0, isFP>;
-  }
+    let BaseOpcode = !cast<MIMGBaseOpcode>(NAME) in {
+      // _V* variants have different dst size, but the size is encoded implicitly,
+      // using dmask and tfe. Only 32-bit variant is registered with disassembler.
+      // Other variants are reconstructed by disassembler using dmask and tfe.
+      let VDataDwords = !if(isCmpSwap, 2, 1) in
+        defm _V1 : MIMG_Atomic_Addr_Helper_m <op, asm, !if(isCmpSwap, VReg_64, VGPR_32), 1, isFP>;
+      let VDataDwords = !if(isCmpSwap, 4, 2) in
+        defm _V2 : MIMG_Atomic_Addr_Helper_m <op, asm, !if(isCmpSwap, VReg_128, VReg_64), 0, isFP>;
+    }
+  } // End IsAtomicRet = 1
 }
 
 class MIMG_Sampler_Helper <mimgopc op, string asm, RegisterClass dst_rc,
@@ -106,7 +106,13 @@ enum : uint64_t {
   IsDOT = UINT64_C(1) << 55,
 
   // FLAT instruction accesses FLAT_SCRATCH segment.
-  IsFlatScratch = UINT64_C(1) << 56
+  IsFlatScratch = UINT64_C(1) << 56,
+
+  // Atomic without return.
+  IsAtomicNoRet = UINT64_C(1) << 57,
+
+  // Atomic with return.
+  IsAtomicRet = UINT64_C(1) << 58
 };
 
 // v_cmp_class_* etc. use a 10-bit mask for what operation is checked.
@@ -112,8 +112,7 @@ static bool isValidClauseInst(const MachineInstr &MI, bool IsVMEMClause) {
     return false;
   if (!MI.mayLoad() || MI.mayStore())
     return false;
-  if (AMDGPU::getAtomicNoRetOp(MI.getOpcode()) != -1 ||
-      AMDGPU::getAtomicRetOp(MI.getOpcode()) != -1)
+  if (SIInstrInfo::isAtomic(MI))
     return false;
   if (IsVMEMClause && !isVMEMClauseInst(MI))
     return false;
@@ -538,7 +538,7 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
                                                  AMDGPU::OpName::data1),
                       CurrScore);
         }
-      } else if (AMDGPU::getAtomicNoRetOp(Inst.getOpcode()) != -1 &&
+      } else if (SIInstrInfo::isAtomicRet(Inst) &&
                  Inst.getOpcode() != AMDGPU::DS_GWS_INIT &&
                  Inst.getOpcode() != AMDGPU::DS_GWS_SEMA_V &&
                  Inst.getOpcode() != AMDGPU::DS_GWS_SEMA_BR &&
@@ -560,7 +560,7 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
             &Inst, TII, TRI, MRI,
             AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data),
             CurrScore);
-      } else if (AMDGPU::getAtomicNoRetOp(Inst.getOpcode()) != -1) {
+      } else if (SIInstrInfo::isAtomicRet(Inst)) {
         setExpScore(
             &Inst, TII, TRI, MRI,
             AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data),
@@ -569,7 +569,7 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
     } else if (TII->isMIMG(Inst)) {
       if (Inst.mayStore()) {
         setExpScore(&Inst, TII, TRI, MRI, 0, CurrScore);
-      } else if (AMDGPU::getAtomicNoRetOp(Inst.getOpcode()) != -1) {
+      } else if (SIInstrInfo::isAtomicRet(Inst)) {
         setExpScore(
             &Inst, TII, TRI, MRI,
             AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data),
@@ -582,7 +582,7 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
     } else if (TII->isMUBUF(Inst)) {
      if (Inst.mayStore()) {
        setExpScore(&Inst, TII, TRI, MRI, 0, CurrScore);
-      } else if (AMDGPU::getAtomicNoRetOp(Inst.getOpcode()) != -1) {
+      } else if (SIInstrInfo::isAtomicRet(Inst)) {
        setExpScore(
            &Inst, TII, TRI, MRI,
            AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data),
@@ -1246,8 +1246,7 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
       ++FlatASCount;
       if (!ST->hasVscnt())
         ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_ACCESS, Inst);
-      else if (Inst.mayLoad() &&
-               AMDGPU::getAtomicRetOp(Inst.getOpcode()) == -1)
+      else if (Inst.mayLoad() && !SIInstrInfo::isAtomicNoRet(Inst))
         ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_READ_ACCESS, Inst);
       else
         ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_WRITE_ACCESS, Inst);
@@ -1275,8 +1274,7 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
               Inst.getOpcode() != AMDGPU::BUFFER_GL1_INV) {
       if (!ST->hasVscnt())
         ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_ACCESS, Inst);
-      else if ((Inst.mayLoad() &&
-                AMDGPU::getAtomicRetOp(Inst.getOpcode()) == -1) ||
+      else if ((Inst.mayLoad() && !SIInstrInfo::isAtomicNoRet(Inst)) ||
               /* IMAGE_GET_RESINFO / IMAGE_GET_LOD */
               (TII->isMIMG(Inst) && !Inst.mayLoad() && !Inst.mayStore()))
         ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_READ_ACCESS, Inst);
@@ -1284,7 +1282,7 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
         ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_WRITE_ACCESS, Inst);
 
       if (ST->vmemWriteNeedsExpWaitcnt() &&
-          (Inst.mayStore() || AMDGPU::getAtomicNoRetOp(Inst.getOpcode()) != -1)) {
+          (Inst.mayStore() || SIInstrInfo::isAtomicRet(Inst))) {
         ScoreBrackets->updateByEvent(TII, TRI, MRI, VMW_GPR_LOCK, Inst);
       }
     } else if (TII->isSMRD(Inst)) {
@@ -135,6 +135,12 @@ class InstSI <dag outs, dag ins, string asm = "",
   // Must be 0 for non-FLAT instructions.
   field bit IsFlatScratch = 0;
 
+  // Atomic without a return.
+  field bit IsAtomicNoRet = 0;
+
+  // Atomic with return.
+  field bit IsAtomicRet = 0;
+
   // These need to be kept in sync with the enum in SIInstrFlags.
   let TSFlags{0} = SALU;
   let TSFlags{1} = VALU;
@@ -205,6 +211,10 @@ class InstSI <dag outs, dag ins, string asm = "",
 
   let TSFlags{56} = IsFlatScratch;
 
+  let TSFlags{57} = IsAtomicNoRet;
+
+  let TSFlags{58} = IsAtomicRet;
+
   let SchedRW = [Write32Bit];
 
   let AsmVariantName = AMDGPUAsmVariants.Default;
@@ -538,6 +538,32 @@ public:
     return get(Opcode).TSFlags & SIInstrFlags::EXP;
   }
 
+  static bool isAtomicNoRet(const MachineInstr &MI) {
+    return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicNoRet;
+  }
+
+  bool isAtomicNoRet(uint16_t Opcode) const {
+    return get(Opcode).TSFlags & SIInstrFlags::IsAtomicNoRet;
+  }
+
+  static bool isAtomicRet(const MachineInstr &MI) {
+    return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicRet;
+  }
+
+  bool isAtomicRet(uint16_t Opcode) const {
+    return get(Opcode).TSFlags & SIInstrFlags::IsAtomicRet;
+  }
+
+  static bool isAtomic(const MachineInstr &MI) {
+    return MI.getDesc().TSFlags & (SIInstrFlags::IsAtomicRet |
+                                   SIInstrFlags::IsAtomicNoRet);
+  }
+
+  bool isAtomic(uint16_t Opcode) const {
+    return get(Opcode).TSFlags & (SIInstrFlags::IsAtomicRet |
+                                  SIInstrFlags::IsAtomicNoRet);
+  }
+
   static bool isWQM(const MachineInstr &MI) {
     return MI.getDesc().TSFlags & SIInstrFlags::WQM;
   }
@@ -1165,9 +1191,6 @@ namespace AMDGPU {
   LLVM_READONLY
   int getMUBUFNoLdsInst(uint16_t Opcode);
 
-  LLVM_READONLY
-  int getAtomicRetOp(uint16_t Opcode);
-
   LLVM_READONLY
   int getAtomicNoRetOp(uint16_t Opcode);
 
@@ -2408,15 +2408,6 @@ def getMUBUFNoLdsInst : InstrMapping {
   let ValueCols = [["0"]];
 }
 
-// Maps an atomic opcode to its version with a return value.
-def getAtomicRetOp : InstrMapping {
-  let FilterClass = "AtomicNoRet";
-  let RowFields = ["NoRetOp"];
-  let ColFields = ["IsRet"];
-  let KeyCol = ["0"];
-  let ValueCols = [["1"]];
-}
-
 // Maps an atomic opcode to its returnless version.
 def getAtomicNoRetOp : InstrMapping {
   let FilterClass = "AtomicNoRet";
@@ -455,7 +455,7 @@ private:
   /// Return true iff instruction \p MI is a atomic instruction that
   /// returns a result.
   bool isAtomicRet(const MachineInstr &MI) const {
-    return AMDGPU::getAtomicNoRetOp(MI.getOpcode()) != -1;
+    return SIInstrInfo::isAtomicRet(MI);
   }
 
   /// Removes all processed atomic pseudo instructions from the current
@@ -595,8 +595,7 @@ define amdgpu_kernel void @local_agent_acquire_atomicrmw(
 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
-; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX10-WGP-NEXT: buffer_gl0_inv
 ; GFX10-WGP-NEXT: s_endpgm
 ;
@@ -722,8 +721,7 @@ define amdgpu_kernel void @local_agent_acq_rel_atomicrmw(
 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
-; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX10-WGP-NEXT: buffer_gl0_inv
 ; GFX10-WGP-NEXT: s_endpgm
 ;
@@ -790,8 +788,7 @@ define amdgpu_kernel void @local_agent_seq_cst_atomicrmw(
 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
-; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX10-WGP-NEXT: buffer_gl0_inv
 ; GFX10-WGP-NEXT: s_endpgm
 ;
@@ -856,8 +853,7 @@ define amdgpu_kernel void @local_agent_acquire_ret_atomicrmw(
 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
-; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX10-WGP-NEXT: buffer_gl0_inv
 ; GFX10-WGP-NEXT: ds_write_b32 v0, v1
 ; GFX10-WGP-NEXT: s_endpgm
@@ -928,8 +924,7 @@ define amdgpu_kernel void @local_agent_acq_rel_ret_atomicrmw(
 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
-; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX10-WGP-NEXT: buffer_gl0_inv
 ; GFX10-WGP-NEXT: ds_write_b32 v0, v1
 ; GFX10-WGP-NEXT: s_endpgm
@@ -1002,8 +997,7 @@ define amdgpu_kernel void @local_agent_seq_cst_ret_atomicrmw(
 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
-; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX10-WGP-NEXT: buffer_gl0_inv
 ; GFX10-WGP-NEXT: ds_write_b32 v0, v1
 ; GFX10-WGP-NEXT: s_endpgm
@@ -595,8 +595,7 @@ define amdgpu_kernel void @local_system_acquire_atomicrmw(
 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
-; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX10-WGP-NEXT: buffer_gl0_inv
 ; GFX10-WGP-NEXT: s_endpgm
 ;
@@ -722,8 +721,7 @@ define amdgpu_kernel void @local_system_acq_rel_atomicrmw(
 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
-; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX10-WGP-NEXT: buffer_gl0_inv
 ; GFX10-WGP-NEXT: s_endpgm
 ;
@@ -790,8 +788,7 @@ define amdgpu_kernel void @local_system_seq_cst_atomicrmw(
 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
-; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX10-WGP-NEXT: buffer_gl0_inv
 ; GFX10-WGP-NEXT: s_endpgm
 ;
@@ -856,8 +853,7 @@ define amdgpu_kernel void @local_system_acquire_ret_atomicrmw(
 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
-; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX10-WGP-NEXT: buffer_gl0_inv
 ; GFX10-WGP-NEXT: ds_write_b32 v0, v1
 ; GFX10-WGP-NEXT: s_endpgm
@@ -928,8 +924,7 @@ define amdgpu_kernel void @local_system_acq_rel_ret_atomicrmw(
 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
-; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX10-WGP-NEXT: buffer_gl0_inv
 ; GFX10-WGP-NEXT: ds_write_b32 v0, v1
 ; GFX10-WGP-NEXT: s_endpgm
@@ -1002,8 +997,7 @@ define amdgpu_kernel void @local_system_seq_cst_ret_atomicrmw(
 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
-; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX10-WGP-NEXT: buffer_gl0_inv
 ; GFX10-WGP-NEXT: ds_write_b32 v0, v1
 ; GFX10-WGP-NEXT: s_endpgm
@@ -595,8 +595,7 @@ define amdgpu_kernel void @local_workgroup_acquire_atomicrmw(
 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
-; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX10-WGP-NEXT: buffer_gl0_inv
 ; GFX10-WGP-NEXT: s_endpgm
 ;
@@ -722,8 +721,7 @@ define amdgpu_kernel void @local_workgroup_acq_rel_atomicrmw(
 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
-; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX10-WGP-NEXT: buffer_gl0_inv
 ; GFX10-WGP-NEXT: s_endpgm
 ;
@@ -790,8 +788,7 @@ define amdgpu_kernel void @local_workgroup_seq_cst_atomicrmw(
 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
-; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX10-WGP-NEXT: buffer_gl0_inv
 ; GFX10-WGP-NEXT: s_endpgm
 ;
@@ -856,8 +853,7 @@ define amdgpu_kernel void @local_workgroup_acquire_ret_atomicrmw(
 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
-; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX10-WGP-NEXT: buffer_gl0_inv
 ; GFX10-WGP-NEXT: ds_write_b32 v0, v1
 ; GFX10-WGP-NEXT: s_endpgm
@@ -928,8 +924,7 @@ define amdgpu_kernel void @local_workgroup_acq_rel_ret_atomicrmw(
 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
-; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX10-WGP-NEXT: buffer_gl0_inv
 ; GFX10-WGP-NEXT: ds_write_b32 v0, v1
 ; GFX10-WGP-NEXT: s_endpgm
@@ -1002,8 +997,7 @@ define amdgpu_kernel void @local_workgroup_seq_cst_ret_atomicrmw(
 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
-; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX10-WGP-NEXT: buffer_gl0_inv
 ; GFX10-WGP-NEXT: ds_write_b32 v0, v1
 ; GFX10-WGP-NEXT: s_endpgm