AMDGPU: Fix assert when legalizing atomic operands
The operand layout is slightly different for the atomic opcodes from the usual MUBUF loads and stores. This should only fix it on SI/CI. VI is still broken because it still emits the addr64 replacement.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@252140 91177308-0d34-0410-b5e6-96231b3b80d8
parent 1c18f49544
commit 76b6b15dcd
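As a rough sketch of the layout difference the message describes, distilled from the diff below (operand order is illustrative, and isMUBUFAtomicWithReturn is a hypothetical helper, not an LLVM API):

    // The three MUBUF operand layouts the legalizer now has to tell apart
    // (SI/CI addr64 forms; names match the diff, order is illustrative):
    //
    //   buffer load/store:   vdata, vaddr, srsrc, soffset, offset, glc, slc, tfe
    //   atomic, no return:   vdata, vaddr, srsrc, soffset, offset, slc
    //   atomic with return:  vdata, vdata_in (tied), vaddr, srsrc, soffset, offset, slc
    //
    // A returning atomic is recognizable by its tied vdata_in operand:
    static bool isMUBUFAtomicWithReturn(const SIInstrInfo &TII,
                                        const MachineInstr &MI) {
      return TII.getNamedOperand(MI, AMDGPU::OpName::vdata_in) != nullptr;
    }

This is exactly the distinction the patch introduces in SIInstrInfo::legalizeOperands: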
lib/Target/AMDGPU/SIInstrInfo.cpp

@@ -1977,25 +1977,61 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
     } else {
       // This instruction is the _OFFSET variant, so we need to convert it to
       // ADDR64.
+      assert(MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration()
+             < AMDGPUSubtarget::VOLCANIC_ISLANDS &&
+             "FIXME: Need to emit flat atomics here");
+
       MachineOperand *VData = getNamedOperand(*MI, AMDGPU::OpName::vdata);
       MachineOperand *Offset = getNamedOperand(*MI, AMDGPU::OpName::offset);
       MachineOperand *SOffset = getNamedOperand(*MI, AMDGPU::OpName::soffset);

       // Create the new instruction.
       unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI->getOpcode());
-      MachineInstr *Addr64 =
-        BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode))
-        .addOperand(*VData)
-        .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
-                                    // This will be replaced later
-                                    // with the new value of vaddr.
-        .addOperand(*SRsrc)
-        .addOperand(*SOffset)
-        .addOperand(*Offset)
-        .addImm(0) // glc
-        .addImm(0) // slc
-        .addImm(0) // tfe
-        .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+
+      // Atomics with return have an additional tied operand and are
+      // missing some of the special bits.
+      MachineOperand *VDataIn = getNamedOperand(*MI, AMDGPU::OpName::vdata_in);
+      MachineInstr *Addr64;
+
+      if (!VDataIn) {
+        // Regular buffer load / store.
+        MachineInstrBuilder MIB
+          = BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode))
+          .addOperand(*VData)
+          .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
+                                      // This will be replaced later
+                                      // with the new value of vaddr.
+          .addOperand(*SRsrc)
+          .addOperand(*SOffset)
+          .addOperand(*Offset);
+
+        // Atomics do not have this operand.
+        if (const MachineOperand *GLC
+            = getNamedOperand(*MI, AMDGPU::OpName::glc)) {
+          MIB.addImm(GLC->getImm());
+        }
+
+        MIB.addImm(getNamedImmOperand(*MI, AMDGPU::OpName::slc));
+
+        if (const MachineOperand *TFE
+            = getNamedOperand(*MI, AMDGPU::OpName::tfe)) {
+          MIB.addImm(TFE->getImm());
+        }
+
+        MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+        Addr64 = MIB;
+      } else {
+        // Atomics with return.
+        Addr64 = BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode))
+          .addOperand(*VData)
+          .addOperand(*VDataIn)
+          .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
+                                      // This will be replaced later
+                                      // with the new value of vaddr.
+          .addOperand(*SRsrc)
+          .addOperand(*SOffset)
+          .addOperand(*Offset)
+          .addImm(getNamedImmOperand(*MI, AMDGPU::OpName::slc))
+          .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+      }

       MI->removeFromParent();
       MI = Addr64;
lib/Target/AMDGPU/SIInstrInfo.h

@@ -434,6 +434,12 @@ public:
     return getNamedOperand(const_cast<MachineInstr &>(MI), OpName);
   }

+  /// Get required immediate operand
+  int64_t getNamedImmOperand(const MachineInstr &MI, unsigned OpName) const {
+    int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName);
+    return MI.getOperand(Idx).getImm();
+  }
+
   uint64_t getDefaultRsrcDataFormat() const;
   uint64_t getScratchRsrcWords23() const;
 };
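For context, a hedged sketch of how the new required-immediate helper pairs with the existing optional getNamedOperand; the caller code here is hypothetical:

    // slc exists on every MUBUF form touched by this patch, so it can be
    // read unconditionally; glc is absent on atomics, so probe it first.
    int64_t SLC = TII.getNamedImmOperand(*MI, AMDGPU::OpName::slc);
    int64_t GLC = 0;
    if (const MachineOperand *GLCOp =
            TII.getNamedOperand(*MI, AMDGPU::OpName::glc))
      GLC = GLCOp->getImm();

Note that getNamedImmOperand does no checking of its own: when the operand is missing, getNamedOperandIdx returns -1 and getOperand on that index is invalid, which is why the comment documents the operand as required.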
lib/Target/AMDGPU/SIInstructions.td

@@ -2433,6 +2433,8 @@ multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc,
 } // mayStore = 1, mayLoad = 1, hasPostISelHook = 1
 }

+// FIXME: tfe can't be an operand because it requires a separate
+// opcode because it needs an N+1 register class dest register.
 multiclass MUBUF_Load_Helper <mubuf op, string name, RegisterClass regClass,
                               ValueType load_vt = i32,
                               SDPatternOperator ld = null_frag> {
test/CodeGen/AMDGPU/move-to-valu-atomicrmw.ll (new file, 52 lines)
@@ -0,0 +1,52 @@
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; XUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; FIXME: broken on VI because flat instructions need to be emitted
; instead of the addr64 equivalents of the _OFFSET variants.

; Check that moving the pointer out of the resource descriptor to
; vaddr works for atomics.

declare i32 @llvm.r600.read.tidig.x() #1

; GCN-LABEL: {{^}}atomic_max_i32:
; GCN: buffer_atomic_smax v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:400 glc{{$}}
define void @atomic_max_i32(i32 addrspace(1)* %out, i32 addrspace(1)* addrspace(1)* %in, i32 addrspace(1)* %x, i32 %y) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %tid.gep = getelementptr i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %in, i32 %tid
  %ptr = load volatile i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %tid.gep
  %xor = xor i32 %tid, 1
  %cmp = icmp ne i32 %xor, 0
  br i1 %cmp, label %atomic, label %exit

atomic:
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 100
  %ret = atomicrmw max i32 addrspace(1)* %gep, i32 %y seq_cst
  store i32 %ret, i32 addrspace(1)* %out
  br label %exit

exit:
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i32_noret:
; GCN: buffer_atomic_smax v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:400{{$}}
define void @atomic_max_i32_noret(i32 addrspace(1)* %out, i32 addrspace(1)* addrspace(1)* %in, i32 addrspace(1)* %x, i32 %y) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %tid.gep = getelementptr i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %in, i32 %tid
  %ptr = load volatile i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %tid.gep
  %xor = xor i32 %tid, 1
  %cmp = icmp ne i32 %xor, 0
  br i1 %cmp, label %atomic, label %exit

atomic:
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 100
  %ret = atomicrmw max i32 addrspace(1)* %gep, i32 %y seq_cst
  br label %exit

exit:
  ret void
}

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
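Two details worth noting in the CHECK lines above. First, the offset:400 immediate is the getelementptr index of 100 scaled by the 4-byte i32 element size (100 × 4 = 400 bytes). Second, glc appears only on the returning atomic: for GCN buffer atomics the glc bit means "return the pre-op value" and is baked into the _RTN opcode rather than carried as an explicit operand, which is the "missing some of the special bits" case the new code path handles.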