mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-11-30 17:21:10 +00:00
[AMDGPU] Move architected SGPR implementation into isel (#79120)
(cherry picked from commit 70fc9703788e8965813c5b677a85cb84b66671b6)
This commit is contained in:
parent
ddbdd7b267
commit
27654471cc
@ -4178,10 +4178,45 @@ bool AMDGPULegalizerInfo::loadInputValue(
|
||||
Register DstReg, MachineIRBuilder &B,
|
||||
AMDGPUFunctionArgInfo::PreloadedValue ArgType) const {
|
||||
const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>();
|
||||
const ArgDescriptor *Arg;
|
||||
const ArgDescriptor *Arg = nullptr;
|
||||
const TargetRegisterClass *ArgRC;
|
||||
LLT ArgTy;
|
||||
std::tie(Arg, ArgRC, ArgTy) = MFI->getPreloadedValue(ArgType);
|
||||
|
||||
CallingConv::ID CC = B.getMF().getFunction().getCallingConv();
|
||||
const ArgDescriptor WorkGroupIDX =
|
||||
ArgDescriptor::createRegister(AMDGPU::TTMP9);
|
||||
// If GridZ is not programmed in an entry function then the hardware will set
|
||||
// it to all zeros, so there is no need to mask the GridY value in the low
|
||||
// order bits.
|
||||
const ArgDescriptor WorkGroupIDY = ArgDescriptor::createRegister(
|
||||
AMDGPU::TTMP7,
|
||||
AMDGPU::isEntryFunctionCC(CC) && !MFI->hasWorkGroupIDZ() ? ~0u : 0xFFFFu);
|
||||
const ArgDescriptor WorkGroupIDZ =
|
||||
ArgDescriptor::createRegister(AMDGPU::TTMP7, 0xFFFF0000u);
|
||||
if (ST.hasArchitectedSGPRs() && AMDGPU::isCompute(CC)) {
|
||||
switch (ArgType) {
|
||||
case AMDGPUFunctionArgInfo::WORKGROUP_ID_X:
|
||||
Arg = &WorkGroupIDX;
|
||||
ArgRC = &AMDGPU::SReg_32RegClass;
|
||||
ArgTy = LLT::scalar(32);
|
||||
break;
|
||||
case AMDGPUFunctionArgInfo::WORKGROUP_ID_Y:
|
||||
Arg = &WorkGroupIDY;
|
||||
ArgRC = &AMDGPU::SReg_32RegClass;
|
||||
ArgTy = LLT::scalar(32);
|
||||
break;
|
||||
case AMDGPUFunctionArgInfo::WORKGROUP_ID_Z:
|
||||
Arg = &WorkGroupIDZ;
|
||||
ArgRC = &AMDGPU::SReg_32RegClass;
|
||||
ArgTy = LLT::scalar(32);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!Arg)
|
||||
std::tie(Arg, ArgRC, ArgTy) = MFI->getPreloadedValue(ArgType);
|
||||
|
||||
if (!Arg) {
|
||||
if (ArgType == AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR) {
|
||||
|
@ -2072,11 +2072,45 @@ SDValue SITargetLowering::getPreloadedValue(SelectionDAG &DAG,
|
||||
const SIMachineFunctionInfo &MFI,
|
||||
EVT VT,
|
||||
AMDGPUFunctionArgInfo::PreloadedValue PVID) const {
|
||||
const ArgDescriptor *Reg;
|
||||
const ArgDescriptor *Reg = nullptr;
|
||||
const TargetRegisterClass *RC;
|
||||
LLT Ty;
|
||||
|
||||
std::tie(Reg, RC, Ty) = MFI.getPreloadedValue(PVID);
|
||||
CallingConv::ID CC = DAG.getMachineFunction().getFunction().getCallingConv();
|
||||
const ArgDescriptor WorkGroupIDX =
|
||||
ArgDescriptor::createRegister(AMDGPU::TTMP9);
|
||||
// If GridZ is not programmed in an entry function then the hardware will set
|
||||
// it to all zeros, so there is no need to mask the GridY value in the low
|
||||
// order bits.
|
||||
const ArgDescriptor WorkGroupIDY = ArgDescriptor::createRegister(
|
||||
AMDGPU::TTMP7,
|
||||
AMDGPU::isEntryFunctionCC(CC) && !MFI.hasWorkGroupIDZ() ? ~0u : 0xFFFFu);
|
||||
const ArgDescriptor WorkGroupIDZ =
|
||||
ArgDescriptor::createRegister(AMDGPU::TTMP7, 0xFFFF0000u);
|
||||
if (Subtarget->hasArchitectedSGPRs() && AMDGPU::isCompute(CC)) {
|
||||
switch (PVID) {
|
||||
case AMDGPUFunctionArgInfo::WORKGROUP_ID_X:
|
||||
Reg = &WorkGroupIDX;
|
||||
RC = &AMDGPU::SReg_32RegClass;
|
||||
Ty = LLT::scalar(32);
|
||||
break;
|
||||
case AMDGPUFunctionArgInfo::WORKGROUP_ID_Y:
|
||||
Reg = &WorkGroupIDY;
|
||||
RC = &AMDGPU::SReg_32RegClass;
|
||||
Ty = LLT::scalar(32);
|
||||
break;
|
||||
case AMDGPUFunctionArgInfo::WORKGROUP_ID_Z:
|
||||
Reg = &WorkGroupIDZ;
|
||||
RC = &AMDGPU::SReg_32RegClass;
|
||||
Ty = LLT::scalar(32);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!Reg)
|
||||
std::tie(Reg, RC, Ty) = MFI.getPreloadedValue(PVID);
|
||||
if (!Reg) {
|
||||
if (PVID == AMDGPUFunctionArgInfo::PreloadedValue::KERNARG_SEGMENT_PTR) {
|
||||
// It's possible for a kernarg intrinsic call to appear in a kernel with
|
||||
@ -2505,28 +2539,24 @@ void SITargetLowering::allocateSystemSGPRs(CCState &CCInfo,
|
||||
}
|
||||
}
|
||||
|
||||
if (Info.hasWorkGroupIDX()) {
|
||||
Register Reg = Info.addWorkGroupIDX(HasArchitectedSGPRs);
|
||||
if (!HasArchitectedSGPRs)
|
||||
if (!HasArchitectedSGPRs) {
|
||||
if (Info.hasWorkGroupIDX()) {
|
||||
Register Reg = Info.addWorkGroupIDX();
|
||||
MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
|
||||
CCInfo.AllocateReg(Reg);
|
||||
}
|
||||
|
||||
CCInfo.AllocateReg(Reg);
|
||||
}
|
||||
|
||||
if (Info.hasWorkGroupIDY()) {
|
||||
Register Reg = Info.addWorkGroupIDY(HasArchitectedSGPRs);
|
||||
if (!HasArchitectedSGPRs)
|
||||
if (Info.hasWorkGroupIDY()) {
|
||||
Register Reg = Info.addWorkGroupIDY();
|
||||
MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
|
||||
CCInfo.AllocateReg(Reg);
|
||||
}
|
||||
|
||||
CCInfo.AllocateReg(Reg);
|
||||
}
|
||||
|
||||
if (Info.hasWorkGroupIDZ()) {
|
||||
Register Reg = Info.addWorkGroupIDZ(HasArchitectedSGPRs);
|
||||
if (!HasArchitectedSGPRs)
|
||||
if (Info.hasWorkGroupIDZ()) {
|
||||
Register Reg = Info.addWorkGroupIDZ();
|
||||
MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
|
||||
|
||||
CCInfo.AllocateReg(Reg);
|
||||
CCInfo.AllocateReg(Reg);
|
||||
}
|
||||
}
|
||||
|
||||
if (Info.hasWorkGroupInfo()) {
|
||||
|
@ -751,35 +751,21 @@ public:
|
||||
}
|
||||
|
||||
// Add system SGPRs.
|
||||
Register addWorkGroupIDX(bool HasArchitectedSGPRs) {
|
||||
Register Reg =
|
||||
HasArchitectedSGPRs ? (MCPhysReg)AMDGPU::TTMP9 : getNextSystemSGPR();
|
||||
ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(Reg);
|
||||
if (!HasArchitectedSGPRs)
|
||||
NumSystemSGPRs += 1;
|
||||
|
||||
Register addWorkGroupIDX() {
|
||||
ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
|
||||
NumSystemSGPRs += 1;
|
||||
return ArgInfo.WorkGroupIDX.getRegister();
|
||||
}
|
||||
|
||||
Register addWorkGroupIDY(bool HasArchitectedSGPRs) {
|
||||
Register Reg =
|
||||
HasArchitectedSGPRs ? (MCPhysReg)AMDGPU::TTMP7 : getNextSystemSGPR();
|
||||
unsigned Mask = HasArchitectedSGPRs && hasWorkGroupIDZ() ? 0xffff : ~0u;
|
||||
ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(Reg, Mask);
|
||||
if (!HasArchitectedSGPRs)
|
||||
NumSystemSGPRs += 1;
|
||||
|
||||
Register addWorkGroupIDY() {
|
||||
ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
|
||||
NumSystemSGPRs += 1;
|
||||
return ArgInfo.WorkGroupIDY.getRegister();
|
||||
}
|
||||
|
||||
Register addWorkGroupIDZ(bool HasArchitectedSGPRs) {
|
||||
Register Reg =
|
||||
HasArchitectedSGPRs ? (MCPhysReg)AMDGPU::TTMP7 : getNextSystemSGPR();
|
||||
unsigned Mask = HasArchitectedSGPRs ? 0xffff << 16 : ~0u;
|
||||
ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(Reg, Mask);
|
||||
if (!HasArchitectedSGPRs)
|
||||
NumSystemSGPRs += 1;
|
||||
|
||||
Register addWorkGroupIDZ() {
|
||||
ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
|
||||
NumSystemSGPRs += 1;
|
||||
return ArgInfo.WorkGroupIDZ.getRegister();
|
||||
}
|
||||
|
||||
|
@ -55,7 +55,6 @@ define amdgpu_kernel void @indirect_call_known_no_special_inputs() {
|
||||
; GFX12-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
|
||||
; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v4, 0
|
||||
; GFX12-NEXT: v_mov_b32_e32 v31, v0
|
||||
; GFX12-NEXT: s_mov_b32 s12, ttmp9
|
||||
; GFX12-NEXT: s_mov_b64 s[8:9], 0
|
||||
; GFX12-NEXT: s_mov_b32 s32, 0
|
||||
; GFX12-NEXT: s_wait_kmcnt 0x0
|
||||
|
295
llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll
Normal file
295
llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll
Normal file
@ -0,0 +1,295 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH,GFX9ARCH-SDAG %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH,GFX9ARCH-GISEL %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
|
||||
|
||||
define amdgpu_kernel void @workgroup_ids_kernel() {
|
||||
; GFX9-LABEL: workgroup_ids_kernel:
|
||||
; GFX9: ; %bb.0: ; %.entry
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX9-NEXT: v_mov_b32_e32 v2, s2
|
||||
; GFX9-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0
|
||||
; GFX9-NEXT: s_endpgm
|
||||
;
|
||||
; GFX9ARCH-SDAG-LABEL: workgroup_ids_kernel:
|
||||
; GFX9ARCH-SDAG: ; %bb.0: ; %.entry
|
||||
; GFX9ARCH-SDAG-NEXT: s_lshr_b32 s0, ttmp7, 16
|
||||
; GFX9ARCH-SDAG-NEXT: s_and_b32 s1, ttmp7, 0xffff
|
||||
; GFX9ARCH-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9
|
||||
; GFX9ARCH-SDAG-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX9ARCH-SDAG-NEXT: v_mov_b32_e32 v2, s0
|
||||
; GFX9ARCH-SDAG-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0
|
||||
; GFX9ARCH-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX9ARCH-GISEL-LABEL: workgroup_ids_kernel:
|
||||
; GFX9ARCH-GISEL: ; %bb.0: ; %.entry
|
||||
; GFX9ARCH-GISEL-NEXT: s_mov_b32 s0, ttmp9
|
||||
; GFX9ARCH-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff
|
||||
; GFX9ARCH-GISEL-NEXT: s_lshr_b32 s2, ttmp7, 16
|
||||
; GFX9ARCH-GISEL-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX9ARCH-GISEL-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX9ARCH-GISEL-NEXT: v_mov_b32_e32 v2, s2
|
||||
; GFX9ARCH-GISEL-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0
|
||||
; GFX9ARCH-GISEL-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-SDAG-LABEL: workgroup_ids_kernel:
|
||||
; GFX12-SDAG: ; %bb.0: ; %.entry
|
||||
; GFX12-SDAG-NEXT: s_and_b32 s0, ttmp7, 0xffff
|
||||
; GFX12-SDAG-NEXT: s_lshr_b32 s1, ttmp7, 16
|
||||
; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, s0
|
||||
; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, s1
|
||||
; GFX12-SDAG-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null
|
||||
; GFX12-SDAG-NEXT: s_nop 0
|
||||
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-GISEL-LABEL: workgroup_ids_kernel:
|
||||
; GFX12-GISEL: ; %bb.0: ; %.entry
|
||||
; GFX12-GISEL-NEXT: s_mov_b32 s0, ttmp9
|
||||
; GFX12-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff
|
||||
; GFX12-GISEL-NEXT: s_lshr_b32 s2, ttmp7, 16
|
||||
; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
|
||||
; GFX12-GISEL-NEXT: v_mov_b32_e32 v2, s2
|
||||
; GFX12-GISEL-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null
|
||||
; GFX12-GISEL-NEXT: s_nop 0
|
||||
; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-GISEL-NEXT: s_endpgm
|
||||
.entry:
|
||||
%idx = call i32 @llvm.amdgcn.workgroup.id.x()
|
||||
%idy = call i32 @llvm.amdgcn.workgroup.id.y()
|
||||
%idz = call i32 @llvm.amdgcn.workgroup.id.z()
|
||||
%ielemx = insertelement <3 x i32> undef, i32 %idx, i64 0
|
||||
%ielemy = insertelement <3 x i32> %ielemx, i32 %idy, i64 1
|
||||
%ielemz = insertelement <3 x i32> %ielemy, i32 %idz, i64 2
|
||||
call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %ielemz, ptr addrspace(8) undef, i32 0, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @caller() {
|
||||
; GFX9-SDAG-LABEL: caller:
|
||||
; GFX9-SDAG: ; %bb.0:
|
||||
; GFX9-SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
|
||||
; GFX9-SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
|
||||
; GFX9-SDAG-NEXT: s_mov_b32 s38, -1
|
||||
; GFX9-SDAG-NEXT: s_mov_b32 s39, 0xe00000
|
||||
; GFX9-SDAG-NEXT: s_add_u32 s36, s36, s7
|
||||
; GFX9-SDAG-NEXT: s_addc_u32 s37, s37, 0
|
||||
; GFX9-SDAG-NEXT: s_add_u32 s8, s2, 36
|
||||
; GFX9-SDAG-NEXT: s_addc_u32 s9, s3, 0
|
||||
; GFX9-SDAG-NEXT: s_getpc_b64 s[2:3]
|
||||
; GFX9-SDAG-NEXT: s_add_u32 s2, s2, callee@gotpcrel32@lo+4
|
||||
; GFX9-SDAG-NEXT: s_addc_u32 s3, s3, callee@gotpcrel32@hi+12
|
||||
; GFX9-SDAG-NEXT: s_load_dwordx2 s[14:15], s[2:3], 0x0
|
||||
; GFX9-SDAG-NEXT: s_mov_b64 s[10:11], s[4:5]
|
||||
; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v2, 20, v2
|
||||
; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; GFX9-SDAG-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX9-SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX9-SDAG-NEXT: v_or3_b32 v31, v0, v1, v2
|
||||
; GFX9-SDAG-NEXT: s_mov_b32 s12, s6
|
||||
; GFX9-SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s6
|
||||
; GFX9-SDAG-NEXT: s_mov_b32 s32, 0
|
||||
; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-SDAG-NEXT: s_swappc_b64 s[30:31], s[14:15]
|
||||
; GFX9-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX9-GISEL-LABEL: caller:
|
||||
; GFX9-GISEL: ; %bb.0:
|
||||
; GFX9-GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
|
||||
; GFX9-GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
|
||||
; GFX9-GISEL-NEXT: s_mov_b32 s38, -1
|
||||
; GFX9-GISEL-NEXT: s_mov_b32 s39, 0xe00000
|
||||
; GFX9-GISEL-NEXT: s_add_u32 s36, s36, s7
|
||||
; GFX9-GISEL-NEXT: s_addc_u32 s37, s37, 0
|
||||
; GFX9-GISEL-NEXT: s_add_u32 s8, s2, 36
|
||||
; GFX9-GISEL-NEXT: s_addc_u32 s9, s3, 0
|
||||
; GFX9-GISEL-NEXT: s_mov_b64 s[10:11], s[4:5]
|
||||
; GFX9-GISEL-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX9-GISEL-NEXT: s_getpc_b64 s[0:1]
|
||||
; GFX9-GISEL-NEXT: s_add_u32 s0, s0, callee@gotpcrel32@lo+4
|
||||
; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, callee@gotpcrel32@hi+12
|
||||
; GFX9-GISEL-NEXT: s_load_dwordx2 s[14:15], s[0:1], 0x0
|
||||
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v2, 20, v2
|
||||
; GFX9-GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX9-GISEL-NEXT: v_or3_b32 v31, v0, v1, v2
|
||||
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s6
|
||||
; GFX9-GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX9-GISEL-NEXT: s_mov_b32 s12, s6
|
||||
; GFX9-GISEL-NEXT: s_mov_b32 s32, 0
|
||||
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-GISEL-NEXT: s_swappc_b64 s[30:31], s[14:15]
|
||||
; GFX9-GISEL-NEXT: s_endpgm
|
||||
;
|
||||
; GFX9ARCH-SDAG-LABEL: caller:
|
||||
; GFX9ARCH-SDAG: ; %bb.0:
|
||||
; GFX9ARCH-SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
|
||||
; GFX9ARCH-SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
|
||||
; GFX9ARCH-SDAG-NEXT: s_mov_b32 s38, -1
|
||||
; GFX9ARCH-SDAG-NEXT: s_mov_b32 s39, 0xe00000
|
||||
; GFX9ARCH-SDAG-NEXT: s_add_u32 s36, s36, s6
|
||||
; GFX9ARCH-SDAG-NEXT: s_addc_u32 s37, s37, 0
|
||||
; GFX9ARCH-SDAG-NEXT: s_add_u32 s8, s2, 36
|
||||
; GFX9ARCH-SDAG-NEXT: s_addc_u32 s9, s3, 0
|
||||
; GFX9ARCH-SDAG-NEXT: s_getpc_b64 s[2:3]
|
||||
; GFX9ARCH-SDAG-NEXT: s_add_u32 s2, s2, callee@gotpcrel32@lo+4
|
||||
; GFX9ARCH-SDAG-NEXT: s_addc_u32 s3, s3, callee@gotpcrel32@hi+12
|
||||
; GFX9ARCH-SDAG-NEXT: s_load_dwordx2 s[6:7], s[2:3], 0x0
|
||||
; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[10:11], s[4:5]
|
||||
; GFX9ARCH-SDAG-NEXT: v_lshlrev_b32_e32 v2, 20, v2
|
||||
; GFX9ARCH-SDAG-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX9ARCH-SDAG-NEXT: v_or3_b32 v31, v0, v1, v2
|
||||
; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX9ARCH-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9
|
||||
; GFX9ARCH-SDAG-NEXT: s_mov_b32 s32, 0
|
||||
; GFX9ARCH-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9ARCH-SDAG-NEXT: s_swappc_b64 s[30:31], s[6:7]
|
||||
; GFX9ARCH-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX9ARCH-GISEL-LABEL: caller:
|
||||
; GFX9ARCH-GISEL: ; %bb.0:
|
||||
; GFX9ARCH-GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
|
||||
; GFX9ARCH-GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
|
||||
; GFX9ARCH-GISEL-NEXT: s_mov_b32 s38, -1
|
||||
; GFX9ARCH-GISEL-NEXT: s_mov_b32 s39, 0xe00000
|
||||
; GFX9ARCH-GISEL-NEXT: s_add_u32 s36, s36, s6
|
||||
; GFX9ARCH-GISEL-NEXT: s_addc_u32 s37, s37, 0
|
||||
; GFX9ARCH-GISEL-NEXT: s_add_u32 s8, s2, 36
|
||||
; GFX9ARCH-GISEL-NEXT: s_addc_u32 s9, s3, 0
|
||||
; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[10:11], s[4:5]
|
||||
; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX9ARCH-GISEL-NEXT: s_getpc_b64 s[0:1]
|
||||
; GFX9ARCH-GISEL-NEXT: s_add_u32 s0, s0, callee@gotpcrel32@lo+4
|
||||
; GFX9ARCH-GISEL-NEXT: s_addc_u32 s1, s1, callee@gotpcrel32@hi+12
|
||||
; GFX9ARCH-GISEL-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
|
||||
; GFX9ARCH-GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; GFX9ARCH-GISEL-NEXT: v_lshlrev_b32_e32 v2, 20, v2
|
||||
; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX9ARCH-GISEL-NEXT: v_or3_b32 v31, v0, v1, v2
|
||||
; GFX9ARCH-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9
|
||||
; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX9ARCH-GISEL-NEXT: s_mov_b32 s32, 0
|
||||
; GFX9ARCH-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9ARCH-GISEL-NEXT: s_swappc_b64 s[30:31], s[6:7]
|
||||
; GFX9ARCH-GISEL-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-SDAG-LABEL: caller:
|
||||
; GFX12-SDAG: ; %bb.0:
|
||||
; GFX12-SDAG-NEXT: v_dual_mov_b32 v31, v0 :: v_dual_mov_b32 v0, ttmp9
|
||||
; GFX12-SDAG-NEXT: s_mov_b64 s[10:11], s[4:5]
|
||||
; GFX12-SDAG-NEXT: s_mov_b32 s7, callee@abs32@hi
|
||||
; GFX12-SDAG-NEXT: s_mov_b32 s6, callee@abs32@lo
|
||||
; GFX12-SDAG-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX12-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3]
|
||||
; GFX12-SDAG-NEXT: s_mov_b32 s32, 0
|
||||
; GFX12-SDAG-NEXT: s_swappc_b64 s[30:31], s[6:7]
|
||||
; GFX12-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-GISEL-LABEL: caller:
|
||||
; GFX12-GISEL: ; %bb.0:
|
||||
; GFX12-GISEL-NEXT: v_dual_mov_b32 v31, v0 :: v_dual_mov_b32 v0, ttmp9
|
||||
; GFX12-GISEL-NEXT: s_mov_b64 s[10:11], s[4:5]
|
||||
; GFX12-GISEL-NEXT: s_mov_b32 s6, callee@abs32@lo
|
||||
; GFX12-GISEL-NEXT: s_mov_b32 s7, callee@abs32@hi
|
||||
; GFX12-GISEL-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX12-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3]
|
||||
; GFX12-GISEL-NEXT: s_mov_b32 s32, 0
|
||||
; GFX12-GISEL-NEXT: s_swappc_b64 s[30:31], s[6:7]
|
||||
; GFX12-GISEL-NEXT: s_endpgm
|
||||
%idx = call i32 @llvm.amdgcn.workgroup.id.x()
|
||||
call void @callee(i32 %idx) #0
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @callee(i32) #0
|
||||
|
||||
define void @workgroup_ids_device_func(ptr addrspace(1) %outx, ptr addrspace(1) %outy, ptr addrspace(1) %outz) {
|
||||
; GFX9-LABEL: workgroup_ids_device_func:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: v_mov_b32_e32 v6, s12
|
||||
; GFX9-NEXT: global_store_dword v[0:1], v6, off
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, s13
|
||||
; GFX9-NEXT: global_store_dword v[2:3], v0, off
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, s14
|
||||
; GFX9-NEXT: global_store_dword v[4:5], v0, off
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX9ARCH-SDAG-LABEL: workgroup_ids_device_func:
|
||||
; GFX9ARCH-SDAG: ; %bb.0:
|
||||
; GFX9ARCH-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9ARCH-SDAG-NEXT: v_mov_b32_e32 v6, ttmp9
|
||||
; GFX9ARCH-SDAG-NEXT: s_and_b32 s4, ttmp7, 0xffff
|
||||
; GFX9ARCH-SDAG-NEXT: global_store_dword v[0:1], v6, off
|
||||
; GFX9ARCH-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9ARCH-SDAG-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX9ARCH-SDAG-NEXT: s_lshr_b32 s4, ttmp7, 16
|
||||
; GFX9ARCH-SDAG-NEXT: global_store_dword v[2:3], v0, off
|
||||
; GFX9ARCH-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9ARCH-SDAG-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX9ARCH-SDAG-NEXT: global_store_dword v[4:5], v0, off
|
||||
; GFX9ARCH-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9ARCH-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX9ARCH-GISEL-LABEL: workgroup_ids_device_func:
|
||||
; GFX9ARCH-GISEL: ; %bb.0:
|
||||
; GFX9ARCH-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9ARCH-GISEL-NEXT: v_mov_b32_e32 v6, ttmp9
|
||||
; GFX9ARCH-GISEL-NEXT: s_and_b32 s4, ttmp7, 0xffff
|
||||
; GFX9ARCH-GISEL-NEXT: s_lshr_b32 s5, ttmp7, 16
|
||||
; GFX9ARCH-GISEL-NEXT: global_store_dword v[0:1], v6, off
|
||||
; GFX9ARCH-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9ARCH-GISEL-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX9ARCH-GISEL-NEXT: global_store_dword v[2:3], v0, off
|
||||
; GFX9ARCH-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9ARCH-GISEL-NEXT: v_mov_b32_e32 v0, s5
|
||||
; GFX9ARCH-GISEL-NEXT: global_store_dword v[4:5], v0, off
|
||||
; GFX9ARCH-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9ARCH-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX12-LABEL: workgroup_ids_device_func:
|
||||
; GFX12: ; %bb.0:
|
||||
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-NEXT: s_wait_expcnt 0x0
|
||||
; GFX12-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX12-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX12-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-NEXT: s_and_b32 s0, ttmp7, 0xffff
|
||||
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
|
||||
; GFX12-NEXT: v_dual_mov_b32 v6, ttmp9 :: v_dual_mov_b32 v7, s0
|
||||
; GFX12-NEXT: s_lshr_b32 s1, ttmp7, 16
|
||||
; GFX12-NEXT: v_mov_b32_e32 v8, s1
|
||||
; GFX12-NEXT: global_store_b32 v[0:1], v6, off scope:SCOPE_SYS
|
||||
; GFX12-NEXT: s_wait_storecnt 0x0
|
||||
; GFX12-NEXT: global_store_b32 v[2:3], v7, off scope:SCOPE_SYS
|
||||
; GFX12-NEXT: s_wait_storecnt 0x0
|
||||
; GFX12-NEXT: global_store_b32 v[4:5], v8, off scope:SCOPE_SYS
|
||||
; GFX12-NEXT: s_wait_storecnt 0x0
|
||||
; GFX12-NEXT: s_setpc_b64 s[30:31]
|
||||
%id.x = call i32 @llvm.amdgcn.workgroup.id.x()
|
||||
%id.y = call i32 @llvm.amdgcn.workgroup.id.y()
|
||||
%id.z = call i32 @llvm.amdgcn.workgroup.id.z()
|
||||
store volatile i32 %id.x, ptr addrspace(1) %outx
|
||||
store volatile i32 %id.y, ptr addrspace(1) %outy
|
||||
store volatile i32 %id.z, ptr addrspace(1) %outz
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i32 @llvm.amdgcn.workgroup.id.x()
|
||||
declare i32 @llvm.amdgcn.workgroup.id.y()
|
||||
declare i32 @llvm.amdgcn.workgroup.id.z()
|
||||
declare void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32>, ptr addrspace(8), i32, i32, i32 immarg)
|
||||
|
||||
attributes #0 = { nounwind "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" }
|
||||
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
|
||||
; GFX9ARCH: {{.*}}
|
187
llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll
Normal file
187
llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll
Normal file
@ -0,0 +1,187 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH,GFX9ARCH-SDAG %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH,GFX9ARCH-GISEL %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
|
||||
|
||||
define amdgpu_cs void @_amdgpu_cs_main() {
|
||||
; GFX9-LABEL: _amdgpu_cs_main:
|
||||
; GFX9: ; %bb.0: ; %.entry
|
||||
; GFX9-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0
|
||||
; GFX9-NEXT: s_endpgm
|
||||
;
|
||||
; GFX9ARCH-SDAG-LABEL: _amdgpu_cs_main:
|
||||
; GFX9ARCH-SDAG: ; %bb.0: ; %.entry
|
||||
; GFX9ARCH-SDAG-NEXT: s_lshr_b32 s0, ttmp7, 16
|
||||
; GFX9ARCH-SDAG-NEXT: s_and_b32 s1, ttmp7, 0xffff
|
||||
; GFX9ARCH-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9
|
||||
; GFX9ARCH-SDAG-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX9ARCH-SDAG-NEXT: v_mov_b32_e32 v2, s0
|
||||
; GFX9ARCH-SDAG-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0
|
||||
; GFX9ARCH-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX9ARCH-GISEL-LABEL: _amdgpu_cs_main:
|
||||
; GFX9ARCH-GISEL: ; %bb.0: ; %.entry
|
||||
; GFX9ARCH-GISEL-NEXT: s_mov_b32 s0, ttmp9
|
||||
; GFX9ARCH-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff
|
||||
; GFX9ARCH-GISEL-NEXT: s_lshr_b32 s2, ttmp7, 16
|
||||
; GFX9ARCH-GISEL-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX9ARCH-GISEL-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX9ARCH-GISEL-NEXT: v_mov_b32_e32 v2, s2
|
||||
; GFX9ARCH-GISEL-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0
|
||||
; GFX9ARCH-GISEL-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-SDAG-LABEL: _amdgpu_cs_main:
|
||||
; GFX12-SDAG: ; %bb.0: ; %.entry
|
||||
; GFX12-SDAG-NEXT: s_and_b32 s0, ttmp7, 0xffff
|
||||
; GFX12-SDAG-NEXT: s_lshr_b32 s1, ttmp7, 16
|
||||
; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, s0
|
||||
; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, s1
|
||||
; GFX12-SDAG-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null
|
||||
; GFX12-SDAG-NEXT: s_nop 0
|
||||
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-GISEL-LABEL: _amdgpu_cs_main:
|
||||
; GFX12-GISEL: ; %bb.0: ; %.entry
|
||||
; GFX12-GISEL-NEXT: s_mov_b32 s0, ttmp9
|
||||
; GFX12-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff
|
||||
; GFX12-GISEL-NEXT: s_lshr_b32 s2, ttmp7, 16
|
||||
; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
|
||||
; GFX12-GISEL-NEXT: v_mov_b32_e32 v2, s2
|
||||
; GFX12-GISEL-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null
|
||||
; GFX12-GISEL-NEXT: s_nop 0
|
||||
; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-GISEL-NEXT: s_endpgm
|
||||
.entry:
|
||||
%idx = call i32 @llvm.amdgcn.workgroup.id.x()
|
||||
%idy = call i32 @llvm.amdgcn.workgroup.id.y()
|
||||
%idz = call i32 @llvm.amdgcn.workgroup.id.z()
|
||||
%ielemx = insertelement <3 x i32> undef, i32 %idx, i64 0
|
||||
%ielemy = insertelement <3 x i32> %ielemx, i32 %idy, i64 1
|
||||
%ielemz = insertelement <3 x i32> %ielemy, i32 %idz, i64 2
|
||||
call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %ielemz, ptr addrspace(8) undef, i32 0, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_cs void @caller() {
|
||||
; GFX9-LABEL: caller:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
|
||||
; GFX9-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
|
||||
; GFX9-NEXT: s_mov_b32 s10, -1
|
||||
; GFX9-NEXT: s_mov_b32 s11, 0xe00000
|
||||
; GFX9-NEXT: s_add_u32 s8, s8, s0
|
||||
; GFX9-NEXT: s_addc_u32 s9, s9, 0
|
||||
; GFX9-NEXT: s_getpc_b64 s[0:1]
|
||||
; GFX9-NEXT: s_add_u32 s0, s0, callee@gotpcrel32@lo+4
|
||||
; GFX9-NEXT: s_addc_u32 s1, s1, callee@gotpcrel32@hi+12
|
||||
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
|
||||
; GFX9-NEXT: s_mov_b64 s[0:1], s[8:9]
|
||||
; GFX9-NEXT: s_mov_b64 s[2:3], s[10:11]
|
||||
; GFX9-NEXT: s_mov_b32 s32, 0
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; GFX9-NEXT: s_endpgm
|
||||
;
|
||||
; GFX9ARCH-SDAG-LABEL: caller:
|
||||
; GFX9ARCH-SDAG: ; %bb.0:
|
||||
; GFX9ARCH-SDAG-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
|
||||
; GFX9ARCH-SDAG-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
|
||||
; GFX9ARCH-SDAG-NEXT: s_mov_b32 s10, -1
|
||||
; GFX9ARCH-SDAG-NEXT: s_mov_b32 s11, 0xe00000
|
||||
; GFX9ARCH-SDAG-NEXT: s_add_u32 s8, s8, s0
|
||||
; GFX9ARCH-SDAG-NEXT: s_addc_u32 s9, s9, 0
|
||||
; GFX9ARCH-SDAG-NEXT: s_getpc_b64 s[0:1]
|
||||
; GFX9ARCH-SDAG-NEXT: s_add_u32 s0, s0, callee@gotpcrel32@lo+4
|
||||
; GFX9ARCH-SDAG-NEXT: s_addc_u32 s1, s1, callee@gotpcrel32@hi+12
|
||||
; GFX9ARCH-SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
|
||||
; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[0:1], s[8:9]
|
||||
; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11]
|
||||
; GFX9ARCH-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9
|
||||
; GFX9ARCH-SDAG-NEXT: s_mov_b32 s32, 0
|
||||
; GFX9ARCH-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9ARCH-SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; GFX9ARCH-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX9ARCH-GISEL-LABEL: caller:
|
||||
; GFX9ARCH-GISEL: ; %bb.0:
|
||||
; GFX9ARCH-GISEL-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
|
||||
; GFX9ARCH-GISEL-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
|
||||
; GFX9ARCH-GISEL-NEXT: s_mov_b32 s10, -1
|
||||
; GFX9ARCH-GISEL-NEXT: s_mov_b32 s11, 0xe00000
|
||||
; GFX9ARCH-GISEL-NEXT: s_add_u32 s8, s8, s0
|
||||
; GFX9ARCH-GISEL-NEXT: s_addc_u32 s9, s9, 0
|
||||
; GFX9ARCH-GISEL-NEXT: s_getpc_b64 s[0:1]
|
||||
; GFX9ARCH-GISEL-NEXT: s_add_u32 s0, s0, callee@gotpcrel32@lo+4
|
||||
; GFX9ARCH-GISEL-NEXT: s_addc_u32 s1, s1, callee@gotpcrel32@hi+12
|
||||
; GFX9ARCH-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
|
||||
; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[0:1], s[8:9]
|
||||
; GFX9ARCH-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9
|
||||
; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
|
||||
; GFX9ARCH-GISEL-NEXT: s_mov_b32 s32, 0
|
||||
; GFX9ARCH-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9ARCH-GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; GFX9ARCH-GISEL-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-SDAG-LABEL: caller:
|
||||
; GFX12-SDAG: ; %bb.0:
|
||||
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9
|
||||
; GFX12-SDAG-NEXT: s_mov_b32 s1, callee@abs32@hi
|
||||
; GFX12-SDAG-NEXT: s_mov_b32 s0, callee@abs32@lo
|
||||
; GFX12-SDAG-NEXT: s_mov_b32 s32, 0
|
||||
; GFX12-SDAG-NEXT: s_swappc_b64 s[30:31], s[0:1]
|
||||
; GFX12-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-GISEL-LABEL: caller:
|
||||
; GFX12-GISEL: ; %bb.0:
|
||||
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9
|
||||
; GFX12-GISEL-NEXT: s_mov_b32 s0, callee@abs32@lo
|
||||
; GFX12-GISEL-NEXT: s_mov_b32 s1, callee@abs32@hi
|
||||
; GFX12-GISEL-NEXT: s_mov_b32 s32, 0
|
||||
; GFX12-GISEL-NEXT: s_swappc_b64 s[30:31], s[0:1]
|
||||
; GFX12-GISEL-NEXT: s_endpgm
|
||||
%idx = call i32 @llvm.amdgcn.workgroup.id.x()
|
||||
call amdgpu_gfx void @callee(i32 %idx)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare amdgpu_gfx void @callee(i32)
|
||||
|
||||
define amdgpu_gfx void @workgroup_ids_gfx(ptr addrspace(1) %outx, ptr addrspace(1) %outy, ptr addrspace(1) %outz) {
|
||||
; GFX9-LABEL: workgroup_ids_gfx:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX9ARCH-LABEL: workgroup_ids_gfx:
|
||||
; GFX9ARCH: ; %bb.0:
|
||||
; GFX9ARCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9ARCH-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX12-LABEL: workgroup_ids_gfx:
|
||||
; GFX12: ; %bb.0:
|
||||
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-NEXT: s_wait_expcnt 0x0
|
||||
; GFX12-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX12-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX12-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-NEXT: s_setpc_b64 s[30:31]
|
||||
%id.x = call i32 @llvm.amdgcn.workgroup.id.x()
|
||||
%id.y = call i32 @llvm.amdgcn.workgroup.id.y()
|
||||
%id.z = call i32 @llvm.amdgcn.workgroup.id.z()
|
||||
store volatile i32 %id.x, ptr addrspace(1) %outx
|
||||
store volatile i32 %id.y, ptr addrspace(1) %outy
|
||||
store volatile i32 %id.z, ptr addrspace(1) %outz
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i32 @llvm.amdgcn.workgroup.id.x()
|
||||
declare i32 @llvm.amdgcn.workgroup.id.y()
|
||||
declare i32 @llvm.amdgcn.workgroup.id.z()
|
||||
declare void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32>, ptr addrspace(8), i32, i32, i32 immarg)
|
||||
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
|
||||
; GFX9-GISEL: {{.*}}
|
||||
; GFX9-SDAG: {{.*}}
|
@ -1,128 +0,0 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
|
||||
|
||||
define amdgpu_cs void @_amdgpu_cs_main() {
|
||||
; GFX9-SDAG-LABEL: _amdgpu_cs_main:
|
||||
; GFX9-SDAG: ; %bb.0: ; %.entry
|
||||
; GFX9-SDAG-NEXT: s_lshr_b32 s2, ttmp7, 16
|
||||
; GFX9-SDAG-NEXT: s_and_b32 s1, ttmp7, 0xffff
|
||||
; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9
|
||||
; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, s2
|
||||
; GFX9-SDAG-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0
|
||||
; GFX9-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX9-GISEL-LABEL: _amdgpu_cs_main:
|
||||
; GFX9-GISEL: ; %bb.0: ; %.entry
|
||||
; GFX9-GISEL-NEXT: s_mov_b32 s0, ttmp9
|
||||
; GFX9-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff
|
||||
; GFX9-GISEL-NEXT: s_lshr_b32 s2, ttmp7, 16
|
||||
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s2
|
||||
; GFX9-GISEL-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0
|
||||
; GFX9-GISEL-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-SDAG-LABEL: _amdgpu_cs_main:
|
||||
; GFX12-SDAG: ; %bb.0: ; %.entry
|
||||
; GFX12-SDAG-NEXT: s_lshr_b32 s2, ttmp7, 16
|
||||
; GFX12-SDAG-NEXT: s_and_b32 s1, ttmp7, 0xffff
|
||||
; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
||||
; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, s1
|
||||
; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, s2
|
||||
; GFX12-SDAG-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null
|
||||
; GFX12-SDAG-NEXT: s_nop 0
|
||||
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-GISEL-LABEL: _amdgpu_cs_main:
|
||||
; GFX12-GISEL: ; %bb.0: ; %.entry
|
||||
; GFX12-GISEL-NEXT: s_mov_b32 s0, ttmp9
|
||||
; GFX12-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff
|
||||
; GFX12-GISEL-NEXT: s_lshr_b32 s2, ttmp7, 16
|
||||
; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
|
||||
; GFX12-GISEL-NEXT: v_mov_b32_e32 v2, s2
|
||||
; GFX12-GISEL-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null
|
||||
; GFX12-GISEL-NEXT: s_nop 0
|
||||
; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-GISEL-NEXT: s_endpgm
|
||||
.entry:
|
||||
%idx = call i32 @llvm.amdgcn.workgroup.id.x()
|
||||
%idy = call i32 @llvm.amdgcn.workgroup.id.y()
|
||||
%idz = call i32 @llvm.amdgcn.workgroup.id.z()
|
||||
%ielemx = insertelement <3 x i32> undef, i32 %idx, i64 0
|
||||
%ielemy = insertelement <3 x i32> %ielemx, i32 %idy, i64 1
|
||||
%ielemz = insertelement <3 x i32> %ielemy, i32 %idz, i64 2
|
||||
call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %ielemz, ptr addrspace(8) undef, i32 0, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_cs void @caller() {
|
||||
; GFX9-SDAG-LABEL: caller:
|
||||
; GFX9-SDAG: ; %bb.0:
|
||||
; GFX9-SDAG-NEXT: s_getpc_b64 s[8:9]
|
||||
; GFX9-SDAG-NEXT: s_mov_b32 s8, s0
|
||||
; GFX9-SDAG-NEXT: s_load_dwordx4 s[8:11], s[8:9], 0x10
|
||||
; GFX9-SDAG-NEXT: s_mov_b32 s5, callee@abs32@hi
|
||||
; GFX9-SDAG-NEXT: s_mov_b32 s4, callee@abs32@lo
|
||||
; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9
|
||||
; GFX9-SDAG-NEXT: s_mov_b32 s32, 0
|
||||
; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-SDAG-NEXT: s_add_u32 s8, s8, s0
|
||||
; GFX9-SDAG-NEXT: s_addc_u32 s9, s9, 0
|
||||
; GFX9-SDAG-NEXT: s_mov_b64 s[0:1], s[8:9]
|
||||
; GFX9-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11]
|
||||
; GFX9-SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; GFX9-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX9-GISEL-LABEL: caller:
|
||||
; GFX9-GISEL: ; %bb.0:
|
||||
; GFX9-GISEL-NEXT: s_getpc_b64 s[8:9]
|
||||
; GFX9-GISEL-NEXT: s_mov_b32 s8, s0
|
||||
; GFX9-GISEL-NEXT: s_load_dwordx4 s[8:11], s[8:9], 0x10
|
||||
; GFX9-GISEL-NEXT: s_mov_b32 s4, callee@abs32@lo
|
||||
; GFX9-GISEL-NEXT: s_mov_b32 s5, callee@abs32@hi
|
||||
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9
|
||||
; GFX9-GISEL-NEXT: s_mov_b32 s32, 0
|
||||
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-GISEL-NEXT: s_add_u32 s8, s8, s0
|
||||
; GFX9-GISEL-NEXT: s_addc_u32 s9, s9, 0
|
||||
; GFX9-GISEL-NEXT: s_mov_b64 s[0:1], s[8:9]
|
||||
; GFX9-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
|
||||
; GFX9-GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; GFX9-GISEL-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-SDAG-LABEL: caller:
|
||||
; GFX12-SDAG: ; %bb.0:
|
||||
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9
|
||||
; GFX12-SDAG-NEXT: s_mov_b32 s1, callee@abs32@hi
|
||||
; GFX12-SDAG-NEXT: s_mov_b32 s0, callee@abs32@lo
|
||||
; GFX12-SDAG-NEXT: s_mov_b32 s32, 0
|
||||
; GFX12-SDAG-NEXT: s_swappc_b64 s[30:31], s[0:1]
|
||||
; GFX12-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-GISEL-LABEL: caller:
|
||||
; GFX12-GISEL: ; %bb.0:
|
||||
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9
|
||||
; GFX12-GISEL-NEXT: s_mov_b32 s0, callee@abs32@lo
|
||||
; GFX12-GISEL-NEXT: s_mov_b32 s1, callee@abs32@hi
|
||||
; GFX12-GISEL-NEXT: s_mov_b32 s32, 0
|
||||
; GFX12-GISEL-NEXT: s_swappc_b64 s[30:31], s[0:1]
|
||||
; GFX12-GISEL-NEXT: s_endpgm
|
||||
%idx = call i32 @llvm.amdgcn.workgroup.id.x()
|
||||
call amdgpu_gfx void @callee(i32 %idx)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare amdgpu_gfx void @callee(i32)
|
||||
|
||||
declare i32 @llvm.amdgcn.workgroup.id.x()
|
||||
declare i32 @llvm.amdgcn.workgroup.id.y()
|
||||
declare i32 @llvm.amdgcn.workgroup.id.z()
|
||||
declare void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32>, ptr addrspace(8), i32, i32, i32 immarg)
|
||||
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
|
||||
; GFX12: {{.*}}
|
||||
; GFX9: {{.*}}
|
@ -5,43 +5,25 @@
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
|
||||
|
||||
define amdgpu_kernel void @workgroup_id_x(ptr addrspace(1) %ptrx) {
|
||||
; GFX9-SDAG-LABEL: workgroup_id_x:
|
||||
; GFX9-SDAG: ; %bb.0:
|
||||
; GFX9-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
|
||||
; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, ttmp9
|
||||
; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
|
||||
; GFX9-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX9-GISEL-LABEL: workgroup_id_x:
|
||||
; GFX9-GISEL: ; %bb.0:
|
||||
; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
|
||||
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9
|
||||
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
|
||||
; GFX9-GISEL-NEXT: s_endpgm
|
||||
; GFX9-LABEL: workgroup_id_x:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, ttmp9
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: global_store_dword v1, v0, s[0:1]
|
||||
; GFX9-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-SDAG-LABEL: workgroup_id_x:
|
||||
; GFX12-SDAG: ; %bb.0:
|
||||
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
|
||||
; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, ttmp9
|
||||
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
|
||||
; GFX12-SDAG-NEXT: s_nop 0
|
||||
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-GISEL-LABEL: workgroup_id_x:
|
||||
; GFX12-GISEL: ; %bb.0:
|
||||
; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
|
||||
; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, 0
|
||||
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
|
||||
; GFX12-GISEL-NEXT: s_nop 0
|
||||
; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-GISEL-NEXT: s_endpgm
|
||||
; GFX12-LABEL: workgroup_id_x:
|
||||
; GFX12: ; %bb.0:
|
||||
; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
|
||||
; GFX12-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, 0
|
||||
; GFX12-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-NEXT: global_store_b32 v1, v0, s[0:1]
|
||||
; GFX12-NEXT: s_nop 0
|
||||
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-NEXT: s_endpgm
|
||||
%idx = call i32 @llvm.amdgcn.workgroup.id.x()
|
||||
store i32 %idx, ptr addrspace(1) %ptrx
|
||||
|
||||
@ -52,23 +34,23 @@ define amdgpu_kernel void @workgroup_id_xy(ptr addrspace(1) %ptrx, ptr addrspace
|
||||
; GFX9-LABEL: workgroup_id_xy:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, ttmp9
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, ttmp9
|
||||
; GFX9-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, ttmp7
|
||||
; GFX9-NEXT: global_store_dword v0, v1, s[2:3]
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
; GFX9-NEXT: global_store_dword v2, v1, s[2:3]
|
||||
; GFX9-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-LABEL: workgroup_id_xy:
|
||||
; GFX12: ; %bb.0:
|
||||
; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
|
||||
; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, ttmp9
|
||||
; GFX12-NEXT: v_mov_b32_e32 v2, ttmp7
|
||||
; GFX12-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, ttmp7
|
||||
; GFX12-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX12-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-NEXT: s_clause 0x1
|
||||
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
|
||||
; GFX12-NEXT: global_store_b32 v0, v2, s[2:3]
|
||||
; GFX12-NEXT: global_store_b32 v2, v0, s[0:1]
|
||||
; GFX12-NEXT: global_store_b32 v2, v1, s[2:3]
|
||||
; GFX12-NEXT: s_nop 0
|
||||
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-NEXT: s_endpgm
|
||||
@ -81,37 +63,21 @@ define amdgpu_kernel void @workgroup_id_xy(ptr addrspace(1) %ptrx, ptr addrspace
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @workgroup_id_xyz(ptr addrspace(1) %ptrx, ptr addrspace(1) %ptry, ptr addrspace(1) %ptrz) {
|
||||
; GFX9-SDAG-LABEL: workgroup_id_xyz:
|
||||
; GFX9-SDAG: ; %bb.0:
|
||||
; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
|
||||
; GFX9-SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10
|
||||
; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, ttmp9
|
||||
; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
|
||||
; GFX9-SDAG-NEXT: s_and_b32 s0, ttmp7, 0xffff
|
||||
; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s0
|
||||
; GFX9-SDAG-NEXT: s_lshr_b32 s0, ttmp7, 16
|
||||
; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[2:3]
|
||||
; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s0
|
||||
; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[6:7]
|
||||
; GFX9-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX9-GISEL-LABEL: workgroup_id_xyz:
|
||||
; GFX9-GISEL: ; %bb.0:
|
||||
; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
|
||||
; GFX9-GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10
|
||||
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9
|
||||
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
|
||||
; GFX9-GISEL-NEXT: s_and_b32 s0, ttmp7, 0xffff
|
||||
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX9-GISEL-NEXT: s_lshr_b32 s0, ttmp7, 16
|
||||
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[2:3]
|
||||
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[6:7]
|
||||
; GFX9-GISEL-NEXT: s_endpgm
|
||||
; GFX9-LABEL: workgroup_id_xyz:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, ttmp9
|
||||
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x10
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX9-NEXT: s_and_b32 s6, ttmp7, 0xffff
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: global_store_dword v1, v0, s[0:1]
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, s6
|
||||
; GFX9-NEXT: s_lshr_b32 s0, ttmp7, 16
|
||||
; GFX9-NEXT: global_store_dword v1, v0, s[2:3]
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX9-NEXT: global_store_dword v1, v0, s[4:5]
|
||||
; GFX9-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-LABEL: workgroup_id_xyz:
|
||||
; GFX12: ; %bb.0:
|
||||
@ -119,15 +85,15 @@ define amdgpu_kernel void @workgroup_id_xyz(ptr addrspace(1) %ptrx, ptr addrspac
|
||||
; GFX12-NEXT: s_load_b128 s[4:7], s[0:1], 0x0
|
||||
; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x10
|
||||
; GFX12-NEXT: s_and_b32 s2, ttmp7, 0xffff
|
||||
; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, ttmp9
|
||||
; GFX12-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, 0
|
||||
; GFX12-NEXT: s_lshr_b32 s3, ttmp7, 16
|
||||
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
||||
; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
|
||||
; GFX12-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-NEXT: s_clause 0x2
|
||||
; GFX12-NEXT: global_store_b32 v0, v1, s[4:5]
|
||||
; GFX12-NEXT: global_store_b32 v0, v2, s[6:7]
|
||||
; GFX12-NEXT: global_store_b32 v0, v3, s[0:1]
|
||||
; GFX12-NEXT: global_store_b32 v1, v0, s[4:5]
|
||||
; GFX12-NEXT: global_store_b32 v1, v2, s[6:7]
|
||||
; GFX12-NEXT: global_store_b32 v1, v3, s[0:1]
|
||||
; GFX12-NEXT: s_nop 0
|
||||
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-NEXT: s_endpgm
|
||||
@ -144,3 +110,8 @@ define amdgpu_kernel void @workgroup_id_xyz(ptr addrspace(1) %ptrx, ptr addrspac
|
||||
declare i32 @llvm.amdgcn.workgroup.id.x()
|
||||
declare i32 @llvm.amdgcn.workgroup.id.y()
|
||||
declare i32 @llvm.amdgcn.workgroup.id.z()
|
||||
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
|
||||
; GFX12-GISEL: {{.*}}
|
||||
; GFX12-SDAG: {{.*}}
|
||||
; GFX9-GISEL: {{.*}}
|
||||
; GFX9-SDAG: {{.*}}
|
||||
|
Loading…
Reference in New Issue
Block a user