mirror of
https://github.com/RPCS3/llvm.git
synced 2025-02-15 00:16:42 +00:00
AMDGPU: Activate all lanes when spilling CSR VGPR for SGPR spills
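If some lanes weren't active on entry to the function, the spill to memory of the CSR VGPR used for SGPR spills would not cover those lanes, so their VGPR values could be clobbered. Turn on all lanes around the spill and the reload to avoid this.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@361655 91177308-0d34-0410-b5e6-96231b3b80d8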
This commit is contained in:
parent
bbaa274fa9
commit
332260473f
@ -523,22 +523,20 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST,
|
||||
// but we would then have to make sure that we were in fact saving at least one
|
||||
// callee-save register in the prologue, which is additional complexity that
|
||||
// doesn't seem worth the benefit.
|
||||
static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock &MBB) {
|
||||
MachineFunction *MF = MBB.getParent();
|
||||
|
||||
const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
|
||||
static unsigned findScratchNonCalleeSaveRegister(MachineFunction &MF,
|
||||
LivePhysRegs &LiveRegs,
|
||||
const TargetRegisterClass &RC) {
|
||||
const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
|
||||
const SIRegisterInfo &TRI = *Subtarget.getRegisterInfo();
|
||||
LivePhysRegs LiveRegs(TRI);
|
||||
LiveRegs.addLiveIns(MBB);
|
||||
|
||||
// Mark callee saved registers as used so we will not choose them.
|
||||
const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(MF);
|
||||
const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF);
|
||||
for (unsigned i = 0; CSRegs[i]; ++i)
|
||||
LiveRegs.addReg(CSRegs[i]);
|
||||
|
||||
MachineRegisterInfo &MRI = MF->getRegInfo();
|
||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
|
||||
for (unsigned Reg : AMDGPU::SReg_32_XM0RegClass) {
|
||||
for (unsigned Reg : RC) {
|
||||
if (LiveRegs.available(MRI, Reg))
|
||||
return Reg;
|
||||
}
|
||||
@ -561,6 +559,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
|
||||
|
||||
unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
|
||||
unsigned FramePtrReg = FuncInfo->getFrameOffsetReg();
|
||||
LivePhysRegs LiveRegs;
|
||||
|
||||
MachineBasicBlock::iterator MBBI = MBB.begin();
|
||||
DebugLoc DL;
|
||||
@ -578,7 +577,12 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
|
||||
|
||||
RoundedSize += Alignment;
|
||||
|
||||
unsigned ScratchSPReg = findScratchNonCalleeSaveRegister(MBB);
|
||||
LiveRegs.init(TRI);
|
||||
LiveRegs.addLiveIns(MBB);
|
||||
|
||||
unsigned ScratchSPReg
|
||||
= findScratchNonCalleeSaveRegister(MF, LiveRegs,
|
||||
AMDGPU::SReg_32_XM0RegClass);
|
||||
assert(ScratchSPReg != AMDGPU::NoRegister);
|
||||
|
||||
// s_add_u32 tmp_reg, s32, NumBytes
|
||||
@ -609,13 +613,33 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
|
||||
.setMIFlag(MachineInstr::FrameSetup);
|
||||
}
|
||||
|
||||
for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
|
||||
: FuncInfo->getSGPRSpillVGPRs()) {
|
||||
if (!Reg.FI.hasValue())
|
||||
continue;
|
||||
TII->storeRegToStackSlot(MBB, MBBI, Reg.VGPR, true,
|
||||
Reg.FI.getValue(), &AMDGPU::VGPR_32RegClass,
|
||||
&TII->getRegisterInfo());
|
||||
if (!FuncInfo->getSGPRSpillVGPRs().empty()) {
|
||||
if (LiveRegs.empty()) {
|
||||
LiveRegs.init(TRI);
|
||||
LiveRegs.addLiveIns(MBB);
|
||||
}
|
||||
|
||||
// To avoid clobbering VGPRs in lanes that weren't active on function entry,
|
||||
// turn on all lanes before doing the spill to memory.
|
||||
unsigned ScratchExecCopy
|
||||
= findScratchNonCalleeSaveRegister(MF, LiveRegs,
|
||||
AMDGPU::SReg_64_XEXECRegClass);
|
||||
|
||||
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), ScratchExecCopy)
|
||||
.addImm(-1);
|
||||
|
||||
for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
|
||||
: FuncInfo->getSGPRSpillVGPRs()) {
|
||||
if (!Reg.FI.hasValue())
|
||||
continue;
|
||||
TII->storeRegToStackSlot(MBB, MBBI, Reg.VGPR, true,
|
||||
Reg.FI.getValue(), &AMDGPU::VGPR_32RegClass,
|
||||
&TII->getRegisterInfo());
|
||||
}
|
||||
|
||||
// FIXME: Split block and make terminator.
|
||||
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
|
||||
.addReg(ScratchExecCopy);
|
||||
}
|
||||
}
|
||||
|
||||
@ -628,14 +652,32 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
|
||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
const SIInstrInfo *TII = ST.getInstrInfo();
|
||||
MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
|
||||
DebugLoc DL;
|
||||
|
||||
for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
|
||||
: FuncInfo->getSGPRSpillVGPRs()) {
|
||||
if (!Reg.FI.hasValue())
|
||||
continue;
|
||||
TII->loadRegFromStackSlot(MBB, MBBI, Reg.VGPR,
|
||||
Reg.FI.getValue(), &AMDGPU::VGPR_32RegClass,
|
||||
&TII->getRegisterInfo());
|
||||
if (!FuncInfo->getSGPRSpillVGPRs().empty()) {
|
||||
// See emitPrologue
|
||||
LivePhysRegs LiveRegs(*ST.getRegisterInfo());
|
||||
LiveRegs.addLiveIns(MBB);
|
||||
|
||||
unsigned ScratchExecCopy
|
||||
= findScratchNonCalleeSaveRegister(MF, LiveRegs,
|
||||
AMDGPU::SReg_64_XEXECRegClass);
|
||||
|
||||
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), ScratchExecCopy)
|
||||
.addImm(-1);
|
||||
|
||||
for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
|
||||
: FuncInfo->getSGPRSpillVGPRs()) {
|
||||
if (!Reg.FI.hasValue())
|
||||
continue;
|
||||
TII->loadRegFromStackSlot(MBB, MBBI, Reg.VGPR,
|
||||
Reg.FI.getValue(), &AMDGPU::VGPR_32RegClass,
|
||||
&TII->getRegisterInfo());
|
||||
}
|
||||
|
||||
// FIXME: Split block and make terminator.
|
||||
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
|
||||
.addReg(ScratchExecCopy);
|
||||
}
|
||||
|
||||
unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
|
||||
@ -645,8 +687,6 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
|
||||
const MachineFrameInfo &MFI = MF.getFrameInfo();
|
||||
uint32_t NumBytes = MFI.getStackSize();
|
||||
|
||||
DebugLoc DL;
|
||||
|
||||
// FIXME: Clarify distinction between no set SP and SP. For callee functions,
|
||||
// it's really whether we need SP to be accurate or not.
|
||||
|
||||
|
@ -30,11 +30,11 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}void_func_byval_struct_non_leaf:
|
||||
; GCN: s_mov_b32 s5, s32
|
||||
; GCN: s_add_u32 s32, s32, 0xc00{{$}}
|
||||
; GCN-DAG: buffer_store_dword v32
|
||||
; GCN-DAG: buffer_store_dword v33
|
||||
; GCN-NOT: v_writelane_b32 v{{[0-9]+}}, s32
|
||||
; GCN-DAG: v_writelane_b32
|
||||
; GCN-DAG: s_add_u32 s32, s32, 0xc00{{$}}
|
||||
; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:4{{$}}
|
||||
; GCN-DAG: v_add_{{[iu]}}32_e32 [[ADD0:v[0-9]+]], vcc, 1, [[LOAD0]]
|
||||
; GCN-DAG: buffer_store_dword [[ADD0]], off, s[0:3], s5 offset:4{{$}}
|
||||
|
@ -38,8 +38,8 @@ define amdgpu_kernel void @test_kernel_call_external_void_func_void_clobber_s30_
|
||||
; GCN-NEXT: ;;#ASMSTART
|
||||
; GCN-NEXT: ;;#ASMEND
|
||||
; GCN-NEXT: s_swappc_b64
|
||||
; GCN-NEXT: s_mov_b32 s5, s33
|
||||
; GCN: v_readlane_b32 s37, v32, 4
|
||||
; GCN-DAG: s_mov_b32 s5, s33
|
||||
; GCN-DAG: v_readlane_b32 s37, v32, 4
|
||||
; GCN: v_readlane_b32 s36, v32, 3
|
||||
; GCN: v_readlane_b32 s35, v32, 2
|
||||
; GCN: v_readlane_b32 s34, v32, 1
|
||||
@ -59,7 +59,7 @@ define void @test_func_call_external_void_func_void_clobber_s30_s31_call_externa
|
||||
; GCN-NEXT: s_mov_b32 s5, s33
|
||||
; GCN-NEXT: s_mov_b32 s33, s5
|
||||
; GCN-NEXT: s_swappc_b64
|
||||
; GCN-NEXT: s_mov_b32 s5, s33
|
||||
; GCN: s_mov_b32 s5, s33
|
||||
define void @test_func_call_external_void_funcx2() #0 {
|
||||
call void @external_void_func_void()
|
||||
call void @external_void_func_void()
|
||||
@ -175,7 +175,7 @@ define amdgpu_kernel void @test_call_void_func_void_preserves_v32(i32 addrspace(
|
||||
; GCN-NEXT: ; clobber
|
||||
; GCN-NEXT: #ASMEND
|
||||
; GCN-NEXT: v_readlane_b32 s33, v0, 0
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
; GCN: s_setpc_b64
|
||||
define hidden void @void_func_void_clobber_s33() #2 {
|
||||
call void asm sideeffect "; clobber", "~{s33}"() #0
|
||||
ret void
|
||||
|
@ -37,19 +37,19 @@ define void @callee_with_stack() #0 {
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN: s_mov_b32 s5, s32
|
||||
; GCN-DAG: s_add_u32 s32, s32, 0x400{{$}}
|
||||
; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:8
|
||||
|
||||
; GCN-DAG: v_writelane_b32 v32, s33,
|
||||
; GCN-DAG: v_writelane_b32 v32, s34,
|
||||
; GCN-DAG: v_writelane_b32 v32, s35,
|
||||
; GCN-DAG: s_add_u32 s32, s32, 0x400{{$}}
|
||||
; GCN-DAG: v_mov_b32_e32 v0, 0{{$}}
|
||||
; GCN-DAG: buffer_store_dword v0, off, s[0:3], s5 offset:4{{$}}
|
||||
; GCN-DAG: s_mov_b32 s33, s5
|
||||
|
||||
|
||||
; GCN: s_swappc_b64
|
||||
; GCN: s_mov_b32 s5, s33
|
||||
; GCN-DAG: s_mov_b32 s5, s33
|
||||
; GCN-DAG: v_readlane_b32 s35,
|
||||
; GCN-DAG: v_readlane_b32 s34,
|
||||
; GCN-DAG: v_readlane_b32 s33,
|
||||
@ -72,7 +72,9 @@ define void @callee_with_stack_and_call() #0 {
|
||||
; GCN-LABEL: {{^}}callee_no_stack_with_call:
|
||||
; GCN: s_waitcnt
|
||||
; GCN: s_mov_b32 s5, s32
|
||||
; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:4
|
||||
; GCN: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
|
||||
; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s5 offset:4
|
||||
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
|
||||
; GCN-DAG: v_writelane_b32 v32, s33, 0
|
||||
; GCN-DAG: v_writelane_b32 v32, s34, 1
|
||||
; GCN: s_mov_b32 s33, s5
|
||||
@ -81,9 +83,12 @@ define void @callee_with_stack_and_call() #0 {
|
||||
|
||||
; GCN-DAG: v_readlane_b32 s34, v32, 1
|
||||
; GCN-DAG: v_readlane_b32 s33, v32, 0
|
||||
; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4
|
||||
; GCN: s_sub_u32 s32, s32, 0x400
|
||||
|
||||
; GCN: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
|
||||
; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s5 offset:4
|
||||
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
|
||||
|
||||
; GCN: s_sub_u32 s32, s32, 0x400
|
||||
; GCN: s_setpc_b64
|
||||
define void @callee_no_stack_with_call() #0 {
|
||||
call void @external_void_func_void()
|
||||
@ -94,11 +99,18 @@ declare void @external_void_func_void() #0
|
||||
|
||||
; Make sure if a CSR vgpr is used for SGPR spilling, it is saved and restored
|
||||
; GCN-LABEL: {{^}}callee_func_sgpr_spill_no_calls:
|
||||
; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Spill
|
||||
; GCN: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
|
||||
; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
|
||||
|
||||
; GCN: v_writelane_b32 v32
|
||||
; GCN: ;;#ASMSTART
|
||||
; GCN: v_readlane_b32 s{{[0-9]+}}, v32
|
||||
; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Reload
|
||||
|
||||
; GCN: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
|
||||
; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Reload
|
||||
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
|
||||
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define void @callee_func_sgpr_spill_no_calls(i32 %in) #0 {
|
||||
|
@ -326,8 +326,8 @@ define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 {
|
||||
|
||||
; Requires loading and storing to stack slot.
|
||||
; GCN-LABEL: {{^}}too_many_args_call_too_many_args_use_workitem_id_x:
|
||||
; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:8 ; 4-byte Folded Spill
|
||||
; GCN: s_add_u32 s32, s32, 0x400{{$}}
|
||||
; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:8 ; 4-byte Folded Spill
|
||||
; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4
|
||||
|
||||
; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:4{{$}}
|
||||
|
@ -28,10 +28,12 @@ define float @call_split_type_used_outside_block_v2f32() #0 {
|
||||
; GCN: ; %bb.0: ; %bb0
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: s_mov_b32 s5, s32
|
||||
; GCN-NEXT: s_add_u32 s32, s32, 0x400
|
||||
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
|
||||
; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: s_mov_b64 exec, s[6:7]
|
||||
; GCN-NEXT: v_writelane_b32 v32, s33, 0
|
||||
; GCN-NEXT: v_writelane_b32 v32, s34, 1
|
||||
; GCN-NEXT: s_add_u32 s32, s32, 0x400
|
||||
; GCN-NEXT: v_writelane_b32 v32, s35, 2
|
||||
; GCN-NEXT: s_getpc_b64 s[6:7]
|
||||
; GCN-NEXT: s_add_u32 s6, s6, func_v2f32@rel32@lo+4
|
||||
@ -39,12 +41,14 @@ define float @call_split_type_used_outside_block_v2f32() #0 {
|
||||
; GCN-NEXT: s_mov_b64 s[34:35], s[30:31]
|
||||
; GCN-NEXT: s_mov_b32 s33, s5
|
||||
; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7]
|
||||
; GCN-NEXT: s_mov_b32 s5, s33
|
||||
; GCN-NEXT: s_mov_b64 s[30:31], s[34:35]
|
||||
; GCN-NEXT: v_readlane_b32 s35, v32, 2
|
||||
; GCN-NEXT: s_mov_b32 s5, s33
|
||||
; GCN-NEXT: v_readlane_b32 s34, v32, 1
|
||||
; GCN-NEXT: v_readlane_b32 s33, v32, 0
|
||||
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
|
||||
; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Reload
|
||||
; GCN-NEXT: s_mov_b64 exec, s[6:7]
|
||||
; GCN-NEXT: s_sub_u32 s32, s32, 0x400
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -62,10 +66,12 @@ define float @call_split_type_used_outside_block_v3f32() #0 {
|
||||
; GCN: ; %bb.0: ; %bb0
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: s_mov_b32 s5, s32
|
||||
; GCN-NEXT: s_add_u32 s32, s32, 0x400
|
||||
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
|
||||
; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: s_mov_b64 exec, s[6:7]
|
||||
; GCN-NEXT: v_writelane_b32 v32, s33, 0
|
||||
; GCN-NEXT: v_writelane_b32 v32, s34, 1
|
||||
; GCN-NEXT: s_add_u32 s32, s32, 0x400
|
||||
; GCN-NEXT: v_writelane_b32 v32, s35, 2
|
||||
; GCN-NEXT: s_getpc_b64 s[6:7]
|
||||
; GCN-NEXT: s_add_u32 s6, s6, func_v3f32@rel32@lo+4
|
||||
@ -73,12 +79,14 @@ define float @call_split_type_used_outside_block_v3f32() #0 {
|
||||
; GCN-NEXT: s_mov_b64 s[34:35], s[30:31]
|
||||
; GCN-NEXT: s_mov_b32 s33, s5
|
||||
; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7]
|
||||
; GCN-NEXT: s_mov_b32 s5, s33
|
||||
; GCN-NEXT: s_mov_b64 s[30:31], s[34:35]
|
||||
; GCN-NEXT: v_readlane_b32 s35, v32, 2
|
||||
; GCN-NEXT: s_mov_b32 s5, s33
|
||||
; GCN-NEXT: v_readlane_b32 s34, v32, 1
|
||||
; GCN-NEXT: v_readlane_b32 s33, v32, 0
|
||||
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
|
||||
; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Reload
|
||||
; GCN-NEXT: s_mov_b64 exec, s[6:7]
|
||||
; GCN-NEXT: s_sub_u32 s32, s32, 0x400
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -96,10 +104,12 @@ define half @call_split_type_used_outside_block_v4f16() #0 {
|
||||
; GCN: ; %bb.0: ; %bb0
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: s_mov_b32 s5, s32
|
||||
; GCN-NEXT: s_add_u32 s32, s32, 0x400
|
||||
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
|
||||
; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: s_mov_b64 exec, s[6:7]
|
||||
; GCN-NEXT: v_writelane_b32 v32, s33, 0
|
||||
; GCN-NEXT: v_writelane_b32 v32, s34, 1
|
||||
; GCN-NEXT: s_add_u32 s32, s32, 0x400
|
||||
; GCN-NEXT: v_writelane_b32 v32, s35, 2
|
||||
; GCN-NEXT: s_getpc_b64 s[6:7]
|
||||
; GCN-NEXT: s_add_u32 s6, s6, func_v4f16@rel32@lo+4
|
||||
@ -107,12 +117,14 @@ define half @call_split_type_used_outside_block_v4f16() #0 {
|
||||
; GCN-NEXT: s_mov_b64 s[34:35], s[30:31]
|
||||
; GCN-NEXT: s_mov_b32 s33, s5
|
||||
; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7]
|
||||
; GCN-NEXT: s_mov_b32 s5, s33
|
||||
; GCN-NEXT: s_mov_b64 s[30:31], s[34:35]
|
||||
; GCN-NEXT: v_readlane_b32 s35, v32, 2
|
||||
; GCN-NEXT: s_mov_b32 s5, s33
|
||||
; GCN-NEXT: v_readlane_b32 s34, v32, 1
|
||||
; GCN-NEXT: v_readlane_b32 s33, v32, 0
|
||||
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
|
||||
; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Reload
|
||||
; GCN-NEXT: s_mov_b64 exec, s[6:7]
|
||||
; GCN-NEXT: s_sub_u32 s32, s32, 0x400
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -130,10 +142,12 @@ define { i32, half } @call_split_type_used_outside_block_struct() #0 {
|
||||
; GCN: ; %bb.0: ; %bb0
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: s_mov_b32 s5, s32
|
||||
; GCN-NEXT: s_add_u32 s32, s32, 0x400
|
||||
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
|
||||
; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: s_mov_b64 exec, s[6:7]
|
||||
; GCN-NEXT: v_writelane_b32 v32, s33, 0
|
||||
; GCN-NEXT: v_writelane_b32 v32, s34, 1
|
||||
; GCN-NEXT: s_add_u32 s32, s32, 0x400
|
||||
; GCN-NEXT: v_writelane_b32 v32, s35, 2
|
||||
; GCN-NEXT: s_getpc_b64 s[6:7]
|
||||
; GCN-NEXT: s_add_u32 s6, s6, func_struct@rel32@lo+4
|
||||
@ -141,13 +155,15 @@ define { i32, half } @call_split_type_used_outside_block_struct() #0 {
|
||||
; GCN-NEXT: s_mov_b64 s[34:35], s[30:31]
|
||||
; GCN-NEXT: s_mov_b32 s33, s5
|
||||
; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7]
|
||||
; GCN-NEXT: s_mov_b32 s5, s33
|
||||
; GCN-NEXT: s_mov_b64 s[30:31], s[34:35]
|
||||
; GCN-NEXT: v_readlane_b32 s35, v32, 2
|
||||
; GCN-NEXT: s_mov_b32 s5, s33
|
||||
; GCN-NEXT: v_readlane_b32 s34, v32, 1
|
||||
; GCN-NEXT: v_readlane_b32 s33, v32, 0
|
||||
; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Reload
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, v4
|
||||
; GCN-NEXT: v_readlane_b32 s33, v32, 0
|
||||
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
|
||||
; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Reload
|
||||
; GCN-NEXT: s_mov_b64 exec, s[6:7]
|
||||
; GCN-NEXT: s_sub_u32 s32, s32, 0x400
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
|
@ -10,9 +10,12 @@ declare void @external_void_func_i32(i32) #0
|
||||
; GCN-LABEL: {{^}}test_func_call_external_void_func_i32_imm:
|
||||
; GCN: s_waitcnt
|
||||
; GCN: s_mov_b32 s5, s32
|
||||
; Spill CSR VGPR used for SGPR spilling
|
||||
; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:4
|
||||
; GCN-DAG: s_add_u32 s32, s32, 0x400
|
||||
; Spill CSR VGPR used for SGPR spilling
|
||||
; GCN: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
|
||||
; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s5 offset:4
|
||||
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
|
||||
|
||||
; GCN-DAG: v_writelane_b32 v32, s33, 0
|
||||
; GCN-DAG: v_writelane_b32 v32, s34, 1
|
||||
; GCN-DAG: v_writelane_b32 v32, s35, 2
|
||||
@ -22,7 +25,10 @@ declare void @external_void_func_i32(i32) #0
|
||||
; GCN: v_readlane_b32 s35, v32, 2
|
||||
; GCN: v_readlane_b32 s34, v32, 1
|
||||
; GCN: v_readlane_b32 s33, v32, 0
|
||||
; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4
|
||||
; GCN: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
|
||||
; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s5 offset:4
|
||||
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
|
||||
|
||||
; GCN: s_sub_u32 s32, s32, 0x400
|
||||
; GCN: s_setpc_b64
|
||||
define void @test_func_call_external_void_func_i32_imm() #0 {
|
||||
|
@ -207,13 +207,17 @@ entry:
|
||||
; Have another non-tail in the function
|
||||
; GCN-LABEL: {{^}}sibling_call_i32_fastcc_i32_i32_other_call:
|
||||
; GCN: s_mov_b32 s5, s32
|
||||
; GCN: buffer_store_dword v34, off, s[0:3], s5 offset:12
|
||||
; GCN: s_add_u32 s32, s32, 0x400
|
||||
|
||||
; GCN: s_or_saveexec_b64 s{{\[[0-9]+:[0-9]+\]}}, -1
|
||||
; GCN-NEXT: buffer_store_dword v34, off, s[0:3], s5 offset:12
|
||||
; GCN-NEXT: s_mov_b64 exec
|
||||
|
||||
; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:8 ; 4-byte Folded Spill
|
||||
; GCN: buffer_store_dword v33, off, s[0:3], s5 offset:4 ; 4-byte Folded Spill
|
||||
; GCN-DAG: v_writelane_b32 v34, s33, 0
|
||||
; GCN-DAG: v_writelane_b32 v34, s34, 1
|
||||
; GCN-DAG: v_writelane_b32 v34, s35, 2
|
||||
; GCN-DAG: s_add_u32 s32, s32, 0x400
|
||||
|
||||
; GCN-DAG: s_getpc_b64
|
||||
; GCN: s_swappc_b64
|
||||
@ -228,7 +232,10 @@ entry:
|
||||
|
||||
; GCN: buffer_load_dword v33, off, s[0:3], s5 offset:4
|
||||
; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:8
|
||||
; GCN: buffer_load_dword v34, off, s[0:3], s5 offset:12
|
||||
; GCN: s_or_saveexec_b64 s{{\[[0-9]+:[0-9]+\]}}, -1
|
||||
; GCN-NEXT: buffer_load_dword v34, off, s[0:3], s5 offset:12
|
||||
; GCN-NEXT: s_mov_b64 exec
|
||||
|
||||
; GCN: s_sub_u32 s32, s32, 0x400
|
||||
; GCN: s_setpc_b64 s[6:7]
|
||||
define fastcc i32 @sibling_call_i32_fastcc_i32_i32_other_call(i32 %a, i32 %b, i32 %c) #1 {
|
||||
|
Loading…
x
Reference in New Issue
Block a user