mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-24 21:05:23 +00:00
AMDGPU: Fix not using s33 for scratch wave offset in kernels
Fixes missing piece from r363990.

llvm-svn: 364099
This commit is contained in:
parent
4f5a568045
commit
47811338cc
@ -1829,11 +1829,8 @@ static void reservePrivateMemoryRegs(const TargetMachine &TM,
|
||||
Info.setScratchRSrcReg(ReservedBufferReg);
|
||||
}
|
||||
|
||||
// This should be accurate for kernels even before the frame is finalized.
|
||||
const bool HasFP = ST.getFrameLowering()->hasFP(MF);
|
||||
if (HasFP) {
|
||||
unsigned ReservedOffsetReg =
|
||||
TRI.reservedPrivateSegmentWaveByteOffsetReg(MF);
|
||||
// hasFP should be accurate for kernels even before the frame is finalized.
|
||||
if (ST.getFrameLowering()->hasFP(MF)) {
|
||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
|
||||
// Try to use s32 as the SP, but move it if it would interfere with input
|
||||
@ -1860,8 +1857,15 @@ static void reservePrivateMemoryRegs(const TargetMachine &TM,
|
||||
report_fatal_error("failed to find register for SP");
|
||||
}
|
||||
|
||||
Info.setScratchWaveOffsetReg(ReservedOffsetReg);
|
||||
Info.setFrameOffsetReg(ReservedOffsetReg);
|
||||
if (MFI.hasCalls()) {
|
||||
Info.setScratchWaveOffsetReg(AMDGPU::SGPR33);
|
||||
Info.setFrameOffsetReg(AMDGPU::SGPR33);
|
||||
} else {
|
||||
unsigned ReservedOffsetReg =
|
||||
TRI.reservedPrivateSegmentWaveByteOffsetReg(MF);
|
||||
Info.setScratchWaveOffsetReg(ReservedOffsetReg);
|
||||
Info.setFrameOffsetReg(ReservedOffsetReg);
|
||||
}
|
||||
} else if (RequiresStackAccess) {
|
||||
assert(!MFI.hasCalls());
|
||||
// We know there are accesses and they will be done relative to SP, so just
|
||||
|
@ -104,9 +104,9 @@ define amdgpu_kernel void @test_call_void_func_void_clobber_vcc(i32 addrspace(1)
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_call_void_func_void_mayclobber_s31:
|
||||
; GCN: s_mov_b32 s33, s31
|
||||
; GCN: s_mov_b32 s34, s31
|
||||
; GCN-NEXT: s_swappc_b64
|
||||
; GCN-NEXT: s_mov_b32 s31, s33
|
||||
; GCN-NEXT: s_mov_b32 s31, s34
|
||||
define amdgpu_kernel void @test_call_void_func_void_mayclobber_s31(i32 addrspace(1)* %out) #0 {
|
||||
%s31 = call i32 asm sideeffect "; def $0", "={s31}"()
|
||||
call void @external_void_func_void()
|
||||
@ -128,15 +128,14 @@ define amdgpu_kernel void @test_call_void_func_void_mayclobber_v31(i32 addrspace
|
||||
; FIXME: What is the expected behavior for reserved registers here?
|
||||
|
||||
; GCN-LABEL: {{^}}test_call_void_func_void_preserves_s33:
|
||||
; GCN: s_mov_b32 s34, s9
|
||||
; GCN-NOT: s33
|
||||
; GCN: s_mov_b32 s33, s9
|
||||
; GCN: s_mov_b32 s32, s33
|
||||
; GCN: #ASMSTART
|
||||
; GCN-NEXT: ; def s33
|
||||
; GCN-NEXT: #ASMEND
|
||||
; GCN: s_getpc_b64 s[4:5]
|
||||
; GCN-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
|
||||
; GCN-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+4
|
||||
; GCN: s_mov_b32 s32, s34
|
||||
; GCN: s_swappc_b64 s[30:31], s[4:5]
|
||||
; GCN: ;;#ASMSTART
|
||||
; GCN-NEXT: ; use s33
|
||||
|
@ -30,7 +30,7 @@ define hidden void @func() #1 {
|
||||
; GCN-NOT: writelane
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v8
|
||||
|
||||
; GCN: ; NumSgprs: 37
|
||||
; GCN: ; NumSgprs: 38
|
||||
; GCN: ; NumVgprs: 9
|
||||
define amdgpu_kernel void @kernel_call() #0 {
|
||||
%vgpr = load volatile i32, i32 addrspace(1)* undef
|
||||
|
@ -91,7 +91,8 @@ define void @force_realign4(i32 %idx) #1 {
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}kernel_call_align16_from_8:
|
||||
; GCN: s_add_u32 s32, s8, 0x400{{$}}
|
||||
; GCN: s_mov_b32 s33, s7{{$}}
|
||||
; GCN-NEXT: s_add_u32 s32, s33, 0x400{{$}}
|
||||
; GCN-NOT: s32
|
||||
; GCN: s_swappc_b64
|
||||
define amdgpu_kernel void @kernel_call_align16_from_8() #0 {
|
||||
@ -103,7 +104,8 @@ define amdgpu_kernel void @kernel_call_align16_from_8() #0 {
|
||||
|
||||
; The call sequence should keep the stack on call aligned to 4
|
||||
; GCN-LABEL: {{^}}kernel_call_align16_from_5:
|
||||
; GCN: s_add_u32 s32, s8, 0x400
|
||||
; GCN: s_mov_b32 s33, s7{{$}}
|
||||
; GCN-NEXT: s_add_u32 s32, s33, 0x400
|
||||
; GCN: s_swappc_b64
|
||||
define amdgpu_kernel void @kernel_call_align16_from_5() {
|
||||
%alloca0 = alloca i8, align 1, addrspace(5)
|
||||
@ -114,7 +116,8 @@ define amdgpu_kernel void @kernel_call_align16_from_5() {
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}kernel_call_align4_from_5:
|
||||
; GCN: s_add_u32 s32, s8, 0x400
|
||||
; GCN: s_mov_b32 s33, s7{{$}}
|
||||
; GCN: s_add_u32 s32, s33, 0x400
|
||||
; GCN: s_swappc_b64
|
||||
define amdgpu_kernel void @kernel_call_align4_from_5() {
|
||||
%alloca0 = alloca i8, align 1, addrspace(5)
|
||||
|
Loading…
x
Reference in New Issue
Block a user