diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index bcd320ec22d..64018fd92ac 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1829,11 +1829,8 @@ static void reservePrivateMemoryRegs(const TargetMachine &TM,
     Info.setScratchRSrcReg(ReservedBufferReg);
   }
 
-  // This should be accurate for kernels even before the frame is finalized.
-  const bool HasFP = ST.getFrameLowering()->hasFP(MF);
-  if (HasFP) {
-    unsigned ReservedOffsetReg =
-        TRI.reservedPrivateSegmentWaveByteOffsetReg(MF);
+  // hasFP should be accurate for kernels even before the frame is finalized.
+  if (ST.getFrameLowering()->hasFP(MF)) {
     MachineRegisterInfo &MRI = MF.getRegInfo();
 
     // Try to use s32 as the SP, but move it if it would interfere with input
@@ -1860,8 +1857,15 @@ static void reservePrivateMemoryRegs(const TargetMachine &TM,
       report_fatal_error("failed to find register for SP");
     }
 
-    Info.setScratchWaveOffsetReg(ReservedOffsetReg);
-    Info.setFrameOffsetReg(ReservedOffsetReg);
+    if (MFI.hasCalls()) {
+      Info.setScratchWaveOffsetReg(AMDGPU::SGPR33);
+      Info.setFrameOffsetReg(AMDGPU::SGPR33);
+    } else {
+      unsigned ReservedOffsetReg =
+          TRI.reservedPrivateSegmentWaveByteOffsetReg(MF);
+      Info.setScratchWaveOffsetReg(ReservedOffsetReg);
+      Info.setFrameOffsetReg(ReservedOffsetReg);
+    }
   } else if (RequiresStackAccess) {
     assert(!MFI.hasCalls());
     // We know there are accesses and they will be done relative to SP, so just
diff --git a/test/CodeGen/AMDGPU/call-preserved-registers.ll b/test/CodeGen/AMDGPU/call-preserved-registers.ll
index 42a7ea8b21c..05af715eb78 100644
--- a/test/CodeGen/AMDGPU/call-preserved-registers.ll
+++ b/test/CodeGen/AMDGPU/call-preserved-registers.ll
@@ -104,9 +104,9 @@ define amdgpu_kernel void @test_call_void_func_void_clobber_vcc(i32 addrspace(1)
 }
 
 ; GCN-LABEL: {{^}}test_call_void_func_void_mayclobber_s31:
-; GCN: s_mov_b32 s33, s31
+; GCN: s_mov_b32 s34, s31
 ; GCN-NEXT: s_swappc_b64
-; GCN-NEXT: s_mov_b32 s31, s33
+; GCN-NEXT: s_mov_b32 s31, s34
 define amdgpu_kernel void @test_call_void_func_void_mayclobber_s31(i32 addrspace(1)* %out) #0 {
   %s31 = call i32 asm sideeffect "; def $0", "={s31}"()
   call void @external_void_func_void()
@@ -128,15 +128,14 @@ define amdgpu_kernel void @test_call_void_func_void_mayclobber_v31(i32 addrspace
 
 ; FIXME: What is the expected behavior for reserved registers here?
 ; GCN-LABEL: {{^}}test_call_void_func_void_preserves_s33:
-; GCN: s_mov_b32 s34, s9
-; GCN-NOT: s33
+; GCN: s_mov_b32 s33, s9
+; GCN: s_mov_b32 s32, s33
 ; GCN: #ASMSTART
 ; GCN-NEXT: ; def s33
 ; GCN-NEXT: #ASMEND
 ; GCN: s_getpc_b64 s[4:5]
 ; GCN-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
 ; GCN-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+4
-; GCN: s_mov_b32 s32, s34
 ; GCN: s_swappc_b64 s[30:31], s[4:5]
 ; GCN: ;;#ASMSTART
 ; GCN-NEXT: ; use s33
diff --git a/test/CodeGen/AMDGPU/ipra.ll b/test/CodeGen/AMDGPU/ipra.ll
index 8027d6739da..a39423a8dad 100644
--- a/test/CodeGen/AMDGPU/ipra.ll
+++ b/test/CodeGen/AMDGPU/ipra.ll
@@ -30,7 +30,7 @@ define hidden void @func() #1 {
 ; GCN-NOT: writelane
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v8
 
-; GCN: ; NumSgprs: 37
+; GCN: ; NumSgprs: 38
 ; GCN: ; NumVgprs: 9
 define amdgpu_kernel void @kernel_call() #0 {
   %vgpr = load volatile i32, i32 addrspace(1)* undef
diff --git a/test/CodeGen/AMDGPU/stack-realign.ll b/test/CodeGen/AMDGPU/stack-realign.ll
index 202a5326176..9bc5d7f4a5e 100644
--- a/test/CodeGen/AMDGPU/stack-realign.ll
+++ b/test/CodeGen/AMDGPU/stack-realign.ll
@@ -91,7 +91,8 @@ define void @force_realign4(i32 %idx) #1 {
 }
 
 ; GCN-LABEL: {{^}}kernel_call_align16_from_8:
-; GCN: s_add_u32 s32, s8, 0x400{{$}}
+; GCN: s_mov_b32 s33, s7{{$}}
+; GCN-NEXT: s_add_u32 s32, s33, 0x400{{$}}
 ; GCN-NOT: s32
 ; GCN: s_swappc_b64
 define amdgpu_kernel void @kernel_call_align16_from_8() #0 {
@@ -103,7 +104,8 @@ define amdgpu_kernel void @kernel_call_align16_from_8() #0 {
 
 ; The call sequence should keep the stack on call aligned to 4
 ; GCN-LABEL: {{^}}kernel_call_align16_from_5:
-; GCN: s_add_u32 s32, s8, 0x400
+; GCN: s_mov_b32 s33, s7{{$}}
+; GCN-NEXT: s_add_u32 s32, s33, 0x400
 ; GCN: s_swappc_b64
 define amdgpu_kernel void @kernel_call_align16_from_5() {
   %alloca0 = alloca i8, align 1, addrspace(5)
@@ -114,7 +116,8 @@ define amdgpu_kernel void @kernel_call_align16_from_5() {
 }
 
 ; GCN-LABEL: {{^}}kernel_call_align4_from_5:
-; GCN: s_add_u32 s32, s8, 0x400
+; GCN: s_mov_b32 s33, s7{{$}}
+; GCN: s_add_u32 s32, s33, 0x400
 ; GCN: s_swappc_b64
 define amdgpu_kernel void @kernel_call_align4_from_5() {
   %alloca0 = alloca i8, align 1, addrspace(5)