Mirror of https://github.com/RPCS3/llvm.git, synced 2026-01-31 01:25:19 +01:00.
Using SplitCSR for the frame register was very broken. Often the copies in the prolog and epilog were optimized out, in addition to them being inserted after the true prolog where the FP was clobbered. I have a hacky solution which works that continues to use split CSR, but for now this is simpler and will get to working programs. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@313274 91177308-0d34-0410-b5e6-96231b3b80d8
109 lines
2.6 KiB
LLVM
; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -enable-ipra -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN %s
; Kernels are not called, so there is no call preserved mask.
; GCN-LABEL: {{^}}kernel:
; GCN: flat_store_dword
define amdgpu_kernel void @kernel(i32 addrspace(1)* %out) #0 {
entry:
  store i32 0, i32 addrspace(1)* %out
  ret void
}
; GCN-LABEL: {{^}}func:
; GCN: ; NumVgprs: 8
define void @func() #1 {
  ; Inline asm clobbers v0-v7, so with IPRA enabled callers know exactly
  ; which VGPRs this function uses.
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"() #0
  ret void
}
; With IPRA the kernel knows @func only clobbers v0-v7, so the live VGPR
; can stay in v8 across the call — no spill/reload or lane copies expected.
; GCN-LABEL: {{^}}kernel_call:
; GCN-NOT: buffer_store
; GCN-NOT: buffer_load
; GCN-NOT: readlane
; GCN-NOT: writelane
; GCN: flat_load_dword v8
; GCN: s_swappc_b64
; GCN-NOT: buffer_store
; GCN-NOT: buffer_load
; GCN-NOT: readlane
; GCN-NOT: writelane
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v8

; GCN: ; NumSgprs: 37
; GCN: ; NumVgprs: 9
define amdgpu_kernel void @kernel_call() #0 {
  %vgpr = load volatile i32, i32 addrspace(1)* undef
  tail call void @func()
  store volatile i32 %vgpr, i32 addrspace(1)* undef
  ret void
}
; Same as kernel_call, but the caller is itself a regular function; the
; value loaded before the call should still survive in v8 without spills.
; GCN-LABEL: {{^}}func_regular_call:
; GCN-NOT: buffer_store
; GCN-NOT: buffer_load
; GCN-NOT: readlane
; GCN-NOT: writelane
; GCN: flat_load_dword v8
; GCN: s_swappc_b64
; GCN-NOT: buffer_store
; GCN-NOT: buffer_load
; GCN-NOT: readlane
; GCN-NOT: writelane
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v8

; GCN: ; NumSgprs: 32
; GCN: ; NumVgprs: 9
define void @func_regular_call() #1 {
  %vgpr = load volatile i32, i32 addrspace(1)* undef
  tail call void @func()
  store volatile i32 %vgpr, i32 addrspace(1)* undef
  ret void
}
; A pure tail call: the callee's PC is materialized in s[6:7] and control
; transfers with s_setpc_b64 (no s_swappc_b64, no stack traffic).
; GCN-LABEL: {{^}}func_tail_call:
; GCN: s_waitcnt
; GCN-NEXT: s_getpc_b64 s[6:7]
; GCN-NEXT: s_add_u32 s6,
; GCN-NEXT: s_addc_u32 s7,
; GCN-NEXT: s_setpc_b64 s[6:7]

; GCN: ; NumSgprs: 32
; GCN: ; NumVgprs: 8
define void @func_tail_call() #1 {
  tail call void @func()
  ret void
}
; Mixed case: a regular call (s_swappc_b64) followed by a tail call
; (s_setpc_b64). The live value still stays in v8 across the first call.
; GCN-LABEL: {{^}}func_call_tail_call:
; GCN: flat_load_dword v8
; GCN: s_swappc_b64
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v8
; GCN: s_setpc_b64

; GCN: ; NumSgprs: 32
; GCN: ; NumVgprs: 9
define void @func_call_tail_call() #1 {
  %vgpr = load volatile i32, i32 addrspace(1)* undef
  tail call void @func()
  store volatile i32 %vgpr, i32 addrspace(1)* undef
  tail call void @func()
  ret void
}
; Trivial noinline callee used by test_funcx2 below.
define void @void_func_void() noinline {
  ret void
}
; Make sure we don't get save/restore of FP between calls.
; GCN-LABEL: {{^}}test_funcx2:
; GCN-NOT: s5
; GCN-NOT: s32
define void @test_funcx2() #0 {
  call void @void_func_void()
  call void @void_func_void()
  ret void
}
; #0: kernels/callers; #1: noinline so callees survive as real calls.
attributes #0 = { nounwind }
attributes #1 = { nounwind noinline }