mirror of
https://github.com/RPCSX/llvm.git
synced 2025-01-10 06:00:30 +00:00
7985e4be56
This was assuming it could use all memory before, which is a bad decision because it restricts occupancy. By default, only try to use enough space that could reduce occupancy to 7, an arbitrarily chosen limit. Based on the existing LDS usage, try to round up to the limit in the current tier instead of further hurting occupancy. This isn't ideal, because it doesn't accurately know how much space is going to be used for alignment padding. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@269708 91177308-0d34-0410-b5e6-96231b3b80d8
39 lines
1.8 KiB
LLVM
39 lines
1.8 KiB
LLVM
; RUN: llc -O0 -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mattr=+promote-alloca < %s | FileCheck -check-prefix=NOOPTS -check-prefix=ALL %s
|
|
; RUN: llc -O1 -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mattr=+promote-alloca < %s | FileCheck -check-prefix=OPTS -check-prefix=ALL %s
|
|
|
|
; Kernel that stores two constants into a [2 x [2 x i32]] alloca and then
; reads one element back through a runtime index, writing it to %out.
; The -O0 run expects a zero group segment size and no ds_write in the
; output, while the -O1 run expects a ds_write to be emitted.
; ALL-LABEL: {{^}}promote_alloca_i32_array_array:
; NOOPTS: workgroup_group_segment_byte_size = 0{{$}}
; NOOPTS-NOT: ds_write
; OPTS: ds_write
define void @promote_alloca_i32_array_array(i32 addrspace(1)* %out, i32 %index) #0 {
entry:
  %alloca = alloca [2 x [2 x i32]]
  %gep0 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 0
  %gep1 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 1
  store i32 0, i32* %gep0
  store i32 1, i32* %gep1
  ; Dynamic index into the inner array, so the load cannot be folded away
  ; from the two constant stores above.
  %gep2 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 %index
  %load = load i32, i32* %gep2
  store i32 %load, i32 addrspace(1)* %out
  ret void
}
|
|
|
|
; Same body as @promote_alloca_i32_array_array, but the function carries
; attribute group #1 (optnone noinline). Both RUN lines expect a zero group
; segment size and no ds_write in the output for this function.
; ALL-LABEL: {{^}}optnone_promote_alloca_i32_array_array:
; ALL: workgroup_group_segment_byte_size = 0{{$}}
; ALL-NOT: ds_write
define void @optnone_promote_alloca_i32_array_array(i32 addrspace(1)* %out, i32 %index) #1 {
entry:
  %alloca = alloca [2 x [2 x i32]]
  %gep0 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 0
  %gep1 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 1
  store i32 0, i32* %gep0
  store i32 1, i32* %gep1
  ; Dynamic index into the inner array, so the load cannot be folded away
  ; from the two constant stores above.
  %gep2 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 %index
  %load = load i32, i32* %gep2
  store i32 %load, i32 addrspace(1)* %out
  ret void
}
|
|
|
|
; Attribute group #0, referenced by @promote_alloca_i32_array_array.
attributes #0 = { nounwind "amdgpu-max-work-group-size"="64" }
|
|
; Attribute group #1, referenced by @optnone_promote_alloca_i32_array_array.
; Same contents as group #0 with optnone and noinline added.
attributes #1 = { nounwind optnone noinline "amdgpu-max-work-group-size"="64" }
|