mirror of
https://github.com/RPCSX/llvm.git
synced 2025-02-06 12:26:45 +00:00
[AMDGPU] Fix MaxWorkGroupsPerCU for large workgroups
This patch corrects the maximum number of workgroups per CU when workgroups are large (more than 128). This value feeds into the occupancy calculation with respect to LDS size. Differential Revision: https://reviews.llvm.org/D29974 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@295134 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
52b918216d
commit
237ec36765
@ -151,7 +151,11 @@ unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
|
||||
unsigned FlatWorkGroupSize) {
|
||||
if (!Features.test(FeatureGCN))
|
||||
return 8;
|
||||
return getWavesPerWorkGroup(Features, FlatWorkGroupSize) == 1 ? 40 : 16;
|
||||
unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);
|
||||
if (N == 1)
|
||||
return 40;
|
||||
N = 40 / N;
|
||||
return std::min(N, 16u);
|
||||
}
|
||||
|
||||
unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
|
||||
|
@ -69,7 +69,8 @@ entry:
|
||||
}
|
||||
|
||||
; ALL-LABEL: @occupancy_0(
|
||||
; ALL: alloca [5 x i32]
|
||||
; CI-NOT: alloca [5 x i32]
|
||||
; SI: alloca [5 x i32]
|
||||
define void @occupancy_0(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #3 {
|
||||
entry:
|
||||
%stack = alloca [5 x i32], align 4
|
||||
@ -91,7 +92,8 @@ entry:
|
||||
}
|
||||
|
||||
; ALL-LABEL: @occupancy_max(
|
||||
; ALL: alloca [5 x i32]
|
||||
; CI-NOT: alloca [5 x i32]
|
||||
; SI: alloca [5 x i32]
|
||||
define void @occupancy_max(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #4 {
|
||||
entry:
|
||||
%stack = alloca [5 x i32], align 4
|
||||
|
Loading…
x
Reference in New Issue
Block a user