mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-10 22:43:53 +00:00
AMDGPU: Disable AMDGPUPromoteAlloca pass for shader calling conventions.
Summary: The work item intrinsics are not available for the shader calling conventions. And even if we did hook them up, most shader stages have some extra restrictions on the amount of available LDS. Reviewers: tstellarAMD, arsenm Subscribers: nhaehnle, arsenm, llvm-commits, kzhuravl Differential Revision: https://reviews.llvm.org/D20728 llvm-svn: 275779
This commit is contained in:
parent
e2ac7e161f
commit
bd0d64e160
@ -649,6 +649,12 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) {
|
||||
|
||||
const Function &ContainingFunction = *I.getParent()->getParent();
|
||||
|
||||
// Don't promote the alloca to LDS for shader calling conventions as the work
|
||||
// item ID intrinsics are not supported for these calling conventions.
|
||||
// Furthermore not all LDS is available for some of the stages.
|
||||
if (AMDGPU::isShader(ContainingFunction.getCallingConv()))
|
||||
return;
|
||||
|
||||
// FIXME: We should also try to get this value from the reqd_work_group_size
|
||||
// function attribute if it is available.
|
||||
unsigned WorkGroupSize = AMDGPU::getMaximumWorkGroupSize(ContainingFunction);
|
||||
|
29
test/CodeGen/AMDGPU/promote-alloca-shaders.ll
Normal file
29
test/CodeGen/AMDGPU/promote-alloca-shaders.ll
Normal file
@ -0,0 +1,29 @@
|
||||
; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-promote-alloca < %s | FileCheck -check-prefix=IR %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=ASM %s
|
||||
|
||||
; IR-LABEL: define amdgpu_vs void @promote_alloca_shaders(i32 addrspace(1)* inreg %out, i32 addrspace(1)* inreg %in) #0 {
|
||||
; IR: alloca [5 x i32]
|
||||
; ASM-LABEL: {{^}}promote_alloca_shaders:
|
||||
; ASM: ; LDSByteSize: 0 bytes/workgroup (compile time only)
|
||||
|
||||
; Test input: a function using the amdgpu_vs calling convention that writes to
; and reads from a [5 x i32] stack array. The checks in this file expect the
; alloca to still be present after the promote-alloca pass and the reported
; LDS size to be 0 bytes.
define amdgpu_vs void @promote_alloca_shaders(i32 addrspace(1)* inreg %out, i32 addrspace(1)* inreg %in) #0 {
|
||||
entry:
|
||||
%stack = alloca [5 x i32], align 4
|
||||
; Store 4 into the stack array at the index loaded from the first element of %in.
%tmp0 = load i32, i32 addrspace(1)* %in, align 4
|
||||
%arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp0
|
||||
store i32 4, i32* %arrayidx1, align 4
|
||||
; Store 5 into the stack array at the index loaded from the second element of %in.
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
|
||||
%tmp1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
|
||||
%arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp1
|
||||
store i32 5, i32* %arrayidx3, align 4
|
||||
; Copy stack element 0 to the first element of %out.
%arrayidx4 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0
|
||||
%tmp2 = load i32, i32* %arrayidx4, align 4
|
||||
store i32 %tmp2, i32 addrspace(1)* %out, align 4
|
||||
; Copy stack element 1 to the second element of %out.
%arrayidx5 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1
|
||||
%tmp3 = load i32, i32* %arrayidx5
|
||||
%arrayidx6 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
|
||||
store i32 %tmp3, i32 addrspace(1)* %arrayidx6
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind "amdgpu-max-work-group-size"="64" }
|
Loading…
x
Reference in New Issue
Block a user