llvm/test/CodeGen/AMDGPU/spill-m0.ll
Matt Arsenault 2d7bc6b1e1 AMDGPU: Fix using incorrect private resource with no allocation
It's possible to have a use of the private resource descriptor or
scratch wave offset registers even though there are no allocated
stack objects. This would result in continuing to use the maximum
number reserved registers. This could go over the number of SGPRs
available on VI, or violate the SGPR limit requested by
the function attributes.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@285435 91177308-0d34-0410-b5e6-96231b3b80d8
2016-10-28 19:43:31 +00:00

81 lines
3.0 KiB
LLVM

; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=1 -march=amdgcn -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=TOVGPR -check-prefix=GCN %s
; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=1 -march=amdgcn -mcpu=tonga -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=TOVGPR -check-prefix=GCN %s
; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -march=amdgcn -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=TOVMEM -check-prefix=GCN %s
; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -march=amdgcn -mattr=+vgpr-spilling -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=TOVMEM -check-prefix=GCN %s
; XXX - Why does it like to use vcc?
; GCN-LABEL: {{^}}spill_m0:
; TOSMEM: s_mov_b32 s88, SCRATCH_RSRC_DWORD0
; GCN: s_cmp_lg_u32
; TOVGPR: s_mov_b32 vcc_hi, m0
; TOVGPR: v_writelane_b32 [[SPILL_VREG:v[0-9]+]], vcc_hi, 0
; TOVMEM: v_mov_b32_e32 [[SPILL_VREG:v[0-9]+]], m0
; TOVMEM: buffer_store_dword [[SPILL_VREG]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} ; 4-byte Folded Spill
; TOVMEM: s_waitcnt vmcnt(0)
; GCN: s_cbranch_scc1 [[ENDIF:BB[0-9]+_[0-9]+]]
; GCN: [[ENDIF]]:
; TOVGPR: v_readlane_b32 vcc_hi, [[SPILL_VREG]], 0
; TOVGPR: s_mov_b32 m0, vcc_hi
; TOVMEM: buffer_load_dword [[RELOAD_VREG:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} ; 4-byte Folded Reload
; TOVMEM: s_waitcnt vmcnt(0)
; TOVMEM: v_readfirstlane_b32 vcc_hi, [[RELOAD_VREG]]
; TOVMEM: s_mov_b32 m0, vcc_hi
; GCN: s_add_i32 m0, m0, 1
define void @spill_m0(i32 %cond, i32 addrspace(1)* %out) #0 {
entry:
%m0 = call i32 asm sideeffect "s_mov_b32 m0, 0", "={M0}"() #0
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %if, label %endif
if:
call void asm sideeffect "v_nop", ""() #0
br label %endif
endif:
%foo = call i32 asm sideeffect "s_add_i32 $0, $1, 1", "=s,{M0}"(i32 %m0) #0
store i32 %foo, i32 addrspace(1)* %out
ret void
}
@lds = internal addrspace(3) global [64 x float] undef
; GCN-LABEL: {{^}}spill_m0_lds:
; GCN-NOT: v_readlane_b32 m0
define amdgpu_ps void @spill_m0_lds(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) #0 {
main_body:
%4 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %3)
%cmp = fcmp ueq float 0.0, %4
br i1 %cmp, label %if, label %else
if:
%lds_ptr = getelementptr [64 x float], [64 x float] addrspace(3)* @lds, i32 0, i32 0
%lds_data = load float, float addrspace(3)* %lds_ptr
br label %endif
else:
%interp = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %3)
br label %endif
endif:
%export = phi float [%lds_data, %if], [%interp, %else]
%5 = call i32 @llvm.SI.packf16(float %export, float %export)
%6 = bitcast i32 %5 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %6, float %6, float %6, float %6)
ret void
}
declare float @llvm.SI.fs.constant(i32, i32, i32) readnone
declare i32 @llvm.SI.packf16(float, float) readnone
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { nounwind }