llvm/test/CodeGen/AMDGPU/local-stack-slot-offset.ll
Matt Arsenault d019e8638a Enable FeatureFlatForGlobal on Volcanic Islands
This switches to the workaround that HSA defaults to
for the mesa path.

This should be applied to the 4.0 branch.

Patch by Vedran Miletić <vedran@miletic.net>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@292982 91177308-0d34-0410-b5e6-96231b3b80d8
2017-01-24 22:02:15 +00:00

36 lines
1.5 KiB
LLVM

;RUN: llc < %s -march=amdgcn -mcpu=verde -mattr=+vgpr-spilling -mattr=-promote-alloca -verify-machineinstrs | FileCheck %s -check-prefix=CHECK
;RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -mattr=+vgpr-spilling -mattr=-promote-alloca -verify-machineinstrs | FileCheck %s -check-prefix=CHECK
; Allocate two stack slots of 2052 bytes each, requiring a total of 4104 bytes.
; Extracting the last element of each does not fit into the offset field of
; MUBUF instructions, so a new base register is needed. Previously no new base
; register was created, which led to an assertion failure.
; CHECK-LABEL: {{^}}main:
; CHECK: buffer_store_dword
; CHECK: buffer_store_dword
; CHECK: buffer_load_dword
; CHECK: buffer_load_dword
; Test body: stores one float into each of two 513-element stack arrays at
; caller-supplied indices, then loads element 0 of the first array and
; element 512 (the last one) of the second array and returns their sum.
;   %v1, %v2     - values stored into %m1 and %m2 respectively
;   %idx1, %idx2 - element indices used for the two stores
define amdgpu_gs float @main(float %v1, float %v2, i32 %idx1, i32 %idx2) {
main_body:
; Two separate stack arrays of 513 floats (2052 bytes each).
%m1 = alloca [513 x float]
%m2 = alloca [513 x float]
; Store each input value at a run-time index of its array.
; NOTE(review): the variable indices presumably keep both arrays in stack
; memory rather than letting them be folded away - confirm.
%gep1.store = getelementptr [513 x float], [513 x float]* %m1, i32 0, i32 %idx1
store float %v1, float* %gep1.store
%gep2.store = getelementptr [513 x float], [513 x float]* %m2, i32 0, i32 %idx2
store float %v2, float* %gep2.store
; This used to use a base reg equal to 0.
; Constant-index load of the first element of %m1.
%gep1.load = getelementptr [513 x float], [513 x float]* %m1, i32 0, i32 0
%out1 = load float, float* %gep1.load
; This used to attempt to re-use the base reg at 0, generating an out-of-bounds instruction offset.
; Constant-index load of the last element (index 512) of %m2.
%gep2.load = getelementptr [513 x float], [513 x float]* %m2, i32 0, i32 512
%out2 = load float, float* %gep2.load
; Combine both loaded values so neither load is dead.
%r = fadd float %out1, %out2
ret float %r
}