llvm/test/CodeGen/AMDGPU/move-addr64-rsrc-dead-subreg-writes.ll
Changpeng Fang 89e60598f6 AMDGPU/SI: Use flat for global load/store when targeting HSA
Summary:
  For some reason doing executing an MUBUF instruction with the addr64
  bit set and a zero base pointer in the resource descriptor causes
  the memory operation to be dropped when the shader is executed using
  the HSA runtime.

  This kind of MUBUF instruction is commonly used when the pointer is
  stored in VGPRs.  The base pointer field in the resource descriptor
  is set to zero and and the pointer is stored in the vaddr field.

  This patch resolves the issue by only using flat instructions for
  global memory operations when targeting HSA. This is an overly
  conservative fix as all other configurations of MUBUF instructions
  appear to work.

  NOTE: re-commit by fixing a failure in Codegen/AMDGPU/llvm.dbg.value.ll

Reviewers: tstellarAMD

Subscribers: arsenm, llvm-commits

Differential Revision: http://reviews.llvm.org/D15543

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@256282 91177308-0d34-0410-b5e6-96231b3b80d8
2015-12-22 20:55:23 +00:00

37 lines
1.4 KiB
LLVM

; RUN: llc -march=amdgcn -mcpu=kaveri -mtriple=amdgcn-unknown-amdhsa -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN %s
; Check that when mubuf addr64 instruction is handled in moveToVALU
; from the pointer, dead register writes are not emitted.
; FIXME: We should be able to use the SGPR directly as src0 to v_add_i32
; GCN-LABEL: {{^}}clobber_vgpr_pair_pointer_add:
; GCN: s_load_dwordx2 s{{\[}}[[ARG1LO:[0-9]+]]:[[ARG1HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
; GCN: buffer_load_dwordx2 v{{\[}}[[LDPTRLO:[0-9]+]]:[[LDPTRHI:[0-9]+]]{{\]}}
; GCN-NOT: v_mov_b32
; GCN: v_mov_b32_e32 v[[VARG1LO:[0-9]+]], s[[ARG1LO]]
; GCN-NEXT: v_mov_b32_e32 v[[VARG1HI:[0-9]+]], s[[ARG1HI]]
; GCN-NOT: v_mov_b32
; GCN: v_add_i32_e32 v[[PTRLO:[0-9]+]], vcc, v[[LDPTRLO]], v[[VARG1LO]]
; GCN: v_addc_u32_e32 v[[PTRHI:[0-9]+]], vcc, v[[LDPTRHI]], v[[VARG1HI]]
; GCN: buffer_load_ubyte v{{[0-9]+}}, v{{\[}}[[PTRLO]]:[[PTRHI]]{{\]}},
define void @clobber_vgpr_pair_pointer_add(i64 %arg1, i8 addrspace(1)* addrspace(1)* %ptrarg, i32 %arg3) #0 {
bb:
%tmp = icmp sgt i32 %arg3, 0
br i1 %tmp, label %bb4, label %bb17
bb4:
%tmp14 = load volatile i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %ptrarg
%tmp15 = getelementptr inbounds i8, i8 addrspace(1)* %tmp14, i64 %arg1
%tmp16 = load volatile i8, i8 addrspace(1)* %tmp15
br label %bb17
bb17:
ret void
}
attributes #0 = { nounwind }