llvm/test/CodeGen/AMDGPU/merge-store-usedef.ll
Nicolai Haehnle ce0a5230f5 AMDGPU: Fix SILoadStoreOptimizer when writes cannot be merged due to register dependencies
Summary:
When finding a match for a merge and collecting the instructions that must
be moved, keep in mind that the instruction we merge might actually use one
of the defs that are being moved.

Fixes piglit spec/arb_enhanced_layouts/execution/component-layout/vs-tcs-load-output[-indirect].

The fact that the ds_read in the test case is not eliminated suggests that
there might be another problem related to alias analysis, but that's a
separate problem: this pass should still work correctly even when earlier
optimization passes missed something or were disabled.

Reviewers: tstellarAMD, arsenm

Subscribers: kzhuravl, wdng, yaxunl, llvm-commits, tony-tye

Differential Revision: https://reviews.llvm.org/D25829

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@285273 91177308-0d34-0410-b5e6-96231b3b80d8
2016-10-27 08:15:07 +00:00

24 lines
832 B
LLVM

; Regression test for SILoadStoreOptimizer: two writes must not be merged when
; the second write uses a value defined by an instruction located between them.
;
; The module is compiled for amdgcn twice (-mcpu=verde and -mcpu=tonga) with
; the machine verifier enabled, and the output is matched by FileCheck.
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
; Expected output for test1: a write, then a read, then a second write, each
; as a separate 32-bit ds instruction, in that order.
; CHECK-LABEL: {{^}}test1:
; CHECK: ds_write_b32
; CHECK: ds_read_b32
; CHECK: ds_write_b32
define amdgpu_vs void @test1(i32 %v) #0 {
  ; Two adjacent i32 slots in addrspace(3), at element offsets 0 and 1 from null.
  %slot0 = getelementptr i32, i32 addrspace(3)* null, i32 0
  %slot1 = getelementptr i32, i32 addrspace(3)* null, i32 1

  ; First write: the incoming argument goes to slot 0.
  store i32 %v, i32 addrspace(3)* %slot0

  ; Intervening call to the tbuffer store intrinsic, placed between the two
  ; addrspace(3) writes.
  call void @llvm.SI.tbuffer.store.i32(
      <16 x i8> undef, i32 %v, i32 1, i32 undef, i32 undef, i32 0, i32 4,
      i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)

  ; Read slot 0 back; the second write stores this loaded value, so it uses a
  ; def that sits between the two writes.
  %reloaded = load i32, i32 addrspace(3)* %slot0
  store i32 %reloaded, i32 addrspace(3)* %slot1

  ret void
}
; Declaration of the tbuffer store intrinsic called from @test1: one <16 x i8>
; operand followed by twelve i32 operands.
declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) #0
; Attribute group #0 is applied to both @test1 and the intrinsic declaration.
attributes #0 = { nounwind }