mirror of
https://github.com/RPCSX/llvm.git
synced 2025-01-10 06:00:30 +00:00
26b4602681
In the case of a COPY-like instruction we may be able to deduce that a certain input is unused, based on the used lanes of the register defined by the instruction. This even works across otherwise incompatible copies (no need to have compatible lanemasks, completely unused operands are still completely unused). It even makes sense to redo the analysis in this case since we gained information for a case we previously stopped at because of the incompatible masks. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@268815 91177308-0d34-0410-b5e6-96231b3b80d8
44 lines
1.4 KiB
LLVM
44 lines
1.4 KiB
LLVM
; RUN: llc -march=amdgcn -mcpu=SI -o - %s | FileCheck %s
; Don't crash when the use of an undefined value is only detected by the
; register coalescer because it is hidden with subregister insert/extract.
target triple="amdgcn--"

; Expected assembly for @foobar. The two bare "BB0_n" lines below carry no
; FileCheck prefix, so they are ordinary comments that only mark where each
; basic block is expected to start.
; CHECK-LABEL: foobar:
; CHECK: s_load_dword s2, s[0:1], 0x9
; CHECK-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK: v_mbcnt_lo_u32_b32_e64
; CHECK-NEXT: v_cmp_eq_i32_e32 vcc, 0, v0
; CHECK-NEXT: s_and_saveexec_b64 s[2:3], vcc
; CHECK-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
; BB0_1:
; CHECK: s_load_dword s0, s[0:1], 0xa
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; BB0_2:
; CHECK: s_or_b64 exec, exec, s[2:3]
; CHECK-NEXT: s_mov_b32 s7, 0xf000
; CHECK-NEXT: s_mov_b32 s6, -1
; CHECK-NEXT: buffer_store_dword v1, off, s[4:7], 0
; CHECK-NEXT: s_endpgm
; Kernel under test. Both vectors that reach the phi are built from undef with
; only element 0 written, yet element 1 is what gets extracted and stored, so
; the stored value is undefined on every path. The instruction sequence is kept
; exactly as-is because the expected assembly depends on it.
define void @foobar(float %a0, float %a1, float addrspace(1)* %out) nounwind {
entry:
  ; Element 0 <- %a0; elements 1..3 stay undef.
  %v0 = insertelement <4 x float> undef, float %a0, i32 0
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  ; Take the %ift path only when the intrinsic result is zero.
  %cnd = icmp eq i32 %tid, 0
  br i1 %cnd, label %ift, label %ife

ift:
  ; Same shape as %v0 but seeded from %a1; elements 1..3 are again undef.
  %v1 = insertelement <4 x float> undef, float %a1, i32 0
  br label %ife

ife:
  %val = phi <4 x float> [ %v1, %ift ], [ %v0, %entry ]
  ; Reads element 1, which neither incoming vector ever defined.
  %v2 = extractelement <4 x float> %val, i32 1
  store float %v2, float addrspace(1)* %out, align 4
  ret void
}
; Intrinsic called from @foobar's entry block; attribute group #0 (below)
; marks it nounwind readnone.
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0

attributes #0 = { nounwind readnone }