mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-01-10 01:55:08 +00:00
AMDGPU/InsertWaitcnts: Remove the dependence on MachineLoopInfo
Summary:
MachineLoopInfo cannot be relied on for correctness, because it cannot properly recognize loops in irreducible control flow which can be introduced by late machine basic block optimization passes. See the new test case for the reduced form of an example that occurred in practice.

Use a simple fixpoint iteration instead.

In order to facilitate this change, refactor WaitcntBrackets so that it only tracks pending events and registers, rather than also maintaining state that is relevant for the high-level algorithm. Various accessor methods can be removed or made private as a consequence.

Affects (in radv):
- dEQP-VK.glsl.loops.special.{for,while}_uniform_iterations.select_iteration_count_{fragment,vertex}

Fixes: r345719 ("AMDGPU: Rewrite SILowerI1Copies to always stay on SALU")

Reviewers: msearles, rampitec, scott.linder, kanarayan

Subscribers: arsenm, kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits, hakzsam

Differential Revision: https://reviews.llvm.org/D54231

llvm-svn: 347853
This commit is contained in:
parent
ab43bf60fe
commit
7bed696915
File diff suppressed because it is too large
Load Diff
47
llvm/test/CodeGen/AMDGPU/waitcnt-loop-irreducible.mir
Normal file
47
llvm/test/CodeGen/AMDGPU/waitcnt-loop-irreducible.mir
Normal file
@@ -0,0 +1,47 @@
|
||||
# Test driver: runs only the si-insert-waitcnts pass over the MIR in this file
# and pipes the result to FileCheck. The ordered checks below expect, within
# the irreducible_loop function, an S_WAITCNT 127 after the S_LOAD_DWORDX4_IMM,
# and another S_WAITCNT 127 after the S_BUFFER_LOAD_DWORD_IMM and before the
# S_CMP_GE_I32.
# NOTE(review): 127 is presumably the encoded wait-count immediate for
# "wait for outstanding scalar loads" on this target -- confirm against the ISA.
# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefixes=GCN %s
|
||||
|
||||
# GCN-LABEL: name: irreducible_loop{{$}}
|
||||
# GCN: S_LOAD_DWORDX4_IMM
|
||||
# GCN: S_WAITCNT 127{{$}}
|
||||
# GCN: S_BUFFER_LOAD_DWORD_IMM
|
||||
# GCN: S_WAITCNT 127{{$}}
|
||||
# GCN: S_CMP_GE_I32
|
||||
# LLVM IR document: a stub amdgpu_ps function named irreducible_loop whose
# body is a single `ret void` in block `main`. The machine-level body of the
# function with the same name is given in the MIR document that follows.
--- |
|
||||
|
||||
define amdgpu_ps void @irreducible_loop() {
|
||||
main:
|
||||
ret void
|
||||
}
|
||||
|
||||
...
|
||||
# Machine function under test. Control flow, as declared by the successors
# lists of each block:
#   bb.0 -> bb.3, bb.2
#   bb.1 -> bb.3, bb.2
#   bb.2 -> bb.3
#   bb.3 -> bb.1, bb.4
#   bb.4    (S_ENDPGM, no successors listed)
# The cycle through bb.1, bb.2 and bb.3 can be entered from bb.0 at either
# bb.2 or bb.3, so it has no single entry block (irreducible control flow).
# bb.2 defines $sgpr3 with S_BUFFER_LOAD_DWORD_IMM, and bb.3 reads $sgpr3 in
# S_CMP_GE_I32; bb.3 is also reachable from bb.0 and bb.1 without passing
# through that load.
---
|
||||
name: irreducible_loop
|
||||
body: |
|
||||
bb.0:
|
||||
successors: %bb.3, %bb.2
|
||||
|
||||
S_CBRANCH_VCCZ %bb.2, implicit $vcc
|
||||
S_BRANCH %bb.3
|
||||
|
||||
bb.1:
|
||||
successors: %bb.3, %bb.2
|
||||
|
||||
S_CBRANCH_VCCNZ %bb.3, implicit $vcc
|
||||
|
||||
bb.2:
|
||||
successors: %bb.3
|
||||
|
||||
renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0
|
||||
renamable $sgpr3 = S_BUFFER_LOAD_DWORD_IMM killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0
|
||||
|
||||
bb.3:
|
||||
successors: %bb.1, %bb.4
|
||||
|
||||
S_CMP_GE_I32 renamable $sgpr2, renamable $sgpr3, implicit-def $scc
|
||||
S_CBRANCH_SCC0 %bb.1, implicit killed $scc
|
||||
|
||||
bb.4:
|
||||
|
||||
S_ENDPGM
|
||||
|
||||
...
|
Loading…
Reference in New Issue
Block a user