mirror of
https://github.com/RPCS3/llvm.git
synced 2025-02-14 15:57:47 +00:00
AMDGPU/SI: Fix threshold calculation for branching when exec is zero
Summary:
When control flow is implemented using the exec mask, the compiler will insert branch instructions to skip over the masked section when exec is zero if the section contains more than a certain number of instructions.

The previous code would only count instructions in successor blocks, and this patch modifies the code to start counting instructions in all blocks between the start and end of the branch.

Reviewers: nhaehnle, arsenm

Subscribers: arsenm, llvm-commits

Differential Revision: http://reviews.llvm.org/D18282

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@263969 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
9c1ab0ea68
commit
1f213b9b37
@ -130,10 +130,12 @@ bool SILowerControlFlow::shouldSkip(MachineBasicBlock *From,
|
||||
|
||||
unsigned NumInstr = 0;
|
||||
|
||||
for (MachineBasicBlock *MBB = From; MBB != To && !MBB->succ_empty();
|
||||
MBB = *MBB->succ_begin()) {
|
||||
for (MachineFunction::iterator MBBI = MachineFunction::iterator(From),
|
||||
ToI = MachineFunction::iterator(To); MBBI != ToI; ++MBBI) {
|
||||
|
||||
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
|
||||
MachineBasicBlock &MBB = *MBBI;
|
||||
|
||||
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
|
||||
NumInstr < SkipThreshold && I != E; ++I) {
|
||||
|
||||
if (I->isBundle() || !I->isBundled()) {
|
||||
|
@ -24,5 +24,39 @@ out:
|
||||
ret void
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}test2:
|
||||
;CHECK: s_and_saveexec_b64
|
||||
;CHECK: s_xor_b64
|
||||
;CHECK-NEXT: s_cbranch_execz
|
||||
define void @test2(i32 addrspace(1)* %out, i32 %a, i32 %b) {
|
||||
main_body:
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%cc = icmp eq i32 %tid, 0
|
||||
br i1 %cc, label %done1, label %if
|
||||
|
||||
if:
|
||||
%cmp = icmp eq i32 %a, 0
|
||||
br i1 %cmp, label %done0, label %loop_body
|
||||
|
||||
loop_body:
|
||||
%counter = phi i32 [ 0, %if ], [0, %done0], [ %incr, %loop_body ]
|
||||
|
||||
; Prevent the loop from being optimized out
|
||||
call void asm sideeffect "", "" ()
|
||||
|
||||
%incr = add i32 %counter, 1
|
||||
%lc = icmp sge i32 %incr, 1000
|
||||
br i1 %lc, label %done1, label %loop_body
|
||||
|
||||
done0:
|
||||
%cmp0 = icmp eq i32 %b, 0
|
||||
br i1 %cmp0, label %done1, label %loop_body
|
||||
|
||||
done1:
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
|
||||
attributes #0 = { "ShaderType"="0" }
|
||||
attributes #1 = { nounwind readonly }
|
||||
|
Loading…
x
Reference in New Issue
Block a user