[DA] conservatively mark the join of every divergent branch

For a loop, a join block is a block that is reachable along multiple
disjoint paths from the exiting block of a loop. If the exit condition
of the loop is divergent, then such join blocks must also be marked
divergent. This currently fails in some cases because not all join
blocks are identified correctly.

The workaround is to conservatively mark every join block of any
branch (not necessarily the exiting block of a loop) as divergent.

https://bugs.llvm.org/show_bug.cgi?id=46372

Reviewed By: simoll

Differential Revision: https://reviews.llvm.org/D81806
This commit is contained in:
Sameer Sahasrabuddhe 2020-06-18 17:39:20 +05:30
parent e49aa96f95
commit 07b90bc454
3 changed files with 67 additions and 6 deletions

View File

@ -295,14 +295,11 @@ bool DivergenceAnalysis::propagateJoinDivergence(const BasicBlock &JoinBlock,
// push non-divergent phi nodes in JoinBlock to the worklist
pushPHINodes(JoinBlock);
// JoinBlock is a divergent loop exit
if (BranchLoop && !BranchLoop->contains(&JoinBlock)) {
return true;
}
// disjoint-paths divergent at JoinBlock
markBlockJoinDivergent(JoinBlock);
return false;
// JoinBlock is a divergent loop exit
return BranchLoop && !BranchLoop->contains(&JoinBlock);
}
void DivergenceAnalysis::propagateBranchDivergence(const Instruction &Term) {

View File

@ -0,0 +1,36 @@
; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
; Purpose: a loop (bb1 <-> bb2) has two exits that both land in bb3. The exit
; out of bb1 is guarded by a value computed from the workitem id, the exit out
; of bb2 by the loop counter. The directives below expect both the phi in bb3
; that joins the two exits and the branch that consumes it to be reported as
; divergent.
; CHECK: bb3:
; CHECK: DIVERGENT: %Guard.bb4 = phi i1 [ true, %bb1 ], [ false, %bb2 ]
; CHECK: DIVERGENT: br i1 %Guard.bb4, label %bb4, label %bb5
; Function Attrs: nounwind readnone speculatable
declare i32 @llvm.amdgcn.workitem.id.x() #0
define protected amdgpu_kernel void @test() {
bb0:
; %i5 is derived from the workitem id (presumably the only source of
; divergence in this kernel) and is computed once, outside the loop.
%tid.x = call i32 @llvm.amdgcn.workitem.id.x()
%i5 = icmp eq i32 %tid.x, -1
br label %bb1
bb1: ; preds = %bb2, %bb0
; Loop header: the counter starts at 7 and is decremented in bb2.
%lsr.iv = phi i32 [ 7, %bb0 ], [ %lsr.iv.next, %bb2 ]
; First loop exit: leaves to bb3 when %i5 is false, otherwise continues to bb2.
br i1 %i5, label %bb2, label %bb3
bb2: ; preds = %bb1
%lsr.iv.next = add nsw i32 %lsr.iv, -1
%i14 = icmp eq i32 %lsr.iv.next, 0
; Second loop exit: leaves to bb3 once the counter reaches 0, otherwise takes
; the back edge to bb1.
br i1 %i14, label %bb3, label %bb1
bb3: ; preds = %bb2, %bb1
; Join of the two loop exits: the phi records which exit was taken
; (true when coming from bb1, false when coming from bb2).
%Guard.bb4 = phi i1 [ true, %bb1 ], [ false, %bb2 ]
br i1 %Guard.bb4, label %bb4, label %bb5
bb4: ; preds = %bb3
br label %bb5
bb5: ; preds = %bb3, %bb4
ret void
}
attributes #0 = { nounwind readnone speculatable }

View File

@ -0,0 +1,28 @@
; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
; XFAIL: *
; https://bugs.llvm.org/show_bug.cgi?id=46372
; Purpose: bb2 is reached either directly from bb0 (branch on the kernel
; argument %uni) or by leaving the self-loop in bb1, whose exit condition is
; computed from the workitem id. The negative directive below expects the phi
; in bb2 not to be reported as divergent; the test is marked as an expected
; failure.
; NOTE(review): presumably it fails because the analysis is currently
; conservative about branch joins - confirm against the bug linked above.
; CHECK: bb2:
; CHECK-NOT: DIVERGENT: %Guard.bb2 = phi i1 [ true, %bb1 ], [ false, %bb0 ]
; Function Attrs: nounwind readnone speculatable
declare i32 @llvm.amdgcn.workitem.id.x() #0
define protected amdgpu_kernel void @test2(i1 %uni) {
bb0:
%tid.x = call i32 @llvm.amdgcn.workitem.id.x()
%i5 = icmp eq i32 %tid.x, -1
; Branch on the kernel argument (presumably uniform, going by its name):
; enter the loop in bb1 when true, skip straight to bb2 when false.
br i1 %uni, label %bb1, label %bb2
bb1: ; preds = %bb1, %bb0
; Single-block loop: the counter starts at 7 and is decremented each iteration.
%lsr.iv = phi i32 [ 7, %bb0 ], [ %lsr.iv.next, %bb1 ]
%lsr.iv.next = add nsw i32 %lsr.iv, -1
; The exit depends only on %i5 (derived from the workitem id), not on the
; counter: leave to bb2 when %i5 is true, otherwise repeat bb1.
br i1 %i5, label %bb2, label %bb1
bb2: ; preds = %bb1, %bb0
; Join of the loop exit and the edge from bb0: true when coming out of the
; loop, false when the loop was skipped.
%Guard.bb2 = phi i1 [ true, %bb1 ], [ false, %bb0 ]
ret void
}
attributes #0 = { nounwind readnone speculatable }