[SDA] Bug fix: Use IPD outside the loop as divergence bound

Summary:
The immediate post-dominator of the loop header may itself be part of the divergent loop.
Since this /was/ the divergence propagation bound, the SDA would not detect joins of divergent paths outside the loop.

Reviewers: nhaehnle

Reviewed By: nhaehnle

Subscribers: mmasten, arsenm, jvesely, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D59042

llvm-svn: 358681
This commit is contained in:
Nicolai Haehnle 2019-04-18 16:17:35 +00:00
parent dff6412af4
commit b3249a7b08
2 changed files with 56 additions and 9 deletions

View File

@ -218,14 +218,9 @@ struct DivergencePropagator {
template <typename SuccessorIterable>
std::unique_ptr<ConstBlockSet>
computeJoinPoints(const BasicBlock &RootBlock,
SuccessorIterable NodeSuccessors, const Loop *ParentLoop) {
SuccessorIterable NodeSuccessors, const Loop *ParentLoop, const BasicBlock * PdBoundBlock) {
assert(JoinBlocks);
// immediate post dominator (no join block beyond that block)
const auto *PdNode = PDT.getNode(const_cast<BasicBlock *>(&RootBlock));
const auto *IpdNode = PdNode->getIDom();
const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
// bootstrap with branch targets
for (const auto *SuccBlock : NodeSuccessors) {
DefMap.emplace(SuccBlock, SuccBlock);
@ -340,13 +335,23 @@ const ConstBlockSet &SyncDependenceAnalysis::join_blocks(const Loop &Loop) {
// already available in cache?
auto ItCached = CachedLoopExitJoins.find(&Loop);
if (ItCached != CachedLoopExitJoins.end())
if (ItCached != CachedLoopExitJoins.end()) {
return *ItCached->second;
}
// don't propagate beyond the immediate post dominator of the loop
const auto *PdNode = PDT.getNode(const_cast<BasicBlock *>(Loop.getHeader()));
const auto *IpdNode = PdNode->getIDom();
const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
while (PdBoundBlock && Loop.contains(PdBoundBlock)) {
IpdNode = IpdNode->getIDom();
PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
}
// compute all join points
DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI};
auto JoinBlocks = Propagator.computeJoinPoints<const LoopExitVec &>(
*Loop.getHeader(), LoopExits, Loop.getParentLoop());
*Loop.getHeader(), LoopExits, Loop.getParentLoop(), PdBoundBlock);
auto ItInserted = CachedLoopExitJoins.emplace(&Loop, std::move(JoinBlocks));
assert(ItInserted.second);
@ -365,11 +370,16 @@ SyncDependenceAnalysis::join_blocks(const Instruction &Term) {
if (ItCached != CachedBranchJoins.end())
return *ItCached->second;
// don't propagate beyond the immediate post dominator of the branch
const auto *PdNode = PDT.getNode(const_cast<BasicBlock *>(Term.getParent()));
const auto *IpdNode = PdNode->getIDom();
const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
// compute all join points
DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI};
const auto &TermBlock = *Term.getParent();
auto JoinBlocks = Propagator.computeJoinPoints<succ_const_range>(
TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock));
TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock), PdBoundBlock);
auto ItInserted = CachedBranchJoins.emplace(&Term, std::move(JoinBlocks));
assert(ItInserted.second);

View File

@ -21,6 +21,43 @@ merge:
ret void
}
; Regression test: the immediate post-dominator of the loop header
; (%for.header) is %C, which is itself a block of the loop. The loop has two
; exits (%C -> %E and %D -> %F) that only meet again in %G, outside the loop.
define amdgpu_kernel void @hidden_loop_ipd(i32 %n, i32 %a, i32 %b) #0 {
; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_loop_ipd'
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; Condition derived from the work-item id; expected to be reported divergent.
%cond.var = icmp slt i32 %tid, 0
; CHECK: DIVERGENT: %cond.var = icmp
; Condition derived only from the kernel argument %n; expected to stay uniform.
%cond.uni = icmp slt i32 %n, 0
; CHECK-NOT: DIVERGENT: %cond.uni = icmp
br label %for.header
; Loop header: branches on the divergent condition; both targets lead to %C.
for.header:
br i1 %cond.var, label %A, label %B
A:
br label %C
B:
br label %C
; First loop exit (to %E), taken on the uniform condition.
C:
br i1 %cond.uni, label %E, label %D
; Latch: back edge to %for.header or second loop exit (to %F), decided by the
; divergent condition.
D:
br i1 %cond.var, label %for.header, label %F
; Exit block reached from %C only; its single-input phi is expected uniform.
E:
%e.lcssa.uni = phi i32 [ 0, %C ]
; CHECK-NOT: DIVERGENT: %e.lcssa.uni = phi i32
br label %G
; Exit block reached from %D only; its single-input phi is expected uniform.
F:
%f.lcssa.uni = phi i32 [ 1, %D ]
; CHECK-NOT: DIVERGENT: %f.lcssa.uni = phi i32
br label %G
; Join of the two loop exits outside the loop; this phi is expected to be
; reported divergent.
G:
%g.join.var = phi i32 [ %e.lcssa.uni, %E ], [ %f.lcssa.uni, %F ]
; CHECK: DIVERGENT: %g.join.var = phi i32
ret void
}
declare i32 @llvm.amdgcn.workitem.id.x() #0
attributes #0 = { nounwind readnone }