mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-27 06:54:30 +00:00
[SDA] Bug fix: Use IPD outside the loop as divergence bound
Summary: The immediate post dominator of the loop header may itself be part of the divergent loop. Since this /was/ the divergence propagation bound, the SDA would not detect joins of divergent paths outside the loop. Reviewers: nhaehnle Reviewed By: nhaehnle Subscribers: mmasten, arsenm, jvesely, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D59042 llvm-svn: 358681
This commit is contained in:
parent
dff6412af4
commit
b3249a7b08
@ -218,14 +218,9 @@ struct DivergencePropagator {
|
||||
template <typename SuccessorIterable>
|
||||
std::unique_ptr<ConstBlockSet>
|
||||
computeJoinPoints(const BasicBlock &RootBlock,
|
||||
SuccessorIterable NodeSuccessors, const Loop *ParentLoop) {
|
||||
SuccessorIterable NodeSuccessors, const Loop *ParentLoop, const BasicBlock * PdBoundBlock) {
|
||||
assert(JoinBlocks);
|
||||
|
||||
// immediate post dominator (no join block beyond that block)
|
||||
const auto *PdNode = PDT.getNode(const_cast<BasicBlock *>(&RootBlock));
|
||||
const auto *IpdNode = PdNode->getIDom();
|
||||
const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
|
||||
|
||||
// bootstrap with branch targets
|
||||
for (const auto *SuccBlock : NodeSuccessors) {
|
||||
DefMap.emplace(SuccBlock, SuccBlock);
|
||||
@ -340,13 +335,23 @@ const ConstBlockSet &SyncDependenceAnalysis::join_blocks(const Loop &Loop) {
|
||||
|
||||
// already available in cache?
|
||||
auto ItCached = CachedLoopExitJoins.find(&Loop);
|
||||
if (ItCached != CachedLoopExitJoins.end())
|
||||
if (ItCached != CachedLoopExitJoins.end()) {
|
||||
return *ItCached->second;
|
||||
}
|
||||
|
||||
// don't propagate beyond the closest post dominator of the loop header that lies outside the loop
|
||||
const auto *PdNode = PDT.getNode(const_cast<BasicBlock *>(Loop.getHeader()));
|
||||
const auto *IpdNode = PdNode->getIDom();
|
||||
const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
|
||||
while (PdBoundBlock && Loop.contains(PdBoundBlock)) {
|
||||
IpdNode = IpdNode->getIDom();
|
||||
PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
|
||||
}
|
||||
|
||||
// compute all join points
|
||||
DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI};
|
||||
auto JoinBlocks = Propagator.computeJoinPoints<const LoopExitVec &>(
|
||||
*Loop.getHeader(), LoopExits, Loop.getParentLoop());
|
||||
*Loop.getHeader(), LoopExits, Loop.getParentLoop(), PdBoundBlock);
|
||||
|
||||
auto ItInserted = CachedLoopExitJoins.emplace(&Loop, std::move(JoinBlocks));
|
||||
assert(ItInserted.second);
|
||||
@ -365,11 +370,16 @@ SyncDependenceAnalysis::join_blocks(const Instruction &Term) {
|
||||
if (ItCached != CachedBranchJoins.end())
|
||||
return *ItCached->second;
|
||||
|
||||
// don't propagate beyond the immediate post dominator of the branch
|
||||
const auto *PdNode = PDT.getNode(const_cast<BasicBlock *>(Term.getParent()));
|
||||
const auto *IpdNode = PdNode->getIDom();
|
||||
const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
|
||||
|
||||
// compute all join points
|
||||
DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI};
|
||||
const auto &TermBlock = *Term.getParent();
|
||||
auto JoinBlocks = Propagator.computeJoinPoints<succ_const_range>(
|
||||
TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock));
|
||||
TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock), PdBoundBlock);
|
||||
|
||||
auto ItInserted = CachedBranchJoins.emplace(&Term, std::move(JoinBlocks));
|
||||
assert(ItInserted.second);
|
||||
|
@ -21,6 +21,43 @@ merge:
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @hidden_loop_ipd(i32 %n, i32 %a, i32 %b) #0 {
|
||||
; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_loop_ipd'
|
||||
entry:
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%cond.var = icmp slt i32 %tid, 0
|
||||
; CHECK: DIVERGENT: %cond.var = icmp
|
||||
%cond.uni = icmp slt i32 %n, 0
|
||||
; CHECK-NOT: DIVERGENT: %cond.uni = icmp
|
||||
br label %for.header
|
||||
for.header:
|
||||
br i1 %cond.var, label %A, label %B
|
||||
A:
|
||||
br label %C
|
||||
B:
|
||||
br label %C
|
||||
C:
|
||||
br i1 %cond.uni, label %E, label %D
|
||||
D:
|
||||
br i1 %cond.var, label %for.header, label %F
|
||||
|
||||
E:
|
||||
%e.lcssa.uni = phi i32 [ 0, %C ]
|
||||
; CHECK-NOT: DIVERGENT: %e.lcssa.uni = phi i32
|
||||
br label %G
|
||||
|
||||
F:
|
||||
%f.lcssa.uni = phi i32 [ 1, %D ]
|
||||
; CHECK-NOT: DIVERGENT: %f.lcssa.uni = phi i32
|
||||
br label %G
|
||||
|
||||
G:
|
||||
%g.join.var = phi i32 [ %e.lcssa.uni, %E ], [ %f.lcssa.uni, %F ]
|
||||
; CHECK: DIVERGENT: %g.join.var = phi i32
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
|
Loading…
x
Reference in New Issue
Block a user