Move the handling of unanalyzable branches out of the loop-driven chain

formation phase and into the initial walk of the basic blocks. We
essentially pre-merge all blocks where unanalyzable fallthrough exists,
as we won't be able to update the terminators effectively after any
reorderings. This is quite a bit more principled as there may be CFGs
where the second half of the unanalyzable pair has some analyzable
predecessor that gets placed first. Then it may get placed next,
implicitly breaking the unanalyzable branch even though we never even
looked at the part that isn't analyzable. I've included a test case that
triggers this (thanks Benjamin yet again!), and I'm hoping to synthesize
some more general ones as I dig into related issues.

Also, to make this new scheme work we have to be able to handle branches
into the middle of a chain, so add this check. We always fallback on the
incoming ordering.

Finally, this starts to really underscore a known limitation of the
current implementation -- we don't consider broken predecessors when
merging successors. This can caused major missed opportunities, and is
something I'm planning on looking at next (modulo more bug reports).

llvm-svn: 144994
This commit is contained in:
Chandler Carruth 2011-11-19 10:26:02 +00:00
parent 44b06b0096
commit f24d3f8fc7
2 changed files with 58 additions and 25 deletions

View File

@ -355,6 +355,10 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor(
DEBUG(dbgs() << " " << getBlockName(*SI) << " -> Already merged!\n");
continue;
}
if (*SI != *SuccChain.begin()) {
DEBUG(dbgs() << " " << getBlockName(*SI) << " -> Mid chain!\n");
continue;
}
uint32_t SuccWeight = MBPI->getEdgeWeight(BB, *SI);
BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
@ -472,7 +476,6 @@ void MachineBlockPlacement::buildChain(
assert(BB);
assert(BlockToChain[BB] == &Chain);
assert(*Chain.begin() == BB);
SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch.
MachineFunction &F = *BB->getParent();
MachineFunction::iterator PrevUnplacedBlockIt = F.begin();
@ -485,26 +488,9 @@ void MachineBlockPlacement::buildChain(
assert(*llvm::prior(Chain.end()) == BB);
MachineBasicBlock *BestSucc = 0;
// Check for unreasonable branches, and forcibly merge the existing layout
// successor for them. We can handle cases that AnalyzeBranch can't: jump
// tables etc are fine. The case we want to handle specially is when there
// is potential fallthrough, but the branch cannot be analyzed. This
// includes blocks without terminators as well as other cases.
Cond.clear();
MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.
if (TII->AnalyzeBranch(*BB, TBB, FBB, Cond) && BB->canFallThrough()) {
MachineFunction::iterator I(BB), NextI(llvm::next(I));
// Ensure that the layout successor is a viable block, as we know that
// fallthrough is a possibility. Note that this may not be a valid block
// in the loop, but we allow that to cope with degenerate situations.
assert(NextI != BB->getParent()->end());
BestSucc = NextI;
}
// Otherwise, look for the best viable successor if there is one to place
// immediately after this block.
if (!BestSucc)
BestSucc = selectBestSuccessor(BB, Chain, BlockFilter);
// Look for the best viable successor if there is one to place immediately
// after this block.
BestSucc = selectBestSuccessor(BB, Chain, BlockFilter);
// If an immediate successor isn't available, look for the best viable
// block among those we've identified as not violating the loop's CFG at
@ -624,9 +610,32 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
// Ensure that every BB in the function has an associated chain to simplify
// the assumptions of the remaining algorithm.
for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
BlockToChain[&*FI] =
new (ChainAllocator.Allocate()) BlockChain(BlockToChain, &*FI);
SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch.
for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
MachineBasicBlock *BB = FI;
BlockChain *&Chain = BlockToChain[BB];
Chain = new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB);
// Also, merge any blocks which we cannot reason about and must preserve
// the exact fallthrough behavior for.
for (;;) {
Cond.clear();
MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.
if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough())
break;
MachineFunction::iterator NextFI(llvm::next(FI));
MachineBasicBlock *NextBB = NextFI;
// Ensure that the layout successor is a viable block, as we know that
// fallthrough is a possibility.
assert(NextFI != FE && "Can't fallthrough past the last block.");
DEBUG(dbgs() << "Pre-merging due to unanalyzable fallthrough: "
<< getBlockName(BB) << " -> " << getBlockName(NextBB)
<< "\n");
Chain->merge(NextBB, 0);
FI = NextFI;
BB = NextBB;
}
}
// Build any loop-based chains.
for (MachineLoopInfo::iterator LI = MLI->begin(), LE = MLI->end(); LI != LE;
@ -692,7 +701,6 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
// Splice the blocks into place.
MachineFunction::iterator InsertPos = F.begin();
SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch.
for (BlockChain::iterator BI = FunctionChain.begin(),
BE = FunctionChain.end();
BI != BE; ++BI) {

View File

@ -393,3 +393,28 @@ exit:
%merge = phi i32 [ 3, %step ], [ 6, %entry ]
ret i32 %merge
}
define void @fpcmp_unanalyzable_branch(i1 %cond) {
entry:
br i1 %cond, label %entry.if.then_crit_edge, label %lor.lhs.false
entry.if.then_crit_edge:
%.pre14 = load i8* undef, align 1, !tbaa !0
br label %if.then
lor.lhs.false:
br i1 undef, label %if.end, label %exit
exit:
%cmp.i = fcmp une double 0.000000e+00, undef
br i1 %cmp.i, label %if.then, label %if.end
if.then:
%0 = phi i8 [ %.pre14, %entry.if.then_crit_edge ], [ undef, %exit ]
%1 = and i8 %0, 1
store i8 %1, i8* undef, align 4, !tbaa !0
br label %if.end
if.end:
ret void
}