mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-17 23:44:43 +00:00
Enhance CodePlacementOpt's unconditional intra-loop branch elimination logic
to be more general and understand more varieties of loops. Teach CodePlacementOpt to reorganize the basic blocks of a loop so that they are contiguous. This also includes a fair amount of logic for preserving fall-through edges while doing so. This fixes a BranchFolding-ism where blocks which can't be made to use a fall-through edge and don't conveniently fit anywhere nearby get tossed out to the end of the function. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@84295 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
b68d67caf3
commit
3bdd8de280
@ -34,14 +34,6 @@ namespace {
|
||||
const TargetInstrInfo *TII;
|
||||
const TargetLowering *TLI;
|
||||
|
||||
/// ChangedMBBs - BBs which are modified by OptimizeIntraLoopEdges.
|
||||
SmallPtrSet<MachineBasicBlock*, 8> ChangedMBBs;
|
||||
|
||||
/// UncondJmpMBBs - A list of BBs which are in loops and end with
|
||||
/// unconditional branches.
|
||||
SmallVector<std::pair<MachineBasicBlock*,MachineBasicBlock*>, 4>
|
||||
UncondJmpMBBs;
|
||||
|
||||
public:
|
||||
static char ID;
|
||||
CodePlacementOpt() : MachineFunctionPass(&ID) {}
|
||||
@ -58,7 +50,15 @@ namespace {
|
||||
}
|
||||
|
||||
private:
|
||||
bool OptimizeIntraLoopEdges();
|
||||
bool HasFallthrough(MachineBasicBlock *MBB);
|
||||
bool HasAnalyzableTerminator(MachineBasicBlock *MBB);
|
||||
void Splice(MachineFunction &MF,
|
||||
MachineFunction::iterator InsertPt,
|
||||
MachineFunction::iterator Begin,
|
||||
MachineFunction::iterator End);
|
||||
void UpdateTerminator(MachineBasicBlock *MBB);
|
||||
bool OptimizeIntraLoopEdges(MachineFunction &MF);
|
||||
bool OptimizeIntraLoopEdgesInLoop(MachineFunction &MF, MachineLoop *L);
|
||||
bool AlignLoops(MachineFunction &MF);
|
||||
bool AlignLoop(MachineFunction &MF, MachineLoop *L, unsigned Align);
|
||||
};
|
||||
@ -70,168 +70,364 @@ FunctionPass *llvm::createCodePlacementOptPass() {
|
||||
return new CodePlacementOpt();
|
||||
}
|
||||
|
||||
/// OptimizeBackEdges - Place loop back edges to move unconditional branches
|
||||
/// out of the loop.
|
||||
/// HasFallthrough - Test whether the given branch has a fallthrough, either as
|
||||
/// a plain fallthrough or as a fallthrough case of a conditional branch.
|
||||
///
|
||||
/// A:
|
||||
/// ...
|
||||
/// <fallthrough to B>
|
||||
bool CodePlacementOpt::HasFallthrough(MachineBasicBlock *MBB) {
|
||||
// TBB, FBB and Cond are filled in by AnalyzeBranch with the branch targets
// and branch condition it finds at the end of MBB.
MachineBasicBlock *TBB = 0, *FBB = 0;
|
||||
SmallVector<MachineOperand, 4> Cond;
|
||||
// A true result is treated as "terminator could not be analyzed"; in that
// case conservatively report that there is no fallthrough.
if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond))
|
||||
return false;
|
||||
// An explicit false target means this conditional branch names both of its
// destinations, so it has no fallthrough.
|
||||
if (FBB)
|
||||
return false;
|
||||
// A target with no condition is an unconditional branch, which has no
// fallthrough.
|
||||
if (Cond.empty() && TBB)
|
||||
return false;
|
||||
// Otherwise the block either has no branch at all or has a conditional
// branch with only a true target: it has a fallthrough.
|
||||
return true;
|
||||
}
|
||||
|
||||
/// HasAnalyzableTerminator - Test whether AnalyzeBranch will succeed on MBB.
|
||||
/// This is called before major changes are begun to test whether it will be
|
||||
/// possible to complete the changes.
|
||||
///
|
||||
/// B: --> loop header
|
||||
/// ...
|
||||
/// jcc <cond> C, [exit]
|
||||
/// Target-specific code is hereby encouraged to make AnalyzeBranch succeed
|
||||
/// whenever possible.
|
||||
///
|
||||
/// C:
|
||||
/// ...
|
||||
/// jmp B
|
||||
///
|
||||
/// ==>
|
||||
///
|
||||
/// A:
|
||||
/// ...
|
||||
/// jmp B
|
||||
///
|
||||
/// C:
|
||||
/// ...
|
||||
/// <fallthough to B>
|
||||
///
|
||||
/// B: --> loop header
|
||||
/// ...
|
||||
/// jcc <cond> C, [exit]
|
||||
///
|
||||
bool CodePlacementOpt::OptimizeIntraLoopEdges() {
|
||||
if (!TLI->shouldOptimizeCodePlacement())
|
||||
bool CodePlacementOpt::HasAnalyzableTerminator(MachineBasicBlock *MBB) {
|
||||
// Conservatively ignore EH landing pads.
|
||||
if (MBB->isLandingPad()) return false;
|
||||
|
||||
// Ignore blocks which look like they might have EH-related control flow.
|
||||
// At the time of this writing, there are blocks which AnalyzeBranch
|
||||
// thinks end in single unconditional branches, yet which have two CFG
|
||||
// successors. Code in this file is not prepared to reason about such things.
|
||||
if (!MBB->empty() && MBB->back().getOpcode() == TargetInstrInfo::EH_LABEL)
|
||||
return false;
|
||||
|
||||
bool Changed = false;
|
||||
for (unsigned i = 0, e = UncondJmpMBBs.size(); i != e; ++i) {
|
||||
MachineBasicBlock *MBB = UncondJmpMBBs[i].first;
|
||||
MachineBasicBlock *SuccMBB = UncondJmpMBBs[i].second;
|
||||
MachineLoop *L = MLI->getLoopFor(MBB);
|
||||
assert(L && "BB is expected to be in a loop!");
|
||||
// Aggressively handle return blocks and similar constructs.
|
||||
if (MBB->succ_empty()) return true;
|
||||
|
||||
if (ChangedMBBs.count(MBB)) {
|
||||
// BB has been modified, re-analyze.
|
||||
MachineBasicBlock *TBB = 0, *FBB = 0;
|
||||
SmallVector<MachineOperand, 4> Cond;
|
||||
if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond) || !Cond.empty())
|
||||
continue;
|
||||
if (MLI->getLoopFor(TBB) != L || TBB->isLandingPad())
|
||||
continue;
|
||||
SuccMBB = TBB;
|
||||
// Ask the target's AnalyzeBranch if it can handle this block.
|
||||
MachineBasicBlock *TBB = 0, *FBB = 0;
|
||||
SmallVector<MachineOperand, 4> Cond;
|
||||
// Make sure the terminator is understood.
|
||||
if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond))
|
||||
return false;
|
||||
// Make sure we have the option of reversing the condition.
|
||||
if (!Cond.empty() && TII->ReverseBranchCondition(Cond))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Splice - Move the sequence of basic blocks [Begin,End) to just before
|
||||
/// InsertPt. Update branch instructions as needed to account for broken
|
||||
/// fallthrough edges and to take advantage of newly exposed fallthrough
|
||||
/// opportunities.
|
||||
/// None of Begin, End or InsertPt may be the function's first block.
|
||||
void CodePlacementOpt::Splice(MachineFunction &MF,
|
||||
MachineFunction::iterator InsertPt,
|
||||
MachineFunction::iterator Begin,
|
||||
MachineFunction::iterator End) {
|
||||
assert(Begin != MF.begin() && End != MF.begin() && InsertPt != MF.begin() &&
|
||||
"Splice can't change the entry block!");
|
||||
// Before moving anything, remember the block laid out just ahead of the
// range and the last block of the range; both get a new layout successor.
MachineFunction::iterator OldBeginPrior = prior(Begin);
|
||||
MachineFunction::iterator OldEndPrior = prior(End);
|
||||
|
||||
MF.splice(InsertPt, Begin, End);
|
||||
|
||||
// Fix up the three blocks whose layout successor changed. The first call
// assumes Begin still designates the first moved block after MF.splice, so
// prior(Begin) is the block that now precedes the moved range.
UpdateTerminator(prior(Begin));
|
||||
// The block that used to precede the range.
UpdateTerminator(OldBeginPrior);
|
||||
// The last block of the moved range.
UpdateTerminator(OldEndPrior);
|
||||
}
|
||||
|
||||
/// UpdateTerminator - Update the terminator instructions in MBB to account
|
||||
/// for changes to the layout. If the block previously used a fallthrough,
|
||||
/// it may now need a branch, and if it previously used branching it may now
|
||||
/// be able to use a fallthrough.
|
||||
///
|
||||
void CodePlacementOpt::UpdateTerminator(MachineBasicBlock *MBB) {
|
||||
// A block with no successors has no concerns with fall-through edges.
|
||||
if (MBB->succ_empty()) return;
|
||||
|
||||
MachineBasicBlock *TBB = 0, *FBB = 0;
|
||||
SmallVector<MachineOperand, 4> Cond;
|
||||
bool B = TII->AnalyzeBranch(*MBB, TBB, FBB, Cond);
|
||||
assert(!B && "UpdateTerminators requires analyzable predecessors!");
|
||||
if (Cond.empty()) {
|
||||
if (TBB) {
|
||||
// The block has an unconditional branch. If its successor is now
|
||||
// its layout successor, delete the branch.
|
||||
if (MBB->isLayoutSuccessor(TBB))
|
||||
TII->RemoveBranch(*MBB);
|
||||
} else {
|
||||
assert(MLI->getLoopFor(SuccMBB) == L &&
|
||||
"Successor is not in the same loop!");
|
||||
// The block has an unconditional fallthrough. If its successor is not
|
||||
// its layout successor, insert a branch.
|
||||
TBB = *MBB->succ_begin();
|
||||
if (!MBB->isLayoutSuccessor(TBB))
|
||||
TII->InsertBranch(*MBB, TBB, 0, Cond);
|
||||
}
|
||||
|
||||
if (MBB->isLayoutSuccessor(SuccMBB)) {
|
||||
// Successor is right after MBB, just eliminate the unconditional jmp.
|
||||
// Can this happen?
|
||||
TII->RemoveBranch(*MBB);
|
||||
ChangedMBBs.insert(MBB);
|
||||
++NumIntraElim;
|
||||
Changed = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Now check if the predecessor is fallthrough from any BB. If there is,
|
||||
// that BB should be from outside the loop since edge will become a jmp.
|
||||
bool OkToMove = true;
|
||||
MachineBasicBlock *FtMBB = 0, *FtTBB = 0, *FtFBB = 0;
|
||||
SmallVector<MachineOperand, 4> FtCond;
|
||||
for (MachineBasicBlock::pred_iterator PI = SuccMBB->pred_begin(),
|
||||
PE = SuccMBB->pred_end(); PI != PE; ++PI) {
|
||||
MachineBasicBlock *PredMBB = *PI;
|
||||
if (PredMBB->isLayoutSuccessor(SuccMBB)) {
|
||||
if (TII->AnalyzeBranch(*PredMBB, FtTBB, FtFBB, FtCond)) {
|
||||
OkToMove = false;
|
||||
break;
|
||||
}
|
||||
if (!FtTBB)
|
||||
FtTBB = SuccMBB;
|
||||
else if (!FtFBB) {
|
||||
assert(FtFBB != SuccMBB && "Unexpected control flow!");
|
||||
FtFBB = SuccMBB;
|
||||
}
|
||||
|
||||
// A fallthrough.
|
||||
FtMBB = PredMBB;
|
||||
MachineLoop *PL = MLI->getLoopFor(PredMBB);
|
||||
if (PL && (PL == L || PL->getLoopDepth() >= L->getLoopDepth()))
|
||||
OkToMove = false;
|
||||
|
||||
break;
|
||||
} else {
|
||||
if (FBB) {
|
||||
// The block has a non-fallthrough conditional branch. If one of its
|
||||
// successors is its layout successor, rewrite it to a fallthrough
|
||||
// conditional branch.
|
||||
if (MBB->isLayoutSuccessor(TBB)) {
|
||||
TII->RemoveBranch(*MBB);
|
||||
TII->ReverseBranchCondition(Cond);
|
||||
TII->InsertBranch(*MBB, FBB, 0, Cond);
|
||||
} else if (MBB->isLayoutSuccessor(FBB)) {
|
||||
TII->RemoveBranch(*MBB);
|
||||
TII->InsertBranch(*MBB, TBB, 0, Cond);
|
||||
}
|
||||
} else {
|
||||
// The block has a fallthrough conditional branch.
|
||||
MachineBasicBlock *MBBA = *MBB->succ_begin();
|
||||
MachineBasicBlock *MBBB = *next(MBB->succ_begin());
|
||||
if (MBBA == TBB) std::swap(MBBB, MBBA);
|
||||
if (MBB->isLayoutSuccessor(TBB)) {
|
||||
TII->RemoveBranch(*MBB);
|
||||
TII->ReverseBranchCondition(Cond);
|
||||
TII->InsertBranch(*MBB, MBBA, 0, Cond);
|
||||
} else if (!MBB->isLayoutSuccessor(MBBA)) {
|
||||
TII->RemoveBranch(*MBB);
|
||||
TII->InsertBranch(*MBB, TBB, MBBA, Cond);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!OkToMove)
|
||||
continue;
|
||||
/// OptimizeIntraLoopEdges - Reposition loop blocks to minimize
|
||||
/// intra-loop branching and to form contiguous loops.
|
||||
///
|
||||
bool CodePlacementOpt::OptimizeIntraLoopEdges(MachineFunction &MF) {
|
||||
bool Changed = false;
|
||||
|
||||
// Is it profitable? If SuccMBB can fallthrough itself, that can be changed
|
||||
// into a jmp.
|
||||
MachineBasicBlock *TBB = 0, *FBB = 0;
|
||||
SmallVector<MachineOperand, 4> Cond;
|
||||
if (TII->AnalyzeBranch(*SuccMBB, TBB, FBB, Cond))
|
||||
continue;
|
||||
if (!TBB && Cond.empty())
|
||||
TBB = next(MachineFunction::iterator(SuccMBB));
|
||||
else if (!FBB && !Cond.empty())
|
||||
FBB = next(MachineFunction::iterator(SuccMBB));
|
||||
if (!TLI->shouldOptimizeCodePlacement())
|
||||
return Changed;
|
||||
|
||||
// This calculates the cost of the transformation. Also, it finds the *only*
|
||||
// intra-loop edge if there is one.
|
||||
int Cost = 0;
|
||||
bool HasOneIntraSucc = true;
|
||||
MachineBasicBlock *IntraSucc = 0;
|
||||
for (MachineBasicBlock::succ_iterator SI = SuccMBB->succ_begin(),
|
||||
SE = SuccMBB->succ_end(); SI != SE; ++SI) {
|
||||
MachineBasicBlock *SSMBB = *SI;
|
||||
if (MLI->getLoopFor(SSMBB) == L) {
|
||||
if (!IntraSucc)
|
||||
IntraSucc = SSMBB;
|
||||
else
|
||||
HasOneIntraSucc = false;
|
||||
}
|
||||
for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
|
||||
I != E; ++I)
|
||||
Changed |= OptimizeIntraLoopEdgesInLoop(MF, *I);
|
||||
|
||||
if (SuccMBB->isLayoutSuccessor(SSMBB))
|
||||
// This will become a jmp.
|
||||
++Cost;
|
||||
else if (MBB->isLayoutSuccessor(SSMBB)) {
|
||||
// One of the successor will become the new fallthrough.
|
||||
if (SSMBB == FBB) {
|
||||
FBB = 0;
|
||||
--Cost;
|
||||
} else if (!FBB && SSMBB == TBB && Cond.empty()) {
|
||||
TBB = 0;
|
||||
--Cost;
|
||||
} else if (!Cond.empty() && !TII->ReverseBranchCondition(Cond)) {
|
||||
assert(SSMBB == TBB);
|
||||
TBB = FBB;
|
||||
FBB = 0;
|
||||
--Cost;
|
||||
}
|
||||
}
|
||||
return Changed;
|
||||
}
|
||||
|
||||
/// OptimizeIntraLoopEdgesInLoop - Reposition loop blocks to minimize
|
||||
/// intra-loop branching and to form contiguous loops.
|
||||
///
|
||||
/// This code takes the approach of making minor changes to the existing
|
||||
/// layout to fix specific loop-oriented problems. Also, it depends on
|
||||
/// AnalyzeBranch, which can't understand complex control instructions.
|
||||
///
|
||||
bool CodePlacementOpt::OptimizeIntraLoopEdgesInLoop(MachineFunction &MF,
|
||||
MachineLoop *L) {
|
||||
bool Changed = false;
|
||||
|
||||
// Do optimization for nested loops.
|
||||
for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
|
||||
Changed |= OptimizeIntraLoopEdgesInLoop(MF, *I);
|
||||
|
||||
// Keep a record of which blocks are in the portion of the loop contiguous
|
||||
// with the loop header.
|
||||
SmallPtrSet<MachineBasicBlock *, 8> ContiguousBlocks;
|
||||
ContiguousBlocks.insert(L->getHeader());
|
||||
|
||||
// Find the loop "top", ignoring any discontiguous parts.
|
||||
MachineBasicBlock *TopMBB = L->getHeader();
|
||||
if (TopMBB != MF.begin()) {
|
||||
MachineBasicBlock *PriorMBB = prior(MachineFunction::iterator(TopMBB));
|
||||
while (L->contains(PriorMBB)) {
|
||||
ContiguousBlocks.insert(PriorMBB);
|
||||
TopMBB = PriorMBB;
|
||||
if (TopMBB == MF.begin()) break;
|
||||
PriorMBB = prior(MachineFunction::iterator(TopMBB));
|
||||
}
|
||||
if (Cost)
|
||||
continue;
|
||||
|
||||
// Now, let's move the successor to below the BB to eliminate the jmp.
|
||||
SuccMBB->moveAfter(MBB);
|
||||
TII->RemoveBranch(*MBB);
|
||||
TII->RemoveBranch(*SuccMBB);
|
||||
if (TBB)
|
||||
TII->InsertBranch(*SuccMBB, TBB, FBB, Cond);
|
||||
ChangedMBBs.insert(MBB);
|
||||
ChangedMBBs.insert(SuccMBB);
|
||||
if (FtMBB) {
|
||||
TII->RemoveBranch(*FtMBB);
|
||||
TII->InsertBranch(*FtMBB, FtTBB, FtFBB, FtCond);
|
||||
ChangedMBBs.insert(FtMBB);
|
||||
}
|
||||
Changed = true;
|
||||
}
|
||||
|
||||
++NumIntraMoved;
|
||||
// Find the loop "bottom", ignoring any discontiguous parts.
|
||||
MachineBasicBlock *BotMBB = L->getHeader();
|
||||
if (BotMBB != prior(MF.end())) {
|
||||
MachineBasicBlock *NextMBB = next(MachineFunction::iterator(BotMBB));
|
||||
while (L->contains(NextMBB)) {
|
||||
ContiguousBlocks.insert(NextMBB);
|
||||
BotMBB = NextMBB;
|
||||
if (BotMBB == next(MachineFunction::iterator(BotMBB))) break;
|
||||
NextMBB = next(MachineFunction::iterator(BotMBB));
|
||||
}
|
||||
}
|
||||
|
||||
// First, move blocks which unconditionally jump to the loop top to the
|
||||
// top of the loop so that they have a fall through. This can introduce a
|
||||
// branch on entry to the loop, but it can eliminate a branch within the
|
||||
// loop. See the @simple case in test/CodeGen/X86/loop_blocks.ll for an
|
||||
// example of this.
|
||||
|
||||
bool BotHasFallthrough = HasFallthrough(BotMBB);
|
||||
|
||||
if (TopMBB == MF.begin() ||
|
||||
HasAnalyzableTerminator(prior(MachineFunction::iterator(TopMBB)))) {
|
||||
new_top:
|
||||
for (MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin(),
|
||||
PE = TopMBB->pred_end(); PI != PE; ++PI) {
|
||||
MachineBasicBlock *Pred = *PI;
|
||||
if (Pred == TopMBB) continue;
|
||||
if (HasFallthrough(Pred)) continue;
|
||||
if (!L->contains(Pred)) continue;
|
||||
|
||||
// Verify that we can analyze all the loop entry edges before beginning
|
||||
// any changes which will require us to be able to analyze them.
|
||||
if (Pred == MF.begin())
|
||||
continue;
|
||||
if (!HasAnalyzableTerminator(Pred))
|
||||
continue;
|
||||
if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(Pred))))
|
||||
continue;
|
||||
|
||||
// Move the block.
|
||||
Changed = true;
|
||||
ContiguousBlocks.insert(Pred);
|
||||
|
||||
// Move it and all the blocks that can reach it via fallthrough edges
|
||||
// exclusively, to keep existing fallthrough-edges intact.
|
||||
MachineFunction::iterator Begin = Pred;
|
||||
MachineFunction::iterator End = next(Begin);
|
||||
while (Begin != MF.begin()) {
|
||||
MachineFunction::iterator Prior = prior(Begin);
|
||||
if (Prior == MF.begin())
|
||||
break;
|
||||
// Stop when a non-fallthrough edge is found.
|
||||
if (!HasFallthrough(Prior))
|
||||
break;
|
||||
// Stop if a block which could fall-through out of the loop is found.
|
||||
if (Prior->isSuccessor(End))
|
||||
break;
|
||||
// If we've reached the top, stop scanning.
|
||||
if (Prior == MachineFunction::iterator(TopMBB)) {
|
||||
// We know top currently has a fall through (because we just checked
|
||||
// it) which would be lost if we do the transformation, so it isn't
|
||||
// worthwhile to do the transformation unless it would expose a new
|
||||
// fallthrough edge.
|
||||
if (!Prior->isSuccessor(End))
|
||||
goto next_pred;
|
||||
// Otherwise we can stop scanning and proceed to move the blocks.
|
||||
break;
|
||||
}
|
||||
// If we hit a switch or something complicated, don't move anything
|
||||
// for this predecessor.
|
||||
if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(Prior))))
|
||||
break;
|
||||
Begin = Prior;
|
||||
ContiguousBlocks.insert(Begin);
|
||||
++NumIntraMoved;
|
||||
}
|
||||
|
||||
// Update BotMBB, before moving Begin/End around and forgetting where
|
||||
// the new bottom is.
|
||||
if (BotMBB == prior(End))
|
||||
BotMBB = prior(Begin);
|
||||
|
||||
// Move the blocks.
|
||||
Splice(MF, TopMBB, Begin, End);
|
||||
|
||||
// Update TopMBB, now that all the updates requiring the old top are
|
||||
// complete.
|
||||
TopMBB = Begin;
|
||||
|
||||
// We have a new loop top. Iterate on it. We shouldn't have to do this
|
||||
// too many times if BranchFolding has done a reasonable job.
|
||||
goto new_top;
|
||||
next_pred:;
|
||||
}
|
||||
}
|
||||
|
||||
// If the loop previously didn't exit with a fall-through and it now does,
|
||||
// we eliminated a branch.
|
||||
if (!BotHasFallthrough && HasFallthrough(BotMBB)) {
|
||||
++NumIntraElim;
|
||||
BotHasFallthrough = true;
|
||||
}
|
||||
|
||||
// Next, move any loop blocks that are not in the portion of the loop
|
||||
// contiguous with the header. This makes the loop contiguous, provided that
|
||||
// AnalyzeBranch can handle all the relevant branching. See the @cfg_islands
|
||||
// case in test/CodeGen/X86/loop_blocks.ll for an example of this.
|
||||
|
||||
// Determine a position to move orphaned loop blocks to. If TopMBB is not
|
||||
// entered via fallthrough and BotMBB is exited via fallthrough, prepend them
|
||||
// to the top of the loop to avoid losing that fallthrough. Otherwise append
|
||||
// them to the bottom, even if it previously had a fallthrough, on the theory
|
||||
// that it's worth an extra branch to keep the loop contiguous.
|
||||
MachineFunction::iterator InsertPt = next(MachineFunction::iterator(BotMBB));
|
||||
bool InsertAtTop = false;
|
||||
if (TopMBB != MF.begin() &&
|
||||
!HasFallthrough(prior(MachineFunction::iterator(TopMBB))) &&
|
||||
HasFallthrough(BotMBB)) {
|
||||
InsertPt = TopMBB;
|
||||
InsertAtTop = true;
|
||||
}
|
||||
|
||||
// Find non-contiguous blocks and fix them.
|
||||
if (InsertPt != MF.begin() && HasAnalyzableTerminator(prior(InsertPt)))
|
||||
for (MachineLoop::block_iterator BI = L->block_begin(), BE = L->block_end();
|
||||
BI != BE; ++BI) {
|
||||
MachineBasicBlock *BB = *BI;
|
||||
|
||||
// Verify that we can analyze all the loop entry edges before beginning
|
||||
// any changes which will require us to be able to analyze them.
|
||||
if (!HasAnalyzableTerminator(BB))
|
||||
continue;
|
||||
if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(BB))))
|
||||
continue;
|
||||
|
||||
// If the layout predecessor is part of the loop, this block will be
|
||||
// processed along with it. This keeps them in their relative order.
|
||||
if (BB != MF.begin() &&
|
||||
L->contains(prior(MachineFunction::iterator(BB))))
|
||||
continue;
|
||||
|
||||
// Check to see if this block is already contiguous with the main
|
||||
// portion of the loop.
|
||||
if (!ContiguousBlocks.insert(BB))
|
||||
continue;
|
||||
|
||||
// Move the block.
|
||||
Changed = true;
|
||||
|
||||
// Process this block and all loop blocks contiguous with it, to keep
|
||||
// them in their relative order.
|
||||
MachineFunction::iterator Begin = BB;
|
||||
MachineFunction::iterator End = next(MachineFunction::iterator(BB));
|
||||
for (; End != MF.end(); ++End) {
|
||||
if (!L->contains(End)) break;
|
||||
if (!HasAnalyzableTerminator(End)) break;
|
||||
ContiguousBlocks.insert(End);
|
||||
++NumIntraMoved;
|
||||
}
|
||||
|
||||
// Update BotMBB.
|
||||
if (!InsertAtTop)
|
||||
BotMBB = prior(End);
|
||||
|
||||
// If we're inserting at the bottom of the loop, and the code we're
|
||||
// moving originally had fall-through successors, bring the successors
|
||||
// up with the loop blocks to preserve the fall-through edges.
|
||||
if (!InsertAtTop)
|
||||
for (; End != MF.end(); ++End) {
|
||||
if (L->contains(End)) break;
|
||||
if (!HasAnalyzableTerminator(End)) break;
|
||||
if (!HasFallthrough(prior(End))) break;
|
||||
}
|
||||
|
||||
// Move the blocks.
|
||||
Splice(MF, InsertPt, Begin, End);
|
||||
|
||||
// Update TopMBB.
|
||||
if (InsertAtTop)
|
||||
TopMBB = Begin;
|
||||
}
|
||||
|
||||
return Changed;
|
||||
}
|
||||
|
||||
@ -255,6 +451,8 @@ bool CodePlacementOpt::AlignLoops(MachineFunction &MF) {
|
||||
return Changed;
|
||||
}
|
||||
|
||||
/// AlignLoop - Align loop headers to target preferred alignments.
|
||||
///
|
||||
bool CodePlacementOpt::AlignLoop(MachineFunction &MF, MachineLoop *L,
|
||||
unsigned Align) {
|
||||
bool Changed = false;
|
||||
@ -264,13 +462,13 @@ bool CodePlacementOpt::AlignLoop(MachineFunction &MF, MachineLoop *L,
|
||||
Changed |= AlignLoop(MF, *I, Align);
|
||||
|
||||
MachineBasicBlock *TopMBB = L->getHeader();
|
||||
if (TopMBB == MF.begin()) return Changed;
|
||||
|
||||
MachineBasicBlock *PredMBB = prior(MachineFunction::iterator(TopMBB));
|
||||
while (MLI->getLoopFor(PredMBB) == L) {
|
||||
TopMBB = PredMBB;
|
||||
if (TopMBB == MF.begin()) return Changed;
|
||||
PredMBB = prior(MachineFunction::iterator(TopMBB));
|
||||
if (TopMBB != MF.begin()) {
|
||||
MachineBasicBlock *PredMBB = prior(MachineFunction::iterator(TopMBB));
|
||||
while (L->contains(PredMBB)) {
|
||||
TopMBB = PredMBB;
|
||||
if (TopMBB == MF.begin()) break;
|
||||
PredMBB = prior(MachineFunction::iterator(TopMBB));
|
||||
}
|
||||
}
|
||||
|
||||
TopMBB->setAlignment(Align);
|
||||
@ -288,30 +486,9 @@ bool CodePlacementOpt::runOnMachineFunction(MachineFunction &MF) {
|
||||
TLI = MF.getTarget().getTargetLowering();
|
||||
TII = MF.getTarget().getInstrInfo();
|
||||
|
||||
// Analyze the BBs first and keep track of BBs that
|
||||
// end with an unconditional jmp to another block in the same loop.
|
||||
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
|
||||
MachineBasicBlock *MBB = I;
|
||||
if (MBB->isLandingPad())
|
||||
continue;
|
||||
MachineLoop *L = MLI->getLoopFor(MBB);
|
||||
if (!L)
|
||||
continue;
|
||||
|
||||
MachineBasicBlock *TBB = 0, *FBB = 0;
|
||||
SmallVector<MachineOperand, 4> Cond;
|
||||
if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond) || !Cond.empty())
|
||||
continue;
|
||||
if (MLI->getLoopFor(TBB) == L && !TBB->isLandingPad())
|
||||
UncondJmpMBBs.push_back(std::make_pair(MBB, TBB));
|
||||
}
|
||||
|
||||
bool Changed = OptimizeIntraLoopEdges();
|
||||
bool Changed = OptimizeIntraLoopEdges(MF);
|
||||
|
||||
Changed |= AlignLoops(MF);
|
||||
|
||||
ChangedMBBs.clear();
|
||||
UncondJmpMBBs.clear();
|
||||
|
||||
return Changed;
|
||||
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats |& grep asm-printer | grep 84
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats |& grep asm-printer | grep 83
|
||||
; rdar://6802189
|
||||
|
||||
; Test if linearscan is unfavoring registers for allocation to allow more reuse
|
||||
|
207
test/CodeGen/X86/loop_blocks.ll
Normal file
207
test/CodeGen/X86/loop_blocks.ll
Normal file
@ -0,0 +1,207 @@
|
||||
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -asm-verbose=false | FileCheck %s
|
||||
|
||||
; These tests check for loop branching structure, and that the loop align
|
||||
; directive is placed in the expected place.
|
||||
|
||||
; CodeGen should insert a branch into the middle of the loop in
|
||||
; order to avoid a branch within the loop.
|
||||
|
||||
; CHECK: simple:
|
||||
; CHECK: jmp .LBB1_1
|
||||
; CHECK-NEXT: align
|
||||
; CHECK-NEXT: .LBB1_2:
|
||||
; CHECK-NEXT: call loop_latch
|
||||
; CHECK-NEXT: .LBB1_1:
|
||||
; CHECK-NEXT: call loop_header
|
||||
|
||||
define void @simple() nounwind {
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
call void @loop_header()
|
||||
%t0 = tail call i32 @get()
|
||||
%t1 = icmp slt i32 %t0, 0
|
||||
br i1 %t1, label %done, label %bb
|
||||
|
||||
bb:
|
||||
call void @loop_latch()
|
||||
br label %loop
|
||||
|
||||
done:
|
||||
call void @exit()
|
||||
ret void
|
||||
}
|
||||
|
||||
; CodeGen should move block_a to the top of the loop so that it
|
||||
; falls through into the loop, avoiding a branch within the loop.
|
||||
|
||||
; CHECK: slightly_more_involved:
|
||||
; CHECK: jmp .LBB2_1
|
||||
; CHECK-NEXT: align
|
||||
; CHECK-NEXT: .LBB2_4:
|
||||
; CHECK-NEXT: call bar99
|
||||
; CHECK-NEXT: .LBB2_1:
|
||||
; CHECK-NEXT: call body
|
||||
|
||||
define void @slightly_more_involved() nounwind {
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
call void @body()
|
||||
%t0 = call i32 @get()
|
||||
%t1 = icmp slt i32 %t0, 2
|
||||
br i1 %t1, label %block_a, label %bb
|
||||
|
||||
bb:
|
||||
%t2 = call i32 @get()
|
||||
%t3 = icmp slt i32 %t2, 99
|
||||
br i1 %t3, label %exit, label %loop
|
||||
|
||||
block_a:
|
||||
call void @bar99()
|
||||
br label %loop
|
||||
|
||||
exit:
|
||||
call void @exit()
|
||||
ret void
|
||||
}
|
||||
|
||||
; Same as slightly_more_involved, but block_a is now a CFG diamond with
|
||||
; fallthrough edges which should be preserved.
|
||||
|
||||
; CHECK: yet_more_involved:
|
||||
; CHECK: jmp .LBB3_1
|
||||
; CHECK-NEXT: align
|
||||
; CHECK-NEXT: .LBB3_7:
|
||||
; CHECK-NEXT: call block_a_true_func
|
||||
; CHECK-NEXT: jmp .LBB3_4
|
||||
; CHECK-NEXT: .LBB3_2:
|
||||
; CHECK-NEXT: call bar99
|
||||
; CHECK-NEXT: call get
|
||||
; CHECK-NEXT: cmpl $2999, %eax
|
||||
; CHECK-NEXT: jle .LBB3_7
|
||||
; CHECK-NEXT: call block_a_false_func
|
||||
; CHECK-NEXT: .LBB3_4:
|
||||
; CHECK-NEXT: call block_a_merge_func
|
||||
; CHECK-NEXT: .LBB3_1:
|
||||
; CHECK-NEXT: call body
|
||||
|
||||
define void @yet_more_involved() nounwind {
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
call void @body()
|
||||
%t0 = call i32 @get()
|
||||
%t1 = icmp slt i32 %t0, 2
|
||||
br i1 %t1, label %block_a, label %bb
|
||||
|
||||
bb:
|
||||
%t2 = call i32 @get()
|
||||
%t3 = icmp slt i32 %t2, 99
|
||||
br i1 %t3, label %exit, label %loop
|
||||
|
||||
block_a:
|
||||
call void @bar99()
|
||||
%z0 = call i32 @get()
|
||||
%z1 = icmp slt i32 %z0, 3000
|
||||
br i1 %z1, label %block_a_true, label %block_a_false
|
||||
|
||||
block_a_true:
|
||||
call void @block_a_true_func()
|
||||
br label %block_a_merge
|
||||
|
||||
block_a_false:
|
||||
call void @block_a_false_func()
|
||||
br label %block_a_merge
|
||||
|
||||
block_a_merge:
|
||||
call void @block_a_merge_func()
|
||||
br label %loop
|
||||
|
||||
exit:
|
||||
call void @exit()
|
||||
ret void
|
||||
}
|
||||
|
||||
; CodeGen should move the CFG islands that are part of the loop but don't
|
||||
; conveniently fit anywhere so that they are at least contiguous with the
|
||||
; loop.
|
||||
|
||||
; CHECK: cfg_islands:
|
||||
; CHECK: jmp .LBB4_1
|
||||
; CHECK-NEXT: align
|
||||
; CHECK-NEXT: .LBB4_7:
|
||||
; CHECK-NEXT: call bar100
|
||||
; CHECK-NEXT: jmp .LBB4_1
|
||||
; CHECK-NEXT: .LBB4_8:
|
||||
; CHECK-NEXT: call bar101
|
||||
; CHECK-NEXT: jmp .LBB4_1
|
||||
; CHECK-NEXT: .LBB4_9:
|
||||
; CHECK-NEXT: call bar102
|
||||
; CHECK-NEXT: jmp .LBB4_1
|
||||
; CHECK-NEXT: .LBB4_5:
|
||||
; CHECK-NEXT: call loop_latch
|
||||
; CHECK-NEXT: .LBB4_1:
|
||||
; CHECK-NEXT: call loop_header
|
||||
|
||||
define void @cfg_islands() nounwind {
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
call void @loop_header()
|
||||
%t0 = call i32 @get()
|
||||
%t1 = icmp slt i32 %t0, 100
|
||||
br i1 %t1, label %block100, label %bb
|
||||
|
||||
bb:
|
||||
%t2 = call i32 @get()
|
||||
%t3 = icmp slt i32 %t2, 101
|
||||
br i1 %t3, label %block101, label %bb1
|
||||
|
||||
bb1:
|
||||
%t4 = call i32 @get()
|
||||
%t5 = icmp slt i32 %t4, 102
|
||||
br i1 %t5, label %block102, label %bb2
|
||||
|
||||
bb2:
|
||||
%t6 = call i32 @get()
|
||||
%t7 = icmp slt i32 %t6, 103
|
||||
br i1 %t7, label %exit, label %bb3
|
||||
|
||||
bb3:
|
||||
call void @loop_latch()
|
||||
br label %loop
|
||||
|
||||
exit:
|
||||
call void @exit()
|
||||
ret void
|
||||
|
||||
block100:
|
||||
call void @bar100()
|
||||
br label %loop
|
||||
|
||||
block101:
|
||||
call void @bar101()
|
||||
br label %loop
|
||||
|
||||
block102:
|
||||
call void @bar102()
|
||||
br label %loop
|
||||
}
|
||||
|
||||
declare void @bar99() nounwind
|
||||
declare void @bar100() nounwind
|
||||
declare void @bar101() nounwind
|
||||
declare void @bar102() nounwind
|
||||
declare void @body() nounwind
|
||||
declare void @exit() nounwind
|
||||
declare void @loop_header() nounwind
|
||||
declare void @loop_latch() nounwind
|
||||
declare i32 @get() nounwind
|
||||
declare void @block_a_true_func() nounwind
|
||||
declare void @block_a_false_func() nounwind
|
||||
declare void @block_a_merge_func() nounwind
|
Loading…
x
Reference in New Issue
Block a user