Enable the new LoopInfo algorithm by default.

The primary advantage is that loop optimizations will be applied in a
stable order. This helps debugging and unit test creation. It is also
a better overall implementation without pathologically bad performance
on deep functions.

On large functions (llvm-stress --size=200000 | opt -loops)
Before: 0.1263s
After:  0.0225s

On deep functions (after tweaking llvm-stress, thanks Nadav):
Before: 0.2281s
After:  0.0227s

See r158790 for more comments.

The loop tree is now consistently generated in forward order, but loop
passes are applied in reverse order over the program. If we have a
loop optimization that prefers forward order, that can easily be
achieved by adding a different type of LoopPassManager.

llvm-svn: 159183
This commit is contained in:
Andrew Trick 2012-06-26 04:11:38 +00:00
parent ed67e8a44a
commit c5e08120a4
10 changed files with 52 additions and 244 deletions

View File

@ -127,8 +127,12 @@ public:
const std::vector<LoopT *> &getSubLoops() const { return SubLoops; }
std::vector<LoopT *> &getSubLoopsVector() { return SubLoops; }
typedef typename std::vector<LoopT *>::const_iterator iterator;
typedef typename std::vector<LoopT *>::const_reverse_iterator
reverse_iterator;
iterator begin() const { return SubLoops.begin(); }
iterator end() const { return SubLoops.end(); }
reverse_iterator rbegin() const { return SubLoops.rbegin(); }
reverse_iterator rend() const { return SubLoops.rend(); }
bool empty() const { return SubLoops.empty(); }
/// getBlocks - Get a list of the basic blocks which make up this loop.
@ -431,8 +435,12 @@ public:
/// function.
///
typedef typename std::vector<LoopT *>::const_iterator iterator;
typedef typename std::vector<LoopT *>::const_reverse_iterator
reverse_iterator;
iterator begin() const { return TopLevelLoops.begin(); }
iterator end() const { return TopLevelLoops.end(); }
reverse_iterator rbegin() const { return TopLevelLoops.rbegin(); }
reverse_iterator rend() const { return TopLevelLoops.rend(); }
bool empty() const { return TopLevelLoops.empty(); }
/// getLoopFor - Return the inner most loop that BB lives in. If a basic
@ -525,19 +533,6 @@ public:
return isNotAlreadyContainedIn(SubLoop->getParentLoop(), ParentLoop);
}
void Calculate(DominatorTreeBase<BlockT> &DT);
LoopT *ConsiderForLoop(BlockT *BB, DominatorTreeBase<BlockT> &DT);
/// MoveSiblingLoopInto - This method moves the NewChild loop to live inside
/// of the NewParent Loop, instead of being a sibling of it.
void MoveSiblingLoopInto(LoopT *NewChild, LoopT *NewParent);
/// InsertLoopInto - This inserts loop L into the specified parent loop. If
/// the parent loop contains a subloop which should contain L, the loop gets
/// inserted into that subloop instead.
void InsertLoopInto(LoopT *L, LoopT *Parent);
/// Create the loop forest using a stable algorithm.
void Analyze(DominatorTreeBase<BlockT> &DomTree);
@ -570,8 +565,11 @@ public:
/// function.
///
typedef LoopInfoBase<BasicBlock, Loop>::iterator iterator;
typedef LoopInfoBase<BasicBlock, Loop>::reverse_iterator reverse_iterator;
inline iterator begin() const { return LI.begin(); }
inline iterator end() const { return LI.end(); }
inline reverse_iterator rbegin() const { return LI.rbegin(); }
inline reverse_iterator rend() const { return LI.rend(); }
bool empty() const { return LI.empty(); }
/// getLoopFor - Return the inner most loop that BB lives in. If a basic

View File

@ -353,185 +353,6 @@ void LoopBase<BlockT, LoopT>::print(raw_ostream &OS, unsigned Depth) const {
(*I)->print(OS, Depth+2);
}
//===----------------------------------------------------------------------===//
/// LoopInfo - This class builds and contains all of the top level loop
/// structures in the specified function.
///
///
/// Calculate - Populate TopLevelLoops. Starting from the block of the
/// dominator tree's root node, every block produced by df_iterator is offered
/// to ConsiderForLoop, and each non-null loop it returns is appended to
/// TopLevelLoops.
template<class BlockT, class LoopT>
void LoopInfoBase<BlockT, LoopT>::Calculate(DominatorTreeBase<BlockT> &DT) {
BlockT *RootNode = DT.getRootNode()->getBlock();
for (df_iterator<BlockT*> NI = df_begin(RootNode),
NE = df_end(RootNode); NI != NE; ++NI)
// ConsiderForLoop returns 0 when the block does not start a new loop.
if (LoopT *L = ConsiderForLoop(*NI, DT))
TopLevelLoops.push_back(L);
}
/// ConsiderForLoop - Try to build the loop whose header is BB.
///
/// Returns 0 if BB is already present in BBMap, or if no predecessor of BB is
/// both dominated by BB and reachable from entry (i.e. BB has no backedge).
/// Otherwise a new LoopT is allocated for BB, its blocks are gathered by
/// walking predecessors backwards from the backedge sources, nested loops are
/// created by recursive calls on the gathered blocks, the blocks are entered
/// into BBMap, and the direct subloops are re-nested before the loop is
/// returned. The caller is responsible for attaching the returned loop to a
/// parent or to the top-level list.
template<class BlockT, class LoopT>
LoopT *LoopInfoBase<BlockT, LoopT>::
ConsiderForLoop(BlockT *BB, DominatorTreeBase<BlockT> &DT) {
if (BBMap.count(BB)) return 0; // BB is already mapped to a loop; skip it.
std::vector<BlockT *> TodoStack;
// Scan the predecessors of BB, checking to see if BB dominates any of
// them. This identifies backedges which target this node...
typedef GraphTraits<Inverse<BlockT*> > InvBlockTraits;
for (typename InvBlockTraits::ChildIteratorType I =
InvBlockTraits::child_begin(BB), E = InvBlockTraits::child_end(BB);
I != E; ++I) {
typename InvBlockTraits::NodeType *N = *I;
// If BB dominates its predecessor...
if (DT.dominates(BB, N) && DT.isReachableFromEntry(N))
TodoStack.push_back(N);
}
if (TodoStack.empty()) return 0; // No backedges to this block...
// Create a new loop to represent this basic block...
LoopT *L = new LoopT(BB);
BBMap[BB] = L;
while (!TodoStack.empty()) { // Process all the nodes in the loop
BlockT *X = TodoStack.back();
TodoStack.pop_back();
if (!L->contains(X) && // As of yet unprocessed??
DT.isReachableFromEntry(X)) {
// Check to see if this block already belongs to a loop. If this occurs
// then we have a case where a loop that is supposed to be a child of
// the current loop was processed before the current loop. When this
// occurs, this child loop gets added to a part of the current loop,
// making it a sibling to the current loop. We have to reparent this
// loop.
if (LoopT *SubLoop =
const_cast<LoopT *>(getLoopFor(X)))
if (SubLoop->getHeader() == X && isNotAlreadyContainedIn(SubLoop, L)){
// Remove the subloop from its current parent...
assert(SubLoop->ParentLoop && SubLoop->ParentLoop != L);
LoopT *SLP = SubLoop->ParentLoop; // SubLoopParent
typename std::vector<LoopT *>::iterator I =
std::find(SLP->SubLoops.begin(), SLP->SubLoops.end(), SubLoop);
assert(I != SLP->SubLoops.end() &&"SubLoop not a child of parent?");
SLP->SubLoops.erase(I); // Remove from parent...
// Add the subloop to THIS loop...
SubLoop->ParentLoop = L;
L->SubLoops.push_back(SubLoop);
}
// Normal case, add the block to our loop...
L->Blocks.push_back(X);
typedef GraphTraits<Inverse<BlockT*> > InvBlockTraits;
// Add all of the predecessors of X to the end of the work stack...
TodoStack.insert(TodoStack.end(), InvBlockTraits::child_begin(X),
InvBlockTraits::child_end(X));
}
}
// If there are any loops nested within this loop, create them now!
// Each block of L is offered to a recursive call; blocks already in BBMap
// make that call return 0 immediately.
for (typename std::vector<BlockT*>::iterator I = L->Blocks.begin(),
E = L->Blocks.end(); I != E; ++I)
if (LoopT *NewLoop = ConsiderForLoop(*I, DT)) {
L->SubLoops.push_back(NewLoop);
NewLoop->ParentLoop = L;
}
// Add the basic blocks that comprise this loop to the BBMap so that this
// loop can be found for them.
// NOTE(review): insert() presumably leaves entries made by the nested calls
// above untouched, so inner-loop blocks keep their innermost loop - confirm
// against the BBMap container type.
//
for (typename std::vector<BlockT*>::iterator I = L->Blocks.begin(),
E = L->Blocks.end(); I != E; ++I)
BBMap.insert(std::make_pair(*I, L));
// Now that we have a list of all of the child loops of this loop, check to
// see if any of them should actually be nested inside of each other. We
// can accidentally pull loops out of their parents, so we must make sure to
// organize the loop nests correctly now.
{
// Maps each block seen so far to the direct child of L currently believed
// to contain it.
std::map<BlockT *, LoopT *> ContainingLoops;
for (unsigned i = 0; i != L->SubLoops.size(); ++i) {
LoopT *Child = L->SubLoops[i];
assert(Child->getParentLoop() == L && "Not proper child loop?");
if (LoopT *ContainingLoop = ContainingLoops[Child->getHeader()]) {
// If there is already a loop which contains this loop, move this loop
// into the containing loop.
MoveSiblingLoopInto(Child, ContainingLoop);
--i; // The loop got removed from the SubLoops list.
} else {
// This is currently considered to be a top-level loop. Check to see
// if any of the contained blocks are loop headers for subloops we
// have already processed.
for (unsigned b = 0, e = Child->Blocks.size(); b != e; ++b) {
LoopT *&BlockLoop = ContainingLoops[Child->Blocks[b]];
if (BlockLoop == 0) { // Child block not processed yet...
BlockLoop = Child;
} else if (BlockLoop != Child) {
LoopT *SubLoop = BlockLoop;
// Reparent all of the blocks which used to belong to BlockLoops
for (unsigned j = 0, f = SubLoop->Blocks.size(); j != f; ++j)
ContainingLoops[SubLoop->Blocks[j]] = Child;
// There is already a loop which contains this block, that means
// that we should reparent the loop which the block is currently
// considered to belong to to be a child of this loop.
MoveSiblingLoopInto(SubLoop, Child);
--i; // We just shrunk the SubLoops list.
}
}
}
}
}
return L;
}
/// MoveSiblingLoopInto - This method moves the NewChild loop to live inside
/// of the NewParent Loop, instead of being a sibling of it.
///
/// Both loops must currently share the same non-null parent loop and must be
/// distinct; this is asserted. NewChild is unlinked from that parent and then
/// handed to InsertLoopInto, which places it under NewParent or under one of
/// NewParent's subloops.
template<class BlockT, class LoopT>
void LoopInfoBase<BlockT, LoopT>::
MoveSiblingLoopInto(LoopT *NewChild, LoopT *NewParent) {
LoopT *OldParent = NewChild->getParentLoop();
assert(OldParent && OldParent == NewParent->getParentLoop() &&
NewChild != NewParent && "Not sibling loops!");
// Remove NewChild from being a child of OldParent
typename std::vector<LoopT *>::iterator I =
std::find(OldParent->SubLoops.begin(), OldParent->SubLoops.end(),
NewChild);
assert(I != OldParent->SubLoops.end() && "Parent fields incorrect??");
OldParent->SubLoops.erase(I); // Remove from parent's subloops list
// Clear the stale parent link; InsertLoopInto assigns the new parent.
NewChild->ParentLoop = 0;
InsertLoopInto(NewChild, NewParent);
}
/// InsertLoopInto - This inserts loop L into the specified parent loop. If
/// the parent loop has a subloop that contains L's header, L is inserted
/// (recursively) into that subloop instead; otherwise L becomes a direct
/// child of Parent. Parent must contain L's header; this is asserted.
template<class BlockT, class LoopT>
void LoopInfoBase<BlockT, LoopT>::InsertLoopInto(LoopT *L, LoopT *Parent) {
BlockT *LHeader = L->getHeader();
assert(Parent->contains(LHeader) &&
"This loop should not be inserted here!");
// Check to see if it belongs in a child loop...
// The first subloop that contains the header wins and the search descends
// into it.
for (unsigned i = 0, e = static_cast<unsigned>(Parent->SubLoops.size());
i != e; ++i)
if (Parent->SubLoops[i]->contains(LHeader)) {
InsertLoopInto(L, Parent->SubLoops[i]);
return;
}
// If not, insert it here!
Parent->SubLoops.push_back(L);
L->ParentLoop = Parent;
}
//===----------------------------------------------------------------------===//
/// Stable LoopInfo Analysis - Build a loop tree using stable iterators so the
/// result does not depend on use list (block predecessor) order.

View File

@ -44,10 +44,6 @@ static cl::opt<bool,true>
VerifyLoopInfoX("verify-loop-info", cl::location(VerifyLoopInfo),
cl::desc("Verify loop info (time consuming)"));
static cl::opt<bool>
StableLoopInfo("stable-loops", cl::Hidden, cl::init(false),
cl::desc("Compute a stable loop tree."));
char LoopInfo::ID = 0;
INITIALIZE_PASS_BEGIN(LoopInfo, "loops", "Natural Loop Information", true, true)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
@ -516,10 +512,7 @@ Loop *UnloopUpdater::getNearestLoop(BasicBlock *BB, Loop *BBLoop) {
//
bool LoopInfo::runOnFunction(Function &) {
releaseMemory();
if (StableLoopInfo)
LI.Analyze(getAnalysis<DominatorTree>().getBase());
else
LI.Calculate(getAnalysis<DominatorTree>().getBase()); // Update
LI.Analyze(getAnalysis<DominatorTree>().getBase());
return false;
}

View File

@ -162,7 +162,7 @@ void LPPassManager::deleteSimpleAnalysisValue(Value *V, Loop *L) {
// Recurse through all subloops and all loops into LQ.
static void addLoopIntoQueue(Loop *L, std::deque<Loop *> &LQ) {
LQ.push_back(L);
for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
for (Loop::reverse_iterator I = L->rbegin(), E = L->rend(); I != E; ++I)
addLoopIntoQueue(*I, LQ);
}
@ -183,8 +183,12 @@ bool LPPassManager::runOnFunction(Function &F) {
// Collect inherited analysis from Module level pass manager.
populateInheritedAnalysis(TPM->activeStack);
// Populate Loop Queue
for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
// Populate the loop queue in reverse program order. There is no clear need to
// process sibling loops in either forward or reverse order. There may be some
// advantage in deleting uses in a later loop before optimizing the
// definitions in an earlier loop. If we find a clear reason to process in
// forward order, then a forward variant of LoopPassManager should be created.
for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I)
addLoopIntoQueue(*I, LQ);
if (LQ.empty()) // No loops, skip calling finalizers

View File

@ -18,7 +18,6 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Analysis/LoopInfoImpl.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
@ -26,10 +25,6 @@ using namespace llvm;
template class llvm::LoopBase<MachineBasicBlock, MachineLoop>;
template class llvm::LoopInfoBase<MachineBasicBlock, MachineLoop>;
static cl::opt<bool>
StableLoopInfo("stable-machine-loops", cl::Hidden, cl::init(false),
cl::desc("Compute a stable loop tree."));
char MachineLoopInfo::ID = 0;
INITIALIZE_PASS_BEGIN(MachineLoopInfo, "machine-loops",
"Machine Natural Loop Construction", true, true)
@ -41,10 +36,7 @@ char &llvm::MachineLoopInfoID = MachineLoopInfo::ID;
bool MachineLoopInfo::runOnMachineFunction(MachineFunction &) {
releaseMemory();
if (StableLoopInfo)
LI.Analyze(getAnalysis<MachineDominatorTree>().getBase());
else
LI.Calculate(getAnalysis<MachineDominatorTree>().getBase()); // Update
LI.Analyze(getAnalysis<MachineDominatorTree>().getBase());
return false;
}

View File

@ -1,9 +1,9 @@
; RUN: llc -asm-verbose=false -disable-branch-fold -disable-code-place -disable-tail-duplicate -march=x86-64 -mcpu=nehalem < %s | FileCheck %s
; rdar://7236213
; Xfailed now that scheduler 2-address hack is disabled a lea is generated.
; The code isn't any worse though.
; XFAIL: *
;
; The scheduler's 2-address hack has been disabled, so there is
; currently no good guarantee that this test will pass until the
; machine scheduler develops an equivalent heuristic.
; CodeGen shouldn't require any lea instructions inside the marked loop.
; It should properly set up post-increment uses and do coalescing for

View File

@ -2,9 +2,9 @@
; CHECK: exit1:
; CHECK: .lcssa =
; CHECK: exit2:
; CHECK: .lcssa2 =
; CHECK: .lcssa1 =
; CHECK: exit3:
; CHECK-NOT: .lcssa1 =
; CHECK-NOT: .lcssa
; Test to ensure that when there are multiple exit blocks, PHI nodes are
; only inserted by LCSSA when there is a use dominated by a given exit

View File

@ -35,11 +35,11 @@
; CHECK: loop_begin.us1: ; preds = %loop_begin.backedge.us5, %.split.split.us
; CHECK-NEXT: %var_val.us2 = load i32* %var
; CHECK-NEXT: switch i32 2, label %default.us-lcssa.us-lcssa.us [
; CHECK-NEXT: i32 1, label %inc.us3
; CHECK-NEXT: i32 2, label %dec.us4
; CHECK-NEXT: i32 1, label %inc.us4
; CHECK-NEXT: i32 2, label %dec.us3
; CHECK-NEXT: ]
; CHECK: dec.us4: ; preds = %loop_begin.us1
; CHECK: dec.us3: ; preds = %loop_begin.us1
; CHECK-NEXT: call void @decf() noreturn nounwind
; CHECK-NEXT: br label %loop_begin.backedge.us5
@ -81,7 +81,7 @@ inc:
dec:
call void @decf() noreturn nounwind
br label %loop_begin
default:
default:
br label %loop_exit
loop_exit:
ret i32 0

View File

@ -19,15 +19,15 @@
; CHECK: switch i32 1, label %second_switch.us [
; CHECK-NEXT: i32 1, label %inc.us
; CHECK: inc.us: ; preds = %second_switch.us, %loop_begin.us
; CHECK-NEXT: call void @incf() noreturn nounwind
; CHECK-NEXT: br label %loop_begin.backedge.us
; CHECK: second_switch.us: ; preds = %loop_begin.us
; CHECK-NEXT: switch i32 %d, label %default.us [
; CHECK-NEXT: i32 1, label %inc.us
; CHECK-NEXT: ]
; CHECK: inc.us: ; preds = %second_switch.us, %loop_begin.us
; CHECK-NEXT: call void @incf() noreturn nounwind
; CHECK-NEXT: br label %loop_begin.backedge.us
; CHECK: .split: ; preds = %..split_crit_edge
; CHECK-NEXT: br label %loop_begin
@ -73,7 +73,7 @@ inc:
call void @incf() noreturn nounwind
br label %loop_begin
default:
default:
br label %loop_begin
loop_exit:

View File

@ -25,14 +25,14 @@
; CHECK-NEXT: switch i32 1, label %second_switch.us.us [
; CHECK-NEXT: i32 1, label %inc.us.us
; CHECK: inc.us.us: ; preds = %second_switch.us.us, %loop_begin.us.us
; CHECK-NEXT: call void @incf() noreturn nounwind
; CHECK-NEXT: br label %loop_begin.backedge.us.us
; CHECK: second_switch.us.us: ; preds = %loop_begin.us.us
; CHECK-NEXT: switch i32 1, label %default.us.us [
; CHECK-NEXT: i32 1, label %inc.us.us
; CHECK: inc.us.us: ; preds = %second_switch.us.us, %loop_begin.us.us
; CHECK-NEXT: call void @incf() noreturn nounwind
; CHECK-NEXT: br label %loop_begin.backedge.us.us
; CHECK: .split.us.split: ; preds = %.split.us..split.us.split_crit_edge
; CHECK-NEXT: br label %loop_begin.us
@ -41,10 +41,6 @@
; CHECK-NEXT: switch i32 1, label %second_switch.us [
; CHECK-NEXT: i32 1, label %inc.us
; CHECK: inc.us: ; preds = %second_switch.us.inc.us_crit_edge, %loop_begin.us
; CHECK-NEXT: call void @incf() noreturn nounwind
; CHECK-NEXT: br label %loop_begin.backedge.us
; CHECK: second_switch.us: ; preds = %loop_begin.us
; CHECK-NEXT: switch i32 %d, label %default.us [
; CHECK-NEXT: i32 1, label %second_switch.us.inc.us_crit_edge
@ -53,6 +49,10 @@
; CHECK: second_switch.us.inc.us_crit_edge: ; preds = %second_switch.us
; CHECK-NEXT: br i1 true, label %us-unreachable8, label %inc.us
; CHECK: inc.us: ; preds = %second_switch.us.inc.us_crit_edge, %loop_begin.us
; CHECK-NEXT: call void @incf() noreturn nounwind
; CHECK-NEXT: br label %loop_begin.backedge.us
; CHECK: .split: ; preds = %..split_crit_edge
; CHECK-NEXT: %3 = icmp eq i32 %d, 1
; CHECK-NEXT: br i1 %3, label %.split.split.us, label %.split..split.split_crit_edge
@ -65,21 +65,21 @@
; CHECK: loop_begin.us1: ; preds = %loop_begin.backedge.us6, %.split.split.us
; CHECK-NEXT: %var_val.us2 = load i32* %var
; CHECK-NEXT: switch i32 %c, label %second_switch.us4 [
; CHECK-NEXT: switch i32 %c, label %second_switch.us3 [
; CHECK-NEXT: i32 1, label %loop_begin.inc_crit_edge.us
; CHECK-NEXT: ]
; CHECK: inc.us3: ; preds = %loop_begin.inc_crit_edge.us, %second_switch.us4
; CHECK: second_switch.us3: ; preds = %loop_begin.us1
; CHECK-NEXT: switch i32 1, label %default.us5 [
; CHECK-NEXT: i32 1, label %inc.us4
; CHECK-NEXT: ]
; CHECK: inc.us4: ; preds = %loop_begin.inc_crit_edge.us, %second_switch.us3
; CHECK-NEXT: call void @incf() noreturn nounwind
; CHECK-NEXT: br label %loop_begin.backedge.us6
; CHECK: second_switch.us4: ; preds = %loop_begin.us1
; CHECK-NEXT: switch i32 1, label %default.us5 [
; CHECK-NEXT: i32 1, label %inc.us3
; CHECK-NEXT: ]
; CHECK: loop_begin.inc_crit_edge.us: ; preds = %loop_begin.us1
; CHECK-NEXT: br i1 true, label %us-unreachable.us-lcssa.us, label %inc.us3
; CHECK-NEXT: br i1 true, label %us-unreachable.us-lcssa.us, label %inc.us4
; CHECK: .split.split: ; preds = %.split..split.split_crit_edge
; CHECK-NEXT: br label %loop_begin
@ -127,7 +127,7 @@ inc:
call void @incf() noreturn nounwind
br label %loop_begin
default:
default:
br label %loop_begin
loop_exit: