llvm/lib/Target/R600/AMDGPUStructurizeCFG.cpp
Chandler Carruth 0b8c9a80f2 Move all of the header files which are involved in modelling the LLVM IR
into their new header subdirectory: include/llvm/IR. This matches the
directory structure of lib, and begins to correct a long standing point
of file layout clutter in LLVM.

There are still more header files to move here, but I wanted to handle
them in separate commits to make tracking what files make sense at each
layer easier.

The only really questionable files here are the target intrinsic
tablegen files. But that's a battle I'd rather not fight today.

I've updated both CMake and Makefile build systems (I think, and my
tests think, but I may have missed something).

I've also re-sorted the includes throughout the project. I'll be
committing updates to Clang, DragonEgg, and Polly momentarily.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171366 91177308-0d34-0410-b5e6-96231b3b80d8
2013-01-02 11:36:10 +00:00

715 lines
21 KiB
C++

//===-- AMDGPUStructurizeCFG.cpp - ------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// The pass implemented in this file transforms the programs control flow
/// graph into a form that's suitable for code generation on hardware that
/// implements control flow by execution masking. This currently includes all
/// AMD GPUs but may as well be useful for other types of hardware.
//
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Analysis/RegionPass.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
namespace {
// Definition of the complex types used in this pass.
typedef std::pair<BasicBlock *, Value *> BBValuePair;
typedef ArrayRef<BasicBlock*> BBVecRef;
typedef SmallVector<RegionNode*, 8> RNVector;
typedef SmallVector<BasicBlock*, 8> BBVector;
typedef SmallVector<BBValuePair, 2> BBValueVector;
typedef DenseMap<PHINode *, BBValueVector> PhiMap;
typedef DenseMap<BasicBlock *, PhiMap> BBPhiMap;
typedef DenseMap<BasicBlock *, Value *> BBPredicates;
typedef DenseMap<BasicBlock *, BBPredicates> PredMap;
typedef DenseMap<BasicBlock *, unsigned> VisitedMap;
// The name for newly created blocks.
static const char *FlowBlockName = "Flow";
/// @brief Transforms the control flow graph on one single entry/exit region
/// at a time.
///
/// After the transform all "If"/"Then"/"Else" style control flow looks like
/// this:
///
/// \verbatim
/// 1
/// ||
/// | |
/// 2 |
/// | /
/// |/
/// 3
/// || Where:
/// | | 1 = "If" block, calculates the condition
/// 4 | 2 = "Then" subregion, runs if the condition is true
/// | / 3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow
/// |/ 4 = "Else" optional subregion, runs if the condition is false
/// 5 5 = "End" block, also rejoins the control flow
/// \endverbatim
///
/// Control flow is expressed as a branch where the true exit goes into the
/// "Then"/"Else" region, while the false exit skips the region
/// The condition for the optional "Else" region is expressed as a PHI node.
/// The incomming values of the PHI node are true for the "If" edge and false
/// for the "Then" edge.
///
/// Additionally to that even complicated loops look like this:
///
/// \verbatim
/// 1
/// ||
/// | |
/// 2 ^ Where:
/// | / 1 = "Entry" block
/// |/ 2 = "Loop" optional subregion, with all exits at "Flow" block
/// 3 3 = "Flow" block, with back edge to entry block
/// |
/// \endverbatim
///
/// The back edge of the "Flow" block is always on the false side of the branch
/// while the true side continues the general flow. So the loop condition
/// consist of a network of PHI nodes where the true incoming values expresses
/// breaks and the false values expresses continue states.
class AMDGPUStructurizeCFG : public RegionPass {
static char ID;
Type *Boolean;
ConstantInt *BoolTrue;
ConstantInt *BoolFalse;
UndefValue *BoolUndef;
Function *Func;
Region *ParentRegion;
DominatorTree *DT;
RNVector Order;
VisitedMap Visited;
PredMap Predicates;
BBPhiMap DeletedPhis;
BBVector FlowsInserted;
BasicBlock *LoopStart;
BasicBlock *LoopEnd;
BBPredicates LoopPred;
void orderNodes();
void buildPredicate(BranchInst *Term, unsigned Idx,
BBPredicates &Pred, bool Invert);
void analyzeBlock(BasicBlock *BB);
void analyzeLoop(BasicBlock *BB, unsigned &LoopIdx);
void collectInfos();
bool dominatesPredicates(BasicBlock *A, BasicBlock *B);
void killTerminator(BasicBlock *BB);
RegionNode *skipChained(RegionNode *Node);
void delPhiValues(BasicBlock *From, BasicBlock *To);
void addPhiValues(BasicBlock *From, BasicBlock *To);
BasicBlock *getNextFlow(BasicBlock *Prev);
bool isPredictableTrue(BasicBlock *Prev, BasicBlock *Node);
BasicBlock *wireFlowBlock(BasicBlock *Prev, RegionNode *Node);
void createFlow();
void insertConditions();
void rebuildSSA();
public:
AMDGPUStructurizeCFG():
RegionPass(ID) {
initializeRegionInfoPass(*PassRegistry::getPassRegistry());
}
virtual bool doInitialization(Region *R, RGPassManager &RGM);
virtual bool runOnRegion(Region *R, RGPassManager &RGM);
virtual const char *getPassName() const {
return "AMDGPU simplify control flow";
}
void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTree>();
AU.addPreserved<DominatorTree>();
RegionPass::getAnalysisUsage(AU);
}
};
} // end anonymous namespace
char AMDGPUStructurizeCFG::ID = 0;
/// \brief Initialize the types and constants used in the pass
bool AMDGPUStructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) {
LLVMContext &Context = R->getEntry()->getContext();
Boolean = Type::getInt1Ty(Context);
BoolTrue = ConstantInt::getTrue(Context);
BoolFalse = ConstantInt::getFalse(Context);
BoolUndef = UndefValue::get(Boolean);
return false;
}
/// \brief Build up the general order of nodes
void AMDGPUStructurizeCFG::orderNodes() {
scc_iterator<Region *> I = scc_begin(ParentRegion),
E = scc_end(ParentRegion);
for (Order.clear(); I != E; ++I) {
std::vector<RegionNode *> &Nodes = *I;
Order.append(Nodes.begin(), Nodes.end());
}
}
/// \brief Build blocks and loop predicates
void AMDGPUStructurizeCFG::buildPredicate(BranchInst *Term, unsigned Idx,
BBPredicates &Pred, bool Invert) {
Value *True = Invert ? BoolFalse : BoolTrue;
Value *False = Invert ? BoolTrue : BoolFalse;
RegionInfo *RI = ParentRegion->getRegionInfo();
BasicBlock *BB = Term->getParent();
// Handle the case where multiple regions start at the same block
Region *R = BB != ParentRegion->getEntry() ?
RI->getRegionFor(BB) : ParentRegion;
if (R == ParentRegion) {
// It's a top level block in our region
Value *Cond = True;
if (Term->isConditional()) {
BasicBlock *Other = Term->getSuccessor(!Idx);
if (Visited.count(Other)) {
if (!Pred.count(Other))
Pred[Other] = False;
if (!Pred.count(BB))
Pred[BB] = True;
return;
}
Cond = Term->getCondition();
if (Idx != Invert)
Cond = BinaryOperator::CreateNot(Cond, "", Term);
}
Pred[BB] = Cond;
} else if (ParentRegion->contains(R)) {
// It's a block in a sub region
while(R->getParent() != ParentRegion)
R = R->getParent();
Pred[R->getEntry()] = True;
} else {
// It's a branch from outside into our parent region
Pred[BB] = True;
}
}
/// \brief Analyze the successors of each block and build up predicates
void AMDGPUStructurizeCFG::analyzeBlock(BasicBlock *BB) {
pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
BBPredicates &Pred = Predicates[BB];
for (; PI != PE; ++PI) {
BranchInst *Term = cast<BranchInst>((*PI)->getTerminator());
for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
BasicBlock *Succ = Term->getSuccessor(i);
if (Succ != BB)
continue;
buildPredicate(Term, i, Pred, false);
}
}
}
/// \brief Analyze the conditions leading to loop to a previous block
void AMDGPUStructurizeCFG::analyzeLoop(BasicBlock *BB, unsigned &LoopIdx) {
BranchInst *Term = cast<BranchInst>(BB->getTerminator());
for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
BasicBlock *Succ = Term->getSuccessor(i);
// Ignore it if it's not a back edge
if (!Visited.count(Succ))
continue;
buildPredicate(Term, i, LoopPred, true);
LoopEnd = BB;
if (Visited[Succ] < LoopIdx) {
LoopIdx = Visited[Succ];
LoopStart = Succ;
}
}
}
/// \brief Collect various loop and predicate infos
void AMDGPUStructurizeCFG::collectInfos() {
unsigned Number = 0, LoopIdx = ~0;
// Reset predicate
Predicates.clear();
// and loop infos
LoopStart = LoopEnd = 0;
LoopPred.clear();
RNVector::reverse_iterator OI = Order.rbegin(), OE = Order.rend();
for (Visited.clear(); OI != OE; Visited[(*OI++)->getEntry()] = ++Number) {
// Analyze all the conditions leading to a node
analyzeBlock((*OI)->getEntry());
if ((*OI)->isSubRegion())
continue;
// Find the first/last loop nodes and loop predicates
analyzeLoop((*OI)->getNodeAs<BasicBlock>(), LoopIdx);
}
}
/// \brief Does A dominate all the predicates of B ?
bool AMDGPUStructurizeCFG::dominatesPredicates(BasicBlock *A, BasicBlock *B) {
BBPredicates &Preds = Predicates[B];
for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end();
PI != PE; ++PI) {
if (!DT->dominates(A, PI->first))
return false;
}
return true;
}
/// \brief Remove phi values from all successors and the remove the terminator.
void AMDGPUStructurizeCFG::killTerminator(BasicBlock *BB) {
TerminatorInst *Term = BB->getTerminator();
if (!Term)
return;
for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
SI != SE; ++SI) {
delPhiValues(BB, *SI);
}
Term->eraseFromParent();
}
/// First: Skip forward to the first region node that either isn't a subregion or not
/// dominating it's exit, remove all the skipped nodes from the node order.
///
/// Second: Handle the first successor directly if the resulting nodes successor
/// predicates are still dominated by the original entry
RegionNode *AMDGPUStructurizeCFG::skipChained(RegionNode *Node) {
BasicBlock *Entry = Node->getEntry();
// Skip forward as long as it is just a linear flow
while (true) {
BasicBlock *Entry = Node->getEntry();
BasicBlock *Exit;
if (Node->isSubRegion()) {
Exit = Node->getNodeAs<Region>()->getExit();
} else {
TerminatorInst *Term = Entry->getTerminator();
if (Term->getNumSuccessors() != 1)
break;
Exit = Term->getSuccessor(0);
}
// It's a back edge, break here so we can insert a loop node
if (!Visited.count(Exit))
return Node;
// More than node edges are pointing to exit
if (!DT->dominates(Entry, Exit))
return Node;
RegionNode *Next = ParentRegion->getNode(Exit);
RNVector::iterator I = std::find(Order.begin(), Order.end(), Next);
assert(I != Order.end());
Visited.erase(Next->getEntry());
Order.erase(I);
Node = Next;
}
BasicBlock *BB = Node->getEntry();
TerminatorInst *Term = BB->getTerminator();
if (Term->getNumSuccessors() != 2)
return Node;
// Our node has exactly two succesors, check if we can handle
// any of them directly
BasicBlock *Succ = Term->getSuccessor(0);
if (!Visited.count(Succ) || !dominatesPredicates(Entry, Succ)) {
Succ = Term->getSuccessor(1);
if (!Visited.count(Succ) || !dominatesPredicates(Entry, Succ))
return Node;
} else {
BasicBlock *Succ2 = Term->getSuccessor(1);
if (Visited.count(Succ2) && Visited[Succ] > Visited[Succ2] &&
dominatesPredicates(Entry, Succ2))
Succ = Succ2;
}
RegionNode *Next = ParentRegion->getNode(Succ);
RNVector::iterator E = Order.end();
RNVector::iterator I = std::find(Order.begin(), E, Next);
assert(I != E);
killTerminator(BB);
FlowsInserted.push_back(BB);
Visited.erase(Succ);
Order.erase(I);
return ParentRegion->getNode(wireFlowBlock(BB, Next));
}
/// \brief Remove all PHI values coming from "From" into "To" and remember
/// them in DeletedPhis
void AMDGPUStructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
PhiMap &Map = DeletedPhis[To];
for (BasicBlock::iterator I = To->begin(), E = To->end();
I != E && isa<PHINode>(*I);) {
PHINode &Phi = cast<PHINode>(*I++);
while (Phi.getBasicBlockIndex(From) != -1) {
Value *Deleted = Phi.removeIncomingValue(From, false);
Map[&Phi].push_back(std::make_pair(From, Deleted));
}
}
}
/// \brief Add the PHI values back once we knew the new predecessor
void AMDGPUStructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
if (!DeletedPhis.count(To))
return;
PhiMap &Map = DeletedPhis[To];
SSAUpdater Updater;
for (PhiMap::iterator I = Map.begin(), E = Map.end(); I != E; ++I) {
PHINode *Phi = I->first;
Updater.Initialize(Phi->getType(), "");
BasicBlock *Fallback = To;
bool HaveFallback = false;
for (BBValueVector::iterator VI = I->second.begin(), VE = I->second.end();
VI != VE; ++VI) {
Updater.AddAvailableValue(VI->first, VI->second);
BasicBlock *Dom = DT->findNearestCommonDominator(Fallback, VI->first);
if (Dom == VI->first)
HaveFallback = true;
else if (Dom != Fallback)
HaveFallback = false;
Fallback = Dom;
}
if (!HaveFallback) {
Value *Undef = UndefValue::get(Phi->getType());
Updater.AddAvailableValue(Fallback, Undef);
}
Phi->addIncoming(Updater.GetValueAtEndOfBlock(From), From);
}
DeletedPhis.erase(To);
}
/// \brief Create a new flow node and update dominator tree and region info
BasicBlock *AMDGPUStructurizeCFG::getNextFlow(BasicBlock *Prev) {
LLVMContext &Context = Func->getContext();
BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() :
Order.back()->getEntry();
BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName,
Func, Insert);
DT->addNewBlock(Flow, Prev);
ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion);
FlowsInserted.push_back(Flow);
return Flow;
}
/// \brief Can we predict that this node will always be called?
bool AMDGPUStructurizeCFG::isPredictableTrue(BasicBlock *Prev,
BasicBlock *Node) {
BBPredicates &Preds = Predicates[Node];
bool Dominated = false;
for (BBPredicates::iterator I = Preds.begin(), E = Preds.end();
I != E; ++I) {
if (I->second != BoolTrue)
return false;
if (!Dominated && DT->dominates(I->first, Prev))
Dominated = true;
}
return Dominated;
}
/// \brief Wire up the new control flow by inserting or updating the branch
/// instructions at node exits
BasicBlock *AMDGPUStructurizeCFG::wireFlowBlock(BasicBlock *Prev,
RegionNode *Node) {
BasicBlock *Entry = Node->getEntry();
if (LoopStart == Entry) {
LoopStart = Prev;
LoopPred[Prev] = BoolTrue;
}
// Wire it up temporary, skipChained may recurse into us
BranchInst::Create(Entry, Prev);
DT->changeImmediateDominator(Entry, Prev);
addPhiValues(Prev, Entry);
Node = skipChained(Node);
BasicBlock *Next = getNextFlow(Prev);
if (!isPredictableTrue(Prev, Entry)) {
// Let Prev point to entry and next block
Prev->getTerminator()->eraseFromParent();
BranchInst::Create(Entry, Next, BoolUndef, Prev);
} else {
DT->changeImmediateDominator(Next, Entry);
}
// Let node exit(s) point to next block
if (Node->isSubRegion()) {
Region *SubRegion = Node->getNodeAs<Region>();
BasicBlock *Exit = SubRegion->getExit();
// Find all the edges from the sub region to the exit
BBVector ToDo;
for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) {
if (SubRegion->contains(*I))
ToDo.push_back(*I);
}
// Modify the edges to point to the new flow block
for (BBVector::iterator I = ToDo.begin(), E = ToDo.end(); I != E; ++I) {
delPhiValues(*I, Exit);
TerminatorInst *Term = (*I)->getTerminator();
Term->replaceUsesOfWith(Exit, Next);
}
// Update the region info
SubRegion->replaceExit(Next);
} else {
BasicBlock *BB = Node->getNodeAs<BasicBlock>();
killTerminator(BB);
BranchInst::Create(Next, BB);
if (BB == LoopEnd)
LoopEnd = 0;
}
return Next;
}
/// Destroy node order and visited map, build up flow order instead.
/// After this function control flow looks like it should be, but
/// branches only have undefined conditions.
void AMDGPUStructurizeCFG::createFlow() {
DeletedPhis.clear();
BasicBlock *Prev = Order.pop_back_val()->getEntry();
assert(Prev == ParentRegion->getEntry() && "Incorrect node order!");
Visited.erase(Prev);
if (LoopStart == Prev) {
// Loop starts at entry, split entry so that we can predicate it
BasicBlock::iterator Insert = Prev->getFirstInsertionPt();
BasicBlock *Split = Prev->splitBasicBlock(Insert, FlowBlockName);
DT->addNewBlock(Split, Prev);
ParentRegion->getRegionInfo()->setRegionFor(Split, ParentRegion);
Predicates[Split] = Predicates[Prev];
Order.push_back(ParentRegion->getBBNode(Split));
LoopPred[Prev] = BoolTrue;
} else if (LoopStart == Order.back()->getEntry()) {
// Loop starts behind entry, split entry so that we can jump to it
Instruction *Term = Prev->getTerminator();
BasicBlock *Split = Prev->splitBasicBlock(Term, FlowBlockName);
DT->addNewBlock(Split, Prev);
ParentRegion->getRegionInfo()->setRegionFor(Split, ParentRegion);
Prev = Split;
}
killTerminator(Prev);
FlowsInserted.clear();
FlowsInserted.push_back(Prev);
while (!Order.empty()) {
RegionNode *Node = Order.pop_back_val();
Visited.erase(Node->getEntry());
Prev = wireFlowBlock(Prev, Node);
if (LoopStart && !LoopEnd) {
// Create an extra loop end node
LoopEnd = Prev;
Prev = getNextFlow(LoopEnd);
BranchInst::Create(Prev, LoopStart, BoolUndef, LoopEnd);
addPhiValues(LoopEnd, LoopStart);
}
}
BasicBlock *Exit = ParentRegion->getExit();
BranchInst::Create(Exit, Prev);
addPhiValues(Prev, Exit);
if (DT->dominates(ParentRegion->getEntry(), Exit))
DT->changeImmediateDominator(Exit, Prev);
if (LoopStart && LoopEnd) {
BBVector::iterator FI = std::find(FlowsInserted.begin(),
FlowsInserted.end(),
LoopStart);
for (; *FI != LoopEnd; ++FI) {
addPhiValues(*FI, (*FI)->getTerminator()->getSuccessor(0));
}
}
assert(Order.empty());
assert(Visited.empty());
assert(DeletedPhis.empty());
}
/// \brief Insert the missing branch conditions
void AMDGPUStructurizeCFG::insertConditions() {
SSAUpdater PhiInserter;
for (BBVector::iterator FI = FlowsInserted.begin(), FE = FlowsInserted.end();
FI != FE; ++FI) {
BranchInst *Term = cast<BranchInst>((*FI)->getTerminator());
if (Term->isUnconditional())
continue;
PhiInserter.Initialize(Boolean, "");
PhiInserter.AddAvailableValue(&Func->getEntryBlock(), BoolFalse);
BasicBlock *Succ = Term->getSuccessor(0);
BBPredicates &Preds = (*FI == LoopEnd) ? LoopPred : Predicates[Succ];
for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end();
PI != PE; ++PI) {
PhiInserter.AddAvailableValue(PI->first, PI->second);
}
Term->setCondition(PhiInserter.GetValueAtEndOfBlock(*FI));
}
}
/// Handle a rare case where the disintegrated nodes instructions
/// no longer dominate all their uses. Not sure if this is really nessasary
void AMDGPUStructurizeCFG::rebuildSSA() {
SSAUpdater Updater;
for (Region::block_iterator I = ParentRegion->block_begin(),
E = ParentRegion->block_end();
I != E; ++I) {
BasicBlock *BB = *I;
for (BasicBlock::iterator II = BB->begin(), IE = BB->end();
II != IE; ++II) {
bool Initialized = false;
for (Use *I = &II->use_begin().getUse(), *Next; I; I = Next) {
Next = I->getNext();
Instruction *User = cast<Instruction>(I->getUser());
if (User->getParent() == BB) {
continue;
} else if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
if (UserPN->getIncomingBlock(*I) == BB)
continue;
}
if (DT->dominates(II, User))
continue;
if (!Initialized) {
Value *Undef = UndefValue::get(II->getType());
Updater.Initialize(II->getType(), "");
Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
Updater.AddAvailableValue(BB, II);
Initialized = true;
}
Updater.RewriteUseAfterInsertions(*I);
}
}
}
}
/// \brief Run the transformation for each region found
bool AMDGPUStructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) {
if (R->isTopLevelRegion())
return false;
Func = R->getEntry()->getParent();
ParentRegion = R;
DT = &getAnalysis<DominatorTree>();
orderNodes();
collectInfos();
createFlow();
insertConditions();
rebuildSSA();
Order.clear();
Visited.clear();
Predicates.clear();
DeletedPhis.clear();
FlowsInserted.clear();
return true;
}
/// \brief Create the pass
Pass *llvm::createAMDGPUStructurizeCFGPass() {
return new AMDGPUStructurizeCFG();
}