Initial checkin of Datastructure analysis.

Has bugs, but shouldn't crash in theory.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@1994 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chris Lattner 2002-03-26 22:39:06 +00:00
parent d9ddf05014
commit bb2a28fec5
7 changed files with 1329 additions and 0 deletions

View File

@ -0,0 +1,265 @@
//===- ComputeClosure.cpp - Implement interprocedural closing of graphs ---===//
//
// Compute the interprocedural closure of a data structure graph
//
//===----------------------------------------------------------------------===//
// DEBUG_IP_CLOSURE - Define this to debug the act of linking up graphs
//#define DEBUG_IP_CLOSURE 1
#include "llvm/Analysis/DataStructure.h"
#include "llvm/iOther.h"
#include "Support/STLExtras.h"
#include <algorithm>
#ifdef DEBUG_IP_CLOSURE
#include "llvm/Assembly/Writer.h"
#endif
// copyEdgesFromTo - Make a copy of all of the edges to Node to also point
// PV. If there are edges out of Node, the edges are added to the subgraph
// starting at PV.
//
static void copyEdgesFromTo(DSNode *Node, const PointerValSet &PVS) {
// Make all of the pointers that pointed to Node now also point to PV...
const vector<PointerValSet*> &PVSToUpdate(Node->getReferrers());
for (unsigned i = 0, e = PVSToUpdate.size(); i != e; ++i)
for (unsigned pn = 0, pne = PVS.size(); pn != pne; ++pn)
PVSToUpdate[i]->add(PVS[pn]);
}
static void CalculateNodeMapping(ShadowDSNode *Shadow, DSNode *Node,
multimap<ShadowDSNode *, DSNode *> &NodeMapping) {
#ifdef DEBUG_IP_CLOSURE
cerr << "Mapping " << (void*)Shadow << " to " << (void*)Node << "\n";
cerr << "Type = '" << Shadow->getType() << "' and '"
<< Node->getType() << "'\n";
cerr << "Shadow Node:\n";
Shadow->print(cerr);
cerr << "\nMapped Node:\n";
Node->print(cerr);
#endif
assert(Shadow->getType() == Node->getType() &&
"Shadow and mapped nodes disagree about type!");
multimap<ShadowDSNode *, DSNode *>::iterator
NI = NodeMapping.lower_bound(Shadow),
NE = NodeMapping.upper_bound(Shadow);
for (; NI != NE; ++NI)
if (NI->second == Node) return; // Already processed node, return.
NodeMapping.insert(make_pair(Shadow, Node)); // Add a mapping...
// Loop over all of the outgoing links in the shadow node...
//
assert(Node->getNumLinks() == Shadow->getNumLinks() &&
"Same type, but different number of links?");
for (unsigned i = 0, e = Shadow->getNumLinks(); i != e; ++i) {
PointerValSet &Link = Shadow->getLink(i);
// Loop over all of the values coming out of this pointer...
for (unsigned l = 0, le = Link.size(); l != le; ++l) {
// If the outgoing node points to a shadow node, map the shadow node to
// all of the outgoing values in Node.
//
if (ShadowDSNode *ShadOut = dyn_cast<ShadowDSNode>(Link[l].Node)) {
PointerValSet &NLink = Node->getLink(i);
for (unsigned ol = 0, ole = NLink.size(); ol != ole; ++ol)
CalculateNodeMapping(ShadOut, NLink[ol].Node, NodeMapping);
}
}
}
}
static void ResolveNodesTo(const PointerVal &FromPtr,
const PointerValSet &ToVals) {
assert(FromPtr.Index == 0 &&
"Resolved node return pointer should be index 0!");
if (!isa<ShadowDSNode>(FromPtr.Node)) return;
ShadowDSNode *Shadow = cast<ShadowDSNode>(FromPtr.Node);
typedef multimap<ShadowDSNode *, DSNode *> ShadNodeMapTy;
ShadNodeMapTy NodeMapping;
for (unsigned i = 0, e = ToVals.size(); i != e; ++i)
CalculateNodeMapping(Shadow, ToVals[i].Node, NodeMapping);
copyEdgesFromTo(Shadow, ToVals);
// Now loop through the shadow node graph, mirroring the edges in the shadow
// graph onto the realized graph...
//
for (ShadNodeMapTy::iterator I = NodeMapping.begin(),
E = NodeMapping.end(); I != E; ++I) {
DSNode *Node = I->second;
ShadowDSNode *ShadNode = I->first;
// Must loop over edges in the shadow graph, adding edges in the real graph
// that correspond to to the edges, but are mapped into real values by the
// NodeMapping.
//
for (unsigned i = 0, e = Node->getNumLinks(); i != e; ++i) {
const PointerValSet &ShadLinks = ShadNode->getLink(i);
PointerValSet &NewLinks = Node->getLink(i);
// Add a link to all of the nodes pointed to by the shadow field...
for (unsigned l = 0, le = ShadLinks.size(); l != le; ++l) {
DSNode *ShadLink = ShadLinks[l].Node;
if (ShadowDSNode *SL = dyn_cast<ShadowDSNode>(ShadLink)) {
// Loop over all of the values in the range
ShadNodeMapTy::iterator St = NodeMapping.lower_bound(SL),
En = NodeMapping.upper_bound(SL);
if (St != En) {
for (; St != En; ++St)
NewLinks.add(PointerVal(St->second, ShadLinks[l].Index));
} else {
// We must retain the shadow node...
NewLinks.add(ShadLinks[l]);
}
} else {
// Otherwise, add a direct link to the data structure pointed to by
// the shadow node...
NewLinks.add(ShadLinks[l]);
}
}
}
}
}
// ResolveNodeTo - The specified node is now known to point to the set of values
// in ToVals, instead of the old shadow node subgraph that it was pointing to.
//
static void ResolveNodeTo(DSNode *Node, const PointerValSet &ToVals) {
assert(Node->getNumLinks() == 1 && "Resolved node can only be a scalar!!");
PointerValSet PVS = Node->getLink(0);
for (unsigned i = 0, e = PVS.size(); i != e; ++i)
ResolveNodesTo(PVS[i], ToVals);
}
// isResolvableCallNode - Return true if node is a call node and it is a call
// node that we can inline...
//
static bool isResolvableCallNode(DSNode *N) {
// Only operate on call nodes...
CallDSNode *CN = dyn_cast<CallDSNode>(N);
if (CN == 0) return false;
// Only operate on call nodes with direct method calls
Function *F = CN->getCall()->getCalledFunction();
if (F == 0) return false;
// Only work on call nodes with direct calls to methods with bodies.
return !F->isExternal();
}
// computeClosure - Replace all of the resolvable call nodes with the contents
// of their corresponding method data structure graph...
//
void FunctionDSGraph::computeClosure(const DataStructure &DS) {
vector<DSNode*>::iterator NI = std::find_if(Nodes.begin(), Nodes.end(),
isResolvableCallNode);
map<Function*, unsigned> InlineCount; // FIXME
// Loop over the resolvable call nodes...
while (NI != Nodes.end()) {
CallDSNode *CN = cast<CallDSNode>(*NI);
Function *F = CN->getCall()->getCalledFunction();
//if (F == Func) return; // Do not do self inlining
// FIXME: Gross hack to prevent explosions when inlining a recursive func.
if (InlineCount[F]++ > 2) return;
Nodes.erase(NI); // Remove the call node from the graph
unsigned CallNodeOffset = NI-Nodes.begin();
// StartNode - The first node of the incorporated graph, last node of the
// preexisting data structure graph...
//
unsigned StartNode = Nodes.size();
// Hold the set of values that correspond to the incorporated methods
// return set.
//
PointerValSet RetVals;
if (F != Func) { // If this is not a recursive call...
// Get the datastructure graph for the new method. Note that we are not
// allowed to modify this graph because it will be the cached graph that
// is returned by other users that want the local datastructure graph for
// a method.
//
const FunctionDSGraph &NewFunction = DS.getDSGraph(F);
// Incorporate a copy of the called function graph into the current graph,
// allowing us to do local transformations to local graph to link
// arguments to call values, and call node to return value...
//
RetVals = cloneFunctionIntoSelf(NewFunction, false);
} else { // We are looking at a recursive function!
StartNode = 0; // Arg nodes start at 0 now...
RetVals = RetNode;
}
// If the function returns a pointer value... Resolve values pointing to
// the shadow nodes pointed to by CN to now point the values in RetVals...
//
if (CN->getNumLinks()) ResolveNodeTo(CN, RetVals);
// If the call node has arguments, process them now!
if (CN->getNumArgs()) {
// The ArgNodes of the incorporated graph should be the nodes starting at
// StartNode, ordered the same way as the call arguments. The arg nodes
// are seperated by a single shadow node, so we need to be sure to step
// over them.
//
unsigned ArgOffset = StartNode;
for (unsigned i = 0, e = CN->getNumArgs(); i != e; ++i) {
// Get the arg node of the incorporated method...
ArgDSNode *ArgNode = cast<ArgDSNode>(Nodes[ArgOffset]);
// Now we make all of the nodes inside of the incorporated method point
// to the real arguments values, not to the shadow nodes for the
// argument.
//
ResolveNodeTo(ArgNode, CN->getArgValues(i));
if (StartNode == 0) { // Self recursion?
ArgOffset += 2; // Skip over the argument & the shadow node...
} else {
// Remove the argnode from the set of nodes in this method...
Nodes.erase(Nodes.begin()+ArgOffset);
// ArgNode is no longer useful, delete now!
delete ArgNode;
ArgOffset++; // Skip over the shadow node for the argument
}
}
}
// Now the call node is completely destructable. Eliminate it now.
delete CN;
// Eliminate shadow nodes that are not distinguishable from some other
// node in the graph...
//
UnlinkUndistinguishableShadowNodes();
// Eliminate shadow nodes that are now extraneous due to linking...
RemoveUnreachableShadowNodes();
//if (F == Func) return; // Only do one self inlining
// Move on to the next call node...
NI = std::find_if(Nodes.begin(), Nodes.end(), isResolvableCallNode);
}
}

View File

@ -0,0 +1,117 @@
//===- DataStructure.cpp - Analysis for data structure identification -------=//
//
// Implement the LLVM data structure analysis library.
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/DataStructure.h"
#include "llvm/Module.h"
#include "llvm/Function.h"
#include <fstream>
#include <algorithm>
//===----------------------------------------------------------------------===//
// DataStructure Class Implementation
//
AnalysisID DataStructure::ID(AnalysisID::create<DataStructure>());
// releaseMemory - If the pass pipeline is done with this pass, we can release
// our memory... here...
void DataStructure::releaseMemory() {
for (InfoMap::iterator I = DSInfo.begin(), E = DSInfo.end(); I != E; ++I) {
delete I->second.first;
delete I->second.second;
}
// Empty map so next time memory is released, data structures are not
// re-deleted.
DSInfo.clear();
}
// print - Print out the analysis results...
void DataStructure::print(std::ostream &O, Module *M) const {
for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I)
if (!(*I)->isExternal()) {
string Filename = "ds." + (*I)->getName() + ".dot";
O << "Writing '" << Filename << "'...\n";
ofstream F(Filename.c_str());
if (F.good()) {
F << "digraph DataStructures {\n"
<< "\tnode [shape=Mrecord];\n"
<< "\tedge [arrowtail=\"dot\"];\n"
<< "\tsize=\"10,7.5\";\n"
<< "\trotate=\"90\";\n";
getDSGraph(*I).printFunction(F, "Local");
getClosedDSGraph(*I).printFunction(F, "Closed");
F << "}\n";
} else {
O << " error opening file for writing!\n";
}
}
}
//===----------------------------------------------------------------------===//
// PointerVal Class Implementation
//
void PointerVal::print(std::ostream &O) const {
if (Node) {
O << " Node: " << Node->getCaption() << "[" << Index << "]\n";
} else {
O << " NULL NODE\n";
}
}
//===----------------------------------------------------------------------===//
// PointerValSet Class Implementation
//
void PointerValSet::addRefs() {
for (unsigned i = 0, e = Vals.size(); i != e; ++i)
Vals[i].Node->addReferrer(this);
}
void PointerValSet::dropRefs() {
for (unsigned i = 0, e = Vals.size(); i != e; ++i)
Vals[i].Node->removeReferrer(this);
}
const PointerValSet &PointerValSet::operator=(const PointerValSet &PVS) {
dropRefs();
Vals.clear();
Vals = PVS.Vals;
addRefs();
return *this;
}
bool PointerValSet::add(const PointerVal &PV, Value *Pointer) {
if (std::find(Vals.begin(), Vals.end(), PV) != Vals.end())
return false;
Vals.push_back(PV);
if (Pointer) PV.Node->addPointer(Pointer);
PV.Node->addReferrer(this);
return true;
}
// removePointerTo - Remove a single pointer val that points to the specified
// node...
void PointerValSet::removePointerTo(DSNode *Node) {
vector<PointerVal>::iterator I = std::find(Vals.begin(), Vals.end(), Node);
assert(I != Vals.end() && "Couldn't remove nonexistent edge!");
Vals.erase(I);
Node->removeReferrer(this);
}
void PointerValSet::print(std::ostream &O) const {
for (unsigned i = 0, e = Vals.size(); i != e; ++i)
Vals[i].print(O);
}

View File

@ -0,0 +1,127 @@
//===- ShadowNodeEliminate.cpp - Optimize away shadow nodes ---------------===//
//
// This file contains two shadow node optimizations:
// 1. UnlinkUndistinguishableShadowNodes - Often, after unification, shadow
// nodes are left around that should not exist anymore. An example is when
// a shadow gets unified with a 'new' node, the following graph gets
// generated: %X -> Shadow, %X -> New. Since all of the edges to the
// shadow node also all go to the New node, we can eliminate the shadow.
//
// 2. RemoveUnreachableShadowNodes - Remove shadow nodes that are not
// reachable from some other node in the graph. Unreachable shadow nodes
// are left lying around because other transforms don't go to the trouble
// or removing them, since this pass exists.
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/DataStructure.h"
#include "llvm/Value.h"
#include "Support/STLExtras.h"
#include <algorithm>
// removeEdgesTo - Erase all edges in the graph that point to the specified node
static void removeEdgesTo(DSNode *Node) {
while (!Node->getReferrers().empty()) {
PointerValSet *PVS = Node->getReferrers().back();
PVS->removePointerTo(Node);
}
}
// UnlinkUndistinguishableShadowNodes - Eliminate shadow nodes that are not
// distinguishable from some other node in the graph...
//
void FunctionDSGraph::UnlinkUndistinguishableShadowNodes() {
// TODO:
}
static void MarkReferredNodesReachable(DSNode *N, vector<ShadowDSNode*> &Nodes,
vector<bool> &Reachable);
static inline void MarkReferredNodeSetReachable(const PointerValSet &PVS,
vector<ShadowDSNode*> &Nodes,
vector<bool> &Reachable) {
for (unsigned i = 0, e = PVS.size(); i != e; ++i)
if (ShadowDSNode *Shad = dyn_cast<ShadowDSNode>(PVS[i].Node))
MarkReferredNodesReachable(Shad, Nodes, Reachable);
}
static void MarkReferredNodesReachable(DSNode *N, vector<ShadowDSNode*> &Nodes,
vector<bool> &Reachable) {
assert(Nodes.size() == Reachable.size());
ShadowDSNode *Shad = dyn_cast<ShadowDSNode>(N);
if (Shad) {
vector<ShadowDSNode*>::iterator I =
std::find(Nodes.begin(), Nodes.end(), Shad);
unsigned i = I-Nodes.begin();
if (Reachable[i]) return; // Recursion detected, abort...
Reachable[i] = true;
}
for (unsigned i = 0, e = N->getNumLinks(); i != e; ++i)
MarkReferredNodeSetReachable(N->getLink(i), Nodes, Reachable);
const std::vector<PointerValSet> *Links = N->getAuxLinks();
if (Links)
for (unsigned i = 0, e = Links->size(); i != e; ++i)
MarkReferredNodeSetReachable((*Links)[i], Nodes, Reachable);
}
void FunctionDSGraph::RemoveUnreachableShadowNodes() {
while (1) {
// Reachable - Contains true if there is an edge from a nonshadow node to
// the numbered node...
//
vector<bool> Reachable(ShadowNodes.size());
// Mark all shadow nodes that have edges from other nodes as reachable.
// Recursively mark any shadow nodes pointed to by the newly live shadow
// nodes as also alive.
//
for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
// Loop over all of the nodes referred and mark them live if they are
// shadow nodes...
MarkReferredNodesReachable(Nodes[i], ShadowNodes, Reachable);
// Mark all nodes in the return set as being reachable...
MarkReferredNodeSetReachable(RetNode, ShadowNodes, Reachable);
// Mark all nodes in the value map as being reachable...
for (std::map<Value*, PointerValSet>::iterator I = ValueMap.begin(),
E = ValueMap.end(); I != E; ++I)
MarkReferredNodeSetReachable(I->second, ShadowNodes, Reachable);
// At this point, all reachable shadow nodes have a true value in the
// Reachable vector. This means that any shadow nodes without an entry in
// the reachable vector are not reachable and should be removed. This is
// a two part process, because we must drop all references before we delete
// the shadow nodes [in case cycles exist].
//
vector<ShadowDSNode*> DeadNodes;
for (unsigned i = 0; i != ShadowNodes.size(); ++i)
if (!Reachable[i]) {
// Track all unreachable nodes...
#if 0
cerr << "Unreachable node eliminated:\n";
ShadowNodes[i]->print(cerr);
#endif
DeadNodes.push_back(ShadowNodes[i]);
ShadowNodes[i]->dropAllReferences(); // Drop references to other nodes
Reachable.erase(Reachable.begin()+i); // Remove from reachable...
ShadowNodes.erase(ShadowNodes.begin()+i); // Remove node entry
--i; // Don't skip the next node.
}
if (DeadNodes.empty()) return; // No more dead nodes...
// All dead nodes are in the DeadNodes vector... delete them now.
for_each(DeadNodes.begin(), DeadNodes.end(), deleter<DSNode>);
}
}

View File

@ -0,0 +1,331 @@
//===- FunctionRepBuilder.cpp - Build the datastructure graph for a method --===//
//
// Build the local datastructure graph for a single method.
//
//===----------------------------------------------------------------------===//
#include "FunctionRepBuilder.h"
#include "llvm/Function.h"
#include "llvm/iMemory.h"
#include "llvm/iPHINode.h"
#include "llvm/iOther.h"
#include "llvm/iTerminators.h"
#include "llvm/DerivedTypes.h"
#include "Support/STLExtras.h"
#include <algorithm>
// synthesizeNode - Create a new shadow node that is to be linked into this
// chain..
// FIXME: This should not take a FunctionRepBuilder as an argument!
//
ShadowDSNode *ShadowDSNode::synthesizeNode(const Type *Ty,
FunctionRepBuilder *Rep) {
// If we are a derived shadow node, defer to our parent to synthesize the node
if (ShadowParent) return ShadowParent->synthesizeNode(Ty, Rep);
// See if we have already synthesized a node of this type...
for (unsigned i = 0, e = SynthNodes.size(); i != e; ++i)
if (SynthNodes[i].first == Ty) return SynthNodes[i].second;
// No we haven't. Do so now and add it to our list of saved nodes...
ShadowDSNode *SN = new ShadowDSNode(Ty, Mod, this);
SynthNodes.push_back(make_pair(Ty, SN));
Rep->addShadowNode(SN);
return SN;
}
// visitOperand - If the specified instruction operand is a global value, add
// a node for it...
//
void InitVisitor::visitOperand(Value *V) {
if (!Rep->ValueMap.count(V)) // Only process it once...
if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
GlobalDSNode *N = new GlobalDSNode(GV);
Rep->Nodes.push_back(N);
Rep->ValueMap[V].add(N);
Rep->addAllUsesToWorkList(GV);
}
}
// visitCallInst - Create a call node for the callinst, and create as shadow
// node if the call returns a pointer value. Check to see if the call node
// uses any global variables...
//
void InitVisitor::visitCallInst(CallInst *CI) {
CallDSNode *C = new CallDSNode(CI);
Rep->Nodes.push_back(C);
Rep->CallMap[CI] = C;
if (isa<PointerType>(CI->getType())) {
// Create a shadow node to represent the memory object that the return
// value points to...
ShadowDSNode *Shad = new ShadowDSNode(C, Func->getParent());
Rep->ShadowNodes.push_back(Shad);
// The return value of the function is a pointer to the shadow value
// just created...
//
C->getLink(0).add(Shad);
// The call instruction returns a pointer to the shadow block...
Rep->ValueMap[CI].add(Shad, CI);
// If the call returns a value with pointer type, add all of the users
// of the call instruction to the work list...
Rep->addAllUsesToWorkList(CI);
}
// Loop over all of the operands of the call instruction (except the first
// one), to look for global variable references...
//
for_each(CI->op_begin()+1, CI->op_end(), // Skip first arg
bind_obj(this, &InitVisitor::visitOperand));
}
// visitAllocationInst - Create an allocation node for the allocation. Since
// allocation instructions do not take pointer arguments, they cannot refer to
// global vars...
//
void InitVisitor::visitAllocationInst(AllocationInst *AI) {
NewDSNode *N = new NewDSNode(AI);
Rep->Nodes.push_back(N);
Rep->ValueMap[AI].add(N, AI);
// Add all of the users of the malloc instruction to the work list...
Rep->addAllUsesToWorkList(AI);
}
// Visit all other instruction types. Here we just scan, looking for uses of
// global variables...
//
void InitVisitor::visitInstruction(Instruction *I) {
for_each(I->op_begin(), I->op_end(),
bind_obj(this, &InitVisitor::visitOperand));
}
// addAllUsesToWorkList - Add all of the instructions users of the specified
// value to the work list for further processing...
//
void FunctionRepBuilder::addAllUsesToWorkList(Value *V) {
//cerr << "Adding all uses of " << V << "\n";
for (Value::use_iterator I = V->use_begin(), E = V->use_end(); I != E; ++I) {
Instruction *Inst = cast<Instruction>(*I);
// When processing global values, it's possible that the instructions on
// the use list are not all in this method. Only add the instructions
// that _are_ in this method.
//
if (Inst->getParent()->getParent() == F->getFunction())
// Only let an instruction occur on the work list once...
if (std::find(WorkList.begin(), WorkList.end(), Inst) == WorkList.end())
WorkList.push_back(Inst);
}
}
void FunctionRepBuilder::initializeWorkList(Function *Func) {
// Add all of the arguments to the method to the graph and add all users to
// the worklists...
//
for (Function::ArgumentListType::iterator I = Func->getArgumentList().begin(),
E = Func->getArgumentList().end(); I != E; ++I)
// Only process arguments that are of pointer type...
if (isa<PointerType>((*I)->getType())) {
ArgDSNode *Arg = new ArgDSNode(*I);
Nodes.push_back(Arg);
// Add a shadow value for it to represent what it is pointing
// to and add this to the value map...
ShadowDSNode *Shad = new ShadowDSNode(Arg, Func->getParent());
ShadowNodes.push_back(Shad);
ValueMap[*I].add(PointerVal(Shad), *I);
// The value of the argument is the shadow value...
Arg->getLink(0).add(Shad);
// Make sure that all users of the argument are processed...
addAllUsesToWorkList(*I);
}
// Iterate over the instructions in the method. Create nodes for malloc and
// call instructions. Add all uses of these to the worklist of instructions
// to process.
//
InitVisitor IV(this, Func);
IV.visit(Func);
}
PointerVal FunctionRepBuilder::getIndexedPointerDest(const PointerVal &InP,
const MemAccessInst *MAI) {
unsigned Index = InP.Index;
const Type *SrcTy = MAI->getPointerOperand()->getType();
for (MemAccessInst::const_op_iterator I = MAI->idx_begin(),
E = MAI->idx_end(); I != E; ++I)
if ((*I)->getType() == Type::UByteTy) { // Look for struct indices...
StructType *STy = cast<StructType>(SrcTy);
unsigned StructIdx = cast<ConstantUInt>(*I)->getValue();
for (unsigned i = 0; i != StructIdx; ++i)
Index += countPointerFields(STy->getContainedType(i));
// Advance SrcTy to be the new element type...
SrcTy = STy->getContainedType(StructIdx);
} else {
// Otherwise, stepping into array or initial pointer, just increment type
SrcTy = cast<SequentialType>(SrcTy)->getElementType();
}
return PointerVal(InP.Node, Index);
}
static PointerValSet &getField(const PointerVal &DestPtr) {
assert(DestPtr.Node != 0);
return DestPtr.Node->getLink(DestPtr.Index);
}
// Reprocessing a GEP instruction is the result of the pointer operand
// changing. This means that the set of possible values for the GEP
// needs to be expanded.
//
void FunctionRepBuilder::visitGetElementPtrInst(GetElementPtrInst *GEP) {
PointerValSet &GEPPVS = ValueMap[GEP]; // PointerValSet to expand
// Get the input pointer val set...
const PointerValSet &SrcPVS = ValueMap[GEP->getOperand(0)];
bool Changed = false; // Process each input value... propogating it.
for (unsigned i = 0, e = SrcPVS.size(); i != e; ++i) {
// Calculate where the resulting pointer would point based on an
// input of 'Val' as the pointer type... and add it to our outgoing
// value set. Keep track of whether or not we actually changed
// anything.
//
Changed |= GEPPVS.add(getIndexedPointerDest(SrcPVS[i], GEP));
}
// If our current value set changed, notify all of the users of our
// value.
//
if (Changed) addAllUsesToWorkList(GEP);
}
void FunctionRepBuilder::visitReturnInst(ReturnInst *RI) {
RetNode.add(ValueMap[RI->getOperand(0)]);
}
void FunctionRepBuilder::visitLoadInst(LoadInst *LI) {
// Only loads that return pointers are interesting...
if (!isa<PointerType>(LI->getType())) return;
const PointerType *DestTy = cast<PointerType>(LI->getType());
const PointerValSet &SrcPVS = ValueMap[LI->getOperand(0)];
PointerValSet &LIPVS = ValueMap[LI];
bool Changed = false;
for (unsigned si = 0, se = SrcPVS.size(); si != se; ++si) {
PointerVal Ptr = getIndexedPointerDest(SrcPVS[si], LI);
PointerValSet &Field = getField(Ptr);
if (Field.size()) { // Field loaded wasn't null?
Changed |= LIPVS.add(Field);
} else if (Ptr.Node->NodeType == DSNode::ShadowNode) {
// If we are loading a null field out of a shadow node, we need to
// synthesize a new shadow node and link it in...
//
ShadowDSNode *Shad = (ShadowDSNode*)Ptr.Node;
ShadowDSNode *SynthNode =
Shad->synthesizeNode(DestTy->getElementType(), this);
Field.add(SynthNode);
Changed |= LIPVS.add(Field);
}
}
if (Changed) addAllUsesToWorkList(LI);
}
void FunctionRepBuilder::visitStoreInst(StoreInst *SI) {
// The only stores that are interesting are stores the store pointers
// into data structures...
//
if (!isa<PointerType>(SI->getOperand(0)->getType())) return;
const PointerValSet &SrcPVS = ValueMap[SI->getOperand(0)];
const PointerValSet &PtrPVS = ValueMap[SI->getOperand(1)];
for (unsigned si = 0, se = SrcPVS.size(); si != se; ++si) {
const PointerVal &SrcPtr = SrcPVS[si];
for (unsigned pi = 0, pe = PtrPVS.size(); pi != pe; ++pi) {
PointerVal Dest = getIndexedPointerDest(PtrPVS[pi], SI);
#if 0
cerr << "Setting Dest:\n";
Dest.print(cerr);
cerr << "to point to Src:\n";
SrcPtr.print(cerr);
#endif
// Add SrcPtr into the Dest field...
if (getField(Dest).add(SrcPtr)) {
// If we modified the dest field, then invalidate everyone that points
// to Dest.
const std::vector<Value*> &Ptrs = Dest.Node->getPointers();
for (unsigned i = 0, e = Ptrs.size(); i != e; ++i)
addAllUsesToWorkList(Ptrs[i]);
}
}
}
}
void FunctionRepBuilder::visitCallInst(CallInst *CI) {
CallDSNode *DSN = CallMap[CI];
unsigned PtrNum = 0, i = 0;
if (isa<Function>(CI->getOperand(0)))
++i; // Not an Indirect function call? Skip the function pointer...
for (unsigned e = CI->getNumOperands(); i != e; ++i)
if (isa<PointerType>(CI->getOperand(i)->getType()))
DSN->addArgValue(PtrNum++, ValueMap[CI->getOperand(i)]);
}
void FunctionRepBuilder::visitPHINode(PHINode *PN) {
assert(isa<PointerType>(PN->getType()) && "Should only update ptr phis");
PointerValSet &PN_PVS = ValueMap[PN];
bool Changed = false;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
Changed |= PN_PVS.add(ValueMap[PN->getIncomingValue(i)],
PN->getIncomingValue(i));
if (Changed) addAllUsesToWorkList(PN);
}
// FunctionDSGraph constructor - Perform the global analysis to determine
// what the data structure usage behavior or a method looks like.
//
FunctionDSGraph::FunctionDSGraph(Function *F) : Func(F) {
FunctionRepBuilder Builder(this);
Nodes = Builder.getNodes();
ShadowNodes = Builder.getShadowNodes();
RetNode = Builder.getRetNode();
ValueMap = Builder.getValueMap();
}

View File

@ -0,0 +1,130 @@
//===- FunctionRepBuilder.h - Structures for graph building ------*- C++ -*--=//
//
// This file defines the FunctionRepBuilder and InitVisitor classes that are
// used to build the local data structure graph for a method.
//
//===----------------------------------------------------------------------===//
#ifndef DATA_STRUCTURE_METHOD_REP_BUILDER_H
#define DATA_STRUCTURE_METHOD_REP_BUILDER_H
#include "llvm/Analysis/DataStructure.h"
#include "llvm/Support/InstVisitor.h"
// DEBUG_DATA_STRUCTURE_CONSTRUCTION - Define this to 1 if you want debug output
#define DEBUG_DATA_STRUCTURE_CONSTRUCTION 0
class FunctionRepBuilder;
// InitVisitor - Used to initialize the worklists for data structure analysis.
// Iterate over the instructions in the method, creating nodes for malloc and
// call instructions. Add all uses of these to the worklist of instructions
// to process.
//
class InitVisitor : public InstVisitor<InitVisitor> {
FunctionRepBuilder *Rep;
Function *Func;
public:
InitVisitor(FunctionRepBuilder *R, Function *F) : Rep(R), Func(F) {}
void visitCallInst(CallInst *CI);
void visitAllocationInst(AllocationInst *AI);
void visitInstruction(Instruction *I);
// visitOperand - If the specified instruction operand is a global value, add
// a node for it...
//
void visitOperand(Value *V);
};
// FunctionRepBuilder - This builder object creates the datastructure graph for
// a method.
//
class FunctionRepBuilder : InstVisitor<FunctionRepBuilder> {
friend class InitVisitor;
FunctionDSGraph *F;
PointerValSet RetNode;
// ValueMap - Mapping between values we are processing and the possible
// datastructures that they may point to...
map<Value*, PointerValSet> ValueMap;
// CallMap - Keep track of which call nodes correspond to which call insns.
// The reverse mapping is stored in the CallDSNodes themselves.
//
map<CallInst*, CallDSNode*> CallMap;
// Worklist - Vector of (pointer typed) instructions to process still...
std::vector<Instruction *> WorkList;
// Nodes - Keep track of all of the resultant nodes, because there may not
// be edges connecting these to anything.
//
std::vector<DSNode*> Nodes;
std::vector<ShadowDSNode*> ShadowNodes;
// addAllUsesToWorkList - Add all of the instructions users of the specified
// value to the work list for further processing...
//
void addAllUsesToWorkList(Value *V);
public:
FunctionRepBuilder(FunctionDSGraph *f) : F(f) {
initializeWorkList(F->getFunction());
processWorkList();
}
void addNode(DSNode *N) { Nodes.push_back(N); }
const std::vector<DSNode*> &getNodes() const { return Nodes; }
void addShadowNode(ShadowDSNode *N) { ShadowNodes.push_back(N); }
const std::vector<ShadowDSNode*> &getShadowNodes() const {return ShadowNodes;}
const PointerValSet &getRetNode() const { return RetNode; }
const map<Value*, PointerValSet> &getValueMap() const { return ValueMap; }
private:
static PointerVal getIndexedPointerDest(const PointerVal &InP,
const MemAccessInst *MAI);
void initializeWorkList(Function *Func);
void processWorkList() {
// While the worklist still has instructions to process, process them!
while (!WorkList.empty()) {
Instruction *I = WorkList.back(); WorkList.pop_back();
#if DEBUG_DATA_STRUCTURE_CONSTRUCTION
cerr << "Processing worklist inst: " << I;
#endif
visit(I); // Dispatch to a visitXXX function based on instruction type...
#if DEBUG_DATA_STRUCTURE_CONSTRUCTION
if (I->hasName() && ValueMap.count(I)) {
cerr << "Inst %" << I->getName() << " value is:\n";
ValueMap[I].print(cerr);
}
#endif
}
}
//===--------------------------------------------------------------------===//
// Functions used to process the worklist of instructions...
//
// Allow the visitor base class to invoke these methods...
friend class InstVisitor<FunctionRepBuilder>;
void visitGetElementPtrInst(GetElementPtrInst *GEP);
void visitReturnInst(ReturnInst *RI);
void visitLoadInst(LoadInst *LI);
void visitStoreInst(StoreInst *SI);
void visitCallInst(CallInst *CI);
void visitPHINode(PHINode *PN);
void visitSetCondInst(SetCondInst *SCI) {} // SetEQ & friends are ignored
void visitFreeInst(FreeInst *FI) {} // Ignore free instructions
void visitInstruction(Instruction *I) {
std::cerr << "\n\n\nUNKNOWN INSTRUCTION type: " << I << "\n\n\n";
assert(0 && "Cannot proceed");
}
};
#endif

View File

@ -0,0 +1,7 @@
LEVEL = ../../..
LIBRARYNAME = datastructure
include $(LEVEL)/Makefile.common

View File

@ -0,0 +1,352 @@
//===- NodeImpl.cpp - Implement the data structure analysis nodes ---------===//
//
// Implement the LLVM data structure analysis library.
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/DataStructure.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/BasicBlock.h"
#include "llvm/iMemory.h"
#include "llvm/iOther.h"
#include "llvm/Assembly/Writer.h"
#include "Support/STLExtras.h"
#include <algorithm>
#include <sstream>
//===----------------------------------------------------------------------===//
// DSNode Class Implementation
//
static void MapPVS(PointerValSet &PVSOut, const PointerValSet &PVSIn,
map<const DSNode*, DSNode*> &NodeMap) {
assert(PVSOut.empty() && "Value set already initialized!");
for (unsigned i = 0, e = PVSIn.size(); i != e; ++i)
PVSOut.add(PointerVal(NodeMap[PVSIn[i].Node], PVSIn[i].Index));
}
unsigned countPointerFields(const Type *Ty) {
switch (Ty->getPrimitiveID()) {
case Type::StructTyID: {
const StructType *ST = cast<StructType>(Ty);
unsigned Sum = 0;
for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i)
Sum += countPointerFields(ST->getContainedType(i));
return Sum;
}
case Type::ArrayTyID:
// All array elements are folded together...
return countPointerFields(cast<ArrayType>(Ty)->getElementType());
case Type::PointerTyID:
return 1;
default: // Some other type, just treat it like a scalar
return 0;
}
}
DSNode::DSNode(enum NodeTy NT, const Type *T) : Ty(T), NodeType(NT) {
// Create field entries for all of the values in this type...
FieldLinks.resize(countPointerFields(getType()));
}
void DSNode::removeReferrer(PointerValSet *PVS) {
vector<PointerValSet*>::iterator I = std::find(Referrers.begin(),
Referrers.end(), PVS);
assert(I != Referrers.end() && "PVS not pointing to node!");
Referrers.erase(I);
}
static void replaceIn(std::string &S, char From, const std::string &To) {
for (unsigned i = 0; i < S.size(); )
if (S[i] == From) {
S.replace(S.begin()+i, S.begin()+i+1,
To.begin(), To.end());
i += To.size();
} else {
++i;
}
}
static void writeEdges(std::ostream &O, const void *SrcNode,
const char *SrcNodePortName, int SrcNodeIdx,
const PointerValSet &VS, const string &EdgeAttr = "") {
for (unsigned j = 0, je = VS.size(); j != je; ++j) {
O << "\t\tNode" << SrcNode << SrcNodePortName;
if (SrcNodeIdx != -1) O << SrcNodeIdx;
O << " -> Node" << VS[j].Node;
if (VS[j].Index)
O << ":g" << VS[j].Index;
if (!EdgeAttr.empty())
O << "[" << EdgeAttr << "]";
O << ";\n";
}
}
static string escapeLabel(const string &In) {
string Label(In);
replaceIn(Label, '\\', "\\\\\\\\"); // Escape caption...
replaceIn(Label, ' ', "\\ ");
replaceIn(Label, '{', "\\{");
replaceIn(Label, '}', "\\}");
return Label;
}
void DSNode::print(std::ostream &O) const {
string Caption = escapeLabel(getCaption());
O << "\t\tNode" << (void*)this << " [ label =\"{" << Caption;
const vector<PointerValSet> *Links = getAuxLinks();
if (Links && !Links->empty()) {
O << "|{";
for (unsigned i = 0; i < Links->size(); ++i) {
if (i) O << "|";
O << "<f" << i << ">";
}
O << "}";
}
if (!FieldLinks.empty()) {
O << "|{";
for (unsigned i = 0; i < FieldLinks.size(); ++i) {
if (i) O << "|";
O << "<g" << i << ">";
}
O << "}";
}
O << "}\"];\n";
if (Links)
for (unsigned i = 0; i < Links->size(); ++i)
writeEdges(O, this, ":f", i, (*Links)[i]);
for (unsigned i = 0; i < FieldLinks.size(); ++i)
writeEdges(O, this, ":g", i, FieldLinks[i]);
}
void DSNode::mapNode(map<const DSNode*, DSNode*> &NodeMap, const DSNode *Old) {
assert(FieldLinks.size() == Old->FieldLinks.size() &&
"Cloned nodes do not have the same number of links!");
for (unsigned j = 0, je = FieldLinks.size(); j != je; ++j)
MapPVS(FieldLinks[j], Old->FieldLinks[j], NodeMap);
}
NewDSNode::NewDSNode(AllocationInst *V)
: DSNode(NewNode, V->getType()->getElementType()), Allocation(V) {
}
string NewDSNode::getCaption() const {
stringstream OS;
if (isa<MallocInst>(Allocation))
OS << "new ";
else
OS << "alloca ";
WriteTypeSymbolic(OS, getType(),
Allocation->getParent()->getParent()->getParent());
if (Allocation->isArrayAllocation())
OS << "[ ]";
return OS.str();
}
GlobalDSNode::GlobalDSNode(GlobalValue *V)
: DSNode(GlobalNode, V->getType()->getElementType()), Val(V) {
}
string GlobalDSNode::getCaption() const {
stringstream OS;
WriteTypeSymbolic(OS, getType(), Val->getParent());
return "global " + OS.str();
}
ShadowDSNode::ShadowDSNode(DSNode *P, Module *M)
: DSNode(ShadowNode, cast<PointerType>(P->getType())->getElementType()) {
Parent = P;
Mod = M;
ShadowParent = 0;
}
ShadowDSNode::ShadowDSNode(const Type *Ty, Module *M, ShadowDSNode *ShadParent)
: DSNode(ShadowNode, Ty) {
Parent = 0;
Mod = M;
ShadowParent = ShadParent;
}
std::string ShadowDSNode::getCaption() const {
stringstream OS;
WriteTypeSymbolic(OS, getType(), Mod);
return "shadow " + OS.str();
}
void ShadowDSNode::mapNode(map<const DSNode*, DSNode*> &NodeMap,
const DSNode *O) {
const ShadowDSNode *Old = (ShadowDSNode*)O;
DSNode::mapNode(NodeMap, Old); // Map base portions first...
// Map our SynthNodes...
assert(SynthNodes.empty() && "Synthnodes already mapped?");
SynthNodes.reserve(Old->SynthNodes.size());
for (unsigned i = 0, e = Old->SynthNodes.size(); i != e; ++i)
SynthNodes.push_back(std::make_pair(Old->SynthNodes[i].first,
(ShadowDSNode*)NodeMap[Old->SynthNodes[i].second]));
}
CallDSNode::CallDSNode(CallInst *ci) : DSNode(CallNode, ci->getType()), CI(ci) {
unsigned NumPtrs = 0;
if (!isa<Function>(ci->getOperand(0)))
NumPtrs++; // Include the method pointer...
for (unsigned i = 1, e = ci->getNumOperands(); i != e; ++i)
if (isa<PointerType>(ci->getOperand(i)->getType()))
NumPtrs++;
ArgLinks.resize(NumPtrs);
}
string CallDSNode::getCaption() const {
stringstream OS;
if (const Function *CM = CI->getCalledFunction())
OS << "call " << CM->getName();
else
OS << "call <indirect>";
OS << "|Ret: ";
WriteTypeSymbolic(OS, getType(),
CI->getParent()->getParent()->getParent());
return OS.str();
}
void CallDSNode::mapNode(map<const DSNode*, DSNode*> &NodeMap,
const DSNode *O) {
const CallDSNode *Old = (CallDSNode*)O;
DSNode::mapNode(NodeMap, Old); // Map base portions first...
assert(ArgLinks.size() == Old->ArgLinks.size() && "# Arguments changed!?");
for (unsigned i = 0, e = Old->ArgLinks.size(); i != e; ++i)
MapPVS(ArgLinks[i], Old->ArgLinks[i], NodeMap);
}
ArgDSNode::ArgDSNode(FunctionArgument *FA)
: DSNode(ArgNode, FA->getType()), FuncArg(FA) {
}
string ArgDSNode::getCaption() const {
stringstream OS;
OS << "arg %" << FuncArg->getName() << "|Ty: ";
WriteTypeSymbolic(OS, getType(), FuncArg->getParent()->getParent());
return OS.str();
}
void FunctionDSGraph::printFunction(std::ostream &O,
const char *Label) const {
O << "\tsubgraph cluster_" << Label << "_Function" << (void*)this << " {\n";
O << "\t\tlabel=\"" << Label << " Function\\ " << Func->getName() << "\";\n";
for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
Nodes[i]->print(O);
for (unsigned i = 0, e = ShadowNodes.size(); i != e; ++i)
ShadowNodes[i]->print(O);
if (RetNode.size()) {
O << "\t\tNode" << (void*)this << Label
<< " [shape=\"ellipse\", label=\"Returns\"];\n";
writeEdges(O, this, Label, -1, RetNode);
}
O << "\n";
for (std::map<Value*, PointerValSet>::const_iterator I = ValueMap.begin(),
E = ValueMap.end(); I != E; ++I) {
if (I->second.size()) { // Only output nodes with edges...
stringstream OS;
WriteTypeSymbolic(OS, I->first->getType(), Func->getParent());
// Create node for I->first
O << "\t\tNode" << (void*)I->first << Label << " [shape=\"box\", label=\""
<< escapeLabel(OS.str()) << "\\n%" << escapeLabel(I->first->getName())
<< "\",fontsize=\"12.0\",color=\"gray70\"];\n";
// add edges from I->first to all pointers in I->second
writeEdges(O, I->first, Label, -1, I->second,
"weight=\"0.9\",color=\"gray70\"");
}
}
O << "\t}\n";
}
// Copy constructor - Since we copy the nodes over, we have to be sure to go
// through and fix pointers to point into the new graph instead of into the old
// graph...
//
FunctionDSGraph::FunctionDSGraph(const FunctionDSGraph &DSG) : Func(DSG.Func) {
RetNode = cloneFunctionIntoSelf(DSG, true);
}
// cloneFunctionIntoSelf - Clone the specified method graph into the current
// method graph, returning the Return's set of the graph. If ValueMap is set
// to true, the ValueMap of the function is cloned into this function as well
// as the data structure graph itself.
//
PointerValSet FunctionDSGraph::cloneFunctionIntoSelf(const FunctionDSGraph &DSG,
bool CloneValueMap) {
map<const DSNode*, DSNode*> NodeMap; // Map from old graph to new graph...
unsigned StartSize = Nodes.size(); // We probably already have nodes...
Nodes.reserve(StartSize+DSG.Nodes.size());
unsigned StartShadowSize = ShadowNodes.size();
ShadowNodes.reserve(StartShadowSize+DSG.ShadowNodes.size());
// Clone all of the nodes, keeping track of the mapping...
for (unsigned i = 0, e = DSG.Nodes.size(); i != e; ++i)
Nodes.push_back(NodeMap[DSG.Nodes[i]] = DSG.Nodes[i]->clone());
// Clone all of the shadow nodes similarly...
for (unsigned i = 0, e = DSG.ShadowNodes.size(); i != e; ++i)
ShadowNodes.push_back(cast<ShadowDSNode>(NodeMap[DSG.ShadowNodes[i]] = DSG.ShadowNodes[i]->clone()));
// Convert all of the links over in the nodes now that the map has been filled
// in all the way...
//
for (unsigned i = 0, e = DSG.Nodes.size(); i != e; ++i)
Nodes[i+StartSize]->mapNode(NodeMap, DSG.Nodes[i]);
for (unsigned i = 0, e = DSG.ShadowNodes.size(); i != e; ++i)
ShadowNodes[i+StartShadowSize]->mapNode(NodeMap, DSG.ShadowNodes[i]);
if (CloneValueMap) {
// Convert value map... the values themselves stay the same, just the nodes
// have to change...
//
for (std::map<Value*,PointerValSet>::const_iterator I =DSG.ValueMap.begin(),
E = DSG.ValueMap.end(); I != E; ++I)
MapPVS(ValueMap[I->first], I->second, NodeMap);
}
// Convert over return node...
PointerValSet RetVals;
MapPVS(RetVals, DSG.RetNode, NodeMap);
return RetVals;
}
FunctionDSGraph::~FunctionDSGraph() {
RetNode.clear();
ValueMap.clear();
for_each(Nodes.begin(), Nodes.end(), mem_fun(&DSNode::dropAllReferences));
for_each(ShadowNodes.begin(), ShadowNodes.end(),
mem_fun(&DSNode::dropAllReferences));
for_each(Nodes.begin(), Nodes.end(), deleter<DSNode>);
for_each(ShadowNodes.begin(), ShadowNodes.end(), deleter<DSNode>);
}