llvm/lib/Analysis/CFLGraph.h
George Burgess IV 723a3ff949 [CFLAA] Add an initial CFLAnders implementation.
This adds an incomplete anders-style implementation for CFLAA. It's
incomplete in that it's missing interprocedural analysis, attrs
handling, etc. and that it needs more tests. More tests and features
will be added in future commits.

Patch by Jia Chen.

Differential Revision: https://reviews.llvm.org/D22291


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@275602 91177308-0d34-0410-b5e6-96231b3b80d8
2016-07-15 19:53:25 +00:00

545 lines
18 KiB
C++

//======- CFLGraph.h - Abstract stratified sets implementation. --------======//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// This file defines CFLGraph, an auxiliary data structure used by CFL-based
/// alias analysis.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ANALYSIS_CFLGRAPH_H
#define LLVM_ANALYSIS_CFLGRAPH_H
#include "AliasAnalysisSummary.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instructions.h"
namespace llvm {
namespace cflaa {
/// \brief The Program Expression Graph (PEG) of CFL analysis
/// CFLGraph is auxiliary data structure used by CFL-based alias analysis to
/// describe flow-insensitive pointer-related behaviors. Given an LLVM function,
/// the main purpose of this graph is to abstract away unrelated facts and
/// translate the rest into a form that can be easily digested by CFL analyses.
/// Each Node in the graph is an InstantiatedValue, and each edge represent a
/// pointer assignment between InstantiatedValue. Pointer
/// references/dereferences are not explicitly stored in the graph: we
/// implicitly assume that for each node (X, I) it has a dereference edge to (X,
/// I+1) and a reference edge to (X, I-1).
class CFLGraph {
public:
typedef InstantiatedValue Node;
struct Edge {
Node Other;
};
typedef std::vector<Edge> EdgeList;
struct NodeInfo {
EdgeList Edges, ReverseEdges;
AliasAttrs Attr;
};
class ValueInfo {
std::vector<NodeInfo> Levels;
public:
bool addNodeToLevel(unsigned Level) {
auto NumLevels = Levels.size();
if (NumLevels > Level)
return false;
Levels.resize(Level + 1);
return true;
}
NodeInfo &getNodeInfoAtLevel(unsigned Level) {
assert(Level < Levels.size());
return Levels[Level];
}
const NodeInfo &getNodeInfoAtLevel(unsigned Level) const {
assert(Level < Levels.size());
return Levels[Level];
}
unsigned getNumLevels() const { return Levels.size(); }
};
private:
typedef DenseMap<Value *, ValueInfo> ValueMap;
ValueMap ValueImpls;
NodeInfo *getNode(Node N) {
auto Itr = ValueImpls.find(N.Val);
if (Itr == ValueImpls.end() || Itr->second.getNumLevels() <= N.DerefLevel)
return nullptr;
return &Itr->second.getNodeInfoAtLevel(N.DerefLevel);
}
public:
typedef ValueMap::const_iterator const_value_iterator;
bool addNode(Node N, AliasAttrs Attr = AliasAttrs()) {
assert(N.Val != nullptr);
auto &ValInfo = ValueImpls[N.Val];
auto Changed = ValInfo.addNodeToLevel(N.DerefLevel);
ValInfo.getNodeInfoAtLevel(N.DerefLevel).Attr |= Attr;
return Changed;
}
void addAttr(Node N, AliasAttrs Attr) {
auto *Info = getNode(N);
assert(Info != nullptr);
Info->Attr |= Attr;
}
void addEdge(Node From, Node To, int64_t Offset = 0) {
auto *FromInfo = getNode(From);
assert(FromInfo != nullptr);
auto *ToInfo = getNode(To);
assert(ToInfo != nullptr);
FromInfo->Edges.push_back(Edge{To});
ToInfo->ReverseEdges.push_back(Edge{From});
}
const NodeInfo *getNode(Node N) const {
auto Itr = ValueImpls.find(N.Val);
if (Itr == ValueImpls.end() || Itr->second.getNumLevels() <= N.DerefLevel)
return nullptr;
return &Itr->second.getNodeInfoAtLevel(N.DerefLevel);
}
AliasAttrs attrFor(Node N) const {
auto *Info = getNode(N);
assert(Info != nullptr);
return Info->Attr;
}
iterator_range<const_value_iterator> value_mappings() const {
return make_range<const_value_iterator>(ValueImpls.begin(),
ValueImpls.end());
}
};
///\brief A builder class used to create CFLGraph instance from a given function
/// The CFL-AA that uses this builder must provide its own type as a template
/// argument. This is necessary for interprocedural processing: CFLGraphBuilder
/// needs a way of obtaining the summary of other functions when callinsts are
/// encountered.
/// As a result, we expect the said CFL-AA to expose a getAliasSummary() public
/// member function that takes a Function& and returns the corresponding summary
/// as a const AliasSummary*.
template <typename CFLAA> class CFLGraphBuilder {
// Input of the builder
CFLAA &Analysis;
const TargetLibraryInfo &TLI;
// Output of the builder
CFLGraph Graph;
SmallVector<Value *, 4> ReturnedValues;
// Helper class
/// Gets the edges our graph should have, based on an Instruction*
class GetEdgesVisitor : public InstVisitor<GetEdgesVisitor, void> {
CFLAA &AA;
const TargetLibraryInfo &TLI;
CFLGraph &Graph;
SmallVectorImpl<Value *> &ReturnValues;
static bool hasUsefulEdges(ConstantExpr *CE) {
// ConstantExpr doesn't have terminators, invokes, or fences, so only
// needs
// to check for compares.
return CE->getOpcode() != Instruction::ICmp &&
CE->getOpcode() != Instruction::FCmp;
}
// Returns possible functions called by CS into the given SmallVectorImpl.
// Returns true if targets found, false otherwise.
static bool getPossibleTargets(CallSite CS,
SmallVectorImpl<Function *> &Output) {
if (auto *Fn = CS.getCalledFunction()) {
Output.push_back(Fn);
return true;
}
// TODO: If the call is indirect, we might be able to enumerate all
// potential
// targets of the call and return them, rather than just failing.
return false;
}
void addNode(Value *Val, AliasAttrs Attr = AliasAttrs()) {
assert(Val != nullptr && Val->getType()->isPointerTy());
if (auto GVal = dyn_cast<GlobalValue>(Val)) {
if (Graph.addNode(InstantiatedValue{GVal, 0},
getGlobalOrArgAttrFromValue(*GVal)))
Graph.addNode(InstantiatedValue{GVal, 1}, getAttrUnknown());
} else if (auto CExpr = dyn_cast<ConstantExpr>(Val)) {
if (hasUsefulEdges(CExpr)) {
if (Graph.addNode(InstantiatedValue{CExpr, 0}))
visitConstantExpr(CExpr);
}
} else
Graph.addNode(InstantiatedValue{Val, 0}, Attr);
}
void addAssignEdge(Value *From, Value *To, int64_t Offset = 0) {
assert(From != nullptr && To != nullptr);
if (!From->getType()->isPointerTy() || !To->getType()->isPointerTy())
return;
addNode(From);
if (To != From) {
addNode(To);
Graph.addEdge(InstantiatedValue{From, 0}, InstantiatedValue{To, 0},
Offset);
}
}
void addDerefEdge(Value *From, Value *To, bool IsRead) {
assert(From != nullptr && To != nullptr);
if (!From->getType()->isPointerTy() || !To->getType()->isPointerTy())
return;
addNode(From);
addNode(To);
if (IsRead) {
Graph.addNode(InstantiatedValue{From, 1});
Graph.addEdge(InstantiatedValue{From, 1}, InstantiatedValue{To, 0});
} else {
Graph.addNode(InstantiatedValue{To, 1});
Graph.addEdge(InstantiatedValue{From, 0}, InstantiatedValue{To, 1});
}
}
void addLoadEdge(Value *From, Value *To) { addDerefEdge(From, To, true); }
void addStoreEdge(Value *From, Value *To) { addDerefEdge(From, To, false); }
public:
GetEdgesVisitor(CFLGraphBuilder &Builder)
: AA(Builder.Analysis), TLI(Builder.TLI), Graph(Builder.Graph),
ReturnValues(Builder.ReturnedValues) {}
void visitInstruction(Instruction &) {
llvm_unreachable("Unsupported instruction encountered");
}
void visitReturnInst(ReturnInst &Inst) {
if (auto RetVal = Inst.getReturnValue()) {
if (RetVal->getType()->isPointerTy()) {
addNode(RetVal);
ReturnValues.push_back(RetVal);
}
}
}
void visitPtrToIntInst(PtrToIntInst &Inst) {
auto *Ptr = Inst.getOperand(0);
addNode(Ptr, getAttrEscaped());
}
void visitIntToPtrInst(IntToPtrInst &Inst) {
auto *Ptr = &Inst;
addNode(Ptr, getAttrUnknown());
}
void visitCastInst(CastInst &Inst) {
auto *Src = Inst.getOperand(0);
addAssignEdge(Src, &Inst);
}
void visitBinaryOperator(BinaryOperator &Inst) {
auto *Op1 = Inst.getOperand(0);
auto *Op2 = Inst.getOperand(1);
addAssignEdge(Op1, &Inst);
addAssignEdge(Op2, &Inst);
}
void visitAtomicCmpXchgInst(AtomicCmpXchgInst &Inst) {
auto *Ptr = Inst.getPointerOperand();
auto *Val = Inst.getNewValOperand();
addStoreEdge(Val, Ptr);
}
void visitAtomicRMWInst(AtomicRMWInst &Inst) {
auto *Ptr = Inst.getPointerOperand();
auto *Val = Inst.getValOperand();
addStoreEdge(Val, Ptr);
}
void visitPHINode(PHINode &Inst) {
for (Value *Val : Inst.incoming_values())
addAssignEdge(Val, &Inst);
}
void visitGetElementPtrInst(GetElementPtrInst &Inst) {
auto *Op = Inst.getPointerOperand();
addAssignEdge(Op, &Inst);
}
void visitSelectInst(SelectInst &Inst) {
// Condition is not processed here (The actual statement producing
// the condition result is processed elsewhere). For select, the
// condition is evaluated, but not loaded, stored, or assigned
// simply as a result of being the condition of a select.
auto *TrueVal = Inst.getTrueValue();
auto *FalseVal = Inst.getFalseValue();
addAssignEdge(TrueVal, &Inst);
addAssignEdge(FalseVal, &Inst);
}
void visitAllocaInst(AllocaInst &Inst) { addNode(&Inst); }
void visitLoadInst(LoadInst &Inst) {
auto *Ptr = Inst.getPointerOperand();
auto *Val = &Inst;
addLoadEdge(Ptr, Val);
}
void visitStoreInst(StoreInst &Inst) {
auto *Ptr = Inst.getPointerOperand();
auto *Val = Inst.getValueOperand();
addStoreEdge(Val, Ptr);
}
void visitVAArgInst(VAArgInst &Inst) {
// We can't fully model va_arg here. For *Ptr = Inst.getOperand(0), it
// does
// two things:
// 1. Loads a value from *((T*)*Ptr).
// 2. Increments (stores to) *Ptr by some target-specific amount.
// For now, we'll handle this like a landingpad instruction (by placing
// the
// result in its own group, and having that group alias externals).
addNode(&Inst, getAttrUnknown());
}
static bool isFunctionExternal(Function *Fn) {
return !Fn->hasExactDefinition();
}
bool tryInterproceduralAnalysis(CallSite CS,
const SmallVectorImpl<Function *> &Fns) {
assert(Fns.size() > 0);
if (CS.arg_size() > MaxSupportedArgsInSummary)
return false;
// Exit early if we'll fail anyway
for (auto *Fn : Fns) {
if (isFunctionExternal(Fn) || Fn->isVarArg())
return false;
// Fail if the caller does not provide enough arguments
assert(Fn->arg_size() <= CS.arg_size());
if (!AA.getAliasSummary(*Fn))
return false;
}
for (auto *Fn : Fns) {
auto Summary = AA.getAliasSummary(*Fn);
assert(Summary != nullptr);
auto &RetParamRelations = Summary->RetParamRelations;
for (auto &Relation : RetParamRelations) {
auto IRelation = instantiateExternalRelation(Relation, CS);
if (IRelation.hasValue()) {
Graph.addNode(IRelation->From);
Graph.addNode(IRelation->To);
Graph.addEdge(IRelation->From, IRelation->To);
}
}
auto &RetParamAttributes = Summary->RetParamAttributes;
for (auto &Attribute : RetParamAttributes) {
auto IAttr = instantiateExternalAttribute(Attribute, CS);
if (IAttr.hasValue())
Graph.addNode(IAttr->IValue, IAttr->Attr);
}
}
return true;
}
void visitCallSite(CallSite CS) {
auto Inst = CS.getInstruction();
// Make sure all arguments and return value are added to the graph first
for (Value *V : CS.args())
if (V->getType()->isPointerTy())
addNode(V);
if (Inst->getType()->isPointerTy())
addNode(Inst);
// Check if Inst is a call to a library function that
// allocates/deallocates
// on the heap. Those kinds of functions do not introduce any aliases.
// TODO: address other common library functions such as realloc(),
// strdup(),
// etc.
if (isMallocLikeFn(Inst, &TLI) || isCallocLikeFn(Inst, &TLI) ||
isFreeCall(Inst, &TLI))
return;
// TODO: Add support for noalias args/all the other fun function
// attributes
// that we can tack on.
SmallVector<Function *, 4> Targets;
if (getPossibleTargets(CS, Targets))
if (tryInterproceduralAnalysis(CS, Targets))
return;
// Because the function is opaque, we need to note that anything
// could have happened to the arguments (unless the function is marked
// readonly or readnone), and that the result could alias just about
// anything, too (unless the result is marked noalias).
if (!CS.onlyReadsMemory())
for (Value *V : CS.args()) {
if (V->getType()->isPointerTy()) {
// The argument itself escapes.
Graph.addAttr(InstantiatedValue{V, 0}, getAttrEscaped());
// The fate of argument memory is unknown. Note that since
// AliasAttrs is transitive with respect to dereference, we only
// need to specify it for the first-level memory.
Graph.addNode(InstantiatedValue{V, 1}, getAttrUnknown());
}
}
if (Inst->getType()->isPointerTy()) {
auto *Fn = CS.getCalledFunction();
if (Fn == nullptr || !Fn->doesNotAlias(0))
// No need to call addNode() since we've added Inst at the
// beginning of this function and we know it is not a global.
Graph.addAttr(InstantiatedValue{Inst, 0}, getAttrUnknown());
}
}
/// Because vectors/aggregates are immutable and unaddressable, there's
/// nothing we can do to coax a value out of them, other than calling
/// Extract{Element,Value}. We can effectively treat them as pointers to
/// arbitrary memory locations we can store in and load from.
void visitExtractElementInst(ExtractElementInst &Inst) {
auto *Ptr = Inst.getVectorOperand();
auto *Val = &Inst;
addLoadEdge(Ptr, Val);
}
void visitInsertElementInst(InsertElementInst &Inst) {
auto *Vec = Inst.getOperand(0);
auto *Val = Inst.getOperand(1);
addAssignEdge(Vec, &Inst);
addStoreEdge(Val, &Inst);
}
void visitLandingPadInst(LandingPadInst &Inst) {
// Exceptions come from "nowhere", from our analysis' perspective.
// So we place the instruction its own group, noting that said group may
// alias externals
addNode(&Inst, getAttrUnknown());
}
void visitInsertValueInst(InsertValueInst &Inst) {
auto *Agg = Inst.getOperand(0);
auto *Val = Inst.getOperand(1);
addAssignEdge(Agg, &Inst);
addStoreEdge(Val, &Inst);
}
void visitExtractValueInst(ExtractValueInst &Inst) {
auto *Ptr = Inst.getAggregateOperand();
addLoadEdge(Ptr, &Inst);
}
void visitShuffleVectorInst(ShuffleVectorInst &Inst) {
auto *From1 = Inst.getOperand(0);
auto *From2 = Inst.getOperand(1);
addAssignEdge(From1, &Inst);
addAssignEdge(From2, &Inst);
}
void visitConstantExpr(ConstantExpr *CE) {
switch (CE->getOpcode()) {
default:
llvm_unreachable("Unknown instruction type encountered!");
// Build the switch statement using the Instruction.def file.
#define HANDLE_INST(NUM, OPCODE, CLASS) \
case Instruction::OPCODE: \
this->visit##OPCODE(*(CLASS *)CE); \
break;
#include "llvm/IR/Instruction.def"
}
}
};
// Helper functions
// Determines whether or not we an instruction is useless to us (e.g.
// FenceInst)
static bool hasUsefulEdges(Instruction *Inst) {
bool IsNonInvokeRetTerminator = isa<TerminatorInst>(Inst) &&
!isa<InvokeInst>(Inst) &&
!isa<ReturnInst>(Inst);
return !isa<CmpInst>(Inst) && !isa<FenceInst>(Inst) &&
!IsNonInvokeRetTerminator;
}
void addArgumentToGraph(Argument &Arg) {
if (Arg.getType()->isPointerTy()) {
Graph.addNode(InstantiatedValue{&Arg, 0},
getGlobalOrArgAttrFromValue(Arg));
// Pointees of a formal parameter is known to the caller
Graph.addNode(InstantiatedValue{&Arg, 1}, getAttrCaller());
}
}
// Given an Instruction, this will add it to the graph, along with any
// Instructions that are potentially only available from said Instruction
// For example, given the following line:
// %0 = load i16* getelementptr ([1 x i16]* @a, 0, 0), align 2
// addInstructionToGraph would add both the `load` and `getelementptr`
// instructions to the graph appropriately.
void addInstructionToGraph(GetEdgesVisitor &Visitor, Instruction &Inst) {
if (!hasUsefulEdges(&Inst))
return;
Visitor.visit(Inst);
}
// Builds the graph needed for constructing the StratifiedSets for the given
// function
void buildGraphFrom(Function &Fn) {
GetEdgesVisitor Visitor(*this);
for (auto &Bb : Fn.getBasicBlockList())
for (auto &Inst : Bb.getInstList())
addInstructionToGraph(Visitor, Inst);
for (auto &Arg : Fn.args())
addArgumentToGraph(Arg);
}
public:
CFLGraphBuilder(CFLAA &Analysis, const TargetLibraryInfo &TLI, Function &Fn)
: Analysis(Analysis), TLI(TLI) {
buildGraphFrom(Fn);
}
const CFLGraph &getCFLGraph() const { return Graph; }
const SmallVector<Value *, 4> &getReturnValues() const {
return ReturnedValues;
}
};
}
}
#endif