[CallGraph] Refine call graph for indirect calls with !callees metadata

For indirect call sites having a small set of possible callees,
!callees metadata can be used to indicate what those callees are.
This patch updates the call graph and lazy call graph analyses so
that they consider this metadata when encountering call sites. For
the call graph, it adds a new external call graph node to the graph
for each unique !callees metadata node. A call graph edge connects
an indirect call site with the external node associated with the
!callees metadata that is attached to it. And there is an edge from
this external node to each of the callees indicated by the metadata.
Similarly, for the lazy call graph, the patch adds Ref edges from a
caller to the possible callees indicated by the metadata.

The primary purpose of the patch is to facilitate iterating over the
functions in a module such that all of the callees indicated by a
given !callees metadata node will be visited prior to the functions
containing call sites annotated by that node. This property is
required by optimizations performing a bottom-up traversal of the
SCC DAG. For example, the inliner can be made to inline through an
indirect call. If the call site is annotated with !callees metadata,
this patch ensures that the inliner will have visited all of the
callees prior to the caller, allowing it to reliably compute the
cost of inlining one or more of the potential callees.

Original patch by @mssimpso. I've made some small changes to get it
to apply, build, and pass tests on the top of tree, as well as
some minor tweaks to formatting and functionality.

Subscribers: mehdi_amini, hiraditya, llvm-commits, mssimpso

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D39339

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@369025 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Mark Lacey 2019-08-15 17:47:53 +00:00
parent 49aed1565e
commit c4ed7f707a
8 changed files with 159 additions and 7 deletions

View File

@ -46,6 +46,7 @@
#define LLVM_ANALYSIS_CALLGRAPH_H
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
@ -76,9 +77,22 @@ class CallGraph {
using FunctionMapTy =
std::map<const Function *, std::unique_ptr<CallGraphNode>>;
/// \brief A type for maintaining dummy nodes.
///
/// Dummy nodes (i.e., nodes having a null function) include, for example,
/// those created to represent !callees metadata. We use a void pointer as
/// the key to allow for various kinds of dummy nodes. We use a MapVector to
/// ensure a deterministic iteration order (there's no good way to sort dummy
/// nodes). A deterministic iteration order is primarily useful for printing.
using DummyNodeMapTy =
MapVector<const MDNode *, std::unique_ptr<CallGraphNode>>;
/// A map from \c Function* to \c CallGraphNode*.
FunctionMapTy FunctionMap;
/// \brief A map for maintaining dummy nodes.
DummyNodeMapTy DummyNodeMap;
/// This node has edges to all external functions and those internal
/// functions that have their address taken.
CallGraphNode *ExternalCallingNode;
@ -155,6 +169,9 @@ public:
/// Similar to operator[], but this will insert a new CallGraphNode for
/// \c F if one does not already exist.
CallGraphNode *getOrInsertFunction(const Function *F);
/// \brief Return the dummy node associated with the given metadata node.
CallGraphNode *getOrInsertNodeForCalleesMD(MDNode *Callees);
};
/// A node in the call graph for a module.

View File

@ -663,6 +663,30 @@ public:
return false;
}
/// Return the set of functions this call site is known to call. For direct
/// calls, the returned set contains the called function. For indirect calls,
/// this function collects known callees from !callees metadata, if present.
SmallVector<FunTy *, 1> getKnownCallees() {
SmallVector<FunTy *, 1> Callees;
if (getCalledFunction()) {
// If the call site is direct, just add the called function to the set.
Callees.push_back(getCalledFunction());
return Callees;
}
InstrTy *Inst = getInstruction();
if (auto *Node = Inst->getMetadata(LLVMContext::MD_callees)) {
// Otherwise, if the call site is indirect, collect the known callees from
// !callees metadata if present.
for (const MDOperand &Op : Node->operands())
if (auto *MDConstant = mdconst::extract_or_null<Constant>(Op))
Callees.push_back(cast<FunTy>(MDConstant));
}
return Callees;
}
private:
IterTy getCallee() const {
return cast<CallBase>(getInstruction())->op_end() - 1;

View File

@ -449,7 +449,7 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
// irrelevant.
for (Instruction &I : instructions(F))
if (auto CS = CallSite(&I))
if (Function *Callee = CS.getCalledFunction())
for (Function *Callee : CS.getKnownCallees()) {
if (Visited.insert(Callee).second && !Callee->isDeclaration()) {
Node &CalleeN = *G.lookup(*Callee);
Edge *E = N->lookup(CalleeN);
@ -467,6 +467,7 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
if (!E->isCall())
PromotedRefTargets.insert(&CalleeN);
}
}
// Now walk all references.
for (Instruction &I : instructions(F))

View File

@ -37,6 +37,7 @@ CallGraph::CallGraph(Module &M)
CallGraph::CallGraph(CallGraph &&Arg)
: M(Arg.M), FunctionMap(std::move(Arg.FunctionMap)),
DummyNodeMap(std::move(Arg.DummyNodeMap)),
ExternalCallingNode(Arg.ExternalCallingNode),
CallsExternalNode(std::move(Arg.CallsExternalNode)) {
Arg.FunctionMap.clear();
@ -53,6 +54,8 @@ CallGraph::~CallGraph() {
#ifndef NDEBUG
for (auto &I : FunctionMap)
I.second->allReferencesDropped();
for (auto &N : DummyNodeMap)
N.second->allReferencesDropped();
#endif
}
@ -74,7 +77,14 @@ void CallGraph::addToCallGraph(Function *F) {
for (Instruction &I : BB) {
if (auto *Call = dyn_cast<CallBase>(&I)) {
const Function *Callee = Call->getCalledFunction();
if (!Callee || !Intrinsic::isLeaf(Callee->getIntrinsicID()))
MDNode *CalleesMD = I.getMetadata(LLVMContext::MD_callees);
if (!Callee && CalleesMD)
// If an indirect call site has !callees metadata indicating its
// possible callees, we add an edge from the call site to a dummy
// node. When we construct the dummy node, we add edges from it to
// the functions indicated in the !callees metadata.
Node->addCalledFunction(Call, getOrInsertNodeForCalleesMD(CalleesMD));
else if (!Callee || !Intrinsic::isLeaf(Callee->getIntrinsicID()))
// Indirect calls of intrinsics are not allowed so no need to check.
// We can be more precise here by using TargetArg returned by
// Intrinsic::isLeaf.
@ -105,6 +115,11 @@ void CallGraph::print(raw_ostream &OS) const {
for (CallGraphNode *CN : Nodes)
CN->print(OS);
// The iteration order of the DummyNodeMap is deterministic, so we don't need
// to sort the nodes. Just print them.
for (auto &Entry : DummyNodeMap)
Entry.second->print(OS);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@ -120,6 +135,12 @@ LLVM_DUMP_METHOD void CallGraph::dump() const { print(dbgs()); }
Function *CallGraph::removeFunctionFromModule(CallGraphNode *CGN) {
assert(CGN->empty() && "Cannot remove function from call "
"graph if it references other functions!");
// If any dummy node references the node for the given function, we first
// need to remove those edges.
for (auto &Entry : DummyNodeMap)
Entry.second->removeAnyCallEdgeTo(CGN);
Function *F = CGN->getFunction(); // Get the function for the call graph node
FunctionMap.erase(F); // Remove the call graph node from the map
@ -154,6 +175,21 @@ CallGraphNode *CallGraph::getOrInsertFunction(const Function *F) {
return CGN.get();
}
CallGraphNode *CallGraph::getOrInsertNodeForCalleesMD(MDNode *Callees) {
auto &CGN = DummyNodeMap[Callees];
if (CGN)
return CGN.get();
CGN = llvm::make_unique<CallGraphNode>(nullptr);
for (const MDOperand &Op : Callees->operands())
if (auto *MDConstant = mdconst::extract_or_null<Constant>(Op)) {
auto *F = cast<Function>(MDConstant);
assert(!F->isIntrinsic());
CGN->addCalledFunction(nullptr, getOrInsertFunction(F));
}
return CGN.get();
}
//===----------------------------------------------------------------------===//
// Implementations of the CallGraphNode class methods.
//
@ -171,7 +207,7 @@ void CallGraphNode::print(raw_ostream &OS) const {
if (Function *FI = I.second->getFunction())
OS << "function '" << FI->getName() <<"'\n";
else
OS << "external node\n";
OS << "<<null function>><<" << I.second << ">>\n";
}
OS << '\n';
}

View File

@ -100,12 +100,14 @@ LazyCallGraph::EdgeSequence &LazyCallGraph::Node::populateSlow() {
for (BasicBlock &BB : *F)
for (Instruction &I : BB) {
if (auto CS = CallSite(&I))
if (Function *Callee = CS.getCalledFunction())
for (Function *Callee : CS.getKnownCallees())
if (!Callee->isDeclaration())
if (Callees.insert(Callee).second) {
Visited.insert(Callee);
auto EdgeK = CS.getCalledFunction() ? LazyCallGraph::Edge::Call
: LazyCallGraph::Edge::Ref;
addEdge(Edges->Edges, Edges->EdgeIndexMap, G->get(*Callee),
LazyCallGraph::Edge::Call);
EdgeK);
}
for (Value *Op : I.operand_values())

View File

@ -0,0 +1,34 @@
; RUN: opt < %s -print-callgraph -disable-output 2>&1 | FileCheck %s
; CHECK: Call graph node <<null function>><<{{.+}}>> #uses=0
; CHECK-DAG: CS<0x0> calls function 'main'
; CHECK-DAG: CS<0x0> calls function 'add'
; CHECK-DAG: CS<0x0> calls function 'sub'
;
; CHECK: Call graph node for function: 'add'<<{{.+}}>> #uses=2
;
; CHECK: Call graph node for function: 'main'<<{{.+}}>> #uses=1
; CHECK-NEXT: CS<{{.+}}> calls <<null function>><<[[CALLEES:.+]]>>
;
; CHECK: Call graph node for function: 'sub'<<{{.+}}>> #uses=2
;
; CHECK: Call graph node <<null function>><<[[CALLEES]]>> #uses=1
; CHECK-DAG: CS<0x0> calls function 'add'
; CHECK-DAG: CS<0x0> calls function 'sub'
define i64 @main(i64 %x, i64 %y, i64 (i64, i64)* %binop) {
%tmp0 = call i64 %binop(i64 %x, i64 %y), !callees !0
ret i64 %tmp0
}
define i64 @add(i64 %x, i64 %y) {
%tmp0 = add i64 %x, %y
ret i64 %tmp0
}
define i64 @sub(i64 %x, i64 %y) {
%tmp0 = sub i64 %x, %y
ret i64 %tmp0
}
!0 = !{i64 (i64, i64)* @add, i64 (i64, i64)* @sub}

View File

@ -26,7 +26,7 @@ entry:
; CHECK: CS<0x0> calls function 'f'
; CHECK: Call graph node for function: 'calls_patchpoint'
; CHECK-NEXT: CS<[[addr_1:[^>]+]]> calls external node
; CHECK-NEXT: CS<[[addr_1:[^>]+]]> calls <<null function>>
; CHECK: Call graph node for function: 'calls_statepoint'
; CHECK-NEXT: CS<[[addr_0:[^>]+]]> calls external node
; CHECK-NEXT: CS<[[addr_0:[^>]+]]> calls <<null function>>

View File

@ -0,0 +1,38 @@
; RUN: opt < %s -passes=print-lcg -disable-output 2>&1 | FileCheck %s
; CHECK: Edges in function: main
; CHECK-DAG: ref -> add
; CHECK-DAG: ref -> sub
;
; CHECK: Edges in function: add
;
; CHECK: Edges in function: sub
;
; CHECK: RefSCC with 1 call SCCs:
; CHECK-NEXT: SCC with 1 functions:
; CHECK-NEXT: sub
;
; CHECK: RefSCC with 1 call SCCs:
; CHECK-NEXT: SCC with 1 functions:
; CHECK-NEXT: add
;
; CHECK: RefSCC with 1 call SCCs:
; CHECK-NEXT: SCC with 1 functions:
; CHECK-NEXT: main
define i64 @main(i64 %x, i64 %y, i64 (i64, i64)* %binop) {
%tmp0 = call i64 %binop(i64 %x, i64 %y), !callees !0
ret i64 %tmp0
}
define i64 @add(i64 %x, i64 %y) {
%tmp0 = add i64 %x, %y
ret i64 %tmp0
}
define i64 @sub(i64 %x, i64 %y) {
%tmp0 = sub i64 %x, %y
ret i64 %tmp0
}
!0 = !{i64 (i64, i64)* @add, i64 (i64, i64)* @sub}