mirror of
https://github.com/RPCS3/llvm.git
synced 2026-01-31 01:25:19 +01:00
Summary: This patch adds support for including a full reference graph including call graph edges and other GV references in the summary. The reference graph edges can be used to make importing decisions without materializing any source modules, can be used in the plugin to make file staging decisions for distributed build systems, and is expected to have other uses. The call graph edges are recorded in each function summary in the bitcode via a list of <CalleeValueId, StaticCount> pairs when no PGO data exists, or <CalleeValueId, StaticCount, ProfileCount> triples when there is PGO, where the ValueId can be mapped to the function GUID via the ValueSymbolTable. In the function index in memory, the call graph edges reference the target via the CalleeGUID instead of the CalleeValueId. The reference graph edges are recorded in each summary record with a list of referenced value IDs, which can be mapped to value GUID via the ValueSymbolTable. Additionally, a new summary record type is added to record references from global variable initializers. A number of bitcode records and data structures have been renamed to reflect the newly expanded scope of the summary beyond functions. More cleanup will follow. Reviewers: joker.eph, davidxl Subscribers: joker.eph, llvm-commits Differential Revision: http://reviews.llvm.org/D17212 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@263275 91177308-0d34-0410-b5e6-96231b3b80d8
391 lines
13 KiB
C++
391 lines
13 KiB
C++
//===-ThinLTOCodeGenerator.cpp - LLVM Link Time Optimizer -----------------===//
|
|
//
|
|
// The LLVM Compiler Infrastructure
|
|
//
|
|
// This file is distributed under the University of Illinois Open Source
|
|
// License. See LICENSE.TXT for details.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file implements the Thin Link Time Optimization library. This library is
|
|
// intended to be used by linker to optimize code at link time.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "llvm/LTO/ThinLTOCodeGenerator.h"
|
|
|
|
#include "llvm/ADT/StringExtras.h"
|
|
#include "llvm/ADT/Statistic.h"
|
|
#include "llvm/Analysis/TargetLibraryInfo.h"
|
|
#include "llvm/Analysis/TargetTransformInfo.h"
|
|
#include "llvm/Bitcode/ReaderWriter.h"
|
|
#include "llvm/Bitcode/BitcodeWriterPass.h"
|
|
#include "llvm/ExecutionEngine/ObjectMemoryBuffer.h"
|
|
#include "llvm/IR/LLVMContext.h"
|
|
#include "llvm/IR/DiagnosticPrinter.h"
|
|
#include "llvm/IR/LegacyPassManager.h"
|
|
#include "llvm/IR/Mangler.h"
|
|
#include "llvm/IRReader/IRReader.h"
|
|
#include "llvm/Linker/Linker.h"
|
|
#include "llvm/MC/SubtargetFeature.h"
|
|
#include "llvm/Object/FunctionIndexObjectFile.h"
|
|
#include "llvm/Support/SourceMgr.h"
|
|
#include "llvm/Support/TargetRegistry.h"
|
|
#include "llvm/Support/ThreadPool.h"
|
|
#include "llvm/Target/TargetMachine.h"
|
|
#include "llvm/Transforms/IPO.h"
|
|
#include "llvm/Transforms/IPO/FunctionImport.h"
|
|
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
|
|
#include "llvm/Transforms/ObjCARC.h"
|
|
#include "llvm/Transforms/Utils/FunctionImportUtils.h"
|
|
|
|
using namespace llvm;
|
|
|
|
namespace llvm {
|
|
// Flags -discard-value-names, defined in LTOCodeGenerator.cpp
|
|
extern cl::opt<bool> LTODiscardValueNames;
|
|
}
|
|
|
|
namespace {
|
|
|
|
// "-threads" command line option: the value handed to the ThreadPool created
// in ThinLTOCodeGenerator::run(). Initialized from
// std::thread::hardware_concurrency().
static cl::opt<int>
    ThreadCount("threads", cl::init(std::thread::hardware_concurrency()));
|
|
|
|
static void diagnosticHandler(const DiagnosticInfo &DI) {
|
|
DiagnosticPrinterRawOStream DP(errs());
|
|
DI.print(DP);
|
|
errs() << '\n';
|
|
}
|
|
|
|
// Simple helper to load a module from bitcode
|
|
static std::unique_ptr<Module>
|
|
loadModuleFromBuffer(const MemoryBufferRef &Buffer, LLVMContext &Context,
|
|
bool Lazy) {
|
|
SMDiagnostic Err;
|
|
ErrorOr<std::unique_ptr<Module>> ModuleOrErr(nullptr);
|
|
if (Lazy) {
|
|
ModuleOrErr =
|
|
getLazyBitcodeModule(MemoryBuffer::getMemBuffer(Buffer, false), Context,
|
|
/* ShouldLazyLoadMetadata */ Lazy);
|
|
} else {
|
|
ModuleOrErr = parseBitcodeFile(Buffer, Context);
|
|
}
|
|
if (std::error_code EC = ModuleOrErr.getError()) {
|
|
Err = SMDiagnostic(Buffer.getBufferIdentifier(), SourceMgr::DK_Error,
|
|
EC.message());
|
|
Err.print("ThinLTO", errs());
|
|
report_fatal_error("Can't load module, abort.");
|
|
}
|
|
return std::move(ModuleOrErr.get());
|
|
}
|
|
|
|
// Simple helper to save temporary files for debug.
|
|
static void saveTempBitcode(const Module &TheModule, StringRef TempDir,
|
|
unsigned count, StringRef Suffix) {
|
|
if (TempDir.empty())
|
|
return;
|
|
// User asked to save temps, let dump the bitcode file after import.
|
|
auto SaveTempPath = TempDir + llvm::utostr(count) + Suffix;
|
|
std::error_code EC;
|
|
raw_fd_ostream OS(SaveTempPath.str(), EC, sys::fs::F_None);
|
|
if (EC)
|
|
report_fatal_error(Twine("Failed to open ") + SaveTempPath +
|
|
" to save optimized bitcode\n");
|
|
WriteBitcodeToFile(&TheModule, OS, true, false);
|
|
}
|
|
|
|
// Build a map from buffer identifier to buffer for every entry of \p Modules.
// Identifiers are expected to be unique (checked by an assertion).
static StringMap<MemoryBufferRef>
generateModuleMap(const std::vector<MemoryBufferRef> &Modules) {
  StringMap<MemoryBufferRef> Result;
  for (const MemoryBufferRef &Buf : Modules) {
    StringRef Id = Buf.getBufferIdentifier();
    assert(Result.find(Id) == Result.end() &&
           "Expect unique Buffer Identifier");
    Result[Id] = Buf;
  }
  return Result;
}
|
|
|
|
/// Provide a "loader" for the FunctionImporter to access function from other
|
|
/// modules.
|
|
class ModuleLoader {
|
|
/// The context that will be used for importing.
|
|
LLVMContext &Context;
|
|
|
|
/// Map from Module identifier to MemoryBuffer. Used by clients like the
|
|
/// FunctionImported to request loading a Module.
|
|
StringMap<MemoryBufferRef> &ModuleMap;
|
|
|
|
public:
|
|
ModuleLoader(LLVMContext &Context, StringMap<MemoryBufferRef> &ModuleMap)
|
|
: Context(Context), ModuleMap(ModuleMap) {}
|
|
|
|
/// Load a module on demand.
|
|
std::unique_ptr<Module> operator()(StringRef Identifier) {
|
|
return loadModuleFromBuffer(ModuleMap[Identifier], Context, /*Lazy*/ true);
|
|
}
|
|
};
|
|
|
|
// Run renameModuleForThinLTO() on \p TheModule using \p Index; a failure
// reported by it is fatal.
static void promoteModule(Module &TheModule, const FunctionInfoIndex &Index) {
  if (!renameModuleForThinLTO(TheModule, Index))
    return;
  report_fatal_error("renameModuleForThinLTO failed");
}
|
|
|
|
// Import functions into \p TheModule with a FunctionImporter driven by
// \p Index. Source modules are loaded on demand from \p ModuleMap, in the
// LLVMContext of \p TheModule.
static void crossImportIntoModule(Module &TheModule,
                                  const FunctionInfoIndex &Index,
                                  StringMap<MemoryBufferRef> &ModuleMap) {
  ModuleLoader OnDemandLoader(TheModule.getContext(), ModuleMap);
  FunctionImporter(Index, OnDemandLoader).importFunctions(TheModule);
}
|
|
|
|
// Run the ThinLTO optimization pipeline over \p TheModule, using \p TM for
// the target triple and the target transform info.
static void optimizeModule(Module &TheModule, TargetMachine &TM) {
  // Configure the pass manager builder.
  PassManagerBuilder Builder;
  // FIXME: should get it from the bitcode?
  Builder.OptLevel = 3;
  Builder.LoopVectorize = true;
  Builder.SLPVectorize = true;
  Builder.VerifyInput = true;
  Builder.VerifyOutput = false;
  Builder.LibraryInfo = new TargetLibraryInfoImpl(TM.getTargetTriple());
  Builder.Inliner = createFunctionInliningPass();

  legacy::PassManager Passes;

  // TTI goes first: it is required to inform the vectorizer about register
  // size, for instance.
  Passes.add(createTargetTransformInfoWrapperPass(TM.getTargetIRAnalysis()));

  // Then the optimization pipeline itself, followed by ObjC ARC contraction.
  Builder.populateThinLTOPassManager(Passes);
  Passes.add(createObjCARCContractPass());

  Passes.run(TheModule);
}
|
|
|
|
// Run code generation for \p TheModule with \p TM and return the emitted
// object file as an in-memory buffer. Failing to set up the codegen passes is
// a fatal error.
std::unique_ptr<MemoryBuffer> codegenModule(Module &TheModule,
                                            TargetMachine &TM) {
  SmallVector<char, 128> ObjectBytes;

  {
    // The stream is scoped so that it is gone before ObjectBytes is moved
    // into the result.
    raw_svector_ostream ObjectStream(ObjectBytes);
    legacy::PassManager CodeGenPasses;
    const bool SetupFailed = TM.addPassesToEmitFile(
        CodeGenPasses, ObjectStream, TargetMachine::CGFT_ObjectFile,
        /* DisableVerify */ true);
    if (SetupFailed)
      report_fatal_error("Failed to setup codegen");

    // Running the passes writes the binary into ObjectBytes.
    CodeGenPasses.run(TheModule);
  }

  return make_unique<ObjectMemoryBuffer>(std::move(ObjectBytes));
}
|
|
|
|
static std::unique_ptr<MemoryBuffer>
|
|
ProcessThinLTOModule(Module &TheModule, const FunctionInfoIndex &Index,
|
|
StringMap<MemoryBufferRef> &ModuleMap, TargetMachine &TM,
|
|
ThinLTOCodeGenerator::CachingOptions CacheOptions,
|
|
StringRef SaveTempsDir, unsigned count) {
|
|
|
|
// Save temps: after IPO.
|
|
saveTempBitcode(TheModule, SaveTempsDir, count, ".1.IPO.bc");
|
|
|
|
// "Benchmark"-like optimization: single-source case
|
|
bool SingleModule = (ModuleMap.size() == 1);
|
|
|
|
if (!SingleModule) {
|
|
promoteModule(TheModule, Index);
|
|
|
|
// Save temps: after promotion.
|
|
saveTempBitcode(TheModule, SaveTempsDir, count, ".2.promoted.bc");
|
|
|
|
crossImportIntoModule(TheModule, Index, ModuleMap);
|
|
|
|
// Save temps: after cross-module import.
|
|
saveTempBitcode(TheModule, SaveTempsDir, count, ".3.imported.bc");
|
|
}
|
|
|
|
optimizeModule(TheModule, TM);
|
|
|
|
saveTempBitcode(TheModule, SaveTempsDir, count, ".3.opt.bc");
|
|
|
|
return codegenModule(TheModule, TM);
|
|
}
|
|
|
|
// Initialize the TargetMachine builder for a given Triple
|
|
static void initTMBuilder(TargetMachineBuilder &TMBuilder,
|
|
const Triple &TheTriple) {
|
|
// Set a default CPU for Darwin triples (copied from LTOCodeGenerator).
|
|
// FIXME this looks pretty terrible...
|
|
if (TMBuilder.MCpu.empty() && TheTriple.isOSDarwin()) {
|
|
if (TheTriple.getArch() == llvm::Triple::x86_64)
|
|
TMBuilder.MCpu = "core2";
|
|
else if (TheTriple.getArch() == llvm::Triple::x86)
|
|
TMBuilder.MCpu = "yonah";
|
|
else if (TheTriple.getArch() == llvm::Triple::aarch64)
|
|
TMBuilder.MCpu = "cyclone";
|
|
}
|
|
TMBuilder.TheTriple = std::move(TheTriple);
|
|
}
|
|
|
|
} // end anonymous namespace
|
|
|
|
// Register the bitcode \p Data under \p Identifier. The first module added
// initializes the TargetMachine builder from its target triple; in asserts
// builds every later module is checked to carry the same triple.
void ThinLTOCodeGenerator::addModule(StringRef Identifier, StringRef Data) {
  MemoryBufferRef Buffer(Data, Identifier);
  if (!Modules.empty()) {
#ifndef NDEBUG
    LLVMContext Context;
    assert(TMBuilder.TheTriple.str() ==
               getBitcodeTargetTriple(Buffer, Context) &&
           "ThinLTO modules with different triple not supported");
#endif
  } else {
    // First module added, so initialize the triple and some options
    LLVMContext Context;
    initTMBuilder(TMBuilder, Triple(getBitcodeTargetTriple(Buffer, Context)));
  }
  Modules.push_back(Buffer);
}
|
|
|
|
// Record \p Name in the PreservedSymbols set.
void ThinLTOCodeGenerator::preserveSymbol(StringRef Name) {
  PreservedSymbols.insert(Name);
}
|
|
|
|
// Record \p Name in the CrossReferencedSymbols set.
void ThinLTOCodeGenerator::crossReferenceSymbol(StringRef Name) {
  CrossReferencedSymbols.insert(Name);
}
|
|
|
|
// TargetMachine factory
|
|
std::unique_ptr<TargetMachine> TargetMachineBuilder::create() const {
|
|
std::string ErrMsg;
|
|
const Target *TheTarget =
|
|
TargetRegistry::lookupTarget(TheTriple.str(), ErrMsg);
|
|
if (!TheTarget) {
|
|
report_fatal_error("Can't load target for this Triple: " + ErrMsg);
|
|
}
|
|
|
|
// Use MAttr as the default set of features.
|
|
SubtargetFeatures Features(MAttr);
|
|
Features.getDefaultSubtargetFeatures(TheTriple);
|
|
std::string FeatureStr = Features.getString();
|
|
return std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
|
|
TheTriple.str(), MCpu, FeatureStr, Options, RelocModel,
|
|
CodeModel::Default, CGOptLevel));
|
|
}
|
|
|
|
/**
|
|
* Produce the combined function index from all the bitcode files:
|
|
* "thin-link".
|
|
*/
|
|
std::unique_ptr<FunctionInfoIndex> ThinLTOCodeGenerator::linkCombinedIndex() {
|
|
std::unique_ptr<FunctionInfoIndex> CombinedIndex;
|
|
uint64_t NextModuleId = 0;
|
|
for (auto &ModuleBuffer : Modules) {
|
|
ErrorOr<std::unique_ptr<object::FunctionIndexObjectFile>> ObjOrErr =
|
|
object::FunctionIndexObjectFile::create(ModuleBuffer, diagnosticHandler,
|
|
false);
|
|
if (std::error_code EC = ObjOrErr.getError()) {
|
|
// FIXME diagnose
|
|
errs() << "error: can't create FunctionIndexObjectFile for buffer: "
|
|
<< EC.message() << "\n";
|
|
return nullptr;
|
|
}
|
|
auto Index = (*ObjOrErr)->takeIndex();
|
|
if (CombinedIndex) {
|
|
CombinedIndex->mergeFrom(std::move(Index), ++NextModuleId);
|
|
} else {
|
|
CombinedIndex = std::move(Index);
|
|
}
|
|
}
|
|
return CombinedIndex;
|
|
}
|
|
|
|
/**
|
|
* Perform promotion and renaming of exported internal functions.
|
|
*/
|
|
void ThinLTOCodeGenerator::promote(Module &TheModule,
|
|
FunctionInfoIndex &Index) {
|
|
promoteModule(TheModule, Index);
|
|
}
|
|
|
|
/**
|
|
* Perform cross-module importing for the module identified by ModuleIdentifier.
|
|
*/
|
|
void ThinLTOCodeGenerator::crossModuleImport(Module &TheModule,
|
|
FunctionInfoIndex &Index) {
|
|
auto ModuleMap = generateModuleMap(Modules);
|
|
crossImportIntoModule(TheModule, Index, ModuleMap);
|
|
}
|
|
|
|
/**
|
|
* Perform post-importing ThinLTO optimizations.
|
|
*/
|
|
void ThinLTOCodeGenerator::optimize(Module &TheModule) {
|
|
initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple()));
|
|
optimizeModule(TheModule, *TMBuilder.create());
|
|
}
|
|
|
|
/**
|
|
* Perform ThinLTO CodeGen.
|
|
*/
|
|
std::unique_ptr<MemoryBuffer> ThinLTOCodeGenerator::codegen(Module &TheModule) {
|
|
initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple()));
|
|
return codegenModule(TheModule, *TMBuilder.create());
|
|
}
|
|
|
|
// Main entry point for the ThinLTO processing
|
|
void ThinLTOCodeGenerator::run() {
|
|
// Sequential linking phase
|
|
auto Index = linkCombinedIndex();
|
|
|
|
// Save temps: index.
|
|
if (!SaveTempsDir.empty()) {
|
|
auto SaveTempPath = SaveTempsDir + "index.bc";
|
|
std::error_code EC;
|
|
raw_fd_ostream OS(SaveTempPath, EC, sys::fs::F_None);
|
|
if (EC)
|
|
report_fatal_error(Twine("Failed to open ") + SaveTempPath +
|
|
" to save optimized bitcode\n");
|
|
WriteIndexToFile(*Index, OS);
|
|
}
|
|
|
|
// Prepare the resulting object vector
|
|
assert(ProducedBinaries.empty() && "The generator should not be reused");
|
|
ProducedBinaries.resize(Modules.size());
|
|
|
|
// Prepare the module map.
|
|
auto ModuleMap = generateModuleMap(Modules);
|
|
|
|
// Parallel optimizer + codegen
|
|
{
|
|
ThreadPool Pool(ThreadCount);
|
|
int count = 0;
|
|
for (auto &ModuleBuffer : Modules) {
|
|
Pool.async([&](int count) {
|
|
LLVMContext Context;
|
|
Context.setDiscardValueNames(LTODiscardValueNames);
|
|
|
|
// Parse module now
|
|
auto TheModule = loadModuleFromBuffer(ModuleBuffer, Context, false);
|
|
|
|
// Save temps: original file.
|
|
if (!SaveTempsDir.empty()) {
|
|
saveTempBitcode(*TheModule, SaveTempsDir, count, ".0.original.bc");
|
|
}
|
|
|
|
ProducedBinaries[count] = ProcessThinLTOModule(
|
|
*TheModule, *Index, ModuleMap, *TMBuilder.create(), CacheOptions,
|
|
SaveTempsDir, count);
|
|
}, count);
|
|
count++;
|
|
}
|
|
}
|
|
|
|
// If statistics were requested, print them out now.
|
|
if (llvm::AreStatisticsEnabled())
|
|
llvm::PrintStatistics();
|
|
}
|