[ThinLTO] Add caching to the new LTO API

Add the ability to plug a cache on the LTO API.
I tried to write it such that a linker implementation can
control the cache backend. This is intrusive and I'm
not totally happy with it, but I can't figure out a
better design right now.
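
The intended wiring, roughly: the linker returns a CacheObjectOutput from
the AddOutput callback it passes to LTO::run, and gets the (possibly
cached) object file back through a second callback when the output is
destroyed. A minimal sketch, assuming the API introduced in this patch;
addObjectToLink and the cache directory are illustrative placeholders:

    auto AddOutput =
        [&](size_t Task) -> std::unique_ptr<lto::NativeObjectOutput> {
      return llvm::make_unique<lto::CacheObjectOutput>(
          "/tmp/cache", [Task](std::unique_ptr<MemoryBuffer> Buffer) {
            // Fires on cache hits as well as after a miss is compiled and
            // committed; the linker takes ownership of the object buffer.
            addObjectToLink(Task, std::move(Buffer)); // hypothetical hook
          });
    };
    check(Lto.run(AddOutput), "LTO::run failed"); // as llvm-lto2 does below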

Differential Revision: https://reviews.llvm.org/D23599

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@279576 91177308-0d34-0410-b5e6-96231b3b80d8
Mehdi Amini, 2016-08-23 21:30:12 +00:00
commit 242275b349 (parent e9aa7e0db9)
8 changed files with 393 additions and 35 deletions

include/llvm/LTO/Caching.h (new file)

@ -0,0 +1,100 @@
//===- Caching.h - LLVM Link Time Optimizer Cache Handling ---------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the lto::CacheObjectOutput data structure, which allows
// clients to add a filesystem cache to ThinLTO.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LTO_CACHING_H
#define LLVM_LTO_CACHING_H
#include "llvm/ADT/SmallString.h"
#include "llvm/LTO/Config.h"
#include "llvm/Support/MemoryBuffer.h"
namespace llvm {
namespace lto {
/// Type for client-supplied callback when a buffer is loaded from the cache.
typedef std::function<void(std::unique_ptr<MemoryBuffer>)> AddBufferFn;
/// Manage caching on the filesystem.
///
/// The general scheme is the following:
///
/// void do_stuff(AddBufferFn CallBack) {
/// /* ... */
/// {
/// /* Create the CacheObjectOutput pointing to a cache directory */
/// auto Output = CacheObjectOutput("/tmp/cache", CallBack);
///
/// /* Call some processing function */
/// process(Output);
///
/// } /* Callback is only called now, on destruction of the Output object */
/// /* ... */
/// }
///
///
/// void process(NativeObjectOutput &Output) {
/// /* check if caching is supported */
/// if (Output.isCachingEnabled()) {
/// auto Key = ComputeKeyForEntry(...); // "expensive" call
/// if (Output.tryLoadFromCache(Key))
/// return; // Cache hit
/// }
///
/// auto OS = Output.getStream();
///
/// OS << ...;
/// /* Note that the callback is not called here, but only when the caller
/// destroys Output */
/// }
///
class CacheObjectOutput : public NativeObjectOutput {
/// Path to the on-disk cache directory
StringRef CacheDirectoryPath;
/// Path to this entry in the cache, initialized by tryLoadFromCache().
SmallString<128> EntryPath;
/// Path to temporary file used to buffer output that will be committed to the
/// cache entry when this object is destroyed
SmallString<128> TempFilename;
/// User-supplied callback, called when the buffer is pulled out of the cache
/// (potentially after creating it).
AddBufferFn AddBuffer;
public:
/// The destructor pulls the entry from the cache and calls the AddBuffer
/// callback, after committing the entry into the cache on miss.
~CacheObjectOutput();
/// Create a CacheObjectOutput: the client is supposed to create it in the
/// callback supplied to LTO::run. The \p CacheDirectoryPath points to the
/// directory on disk where to store the cache, and \p AddBuffer will be
/// called when the buffer is pulled out of the cache (potentially after
/// creating it).
CacheObjectOutput(StringRef CacheDirectoryPath, AddBufferFn AddBuffer)
: CacheDirectoryPath(CacheDirectoryPath), AddBuffer(AddBuffer) {}
/// Return an allocated stream for the output, or null in case of failure.
std::unique_ptr<raw_pwrite_stream> getStream() override;
/// Set EntryPath, try loading from a possible cache first, return true on
/// cache hit.
bool tryLoadFromCache(StringRef Key) override;
/// Returns true to signal that this implementation of NativeObjectOutput
/// supports caching.
bool isCachingEnabled() const override { return true; }
};
} // namespace lto
} // namespace llvm
#endif


@ -32,10 +32,33 @@ namespace lto {
/// Abstract class representing a single Task output to be implemented by the
/// client of the LTO API.
///
/// The general scheme of how the API is called is the following:
///
/// void process(NativeObjectOutput &Output) {
/// /* check if caching is supported */
/// if (Output.isCachingEnabled()) {
/// auto Key = ComputeKeyForEntry(...); // "expensive" call
/// if (Output.tryLoadFromCache(Key))
/// return; // Cache hit
/// }
///
/// auto OS = Output.getStream();
///
/// OS << ...;
/// }
///
class NativeObjectOutput {
public:
// Return an allocated stream for the output, or null in case of failure.
virtual std::unique_ptr<raw_pwrite_stream> getStream() = 0;
// Try loading from a possible cache first, return true on cache hit.
virtual bool tryLoadFromCache(StringRef Key) { return false; }
// Returns true if a cache is available
virtual bool isCachingEnabled() const { return false; }
virtual ~NativeObjectOutput() = default;
};
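
For contrast with the caching implementation, a client that does not cache
only needs getStream(); the tryLoadFromCache() and isCachingEnabled()
defaults already report that no cache is available. A minimal file-backed
sketch (modeled loosely on the LTOOutput helper in llvm-lto2 below; the
class name is illustrative and error handling is reduced to a fatal error):

    class FileOutput : public NativeObjectOutput {
      std::string Path;
    public:
      FileOutput(std::string Path) : Path(std::move(Path)) {}
      std::unique_ptr<raw_pwrite_stream> getStream() override {
        std::error_code EC;
        auto OS = llvm::make_unique<raw_fd_ostream>(Path, EC, sys::fs::F_None);
        if (EC)
          report_fatal_error(Twine("Failed to open ") + Path + ": " +
                             EC.message());
        return std::move(OS);
      }
    };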


@ -48,6 +48,7 @@ endif()
add_llvm_library(LLVMLTO
Caching.cpp
LTO.cpp
LTOBackend.cpp
LTOModule.cpp

lib/LTO/Caching.cpp (new file)

@ -0,0 +1,104 @@
//===- Caching.cpp - LLVM Link Time Optimizer Cache Handling -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements caching for ThinLTO.
//
//===----------------------------------------------------------------------===//
#include "llvm/LTO/Caching.h"
#ifdef HAVE_LLVM_REVISION
#include "LLVMLTORevision.h"
#endif
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::lto;
static void commitEntry(StringRef TempFilename, StringRef EntryPath) {
// Rename to the final destination; a race here should be benign, since
// concurrent writers for the same key produce identical content.
auto EC = sys::fs::rename(TempFilename, EntryPath);
if (EC) {
// Renaming failed, probably not the same filesystem, copy and delete.
{
auto ReloadedBufferOrErr = MemoryBuffer::getFile(TempFilename);
if (auto EC = ReloadedBufferOrErr.getError())
report_fatal_error(Twine("Failed to open temp file '") + TempFilename +
"': " + EC.message() + "\n");
raw_fd_ostream OS(EntryPath, EC, sys::fs::F_None);
if (EC)
report_fatal_error(Twine("Failed to open ") + EntryPath +
" to save cached entry\n");
// It is unclear what guarantees hold if two processes are doing this
// at the same time.
OS << (*ReloadedBufferOrErr)->getBuffer();
}
sys::fs::remove(TempFilename);
}
}
CacheObjectOutput::~CacheObjectOutput() {
if (EntryPath.empty())
// The entry was never used by the client (tryLoadFromCache() wasn't called)
return;
// TempFilename is only set if getStream() was called, i.e. on a cache miss
// when tryLoadFromCache() returned false. EntryPath points to a real cache
// entry if a valid Key was submitted; otherwise tryLoadFromCache() set it to
// CacheDirectoryPath.
if (!TempFilename.empty()) {
if (EntryPath == CacheDirectoryPath)
// The Key supplied to tryLoadFromCache() was empty; do not commit the temp.
EntryPath = TempFilename;
else
// We commit the tempfile into the cache now, by moving it to EntryPath.
commitEntry(TempFilename, EntryPath);
}
// Load the entry from the cache now.
auto ReloadedBufferOrErr = MemoryBuffer::getFile(EntryPath);
if (auto EC = ReloadedBufferOrErr.getError())
report_fatal_error(Twine("Can't reload cached file '") + EntryPath + "': " +
EC.message() + "\n");
// Supply the resulting buffer to the user.
AddBuffer(std::move(*ReloadedBufferOrErr));
}
// Return an allocated stream for the output, or null in case of failure.
std::unique_ptr<raw_pwrite_stream> CacheObjectOutput::getStream() {
assert(!EntryPath.empty() && "API Violation: client didn't call "
"tryLoadFromCache() before getStream()");
// Write to a temporary file to avoid race conditions.
int TempFD;
std::error_code EC =
sys::fs::createTemporaryFile("Thin", "tmp.o", TempFD, TempFilename);
if (EC) {
errs() << "Error: " << EC.message() << "\n";
report_fatal_error("ThinLTO: Can't get a temporary file");
}
return llvm::make_unique<raw_fd_ostream>(TempFD, /* ShouldClose */ true);
}
// Try loading from a possible cache first, return true on cache hit.
bool CacheObjectOutput::tryLoadFromCache(StringRef Key) {
assert(!CacheDirectoryPath.empty() &&
"CacheObjectOutput was initialized without a cache path");
if (Key.empty()) {
// Client didn't compute a valid key. EntryPath has been set to
// CacheDirectoryPath.
EntryPath = CacheDirectoryPath;
return false;
}
sys::path::append(EntryPath, CacheDirectoryPath, Key);
return sys::fs::exists(EntryPath);
}
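
Putting the pieces together, the lifetime of one cache entry from the
client side looks like this (a sketch restating the usage scheme from
Caching.h; ComputeKeyForEntry and Payload are illustrative placeholders):

    {
      CacheObjectOutput Output("/tmp/cache", AddBuffer);
      auto Key = ComputeKeyForEntry(...); // e.g. computeCacheKey in LTO.cpp
      if (!Output.tryLoadFromCache(Key))  // miss: sets EntryPath
        *Output.getStream() << Payload;   // write object to a temp file
    } // Destructor: on a miss, commit the temp file to EntryPath; in either
      // case, reload the entry and hand the buffer to AddBuffer.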


@ -25,6 +25,7 @@
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SHA1.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/ThreadPool.h"
@ -41,6 +42,61 @@ using namespace llvm;
using namespace lto;
using namespace object;
#define DEBUG_TYPE "lto"
// Returns a unique hash for the Module considering the current list of
// export/import and other global analysis results.
// The hash is produced in \p Key.
static void computeCacheKey(
SmallString<40> &Key, const ModuleSummaryIndex &Index, StringRef ModuleID,
const FunctionImporter::ImportMapTy &ImportList,
const FunctionImporter::ExportSetTy &ExportList,
const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
const GVSummaryMapTy &DefinedGlobals) {
// Compute the unique hash for this entry.
// This is based on the current compiler version, the module itself, the
// export list, the hash for every single module in the import list, the
// list of ResolvedODR for the module, and the list of preserved symbols.
SHA1 Hasher;
// Start with the compiler revision
Hasher.update(LLVM_VERSION_STRING);
#ifdef HAVE_LLVM_REVISION
Hasher.update(LLVM_REVISION);
#endif
// Include the hash for the current module
auto ModHash = Index.getModuleHash(ModuleID);
Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
for (auto F : ExportList)
// The export list can impact the internalization, be conservative here
Hasher.update(ArrayRef<uint8_t>((uint8_t *)&F, sizeof(F)));
// Include the hash for every module we import functions from
for (auto &Entry : ImportList) {
auto ModHash = Index.getModuleHash(Entry.first());
Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
}
// Include the hash for the resolved ODR.
for (auto &Entry : ResolvedODR) {
Hasher.update(ArrayRef<uint8_t>((const uint8_t *)&Entry.first,
sizeof(GlobalValue::GUID)));
Hasher.update(ArrayRef<uint8_t>((const uint8_t *)&Entry.second,
sizeof(GlobalValue::LinkageTypes)));
}
// Include the hash for the linkage type to reflect internalization and weak
// resolution.
for (auto &GS : DefinedGlobals) {
GlobalValue::LinkageTypes Linkage = GS.second->linkage();
Hasher.update(
ArrayRef<uint8_t>((const uint8_t *)&Linkage, sizeof(Linkage)));
}
Key = toHex(Hasher.result());
}
// Simple helper to load a module from bitcode
std::unique_ptr<Module>
llvm::loadModuleFromBuffer(const MemoryBufferRef &Buffer, LLVMContext &Context,
@ -429,9 +485,12 @@ public:
ModuleToDefinedGVSummaries(ModuleToDefinedGVSummaries) {}
virtual ~ThinBackendProc() {}
virtual Error start(unsigned Task, MemoryBufferRef MBRef,
const FunctionImporter::ImportMapTy &ImportList,
MapVector<StringRef, MemoryBufferRef> &ModuleMap) = 0;
virtual Error start(
unsigned Task, MemoryBufferRef MBRef,
const FunctionImporter::ImportMapTy &ImportList,
const FunctionImporter::ExportSetTy &ExportList,
const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
MapVector<StringRef, MemoryBufferRef> &ModuleMap) = 0;
virtual Error wait() = 0;
};
@ -451,35 +510,57 @@ public:
BackendThreadPool(ThinLTOParallelismLevel),
AddOutput(std::move(AddOutput)) {}
Error
runThinLTOBackendThread(AddOutputFn AddOutput, unsigned Task,
MemoryBufferRef MBRef,
ModuleSummaryIndex &CombinedIndex,
const FunctionImporter::ImportMapTy &ImportList,
const GVSummaryMapTy &DefinedGlobals,
MapVector<StringRef, MemoryBufferRef> &ModuleMap) {
LTOLLVMContext BackendContext(Conf);
Error runThinLTOBackendThread(
AddOutputFn AddOutput, unsigned Task, MemoryBufferRef MBRef,
ModuleSummaryIndex &CombinedIndex,
const FunctionImporter::ImportMapTy &ImportList,
const FunctionImporter::ExportSetTy &ExportList,
const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
const GVSummaryMapTy &DefinedGlobals,
MapVector<StringRef, MemoryBufferRef> &ModuleMap) {
auto ModuleIdentifier = MBRef.getBufferIdentifier();
auto Output = AddOutput(Task);
if (Output->isCachingEnabled()) {
SmallString<40> Key;
// The module may be cached; compute the key identifying it in the cache.
computeCacheKey(Key, CombinedIndex, ModuleIdentifier, ImportList,
ExportList, ResolvedODR, DefinedGlobals);
if (Output->tryLoadFromCache(Key))
return Error();
}
LTOLLVMContext BackendContext(Conf);
ErrorOr<std::unique_ptr<Module>> MOrErr =
parseBitcodeFile(MBRef, BackendContext);
assert(MOrErr && "Unable to load module in thread?");
return thinBackend(Conf, Task, AddOutput, **MOrErr, CombinedIndex,
auto AddOutputWrapper = [&](unsigned TaskId) {
assert(Task == TaskId && "Unexpected TaskId mismatch");
return std::move(Output);
};
return thinBackend(Conf, Task, AddOutputWrapper, **MOrErr, CombinedIndex,
ImportList, DefinedGlobals, ModuleMap);
}
Error start(unsigned Task, MemoryBufferRef MBRef,
const FunctionImporter::ImportMapTy &ImportList,
MapVector<StringRef, MemoryBufferRef> &ModuleMap) override {
Error start(
unsigned Task, MemoryBufferRef MBRef,
const FunctionImporter::ImportMapTy &ImportList,
const FunctionImporter::ExportSetTy &ExportList,
const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
MapVector<StringRef, MemoryBufferRef> &ModuleMap) override {
StringRef ModulePath = MBRef.getBufferIdentifier();
BackendThreadPool.async(
[=](MemoryBufferRef MBRef, ModuleSummaryIndex &CombinedIndex,
const FunctionImporter::ImportMapTy &ImportList,
const FunctionImporter::ExportSetTy &ExportList,
const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>
&ResolvedODR,
GVSummaryMapTy &DefinedGlobals,
MapVector<StringRef, MemoryBufferRef> &ModuleMap) {
Error E =
runThinLTOBackendThread(AddOutput, Task, MBRef, CombinedIndex,
ImportList, DefinedGlobals, ModuleMap);
Error E = runThinLTOBackendThread(
AddOutput, Task, MBRef, CombinedIndex, ImportList, ExportList,
ResolvedODR, DefinedGlobals, ModuleMap);
if (E) {
std::unique_lock<std::mutex> L(ErrMu);
if (Err)
@ -489,6 +570,7 @@ public:
}
},
MBRef, std::ref(CombinedIndex), std::ref(ImportList),
std::ref(ExportList), std::ref(ResolvedODR),
std::ref(ModuleToDefinedGVSummaries[ModulePath]), std::ref(ModuleMap));
return Error();
}
@ -550,9 +632,12 @@ public:
return NewPath.str();
}
Error start(unsigned Task, MemoryBufferRef MBRef,
const FunctionImporter::ImportMapTy &ImportList,
MapVector<StringRef, MemoryBufferRef> &ModuleMap) override {
Error start(
unsigned Task, MemoryBufferRef MBRef,
const FunctionImporter::ImportMapTy &ImportList,
const FunctionImporter::ExportSetTy &ExportList,
const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
MapVector<StringRef, MemoryBufferRef> &ModuleMap) override {
StringRef ModulePath = MBRef.getBufferIdentifier();
std::string NewModulePath =
getThinLTOOutputFile(ModulePath, OldPrefix, NewPrefix);
@ -638,18 +723,25 @@ Error LTO::runThinLTO(AddOutputFn AddOutput) {
ExportedGUIDs.count(GUID);
};
thinLTOInternalizeAndPromoteInIndex(ThinLTO.CombinedIndex, isExported);
thinLTOResolveWeakForLinkerInIndex(
ThinLTO.CombinedIndex, isPrevailing,
[](StringRef, GlobalValue::GUID, GlobalValue::LinkageTypes) {});
StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
auto recordNewLinkage = [&](StringRef ModuleIdentifier,
GlobalValue::GUID GUID,
GlobalValue::LinkageTypes NewLinkage) {
ResolvedODR[ModuleIdentifier][GUID] = NewLinkage;
};
thinLTOResolveWeakForLinkerInIndex(ThinLTO.CombinedIndex, isPrevailing,
recordNewLinkage);
std::unique_ptr<ThinBackendProc> BackendProc = ThinLTO.Backend(
Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, AddOutput);
// Partition numbers for ThinLTO jobs start at 1 (see comments for
// GlobalResolution in LTO.h). Task numbers, however, start at
// ParallelCodeGenParallelismLevel, as tasks 0 through
// ParallelCodeGenParallelismLevel-1 are reserved for parallel code generation
// partitions.
// ParallelCodeGenParallelismLevel if an LTO module is present, as tasks 0
// through ParallelCodeGenParallelismLevel-1 are reserved for parallel code
// generation partitions.
unsigned Task = RegularLTO.CombinedModule
? RegularLTO.ParallelCodeGenParallelismLevel
: 0;
@ -657,7 +749,8 @@ Error LTO::runThinLTO(AddOutputFn AddOutput) {
for (auto &Mod : ThinLTO.ModuleMap) {
if (Error E = BackendProc->start(Task, Mod.second, ImportLists[Mod.first],
ThinLTO.ModuleMap))
ExportLists[Mod.first],
ResolvedODR[Mod.first], ThinLTO.ModuleMap))
return E;
++Task;


@ -143,6 +143,20 @@ bool opt(Config &C, TargetMachine *TM, unsigned Task, Module &M,
return true;
}
/// Monolithic LTO does not support caching (yet); this is a convenient wrapper
/// around AddOutput to work around that.
static AddOutputFn getUncachedOutputWrapper(AddOutputFn &AddOutput,
unsigned Task) {
return [Task, &AddOutput](unsigned TaskId) {
auto Output = AddOutput(Task);
if (Output->isCachingEnabled() && Output->tryLoadFromCache(""))
report_fatal_error("Cache hit without a valid key?");
assert(Task == TaskId && "Unexpected TaskId mismatch");
return Output;
};
}
void codegen(Config &C, TargetMachine *TM, AddOutputFn AddOutput, unsigned Task,
Module &M) {
if (C.PreCodeGenModuleHook && !C.PreCodeGenModuleHook(Task, M))
@ -190,7 +204,10 @@ void splitCodeGen(Config &C, TargetMachine *TM, AddOutputFn AddOutput,
std::unique_ptr<TargetMachine> TM =
createTargetMachine(C, MPartInCtx->getTargetTriple(), T);
codegen(C, TM.get(), AddOutput, ThreadId, *MPartInCtx);
codegen(C, TM.get(),
getUncachedOutputWrapper(AddOutput, ThreadId), ThreadId,
*MPartInCtx);
},
// Pass BC using std::move to ensure that it get moved rather than
// copied into the thread's context.
@ -228,11 +245,12 @@ Error lto::backend(Config &C, AddOutputFn AddOutput,
if (!opt(C, TM.get(), 0, *M, /*IsThinLto=*/false))
return Error();
if (ParallelCodeGenParallelismLevel == 1)
codegen(C, TM.get(), AddOutput, 0, *M);
else
if (ParallelCodeGenParallelismLevel == 1) {
codegen(C, TM.get(), getUncachedOutputWrapper(AddOutput, 0), 0, *M);
} else {
splitCodeGen(C, TM.get(), AddOutput, ParallelCodeGenParallelismLevel,
std::move(M));
}
return Error();
}


@ -1,5 +1,5 @@
; RUN: opt -module-summary %s -o %t.bc
; RUN: opt -module-summary %p/Inputs/funcimport.ll -o %t2.bc
; RUN: opt -module-summary %p/Inputs/cache.ll -o %t2.bc
; Verify that enabling caching is working
; RUN: rm -Rf %t.cache && mkdir %t.cache
@ -7,6 +7,14 @@
; RUN: ls %t.cache/llvmcache.timestamp
; RUN: ls %t.cache | count 3
; Verify that enabling caching is working with llvm-lto2
; RUN: rm -Rf %t.cache && mkdir %t.cache
; RUN: llvm-lto2 -o %t.o %t2.bc %t.bc -cache-dir %t.cache \
; RUN: -r=%t2.bc,_main,plx \
; RUN: -r=%t2.bc,_globalfunc,lx \
; RUN: -r=%t.bc,_globalfunc,plx
; RUN: ls %t.cache | count 2
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.11.0"


@ -16,6 +16,7 @@
//
//===----------------------------------------------------------------------===//
#include "llvm/LTO/Caching.h"
#include "llvm/LTO/LTO.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/TargetSelect.h"
@ -31,6 +32,9 @@ static cl::opt<std::string> OutputFilename("o", cl::Required,
cl::desc("Output filename"),
cl::value_desc("filename"));
static cl::opt<std::string> CacheDir("cache-dir", cl::desc("Cache Directory"),
cl::value_desc("directory"));
static cl::opt<bool> SaveTemps("save-temps", cl::desc("Save temporary files"));
static cl::opt<bool>
@ -187,9 +191,16 @@ int main(int argc, char **argv) {
if (HasErrors)
return 1;
auto AddOutput = [&](size_t Task) {
auto AddOutput =
[&](size_t Task) -> std::unique_ptr<lto::NativeObjectOutput> {
std::string Path = OutputFilename + "." + utostr(Task);
return llvm::make_unique<LTOOutput>(std::move(Path));
if (CacheDir.empty())
return llvm::make_unique<LTOOutput>(std::move(Path));
return llvm::make_unique<CacheObjectOutput>(
CacheDir, [Path](std::unique_ptr<MemoryBuffer> Buffer) {
*LTOOutput(Path).getStream() << Buffer->getBuffer();
});
};
check(Lto.run(AddOutput), "LTO::run failed");