From dbca62ee4e04db4f0c44d91ae511d59d35401bcc Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Wed, 14 Dec 2016 04:56:42 +0000 Subject: [PATCH] [ThinLTO] Add an API to trigger file-based API for returning objects to the linker Summary: The motivation is to better support the -object_path_lto option on Darwin. The linker needs to write down the generated object files on disk for later use by lldb or dsymutil (debug info is not present in the final binary). We're moving this into libLTO so that we can be smarter when a cache is enabled and hard-link when possible instead of duplicating the files. Reviewers: tejohnson, deadalnix, pcc Subscribers: dexonsmith, llvm-commits Differential Revision: https://reviews.llvm.org/D27507 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@289631 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm-c/lto.h | 36 +++++- .../llvm/LTO/legacy/ThinLTOCodeGenerator.h | 35 +++++- include/llvm/Support/FileSystem.h | 8 ++ lib/LTO/ThinLTOCodeGenerator.cpp | 105 ++++++++++++++---- lib/Support/Unix/Path.inc | 13 +++ lib/Support/Windows/Path.inc | 4 + test/ThinLTO/X86/save_objects.ll | 30 +++++ tools/llvm-lto/llvm-lto.cpp | 12 ++ tools/lto/lto.cpp | 15 +++ tools/lto/lto.exports | 5 +- 10 files changed, 239 insertions(+), 24 deletions(-) create mode 100644 test/ThinLTO/X86/save_objects.ll diff --git a/include/llvm-c/lto.h b/include/llvm-c/lto.h index ffdf5561dcf..c3af74cdeda 100644 --- a/include/llvm-c/lto.h +++ b/include/llvm-c/lto.h @@ -44,7 +44,7 @@ typedef bool lto_bool_t; * @{ */ -#define LTO_API_VERSION 20 +#define LTO_API_VERSION 21 /** * \since prior to LTO_API_VERSION=3 @@ -636,6 +636,29 @@ extern unsigned int thinlto_module_get_num_objects(thinlto_code_gen_t cg); extern LTOObjectBuffer thinlto_module_get_object(thinlto_code_gen_t cg, unsigned int index); +/** + * Returns the number of object files produced by the ThinLTO CodeGenerator. 
+ * + * It usually matches the number of input files, but this is not a guarantee of + * the API and may change in future implementation, so the client should not + * assume it. + * + * \since LTO_API_VERSION=21 + */ +unsigned int thinlto_module_get_num_object_files(thinlto_code_gen_t cg); + +/** + * Returns the path to the ith object file produced by the ThinLTO + * CodeGenerator. + * + * Client should use \p thinlto_module_get_num_object_files() to get the number + * of available objects. + * + * \since LTO_API_VERSION=21 + */ +const char *thinlto_module_get_object_file(thinlto_code_gen_t cg, + unsigned int index); + /** * Sets which PIC code model to generate. * Returns true on error (check lto_get_error_message() for details). @@ -724,6 +747,17 @@ extern void thinlto_codegen_set_cache_entry_expiration(thinlto_code_gen_t cg, extern void thinlto_codegen_set_savetemps_dir(thinlto_code_gen_t cg, const char *save_temps_dir); +/** + * Set the path to a directory where to save generated object files. This + * path can be used by a linker to request on-disk files instead of in-memory + * buffers. When set, results are available through + * thinlto_module_get_object_file() instead of thinlto_module_get_object(). + * + * \since LTO_API_VERSION=21 + */ +void thinlto_set_generated_objects_dir(thinlto_code_gen_t cg, + const char *save_temps_dir); + /** * Sets the cpu to generate code for. * diff --git a/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h b/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h index 539880e8d3a..3a3600ecccf 100644 --- a/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h +++ b/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h @@ -72,17 +72,31 @@ public: /** * Process all the modules that were added to the code generator in parallel. 
* - * Client can access the resulting object files using getProducedBinaries() + * Client can access the resulting object files using getProducedBinaries(), + * unless setGeneratedObjectsDirectory() has been called, in which case + * results are available through getProducedBinaryFiles(). */ void run(); /** - * Return the "in memory" binaries produced by the code generator. + * Return the "in memory" binaries produced by the code generator. This is + * filled after run() unless setGeneratedObjectsDirectory() has been + * called, in which case results are available through + * getProducedBinaryFiles(). */ std::vector> &getProducedBinaries() { return ProducedBinaries; } + /** + * Return the "on-disk" binaries produced by the code generator. This is + * filled after run() when setGeneratedObjectsDirectory() has been + * called; otherwise results are available through getProducedBinaries(). + */ + std::vector &getProducedBinaryFiles() { + return ProducedBinaryFiles; + } + /** * \defgroup Options setters * @{ @@ -156,6 +170,14 @@ public: /// the processing. void setSaveTempsDir(std::string Path) { SaveTempsDir = std::move(Path); } + /// Set the path to a directory where to save generated object files. This + /// path can be used by a linker to request on-disk files instead of in-memory + /// buffers. When set, results are available through getProducedBinaryFiles() + /// instead of getProducedBinaries(). + void setGeneratedObjectsDirectory(std::string Path) { + SavedObjectsDirectoryPath = std::move(Path); + } + /// CPU to use to initialize the TargetMachine void setCpu(std::string Cpu) { TMBuilder.MCpu = std::move(Cpu); } @@ -244,9 +266,13 @@ private: /// Helper factory to build a TargetMachine TargetMachineBuilder TMBuilder; - /// Vector holding the in-memory buffer containing the produced binaries. + /// Vector holding the in-memory buffer containing the produced binaries, when + /// SavedObjectsDirectoryPath isn't set. 
std::vector> ProducedBinaries; + /// Path to generated files in the supplied SavedObjectsDirectoryPath if any. + std::vector ProducedBinaryFiles; + /// Vector holding the input buffers containing the bitcode modules to /// process. std::vector Modules; @@ -264,6 +290,9 @@ private: /// Path to a directory to save the temporary bitcode files. std::string SaveTempsDir; + /// Path to a directory to save the generated object files. + std::string SavedObjectsDirectoryPath; + /// Flag to enable/disable CodeGen. When set to true, the process stops after /// optimizations and a bitcode is produced. bool DisableCodeGen = false; diff --git a/include/llvm/Support/FileSystem.h b/include/llvm/Support/FileSystem.h index 85e6658b820..9d8d8c3ffb5 100644 --- a/include/llvm/Support/FileSystem.h +++ b/include/llvm/Support/FileSystem.h @@ -342,6 +342,14 @@ std::error_code create_directory(const Twine &path, bool IgnoreExisting = true, /// specific error_code. std::error_code create_link(const Twine &to, const Twine &from); +/// Create a hard link from \a from to \a to, or return an error. +/// +/// @param to The path to hard link to. +/// @param from The path to hard link from. This is created. +/// @returns errc::success if the link was created, otherwise a platform +/// specific error_code. +std::error_code create_hard_link(const Twine &to, const Twine &from); + /// @brief Get the current path. /// /// @param result Holds the current path on return. 
diff --git a/lib/LTO/ThinLTOCodeGenerator.cpp b/lib/LTO/ThinLTOCodeGenerator.cpp index ae5d8a0255f..950930176c0 100644 --- a/lib/LTO/ThinLTOCodeGenerator.cpp +++ b/lib/LTO/ThinLTOCodeGenerator.cpp @@ -343,10 +343,9 @@ public: } // Cache the Produced object file - std::unique_ptr - write(std::unique_ptr OutputBuffer) { + void write(const MemoryBuffer &OutputBuffer) { if (EntryPath.empty()) - return OutputBuffer; + return; // Write to a temporary to avoid race condition SmallString<128> TempFilename; @@ -359,7 +358,7 @@ public: } { raw_fd_ostream OS(TempFD, /* ShouldClose */ true); - OS << OutputBuffer->getBuffer(); + OS << OutputBuffer.getBuffer(); } // Rename to final destination (hopefully race condition won't matter here) EC = sys::fs::rename(TempFilename, EntryPath); @@ -369,16 +368,8 @@ public: if (EC) report_fatal_error(Twine("Failed to open ") + EntryPath + " to save cached entry\n"); - OS << OutputBuffer->getBuffer(); + OS << OutputBuffer.getBuffer(); } - auto ReloadedBufferOrErr = MemoryBuffer::getFile(EntryPath); - if (auto EC = ReloadedBufferOrErr.getError()) { - // FIXME diagnose - errs() << "error: can't reload cached file '" << EntryPath - << "': " << EC.message() << "\n"; - return OutputBuffer; - } - return std::move(*ReloadedBufferOrErr); } }; @@ -745,6 +736,43 @@ std::unique_ptr ThinLTOCodeGenerator::codegen(Module &TheModule) { return codegenModule(TheModule, *TMBuilder.create()); } +/// Write out the generated object file, either from CacheEntryPath or from +/// OutputBuffer, preferring hard-link when possible. +/// Returns the path to the generated file in SavedObjectsDirectoryPath. +static std::string writeGeneratedObject(int count, StringRef CacheEntryPath, + StringRef SavedObjectsDirectoryPath, + const MemoryBuffer &OutputBuffer) { + SmallString<128> OutputPath(SavedObjectsDirectoryPath); + llvm::sys::path::append(OutputPath, Twine(count) + ".thinlto.o"); + OutputPath.c_str(); // Ensure the string is null terminated. 
+ if (sys::fs::exists(OutputPath)) + sys::fs::remove(OutputPath); + + // We don't return a memory buffer to the linker, just a list of files. + if (!CacheEntryPath.empty()) { + // Cache is enabled, hard-link the entry (or copy if hard-link fails). + auto Err = sys::fs::create_hard_link(CacheEntryPath, OutputPath); + if (!Err) + return OutputPath.str(); + // Hard linking failed, try to copy. + Err = sys::fs::copy_file(CacheEntryPath, OutputPath); + if (!Err) + return OutputPath.str(); + // Copy failed (could be because the CacheEntry was removed from the cache + // in the meantime by another process), fall back and try to write down the + // buffer to the output. + errs() << "error: can't link or copy from cached entry '" << CacheEntryPath + << "' to '" << OutputPath << "'\n"; + } + // No cache entry, just write out the buffer. + std::error_code Err; + raw_fd_ostream OS(OutputPath, Err, sys::fs::F_None); + if (Err) + report_fatal_error("Can't open output '" + OutputPath + "'\n"); + OS << OutputBuffer.getBuffer(); + return OutputPath.str(); +} + // Main entry point for the ThinLTO processing void ThinLTOCodeGenerator::run() { if (CodeGenOnly) { @@ -785,7 +813,16 @@ void ThinLTOCodeGenerator::run() { // Prepare the resulting object vector assert(ProducedBinaries.empty() && "The generator should not be reused"); - ProducedBinaries.resize(Modules.size()); + if (SavedObjectsDirectoryPath.empty()) + ProducedBinaries.resize(Modules.size()); + else { + sys::fs::create_directories(SavedObjectsDirectoryPath); + bool IsDir; + sys::fs::is_directory(SavedObjectsDirectoryPath, IsDir); + if (!IsDir) + report_fatal_error("Unexistent dir: '" + SavedObjectsDirectoryPath + "'"); + ProducedBinaryFiles.resize(Modules.size()); + } // Prepare the module map. 
auto ModuleMap = generateModuleMap(Modules); @@ -865,16 +902,22 @@ void ThinLTOCodeGenerator::run() { ImportLists[ModuleIdentifier], ExportList, ResolvedODR[ModuleIdentifier], DefinedFunctions, GUIDPreservedSymbols); + auto CacheEntryPath = CacheEntry.getEntryPath(); { auto ErrOrBuffer = CacheEntry.tryLoadingBuffer(); DEBUG(dbgs() << "Cache " << (ErrOrBuffer ? "hit" : "miss") << " '" - << CacheEntry.getEntryPath() << "' for buffer " << count - << " " << ModuleIdentifier << "\n"); + << CacheEntryPath << "' for buffer " << count << " " + << ModuleIdentifier << "\n"); if (ErrOrBuffer) { // Cache Hit! - ProducedBinaries[count] = std::move(ErrOrBuffer.get()); + if (SavedObjectsDirectoryPath.empty()) + ProducedBinaries[count] = std::move(ErrOrBuffer.get()); + else + ProducedBinaryFiles[count] = writeGeneratedObject( + count, CacheEntryPath, SavedObjectsDirectoryPath, + *ErrOrBuffer.get()); return; } } @@ -903,8 +946,32 @@ void ThinLTOCodeGenerator::run() { ModuleToDefinedGVSummaries[ModuleIdentifier], CacheOptions, DisableCodeGen, SaveTempsDir, count); - OutputBuffer = CacheEntry.write(std::move(OutputBuffer)); - ProducedBinaries[count] = std::move(OutputBuffer); + // Commit to the cache (if enabled) + CacheEntry.write(*OutputBuffer); + + if (SavedObjectsDirectoryPath.empty()) { + // We need to generate a memory buffer for the linker. + if (!CacheEntryPath.empty()) { + // Cache is enabled, reload from the cache + // We do this to lower memory pressure: the buffer is on the heap + // and releasing it frees memory that can be used for the next input + // file. The final binary link will read from the VFS cache + // (hopefully!) or from disk if the memory pressure wasn't too high. + auto ReloadedBufferOrErr = CacheEntry.tryLoadingBuffer(); + if (auto EC = ReloadedBufferOrErr.getError()) { + // On error, keeping the preexisting buffer and printing a + // diagnostic is more friendly than just crashing. 
+ errs() << "error: can't reload cached file '" << CacheEntryPath + << "': " << EC.message() << "\n"; + } else { + OutputBuffer = std::move(*ReloadedBufferOrErr); + } + } + ProducedBinaries[count] = std::move(OutputBuffer); + return; + } + ProducedBinaryFiles[count] = writeGeneratedObject( + count, CacheEntryPath, SavedObjectsDirectoryPath, *OutputBuffer); }, IndexCount); } } diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc index 3812c5fb5de..e0b11aaff00 100644 --- a/lib/Support/Unix/Path.inc +++ b/lib/Support/Unix/Path.inc @@ -285,6 +285,19 @@ std::error_code create_link(const Twine &to, const Twine &from) { return std::error_code(); } +std::error_code create_hard_link(const Twine &to, const Twine &from) { + // Get arguments. + SmallString<128> from_storage; + SmallString<128> to_storage; + StringRef f = from.toNullTerminatedStringRef(from_storage); + StringRef t = to.toNullTerminatedStringRef(to_storage); + + if (::link(t.begin(), f.begin()) == -1) + return std::error_code(errno, std::generic_category()); + + return std::error_code(); +} + std::error_code remove(const Twine &path, bool IgnoreNonExisting) { SmallString<128> path_storage; StringRef p = path.toNullTerminatedStringRef(path_storage); diff --git a/lib/Support/Windows/Path.inc b/lib/Support/Windows/Path.inc index f7bc22ab2cf..27b250b428a 100644 --- a/lib/Support/Windows/Path.inc +++ b/lib/Support/Windows/Path.inc @@ -232,6 +232,10 @@ std::error_code create_link(const Twine &to, const Twine &from) { return std::error_code(); } +std::error_code create_hard_link(const Twine &to, const Twine &from) { + return create_link(to, from); +} + std::error_code remove(const Twine &path, bool IgnoreNonExisting) { SmallVector path_utf16; diff --git a/test/ThinLTO/X86/save_objects.ll b/test/ThinLTO/X86/save_objects.ll new file mode 100644 index 00000000000..4e12aee3dd8 --- /dev/null +++ b/test/ThinLTO/X86/save_objects.ll @@ -0,0 +1,30 @@ +; RUN: opt -module-hash -module-summary %s -o %t.bc +; RUN: opt 
-module-hash -module-summary %p/Inputs/cache.ll -o %t2.bc + +; Check that generating object files works without a cache +; RUN: rm -Rf %t.thin.out +; RUN: llvm-lto -thinlto-save-objects=%t.thin.out -thinlto-action=run %t2.bc %t.bc -exported-symbol=main +; RUN: ls %t.thin.out | count 2 + +; Same with cache +; RUN: rm -Rf %t.thin.out +; RUN: rm -Rf %t.cache && mkdir %t.cache +; RUN: llvm-lto -thinlto-save-objects=%t.thin.out -thinlto-action=run %t2.bc %t.bc -exported-symbol=main -thinlto-cache-dir %t.cache +; RUN: ls %t.thin.out | count 2 +; RUN: ls %t.cache | count 3 + +; Same with hot cache +; RUN: rm -Rf %t.thin.out +; RUN: rm -Rf %t.cache && mkdir %t.cache +; RUN: llvm-lto -thinlto-save-objects=%t.thin.out -thinlto-action=run %t2.bc %t.bc -exported-symbol=main -thinlto-cache-dir %t.cache +; RUN: ls %t.thin.out | count 2 +; RUN: ls %t.cache | count 3 + + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.11.0" + +define void @globalfunc() #0 { +entry: + ret void +} diff --git a/tools/llvm-lto/llvm-lto.cpp b/tools/llvm-lto/llvm-lto.cpp index 2f0a1dfb9fa..ece0130c9fb 100644 --- a/tools/llvm-lto/llvm-lto.cpp +++ b/tools/llvm-lto/llvm-lto.cpp @@ -130,6 +130,11 @@ static cl::opt ThinLTOSaveTempsPrefix( cl::desc("Save ThinLTO temp files using filenames created by adding " "suffixes to the given file path prefix.")); +static cl::opt ThinLTOGeneratedObjectsDir( + "thinlto-save-objects", + cl::desc("Save ThinLTO generated object files using filenames created in " + "the given directory.")); + static cl::opt SaveModuleFile("save-merged-module", cl::init(false), cl::desc("Write merged LTO module to file before CodeGen")); @@ -707,6 +712,13 @@ private: if (!ThinLTOSaveTempsPrefix.empty()) ThinGenerator.setSaveTempsDir(ThinLTOSaveTempsPrefix); + + if (!ThinLTOGeneratedObjectsDir.empty()) { + ThinGenerator.setGeneratedObjectsDirectory(ThinLTOGeneratedObjectsDir); + ThinGenerator.run(); + return; + } + ThinGenerator.run(); 
auto &Binaries = ThinGenerator.getProducedBinaries(); diff --git a/tools/lto/lto.cpp b/tools/lto/lto.cpp index 2f0dd26d64d..aa61f2ad2ff 100644 --- a/tools/lto/lto.cpp +++ b/tools/lto/lto.cpp @@ -488,6 +488,16 @@ LTOObjectBuffer thinlto_module_get_object(thinlto_code_gen_t cg, MemBuffer->getBufferSize()}; } +unsigned int thinlto_module_get_num_object_files(thinlto_code_gen_t cg) { + return unwrap(cg)->getProducedBinaryFiles().size(); +} +const char *thinlto_module_get_object_file(thinlto_code_gen_t cg, + unsigned int index) { + assert(index < unwrap(cg)->getProducedBinaryFiles().size() && + "Index overflow"); + return unwrap(cg)->getProducedBinaryFiles()[index].c_str(); +} + void thinlto_codegen_disable_codegen(thinlto_code_gen_t cg, lto_bool_t disable) { unwrap(cg)->disableCodeGen(disable); @@ -551,6 +561,11 @@ void thinlto_codegen_set_savetemps_dir(thinlto_code_gen_t cg, return unwrap(cg)->setSaveTempsDir(save_temps_dir); } +void thinlto_set_generated_objects_dir(thinlto_code_gen_t cg, + const char *save_temps_dir) { + unwrap(cg)->setGeneratedObjectsDirectory(save_temps_dir); +} + lto_bool_t thinlto_codegen_set_pic_model(thinlto_code_gen_t cg, lto_codegen_model model) { switch (model) { diff --git a/tools/lto/lto.exports b/tools/lto/lto.exports index 74091c2641b..2e09026ae50 100644 --- a/tools/lto/lto.exports +++ b/tools/lto/lto.exports @@ -64,4 +64,7 @@ thinlto_codegen_add_must_preserve_symbol thinlto_codegen_add_cross_referenced_symbol thinlto_codegen_set_final_cache_size_relative_to_available_space thinlto_codegen_set_codegen_only -thinlto_codegen_disable_codegen \ No newline at end of file +thinlto_codegen_disable_codegen +thinlto_module_get_num_object_files +thinlto_module_get_object_file +thinlto_set_generated_objects_dir \ No newline at end of file