[ThinLTO] Add caching to the new LTO API

Add the ability to plug a cache on the LTO API.
I tried to write it such that a linker implementation can
control the cache backend. This is intrusive and I'm
not totally happy with it, but I can't figure out a
better design right now.
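
The intended wiring, roughly: the linker returns a CacheObjectOutput from
the AddOutput callback it passes to LTO::run, and gets the (possibly
cached) object file back through a second callback when the output is
destroyed. A minimal sketch, assuming the API introduced in this patch;
addObjectToLink and the cache directory are illustrative placeholders:

    auto AddOutput =
        [&](size_t Task) -> std::unique_ptr<lto::NativeObjectOutput> {
      return llvm::make_unique<lto::CacheObjectOutput>(
          "/tmp/cache", [Task](std::unique_ptr<MemoryBuffer> Buffer) {
            // Fires on cache hits as well as after a miss is compiled and
            // committed; the linker takes ownership of the object buffer.
            addObjectToLink(Task, std::move(Buffer)); // hypothetical hook
          });
    };
    check(Lto.run(AddOutput), "LTO::run failed"); // as llvm-lto2 does below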

Differential Revision: https://reviews.llvm.org/D23599

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@279576 91177308-0d34-0410-b5e6-96231b3b80d8
Mehdi Amini, 2016-08-23 21:30:12 +00:00
commit 242275b349 (parent e9aa7e0db9)
8 changed files with 393 additions and 35 deletions

include/llvm/LTO/Caching.h (new file)

@ -0,0 +1,100 @@
//===- Caching.h - LLVM Link Time Optimizer Cache Handling ---------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the lto::CacheObjectOutput data structure, which allows
// clients to add a filesystem cache to ThinLTO.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LTO_CACHING_H
#define LLVM_LTO_CACHING_H
#include "llvm/ADT/SmallString.h"
#include "llvm/LTO/Config.h"
#include "llvm/Support/MemoryBuffer.h"
namespace llvm {
namespace lto {
/// Type for client-supplied callback when a buffer is loaded from the cache.
typedef std::function<void(std::unique_ptr<MemoryBuffer>)> AddBufferFn;
/// Manage caching on the filesystem.
///
/// The general scheme is the following:
///
/// void do_stuff(AddBufferFn CallBack) {
/// /* ... */
/// {
/// /* Create the CacheObjectOutput pointing to a cache directory */
/// auto Output = CacheObjectOutput("/tmp/cache", CallBack);
///
/// /* Call some processing function */
/// process(Output);
///
/// } /* Callback is only called now, on destruction of the Output object */
/// /* ... */
/// }
///
///
/// void process(NativeObjectOutput &Output) {
/// /* check if caching is supported */
/// if (Output.isCachingEnabled()) {
/// auto Key = ComputeKeyForEntry(...); // "expensive" call
/// if (Output.tryLoadFromCache(Key))
/// return; // Cache hit
/// }
///
/// auto OS = Output.getStream();
///
/// OS << ...;
/// /* Note that the callback is not called here, but only when the caller
/// destroys Output */
/// }
///
class CacheObjectOutput : public NativeObjectOutput {
/// Path to the on-disk cache directory
StringRef CacheDirectoryPath;
/// Path to this entry in the cache, initialized by tryLoadFromCache().
SmallString<128> EntryPath;
/// Path to temporary file used to buffer output that will be committed to the
/// cache entry when this object is destroyed
SmallString<128> TempFilename;
/// User-supplied callback, called when the buffer is pulled out of the cache
/// (potentially after creating it).
AddBufferFn AddBuffer;
public:
/// The destructor pulls the entry from the cache and calls the AddBuffer
/// callback, after committing the entry into the cache on miss.
~CacheObjectOutput();
/// Create a CacheObjectOutput: the client is supposed to create it in the
/// callback supplied to LTO::run. The \p CacheDirectoryPath points to the
/// directory on disk where to store the cache, and \p AddBuffer will be
/// called when the buffer is pulled out of the cache (potentially after
/// creating it).
CacheObjectOutput(StringRef CacheDirectoryPath, AddBufferFn AddBuffer)
: CacheDirectoryPath(CacheDirectoryPath), AddBuffer(AddBuffer) {}
/// Return an allocated stream for the output, or null in case of failure.
std::unique_ptr<raw_pwrite_stream> getStream() override;
/// Set EntryPath, try loading from a possible cache first, return true on
/// cache hit.
bool tryLoadFromCache(StringRef Key) override;
/// Returns true to signal that this implementation of NativeObjectOutput
/// supports caching.
bool isCachingEnabled() const override { return true; }
};
} // namespace lto
} // namespace llvm
#endif


@ -32,10 +32,33 @@ namespace lto {
/// Abstract class representing a single Task output to be implemented by the
/// client of the LTO API.
///
/// The general scheme of how the API is called is the following:
///
/// void process(NativeObjectOutput &Output) {
/// /* check if caching is supported */
/// if (Output.isCachingEnabled()) {
/// auto Key = ComputeKeyForEntry(...); // "expensive" call
/// if (Output.tryLoadFromCache(Key))
/// return; // Cache hit
/// }
///
/// auto OS = Output.getStream();
///
/// OS << ...;
/// }
///
class NativeObjectOutput {
public:
// Return an allocated stream for the output, or null in case of failure.
virtual std::unique_ptr<raw_pwrite_stream> getStream() = 0;
// Try loading from a possible cache first, return true on cache hit.
virtual bool tryLoadFromCache(StringRef Key) { return false; }
// Returns true if a cache is available
virtual bool isCachingEnabled() const { return false; }
virtual ~NativeObjectOutput() = default;
};
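
For contrast with the caching implementation, a client that does not cache
only needs getStream(); the tryLoadFromCache() and isCachingEnabled()
defaults already report that no cache is available. A minimal file-backed
sketch (modeled loosely on the LTOOutput helper in llvm-lto2 below; the
class name is illustrative and error handling is reduced to a fatal error):

    class FileOutput : public NativeObjectOutput {
      std::string Path;
    public:
      FileOutput(std::string Path) : Path(std::move(Path)) {}
      std::unique_ptr<raw_pwrite_stream> getStream() override {
        std::error_code EC;
        auto OS = llvm::make_unique<raw_fd_ostream>(Path, EC, sys::fs::F_None);
        if (EC)
          report_fatal_error(Twine("Failed to open ") + Path + ": " +
                             EC.message());
        return std::move(OS);
      }
    };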


@ -48,6 +48,7 @@ endif()
add_llvm_library(LLVMLTO
Caching.cpp
LTO.cpp
LTOBackend.cpp
LTOModule.cpp

lib/LTO/Caching.cpp (new file)

@ -0,0 +1,104 @@
//===- Caching.cpp - LLVM Link Time Optimizer Cache Handling -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements caching for ThinLTO.
//
//===----------------------------------------------------------------------===//
#include "llvm/LTO/Caching.h"
#ifdef HAVE_LLVM_REVISION
#include "LLVMLTORevision.h"
#endif
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::lto;
static void commitEntry(StringRef TempFilename, StringRef EntryPath) {
// Rename to the final destination; a race here should be benign, since
// concurrent writers for the same key produce identical content.
auto EC = sys::fs::rename(TempFilename, EntryPath);
if (EC) {
// Renaming failed, probably not the same filesystem, copy and delete.
{
auto ReloadedBufferOrErr = MemoryBuffer::getFile(TempFilename);
if (auto EC = ReloadedBufferOrErr.getError())
report_fatal_error(Twine("Failed to open temp file '") + TempFilename +
"': " + EC.message() + "\n");
raw_fd_ostream OS(EntryPath, EC, sys::fs::F_None);
if (EC)
report_fatal_error(Twine("Failed to open ") + EntryPath +
" to save cached entry\n");
// It is unclear what guarantees hold if two processes are doing this
// at the same time.
OS << (*ReloadedBufferOrErr)->getBuffer();
}
sys::fs::remove(TempFilename);
}
}
CacheObjectOutput::~CacheObjectOutput() {
if (EntryPath.empty())
// The entry was never used by the client (tryLoadFromCache() wasn't called)
return;
// TempFilename is only set if getStream() was called, i.e. on a cache miss
// when tryLoadFromCache() returned false. EntryPath points to a real cache
// entry if a valid Key was submitted; otherwise tryLoadFromCache() set it to
// CacheDirectoryPath.
if (!TempFilename.empty()) {
if (EntryPath == CacheDirectoryPath)
// The Key supplied to tryLoadFromCache() was empty; do not commit the temp.
EntryPath = TempFilename;
else
// We commit the tempfile into the cache now, by moving it to EntryPath.
commitEntry(TempFilename, EntryPath);
}
// Load the entry from the cache now.
auto ReloadedBufferOrErr = MemoryBuffer::getFile(EntryPath);
if (auto EC = ReloadedBufferOrErr.getError())
report_fatal_error(Twine("Can't reload cached file '") + EntryPath + "': " +
EC.message() + "\n");
// Supply the resulting buffer to the user.
AddBuffer(std::move(*ReloadedBufferOrErr));
}
// Return an allocated stream for the output, or null in case of failure.
std::unique_ptr<raw_pwrite_stream> CacheObjectOutput::getStream() {
assert(!EntryPath.empty() && "API Violation: client didn't call "
"tryLoadFromCache() before getStream()");
// Write to a temporary file to avoid race conditions.
int TempFD;
std::error_code EC =
sys::fs::createTemporaryFile("Thin", "tmp.o", TempFD, TempFilename);
if (EC) {
errs() << "Error: " << EC.message() << "\n";
report_fatal_error("ThinLTO: Can't get a temporary file");
}
return llvm::make_unique<raw_fd_ostream>(TempFD, /* ShouldClose */ true);
}
// Try loading from a possible cache first, return true on cache hit.
bool CacheObjectOutput::tryLoadFromCache(StringRef Key) {
assert(!CacheDirectoryPath.empty() &&
"CacheObjectOutput was initialized without a cache path");
if (Key.empty()) {
// Client didn't compute a valid key. EntryPath has been set to
// CacheDirectoryPath.
EntryPath = CacheDirectoryPath;
return false;
}
sys::path::append(EntryPath, CacheDirectoryPath, Key);
return sys::fs::exists(EntryPath);
}
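
Putting the pieces together, the lifetime of one cache entry from the
client side looks like this (a sketch restating the usage scheme from
Caching.h; ComputeKeyForEntry and Payload are illustrative placeholders):

    {
      CacheObjectOutput Output("/tmp/cache", AddBuffer);
      auto Key = ComputeKeyForEntry(...); // e.g. computeCacheKey in LTO.cpp
      if (!Output.tryLoadFromCache(Key))  // miss: sets EntryPath
        *Output.getStream() << Payload;   // write object to a temp file
    } // Destructor: on a miss, commit the temp file to EntryPath; in either
      // case, reload the entry and hand the buffer to AddBuffer.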


@ -25,6 +25,7 @@
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SHA1.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/ThreadPool.h"
@ -41,6 +42,61 @@ using namespace llvm;
using namespace lto;
using namespace object;
#define DEBUG_TYPE "lto"
// Returns a unique hash for the Module considering the current list of
// export/import and other global analysis results.
// The hash is produced in \p Key.
static void computeCacheKey(
SmallString<40> &Key, const ModuleSummaryIndex &Index, StringRef ModuleID,
const FunctionImporter::ImportMapTy &ImportList,
const FunctionImporter::ExportSetTy &ExportList,
const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
const GVSummaryMapTy &DefinedGlobals) {
// Compute the unique hash for this entry.
// This is based on the current compiler version, the module itself, the
// export list, the hash for every single module in the import list, the
// list of ResolvedODR for the module, and the list of preserved symbols.
SHA1 Hasher;
// Start with the compiler revision
Hasher.update(LLVM_VERSION_STRING);
#ifdef HAVE_LLVM_REVISION
Hasher.update(LLVM_REVISION);
#endif
// Include the hash for the current module
auto ModHash = Index.getModuleHash(ModuleID);
Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
for (auto F : ExportList)
// The export list can impact the internalization, be conservative here
Hasher.update(ArrayRef<uint8_t>((uint8_t *)&F, sizeof(F)));
// Include the hash for every module we import functions from
for (auto &Entry : ImportList) {
auto ModHash = Index.getModuleHash(Entry.first());
Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
}
// Include the hash for the resolved ODR.
for (auto &Entry : ResolvedODR) {
Hasher.update(ArrayRef<uint8_t>((const uint8_t *)&Entry.first,
sizeof(GlobalValue::GUID)));
Hasher.update(ArrayRef<uint8_t>((const uint8_t *)&Entry.second,
sizeof(GlobalValue::LinkageTypes)));
}
// Include the hash for the linkage type to reflect internalization and weak
// resolution.
for (auto &GS : DefinedGlobals) {
GlobalValue::LinkageTypes Linkage = GS.second->linkage();
Hasher.update(
ArrayRef<uint8_t>((const uint8_t *)&Linkage, sizeof(Linkage)));
}
Key = toHex(Hasher.result());
}
// Simple helper to load a module from bitcode
std::unique_ptr<Module>
llvm::loadModuleFromBuffer(const MemoryBufferRef &Buffer, LLVMContext &Context,
@ -429,9 +485,12 @@ public:
ModuleToDefinedGVSummaries(ModuleToDefinedGVSummaries) {}
virtual ~ThinBackendProc() {}
virtual Error start(unsigned Task, MemoryBufferRef MBRef,
const FunctionImporter::ImportMapTy &ImportList,
MapVector<StringRef, MemoryBufferRef> &ModuleMap) = 0;
virtual Error start(
unsigned Task, MemoryBufferRef MBRef,
const FunctionImporter::ImportMapTy &ImportList,
const FunctionImporter::ExportSetTy &ExportList,
const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
MapVector<StringRef, MemoryBufferRef> &ModuleMap) = 0;
virtual Error wait() = 0;
};
@ -451,35 +510,57 @@ public:
BackendThreadPool(ThinLTOParallelismLevel),
AddOutput(std::move(AddOutput)) {}
Error
runThinLTOBackendThread(AddOutputFn AddOutput, unsigned Task,
MemoryBufferRef MBRef,
ModuleSummaryIndex &CombinedIndex,
const FunctionImporter::ImportMapTy &ImportList,
const GVSummaryMapTy &DefinedGlobals,
MapVector<StringRef, MemoryBufferRef> &ModuleMap) {
LTOLLVMContext BackendContext(Conf);
Error runThinLTOBackendThread(
AddOutputFn AddOutput, unsigned Task, MemoryBufferRef MBRef,
ModuleSummaryIndex &CombinedIndex,
const FunctionImporter::ImportMapTy &ImportList,
const FunctionImporter::ExportSetTy &ExportList,
const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
const GVSummaryMapTy &DefinedGlobals,
MapVector<StringRef, MemoryBufferRef> &ModuleMap) {
auto ModuleIdentifier = MBRef.getBufferIdentifier();
auto Output = AddOutput(Task);
if (Output->isCachingEnabled()) {
SmallString<40> Key;
// The module may be cached; compute the key identifying it in the cache.
computeCacheKey(Key, CombinedIndex, ModuleIdentifier, ImportList,
ExportList, ResolvedODR, DefinedGlobals);
if (Output->tryLoadFromCache(Key))
return Error();
}
LTOLLVMContext BackendContext(Conf);
ErrorOr<std::unique_ptr<Module>> MOrErr =
parseBitcodeFile(MBRef, BackendContext);
assert(MOrErr && "Unable to load module in thread?");
return thinBackend(Conf, Task, AddOutput, **MOrErr, CombinedIndex,
auto AddOutputWrapper = [&](unsigned TaskId) {
assert(Task == TaskId && "Unexpected TaskId mismatch");
return std::move(Output);
};
return thinBackend(Conf, Task, AddOutputWrapper, **MOrErr, CombinedIndex,
ImportList, DefinedGlobals, ModuleMap);
}
Error start(unsigned Task, MemoryBufferRef MBRef,
const FunctionImporter::ImportMapTy &ImportList,
MapVector<StringRef, MemoryBufferRef> &ModuleMap) override {
Error start(
unsigned Task, MemoryBufferRef MBRef,
const FunctionImporter::ImportMapTy &ImportList,
const FunctionImporter::ExportSetTy &ExportList,
const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
MapVector<StringRef, MemoryBufferRef> &ModuleMap) override {
StringRef ModulePath = MBRef.getBufferIdentifier();
BackendThreadPool.async(
[=](MemoryBufferRef MBRef, ModuleSummaryIndex &CombinedIndex,
const FunctionImporter::ImportMapTy &ImportList,
const FunctionImporter::ExportSetTy &ExportList,
const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>
&ResolvedODR,
GVSummaryMapTy &DefinedGlobals,
MapVector<StringRef, MemoryBufferRef> &ModuleMap) {
Error E =
runThinLTOBackendThread(AddOutput, Task, MBRef, CombinedIndex,
ImportList, DefinedGlobals, ModuleMap);
Error E = runThinLTOBackendThread(
AddOutput, Task, MBRef, CombinedIndex, ImportList, ExportList,
ResolvedODR, DefinedGlobals, ModuleMap);
if (E) {
std::unique_lock<std::mutex> L(ErrMu);
if (Err)
@ -489,6 +570,7 @@ public:
}
},
MBRef, std::ref(CombinedIndex), std::ref(ImportList),
std::ref(ExportList), std::ref(ResolvedODR),
std::ref(ModuleToDefinedGVSummaries[ModulePath]), std::ref(ModuleMap));
return Error();
}
@ -550,9 +632,12 @@ public:
return NewPath.str();
}
Error start(unsigned Task, MemoryBufferRef MBRef,
const FunctionImporter::ImportMapTy &ImportList,
MapVector<StringRef, MemoryBufferRef> &ModuleMap) override {
Error start(
unsigned Task, MemoryBufferRef MBRef,
const FunctionImporter::ImportMapTy &ImportList,
const FunctionImporter::ExportSetTy &ExportList,
const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
MapVector<StringRef, MemoryBufferRef> &ModuleMap) override {
StringRef ModulePath = MBRef.getBufferIdentifier();
std::string NewModulePath =
getThinLTOOutputFile(ModulePath, OldPrefix, NewPrefix);
@ -638,18 +723,25 @@ Error LTO::runThinLTO(AddOutputFn AddOutput) {
ExportedGUIDs.count(GUID);
};
thinLTOInternalizeAndPromoteInIndex(ThinLTO.CombinedIndex, isExported);
thinLTOResolveWeakForLinkerInIndex(
ThinLTO.CombinedIndex, isPrevailing,
[](StringRef, GlobalValue::GUID, GlobalValue::LinkageTypes) {});
StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
auto recordNewLinkage = [&](StringRef ModuleIdentifier,
GlobalValue::GUID GUID,
GlobalValue::LinkageTypes NewLinkage) {
ResolvedODR[ModuleIdentifier][GUID] = NewLinkage;
};
thinLTOResolveWeakForLinkerInIndex(ThinLTO.CombinedIndex, isPrevailing,
recordNewLinkage);
std::unique_ptr<ThinBackendProc> BackendProc = ThinLTO.Backend(
Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, AddOutput);
// Partition numbers for ThinLTO jobs start at 1 (see comments for
// GlobalResolution in LTO.h). Task numbers, however, start at
// ParallelCodeGenParallelismLevel, as tasks 0 through
// ParallelCodeGenParallelismLevel-1 are reserved for parallel code generation
// partitions.
// ParallelCodeGenParallelismLevel if an LTO module is present, as tasks 0
// through ParallelCodeGenParallelismLevel-1 are reserved for parallel code
// generation partitions.
unsigned Task = RegularLTO.CombinedModule
? RegularLTO.ParallelCodeGenParallelismLevel
: 0;
@ -657,7 +749,8 @@ Error LTO::runThinLTO(AddOutputFn AddOutput) {
for (auto &Mod : ThinLTO.ModuleMap) {
if (Error E = BackendProc->start(Task, Mod.second, ImportLists[Mod.first],
ThinLTO.ModuleMap))
ExportLists[Mod.first],
ResolvedODR[Mod.first], ThinLTO.ModuleMap))
return E;
++Task;


@ -143,6 +143,20 @@ bool opt(Config &C, TargetMachine *TM, unsigned Task, Module &M,
return true;
}
/// Monolithic LTO does not support caching (yet); this is a convenient wrapper
/// around AddOutput to work around that.
static AddOutputFn getUncachedOutputWrapper(AddOutputFn &AddOutput,
unsigned Task) {
return [Task, &AddOutput](unsigned TaskId) {
auto Output = AddOutput(Task);
if (Output->isCachingEnabled() && Output->tryLoadFromCache(""))
report_fatal_error("Cache hit without a valid key?");
assert(Task == TaskId && "Unexpected TaskId mismatch");
return Output;
};
}
void codegen(Config &C, TargetMachine *TM, AddOutputFn AddOutput, unsigned Task,
Module &M) {
if (C.PreCodeGenModuleHook && !C.PreCodeGenModuleHook(Task, M))
@ -190,7 +204,10 @@ void splitCodeGen(Config &C, TargetMachine *TM, AddOutputFn AddOutput,
std::unique_ptr<TargetMachine> TM =
createTargetMachine(C, MPartInCtx->getTargetTriple(), T);
codegen(C, TM.get(), AddOutput, ThreadId, *MPartInCtx);
codegen(C, TM.get(),
getUncachedOutputWrapper(AddOutput, ThreadId), ThreadId,
*MPartInCtx);
},
// Pass BC using std::move to ensure that it get moved rather than
// copied into the thread's context.
@ -228,11 +245,12 @@ Error lto::backend(Config &C, AddOutputFn AddOutput,
if (!opt(C, TM.get(), 0, *M, /*IsThinLto=*/false))
return Error();
if (ParallelCodeGenParallelismLevel == 1)
codegen(C, TM.get(), AddOutput, 0, *M);
else
if (ParallelCodeGenParallelismLevel == 1) {
codegen(C, TM.get(), getUncachedOutputWrapper(AddOutput, 0), 0, *M);
} else {
splitCodeGen(C, TM.get(), AddOutput, ParallelCodeGenParallelismLevel,
std::move(M));
}
return Error();
}


@ -1,5 +1,5 @@
; RUN: opt -module-summary %s -o %t.bc
; RUN: opt -module-summary %p/Inputs/funcimport.ll -o %t2.bc
; RUN: opt -module-summary %p/Inputs/cache.ll -o %t2.bc
; Verify that enabling caching is working
; RUN: rm -Rf %t.cache && mkdir %t.cache
@ -7,6 +7,14 @@
; RUN: ls %t.cache/llvmcache.timestamp
; RUN: ls %t.cache | count 3
; Verify that enabling caching is working with llvm-lto2
; RUN: rm -Rf %t.cache && mkdir %t.cache
; RUN: llvm-lto2 -o %t.o %t2.bc %t.bc -cache-dir %t.cache \
; RUN: -r=%t2.bc,_main,plx \
; RUN: -r=%t2.bc,_globalfunc,lx \
; RUN: -r=%t.bc,_globalfunc,plx
; RUN: ls %t.cache | count 2
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.11.0"


@ -16,6 +16,7 @@
//
//===----------------------------------------------------------------------===//
#include "llvm/LTO/Caching.h"
#include "llvm/LTO/LTO.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/TargetSelect.h"
@ -31,6 +32,9 @@ static cl::opt<std::string> OutputFilename("o", cl::Required,
cl::desc("Output filename"),
cl::value_desc("filename"));
static cl::opt<std::string> CacheDir("cache-dir", cl::desc("Cache Directory"),
cl::value_desc("directory"));
static cl::opt<bool> SaveTemps("save-temps", cl::desc("Save temporary files"));
static cl::opt<bool>
@ -187,9 +191,16 @@ int main(int argc, char **argv) {
if (HasErrors)
return 1;
auto AddOutput = [&](size_t Task) {
auto AddOutput =
[&](size_t Task) -> std::unique_ptr<lto::NativeObjectOutput> {
std::string Path = OutputFilename + "." + utostr(Task);
return llvm::make_unique<LTOOutput>(std::move(Path));
if (CacheDir.empty())
return llvm::make_unique<LTOOutput>(std::move(Path));
return llvm::make_unique<CacheObjectOutput>(
CacheDir, [Path](std::unique_ptr<MemoryBuffer> Buffer) {
*LTOOutput(Path).getStream() << Buffer->getBuffer();
});
};
check(Lto.run(AddOutput), "LTO::run failed");