From d1504012781d76b01ccd6c386cef888622beda32 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Fri, 16 Dec 2016 00:26:30 +0000 Subject: [PATCH] IPO: Introduce ThinLTOBitcodeWriter pass. This pass prepares a module containing type metadata for ThinLTO by splitting it into regular and thin LTO parts if possible, and writing both parts to a multi-module bitcode file. Modules that do not contain type metadata are written unmodified as a single module. All globals with type metadata are added to the regular LTO module, and the rest are added to the thin LTO module. Differential Revision: https://reviews.llvm.org/D27324 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@289899 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/InitializePasses.h | 1 + include/llvm/Transforms/IPO.h | 4 + lib/Transforms/IPO/CMakeLists.txt | 1 + lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp | 344 ++++++++++++++++++ .../ThinLTOBitcodeWriter/no-type-md.ll | 13 + .../split-internal-typeid.ll | 40 ++ .../ThinLTOBitcodeWriter/split-internal1.ll | 27 ++ .../ThinLTOBitcodeWriter/split-internal2.ll | 32 ++ test/Transforms/ThinLTOBitcodeWriter/split.ll | 26 ++ .../ThinLTOBitcodeWriter/unsplittable.ll | 21 ++ tools/opt/opt.cpp | 8 +- 11 files changed, 516 insertions(+), 1 deletion(-) create mode 100644 lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp create mode 100644 test/Transforms/ThinLTOBitcodeWriter/no-type-md.ll create mode 100644 test/Transforms/ThinLTOBitcodeWriter/split-internal-typeid.ll create mode 100644 test/Transforms/ThinLTOBitcodeWriter/split-internal1.ll create mode 100644 test/Transforms/ThinLTOBitcodeWriter/split-internal2.ll create mode 100644 test/Transforms/ThinLTOBitcodeWriter/split.ll create mode 100644 test/Transforms/ThinLTOBitcodeWriter/unsplittable.ll diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h index b00d26b8e87..5c90519dfb4 100644 --- a/include/llvm/InitializePasses.h +++ b/include/llvm/InitializePasses.h @@ -354,6 +354,7 @@ 
void initializeVirtRegRewriterPass(PassRegistry&); void initializeWholeProgramDevirtPass(PassRegistry &); void initializeWinEHPreparePass(PassRegistry&); void initializeWriteBitcodePassPass(PassRegistry &); +void initializeWriteThinLTOBitcodePass(PassRegistry &); void initializeXRayInstrumentationPass(PassRegistry &); } diff --git a/include/llvm/Transforms/IPO.h b/include/llvm/Transforms/IPO.h index 34cba185f9b..eef151af73f 100644 --- a/include/llvm/Transforms/IPO.h +++ b/include/llvm/Transforms/IPO.h @@ -28,6 +28,7 @@ class Pass; class Function; class BasicBlock; class GlobalValue; +class raw_ostream; //===----------------------------------------------------------------------===// // @@ -235,6 +236,9 @@ ModulePass *createGlobalSplitPass(); ModulePass *createSampleProfileLoaderPass(); ModulePass *createSampleProfileLoaderPass(StringRef Name); +/// Write ThinLTO-ready bitcode to Str. +ModulePass *createWriteThinLTOBitcodePass(raw_ostream &Str); + } // End llvm namespace #endif diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt index fffa1c3356a..67f18a307b9 100644 --- a/lib/Transforms/IPO/CMakeLists.txt +++ b/lib/Transforms/IPO/CMakeLists.txt @@ -28,6 +28,7 @@ add_llvm_library(LLVMipo SampleProfile.cpp StripDeadPrototypes.cpp StripSymbols.cpp + ThinLTOBitcodeWriter.cpp WholeProgramDevirt.cpp ADDITIONAL_HEADER_DIRS diff --git a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp new file mode 100644 index 00000000000..3680cfc813a --- /dev/null +++ b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp @@ -0,0 +1,344 @@ +//===- ThinLTOBitcodeWriter.cpp - Bitcode writing pass for ThinLTO --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This pass prepares a module containing type metadata for ThinLTO by splitting +// it into regular and thin LTO parts if possible, and writing both parts to +// a multi-module bitcode file. Modules that do not contain type metadata are +// written unmodified as a single module. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO.h" +#include "llvm/Analysis/ModuleSummaryAnalysis.h" +#include "llvm/Analysis/TypeMetadataUtils.h" +#include "llvm/Bitcode/BitcodeWriter.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" +#include "llvm/Support/ScopedPrinter.h" +#include "llvm/Transforms/Utils/Cloning.h" +using namespace llvm; + +namespace { + +// Produce a unique identifier for this module by taking the MD5 sum of the +// names of the module's strong external symbols. This identifier is +// normally guaranteed to be unique, or the program would fail to link due to +// multiply defined symbols. +// +// If the module has no strong external symbols (such a module may still have a +// semantic effect if it performs global initialization), we cannot produce a +// unique identifier for this module, so we return the empty string, which +// causes the entire module to be written as a regular LTO module. 
+std::string getModuleId(Module *M) {
+  MD5 Md5;
+  bool ExportsSymbols = false;
+  // Hash the name of every defined, externally linked global whose name does
+  // not start with "llvm.". A zero byte is hashed after each name as a
+  // separator.
+  auto AddGlobal = [&](GlobalValue &GV) {
+    if (GV.isDeclaration() || GV.getName().startswith("llvm.") ||
+        !GV.hasExternalLinkage())
+      return;
+    ExportsSymbols = true;
+    Md5.update(GV.getName());
+    Md5.update(ArrayRef<uint8_t>{0});
+  };
+
+  for (auto &F : *M)
+    AddGlobal(F);
+  for (auto &GV : M->globals())
+    AddGlobal(GV);
+  for (auto &GA : M->aliases())
+    AddGlobal(GA);
+  for (auto &IF : M->ifuncs())
+    AddGlobal(IF);
+
+  // Nothing qualified for hashing, so no identifier can be formed.
+  if (!ExportsSymbols)
+    return "";
+
+  MD5::MD5Result R;
+  Md5.final(R);
+
+  // The identifier is "$" followed by the stringified MD5 digest.
+  SmallString<32> Str;
+  MD5::stringifyResult(R, Str);
+  return ("$" + Str).str();
+}
+
+// Promote each local-linkage entity defined by ExportM and used by ImportM by
+// changing visibility and appending the given ModuleId.
+//
+// The definition in ExportM is renamed to <name><ModuleId> and given external
+// linkage and hidden visibility; the same-named value in ImportM is renamed to
+// match and is also made hidden.
+void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) {
+  auto PromoteInternal = [&](GlobalValue &ExportGV) {
+    // Only local-linkage entities are promoted.
+    if (!ExportGV.hasLocalLinkage())
+      return;
+
+    // Leave the entity alone unless ImportM has a value of the same name that
+    // actually has uses.
+    GlobalValue *ImportGV = ImportM.getNamedValue(ExportGV.getName());
+    if (!ImportGV || ImportGV->use_empty())
+      return;
+
+    // Compute the new name once, before either value is renamed.
+    std::string NewName = (ExportGV.getName() + ModuleId).str();
+
+    ExportGV.setName(NewName);
+    ExportGV.setLinkage(GlobalValue::ExternalLinkage);
+    ExportGV.setVisibility(GlobalValue::HiddenVisibility);
+
+    ImportGV->setName(NewName);
+    ImportGV->setVisibility(GlobalValue::HiddenVisibility);
+  };
+
+  for (auto &F : ExportM)
+    PromoteInternal(F);
+  for (auto &GV : ExportM.globals())
+    PromoteInternal(GV);
+  for (auto &GA : ExportM.aliases())
+    PromoteInternal(GA);
+  for (auto &IF : ExportM.ifuncs())
+    PromoteInternal(IF);
+}
+
+// Promote all internal (i.e. distinct) type ids used by the module by replacing
+// them with external type ids formed using the module id.
+//
+// Note that this needs to be done before we clone the module because each clone
+// will receive its own set of distinct metadata nodes.
+void promoteTypeIds(Module &M, StringRef ModuleId) {
+  // Maps each distinct type id node seen as an intrinsic argument to the
+  // MDString that replaces it.
+  DenseMap<Metadata *, Metadata *> LocalToGlobal;
+
+  // If argument ArgNo of CI wraps a distinct MDNode, replace that argument
+  // with an MDString-based type id, creating the replacement on first use.
+  auto ExternalizeTypeId = [&](CallInst *CI, unsigned ArgNo) {
+    Metadata *MD =
+        cast<MetadataAsValue>(CI->getArgOperand(ArgNo))->getMetadata();
+
+    if (isa<MDNode>(MD) && cast<MDNode>(MD)->isDistinct()) {
+      Metadata *&GlobalMD = LocalToGlobal[MD];
+      if (!GlobalMD) {
+        // The new name is the current map size followed by the module id.
+        std::string NewName =
+            (to_string(LocalToGlobal.size()) + ModuleId).str();
+        GlobalMD = MDString::get(M.getContext(), NewName);
+      }
+
+      CI->setArgOperand(ArgNo,
+                        MetadataAsValue::get(M.getContext(), GlobalMD));
+    }
+  };
+
+  // llvm.type.test carries its type id as argument 1.
+  if (Function *TypeTestFunc =
+          M.getFunction(Intrinsic::getName(Intrinsic::type_test))) {
+    for (const Use &U : TypeTestFunc->uses()) {
+      auto CI = cast<CallInst>(U.getUser());
+      ExternalizeTypeId(CI, 1);
+    }
+  }
+
+  // llvm.type.checked.load carries its type id as argument 2.
+  if (Function *TypeCheckedLoadFunc =
+          M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load))) {
+    for (const Use &U : TypeCheckedLoadFunc->uses()) {
+      auto CI = cast<CallInst>(U.getUser());
+      ExternalizeTypeId(CI, 2);
+    }
+  }
+
+  // Rebuild the !type attachments of every global object. An attachment whose
+  // type id (operand 1) was replaced above is re-created with the new id; all
+  // other attachments are re-added unchanged.
+  for (GlobalObject &GO : M.global_objects()) {
+    SmallVector<MDNode *, 1> MDs;
+    GO.getMetadata(LLVMContext::MD_type, MDs);
+
+    GO.eraseMetadata(LLVMContext::MD_type);
+    for (auto MD : MDs) {
+      auto I = LocalToGlobal.find(MD->getOperand(1));
+      if (I == LocalToGlobal.end()) {
+        GO.addMetadata(LLVMContext::MD_type, *MD);
+        continue;
+      }
+      GO.addMetadata(
+          LLVMContext::MD_type,
+          *MDNode::get(M.getContext(),
+                       ArrayRef<Metadata *>{MD->getOperand(0), I->second}));
+    }
+  }
+}
+
+// Drop unused globals, and drop type information from function declarations.
+// FIXME: If we made functions typeless then there would be no need to do this.
+void simplifyExternals(Module &M) {
+  // Every surviving function declaration is rewritten to this type.
+  FunctionType *EmptyFT =
+      FunctionType::get(Type::getVoidTy(M.getContext()), false);
+
+  // The iterator is advanced before F is touched because F may be erased.
+  for (auto I = M.begin(), E = M.end(); I != E;) {
+    Function &F = *I++;
+    if (F.isDeclaration() && F.use_empty()) {
+      F.eraseFromParent();
+      continue;
+    }
+
+    if (!F.isDeclaration() || F.getFunctionType() == EmptyFT)
+      continue;
+
+    // Replace the declaration with one of type EmptyFT that keeps the name and
+    // visibility; existing uses see it through a bitcast to the old type.
+    Function *NewF =
+        Function::Create(EmptyFT, GlobalValue::ExternalLinkage, "", &M);
+    NewF->setVisibility(F.getVisibility());
+    NewF->takeName(&F);
+    F.replaceAllUsesWith(ConstantExpr::getBitCast(NewF, F.getType()));
+    F.eraseFromParent();
+  }
+
+  for (auto I = M.global_begin(), E = M.global_end(); I != E;) {
+    GlobalVariable &GV = *I++;
+    if (GV.isDeclaration() && GV.use_empty()) {
+      GV.eraseFromParent();
+      continue;
+    }
+  }
+}
+
+// Turn every function, global variable and alias in M for which
+// ShouldKeepDefinition returns false into a declaration. Function bodies and
+// variable initializers are dropped together with their metadata, and such
+// variables are given external linkage. An alias is replaced by a new external
+// function or variable declaration that takes over its name and its uses.
+void filterModule(
+    Module *M, std::function<bool(const GlobalValue *)> ShouldKeepDefinition) {
+  for (Function &F : *M) {
+    if (ShouldKeepDefinition(&F))
+      continue;
+
+    F.deleteBody();
+    F.clearMetadata();
+  }
+
+  for (GlobalVariable &GV : M->globals()) {
+    if (ShouldKeepDefinition(&GV))
+      continue;
+
+    GV.setInitializer(nullptr);
+    GV.setLinkage(GlobalValue::ExternalLinkage);
+    GV.clearMetadata();
+  }
+
+  for (Module::alias_iterator I = M->alias_begin(), E = M->alias_end();
+       I != E;) {
+    GlobalAlias *GA = &*I++;
+    if (ShouldKeepDefinition(GA))
+      continue;
+
+    // I has already been advanced past GA and may equal E, so the value type
+    // must be read from GA rather than from I.
+    GlobalObject *GO;
+    if (GA->getValueType()->isFunctionTy())
+      GO = Function::Create(cast<FunctionType>(GA->getValueType()),
+                            GlobalValue::ExternalLinkage, "", M);
+    else
+      GO = new GlobalVariable(
+          *M, GA->getValueType(), false, GlobalValue::ExternalLinkage,
+          (Constant *)nullptr, "", (GlobalVariable *)nullptr,
+          GA->getThreadLocalMode(), GA->getType()->getAddressSpace());
+    GO->takeName(GA);
+    GA->replaceAllUsesWith(GO);
+    GA->eraseFromParent();
+  }
+}
+
+// If it's possible to split M into regular and thin LTO parts, do so and write
+// a multi-module bitcode file with the two parts to OS. Otherwise, write only a
+// regular LTO bitcode file to OS.
+void splitAndWriteThinLTOBitcode(raw_ostream &OS, Module &M) {
+  std::string ModuleId = getModuleId(&M);
+  if (ModuleId.empty()) {
+    // We couldn't generate a module ID for this module, just write it out as a
+    // regular LTO module.
+    WriteBitcodeToFile(&M, OS);
+    return;
+  }
+
+  promoteTypeIds(M, ModuleId);
+
+  // A global goes into the merged module when its base object is a global
+  // variable that carries !type metadata.
+  auto IsInMergedM = [&](const GlobalValue *GV) {
+    auto *GVar = dyn_cast<GlobalVariable>(GV->getBaseObject());
+    if (!GVar)
+      return false;
+
+    SmallVector<MDNode *, 1> MDs;
+    GVar->getMetadata(LLVMContext::MD_type, MDs);
+    return !MDs.empty();
+  };
+
+  // MergedM is a clone of M made with the IsInMergedM predicate; M itself then
+  // keeps only the definitions that the predicate rejects.
+  ValueToValueMapTy VMap;
+  std::unique_ptr<Module> MergedM(CloneModule(&M, VMap, IsInMergedM));
+
+  filterModule(&M, [&](const GlobalValue *GV) { return !IsInMergedM(GV); });
+
+  // Internals are promoted in both directions so that each module can refer to
+  // local-linkage entities that ended up in the other one.
+  promoteInternals(*MergedM, M, ModuleId);
+  promoteInternals(M, *MergedM, ModuleId);
+
+  simplifyExternals(*MergedM);
+
+  SmallVector<char, 0> Buffer;
+  BitcodeWriter W(Buffer);
+
+  // M is written first, together with a freshly built summary index and a
+  // hash; MergedM follows with the writer's default options.
+  // FIXME: Try to re-use BSI and PFI from the original module here.
+  ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, nullptr);
+  W.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index,
+                /*GenerateHash=*/true);
+
+  W.writeModule(MergedM.get());
+
+  OS << Buffer;
+}
+
+// Returns whether this module needs to be split because it uses type metadata.
+bool requiresSplit(Module &M) {
+  SmallVector<MDNode *, 1> MDs;
+  for (auto &GO : M.global_objects()) {
+    GO.getMetadata(LLVMContext::MD_type, MDs);
+    if (!MDs.empty())
+      return true;
+  }
+
+  return false;
+}
+
+// Write M to OS. A module with type metadata is handed to
+// splitAndWriteThinLTOBitcode, which ignores Index; any other module is
+// written as a single module using Index and with a hash.
+void writeThinLTOBitcode(raw_ostream &OS, Module &M,
+                         const ModuleSummaryIndex *Index) {
+  // See if this module has any type metadata. If so, we need to split it.
+  if (requiresSplit(M))
+    return splitAndWriteThinLTOBitcode(OS, M);
+
+  // Otherwise we can just write it out as a regular module.
+  WriteBitcodeToFile(&M, OS, /*ShouldPreserveUseListOrder=*/false, Index,
+                     /*GenerateHash=*/true);
+}
+
+// Legacy pass manager wrapper around writeThinLTOBitcode. It requires
+// ModuleSummaryIndexWrapperPass and forwards that pass's index to the writer.
+class WriteThinLTOBitcode : public ModulePass {
+  raw_ostream &OS; // raw_ostream to print on
+
+public:
+  static char ID; // Pass identification, replacement for typeid
+  // The default constructor writes to dbgs().
+  WriteThinLTOBitcode() : ModulePass(ID), OS(dbgs()) {
+    initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
+  }
+
+  explicit WriteThinLTOBitcode(raw_ostream &o)
+      : ModulePass(ID), OS(o) {
+    initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
+  }
+
+  StringRef getPassName() const override { return "ThinLTO Bitcode Writer"; }
+
+  bool runOnModule(Module &M) override {
+    const ModuleSummaryIndex *Index =
+        &(getAnalysis<ModuleSummaryIndexWrapperPass>().getIndex());
+    writeThinLTOBitcode(OS, M, Index);
+    return true;
+  }
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesAll();
+    AU.addRequired<ModuleSummaryIndexWrapperPass>();
+  }
+};
+} // anonymous namespace
+
+char WriteThinLTOBitcode::ID = 0;
+INITIALIZE_PASS_BEGIN(WriteThinLTOBitcode, "write-thinlto-bitcode",
+                      "Write ThinLTO Bitcode", false, true)
+INITIALIZE_PASS_DEPENDENCY(ModuleSummaryIndexWrapperPass)
+INITIALIZE_PASS_END(WriteThinLTOBitcode, "write-thinlto-bitcode",
+                    "Write ThinLTO Bitcode", false, true)
+
+ModulePass *llvm::createWriteThinLTOBitcodePass(raw_ostream &Str) {
+  return new WriteThinLTOBitcode(Str);
+}
diff --git a/test/Transforms/ThinLTOBitcodeWriter/no-type-md.ll b/test/Transforms/ThinLTOBitcodeWriter/no-type-md.ll
new file mode 100644
index 00000000000..f1ada67abe5
--- /dev/null
+++ b/test/Transforms/ThinLTOBitcodeWriter/no-type-md.ll
@@ -0,0 +1,13 @@
+; RUN: opt -thinlto-bc -o %t %s
+; RUN: llvm-dis -o - %t | FileCheck %s
+; RUN: llvm-bcanalyzer -dump %t | FileCheck --check-prefix=BCA %s
+
+; BCA: &1 | FileCheck --check-prefix=ERROR %s
+; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=M0 %s
+; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=M1 %s
+; RUN: llvm-bcanalyzer -dump %t0 | FileCheck --check-prefix=BCA0 %s
+; RUN: llvm-bcanalyzer -dump %t1 | FileCheck --check-prefix=BCA1 %s + +; ERROR: llvm-modextract: error: module index out of range; bitcode file contains 2 module(s) + +; BCA0: &1 | FileCheck --check-prefix=ERROR %s +; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=M0 %s +; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=M1 %s +; RUN: llvm-bcanalyzer -dump %t0 | FileCheck --check-prefix=BCA0 %s +; RUN: llvm-bcanalyzer -dump %t1 | FileCheck --check-prefix=BCA1 %s + +; ERROR: llvm-modextract: error: module index out of range; bitcode file contains 2 module(s) + +; BCA0: