diff --git a/include/llvm/Bitcode/BitcodeWriter.h b/include/llvm/Bitcode/BitcodeWriter.h index 7c3c4b2e0cb..f8b7fb341e8 100644 --- a/include/llvm/Bitcode/BitcodeWriter.h +++ b/include/llvm/Bitcode/BitcodeWriter.h @@ -28,18 +28,34 @@ namespace llvm { std::unique_ptr Stream; StringTableBuilder StrtabBuilder{StringTableBuilder::RAW}; - bool WroteStrtab = false; + + // Owns any strings created by the irsymtab writer until we create the + // string table. + BumpPtrAllocator Alloc; + + bool WroteStrtab = false, WroteSymtab = false; void writeBlob(unsigned Block, unsigned Record, StringRef Blob); + std::vector Mods; + public: /// Create a BitcodeWriter that writes to Buffer. BitcodeWriter(SmallVectorImpl &Buffer); ~BitcodeWriter(); + /// Attempt to write a symbol table to the bitcode file. This must be called + /// at most once after all modules have been written. + /// + /// A reader does not require a symbol table to interpret a bitcode file; + /// the symbol table is needed only to improve link-time performance. So + /// this function may decide not to write a symbol table. It may so decide + /// if, for example, the target is unregistered or the IR is malformed. + void writeSymtab(); + /// Write the bitcode file's string table. This must be called exactly once - /// after all modules have been written. + /// after all modules and the optional symbol table have been written. void writeStrtab(); /// Copy the string table for another module into this bitcode file. This diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h index 4e3e177cac8..5435e48ff42 100644 --- a/include/llvm/Bitcode/LLVMBitCodes.h +++ b/include/llvm/Bitcode/LLVMBitCodes.h @@ -22,7 +22,7 @@ namespace llvm { namespace bitc { -// The only top-level block types are MODULE, IDENTIFICATION and STRTAB. +// The only top-level block types are MODULE, IDENTIFICATION, STRTAB and SYMTAB. enum BlockIDs { // Blocks MODULE_BLOCK_ID = FIRST_APPLICATION_BLOCKID, @@ -57,6 +57,8 @@ enum BlockIDs { STRTAB_BLOCK_ID, FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID, + + SYMTAB_BLOCK_ID, }; /// Identification block contains a string that describes the producer details, @@ -571,6 +573,10 @@ enum StrtabCodes { STRTAB_BLOB = 1, }; +enum SymtabCodes { + SYMTAB_BLOB = 1, +}; + } // End bitc namespace } // End llvm namespace diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index feeba31908a..b2b1ea6de37 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -29,10 +29,12 @@ #include "llvm/IR/UseListOrder.h" #include "llvm/IR/ValueSymbolTable.h" #include "llvm/MC/StringTableBuilder.h" +#include "llvm/Object/IRSymtab.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Program.h" #include "llvm/Support/SHA1.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -3820,6 +3822,38 @@ void BitcodeWriter::writeBlob(unsigned Block, unsigned Record, StringRef Blob) { Stream->ExitBlock(); } +void BitcodeWriter::writeSymtab() { + assert(!WroteStrtab && !WroteSymtab); + + // If any module has module-level inline asm, we will require a registered asm + // parser for the target so that we can create an accurate symbol table for + // the module. + for (Module *M : Mods) { + if (M->getModuleInlineAsm().empty()) + continue; + + std::string Err; + const Triple TT(M->getTargetTriple()); + const Target *T = TargetRegistry::lookupTarget(TT.str(), Err); + if (!T || !T->hasMCAsmParser()) + return; + } + + WroteSymtab = true; + SmallVector Symtab; + // The irsymtab::build function may be unable to create a symbol table if the + // module is malformed (e.g. it contains an invalid alias). Writing a symbol + // table is not required for correctness, but we still want to be able to + // write malformed modules to bitcode files, so swallow the error. + if (Error E = irsymtab::build(Mods, Symtab, StrtabBuilder, Alloc)) { + consumeError(std::move(E)); + return; + } + + writeBlob(bitc::SYMTAB_BLOCK_ID, bitc::SYMTAB_BLOB, + {Symtab.data(), Symtab.size()}); +} + void BitcodeWriter::writeStrtab() { assert(!WroteStrtab); @@ -3843,6 +3877,15 @@ void BitcodeWriter::writeModule(const Module *M, bool ShouldPreserveUseListOrder, const ModuleSummaryIndex *Index, bool GenerateHash, ModuleHash *ModHash) { + assert(!WroteStrtab); + + // The Mods vector is used by irsymtab::build, which requires non-const + // Modules in case it needs to materialize metadata. But the bitcode writer + // requires that the module is materialized, so we can cast to non-const here, + // after checking that it is in fact materialized. + assert(M->isMaterialized()); + Mods.push_back(const_cast(M)); + ModuleBitcodeWriter ModuleWriter(M, Buffer, StrtabBuilder, *Stream, ShouldPreserveUseListOrder, Index, GenerateHash, ModHash); @@ -3875,6 +3918,7 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out, BitcodeWriter Writer(Buffer); Writer.writeModule(M, ShouldPreserveUseListOrder, Index, GenerateHash, ModHash); + Writer.writeSymtab(); Writer.writeStrtab(); if (TT.isOSDarwin() || TT.isOSBinFormatMachO()) diff --git a/lib/Object/CMakeLists.txt b/lib/Object/CMakeLists.txt index 1d08a9efd8b..fd5e7707c54 100644 --- a/lib/Object/CMakeLists.txt +++ b/lib/Object/CMakeLists.txt @@ -27,4 +27,5 @@ add_llvm_library(LLVMObject DEPENDS intrinsics_gen + llvm_vcsrevision_h ) diff --git a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp index 802f470ffe1..8d494fe9cde 100644 --- a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp +++ b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp @@ -371,6 +371,7 @@ void splitAndWriteThinLTOBitcode( /*GenerateHash=*/true, &ModHash); W.writeModule(MergedM.get(), /*ShouldPreserveUseListOrder=*/false, &MergedMIndex); + W.writeSymtab(); W.writeStrtab(); OS << Buffer; @@ -385,6 +386,7 @@ void splitAndWriteThinLTOBitcode( /*GenerateHash=*/false, &ModHash); W2.writeModule(MergedM.get(), /*ShouldPreserveUseListOrder=*/false, &MergedMIndex); + W2.writeSymtab(); W2.writeStrtab(); *ThinLinkOS << Buffer; } diff --git a/test/Bitcode/thinlto-alias.ll b/test/Bitcode/thinlto-alias.ll index 2c235f0620e..81fbb767ba9 100644 --- a/test/Bitcode/thinlto-alias.ll +++ b/test/Bitcode/thinlto-alias.ll @@ -18,7 +18,7 @@ ; CHECK-NEXT: ; CHECK: ; CHECK-NEXT: ; CHECK: ; CHECK: ; CHECK: ; CHECK: ; CHECK: blob data = '\x00\x00\x00\x00\x06\x00\x00\x00\x08\x00\x00\x00D\x00\x00\x00\x01\x00\x00\x00P\x00\x00\x00\x00\x00\x00\x00P\x00\x00\x00\x02\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00\x0E\x00\x00\x00\x18\x00\x00\x00&\x00\x00\x00\x0B\x00\x00\x001\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\xFF\xFF\xFF\xFF\x00$\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\xFF\xFF\xFF\xFF\x08$\x00\x00' +; BCA-NEXT: +; BCA-NEXT: blob data = 'foobarproducerx86_64-unknown-linux-gnuirsymtab.ll' +; BCA-NEXT: + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" +source_filename = "irsymtab.ll" + +define void @foo() { + ret void +} + +declare void @bar() diff --git a/test/ThinLTO/X86/autoupgrade.ll b/test/ThinLTO/X86/autoupgrade.ll index cbbe833d262..2188d031c43 100644 --- a/test/ThinLTO/X86/autoupgrade.ll +++ b/test/ThinLTO/X86/autoupgrade.ll @@ -10,7 +10,7 @@ ; RUN: | llvm-bcanalyzer -dump | FileCheck %s ; CHECK: