diff --git a/include/llvm/Bitcode/BitcodeReader.h b/include/llvm/Bitcode/BitcodeReader.h index 2fd19ffdbab..30ae4abfa96 100644 --- a/include/llvm/Bitcode/BitcodeReader.h +++ b/include/llvm/Bitcode/BitcodeReader.h @@ -66,6 +66,8 @@ namespace llvm { bool ShouldLazyLoadMetadata); public: + ArrayRef getBuffer() const { return Buffer; } + /// Read the bitcode module and prepare for lazy deserialization of function /// bodies. If ShouldLazyLoadMetadata is true, lazily load metadata as well. Expected> diff --git a/include/llvm/Bitcode/BitcodeWriter.h b/include/llvm/Bitcode/BitcodeWriter.h index 57955dfcef0..4f72f98bbf9 100644 --- a/include/llvm/Bitcode/BitcodeWriter.h +++ b/include/llvm/Bitcode/BitcodeWriter.h @@ -18,9 +18,36 @@ #include namespace llvm { + class BitstreamWriter; class Module; class raw_ostream; + class BitcodeWriter { + SmallVectorImpl &Buffer; + std::unique_ptr Stream; + + public: + /// Create a BitcodeWriter that writes to Buffer. + BitcodeWriter(SmallVectorImpl &Buffer); + + ~BitcodeWriter(); + + /// Write the specified module to the buffer specified at construction time. + /// + /// If \c ShouldPreserveUseListOrder, encode the use-list order for each \a + /// Value in \c M. These will be reconstructed exactly when \a M is + /// deserialized. + /// + /// If \c Index is supplied, the bitcode will contain the summary index + /// (currently for use in ThinLTO optimization). + /// + /// \p GenerateHash enables hashing the Module and including the hash in the + /// bitcode (currently for use in ThinLTO incremental build). + void writeModule(const Module *M, bool ShouldPreserveUseListOrder = false, + const ModuleSummaryIndex *Index = nullptr, + bool GenerateHash = false); + }; + /// \brief Write the specified module to the specified raw output stream. /// /// For streams where it matters, the given stream should be in "binary" diff --git a/include/llvm/Bitcode/BitstreamWriter.h b/include/llvm/Bitcode/BitstreamWriter.h index 707b2105c16..b79e88d2c05 100644 --- a/include/llvm/Bitcode/BitstreamWriter.h +++ b/include/llvm/Bitcode/BitstreamWriter.h @@ -509,6 +509,7 @@ public: void EnterBlockInfoBlock() { EnterSubblock(bitc::BLOCKINFO_BLOCK_ID, 2); BlockInfoCurBID = ~0U; + BlockInfoRecords.clear(); } private: /// SwitchToBlockID - If we aren't already talking about the specified block diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index c62cd1f2042..c324100e1b7 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -65,36 +65,20 @@ enum { }; /// Abstract class to manage the bitcode writing, subclassed for each bitcode -/// file type. Owns the BitstreamWriter, and includes the main entry point for -/// writing. -class BitcodeWriter { +/// file type. +class BitcodeWriterBase { protected: - /// Pointer to the buffer allocated by caller for bitcode writing. - const SmallVectorImpl &Buffer; - - /// The stream created and owned by the BitodeWriter. - BitstreamWriter Stream; + /// The stream created and owned by the client. + BitstreamWriter &Stream; /// Saves the offset of the VSTOffset record that must eventually be /// backpatched with the offset of the actual VST. uint64_t VSTOffsetPlaceholder = 0; public: - /// Constructs a BitcodeWriter object, and initializes a BitstreamRecord, - /// writing to the provided \p Buffer. - BitcodeWriter(SmallVectorImpl &Buffer) - : Buffer(Buffer), Stream(Buffer) {} - - virtual ~BitcodeWriter() = default; - - /// Main entry point to write the bitcode file, which writes the bitcode - /// header and will then invoke the virtual writeBlocks() method. - void write(); - -private: - /// Derived classes must implement this to write the corresponding blocks for - /// that bitcode file type. - virtual void writeBlocks() = 0; + /// Constructs a BitcodeWriterBase object that writes to the provided + /// \p Stream. + BitcodeWriterBase(BitstreamWriter &Stream) : Stream(Stream) {} protected: bool hasVSTOffsetPlaceholder() { return VSTOffsetPlaceholder != 0; } @@ -103,7 +87,10 @@ protected: }; /// Class to manage the bitcode writing for a module. -class ModuleBitcodeWriter : public BitcodeWriter { +class ModuleBitcodeWriter : public BitcodeWriterBase { + /// Pointer to the buffer allocated by caller for bitcode writing. + const SmallVectorImpl &Buffer; + /// The Module to write to bitcode. const Module &M; @@ -116,8 +103,8 @@ class ModuleBitcodeWriter : public BitcodeWriter { /// True if a module hash record should be written. bool GenerateHash; - /// The start bit of the module block, for use in generating a module hash - uint64_t BitcodeStartBit = 0; + /// The start bit of the identification block. + uint64_t BitcodeStartBit; /// Map that holds the correspondence between GUIDs in the summary index, /// that came from indirect call profiles, and a value id generated by this @@ -131,16 +118,11 @@ public: /// Constructs a ModuleBitcodeWriter object for the given Module, /// writing to the provided \p Buffer. ModuleBitcodeWriter(const Module *M, SmallVectorImpl &Buffer, - bool ShouldPreserveUseListOrder, + BitstreamWriter &Stream, bool ShouldPreserveUseListOrder, const ModuleSummaryIndex *Index, bool GenerateHash) - : BitcodeWriter(Buffer), M(*M), VE(*M, ShouldPreserveUseListOrder), - Index(Index), GenerateHash(GenerateHash) { - // Save the start bit of the actual bitcode, in case there is space - // saved at the start for the darwin header above. The reader stream - // will start at the bitcode, and we need the offset of the VST - // to line up. - BitcodeStartBit = Stream.GetCurrentBitNo(); - + : BitcodeWriterBase(Stream), Buffer(Buffer), M(*M), + VE(*M, ShouldPreserveUseListOrder), Index(Index), + GenerateHash(GenerateHash), BitcodeStartBit(Stream.GetCurrentBitNo()) { // Assign ValueIds to any callee values in the index that came from // indirect call profiles and were recorded as a GUID not a Value* // (which would have been assigned an ID by the ValueEnumerator). @@ -162,21 +144,12 @@ public: assignValueId(CallEdge.first.getGUID()); } -private: - /// Main entry point for writing a module to bitcode, invoked by - /// BitcodeWriter::write() after it writes the header. - void writeBlocks() override; - - /// Create the "IDENTIFICATION_BLOCK_ID" containing a single string with the - /// current llvm version, and a record for the epoch number. - void writeIdentificationBlock(); - /// Emit the current module to the bitstream. - void writeModule(); + void write(); +private: uint64_t bitcodeStartBit() { return BitcodeStartBit; } - void writeStringRecord(unsigned Code, StringRef Str, unsigned AbbrevToUse); void writeAttributeGroupTable(); void writeAttributeTable(); void writeTypeTable(); @@ -310,7 +283,7 @@ private: }; /// Class to manage the bitcode writing for a combined index. -class IndexBitcodeWriter : public BitcodeWriter { +class IndexBitcodeWriter : public BitcodeWriterBase { /// The combined index to write to bitcode. const ModuleSummaryIndex &Index; @@ -329,11 +302,10 @@ public: /// Constructs a IndexBitcodeWriter object for the given combined index, /// writing to the provided \p Buffer. When writing a subset of the index /// for a distributed backend, provide a \p ModuleToSummariesForIndex map. - IndexBitcodeWriter(SmallVectorImpl &Buffer, - const ModuleSummaryIndex &Index, + IndexBitcodeWriter(BitstreamWriter &Stream, const ModuleSummaryIndex &Index, const std::map *ModuleToSummariesForIndex = nullptr) - : BitcodeWriter(Buffer), Index(Index), + : BitcodeWriterBase(Stream), Index(Index), ModuleToSummariesForIndex(ModuleToSummariesForIndex) { // Assign unique value ids to all summaries to be written, for use // in writing out the call graph edges. Save the mapping from GUID @@ -480,11 +452,10 @@ public: /// Obtain the end iterator over the summaries to be written. iterator end() { return iterator(*this, /*IsAtEnd=*/true); } -private: - /// Main entry point for writing a combined index to bitcode, invoked by - /// BitcodeWriter::write() after it writes the header. - void writeBlocks() override; + /// Main entry point for writing a combined index to bitcode. + void write(); +private: void writeIndex(); void writeModStrings(); void writeCombinedValueSymbolTable(); @@ -597,8 +568,8 @@ static unsigned getEncodedSynchScope(SynchronizationScope SynchScope) { llvm_unreachable("Invalid synch scope"); } -void ModuleBitcodeWriter::writeStringRecord(unsigned Code, StringRef Str, - unsigned AbbrevToUse) { +static void writeStringRecord(BitstreamWriter &Stream, unsigned Code, + StringRef Str, unsigned AbbrevToUse) { SmallVector Vals; // Code: [strchar x N] @@ -922,7 +893,7 @@ void ModuleBitcodeWriter::writeTypeTable() { // Emit the name if it is present. if (!ST->getName().empty()) - writeStringRecord(bitc::TYPE_CODE_STRUCT_NAME, ST->getName(), + writeStringRecord(Stream, bitc::TYPE_CODE_STRUCT_NAME, ST->getName(), StructNameAbbrev); } break; @@ -1073,7 +1044,7 @@ void ModuleBitcodeWriter::writeComdats() { /// Write a record that will eventually hold the word offset of the /// module-level VST. For now the offset is 0, which will be backpatched /// after the real VST is written. Saves the bit offset to backpatch. -void BitcodeWriter::writeValueSymbolTableForwardDecl() { +void BitcodeWriterBase::writeValueSymbolTableForwardDecl() { // Write a placeholder value in for the offset of the real VST, // which is written after the function blocks so that it can include // the offset of each function. The placeholder offset will be @@ -1120,13 +1091,13 @@ static StringEncoding getStringEncoding(const char *Str, unsigned StrLen) { void ModuleBitcodeWriter::writeModuleInfo() { // Emit various pieces of data attached to a module. if (!M.getTargetTriple().empty()) - writeStringRecord(bitc::MODULE_CODE_TRIPLE, M.getTargetTriple(), + writeStringRecord(Stream, bitc::MODULE_CODE_TRIPLE, M.getTargetTriple(), 0 /*TODO*/); const std::string &DL = M.getDataLayoutStr(); if (!DL.empty()) - writeStringRecord(bitc::MODULE_CODE_DATALAYOUT, DL, 0 /*TODO*/); + writeStringRecord(Stream, bitc::MODULE_CODE_DATALAYOUT, DL, 0 /*TODO*/); if (!M.getModuleInlineAsm().empty()) - writeStringRecord(bitc::MODULE_CODE_ASM, M.getModuleInlineAsm(), + writeStringRecord(Stream, bitc::MODULE_CODE_ASM, M.getModuleInlineAsm(), 0 /*TODO*/); // Emit information about sections and GC, computing how many there are. Also @@ -1142,7 +1113,7 @@ void ModuleBitcodeWriter::writeModuleInfo() { // Give section names unique ID's. unsigned &Entry = SectionMap[GV.getSection()]; if (!Entry) { - writeStringRecord(bitc::MODULE_CODE_SECTIONNAME, GV.getSection(), + writeStringRecord(Stream, bitc::MODULE_CODE_SECTIONNAME, GV.getSection(), 0 /*TODO*/); Entry = SectionMap.size(); } @@ -1154,7 +1125,7 @@ void ModuleBitcodeWriter::writeModuleInfo() { // Give section names unique ID's. unsigned &Entry = SectionMap[F.getSection()]; if (!Entry) { - writeStringRecord(bitc::MODULE_CODE_SECTIONNAME, F.getSection(), + writeStringRecord(Stream, bitc::MODULE_CODE_SECTIONNAME, F.getSection(), 0 /*TODO*/); Entry = SectionMap.size(); } @@ -1163,7 +1134,8 @@ void ModuleBitcodeWriter::writeModuleInfo() { // Same for GC names. unsigned &Entry = GCMap[F.getGC()]; if (!Entry) { - writeStringRecord(bitc::MODULE_CODE_GCNAME, F.getGC(), 0 /*TODO*/); + writeStringRecord(Stream, bitc::MODULE_CODE_GCNAME, F.getGC(), + 0 /*TODO*/); Entry = GCMap.size(); } } @@ -2761,11 +2733,13 @@ void ModuleBitcodeWriter::writeValueSymbolTable( // Get the offset of the VST we are writing, and backpatch it into // the VST forward declaration record. uint64_t VSTOffset = Stream.GetCurrentBitNo(); - // The BitcodeStartBit was the stream offset of the actual bitcode - // (e.g. excluding any initial darwin header). + // The BitcodeStartBit was the stream offset of the identification block. VSTOffset -= bitcodeStartBit(); assert((VSTOffset & 31) == 0 && "VST block not 32-bit aligned"); - Stream.BackpatchWord(VSTOffsetPlaceholder, VSTOffset / 32); + // Note that we add 1 here because the offset is relative to one word + // before the start of the identification block, which was historically + // always the start of the regular bitcode header. + Stream.BackpatchWord(VSTOffsetPlaceholder, VSTOffset / 32 + 1); } Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4); @@ -2853,7 +2827,10 @@ void ModuleBitcodeWriter::writeValueSymbolTable( // actual bitcode written to the stream). uint64_t BitcodeIndex = (*FunctionToBitcodeIndex)[F] - bitcodeStartBit(); assert((BitcodeIndex & 31) == 0 && "function block not 32-bit aligned"); - NameVals.push_back(BitcodeIndex / 32); + // Note that we add 1 here because the offset is relative to one word + // before the start of the identification block, which was historically + // always the start of the regular bitcode header. + NameVals.push_back(BitcodeIndex / 32 + 1); Code = bitc::VST_CODE_FNENTRY; AbbrevToUse = FnEntry8BitAbbrev; @@ -3617,7 +3594,9 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { Stream.ExitBlock(); } -void ModuleBitcodeWriter::writeIdentificationBlock() { +/// Create the "IDENTIFICATION_BLOCK_ID" containing a single string with the +/// current llvm version, and a record for the epoch number. +void writeIdentificationBlock(BitstreamWriter &Stream) { Stream.EnterSubblock(bitc::IDENTIFICATION_BLOCK_ID, 5); // Write the "user readable" string identifying the bitcode producer @@ -3626,7 +3605,7 @@ void ModuleBitcodeWriter::writeIdentificationBlock() { Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); auto StringAbbrev = Stream.EmitAbbrev(Abbv); - writeStringRecord(bitc::IDENTIFICATION_CODE_STRING, + writeStringRecord(Stream, bitc::IDENTIFICATION_CODE_STRING, "LLVM" LLVM_VERSION_STRING, StringAbbrev); // Write the epoch version @@ -3655,24 +3634,9 @@ void ModuleBitcodeWriter::writeModuleHash(size_t BlockStartPos) { Stream.EmitRecord(bitc::MODULE_CODE_HASH, Vals); } -void BitcodeWriter::write() { - // Emit the file header first. - writeBitcodeHeader(); +void ModuleBitcodeWriter::write() { + writeIdentificationBlock(Stream); - writeBlocks(); -} - -void ModuleBitcodeWriter::writeBlocks() { - writeIdentificationBlock(); - writeModule(); -} - -void IndexBitcodeWriter::writeBlocks() { - // Index contains only a single outer (module) block. - writeIndex(); -} - -void ModuleBitcodeWriter::writeModule() { Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3); size_t BlockStartPos = Buffer.size(); @@ -3801,7 +3765,7 @@ static void emitDarwinBCHeaderAndTrailer(SmallVectorImpl &Buffer, } /// Helper to write the header common to all bitcode files. -void BitcodeWriter::writeBitcodeHeader() { +static void writeBitcodeHeader(BitstreamWriter &Stream) { // Emit the file header. Stream.Emit((unsigned)'B', 8); Stream.Emit((unsigned)'C', 8); @@ -3811,6 +3775,22 @@ void BitcodeWriter::writeBitcodeHeader() { Stream.Emit(0xD, 4); } +BitcodeWriter::BitcodeWriter(SmallVectorImpl &Buffer) + : Buffer(Buffer), Stream(new BitstreamWriter(Buffer)) { + writeBitcodeHeader(*Stream); +} + +BitcodeWriter::~BitcodeWriter() = default; + +void BitcodeWriter::writeModule(const Module *M, + bool ShouldPreserveUseListOrder, + const ModuleSummaryIndex *Index, + bool GenerateHash) { + ModuleBitcodeWriter ModuleWriter( + M, Buffer, *Stream, ShouldPreserveUseListOrder, Index, GenerateHash); + ModuleWriter.write(); +} + /// WriteBitcodeToFile - Write the specified module to the specified output /// stream. void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out, @@ -3826,10 +3806,8 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out, if (TT.isOSDarwin() || TT.isOSBinFormatMachO()) Buffer.insert(Buffer.begin(), BWH_HeaderSize, 0); - // Emit the module into the buffer. - ModuleBitcodeWriter ModuleWriter(M, Buffer, ShouldPreserveUseListOrder, Index, - GenerateHash); - ModuleWriter.write(); + BitcodeWriter Writer(Buffer); + Writer.writeModule(M, ShouldPreserveUseListOrder, Index, GenerateHash); if (TT.isOSDarwin() || TT.isOSBinFormatMachO()) emitDarwinBCHeaderAndTrailer(Buffer, TT); @@ -3838,7 +3816,7 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out, Out.write((char*)&Buffer.front(), Buffer.size()); } -void IndexBitcodeWriter::writeIndex() { +void IndexBitcodeWriter::write() { Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3); SmallVector Vals; @@ -3872,7 +3850,10 @@ void llvm::WriteIndexToFile( SmallVector Buffer; Buffer.reserve(256 * 1024); - IndexBitcodeWriter IndexWriter(Buffer, Index, ModuleToSummariesForIndex); + BitstreamWriter Stream(Buffer); + writeBitcodeHeader(Stream); + + IndexBitcodeWriter IndexWriter(Stream, Index, ModuleToSummariesForIndex); IndexWriter.write(); Out.write((char *)&Buffer.front(), Buffer.size()); diff --git a/test/Bitcode/Inputs/multi-module.ll b/test/Bitcode/Inputs/multi-module.ll new file mode 100644 index 00000000000..e4e9b82be64 --- /dev/null +++ b/test/Bitcode/Inputs/multi-module.ll @@ -0,0 +1,3 @@ +define void @f2() { + ret void +} diff --git a/test/Bitcode/multi-module.ll b/test/Bitcode/multi-module.ll new file mode 100644 index 00000000000..a6c25c7cdbe --- /dev/null +++ b/test/Bitcode/multi-module.ll @@ -0,0 +1,39 @@ +; RUN: llvm-cat -o %t %s %S/Inputs/multi-module.ll +; RUN: not llvm-dis -o - %t 2>&1 | FileCheck --check-prefix=ERROR %s +; ERROR: Expected a single module + +; FIXME: Introduce a tool for extracting modules from bitcode and use it here. +; For now we can at least check that the bitcode contains multiple modules. +; RUN: llvm-bcanalyzer -dump %t | FileCheck --check-prefix=BCA %s + +; RUN: llvm-as -o %t1 %s +; RUN: llvm-as -o %t2 %S/Inputs/multi-module.ll +; RUN: llvm-cat -o %t %t1 %t2 +; RUN: not llvm-dis -o - %t 2>&1 | FileCheck --check-prefix=ERROR %s +; RUN: llvm-bcanalyzer -dump %t | FileCheck --check-prefix=BCA %s + +; RUN: llvm-cat -b -o %t %t1 %t2 +; RUN: not llvm-dis -o - %t 2>&1 | FileCheck --check-prefix=ERROR %s +; RUN: llvm-bcanalyzer -dump %t | FileCheck --check-prefix=BCA %s + +; RUN: llvm-cat -b -o %t3 %t %t +; RUN: not llvm-dis -o - %t3 2>&1 | FileCheck --check-prefix=ERROR %s +; RUN: llvm-bcanalyzer -dump %t3 | FileCheck --check-prefix=BCA4 %s + +; BCA: + BinaryCat("b", cl::desc("Whether to perform binary concatenation")); + +static cl::opt OutputFilename("o", cl::Required, + cl::desc("Output filename"), + cl::value_desc("filename")); + +static cl::list InputFilenames(cl::Positional, cl::OneOrMore, + cl::desc("")); + +int main(int argc, char **argv) { + cl::ParseCommandLineOptions(argc, argv, "Module concatenation"); + + ExitOnError ExitOnErr("llvm-cat: "); + LLVMContext Context; + + SmallVector Buffer; + BitcodeWriter Writer(Buffer); + if (BinaryCat) { + for (std::string InputFilename : InputFilenames) { + std::unique_ptr MB = ExitOnErr( + errorOrToExpected(MemoryBuffer::getFileOrSTDIN(InputFilename))); + std::vector Mods = ExitOnErr(getBitcodeModuleList(*MB)); + for (auto &BitcodeMod : Mods) + Buffer.insert(Buffer.end(), BitcodeMod.getBuffer().begin(), + BitcodeMod.getBuffer().end()); + } + } else { + for (std::string InputFilename : InputFilenames) { + SMDiagnostic Err; + std::unique_ptr M = parseIRFile(InputFilename, Err, Context); + if (!M) { + Err.print(argv[0], errs()); + return 1; + } + Writer.writeModule(M.get()); + } + } + + std::error_code EC; + raw_fd_ostream OS(OutputFilename, EC, sys::fs::OpenFlags::F_None); + if (EC) { + llvm::errs() << argv[0] << ": cannot open " << OutputFilename + << " for writing: " << EC.message(); + return 1; + } + + OS.write(Buffer.data(), Buffer.size()); + return 0; +}