Bitcode: Introduce BitcodeWriter interface.

This interface allows clients to write multiple modules to a single
bitcode file. Also introduce the llvm-cat utility which can be used
to create a bitcode file containing multiple modules.

Differential Revision: https://reviews.llvm.org/D26179

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@288195 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Peter Collingbourne 2016-11-29 20:43:47 +00:00
parent c83371f0eb
commit e6480e2add
11 changed files with 254 additions and 94 deletions

View File

@ -66,6 +66,8 @@ namespace llvm {
bool ShouldLazyLoadMetadata);
public:
/// Returns a non-owning view of this object's \c Buffer member.
/// NOTE(review): presumably the raw bitcode bytes of this module (llvm-cat
/// copies them verbatim into its output) -- confirm against the constructor.
ArrayRef<uint8_t> getBuffer() const { return Buffer; }
/// Read the bitcode module and prepare for lazy deserialization of function
/// bodies. If ShouldLazyLoadMetadata is true, lazily load metadata as well.
Expected<std::unique_ptr<Module>>

View File

@ -18,9 +18,36 @@
#include <string>
namespace llvm {
class BitstreamWriter;
class Module;
class raw_ostream;
/// Writes one or more modules into a single, caller-owned bitcode buffer.
///
/// The bitcode file header is emitted when the writer is constructed; every
/// call to writeModule() then adds another module to the same stream.
class BitcodeWriter {
  /// Destination buffer supplied by the client. Held by reference, so it must
  /// outlive this writer.
  SmallVectorImpl<char> &Buffer;

  /// Bitstream layered over Buffer. BitstreamWriter is only forward-declared
  /// in this header, which is why the destructor is defined out of line.
  std::unique_ptr<BitstreamWriter> Stream;

public:
  /// Create a BitcodeWriter that writes to Buffer.
  ///
  /// Explicit: construction has a side effect (the file header is emitted),
  /// so a buffer must never convert to a writer implicitly.
  explicit BitcodeWriter(SmallVectorImpl<char> &Buffer);

  ~BitcodeWriter();

  /// Write the specified module to the buffer specified at construction time.
  ///
  /// If \c ShouldPreserveUseListOrder, encode the use-list order for each \a
  /// Value in \c M. These will be reconstructed exactly when \a M is
  /// deserialized.
  ///
  /// If \c Index is supplied, the bitcode will contain the summary index
  /// (currently for use in ThinLTO optimization).
  ///
  /// \p GenerateHash enables hashing the Module and including the hash in the
  /// bitcode (currently for use in ThinLTO incremental build).
  void writeModule(const Module *M, bool ShouldPreserveUseListOrder = false,
                   const ModuleSummaryIndex *Index = nullptr,
                   bool GenerateHash = false);
};
/// \brief Write the specified module to the specified raw output stream.
///
/// For streams where it matters, the given stream should be in "binary"

View File

@ -509,6 +509,7 @@ public:
/// EnterBlockInfoBlock - Open a BLOCKINFO block and reset the writer's cached
/// block-info state so the new block starts from scratch.
void EnterBlockInfoBlock() {
EnterSubblock(bitc::BLOCKINFO_BLOCK_ID, 2);
// ~0U is used as "no block ID selected yet" inside this BLOCKINFO block.
BlockInfoCurBID = ~0U;
// Drop records remembered from any earlier BLOCKINFO block.
// NOTE(review): presumably required now that one stream can carry several
// modules, each emitting its own BLOCKINFO block -- confirm.
BlockInfoRecords.clear();
}
private:
/// SwitchToBlockID - If we aren't already talking about the specified block

View File

@ -65,36 +65,20 @@ enum {
};
/// Abstract class to manage the bitcode writing, subclassed for each bitcode
/// file type. Owns the BitstreamWriter, and includes the main entry point for
/// writing.
class BitcodeWriter {
/// file type.
class BitcodeWriterBase {
protected:
/// Pointer to the buffer allocated by caller for bitcode writing.
const SmallVectorImpl<char> &Buffer;
/// The stream created and owned by the BitodeWriter.
BitstreamWriter Stream;
/// The stream created and owned by the client.
BitstreamWriter &Stream;
/// Saves the offset of the VSTOffset record that must eventually be
/// backpatched with the offset of the actual VST.
uint64_t VSTOffsetPlaceholder = 0;
public:
/// Constructs a BitcodeWriter object, and initializes a BitstreamRecord,
/// writing to the provided \p Buffer.
BitcodeWriter(SmallVectorImpl<char> &Buffer)
: Buffer(Buffer), Stream(Buffer) {}
virtual ~BitcodeWriter() = default;
/// Main entry point to write the bitcode file, which writes the bitcode
/// header and will then invoke the virtual writeBlocks() method.
void write();
private:
/// Derived classes must implement this to write the corresponding blocks for
/// that bitcode file type.
virtual void writeBlocks() = 0;
/// Constructs a BitcodeWriterBase object that writes to the provided
/// \p Stream.
BitcodeWriterBase(BitstreamWriter &Stream) : Stream(Stream) {}
protected:
bool hasVSTOffsetPlaceholder() { return VSTOffsetPlaceholder != 0; }
@ -103,7 +87,10 @@ protected:
};
/// Class to manage the bitcode writing for a module.
class ModuleBitcodeWriter : public BitcodeWriter {
class ModuleBitcodeWriter : public BitcodeWriterBase {
/// Pointer to the buffer allocated by caller for bitcode writing.
const SmallVectorImpl<char> &Buffer;
/// The Module to write to bitcode.
const Module &M;
@ -116,8 +103,8 @@ class ModuleBitcodeWriter : public BitcodeWriter {
/// True if a module hash record should be written.
bool GenerateHash;
/// The start bit of the module block, for use in generating a module hash
uint64_t BitcodeStartBit = 0;
/// The start bit of the identification block.
uint64_t BitcodeStartBit;
/// Map that holds the correspondence between GUIDs in the summary index,
/// that came from indirect call profiles, and a value id generated by this
@ -131,16 +118,11 @@ public:
/// Constructs a ModuleBitcodeWriter object for the given Module,
/// writing to the provided \p Buffer.
ModuleBitcodeWriter(const Module *M, SmallVectorImpl<char> &Buffer,
bool ShouldPreserveUseListOrder,
BitstreamWriter &Stream, bool ShouldPreserveUseListOrder,
const ModuleSummaryIndex *Index, bool GenerateHash)
: BitcodeWriter(Buffer), M(*M), VE(*M, ShouldPreserveUseListOrder),
Index(Index), GenerateHash(GenerateHash) {
// Save the start bit of the actual bitcode, in case there is space
// saved at the start for the darwin header above. The reader stream
// will start at the bitcode, and we need the offset of the VST
// to line up.
BitcodeStartBit = Stream.GetCurrentBitNo();
: BitcodeWriterBase(Stream), Buffer(Buffer), M(*M),
VE(*M, ShouldPreserveUseListOrder), Index(Index),
GenerateHash(GenerateHash), BitcodeStartBit(Stream.GetCurrentBitNo()) {
// Assign ValueIds to any callee values in the index that came from
// indirect call profiles and were recorded as a GUID not a Value*
// (which would have been assigned an ID by the ValueEnumerator).
@ -162,21 +144,12 @@ public:
assignValueId(CallEdge.first.getGUID());
}
private:
/// Main entry point for writing a module to bitcode, invoked by
/// BitcodeWriter::write() after it writes the header.
void writeBlocks() override;
/// Create the "IDENTIFICATION_BLOCK_ID" containing a single string with the
/// current llvm version, and a record for the epoch number.
void writeIdentificationBlock();
/// Emit the current module to the bitstream.
void writeModule();
void write();
private:
uint64_t bitcodeStartBit() { return BitcodeStartBit; }
void writeStringRecord(unsigned Code, StringRef Str, unsigned AbbrevToUse);
void writeAttributeGroupTable();
void writeAttributeTable();
void writeTypeTable();
@ -310,7 +283,7 @@ private:
};
/// Class to manage the bitcode writing for a combined index.
class IndexBitcodeWriter : public BitcodeWriter {
class IndexBitcodeWriter : public BitcodeWriterBase {
/// The combined index to write to bitcode.
const ModuleSummaryIndex &Index;
@ -329,11 +302,10 @@ public:
/// Constructs an IndexBitcodeWriter object for the given combined index,
/// writing to the provided \p Stream. When writing a subset of the index
/// for a distributed backend, provide a \p ModuleToSummariesForIndex map.
IndexBitcodeWriter(SmallVectorImpl<char> &Buffer,
const ModuleSummaryIndex &Index,
IndexBitcodeWriter(BitstreamWriter &Stream, const ModuleSummaryIndex &Index,
const std::map<std::string, GVSummaryMapTy>
*ModuleToSummariesForIndex = nullptr)
: BitcodeWriter(Buffer), Index(Index),
: BitcodeWriterBase(Stream), Index(Index),
ModuleToSummariesForIndex(ModuleToSummariesForIndex) {
// Assign unique value ids to all summaries to be written, for use
// in writing out the call graph edges. Save the mapping from GUID
@ -480,11 +452,10 @@ public:
/// Obtain the end iterator over the summaries to be written.
iterator end() { return iterator(*this, /*IsAtEnd=*/true); }
private:
/// Main entry point for writing a combined index to bitcode, invoked by
/// BitcodeWriter::write() after it writes the header.
void writeBlocks() override;
/// Main entry point for writing a combined index to bitcode.
void write();
private:
void writeIndex();
void writeModStrings();
void writeCombinedValueSymbolTable();
@ -597,8 +568,8 @@ static unsigned getEncodedSynchScope(SynchronizationScope SynchScope) {
llvm_unreachable("Invalid synch scope");
}
void ModuleBitcodeWriter::writeStringRecord(unsigned Code, StringRef Str,
unsigned AbbrevToUse) {
static void writeStringRecord(BitstreamWriter &Stream, unsigned Code,
StringRef Str, unsigned AbbrevToUse) {
SmallVector<unsigned, 64> Vals;
// Code: [strchar x N]
@ -922,7 +893,7 @@ void ModuleBitcodeWriter::writeTypeTable() {
// Emit the name if it is present.
if (!ST->getName().empty())
writeStringRecord(bitc::TYPE_CODE_STRUCT_NAME, ST->getName(),
writeStringRecord(Stream, bitc::TYPE_CODE_STRUCT_NAME, ST->getName(),
StructNameAbbrev);
}
break;
@ -1073,7 +1044,7 @@ void ModuleBitcodeWriter::writeComdats() {
/// Write a record that will eventually hold the word offset of the
/// module-level VST. For now the offset is 0, which will be backpatched
/// after the real VST is written. Saves the bit offset to backpatch.
void BitcodeWriter::writeValueSymbolTableForwardDecl() {
void BitcodeWriterBase::writeValueSymbolTableForwardDecl() {
// Write a placeholder value in for the offset of the real VST,
// which is written after the function blocks so that it can include
// the offset of each function. The placeholder offset will be
@ -1120,13 +1091,13 @@ static StringEncoding getStringEncoding(const char *Str, unsigned StrLen) {
void ModuleBitcodeWriter::writeModuleInfo() {
// Emit various pieces of data attached to a module.
if (!M.getTargetTriple().empty())
writeStringRecord(bitc::MODULE_CODE_TRIPLE, M.getTargetTriple(),
writeStringRecord(Stream, bitc::MODULE_CODE_TRIPLE, M.getTargetTriple(),
0 /*TODO*/);
const std::string &DL = M.getDataLayoutStr();
if (!DL.empty())
writeStringRecord(bitc::MODULE_CODE_DATALAYOUT, DL, 0 /*TODO*/);
writeStringRecord(Stream, bitc::MODULE_CODE_DATALAYOUT, DL, 0 /*TODO*/);
if (!M.getModuleInlineAsm().empty())
writeStringRecord(bitc::MODULE_CODE_ASM, M.getModuleInlineAsm(),
writeStringRecord(Stream, bitc::MODULE_CODE_ASM, M.getModuleInlineAsm(),
0 /*TODO*/);
// Emit information about sections and GC, computing how many there are. Also
@ -1142,7 +1113,7 @@ void ModuleBitcodeWriter::writeModuleInfo() {
// Give section names unique ID's.
unsigned &Entry = SectionMap[GV.getSection()];
if (!Entry) {
writeStringRecord(bitc::MODULE_CODE_SECTIONNAME, GV.getSection(),
writeStringRecord(Stream, bitc::MODULE_CODE_SECTIONNAME, GV.getSection(),
0 /*TODO*/);
Entry = SectionMap.size();
}
@ -1154,7 +1125,7 @@ void ModuleBitcodeWriter::writeModuleInfo() {
// Give section names unique ID's.
unsigned &Entry = SectionMap[F.getSection()];
if (!Entry) {
writeStringRecord(bitc::MODULE_CODE_SECTIONNAME, F.getSection(),
writeStringRecord(Stream, bitc::MODULE_CODE_SECTIONNAME, F.getSection(),
0 /*TODO*/);
Entry = SectionMap.size();
}
@ -1163,7 +1134,8 @@ void ModuleBitcodeWriter::writeModuleInfo() {
// Same for GC names.
unsigned &Entry = GCMap[F.getGC()];
if (!Entry) {
writeStringRecord(bitc::MODULE_CODE_GCNAME, F.getGC(), 0 /*TODO*/);
writeStringRecord(Stream, bitc::MODULE_CODE_GCNAME, F.getGC(),
0 /*TODO*/);
Entry = GCMap.size();
}
}
@ -2761,11 +2733,13 @@ void ModuleBitcodeWriter::writeValueSymbolTable(
// Get the offset of the VST we are writing, and backpatch it into
// the VST forward declaration record.
uint64_t VSTOffset = Stream.GetCurrentBitNo();
// The BitcodeStartBit was the stream offset of the actual bitcode
// (e.g. excluding any initial darwin header).
// The BitcodeStartBit was the stream offset of the identification block.
VSTOffset -= bitcodeStartBit();
assert((VSTOffset & 31) == 0 && "VST block not 32-bit aligned");
Stream.BackpatchWord(VSTOffsetPlaceholder, VSTOffset / 32);
// Note that we add 1 here because the offset is relative to one word
// before the start of the identification block, which was historically
// always the start of the regular bitcode header.
Stream.BackpatchWord(VSTOffsetPlaceholder, VSTOffset / 32 + 1);
}
Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4);
@ -2853,7 +2827,10 @@ void ModuleBitcodeWriter::writeValueSymbolTable(
// actual bitcode written to the stream).
uint64_t BitcodeIndex = (*FunctionToBitcodeIndex)[F] - bitcodeStartBit();
assert((BitcodeIndex & 31) == 0 && "function block not 32-bit aligned");
NameVals.push_back(BitcodeIndex / 32);
// Note that we add 1 here because the offset is relative to one word
// before the start of the identification block, which was historically
// always the start of the regular bitcode header.
NameVals.push_back(BitcodeIndex / 32 + 1);
Code = bitc::VST_CODE_FNENTRY;
AbbrevToUse = FnEntry8BitAbbrev;
@ -3617,7 +3594,9 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
Stream.ExitBlock();
}
void ModuleBitcodeWriter::writeIdentificationBlock() {
/// Create the "IDENTIFICATION_BLOCK_ID" containing a single string with the
/// current llvm version, and a record for the epoch number.
void writeIdentificationBlock(BitstreamWriter &Stream) {
Stream.EnterSubblock(bitc::IDENTIFICATION_BLOCK_ID, 5);
// Write the "user readable" string identifying the bitcode producer
@ -3626,7 +3605,7 @@ void ModuleBitcodeWriter::writeIdentificationBlock() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
auto StringAbbrev = Stream.EmitAbbrev(Abbv);
writeStringRecord(bitc::IDENTIFICATION_CODE_STRING,
writeStringRecord(Stream, bitc::IDENTIFICATION_CODE_STRING,
"LLVM" LLVM_VERSION_STRING, StringAbbrev);
// Write the epoch version
@ -3655,24 +3634,9 @@ void ModuleBitcodeWriter::writeModuleHash(size_t BlockStartPos) {
Stream.EmitRecord(bitc::MODULE_CODE_HASH, Vals);
}
void BitcodeWriter::write() {
// Emit the file header first.
writeBitcodeHeader();
void ModuleBitcodeWriter::write() {
writeIdentificationBlock(Stream);
writeBlocks();
}
void ModuleBitcodeWriter::writeBlocks() {
writeIdentificationBlock();
writeModule();
}
void IndexBitcodeWriter::writeBlocks() {
// Index contains only a single outer (module) block.
writeIndex();
}
void ModuleBitcodeWriter::writeModule() {
Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3);
size_t BlockStartPos = Buffer.size();
@ -3801,7 +3765,7 @@ static void emitDarwinBCHeaderAndTrailer(SmallVectorImpl<char> &Buffer,
}
/// Helper to write the header common to all bitcode files.
void BitcodeWriter::writeBitcodeHeader() {
static void writeBitcodeHeader(BitstreamWriter &Stream) {
// Emit the file header.
Stream.Emit((unsigned)'B', 8);
Stream.Emit((unsigned)'C', 8);
@ -3811,6 +3775,22 @@ void BitcodeWriter::writeBitcodeHeader() {
Stream.Emit(0xD, 4);
}
/// Bind the writer to \p Buffer, create the BitstreamWriter that targets it,
/// and immediately emit the bitcode file header through that stream. Modules
/// written afterwards therefore all follow a single header.
BitcodeWriter::BitcodeWriter(SmallVectorImpl<char> &Buffer)
: Buffer(Buffer), Stream(new BitstreamWriter(Buffer)) {
writeBitcodeHeader(*Stream);
}
// Defined here rather than in the header; the class holds a
// std::unique_ptr<BitstreamWriter> member.
BitcodeWriter::~BitcodeWriter() = default;
void BitcodeWriter::writeModule(const Module *M,
bool ShouldPreserveUseListOrder,
const ModuleSummaryIndex *Index,
bool GenerateHash) {
ModuleBitcodeWriter ModuleWriter(
M, Buffer, *Stream, ShouldPreserveUseListOrder, Index, GenerateHash);
ModuleWriter.write();
}
/// WriteBitcodeToFile - Write the specified module to the specified output
/// stream.
void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out,
@ -3826,10 +3806,8 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out,
if (TT.isOSDarwin() || TT.isOSBinFormatMachO())
Buffer.insert(Buffer.begin(), BWH_HeaderSize, 0);
// Emit the module into the buffer.
ModuleBitcodeWriter ModuleWriter(M, Buffer, ShouldPreserveUseListOrder, Index,
GenerateHash);
ModuleWriter.write();
BitcodeWriter Writer(Buffer);
Writer.writeModule(M, ShouldPreserveUseListOrder, Index, GenerateHash);
if (TT.isOSDarwin() || TT.isOSBinFormatMachO())
emitDarwinBCHeaderAndTrailer(Buffer, TT);
@ -3838,7 +3816,7 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out,
Out.write((char*)&Buffer.front(), Buffer.size());
}
void IndexBitcodeWriter::writeIndex() {
void IndexBitcodeWriter::write() {
Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3);
SmallVector<unsigned, 1> Vals;
@ -3872,7 +3850,10 @@ void llvm::WriteIndexToFile(
SmallVector<char, 0> Buffer;
Buffer.reserve(256 * 1024);
IndexBitcodeWriter IndexWriter(Buffer, Index, ModuleToSummariesForIndex);
BitstreamWriter Stream(Buffer);
writeBitcodeHeader(Stream);
IndexBitcodeWriter IndexWriter(Stream, Index, ModuleToSummariesForIndex);
IndexWriter.write();
Out.write((char *)&Buffer.front(), Buffer.size());

View File

@ -0,0 +1,3 @@
define void @f2() {
ret void
}

View File

@ -0,0 +1,39 @@
; RUN: llvm-cat -o %t %s %S/Inputs/multi-module.ll
; RUN: not llvm-dis -o - %t 2>&1 | FileCheck --check-prefix=ERROR %s
; ERROR: Expected a single module
; FIXME: Introduce a tool for extracting modules from bitcode and use it here.
; For now we can at least check that the bitcode contains multiple modules.
; RUN: llvm-bcanalyzer -dump %t | FileCheck --check-prefix=BCA %s
; RUN: llvm-as -o %t1 %s
; RUN: llvm-as -o %t2 %S/Inputs/multi-module.ll
; RUN: llvm-cat -o %t %t1 %t2
; RUN: not llvm-dis -o - %t 2>&1 | FileCheck --check-prefix=ERROR %s
; RUN: llvm-bcanalyzer -dump %t | FileCheck --check-prefix=BCA %s
; RUN: llvm-cat -b -o %t %t1 %t2
; RUN: not llvm-dis -o - %t 2>&1 | FileCheck --check-prefix=ERROR %s
; RUN: llvm-bcanalyzer -dump %t | FileCheck --check-prefix=BCA %s
; RUN: llvm-cat -b -o %t3 %t %t
; RUN: not llvm-dis -o - %t3 2>&1 | FileCheck --check-prefix=ERROR %s
; RUN: llvm-bcanalyzer -dump %t3 | FileCheck --check-prefix=BCA4 %s
; BCA: <IDENTIFICATION_BLOCK
; BCA: <MODULE_BLOCK
; BCA: <IDENTIFICATION_BLOCK
; BCA: <MODULE_BLOCK
; BCA4: <IDENTIFICATION_BLOCK
; BCA4: <MODULE_BLOCK
; BCA4: <IDENTIFICATION_BLOCK
; BCA4: <MODULE_BLOCK
; BCA4: <IDENTIFICATION_BLOCK
; BCA4: <MODULE_BLOCK
; BCA4: <IDENTIFICATION_BLOCK
; BCA4: <MODULE_BLOCK
define void @f1() {
ret void
}

View File

@ -32,6 +32,7 @@ set(LLVM_TEST_DEPENDS
llvm-as
llvm-bcanalyzer
llvm-c-test
llvm-cat
llvm-cxxfilt
llvm-config
llvm-cov

View File

@ -24,6 +24,7 @@ subdirectories =
llvm-ar
llvm-as
llvm-bcanalyzer
llvm-cat
llvm-cov
llvm-diff
llvm-dis

View File

@ -0,0 +1,10 @@
# Components llvm-cat links against. BitReader is listed explicitly because
# llvm-cat.cpp calls the bitcode reader directly (getBitcodeModuleList in
# binary-concatenation mode) instead of relying on IRReader to pull it in.
set(LLVM_LINK_COMPONENTS
  BitReader
  BitWriter
  Core
  IRReader
  Support
  )

add_llvm_tool(llvm-cat
  llvm-cat.cpp
  )

View File

@ -0,0 +1,22 @@
;===- ./tools/llvm-cat/LLVMBuild.txt ---------------------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
; This file is distributed under the University of Illinois Open Source
; License. See LICENSE.TXT for details.
;
;===------------------------------------------------------------------------===;
;
; This is an LLVMBuild description file for the components in this subdirectory.
;
; For more information on the LLVMBuild system, please see:
;
; http://llvm.org/docs/LLVMBuild.html
;
;===------------------------------------------------------------------------===;
[component_0]
type = Tool
name = llvm-cat
parent = Tools
; llvm-cat.cpp includes and calls IRReader (parseIRFile), the bitcode reader
; (getBitcodeModuleList) and the bitcode writer, so all three are listed.
required_libraries = AsmParser BitReader BitWriter IRReader

View File

@ -0,0 +1,73 @@
//===-- llvm-cat.cpp - LLVM module concatenation utility ------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This program is for testing features that rely on multi-module bitcode files.
// It takes a list of input modules and uses them to create a multi-module
// bitcode file.
//
//===----------------------------------------------------------------------===//
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
using namespace llvm;
// -b: when set, main() appends the bytes of each module found in the inputs
// to the output unchanged instead of parsing and re-serializing them.
static cl::opt<bool>
BinaryCat("b", cl::desc("Whether to perform binary concatenation"));
// -o <filename>: path of the combined bitcode file to create (required).
static cl::opt<std::string> OutputFilename("o", cl::Required,
cl::desc("Output filename"),
cl::value_desc("filename"));
// Positional arguments: one or more input files, processed in the order given.
static cl::list<std::string> InputFilenames(cl::Positional, cl::OneOrMore,
cl::desc("<input files>"));
/// Entry point: combine every input into one multi-module bitcode file and
/// write it to the -o path.
///
/// With -b, each input is read with getBitcodeModuleList and the bytes of
/// every module it contains are appended unchanged. Without -b, each input is
/// parsed with parseIRFile and re-serialized through BitcodeWriter.
///
/// \returns 0 on success, 1 if an input fails to parse or the output file
/// cannot be opened. Failures routed through ExitOnErr are handled by
/// ExitOnError rather than by a return value.
int main(int argc, char **argv) {
  cl::ParseCommandLineOptions(argc, argv, "Module concatenation");

  ExitOnError ExitOnErr("llvm-cat: ");
  LLVMContext Context;

  SmallVector<char, 0> Buffer;
  // Constructing the writer emits the bitcode file header. The binary path
  // never calls the writer again but still depends on that header being
  // produced here, so the writer is created on both paths.
  BitcodeWriter Writer(Buffer);
  if (BinaryCat) {
    // Iterate by const reference: the file names are only read.
    for (const std::string &InputFilename : InputFilenames) {
      std::unique_ptr<MemoryBuffer> MB = ExitOnErr(
          errorOrToExpected(MemoryBuffer::getFileOrSTDIN(InputFilename)));
      std::vector<BitcodeModule> Mods = ExitOnErr(getBitcodeModuleList(*MB));
      // Splice each module's bytes onto the end of the output as-is.
      for (const auto &BitcodeMod : Mods)
        Buffer.insert(Buffer.end(), BitcodeMod.getBuffer().begin(),
                      BitcodeMod.getBuffer().end());
    }
  } else {
    for (const std::string &InputFilename : InputFilenames) {
      SMDiagnostic Err;
      std::unique_ptr<Module> M = parseIRFile(InputFilename, Err, Context);
      if (!M) {
        Err.print(argv[0], errs());
        return 1;
      }
      Writer.writeModule(M.get());
    }
  }

  std::error_code EC;
  raw_fd_ostream OS(OutputFilename, EC, sys::fs::OpenFlags::F_None);
  if (EC) {
    // Terminate the diagnostic with a newline so it does not run into
    // whatever the shell prints next.
    llvm::errs() << argv[0] << ": cannot open " << OutputFilename
                 << " for writing: " << EC.message() << '\n';
    return 1;
  }

  OS.write(Buffer.data(), Buffer.size());
  return 0;
}