Files
archived-llvm-mirror/include/llvm/Bitcode/BitcodeReader.h
Peter Collingbourne 2d3eb26eeb Bitcode: Add a string table to the bitcode format.
Add a top-level STRTAB block containing a string table blob, and start storing
strings for module codes FUNCTION, GLOBALVAR, ALIAS, IFUNC and COMDAT in
the string table.

This change allows us to share names between globals and comdats as well
as between modules, and improves the efficiency of loading bitcode files by
no longer using a bit encoding for symbol names. Once we start writing the
irsymtab to the bitcode file we will also be able to share strings between
it and the module.

On my machine, link time for Chromium for Linux with ThinLTO decreases by
about 7% for no-op incremental builds or about 1% for full builds. Total
bitcode file size decreases by about 3%.

As discussed on llvm-dev:
http://lists.llvm.org/pipermail/llvm-dev/2017-April/111732.html

Differential Revision: https://reviews.llvm.org/D31838

llvm-svn: 300464
2017-04-17 17:51:36 +00:00

229 lines
9.1 KiB
C++

//===-- llvm/Bitcode/BitcodeReader.h - Bitcode reader ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This header defines interfaces to read LLVM bitcode files/streams.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_BITCODE_BITCODEREADER_H
#define LLVM_BITCODE_BITCODEREADER_H
#include "llvm/Bitcode/BitCodes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/MemoryBuffer.h"
#include <memory>
namespace llvm {
class LLVMContext;
class Module;
// These functions are for converting Expected/Error values to
// ErrorOr/std::error_code for compatibility with legacy clients. FIXME:
// Remove these functions once no longer needed by the C and libLTO APIs.
std::error_code errorToErrorCodeAndEmitErrors(LLVMContext &Ctx, Error Err);
template <typename T>
ErrorOr<T> expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected<T> Val) {
if (!Val)
return errorToErrorCodeAndEmitErrors(Ctx, Val.takeError());
return std::move(*Val);
}
/// Represents a module in a bitcode file.
class BitcodeModule {
// This covers the identification (if present) and module blocks.
ArrayRef<uint8_t> Buffer;
StringRef ModuleIdentifier;
// The string table used to interpret this module.
StringRef Strtab;
// The bitstream location of the IDENTIFICATION_BLOCK.
uint64_t IdentificationBit;
// The bitstream location of this module's MODULE_BLOCK.
uint64_t ModuleBit;
BitcodeModule(ArrayRef<uint8_t> Buffer, StringRef ModuleIdentifier,
uint64_t IdentificationBit, uint64_t ModuleBit)
: Buffer(Buffer), ModuleIdentifier(ModuleIdentifier),
IdentificationBit(IdentificationBit), ModuleBit(ModuleBit) {}
// Calls the ctor.
friend Expected<std::vector<BitcodeModule>>
getBitcodeModuleList(MemoryBufferRef Buffer);
Expected<std::unique_ptr<Module>> getModuleImpl(LLVMContext &Context,
bool MaterializeAll,
bool ShouldLazyLoadMetadata,
bool IsImporting);
public:
StringRef getBuffer() const {
return StringRef((const char *)Buffer.begin(), Buffer.size());
}
StringRef getStrtab() const { return Strtab; }
StringRef getModuleIdentifier() const { return ModuleIdentifier; }
/// Read the bitcode module and prepare for lazy deserialization of function
/// bodies. If ShouldLazyLoadMetadata is true, lazily load metadata as well.
/// If IsImporting is true, this module is being parsed for ThinLTO
/// importing into another module.
Expected<std::unique_ptr<Module>> getLazyModule(LLVMContext &Context,
bool ShouldLazyLoadMetadata,
bool IsImporting);
/// Read the entire bitcode module and return it.
Expected<std::unique_ptr<Module>> parseModule(LLVMContext &Context);
/// Check if the given bitcode buffer contains a summary block.
Expected<bool> hasSummary();
/// Parse the specified bitcode buffer, returning the module summary index.
Expected<std::unique_ptr<ModuleSummaryIndex>> getSummary();
};
/// Returns a list of modules in the specified bitcode buffer.
Expected<std::vector<BitcodeModule>>
getBitcodeModuleList(MemoryBufferRef Buffer);
/// Read the header of the specified bitcode buffer and prepare for lazy
/// deserialization of function bodies. If ShouldLazyLoadMetadata is true,
/// lazily load metadata as well. If IsImporting is true, this module is
/// being parsed for ThinLTO importing into another module.
Expected<std::unique_ptr<Module>>
getLazyBitcodeModule(MemoryBufferRef Buffer, LLVMContext &Context,
bool ShouldLazyLoadMetadata = false,
bool IsImporting = false);
/// Like getLazyBitcodeModule, except that the module takes ownership of
/// the memory buffer if successful. If successful, this moves Buffer. On
/// error, this *does not* move Buffer. If IsImporting is true, this module is
/// being parsed for ThinLTO importing into another module.
Expected<std::unique_ptr<Module>> getOwningLazyBitcodeModule(
std::unique_ptr<MemoryBuffer> &&Buffer, LLVMContext &Context,
bool ShouldLazyLoadMetadata = false, bool IsImporting = false);
/// Read the header of the specified bitcode buffer and extract just the
/// triple information. If successful, this returns a string. On error, this
/// returns "".
Expected<std::string> getBitcodeTargetTriple(MemoryBufferRef Buffer);
/// Return true if \p Buffer contains a bitcode file with ObjC code (category
/// or class) in it.
Expected<bool> isBitcodeContainingObjCCategory(MemoryBufferRef Buffer);
/// Read the header of the specified bitcode buffer and extract just the
/// producer string information. If successful, this returns a string. On
/// error, this returns "".
Expected<std::string> getBitcodeProducerString(MemoryBufferRef Buffer);
/// Read the specified bitcode file, returning the module.
Expected<std::unique_ptr<Module>> parseBitcodeFile(MemoryBufferRef Buffer,
LLVMContext &Context);
/// Check if the given bitcode buffer contains a summary block.
Expected<bool> hasGlobalValueSummary(MemoryBufferRef Buffer);
/// Parse the specified bitcode buffer, returning the module summary index.
Expected<std::unique_ptr<ModuleSummaryIndex>>
getModuleSummaryIndex(MemoryBufferRef Buffer);
/// isBitcodeWrapper - Return true if the given bytes are the magic bytes
/// for an LLVM IR bitcode wrapper.
///
inline bool isBitcodeWrapper(const unsigned char *BufPtr,
const unsigned char *BufEnd) {
// See if you can find the hidden message in the magic bytes :-).
// (Hint: it's a little-endian encoding.)
return BufPtr != BufEnd &&
BufPtr[0] == 0xDE &&
BufPtr[1] == 0xC0 &&
BufPtr[2] == 0x17 &&
BufPtr[3] == 0x0B;
}
/// isRawBitcode - Return true if the given bytes are the magic bytes for
/// raw LLVM IR bitcode (without a wrapper).
///
inline bool isRawBitcode(const unsigned char *BufPtr,
const unsigned char *BufEnd) {
// These bytes sort of have a hidden message, but it's not in
// little-endian this time, and it's a little redundant.
return BufPtr != BufEnd &&
BufPtr[0] == 'B' &&
BufPtr[1] == 'C' &&
BufPtr[2] == 0xc0 &&
BufPtr[3] == 0xde;
}
/// isBitcode - Return true if the given bytes are the magic bytes for
/// LLVM IR bitcode, either with or without a wrapper.
///
inline bool isBitcode(const unsigned char *BufPtr,
const unsigned char *BufEnd) {
return isBitcodeWrapper(BufPtr, BufEnd) ||
isRawBitcode(BufPtr, BufEnd);
}
/// SkipBitcodeWrapperHeader - Some systems wrap bc files with a special
/// header for padding or other reasons. The format of this header is:
///
/// struct bc_header {
/// uint32_t Magic; // 0x0B17C0DE
/// uint32_t Version; // Version, currently always 0.
/// uint32_t BitcodeOffset; // Offset to traditional bitcode file.
/// uint32_t BitcodeSize; // Size of traditional bitcode file.
/// ... potentially other gunk ...
/// };
///
/// This function is called when we find a file with a matching magic number.
/// In this case, skip down to the subsection of the file that is actually a
/// BC file.
/// If 'VerifyBufferSize' is true, check that the buffer is large enough to
/// contain the whole bitcode file.
inline bool SkipBitcodeWrapperHeader(const unsigned char *&BufPtr,
const unsigned char *&BufEnd,
bool VerifyBufferSize) {
// Must contain the offset and size field!
if (unsigned(BufEnd - BufPtr) < BWH_SizeField + 4)
return true;
unsigned Offset = support::endian::read32le(&BufPtr[BWH_OffsetField]);
unsigned Size = support::endian::read32le(&BufPtr[BWH_SizeField]);
uint64_t BitcodeOffsetEnd = (uint64_t)Offset + (uint64_t)Size;
// Verify that Offset+Size fits in the file.
if (VerifyBufferSize && BitcodeOffsetEnd > uint64_t(BufEnd-BufPtr))
return true;
BufPtr += Offset;
BufEnd = BufPtr+Size;
return false;
}
const std::error_category &BitcodeErrorCategory();
enum class BitcodeError { CorruptedBitcode = 1 };
inline std::error_code make_error_code(BitcodeError E) {
return std::error_code(static_cast<int>(E), BitcodeErrorCategory());
}
} // End llvm namespace
namespace std {
template <> struct is_error_code_enum<llvm::BitcodeError> : std::true_type {};
}
#endif