Bitcode: Change expected layout of module blocks.

We now expect each module's identification block to appear immediately before
the module block. Any module block that appears without an identification block
immediately before it is interpreted as if it does not have an identification
block.

Also change the interpretation of VST and function offsets in bitcode.
The offset is now always taken as relative to the start of the identification
block (or the start of the module block, if no identification block is
present), minus one word. This corresponds to the historical interpretation of
offsets, i.e. relative to the start of the file.

These changes allow for bitcode modules to be concatenated by copying bytes.

Differential Revision: https://reviews.llvm.org/D27184

llvm-svn: 288098
This commit is contained in:
Peter Collingbourne 2016-11-29 02:27:04 +00:00
parent f93801b9e8
commit 73ec8f79de
2 changed files with 36 additions and 10 deletions

View File

@ -42,6 +42,7 @@ namespace llvm {
/// Represents a module in a bitcode file.
class BitcodeModule {
// This covers the identification (if present) and module blocks.
ArrayRef<uint8_t> Buffer;
StringRef ModuleIdentifier;

View File

@ -2306,7 +2306,10 @@ Error BitcodeReader::parseValueSymbolTable(uint64_t Offset) {
assert(GO);
}
uint64_t FuncWordOffset = Record[1];
// Note that we subtract 1 here because the offset is relative to one word
// before the start of the identification or module block, which was
// historically always the start of the regular bitcode header.
uint64_t FuncWordOffset = Record[1] - 1;
Function *F = dyn_cast<Function>(GO);
assert(F);
uint64_t FuncBitOffset = FuncWordOffset * 32;
@ -4354,7 +4357,10 @@ Error BitcodeReader::parseModule(uint64_t ResumeBit,
case bitc::MODULE_CODE_VSTOFFSET:
if (Record.size() < 1)
return error("Invalid record");
VSTOffset = Record[0];
// Note that we subtract 1 here because the offset is relative to one word
// before the start of the identification or module block, which was
// historically always the start of the regular bitcode header.
VSTOffset = Record[0] - 1;
break;
/// MODULE_CODE_SOURCE_FILENAME: [namechar x N]
case bitc::MODULE_CODE_SOURCE_FILENAME:
@ -6549,13 +6555,14 @@ llvm::getBitcodeModuleList(MemoryBufferRef Buffer) {
return StreamOrErr.takeError();
BitstreamCursor &Stream = *StreamOrErr;
uint64_t IdentificationBit = -1ull;
std::vector<BitcodeModule> Modules;
while (true) {
uint64_t BCBegin = Stream.getCurrentByteNo();
// We may be consuming bitcode from a client that leaves garbage at the end
// of the bitcode stream (e.g. Apple's ar tool). If we are close enough to
// the end that there cannot possibly be another module, stop looking.
if (Stream.getCurrentByteNo() + 8 >= Stream.getBitcodeBytes().size())
if (BCBegin + 8 >= Stream.getBitcodeBytes().size())
return Modules;
BitstreamEntry Entry = Stream.advance();
@ -6564,17 +6571,35 @@ llvm::getBitcodeModuleList(MemoryBufferRef Buffer) {
case BitstreamEntry::Error:
return error("Malformed block");
case BitstreamEntry::SubBlock:
if (Entry.ID == bitc::IDENTIFICATION_BLOCK_ID)
IdentificationBit = Stream.GetCurrentBitNo();
else if (Entry.ID == bitc::MODULE_BLOCK_ID)
Modules.push_back({Stream.getBitcodeBytes(),
case BitstreamEntry::SubBlock: {
uint64_t IdentificationBit = -1ull;
if (Entry.ID == bitc::IDENTIFICATION_BLOCK_ID) {
IdentificationBit = Stream.GetCurrentBitNo() - BCBegin * 8;
if (Stream.SkipBlock())
return error("Malformed block");
Entry = Stream.advance();
if (Entry.Kind != BitstreamEntry::SubBlock ||
Entry.ID != bitc::MODULE_BLOCK_ID)
return error("Malformed block");
}
if (Entry.ID == bitc::MODULE_BLOCK_ID) {
uint64_t ModuleBit = Stream.GetCurrentBitNo() - BCBegin * 8;
if (Stream.SkipBlock())
return error("Malformed block");
Modules.push_back({Stream.getBitcodeBytes().slice(
BCBegin, Stream.getCurrentByteNo() - BCBegin),
Buffer.getBufferIdentifier(), IdentificationBit,
Stream.GetCurrentBitNo()});
ModuleBit});
continue;
}
if (Stream.SkipBlock())
return error("Malformed block");
continue;
}
case BitstreamEntry::Record:
Stream.skipRecord(Entry.ID);
continue;