Bitcode: Change expected layout of module blocks.

We now expect each module's identification block to appear immediately before the module block. Any module block that appears without an identification block immediately before it is interpreted as if it does not have an identification block. Also change the interpretation of VST and function offsets in bitcode. The offset is always taken as relative to the start of the identification (or module if not present) block, minus one word. This corresponds to the historical interpretation of offsets, i.e. relative to the start of the file. These changes allow for bitcode modules to be concatenated by copying bytes. Differential Revision: https://reviews.llvm.org/D27184 llvm-svn: 288098
2024-12-12 22:17:47 +00:00 · 2016-11-29 02:27:04 +00:00 · 2016-11-29 02:27:04 +00:00 · 73ec8f79de
commit 73ec8f79de
parent f93801b9e8
2 changed files with 36 additions and 10 deletions
--- a/include/llvm/Bitcode/BitcodeReader.h
+++ b/include/llvm/Bitcode/BitcodeReader.h
@ -42,6 +42,7 @@ namespace llvm {

  /// Represents a module in a bitcode file.
  class BitcodeModule {
+    // This covers the identification (if present) and module blocks.
    ArrayRef<uint8_t> Buffer;
    StringRef ModuleIdentifier;

--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@ -2306,7 +2306,10 @@ Error BitcodeReader::parseValueSymbolTable(uint64_t Offset) {
        assert(GO);
      }

-      uint64_t FuncWordOffset = Record[1];
+      // Note that we subtract 1 here because the offset is relative to one word
+      // before the start of the identification or module block, which was
+      // historically always the start of the regular bitcode header.
+      uint64_t FuncWordOffset = Record[1] - 1;
      Function *F = dyn_cast<Function>(GO);
      assert(F);
      uint64_t FuncBitOffset = FuncWordOffset * 32;
@ -4354,7 +4357,10 @@ Error BitcodeReader::parseModule(uint64_t ResumeBit,
    case bitc::MODULE_CODE_VSTOFFSET:
      if (Record.size() < 1)
        return error("Invalid record");
-      VSTOffset = Record[0];
+      // Note that we subtract 1 here because the offset is relative to one word
+      // before the start of the identification or module block, which was
+      // historically always the start of the regular bitcode header.
+      VSTOffset = Record[0] - 1;
      break;
    /// MODULE_CODE_SOURCE_FILENAME: [namechar x N]
    case bitc::MODULE_CODE_SOURCE_FILENAME:
@ -6549,13 +6555,14 @@ llvm::getBitcodeModuleList(MemoryBufferRef Buffer) {
    return StreamOrErr.takeError();
  BitstreamCursor &Stream = *StreamOrErr;

-  uint64_t IdentificationBit = -1ull;
  std::vector<BitcodeModule> Modules;
  while (true) {
+    uint64_t BCBegin = Stream.getCurrentByteNo();
+
    // We may be consuming bitcode from a client that leaves garbage at the end
    // of the bitcode stream (e.g. Apple's ar tool). If we are close enough to
    // the end that there cannot possibly be another module, stop looking.
-    if (Stream.getCurrentByteNo() + 8 >= Stream.getBitcodeBytes().size())
+    if (BCBegin + 8 >= Stream.getBitcodeBytes().size())
      return Modules;

    BitstreamEntry Entry = Stream.advance();
@ -6564,17 +6571,35 @@ llvm::getBitcodeModuleList(MemoryBufferRef Buffer) {
    case BitstreamEntry::Error:
      return error("Malformed block");

-    case BitstreamEntry::SubBlock:
-      if (Entry.ID == bitc::IDENTIFICATION_BLOCK_ID)
-        IdentificationBit = Stream.GetCurrentBitNo();
-      else if (Entry.ID == bitc::MODULE_BLOCK_ID)
-        Modules.push_back({Stream.getBitcodeBytes(),
+    case BitstreamEntry::SubBlock: {
+      uint64_t IdentificationBit = -1ull;
+      if (Entry.ID == bitc::IDENTIFICATION_BLOCK_ID) {
+        IdentificationBit = Stream.GetCurrentBitNo() - BCBegin * 8;
+        if (Stream.SkipBlock())
+          return error("Malformed block");
+
+        Entry = Stream.advance();
+        if (Entry.Kind != BitstreamEntry::SubBlock ||
+            Entry.ID != bitc::MODULE_BLOCK_ID)
+          return error("Malformed block");
+      }
+
+      if (Entry.ID == bitc::MODULE_BLOCK_ID) {
+        uint64_t ModuleBit = Stream.GetCurrentBitNo() - BCBegin * 8;
+        if (Stream.SkipBlock())
+          return error("Malformed block");
+
+        Modules.push_back({Stream.getBitcodeBytes().slice(
+                               BCBegin, Stream.getCurrentByteNo() - BCBegin),
                           Buffer.getBufferIdentifier(), IdentificationBit,
-                           Stream.GetCurrentBitNo()});
+                           ModuleBit});
+        continue;
+      }

      if (Stream.SkipBlock())
        return error("Malformed block");
      continue;
+    }
    case BitstreamEntry::Record:
      Stream.skipRecord(Entry.ID);
      continue;