From d2f4701e4a36488b0f5e3b74aaa4cffa2ad5779e Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Fri, 1 Apr 2016 05:33:11 +0000 Subject: [PATCH] Add a module Hash in the bitcode and the combined index, implementing a kind of "build-id" This is intended to be used for ThinLTO incremental build. Differential Revision: http://reviews.llvm.org/D18213 This is a recommit of r265095 after fixing the Windows issues. From: Mehdi Amini git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@265111 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Bitcode/BitstreamReader.h | 2 + include/llvm/Bitcode/LLVMBitCodes.h | 4 ++ include/llvm/Bitcode/ReaderWriter.h | 3 +- include/llvm/IR/ModuleSummaryIndex.h | 43 +++++++++---- lib/Bitcode/Reader/BitcodeReader.cpp | 54 ++++++++++++++--- lib/Bitcode/Writer/BitcodeWriter.cpp | 74 ++++++++++++++++++++--- lib/IR/ModuleSummaryIndex.cpp | 8 ++- lib/Transforms/IPO/FunctionImport.cpp | 2 +- test/Bitcode/Inputs/module_hash.ll | 4 ++ test/Bitcode/module_hash.ll | 35 +++++++++++ tools/llvm-as/llvm-as.cpp | 5 +- tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp | 40 ++++++++++++ 12 files changed, 236 insertions(+), 38 deletions(-) create mode 100644 test/Bitcode/Inputs/module_hash.ll create mode 100644 test/Bitcode/module_hash.ll diff --git a/include/llvm/Bitcode/BitstreamReader.h b/include/llvm/Bitcode/BitstreamReader.h index 628a83cb0de..b331ceea051 100644 --- a/include/llvm/Bitcode/BitstreamReader.h +++ b/include/llvm/Bitcode/BitstreamReader.h @@ -446,6 +446,8 @@ public: using SimpleBitstreamCursor::canSkipToPos; using SimpleBitstreamCursor::AtEndOfStream; using SimpleBitstreamCursor::GetCurrentBitNo; + using SimpleBitstreamCursor::getCurrentByteNo; + using SimpleBitstreamCursor::getPointerToByte; using SimpleBitstreamCursor::getBitStreamReader; using SimpleBitstreamCursor::JumpToBit; using SimpleBitstreamCursor::fillCurWord; diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h index a93ed58ebb9..0c4cc854cdc 100644 
--- a/include/llvm/Bitcode/LLVMBitCodes.h +++ b/include/llvm/Bitcode/LLVMBitCodes.h @@ -107,6 +107,9 @@ enum ModuleCodes { // SOURCE_FILENAME: [namechar x N] MODULE_CODE_SOURCE_FILENAME = 16, + + // HASH: [5*i32] + MODULE_CODE_HASH = 17, }; /// PARAMATTR blocks have code for defining a parameter attribute set. @@ -183,6 +186,7 @@ enum ValueSymtabCodes { // The module path symbol table only has one code (MST_CODE_ENTRY). enum ModulePathSymtabCodes { MST_CODE_ENTRY = 1, // MST_ENTRY: [modid, namechar x N] + MST_CODE_HASH = 2, // MST_HASH: [5*i32] }; // The summary section uses different codes in the per-module diff --git a/include/llvm/Bitcode/ReaderWriter.h b/include/llvm/Bitcode/ReaderWriter.h index bbce15a8f93..1afffa05527 100644 --- a/include/llvm/Bitcode/ReaderWriter.h +++ b/include/llvm/Bitcode/ReaderWriter.h @@ -107,7 +107,8 @@ namespace llvm { /// for use in ThinLTO optimization). void WriteBitcodeToFile(const Module *M, raw_ostream &Out, bool ShouldPreserveUseListOrder = false, - bool EmitSummaryIndex = false); + bool EmitSummaryIndex = false, + bool GenerateHash = false); /// Write the specified module summary index to the given raw output stream, /// where it will be written in a new bitcode block. This is used when diff --git a/include/llvm/IR/ModuleSummaryIndex.h b/include/llvm/IR/ModuleSummaryIndex.h index 30a7145cb42..fe0385e76b9 100644 --- a/include/llvm/IR/ModuleSummaryIndex.h +++ b/include/llvm/IR/ModuleSummaryIndex.h @@ -25,6 +25,8 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" +#include + namespace llvm { /// \brief Class to accumulate and hold information about a callee. @@ -228,6 +230,9 @@ public: void setBitcodeIndex(uint64_t Offset) { BitcodeIndex = Offset; } }; +/// 160 bits SHA1 +typedef std::array ModuleHash; + /// List of global value info structures for a particular value held /// in the GlobalValueMap. Requires a vector in the case of multiple /// COMDAT values of the same name. 
@@ -245,9 +250,9 @@ typedef GlobalValueInfoMapTy::const_iterator const_globalvalueinfo_iterator; typedef GlobalValueInfoMapTy::iterator globalvalueinfo_iterator; /// String table to hold/own module path strings, which additionally holds the -/// module ID assigned to each module during the plugin step. The StringMap -/// makes a copy of and owns inserted strings. -typedef StringMap ModulePathStringTableTy; +/// module ID assigned to each module during the plugin step, as well as a hash +/// of the module. The StringMap makes a copy of and owns inserted strings. +typedef StringMap> ModulePathStringTableTy; /// Class to hold module path string table and global value map, /// and encapsulate methods for operating on them. @@ -304,17 +309,26 @@ public: GlobalValueMap[ValueGUID].push_back(std::move(Info)); } - /// Table of modules, containing an id. - const StringMap &modulePaths() const { + /// Table of modules, containing module hash and id. + const StringMap> &modulePaths() const { return ModulePathStringTable; } - /// Table of modules, containing an id. - StringMap &modulePaths() { return ModulePathStringTable; } + /// Table of modules, containing hash and id. + StringMap> &modulePaths() { + return ModulePathStringTable; + } /// Get the module ID recorded for the given module path. uint64_t getModuleId(const StringRef ModPath) const { - return ModulePathStringTable.lookup(ModPath); + return ModulePathStringTable.lookup(ModPath).first; + } + + /// Get the module SHA1 hash recorded for the given module path. + const ModuleHash &getModuleHash(const StringRef ModPath) const { + auto It = ModulePathStringTable.find(ModPath); + assert(It != ModulePathStringTable.end() && "Module not registered"); + return It->second.second; } /// Add the given per-module index into this module index/summary, @@ -333,11 +347,14 @@ public: return NewName.str(); } - /// Add a new module path, mapped to the given module Id, and return StringRef - /// owned by string table map. 
- StringRef addModulePath(StringRef ModPath, uint64_t ModId) { - return ModulePathStringTable.insert(std::make_pair(ModPath, ModId)) - .first->first(); + /// Add a new module path with the given \p Hash, mapped to the given \p + /// ModID, and return an iterator to the entry in the index. + ModulePathStringTableTy::iterator + addModulePath(StringRef ModPath, uint64_t ModId, + ModuleHash Hash = ModuleHash{{0}}) { + return ModulePathStringTable.insert(std::make_pair( + ModPath, + std::make_pair(ModId, Hash))).first; } /// Check if the given Module has any functions available for exporting diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 93496fe8e6f..1840b60cc01 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -5632,11 +5632,7 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseModule() { } continue; - case BitstreamEntry::Record: - // Once we find the last record of interest, skip the rest. - if (VSTOffset > 0) - Stream.skipRecord(Entry.ID); - else { + case BitstreamEntry::Record: { Record.clear(); auto BitCode = Stream.readRecord(Entry.ID, Record); switch (BitCode) { @@ -5650,6 +5646,25 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseModule() { SourceFileName = ValueName.c_str(); break; } + /// MODULE_CODE_HASH: [5*i32] + case bitc::MODULE_CODE_HASH: { + if (Record.size() != 5) + return error("Invalid hash length " + Twine(Record.size()).str()); + if (!TheIndex) + break; + if (TheIndex->modulePaths().empty()) + // Does not have any summary emitted. 
+ break; + if (TheIndex->modulePaths().size() != 1) + return error("Don't expect multiple modules defined?"); + auto &Hash = TheIndex->modulePaths().begin()->second.second; + int Pos = 0; + for (auto &Val : Record) { + assert(!(Val >> 32) && "Unexpected high bits set"); + Hash[Pos++] = Val; + } + break; + } /// MODULE_CODE_VSTOFFSET: [offset] case bitc::MODULE_CODE_VSTOFFSET: if (Record.size() < 1) @@ -5761,7 +5776,7 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseEntireSummary() { // module path string table entry with an empty (0) ID to take // ownership. FS->setModulePath( - TheIndex->addModulePath(Buffer->getBufferIdentifier(), 0)); + TheIndex->addModulePath(Buffer->getBufferIdentifier(), 0)->first()); static int RefListStartIndex = 4; int CallGraphEdgeStartIndex = RefListStartIndex + NumRefs; assert(Record.size() >= RefListStartIndex + NumRefs && @@ -5799,7 +5814,7 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseEntireSummary() { std::unique_ptr FS = llvm::make_unique(getDecodedLinkage(RawLinkage)); FS->setModulePath( - TheIndex->addModulePath(Buffer->getBufferIdentifier(), 0)); + TheIndex->addModulePath(Buffer->getBufferIdentifier(), 0)->first()); for (unsigned I = 2, E = Record.size(); I != E; ++I) { unsigned RefValueId = Record[I]; uint64_t RefGUID = getGUIDFromValueId(RefValueId); @@ -5887,6 +5902,7 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseModuleStringTable() { SmallVector Record; SmallString<128> ModulePath; + ModulePathStringTableTy::iterator LastSeenModulePath; while (1) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); @@ -5907,14 +5923,32 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseModuleStringTable() { break; case bitc::MST_CODE_ENTRY: { // MST_ENTRY: [modid, namechar x N] + uint64_t ModuleId = Record[0]; + if (convertToString(Record, 1, ModulePath)) return error("Invalid record"); - uint64_t ModuleId = Record[0]; - StringRef ModulePathInMap = TheIndex->addModulePath(ModulePath, ModuleId); - 
ModuleIdMap[ModuleId] = ModulePathInMap; + + LastSeenModulePath = TheIndex->addModulePath(ModulePath, ModuleId); + ModuleIdMap[ModuleId] = LastSeenModulePath->first(); + ModulePath.clear(); break; } + /// MST_CODE_HASH: [5*i32] + case bitc::MST_CODE_HASH: { + if (Record.size() != 5) + return error("Invalid hash length " + Twine(Record.size()).str()); + if (LastSeenModulePath == TheIndex->modulePaths().end()) + return error("Invalid hash that does not follow a module path"); + int Pos = 0; + for (auto &Val : Record) { + assert(!(Val >> 32) && "Unexpected high bits set"); + LastSeenModulePath->second.second[Pos++] = Val; + } + // Reset LastSeenModulePath to avoid overriding the hash unexpectedly. + LastSeenModulePath = TheIndex->modulePaths().end(); + break; + } } } llvm_unreachable("Exit infinite loop"); diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index c1513416817..18fb7ad8d11 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "ValueEnumerator.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/BlockFrequencyInfo.h" @@ -39,6 +40,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/Program.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/SHA1.h" #include #include using namespace llvm; @@ -2852,8 +2854,18 @@ static void WriteModStrings(const ModuleSummaryIndex &I, Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); unsigned Abbrev6Bit = Stream.EmitAbbrev(Abbv); - SmallVector NameVals; - for (const StringMapEntry &MPSE : I.modulePaths()) { + // Module Hash, 160 bits SHA1. Optionally, emitted after each MST_CODE_ENTRY. 
+ Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_HASH)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); + unsigned AbbrevHash = Stream.EmitAbbrev(Abbv); + + SmallVector Vals; + for (const auto &MPSE : I.modulePaths()) { StringEncoding Bits = getStringEncoding(MPSE.getKey().data(), MPSE.getKey().size()); unsigned AbbrevToUse = Abbrev8Bit; @@ -2862,14 +2874,29 @@ static void WriteModStrings(const ModuleSummaryIndex &I, else if (Bits == SE_Fixed7) AbbrevToUse = Abbrev7Bit; - NameVals.push_back(MPSE.getValue()); + Vals.push_back(MPSE.getValue().first); for (const auto P : MPSE.getKey()) - NameVals.push_back((unsigned char)P); + Vals.push_back((unsigned char)P); // Emit the finished record. - Stream.EmitRecord(bitc::MST_CODE_ENTRY, NameVals, AbbrevToUse); - NameVals.clear(); + Stream.EmitRecord(bitc::MST_CODE_ENTRY, Vals, AbbrevToUse); + + Vals.clear(); + // Emit an optional hash for the module now + auto &Hash = MPSE.getValue().second; + bool AllZero = true; // Detect if the hash is empty, and do not generate it + for (auto Val : Hash) { + if (Val) + AllZero = false; + Vals.push_back(Val); + } + if (!AllZero) { + // Emit the hash record. + Stream.EmitRecord(bitc::MST_CODE_HASH, Vals, AbbrevHash); + } + + Vals.clear(); } Stream.ExitBlock(); } @@ -3177,11 +3204,36 @@ static void WriteIdentificationBlock(const Module *M, BitstreamWriter &Stream) { Stream.ExitBlock(); } +static void writeModuleHash(BitstreamWriter &Stream, + SmallVectorImpl &Buffer, + size_t BlockStartPos) { + // Emit the module's hash. 
+ // MODULE_CODE_HASH: [5*i32] + SHA1 Hasher; + Hasher.update(ArrayRef((uint8_t *)&Buffer[BlockStartPos], + Buffer.size() - BlockStartPos)); + auto Hash = Hasher.result(); + SmallVector Vals; + auto LShift = [&](unsigned char Val, unsigned Amount) + -> uint64_t { return ((uint64_t)Val) << Amount; }; + for (int Pos = 0; Pos < 20; Pos += 4) { + uint32_t SubHash = LShift(Hash[Pos + 0], 24); + SubHash |= LShift(Hash[Pos + 1], 16) | LShift(Hash[Pos + 2], 8) | + (unsigned)(unsigned char)Hash[Pos + 3]; + Vals.push_back(SubHash); + } + + // Emit the finished record. + Stream.EmitRecord(bitc::MODULE_CODE_HASH, Vals); +} + /// WriteModule - Emit the specified module to the bitstream. static void WriteModule(const Module *M, BitstreamWriter &Stream, bool ShouldPreserveUseListOrder, - uint64_t BitcodeStartBit, bool EmitSummaryIndex) { + uint64_t BitcodeStartBit, bool EmitSummaryIndex, + bool GenerateHash, SmallVectorImpl &Buffer) { Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3); + size_t BlockStartPos = Buffer.size(); SmallVector Vals; unsigned CurVersion = 1; @@ -3238,6 +3290,10 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream, WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream, VSTOffsetPlaceholder, BitcodeStartBit, &FunctionIndex); + if (GenerateHash) { + writeModuleHash(Stream, Buffer, BlockStartPos); + } + Stream.ExitBlock(); } @@ -3322,7 +3378,7 @@ static void WriteBitcodeHeader(BitstreamWriter &Stream) { /// stream. void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out, bool ShouldPreserveUseListOrder, - bool EmitSummaryIndex) { + bool EmitSummaryIndex, bool GenerateHash) { SmallVector Buffer; Buffer.reserve(256*1024); @@ -3348,7 +3404,7 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out, // Emit the module. 
WriteModule(M, Stream, ShouldPreserveUseListOrder, BitcodeStartBit, - EmitSummaryIndex); + EmitSummaryIndex, GenerateHash, Buffer); } if (TT.isOSDarwin() || TT.isOSBinFormatMachO()) diff --git a/lib/IR/ModuleSummaryIndex.cpp b/lib/IR/ModuleSummaryIndex.cpp index 16b58ddbeff..c23e072b48a 100644 --- a/lib/IR/ModuleSummaryIndex.cpp +++ b/lib/IR/ModuleSummaryIndex.cpp @@ -37,9 +37,11 @@ void ModuleSummaryIndex::mergeFrom(std::unique_ptr Other, // Add the module path string ref for this module if we haven't already // saved a reference to it. - if (ModPath.empty()) - ModPath = addModulePath(Info->summary()->modulePath(), NextModuleId); - else + if (ModPath.empty()) { + auto Path = Info->summary()->modulePath(); + ModPath = addModulePath(Path, NextModuleId, Other->getModuleHash(Path)) + ->first(); + } else assert(ModPath == Info->summary()->modulePath() && "Each module in the combined map should have a unique ID"); diff --git a/lib/Transforms/IPO/FunctionImport.cpp b/lib/Transforms/IPO/FunctionImport.cpp index 255a5582745..b5f1ffb046f 100644 --- a/lib/Transforms/IPO/FunctionImport.cpp +++ b/lib/Transforms/IPO/FunctionImport.cpp @@ -61,7 +61,7 @@ static std::unique_ptr loadFile(const std::string &FileName, /* ShouldLazyLoadMetadata = */ true); if (!Result) { Err.print("function-import", errs()); - return nullptr; + report_fatal_error("Abort"); } return Result; diff --git a/test/Bitcode/Inputs/module_hash.ll b/test/Bitcode/Inputs/module_hash.ll new file mode 100644 index 00000000000..1d422c6e817 --- /dev/null +++ b/test/Bitcode/Inputs/module_hash.ll @@ -0,0 +1,4 @@ +; Needs a function for the combined index to be populated +define void @bar() { + ret void +} diff --git a/test/Bitcode/module_hash.ll b/test/Bitcode/module_hash.ll new file mode 100644 index 00000000000..fda6030a903 --- /dev/null +++ b/test/Bitcode/module_hash.ll @@ -0,0 +1,35 @@ +; Check per module hash. 
+; RUN: llvm-as -module-hash %s -o - | llvm-bcanalyzer -dump | FileCheck %s --check-prefix=MOD1 +; MOD1: +; RUN: llvm-as -module-hash %p/Inputs/module_hash.ll -o - | llvm-bcanalyzer -dump | FileCheck %s --check-prefix=MOD2 +; MOD2: + +; Check that the hash matches in the combined index. + +; First regenerate the modules with a summary +; RUN: llvm-as -module-hash -module-summary %s -o %t.m1.bc +; RUN: llvm-as -module-hash -module-summary %p/Inputs/module_hash.ll -o %t.m2.bc + +; Recover the hashes from the modules themselves. +; RUN: llvm-bcanalyzer -dump %t.m1.bc | grep ' %t.hash +; RUN: llvm-bcanalyzer -dump %t.m2.bc | grep '> %t.hash + +; Generate the combined index and gather the hashes there. +; RUN: llvm-lto --thinlto-action=thinlink -o - %t.m1.bc %t.m2.bc | llvm-bcanalyzer -dump | grep '> %t.hash + +; Validate the output now; the hashes in the individual modules and the combined index are in the same file. +; RUN: cat %t.hash | FileCheck %s --check-prefix=COMBINED + +; First capture the value of the hash for the two modules. +; COMBINED: +; COMBINED: + +; Validate against the value extracted from the combined index +; COMBINED-DAG: +; COMBINED-DAG: + + +; Need a function for the combined index to be populated. 
+define void @foo() { + ret void +} diff --git a/tools/llvm-as/llvm-as.cpp b/tools/llvm-as/llvm-as.cpp index 7318bfe341d..7e9500a6672 100644 --- a/tools/llvm-as/llvm-as.cpp +++ b/tools/llvm-as/llvm-as.cpp @@ -48,6 +48,9 @@ static cl::opt EmitSummaryIndex("module-summary", cl::desc("Emit module summary index"), cl::init(false)); +static cl::opt EmitModuleHash("module-hash", cl::desc("Emit module hash"), + cl::init(false)); + static cl::opt DumpAsm("d", cl::desc("Print assembly as parsed"), cl::Hidden); @@ -82,7 +85,7 @@ static void WriteOutputFile(const Module *M) { if (Force || !CheckBitcodeOutputToConsole(Out->os(), true)) WriteBitcodeToFile(M, Out->os(), PreserveBitcodeUseListOrder, - EmitSummaryIndex); + EmitSummaryIndex, EmitModuleHash); // Declare success. Out->keep(); diff --git a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp index d1cc1a02778..3c23103d70b 100644 --- a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp +++ b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp @@ -29,6 +29,7 @@ #include "llvm/Bitcode/BitstreamReader.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Bitcode/LLVMBitCodes.h" #include "llvm/Bitcode/ReaderWriter.h" #include "llvm/IR/Verifier.h" @@ -38,8 +39,10 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/Signals.h" +#include "llvm/Support/SHA1.h" #include "llvm/Support/raw_ostream.h" #include +#include #include #include #include @@ -174,6 +177,7 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID, STRINGIFY_CODE(MODULE_CODE, VSTOFFSET) STRINGIFY_CODE(MODULE_CODE, METADATA_VALUES_UNUSED) STRINGIFY_CODE(MODULE_CODE, SOURCE_FILENAME) + STRINGIFY_CODE(MODULE_CODE, HASH) } case bitc::IDENTIFICATION_BLOCK_ID: switch (CodeID) { @@ -292,6 +296,7 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID, default: return nullptr; STRINGIFY_CODE(MST_CODE, ENTRY) + STRINGIFY_CODE(MST_CODE, HASH) } 
case bitc::GLOBALVAL_SUMMARY_BLOCK_ID: switch (CodeID) { @@ -481,6 +486,9 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned BlockID, if (Stream.EnterSubBlock(BlockID, &NumWords)) return Error("Malformed block record"); + // Keep it for later, when we see a MODULE_HASH record + uint64_t BlockEntryPos = Stream.getCurrentByteNo(); + const char *BlockName = nullptr; if (DumpRecords) { outs() << Indent << "<"; @@ -552,6 +560,7 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned BlockID, ++BlockStats.NumRecords; StringRef Blob; + unsigned CurrentRecordPos = Stream.getCurrentByteNo(); unsigned Code = Stream.readRecord(Entry.ID, Record, &Blob); // Increment the # occurrences of this code. @@ -586,6 +595,37 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned BlockID, for (unsigned i = 0, e = Record.size(); i != e; ++i) outs() << " op" << i << "=" << (int64_t)Record[i]; + // If we found a module hash, let's verify that it matches! + if (BlockID == bitc::MODULE_BLOCK_ID && Code == bitc::MODULE_CODE_HASH) { + if (Record.size() != 5) + outs() << " (invalid)"; + else { + // Recompute the hash and compare it to the one in the bitcode + SHA1 Hasher; + StringRef Hash; + { + int BlockSize = CurrentRecordPos - BlockEntryPos; + auto Ptr = Stream.getPointerToByte(BlockEntryPos, BlockSize); + Hasher.update(ArrayRef(Ptr, BlockSize)); + Hash = Hasher.result(); + } + SmallString<20> RecordedHash; + RecordedHash.resize(20); + int Pos = 0; + for (auto &Val : Record) { + assert(!(Val >> 32) && "Unexpected high bits set"); + RecordedHash[Pos++] = (Val >> 24) & 0xFF; + RecordedHash[Pos++] = (Val >> 16) & 0xFF; + RecordedHash[Pos++] = (Val >> 8) & 0xFF; + RecordedHash[Pos++] = (Val >> 0) & 0xFF; + } + if (Hash == RecordedHash) + outs() << " (match)"; + else + outs() << " (!mismatch!)"; + } + } + outs() << "/>"; if (Abbv) {