Add a module Hash in the bitcode and the combined index, implementing a kind of "build-id"

This is intended to be used for ThinLTO incremental build.

Differential Revision: http://reviews.llvm.org/D18213

This is a recommit of r265095 after fixing the Windows issues.

From: Mehdi Amini <mehdi.amini@apple.com>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@265111 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Mehdi Amini 2016-04-01 05:33:11 +00:00
parent d4b1021e3e
commit d2f4701e4a
12 changed files with 236 additions and 38 deletions

View File

@ -446,6 +446,8 @@ public:
using SimpleBitstreamCursor::canSkipToPos; using SimpleBitstreamCursor::canSkipToPos;
using SimpleBitstreamCursor::AtEndOfStream; using SimpleBitstreamCursor::AtEndOfStream;
using SimpleBitstreamCursor::GetCurrentBitNo; using SimpleBitstreamCursor::GetCurrentBitNo;
using SimpleBitstreamCursor::getCurrentByteNo;
using SimpleBitstreamCursor::getPointerToByte;
using SimpleBitstreamCursor::getBitStreamReader; using SimpleBitstreamCursor::getBitStreamReader;
using SimpleBitstreamCursor::JumpToBit; using SimpleBitstreamCursor::JumpToBit;
using SimpleBitstreamCursor::fillCurWord; using SimpleBitstreamCursor::fillCurWord;

View File

@ -107,6 +107,9 @@ enum ModuleCodes {
// SOURCE_FILENAME: [namechar x N] // SOURCE_FILENAME: [namechar x N]
MODULE_CODE_SOURCE_FILENAME = 16, MODULE_CODE_SOURCE_FILENAME = 16,
// HASH: [5*i32]
MODULE_CODE_HASH = 17,
}; };
/// PARAMATTR blocks have code for defining a parameter attribute set. /// PARAMATTR blocks have code for defining a parameter attribute set.
@ -183,6 +186,7 @@ enum ValueSymtabCodes {
// The module path symbol table only has one code (MST_CODE_ENTRY). // The module path symbol table only has one code (MST_CODE_ENTRY).
enum ModulePathSymtabCodes { enum ModulePathSymtabCodes {
MST_CODE_ENTRY = 1, // MST_ENTRY: [modid, namechar x N] MST_CODE_ENTRY = 1, // MST_ENTRY: [modid, namechar x N]
MST_CODE_HASH = 2, // MST_HASH: [5*i32]
}; };
// The summary section uses different codes in the per-module // The summary section uses different codes in the per-module

View File

@ -107,7 +107,8 @@ namespace llvm {
/// for use in ThinLTO optimization). /// for use in ThinLTO optimization).
void WriteBitcodeToFile(const Module *M, raw_ostream &Out, void WriteBitcodeToFile(const Module *M, raw_ostream &Out,
bool ShouldPreserveUseListOrder = false, bool ShouldPreserveUseListOrder = false,
bool EmitSummaryIndex = false); bool EmitSummaryIndex = false,
bool GenerateHash = false);
/// Write the specified module summary index to the given raw output stream, /// Write the specified module summary index to the given raw output stream,
/// where it will be written in a new bitcode block. This is used when /// where it will be written in a new bitcode block. This is used when

View File

@ -25,6 +25,8 @@
#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h" #include "llvm/Support/raw_ostream.h"
#include <array>
namespace llvm { namespace llvm {
/// \brief Class to accumulate and hold information about a callee. /// \brief Class to accumulate and hold information about a callee.
@ -228,6 +230,9 @@ public:
void setBitcodeIndex(uint64_t Offset) { BitcodeIndex = Offset; } void setBitcodeIndex(uint64_t Offset) { BitcodeIndex = Offset; }
}; };
/// 160-bit SHA1 digest of a module, held as five 32-bit words.
typedef std::array<uint32_t, 5> ModuleHash;
/// List of global value info structures for a particular value held /// List of global value info structures for a particular value held
/// in the GlobalValueMap. Requires a vector in the case of multiple /// in the GlobalValueMap. Requires a vector in the case of multiple
/// COMDAT values of the same name. /// COMDAT values of the same name.
@ -245,9 +250,9 @@ typedef GlobalValueInfoMapTy::const_iterator const_globalvalueinfo_iterator;
typedef GlobalValueInfoMapTy::iterator globalvalueinfo_iterator; typedef GlobalValueInfoMapTy::iterator globalvalueinfo_iterator;
/// String table to hold/own module path strings, which additionally holds the /// String table to hold/own module path strings, which additionally holds the
/// module ID assigned to each module during the plugin step. The StringMap /// module ID assigned to each module during the plugin step, as well as a hash
/// makes a copy of and owns inserted strings. /// of the module. The StringMap makes a copy of and owns inserted strings.
typedef StringMap<uint64_t> ModulePathStringTableTy; typedef StringMap<std::pair<uint64_t, ModuleHash>> ModulePathStringTableTy;
/// Class to hold module path string table and global value map, /// Class to hold module path string table and global value map,
/// and encapsulate methods for operating on them. /// and encapsulate methods for operating on them.
@ -304,17 +309,26 @@ public:
GlobalValueMap[ValueGUID].push_back(std::move(Info)); GlobalValueMap[ValueGUID].push_back(std::move(Info));
} }
/// Table of modules, containing an id. /// Table of modules, containing module hash and id.
const StringMap<uint64_t> &modulePaths() const { const StringMap<std::pair<uint64_t, ModuleHash>> &modulePaths() const {
return ModulePathStringTable; return ModulePathStringTable;
} }
/// Table of modules, containing an id. /// Table of modules, containing hash and id.
StringMap<uint64_t> &modulePaths() { return ModulePathStringTable; } StringMap<std::pair<uint64_t, ModuleHash>> &modulePaths() {
return ModulePathStringTable;
}
/// Get the module ID recorded for the given module path. /// Get the module ID recorded for the given module path.
uint64_t getModuleId(const StringRef ModPath) const { uint64_t getModuleId(const StringRef ModPath) const {
return ModulePathStringTable.lookup(ModPath); return ModulePathStringTable.lookup(ModPath).first;
}
/// Get the module SHA1 hash recorded for the given module path.
const ModuleHash &getModuleHash(const StringRef ModPath) const {
auto It = ModulePathStringTable.find(ModPath);
assert(It != ModulePathStringTable.end() && "Module not registered");
return It->second.second;
} }
/// Add the given per-module index into this module index/summary, /// Add the given per-module index into this module index/summary,
@ -333,11 +347,14 @@ public:
return NewName.str(); return NewName.str();
} }
/// Add a new module path, mapped to the given module Id, and return StringRef /// Add a new module path with the given \p Hash, mapped to the given \p
/// owned by string table map. /// ModID, and return an iterator to the entry in the index.
StringRef addModulePath(StringRef ModPath, uint64_t ModId) { ModulePathStringTableTy::iterator
return ModulePathStringTable.insert(std::make_pair(ModPath, ModId)) addModulePath(StringRef ModPath, uint64_t ModId,
.first->first(); ModuleHash Hash = ModuleHash{{0}}) {
return ModulePathStringTable.insert(std::make_pair(
ModPath,
std::make_pair(ModId, Hash))).first;
} }
/// Check if the given Module has any functions available for exporting /// Check if the given Module has any functions available for exporting

View File

@ -5632,11 +5632,7 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseModule() {
} }
continue; continue;
case BitstreamEntry::Record: case BitstreamEntry::Record: {
// Once we find the last record of interest, skip the rest.
if (VSTOffset > 0)
Stream.skipRecord(Entry.ID);
else {
Record.clear(); Record.clear();
auto BitCode = Stream.readRecord(Entry.ID, Record); auto BitCode = Stream.readRecord(Entry.ID, Record);
switch (BitCode) { switch (BitCode) {
@ -5650,6 +5646,25 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseModule() {
SourceFileName = ValueName.c_str(); SourceFileName = ValueName.c_str();
break; break;
} }
/// MODULE_CODE_HASH: [5*i32]
case bitc::MODULE_CODE_HASH: {
if (Record.size() != 5)
return error("Invalid hash length " + Twine(Record.size()).str());
if (!TheIndex)
break;
if (TheIndex->modulePaths().empty())
// Does not have any summary emitted.
break;
if (TheIndex->modulePaths().size() != 1)
return error("Don't expect multiple modules defined?");
auto &Hash = TheIndex->modulePaths().begin()->second.second;
int Pos = 0;
for (auto &Val : Record) {
assert(!(Val >> 32) && "Unexpected high bits set");
Hash[Pos++] = Val;
}
break;
}
/// MODULE_CODE_VSTOFFSET: [offset] /// MODULE_CODE_VSTOFFSET: [offset]
case bitc::MODULE_CODE_VSTOFFSET: case bitc::MODULE_CODE_VSTOFFSET:
if (Record.size() < 1) if (Record.size() < 1)
@ -5761,7 +5776,7 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseEntireSummary() {
// module path string table entry with an empty (0) ID to take // module path string table entry with an empty (0) ID to take
// ownership. // ownership.
FS->setModulePath( FS->setModulePath(
TheIndex->addModulePath(Buffer->getBufferIdentifier(), 0)); TheIndex->addModulePath(Buffer->getBufferIdentifier(), 0)->first());
static int RefListStartIndex = 4; static int RefListStartIndex = 4;
int CallGraphEdgeStartIndex = RefListStartIndex + NumRefs; int CallGraphEdgeStartIndex = RefListStartIndex + NumRefs;
assert(Record.size() >= RefListStartIndex + NumRefs && assert(Record.size() >= RefListStartIndex + NumRefs &&
@ -5799,7 +5814,7 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseEntireSummary() {
std::unique_ptr<GlobalVarSummary> FS = std::unique_ptr<GlobalVarSummary> FS =
llvm::make_unique<GlobalVarSummary>(getDecodedLinkage(RawLinkage)); llvm::make_unique<GlobalVarSummary>(getDecodedLinkage(RawLinkage));
FS->setModulePath( FS->setModulePath(
TheIndex->addModulePath(Buffer->getBufferIdentifier(), 0)); TheIndex->addModulePath(Buffer->getBufferIdentifier(), 0)->first());
for (unsigned I = 2, E = Record.size(); I != E; ++I) { for (unsigned I = 2, E = Record.size(); I != E; ++I) {
unsigned RefValueId = Record[I]; unsigned RefValueId = Record[I];
uint64_t RefGUID = getGUIDFromValueId(RefValueId); uint64_t RefGUID = getGUIDFromValueId(RefValueId);
@ -5887,6 +5902,7 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseModuleStringTable() {
SmallVector<uint64_t, 64> Record; SmallVector<uint64_t, 64> Record;
SmallString<128> ModulePath; SmallString<128> ModulePath;
ModulePathStringTableTy::iterator LastSeenModulePath;
while (1) { while (1) {
BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
@ -5907,14 +5923,32 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseModuleStringTable() {
break; break;
case bitc::MST_CODE_ENTRY: { case bitc::MST_CODE_ENTRY: {
// MST_ENTRY: [modid, namechar x N] // MST_ENTRY: [modid, namechar x N]
uint64_t ModuleId = Record[0];
if (convertToString(Record, 1, ModulePath)) if (convertToString(Record, 1, ModulePath))
return error("Invalid record"); return error("Invalid record");
uint64_t ModuleId = Record[0];
StringRef ModulePathInMap = TheIndex->addModulePath(ModulePath, ModuleId); LastSeenModulePath = TheIndex->addModulePath(ModulePath, ModuleId);
ModuleIdMap[ModuleId] = ModulePathInMap; ModuleIdMap[ModuleId] = LastSeenModulePath->first();
ModulePath.clear(); ModulePath.clear();
break; break;
} }
/// MST_CODE_HASH: [5*i32]
case bitc::MST_CODE_HASH: {
if (Record.size() != 5)
return error("Invalid hash length " + Twine(Record.size()).str());
if (LastSeenModulePath == TheIndex->modulePaths().end())
return error("Invalid hash that does not follow a module path");
int Pos = 0;
for (auto &Val : Record) {
assert(!(Val >> 32) && "Unexpected high bits set");
LastSeenModulePath->second.second[Pos++] = Val;
}
// Reset LastSeenModulePath to avoid overriding the hash unexpectedly.
LastSeenModulePath = TheIndex->modulePaths().end();
break;
}
} }
} }
llvm_unreachable("Exit infinite loop"); llvm_unreachable("Exit infinite loop");

View File

@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
#include "ValueEnumerator.h" #include "ValueEnumerator.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Triple.h" #include "llvm/ADT/Triple.h"
#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BlockFrequencyInfo.h"
@ -39,6 +40,7 @@
#include "llvm/Support/MathExtras.h" #include "llvm/Support/MathExtras.h"
#include "llvm/Support/Program.h" #include "llvm/Support/Program.h"
#include "llvm/Support/raw_ostream.h" #include "llvm/Support/raw_ostream.h"
#include "llvm/Support/SHA1.h"
#include <cctype> #include <cctype>
#include <map> #include <map>
using namespace llvm; using namespace llvm;
@ -2852,8 +2854,18 @@ static void WriteModStrings(const ModuleSummaryIndex &I,
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
unsigned Abbrev6Bit = Stream.EmitAbbrev(Abbv); unsigned Abbrev6Bit = Stream.EmitAbbrev(Abbv);
SmallVector<unsigned, 64> NameVals; // Module Hash, 160 bits SHA1. Optionally, emitted after each MST_CODE_ENTRY.
for (const StringMapEntry<uint64_t> &MPSE : I.modulePaths()) { Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_HASH));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
unsigned AbbrevHash = Stream.EmitAbbrev(Abbv);
SmallVector<unsigned, 64> Vals;
for (const auto &MPSE : I.modulePaths()) {
StringEncoding Bits = StringEncoding Bits =
getStringEncoding(MPSE.getKey().data(), MPSE.getKey().size()); getStringEncoding(MPSE.getKey().data(), MPSE.getKey().size());
unsigned AbbrevToUse = Abbrev8Bit; unsigned AbbrevToUse = Abbrev8Bit;
@ -2862,14 +2874,29 @@ static void WriteModStrings(const ModuleSummaryIndex &I,
else if (Bits == SE_Fixed7) else if (Bits == SE_Fixed7)
AbbrevToUse = Abbrev7Bit; AbbrevToUse = Abbrev7Bit;
NameVals.push_back(MPSE.getValue()); Vals.push_back(MPSE.getValue().first);
for (const auto P : MPSE.getKey()) for (const auto P : MPSE.getKey())
NameVals.push_back((unsigned char)P); Vals.push_back((unsigned char)P);
// Emit the finished record. // Emit the finished record.
Stream.EmitRecord(bitc::MST_CODE_ENTRY, NameVals, AbbrevToUse); Stream.EmitRecord(bitc::MST_CODE_ENTRY, Vals, AbbrevToUse);
NameVals.clear();
Vals.clear();
// Emit an optional hash for the module now
auto &Hash = MPSE.getValue().second;
bool AllZero = true; // Detect if the hash is empty, and do not generate it
for (auto Val : Hash) {
if (Val)
AllZero = false;
Vals.push_back(Val);
}
if (!AllZero) {
// Emit the hash record.
Stream.EmitRecord(bitc::MST_CODE_HASH, Vals, AbbrevHash);
}
Vals.clear();
} }
Stream.ExitBlock(); Stream.ExitBlock();
} }
@ -3177,11 +3204,36 @@ static void WriteIdentificationBlock(const Module *M, BitstreamWriter &Stream) {
Stream.ExitBlock(); Stream.ExitBlock();
} }
/// Hash the bytes of \p Buffer from \p BlockStartPos to its current end with
/// SHA1 and emit the digest into \p Stream as a MODULE_CODE_HASH record.
///
/// The 20 digest bytes are packed into five 32-bit operands, most significant
/// byte first, giving a record of the form MODULE_CODE_HASH: [5*i32].
static void writeModuleHash(BitstreamWriter &Stream,
                            SmallVectorImpl<char> &Buffer,
                            size_t BlockStartPos) {
  // Feed everything emitted since BlockStartPos to the hasher.
  const size_t HashedSize = Buffer.size() - BlockStartPos;
  SHA1 Hasher;
  Hasher.update(ArrayRef<uint8_t>(
      reinterpret_cast<uint8_t *>(&Buffer[BlockStartPos]), HashedSize));
  auto Digest = Hasher.result();

  // Fold each run of four digest bytes into a single big-endian word.
  SmallVector<uint64_t, 20> Vals;
  for (int WordStart = 0; WordStart != 20; WordStart += 4) {
    uint32_t Word = 0;
    for (int ByteIdx = 0; ByteIdx != 4; ++ByteIdx)
      Word = (Word << 8) |
             static_cast<unsigned char>(Digest[WordStart + ByteIdx]);
    Vals.push_back(Word);
  }

  // Emit the finished record.
  Stream.EmitRecord(bitc::MODULE_CODE_HASH, Vals);
}
/// WriteModule - Emit the specified module to the bitstream. /// WriteModule - Emit the specified module to the bitstream.
static void WriteModule(const Module *M, BitstreamWriter &Stream, static void WriteModule(const Module *M, BitstreamWriter &Stream,
bool ShouldPreserveUseListOrder, bool ShouldPreserveUseListOrder,
uint64_t BitcodeStartBit, bool EmitSummaryIndex) { uint64_t BitcodeStartBit, bool EmitSummaryIndex,
bool GenerateHash, SmallVectorImpl<char> &Buffer) {
Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3); Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3);
size_t BlockStartPos = Buffer.size();
SmallVector<unsigned, 1> Vals; SmallVector<unsigned, 1> Vals;
unsigned CurVersion = 1; unsigned CurVersion = 1;
@ -3238,6 +3290,10 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream,
WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream, WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream,
VSTOffsetPlaceholder, BitcodeStartBit, &FunctionIndex); VSTOffsetPlaceholder, BitcodeStartBit, &FunctionIndex);
if (GenerateHash) {
writeModuleHash(Stream, Buffer, BlockStartPos);
}
Stream.ExitBlock(); Stream.ExitBlock();
} }
@ -3322,7 +3378,7 @@ static void WriteBitcodeHeader(BitstreamWriter &Stream) {
/// stream. /// stream.
void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out, void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out,
bool ShouldPreserveUseListOrder, bool ShouldPreserveUseListOrder,
bool EmitSummaryIndex) { bool EmitSummaryIndex, bool GenerateHash) {
SmallVector<char, 0> Buffer; SmallVector<char, 0> Buffer;
Buffer.reserve(256*1024); Buffer.reserve(256*1024);
@ -3348,7 +3404,7 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out,
// Emit the module. // Emit the module.
WriteModule(M, Stream, ShouldPreserveUseListOrder, BitcodeStartBit, WriteModule(M, Stream, ShouldPreserveUseListOrder, BitcodeStartBit,
EmitSummaryIndex); EmitSummaryIndex, GenerateHash, Buffer);
} }
if (TT.isOSDarwin() || TT.isOSBinFormatMachO()) if (TT.isOSDarwin() || TT.isOSBinFormatMachO())

View File

@ -37,9 +37,11 @@ void ModuleSummaryIndex::mergeFrom(std::unique_ptr<ModuleSummaryIndex> Other,
// Add the module path string ref for this module if we haven't already // Add the module path string ref for this module if we haven't already
// saved a reference to it. // saved a reference to it.
if (ModPath.empty()) if (ModPath.empty()) {
ModPath = addModulePath(Info->summary()->modulePath(), NextModuleId); auto Path = Info->summary()->modulePath();
else ModPath = addModulePath(Path, NextModuleId, Other->getModuleHash(Path))
->first();
} else
assert(ModPath == Info->summary()->modulePath() && assert(ModPath == Info->summary()->modulePath() &&
"Each module in the combined map should have a unique ID"); "Each module in the combined map should have a unique ID");

View File

@ -61,7 +61,7 @@ static std::unique_ptr<Module> loadFile(const std::string &FileName,
/* ShouldLazyLoadMetadata = */ true); /* ShouldLazyLoadMetadata = */ true);
if (!Result) { if (!Result) {
Err.print("function-import", errs()); Err.print("function-import", errs());
return nullptr; report_fatal_error("Abort");
} }
return Result; return Result;

View File

@ -0,0 +1,4 @@
; Needs a function for the combined index to be populated
define void @bar() {
ret void
}

View File

@ -0,0 +1,35 @@
; Check per module hash.
; RUN: llvm-as -module-hash %s -o - | llvm-bcanalyzer -dump | FileCheck %s --check-prefix=MOD1
; MOD1: <HASH op0={{[0-9]*}} op1={{[0-9]*}} op2={{[0-9]*}} op3={{[0-9]*}} op4={{[0-9]*}} (match)/>
; RUN: llvm-as -module-hash %p/Inputs/module_hash.ll -o - | llvm-bcanalyzer -dump | FileCheck %s --check-prefix=MOD2
; MOD2: <HASH op0={{[0-9]*}} op1={{[0-9]*}} op2={{[0-9]*}} op3={{[0-9]*}} op4={{[0-9]*}} (match)/>
; Check that the hash matches in the combined index.
; First regenerate the modules with a summary
; RUN: llvm-as -module-hash -module-summary %s -o %t.m1.bc
; RUN: llvm-as -module-hash -module-summary %p/Inputs/module_hash.ll -o %t.m2.bc
; Recover the hashes from the modules themselves.
; RUN: llvm-bcanalyzer -dump %t.m1.bc | grep '<HASH' > %t.hash
; RUN: llvm-bcanalyzer -dump %t.m2.bc | grep '<HASH' >> %t.hash
; Generate the combined index and gather the hashes there.
; RUN: llvm-lto --thinlto-action=thinlink -o - %t.m1.bc %t.m2.bc | llvm-bcanalyzer -dump | grep '<HASH ' >> %t.hash
; Validate the output now, the hashes in the individual modules and the combined index are in the same file.
; RUN: cat %t.hash | FileCheck %s --check-prefix=COMBINED
; First capture the value of the hash for the two modules.
; COMBINED: <HASH op0=[[HASH1_1:[0-9]*]] op1=[[HASH1_2:[0-9]*]] op2=[[HASH1_3:[0-9]*]] op3=[[HASH1_4:[0-9]*]] op4=[[HASH1_5:[0-9]*]] (match)/>
; COMBINED: <HASH op0=[[HASH2_1:[0-9]*]] op1=[[HASH2_2:[0-9]*]] op2=[[HASH2_3:[0-9]*]] op3=[[HASH2_4:[0-9]*]] op4=[[HASH2_5:[0-9]*]] (match)/>
; Validate against the value extracted from the combined index
; COMBINED-DAG: <HASH abbrevid={{[0-9]*}} op0=[[HASH1_1]] op1=[[HASH1_2]] op2=[[HASH1_3]] op3=[[HASH1_4]] op4=[[HASH1_5]]/>
; COMBINED-DAG: <HASH abbrevid={{[0-9]*}} op0=[[HASH2_1]] op1=[[HASH2_2]] op2=[[HASH2_3]] op3=[[HASH2_4]] op4=[[HASH2_5]]/>
; Need a function for the combined index to be populated.
define void @foo() {
ret void
}

View File

@ -48,6 +48,9 @@ static cl::opt<bool> EmitSummaryIndex("module-summary",
cl::desc("Emit module summary index"), cl::desc("Emit module summary index"),
cl::init(false)); cl::init(false));
// -module-hash: off by default. WriteOutputFile forwards the value to
// WriteBitcodeToFile as its last argument.
static cl::opt<bool> EmitModuleHash("module-hash", cl::desc("Emit module hash"),
cl::init(false));
static cl::opt<bool> static cl::opt<bool>
DumpAsm("d", cl::desc("Print assembly as parsed"), cl::Hidden); DumpAsm("d", cl::desc("Print assembly as parsed"), cl::Hidden);
@ -82,7 +85,7 @@ static void WriteOutputFile(const Module *M) {
if (Force || !CheckBitcodeOutputToConsole(Out->os(), true)) if (Force || !CheckBitcodeOutputToConsole(Out->os(), true))
WriteBitcodeToFile(M, Out->os(), PreserveBitcodeUseListOrder, WriteBitcodeToFile(M, Out->os(), PreserveBitcodeUseListOrder,
EmitSummaryIndex); EmitSummaryIndex, EmitModuleHash);
// Declare success. // Declare success.
Out->keep(); Out->keep();

View File

@ -29,6 +29,7 @@
#include "llvm/Bitcode/BitstreamReader.h" #include "llvm/Bitcode/BitstreamReader.h"
#include "llvm/ADT/Optional.h" #include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/LLVMBitCodes.h" #include "llvm/Bitcode/LLVMBitCodes.h"
#include "llvm/Bitcode/ReaderWriter.h" #include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/IR/Verifier.h" #include "llvm/IR/Verifier.h"
@ -38,8 +39,10 @@
#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/Signals.h" #include "llvm/Support/Signals.h"
#include "llvm/Support/SHA1.h"
#include "llvm/Support/raw_ostream.h" #include "llvm/Support/raw_ostream.h"
#include <algorithm> #include <algorithm>
#include <array>
#include <cctype> #include <cctype>
#include <map> #include <map>
#include <system_error> #include <system_error>
@ -174,6 +177,7 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
STRINGIFY_CODE(MODULE_CODE, VSTOFFSET) STRINGIFY_CODE(MODULE_CODE, VSTOFFSET)
STRINGIFY_CODE(MODULE_CODE, METADATA_VALUES_UNUSED) STRINGIFY_CODE(MODULE_CODE, METADATA_VALUES_UNUSED)
STRINGIFY_CODE(MODULE_CODE, SOURCE_FILENAME) STRINGIFY_CODE(MODULE_CODE, SOURCE_FILENAME)
STRINGIFY_CODE(MODULE_CODE, HASH)
} }
case bitc::IDENTIFICATION_BLOCK_ID: case bitc::IDENTIFICATION_BLOCK_ID:
switch (CodeID) { switch (CodeID) {
@ -292,6 +296,7 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
default: default:
return nullptr; return nullptr;
STRINGIFY_CODE(MST_CODE, ENTRY) STRINGIFY_CODE(MST_CODE, ENTRY)
STRINGIFY_CODE(MST_CODE, HASH)
} }
case bitc::GLOBALVAL_SUMMARY_BLOCK_ID: case bitc::GLOBALVAL_SUMMARY_BLOCK_ID:
switch (CodeID) { switch (CodeID) {
@ -481,6 +486,9 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned BlockID,
if (Stream.EnterSubBlock(BlockID, &NumWords)) if (Stream.EnterSubBlock(BlockID, &NumWords))
return Error("Malformed block record"); return Error("Malformed block record");
// Keep it for later, when we see a MODULE_HASH record
uint64_t BlockEntryPos = Stream.getCurrentByteNo();
const char *BlockName = nullptr; const char *BlockName = nullptr;
if (DumpRecords) { if (DumpRecords) {
outs() << Indent << "<"; outs() << Indent << "<";
@ -552,6 +560,7 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned BlockID,
++BlockStats.NumRecords; ++BlockStats.NumRecords;
StringRef Blob; StringRef Blob;
unsigned CurrentRecordPos = Stream.getCurrentByteNo();
unsigned Code = Stream.readRecord(Entry.ID, Record, &Blob); unsigned Code = Stream.readRecord(Entry.ID, Record, &Blob);
// Increment the # occurrences of this code. // Increment the # occurrences of this code.
@ -586,6 +595,37 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned BlockID,
for (unsigned i = 0, e = Record.size(); i != e; ++i) for (unsigned i = 0, e = Record.size(); i != e; ++i)
outs() << " op" << i << "=" << (int64_t)Record[i]; outs() << " op" << i << "=" << (int64_t)Record[i];
// If we found a module hash, let's verify that it matches!
if (BlockID == bitc::MODULE_BLOCK_ID && Code == bitc::MODULE_CODE_HASH) {
if (Record.size() != 5)
outs() << " (invalid)";
else {
// Recompute the hash and compare it to the one in the bitcode
SHA1 Hasher;
StringRef Hash;
{
int BlockSize = CurrentRecordPos - BlockEntryPos;
auto Ptr = Stream.getPointerToByte(BlockEntryPos, BlockSize);
Hasher.update(ArrayRef<uint8_t>(Ptr, BlockSize));
Hash = Hasher.result();
}
SmallString<20> RecordedHash;
RecordedHash.resize(20);
int Pos = 0;
for (auto &Val : Record) {
assert(!(Val >> 32) && "Unexpected high bits set");
RecordedHash[Pos++] = (Val >> 24) & 0xFF;
RecordedHash[Pos++] = (Val >> 16) & 0xFF;
RecordedHash[Pos++] = (Val >> 8) & 0xFF;
RecordedHash[Pos++] = (Val >> 0) & 0xFF;
}
if (Hash == RecordedHash)
outs() << " (match)";
else
outs() << " (!mismatch!)";
}
}
outs() << "/>"; outs() << "/>";
if (Abbv) { if (Abbv) {