Add a module Hash in the bitcode and the combined index, implementing a kind of "build-id"

This is intended to be used for ThinLTO incremental build.

Differential Revision: http://reviews.llvm.org/D18213

This is a recommit of r265095 after fixing the Windows issues.

From: Mehdi Amini <mehdi.amini@apple.com>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@265111 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Mehdi Amini 2016-04-01 05:33:11 +00:00
parent d4b1021e3e
commit d2f4701e4a
12 changed files with 236 additions and 38 deletions

View File

@ -446,6 +446,8 @@ public:
using SimpleBitstreamCursor::canSkipToPos;
using SimpleBitstreamCursor::AtEndOfStream;
using SimpleBitstreamCursor::GetCurrentBitNo;
using SimpleBitstreamCursor::getCurrentByteNo;
using SimpleBitstreamCursor::getPointerToByte;
using SimpleBitstreamCursor::getBitStreamReader;
using SimpleBitstreamCursor::JumpToBit;
using SimpleBitstreamCursor::fillCurWord;

View File

@ -107,6 +107,9 @@ enum ModuleCodes {
// SOURCE_FILENAME: [namechar x N]
MODULE_CODE_SOURCE_FILENAME = 16,
// HASH: [5*i32]
MODULE_CODE_HASH = 17,
};
/// PARAMATTR blocks have code for defining a parameter attribute set.
@ -183,6 +186,7 @@ enum ValueSymtabCodes {
// The module path symbol table only has one code (MST_CODE_ENTRY).
enum ModulePathSymtabCodes {
MST_CODE_ENTRY = 1, // MST_ENTRY: [modid, namechar x N]
MST_CODE_HASH = 2, // MST_HASH: [5*i32]
};
// The summary section uses different codes in the per-module

View File

@ -107,7 +107,8 @@ namespace llvm {
/// for use in ThinLTO optimization).
void WriteBitcodeToFile(const Module *M, raw_ostream &Out,
bool ShouldPreserveUseListOrder = false,
bool EmitSummaryIndex = false);
bool EmitSummaryIndex = false,
bool GenerateHash = false);
/// Write the specified module summary index to the given raw output stream,
/// where it will be written in a new bitcode block. This is used when

View File

@ -25,6 +25,8 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include <array>
namespace llvm {
/// \brief Class to accumulate and hold information about a callee.
@ -228,6 +230,9 @@ public:
void setBitcodeIndex(uint64_t Offset) { BitcodeIndex = Offset; }
};
/// 160 bits SHA1
typedef std::array<uint32_t, 5> ModuleHash;
/// List of global value info structures for a particular value held
/// in the GlobalValueMap. Requires a vector in the case of multiple
/// COMDAT values of the same name.
@ -245,9 +250,9 @@ typedef GlobalValueInfoMapTy::const_iterator const_globalvalueinfo_iterator;
typedef GlobalValueInfoMapTy::iterator globalvalueinfo_iterator;
/// String table to hold/own module path strings, which additionally holds the
/// module ID assigned to each module during the plugin step. The StringMap
/// makes a copy of and owns inserted strings.
typedef StringMap<uint64_t> ModulePathStringTableTy;
/// module ID assigned to each module during the plugin step, as well as a hash
/// of the module. The StringMap makes a copy of and owns inserted strings.
typedef StringMap<std::pair<uint64_t, ModuleHash>> ModulePathStringTableTy;
/// Class to hold module path string table and global value map,
/// and encapsulate methods for operating on them.
@ -304,17 +309,26 @@ public:
GlobalValueMap[ValueGUID].push_back(std::move(Info));
}
/// Table of modules, containing an id.
const StringMap<uint64_t> &modulePaths() const {
/// Table of modules, containing module hash and id.
const StringMap<std::pair<uint64_t, ModuleHash>> &modulePaths() const {
return ModulePathStringTable;
}
/// Table of modules, containing an id.
StringMap<uint64_t> &modulePaths() { return ModulePathStringTable; }
/// Table of modules, containing hash and id.
StringMap<std::pair<uint64_t, ModuleHash>> &modulePaths() {
return ModulePathStringTable;
}
/// Get the module ID recorded for the given module path.
uint64_t getModuleId(const StringRef ModPath) const {
return ModulePathStringTable.lookup(ModPath);
return ModulePathStringTable.lookup(ModPath).first;
}
/// Get the module SHA1 hash recorded for the given module path.
const ModuleHash &getModuleHash(const StringRef ModPath) const {
auto It = ModulePathStringTable.find(ModPath);
assert(It != ModulePathStringTable.end() && "Module not registered");
return It->second.second;
}
/// Add the given per-module index into this module index/summary,
@ -333,11 +347,14 @@ public:
return NewName.str();
}
/// Add a new module path, mapped to the given module Id, and return StringRef
/// owned by string table map.
StringRef addModulePath(StringRef ModPath, uint64_t ModId) {
return ModulePathStringTable.insert(std::make_pair(ModPath, ModId))
.first->first();
/// Add a new module path with the given \p Hash, mapped to the given \p
/// ModID, and return an iterator to the entry in the index.
ModulePathStringTableTy::iterator
addModulePath(StringRef ModPath, uint64_t ModId,
ModuleHash Hash = ModuleHash{{0}}) {
return ModulePathStringTable.insert(std::make_pair(
ModPath,
std::make_pair(ModId, Hash))).first;
}
/// Check if the given Module has any functions available for exporting

View File

@ -5632,11 +5632,7 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseModule() {
}
continue;
case BitstreamEntry::Record:
// Once we find the last record of interest, skip the rest.
if (VSTOffset > 0)
Stream.skipRecord(Entry.ID);
else {
case BitstreamEntry::Record: {
Record.clear();
auto BitCode = Stream.readRecord(Entry.ID, Record);
switch (BitCode) {
@ -5650,6 +5646,25 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseModule() {
SourceFileName = ValueName.c_str();
break;
}
/// MODULE_CODE_HASH: [5*i32]
case bitc::MODULE_CODE_HASH: {
if (Record.size() != 5)
return error("Invalid hash length " + Twine(Record.size()).str());
if (!TheIndex)
break;
if (TheIndex->modulePaths().empty())
// Does not have any summary emitted.
break;
if (TheIndex->modulePaths().size() != 1)
return error("Don't expect multiple modules defined?");
auto &Hash = TheIndex->modulePaths().begin()->second.second;
int Pos = 0;
for (auto &Val : Record) {
assert(!(Val >> 32) && "Unexpected high bits set");
Hash[Pos++] = Val;
}
break;
}
/// MODULE_CODE_VSTOFFSET: [offset]
case bitc::MODULE_CODE_VSTOFFSET:
if (Record.size() < 1)
@ -5761,7 +5776,7 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseEntireSummary() {
// module path string table entry with an empty (0) ID to take
// ownership.
FS->setModulePath(
TheIndex->addModulePath(Buffer->getBufferIdentifier(), 0));
TheIndex->addModulePath(Buffer->getBufferIdentifier(), 0)->first());
static int RefListStartIndex = 4;
int CallGraphEdgeStartIndex = RefListStartIndex + NumRefs;
assert(Record.size() >= RefListStartIndex + NumRefs &&
@ -5799,7 +5814,7 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseEntireSummary() {
std::unique_ptr<GlobalVarSummary> FS =
llvm::make_unique<GlobalVarSummary>(getDecodedLinkage(RawLinkage));
FS->setModulePath(
TheIndex->addModulePath(Buffer->getBufferIdentifier(), 0));
TheIndex->addModulePath(Buffer->getBufferIdentifier(), 0)->first());
for (unsigned I = 2, E = Record.size(); I != E; ++I) {
unsigned RefValueId = Record[I];
uint64_t RefGUID = getGUIDFromValueId(RefValueId);
@ -5887,6 +5902,7 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseModuleStringTable() {
SmallVector<uint64_t, 64> Record;
SmallString<128> ModulePath;
ModulePathStringTableTy::iterator LastSeenModulePath;
while (1) {
BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
@ -5907,14 +5923,32 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseModuleStringTable() {
break;
case bitc::MST_CODE_ENTRY: {
// MST_ENTRY: [modid, namechar x N]
uint64_t ModuleId = Record[0];
if (convertToString(Record, 1, ModulePath))
return error("Invalid record");
uint64_t ModuleId = Record[0];
StringRef ModulePathInMap = TheIndex->addModulePath(ModulePath, ModuleId);
ModuleIdMap[ModuleId] = ModulePathInMap;
LastSeenModulePath = TheIndex->addModulePath(ModulePath, ModuleId);
ModuleIdMap[ModuleId] = LastSeenModulePath->first();
ModulePath.clear();
break;
}
/// MST_CODE_HASH: [5*i32]
case bitc::MST_CODE_HASH: {
if (Record.size() != 5)
return error("Invalid hash length " + Twine(Record.size()).str());
if (LastSeenModulePath == TheIndex->modulePaths().end())
return error("Invalid hash that does not follow a module path");
int Pos = 0;
for (auto &Val : Record) {
assert(!(Val >> 32) && "Unexpected high bits set");
LastSeenModulePath->second.second[Pos++] = Val;
}
// Reset LastSeenModulePath to avoid overriding the hash unexpectedly.
LastSeenModulePath = TheIndex->modulePaths().end();
break;
}
}
}
llvm_unreachable("Exit infinite loop");

View File

@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "ValueEnumerator.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
@ -39,6 +40,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/SHA1.h"
#include <cctype>
#include <map>
using namespace llvm;
@ -2852,8 +2854,18 @@ static void WriteModStrings(const ModuleSummaryIndex &I,
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
unsigned Abbrev6Bit = Stream.EmitAbbrev(Abbv);
SmallVector<unsigned, 64> NameVals;
for (const StringMapEntry<uint64_t> &MPSE : I.modulePaths()) {
// Module Hash, 160 bits SHA1. Optionally, emitted after each MST_CODE_ENTRY.
Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_HASH));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
unsigned AbbrevHash = Stream.EmitAbbrev(Abbv);
SmallVector<unsigned, 64> Vals;
for (const auto &MPSE : I.modulePaths()) {
StringEncoding Bits =
getStringEncoding(MPSE.getKey().data(), MPSE.getKey().size());
unsigned AbbrevToUse = Abbrev8Bit;
@ -2862,14 +2874,29 @@ static void WriteModStrings(const ModuleSummaryIndex &I,
else if (Bits == SE_Fixed7)
AbbrevToUse = Abbrev7Bit;
NameVals.push_back(MPSE.getValue());
Vals.push_back(MPSE.getValue().first);
for (const auto P : MPSE.getKey())
NameVals.push_back((unsigned char)P);
Vals.push_back((unsigned char)P);
// Emit the finished record.
Stream.EmitRecord(bitc::MST_CODE_ENTRY, NameVals, AbbrevToUse);
NameVals.clear();
Stream.EmitRecord(bitc::MST_CODE_ENTRY, Vals, AbbrevToUse);
Vals.clear();
// Emit an optional hash for the module now
auto &Hash = MPSE.getValue().second;
bool AllZero = true; // Detect if the hash is empty, and do not generate it
for (auto Val : Hash) {
if (Val)
AllZero = false;
Vals.push_back(Val);
}
if (!AllZero) {
// Emit the hash record.
Stream.EmitRecord(bitc::MST_CODE_HASH, Vals, AbbrevHash);
}
Vals.clear();
}
Stream.ExitBlock();
}
@ -3177,11 +3204,36 @@ static void WriteIdentificationBlock(const Module *M, BitstreamWriter &Stream) {
Stream.ExitBlock();
}
static void writeModuleHash(BitstreamWriter &Stream,
SmallVectorImpl<char> &Buffer,
size_t BlockStartPos) {
// Emit the module's hash.
// MODULE_CODE_HASH: [5*i32]
SHA1 Hasher;
Hasher.update(ArrayRef<uint8_t>((uint8_t *)&Buffer[BlockStartPos],
Buffer.size() - BlockStartPos));
auto Hash = Hasher.result();
SmallVector<uint64_t, 20> Vals;
auto LShift = [&](unsigned char Val, unsigned Amount)
-> uint64_t { return ((uint64_t)Val) << Amount; };
for (int Pos = 0; Pos < 20; Pos += 4) {
uint32_t SubHash = LShift(Hash[Pos + 0], 24);
SubHash |= LShift(Hash[Pos + 1], 16) | LShift(Hash[Pos + 2], 8) |
(unsigned)(unsigned char)Hash[Pos + 3];
Vals.push_back(SubHash);
}
// Emit the finished record.
Stream.EmitRecord(bitc::MODULE_CODE_HASH, Vals);
}
/// WriteModule - Emit the specified module to the bitstream.
static void WriteModule(const Module *M, BitstreamWriter &Stream,
bool ShouldPreserveUseListOrder,
uint64_t BitcodeStartBit, bool EmitSummaryIndex) {
uint64_t BitcodeStartBit, bool EmitSummaryIndex,
bool GenerateHash, SmallVectorImpl<char> &Buffer) {
Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3);
size_t BlockStartPos = Buffer.size();
SmallVector<unsigned, 1> Vals;
unsigned CurVersion = 1;
@ -3238,6 +3290,10 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream,
WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream,
VSTOffsetPlaceholder, BitcodeStartBit, &FunctionIndex);
if (GenerateHash) {
writeModuleHash(Stream, Buffer, BlockStartPos);
}
Stream.ExitBlock();
}
@ -3322,7 +3378,7 @@ static void WriteBitcodeHeader(BitstreamWriter &Stream) {
/// stream.
void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out,
bool ShouldPreserveUseListOrder,
bool EmitSummaryIndex) {
bool EmitSummaryIndex, bool GenerateHash) {
SmallVector<char, 0> Buffer;
Buffer.reserve(256*1024);
@ -3348,7 +3404,7 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out,
// Emit the module.
WriteModule(M, Stream, ShouldPreserveUseListOrder, BitcodeStartBit,
EmitSummaryIndex);
EmitSummaryIndex, GenerateHash, Buffer);
}
if (TT.isOSDarwin() || TT.isOSBinFormatMachO())

View File

@ -37,9 +37,11 @@ void ModuleSummaryIndex::mergeFrom(std::unique_ptr<ModuleSummaryIndex> Other,
// Add the module path string ref for this module if we haven't already
// saved a reference to it.
if (ModPath.empty())
ModPath = addModulePath(Info->summary()->modulePath(), NextModuleId);
else
if (ModPath.empty()) {
auto Path = Info->summary()->modulePath();
ModPath = addModulePath(Path, NextModuleId, Other->getModuleHash(Path))
->first();
} else
assert(ModPath == Info->summary()->modulePath() &&
"Each module in the combined map should have a unique ID");

View File

@ -61,7 +61,7 @@ static std::unique_ptr<Module> loadFile(const std::string &FileName,
/* ShouldLazyLoadMetadata = */ true);
if (!Result) {
Err.print("function-import", errs());
return nullptr;
report_fatal_error("Abort");
}
return Result;

View File

@ -0,0 +1,4 @@
; Needs a function for the combined index to be populated
define void @bar() {
ret void
}

View File

@ -0,0 +1,35 @@
; Check per module hash.
; RUN: llvm-as -module-hash %s -o - | llvm-bcanalyzer -dump | FileCheck %s --check-prefix=MOD1
; MOD1: <HASH op0={{[0-9]*}} op1={{[0-9]*}} op2={{[0-9]*}} op3={{[0-9]*}} op4={{[0-9]*}} (match)/>
; RUN: llvm-as -module-hash %p/Inputs/module_hash.ll -o - | llvm-bcanalyzer -dump | FileCheck %s --check-prefix=MOD2
; MOD2: <HASH op0={{[0-9]*}} op1={{[0-9]*}} op2={{[0-9]*}} op3={{[0-9]*}} op4={{[0-9]*}} (match)/>
; Check that the hash matches in the combined index.
; First regenerate the modules with a summary
; RUN: llvm-as -module-hash -module-summary %s -o %t.m1.bc
; RUN: llvm-as -module-hash -module-summary %p/Inputs/module_hash.ll -o %t.m2.bc
; Recover the hashes from the modules themselves.
; RUN: llvm-bcanalyzer -dump %t.m1.bc | grep '<HASH' > %t.hash
; RUN: llvm-bcanalyzer -dump %t.m2.bc | grep '<HASH' >> %t.hash
; Generate the combined index and gather the hashes there.
; RUN: llvm-lto --thinlto-action=thinlink -o - %t.m1.bc %t.m2.bc | llvm-bcanalyzer -dump | grep '<HASH ' >> %t.hash
; Validate the output now, the hahes in the individual modules and the combined index are in the same file.
; RUN: cat %t.hash | FileCheck %s --check-prefix=COMBINED
; First capture the value of the hash for the two modules.
; COMBINED: <HASH op0=[[HASH1_1:[0-9]*]] op1=[[HASH1_2:[0-9]*]] op2=[[HASH1_3:[0-9]*]] op3=[[HASH1_4:[0-9]*]] op4=[[HASH1_5:[0-9]*]] (match)/>
; COMBINED: <HASH op0=[[HASH2_1:[0-9]*]] op1=[[HASH2_2:[0-9]*]] op2=[[HASH2_3:[0-9]*]] op3=[[HASH2_4:[0-9]*]] op4=[[HASH2_5:[0-9]*]] (match)/>
; Validate against the value extracted from the combined index
; COMBINED-DAG: <HASH abbrevid={{[0-9]*}} op0=[[HASH1_1]] op1=[[HASH1_2]] op2=[[HASH1_3]] op3=[[HASH1_4]] op4=[[HASH1_5]]/>
; COMBINED-DAG: <HASH abbrevid={{[0-9]*}} op0=[[HASH2_1]] op1=[[HASH2_2]] op2=[[HASH2_3]] op3=[[HASH2_4]] op4=[[HASH2_5]]/>
; Need a function for the combined index to be populated.
define void @foo() {
ret void
}

View File

@ -48,6 +48,9 @@ static cl::opt<bool> EmitSummaryIndex("module-summary",
cl::desc("Emit module summary index"),
cl::init(false));
static cl::opt<bool> EmitModuleHash("module-hash", cl::desc("Emit module hash"),
cl::init(false));
static cl::opt<bool>
DumpAsm("d", cl::desc("Print assembly as parsed"), cl::Hidden);
@ -82,7 +85,7 @@ static void WriteOutputFile(const Module *M) {
if (Force || !CheckBitcodeOutputToConsole(Out->os(), true))
WriteBitcodeToFile(M, Out->os(), PreserveBitcodeUseListOrder,
EmitSummaryIndex);
EmitSummaryIndex, EmitModuleHash);
// Declare success.
Out->keep();

View File

@ -29,6 +29,7 @@
#include "llvm/Bitcode/BitstreamReader.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/LLVMBitCodes.h"
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/IR/Verifier.h"
@ -38,8 +39,10 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/SHA1.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <array>
#include <cctype>
#include <map>
#include <system_error>
@ -174,6 +177,7 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
STRINGIFY_CODE(MODULE_CODE, VSTOFFSET)
STRINGIFY_CODE(MODULE_CODE, METADATA_VALUES_UNUSED)
STRINGIFY_CODE(MODULE_CODE, SOURCE_FILENAME)
STRINGIFY_CODE(MODULE_CODE, HASH)
}
case bitc::IDENTIFICATION_BLOCK_ID:
switch (CodeID) {
@ -292,6 +296,7 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
default:
return nullptr;
STRINGIFY_CODE(MST_CODE, ENTRY)
STRINGIFY_CODE(MST_CODE, HASH)
}
case bitc::GLOBALVAL_SUMMARY_BLOCK_ID:
switch (CodeID) {
@ -481,6 +486,9 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned BlockID,
if (Stream.EnterSubBlock(BlockID, &NumWords))
return Error("Malformed block record");
// Keep it for later, when we see a MODULE_HASH record
uint64_t BlockEntryPos = Stream.getCurrentByteNo();
const char *BlockName = nullptr;
if (DumpRecords) {
outs() << Indent << "<";
@ -552,6 +560,7 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned BlockID,
++BlockStats.NumRecords;
StringRef Blob;
unsigned CurrentRecordPos = Stream.getCurrentByteNo();
unsigned Code = Stream.readRecord(Entry.ID, Record, &Blob);
// Increment the # occurrences of this code.
@ -586,6 +595,37 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned BlockID,
for (unsigned i = 0, e = Record.size(); i != e; ++i)
outs() << " op" << i << "=" << (int64_t)Record[i];
// If we found a module hash, let's verify that it matches!
if (BlockID == bitc::MODULE_BLOCK_ID && Code == bitc::MODULE_CODE_HASH) {
if (Record.size() != 5)
outs() << " (invalid)";
else {
// Recompute the hash and compare it to the one in the bitcode
SHA1 Hasher;
StringRef Hash;
{
int BlockSize = CurrentRecordPos - BlockEntryPos;
auto Ptr = Stream.getPointerToByte(BlockEntryPos, BlockSize);
Hasher.update(ArrayRef<uint8_t>(Ptr, BlockSize));
Hash = Hasher.result();
}
SmallString<20> RecordedHash;
RecordedHash.resize(20);
int Pos = 0;
for (auto &Val : Record) {
assert(!(Val >> 32) && "Unexpected high bits set");
RecordedHash[Pos++] = (Val >> 24) & 0xFF;
RecordedHash[Pos++] = (Val >> 16) & 0xFF;
RecordedHash[Pos++] = (Val >> 8) & 0xFF;
RecordedHash[Pos++] = (Val >> 0) & 0xFF;
}
if (Hash == RecordedHash)
outs() << " (match)";
else
outs() << " (!mismatch!)";
}
}
outs() << "/>";
if (Abbv) {