[PGO] Profile summary reader/writer support

With this patch, profile summary data is stored in the indexed profile
data file so that the profile reader and the compiler's optimizer can
start making use of it.

Differential Revision: http://reviews.llvm.org/D16258

llvm-svn: 259626
This commit is contained in:
Xinliang David Li 2016-02-03 04:08:18 +00:00
parent 7f6faddfa9
commit f9d9bfe484
8 changed files with 259 additions and 22 deletions

View File

@ -576,6 +576,10 @@ ValueProfData *
serializeValueProfDataFromRT(const ValueProfRuntimeRecord *Record,
ValueProfData *Dst);
// Forward declaration of the on-disk summary record so that ProfileSummary
// can declare a constructor taking it; the struct itself is defined further
// down in namespace IndexedInstrProf.
namespace IndexedInstrProf {
struct Summary;
}
///// Profile summary computation ////
// The 'show' command displays richer summary of the profile data. The profile
// summary is one or more (Cutoff, MinBlockCount, NumBlocks) triplets. Given a
@ -585,6 +589,10 @@ struct ProfileSummaryEntry {
uint32_t Cutoff; ///< The required percentile of total execution count.
uint64_t MinBlockCount; ///< The minimum execution count for this percentile.
uint64_t NumBlocks; ///< Number of blocks >= the minumum execution count.
ProfileSummaryEntry(uint32_t TheCutoff, uint64_t TheMinBlockCount,
uint64_t TheNumBlocks)
: Cutoff(TheCutoff), MinBlockCount(TheMinBlockCount),
NumBlocks(TheNumBlocks) {}
};
class ProfileSummary {
@ -598,15 +606,17 @@ class ProfileSummary {
uint64_t MaxBlockCount, MaxInternalBlockCount, MaxFunctionCount;
uint32_t NumBlocks, NumFunctions;
inline void addCount(uint64_t Count, bool IsEntry);
void computeDetailedSummary();
public:
static const int Scale = 1000000;
ProfileSummary(std::vector<uint32_t> Cutoffs)
: DetailedSummaryCutoffs(Cutoffs), TotalCount(0), MaxBlockCount(0),
MaxInternalBlockCount(0), MaxFunctionCount(0), NumBlocks(0), NumFunctions(0) {}
MaxInternalBlockCount(0), MaxFunctionCount(0), NumBlocks(0),
NumFunctions(0) {}
ProfileSummary(const IndexedInstrProf::Summary &S);
inline void addRecord(const InstrProfRecord &);
inline std::vector<ProfileSummaryEntry> &getDetailedSummary();
void computeDetailedSummary();
uint32_t getNumBlocks() { return NumBlocks; }
uint64_t getTotalCount() { return TotalCount; }
uint32_t getNumFunctions() { return NumFunctions; }
@ -684,7 +694,10 @@ enum ProfVersion {
// Version 3 supports value profile data. The value profile data is expected
// to follow the block counter profile data.
Version3 = 3,
// The current version is 3.
// In this version, profile summary data \c IndexedInstrProf::Summary is
// stored after the profile header.
Version4 = 4,
// The current version is 4.
CurrentVersion = INSTR_PROF_INDEX_VERSION
};
const uint64_t Version = ProfVersion::CurrentVersion;
@ -698,11 +711,100 @@ inline uint64_t ComputeHash(StringRef K) { return ComputeHash(HashType, K); }
struct Header {
uint64_t Magic;
uint64_t Version;
uint64_t MaxFunctionCount;
uint64_t Unused; // Becomes unused since version 4
uint64_t HashType;
uint64_t HashOffset;
};
// Default cutoffs used when building the detailed profile summary.
// Going by the inline annotations (10000 == 1%), each value is a fraction of
// the total execution count in millionths.
//
// Each cutoff is listed exactly once, in strictly increasing order, so that
// no redundant (Cutoff, MinBlockCount, NumBlocks) entries are produced and
// serialized.
static const uint32_t SummaryCutoffs[] = {
    10000,  /*  1% */
    100000, /* 10% */
    200000, 300000, 400000, 500000, 600000, 700000, 800000,
    900000, 950000, 990000, 999000, 999900, 999990, 999999};
// Number of elements in SummaryCutoffs.
static const uint32_t NumSummaryCutoffs =
    sizeof(SummaryCutoffs) / sizeof(*SummaryCutoffs);
// Profile summary data recorded in the profile data file in indexed
// format. It is introduced in version 4. The summary data follows
// right after the profile file header.
//
// Layout, as implied by the accessors below: this two-word header
// (NumSummaryFields, NumCutoffEntries), then NumSummaryFields uint64_t field
// values, then NumCutoffEntries Entry records. Only the header is a declared
// member; the rest lives in storage that trails the object, so instances
// must be created with enough room (see allocSummary).
struct Summary {
// One detailed-summary record as laid out in the summary data; every field
// is 64 bits wide.
struct Entry {
uint64_t Cutoff; ///< The required percentile of total execution count.
uint64_t
MinBlockCount; ///< The minimum execution count for this percentile.
uint64_t NumBlocks; ///< Number of blocks >= the minimum execution count.
};
// The mapping from existing field kinds to enum values must remain unchanged
// when new kinds are added in the future. The enum value is used directly as
// the index of the field in the trailing field array (see get()/set()).
enum SummaryFieldKind {
/// The total number of functions instrumented.
TotalNumFunctions = 0,
/// Total number of instrumented blocks/edges.
TotalNumBlocks = 1,
/// The maximal execution count among all functions.
/// This field does not exist for profile data from IR based
/// instrumentation.
MaxFunctionCount = 2,
/// Max block count of the program.
MaxBlockCount = 3,
/// Max internal block count of the program (excluding entry blocks).
MaxInternalBlockCount = 4,
/// The sum of all instrumented block counts.
TotalBlockCount = 5,
/// Number of field kinds defined above.
NumKinds = TotalBlockCount + 1
};
// The number of summary fields following the summary header.
uint64_t NumSummaryFields;
// The number of Cutoff Entries (Summary::Entry) following summary fields.
uint64_t NumCutoffEntries;
// Returns the total size in bytes of a summary with NumSumFields field
// values and NumCutoffEntries entries: header + fields + entries.
// Note that both the arguments and the result are 32-bit.
static uint32_t getSize(uint32_t NumSumFields, uint32_t NumCutoffEntries) {
return sizeof(Summary) + NumCutoffEntries * sizeof(Entry) +
NumSumFields * sizeof(uint64_t);
}
// The field values are stored immediately after this header object.
const uint64_t *getSummaryDataBase() const {
return reinterpret_cast<const uint64_t *>(this + 1);
}
uint64_t *getSummaryDataBase() {
return reinterpret_cast<uint64_t *>(this + 1);
}
// The cutoff entries start right after the NumSummaryFields field values, so
// NumSummaryFields must be set before these are used.
const Entry *getCutoffEntryBase() const {
return reinterpret_cast<const Entry *>(
&getSummaryDataBase()[NumSummaryFields]);
}
Entry *getCutoffEntryBase() {
return reinterpret_cast<Entry *>(&getSummaryDataBase()[NumSummaryFields]);
}
// Reads the field of kind K. K is used as a raw index; it is not checked
// against NumSummaryFields.
uint64_t get(SummaryFieldKind K) const {
return getSummaryDataBase()[K];
}
// Stores V into the field of kind K (no check against NumSummaryFields).
void set(SummaryFieldKind K, uint64_t V) {
getSummaryDataBase()[K] = V;
}
// Returns the I-th cutoff entry. I is not checked against NumCutoffEntries.
const Entry &getEntry(uint32_t I) const { return getCutoffEntryBase()[I]; }
// Copies the three values of E into the I-th cutoff entry, widening the
// 32-bit ProfileSummaryEntry::Cutoff to the 64-bit Entry::Cutoff.
void setEntry(uint32_t I, const ProfileSummaryEntry &E) {
Entry &ER = getCutoffEntryBase()[I];
ER.Cutoff = E.Cutoff;
ER.MinBlockCount = E.MinBlockCount;
ER.NumBlocks = E.NumBlocks;
}
// Zero-fills Size bytes starting at this object, i.e. the header plus the
// trailing field/entry storage. The object must therefore live in a buffer
// of at least Size bytes.
Summary(uint32_t Size) { memset(this, 0, Size); }
// Releases the storage through the global operator delete, matching the
// ::operator new call used by allocSummary.
void operator delete(void *ptr) { ::operator delete(ptr); }
// A Summary can only be constructed with an explicit total size.
Summary() = delete;
};
inline std::unique_ptr<Summary> allocSummary(uint32_t TotalSize) {
return std::unique_ptr<Summary>(new (::operator new(TotalSize))
Summary(TotalSize));
}
} // end namespace IndexedInstrProf
namespace RawInstrProf {

View File

@ -694,7 +694,7 @@ serializeValueProfDataFromRT(const ValueProfRuntimeRecord *Record,
/* Raw profile format version. */
#define INSTR_PROF_RAW_VERSION 2
#define INSTR_PROF_INDEX_VERSION 3
#define INSTR_PROF_INDEX_VERSION 4
#define INSTR_PROF_COVMAP_VERSION 0
/* Profile version is always of type uint64_t. Reserve the upper 8 bits in the

View File

@ -336,12 +336,17 @@ private:
std::unique_ptr<MemoryBuffer> DataBuffer;
/// The index into the profile data.
std::unique_ptr<InstrProfReaderIndexBase> Index;
/// The maximal execution count among all functions.
uint64_t MaxFunctionCount;
/// Profile summary data.
std::unique_ptr<ProfileSummary> Summary;
IndexedInstrProfReader(const IndexedInstrProfReader &) = delete;
IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete;
// Read the profile summary. Return a pointer pointing to one byte past the
// end of the summary data if it exists or the input \c Cur.
const unsigned char *readSummary(IndexedInstrProf::ProfVersion Version,
const unsigned char *Cur);
public:
uint64_t getVersion() const { return Index->getVersion(); }
IndexedInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
@ -365,7 +370,7 @@ public:
std::vector<uint64_t> &Counts);
/// Return the maximum of all known function counts.
uint64_t getMaximumFunctionCount() { return MaxFunctionCount; }
uint64_t getMaximumFunctionCount() { return Summary->getMaxFunctionCount(); }
/// Factory method to create an indexed reader.
static ErrorOr<std::unique_ptr<IndexedInstrProfReader>>
@ -383,6 +388,7 @@ public:
// to be used by llvm-profdata (for dumping). Avoid using this when
// the client is the compiler.
InstrProfSymtab &getSymtab() override;
ProfileSummary &getSummary() { return *(Summary.get()); }
};
} // end namespace llvm

View File

@ -34,7 +34,6 @@ public:
private:
bool Sparse;
StringMap<ProfilingData> FunctionData;
uint64_t MaxFunctionCount;
// Use raw pointer here for the incomplete type object.
InstrProfRecordWriterTrait *InfoObj;

View File

@ -641,4 +641,19 @@ void ProfileSummary::computeDetailedSummary() {
}
}
/// Construct a ProfileSummary from summary data \p S that is already in host
/// byte order: the scalar statistics are copied from the summary fields and
/// one detailed-summary entry is created per cutoff entry in \p S.
ProfileSummary::ProfileSummary(const IndexedInstrProf::Summary &S)
    : TotalCount(S.get(IndexedInstrProf::Summary::TotalBlockCount)),
      MaxBlockCount(S.get(IndexedInstrProf::Summary::MaxBlockCount)),
      MaxInternalBlockCount(
          S.get(IndexedInstrProf::Summary::MaxInternalBlockCount)),
      MaxFunctionCount(S.get(IndexedInstrProf::Summary::MaxFunctionCount)),
      NumBlocks(S.get(IndexedInstrProf::Summary::TotalNumBlocks)),
      NumFunctions(S.get(IndexedInstrProf::Summary::TotalNumFunctions)) {
  const IndexedInstrProf::Summary::Entry *Entries = S.getCutoffEntryBase();
  for (unsigned Idx = 0; Idx < S.NumCutoffEntries; ++Idx) {
    // The stored cutoff is 64 bits wide; ProfileSummaryEntry keeps 32 bits.
    const uint32_t Cutoff = static_cast<uint32_t>(Entries[Idx].Cutoff);
    DetailedSummary.emplace_back(Cutoff, Entries[Idx].MinBlockCount,
                                 Entries[Idx].NumBlocks);
  }
}
} // end namespace llvm

View File

@ -554,6 +554,41 @@ bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) {
return Magic == IndexedInstrProf::Magic;
}
/// Set up this reader's profile summary.
///
/// For profiles of version 4 or later, the summary stored at \p Cur is
/// converted from little-endian to host byte order and used to construct the
/// ProfileSummary. For older versions no summary is stored, so one is built
/// from the default cutoffs instead.
///
/// \returns a pointer just past the summary data when it exists, otherwise
/// \p Cur unchanged.
const unsigned char *
IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version,
                                    const unsigned char *Cur) {
  using namespace support;

  if (Version < IndexedInstrProf::Version4) {
    // For older version of profile data, we need to compute on the fly.
    // NOTE(review): no records are added to the summary before
    // computeDetailedSummary() runs, so this appears to produce an empty
    // summary (including a zero max function count) -- confirm.
    const uint32_t *FirstCutoff = &IndexedInstrProf::SummaryCutoffs[0];
    std::vector<uint32_t> DefaultCutoffs(
        FirstCutoff, FirstCutoff + IndexedInstrProf::NumSummaryCutoffs);
    this->Summary = llvm::make_unique<ProfileSummary>(DefaultCutoffs);
    this->Summary->computeDetailedSummary();
    return Cur;
  }

  // The two header words give the size of the variable-length payload.
  const auto *OnDisk =
      reinterpret_cast<const IndexedInstrProf::Summary *>(Cur);
  const uint64_t FieldCount =
      endian::byte_swap<uint64_t, little>(OnDisk->NumSummaryFields);
  const uint64_t EntryCount =
      endian::byte_swap<uint64_t, little>(OnDisk->NumCutoffEntries);
  const uint32_t TotalBytes =
      IndexedInstrProf::Summary::getSize(FieldCount, EntryCount);

  std::unique_ptr<IndexedInstrProf::Summary> HostSummary =
      IndexedInstrProf::allocSummary(TotalBytes);

  // The whole summary is a sequence of 64-bit words; convert each one.
  const uint64_t *From = reinterpret_cast<const uint64_t *>(OnDisk);
  uint64_t *To = reinterpret_cast<uint64_t *>(HostSummary.get());
  const size_t WordCount = TotalBytes / sizeof(uint64_t);
  for (size_t Word = 0; Word < WordCount; ++Word)
    To[Word] = endian::byte_swap<uint64_t, little>(From[Word]);

  // Initialize ProfileSummary using the summary data read from disk.
  this->Summary = llvm::make_unique<ProfileSummary>(*HostSummary);
  return Cur + TotalBytes;
}
std::error_code IndexedInstrProfReader::readHeader() {
const unsigned char *Start =
(const unsigned char *)DataBuffer->getBufferStart();
@ -576,9 +611,7 @@ std::error_code IndexedInstrProfReader::readHeader() {
if (FormatVersion > IndexedInstrProf::ProfVersion::CurrentVersion)
return error(instrprof_error::unsupported_version);
// Read the maximal function count.
MaxFunctionCount =
endian::byte_swap<uint64_t, little>(Header->MaxFunctionCount);
Cur = readSummary((IndexedInstrProf::ProfVersion)FormatVersion, Cur);
// Read the hash type and start offset.
IndexedInstrProf::HashT HashType = static_cast<IndexedInstrProf::HashT>(

View File

@ -84,6 +84,7 @@ public:
typedef uint64_t offset_type;
support::endianness ValueProfDataEndianness;
ProfileSummary *TheProfileSummary;
InstrProfRecordWriterTrait() : ValueProfDataEndianness(support::little) {}
static hash_value_type ComputeHash(key_type_ref K) {
@ -122,6 +123,7 @@ public:
endian::Writer<little> LE(Out);
for (const auto &ProfileData : *V) {
const InstrProfRecord &ProfRecord = ProfileData.second;
TheProfileSummary->addRecord(ProfRecord);
LE.write<uint64_t>(ProfileData.first); // Function hash
LE.write<uint64_t>(ProfRecord.Counts.size());
@ -140,7 +142,7 @@ public:
}
InstrProfWriter::InstrProfWriter(bool Sparse)
: Sparse(Sparse), FunctionData(), MaxFunctionCount(0),
: Sparse(Sparse), FunctionData(),
InfoObj(new InstrProfRecordWriterTrait()) {}
InstrProfWriter::~InstrProfWriter() { delete InfoObj; }
@ -179,11 +181,6 @@ std::error_code InstrProfWriter::addRecord(InstrProfRecord &&I,
Dest.sortValueData();
// We keep track of the max function count as we go for simplicity.
// Update this statistic no matter the result of the merge.
if (Dest.Counts[0] > MaxFunctionCount)
MaxFunctionCount = Dest.Counts[0];
return Result;
}
@ -199,8 +196,32 @@ bool InstrProfWriter::shouldEncodeData(const ProfilingData &PD) {
return false;
}
/// Copy the statistics and detailed-summary entries of \p PS into the
/// serializable summary \p TheSummary. \p TheSummary must have room for
/// Summary::NumKinds fields plus one cutoff entry per detailed-summary entry.
static void setSummary(IndexedInstrProf::Summary *TheSummary,
                       ProfileSummary &PS) {
  using SummaryT = IndexedInstrProf::Summary;

  std::vector<ProfileSummaryEntry> &Entries = PS.getDetailedSummary();

  // The counts go first: the entry accessors locate the cutoff entries
  // relative to NumSummaryFields.
  TheSummary->NumSummaryFields = SummaryT::NumKinds;
  TheSummary->NumCutoffEntries = Entries.size();

  TheSummary->set(SummaryT::MaxFunctionCount, PS.getMaxFunctionCount());
  TheSummary->set(SummaryT::MaxBlockCount, PS.getMaxBlockCount());
  TheSummary->set(SummaryT::MaxInternalBlockCount,
                  PS.getMaxInternalBlockCount());
  TheSummary->set(SummaryT::TotalBlockCount, PS.getTotalCount());
  TheSummary->set(SummaryT::TotalNumBlocks, PS.getNumBlocks());
  TheSummary->set(SummaryT::TotalNumFunctions, PS.getNumFunctions());

  unsigned Slot = 0;
  for (const ProfileSummaryEntry &E : Entries)
    TheSummary->setEntry(Slot++, E);
}
void InstrProfWriter::writeImpl(ProfOStream &OS) {
OnDiskChainedHashTableGenerator<InstrProfRecordWriterTrait> Generator;
using namespace IndexedInstrProf;
std::vector<uint32_t> Cutoffs(&SummaryCutoffs[0],
&SummaryCutoffs[NumSummaryCutoffs]);
ProfileSummary PS(Cutoffs);
InfoObj->TheProfileSummary = &PS;
// Populate the hash table generator.
for (const auto &I : FunctionData)
if (shouldEncodeData(I.getValue()))
@ -209,7 +230,7 @@ void InstrProfWriter::writeImpl(ProfOStream &OS) {
IndexedInstrProf::Header Header;
Header.Magic = IndexedInstrProf::Magic;
Header.Version = IndexedInstrProf::ProfVersion::CurrentVersion;
Header.MaxFunctionCount = MaxFunctionCount;
Header.Unused = 0;
Header.HashType = static_cast<uint64_t>(IndexedInstrProf::HashType);
Header.HashOffset = 0;
int N = sizeof(IndexedInstrProf::Header) / sizeof(uint64_t);
@ -220,15 +241,37 @@ void InstrProfWriter::writeImpl(ProfOStream &OS) {
for (int I = 0; I < N - 1; I++)
OS.write(reinterpret_cast<uint64_t *>(&Header)[I]);
// Save a space to write the hash table start location.
uint64_t HashTableStartLoc = OS.tell();
// Save the location of Header.HashOffset field in \c OS.
uint64_t HashTableStartFieldOffset = OS.tell();
// Reserve the space for HashOffset field.
OS.write(0);
// Reserve space to write profile summary data.
uint32_t NumEntries = Cutoffs.size();
uint32_t SummarySize = Summary::getSize(Summary::NumKinds, NumEntries);
// Remember the summary offset.
uint64_t SummaryOffset = OS.tell();
for (unsigned I = 0; I < SummarySize / sizeof(uint64_t); I++)
OS.write(0);
// Write the hash table.
uint64_t HashTableStart = Generator.Emit(OS.OS, *InfoObj);
// Allocate space for data to be serialized out.
std::unique_ptr<IndexedInstrProf::Summary> TheSummary =
IndexedInstrProf::allocSummary(SummarySize);
// Compute the Summary and copy the data to the data
// structure to be serialized out (to disk or buffer).
setSummary(TheSummary.get(), PS);
InfoObj->TheProfileSummary = 0;
// Now do the final patch:
PatchItem PatchItems[1] = {{HashTableStartLoc, &HashTableStart, 1}};
PatchItem PatchItems[] = {
// Patch the Header.HashOffset field.
{HashTableStartFieldOffset, &HashTableStart, 1},
// Patch the summary data.
{SummaryOffset, reinterpret_cast<uint64_t *>(TheSummary.get()),
(int)(SummarySize / sizeof(uint64_t))}};
OS.patch(PatchItems, sizeof(PatchItems) / sizeof(*PatchItems));
}

View File

@ -137,6 +137,45 @@ TEST_P(MaybeSparseInstrProfTest, get_function_counts) {
ASSERT_TRUE(ErrorEquals(instrprof_error::unknown_function, EC));
}
// Profile data is copied from general.proftext.
//
// Writes four records, reads the resulting indexed profile back and checks
// the summary statistics and the minimum block counts recorded for the 80%,
// 90%, 95% and 99% cutoffs.
TEST_F(InstrProfTest, get_profile_summary) {
  InstrProfRecord Record1("func1", 0x1234, {97531});
  InstrProfRecord Record2("func2", 0x1234, {0, 0});
  InstrProfRecord Record3("func3", 0x1234,
                          {2305843009213693952, 1152921504606846976,
                           576460752303423488, 288230376151711744,
                           144115188075855872, 72057594037927936});
  InstrProfRecord Record4("func4", 0x1234, {0});
  Writer.addRecord(std::move(Record1));
  Writer.addRecord(std::move(Record2));
  Writer.addRecord(std::move(Record3));
  Writer.addRecord(std::move(Record4));
  auto Profile = Writer.writeBuffer();
  readProfile(std::move(Profile));

  ProfileSummary &PS = Reader->getSummary();
  ASSERT_EQ(2305843009213693952U, PS.getMaxFunctionCount());
  ASSERT_EQ(2305843009213693952U, PS.getMaxBlockCount());
  ASSERT_EQ(10U, PS.getNumBlocks());
  ASSERT_EQ(4539628424389557499U, PS.getTotalCount());

  std::vector<ProfileSummaryEntry> &Details = PS.getDetailedSummary();
  // Locate the detailed-summary entry recorded for a given cutoff.
  auto FindCutoff = [&Details](uint32_t Cutoff) {
    return std::find_if(Details.begin(), Details.end(),
                        [Cutoff](const ProfileSummaryEntry &PE) {
                          return PE.Cutoff == Cutoff;
                        });
  };
  auto EightyPerc = FindCutoff(800000);
  auto NinetyPerc = FindCutoff(900000);
  auto NinetyFivePerc = FindCutoff(950000);
  auto NinetyNinePerc = FindCutoff(990000);

  // Fail the test instead of dereferencing end() if a cutoff is missing.
  ASSERT_TRUE(EightyPerc != Details.end());
  ASSERT_TRUE(NinetyPerc != Details.end());
  ASSERT_TRUE(NinetyFivePerc != Details.end());
  ASSERT_TRUE(NinetyNinePerc != Details.end());

  ASSERT_EQ(576460752303423488U, EightyPerc->MinBlockCount);
  ASSERT_EQ(288230376151711744U, NinetyPerc->MinBlockCount);
  ASSERT_EQ(288230376151711744U, NinetyFivePerc->MinBlockCount);
  ASSERT_EQ(72057594037927936U, NinetyNinePerc->MinBlockCount);
}
TEST_P(MaybeSparseInstrProfTest, get_icall_data_read_write) {
InstrProfRecord Record1("caller", 0x1234, {1, 2});
InstrProfRecord Record2("callee1", 0x1235, {3, 4});