Prototype: Reduce llvm-profdata merge memory usage further

The InstrProfWriter already stores the name and hash of the record in
the nested maps it uses for lookup while merging - this data is
duplicated in the value within the maps.

Refactor the InstrProfRecord to use a nested struct for the counters
themselves so that InstrProfWriter can use this nested struct alone
without the name or hash duplicated there.

This work is incomplete, but enough to demonstrate the value: around a
50% decrease in memory usage for a large test case (10GB -> 5GB).
Most of that decrease, though, probably comes from also removing the
SoftInstrProfErrors member, for which I haven't implemented a replacement
yet. (A replacement needs to go with the counters, because the operations
on the counters - merging, etc. - are where the failures occur, unlike the
name/hash, which are totally unused by those counter-related operations
and thus easy to split out.)

Ongoing discussion about removing SoftInstrProfErrors as a field of
InstrProfRecord is happening on the thread that added it, including
the possibility of moving back towards an earlier version of that
proposed patch, which passed SoftInstrProfErrors through the various APIs
rather than storing it as a member of InstrProfRecord.

Reviewers: davidxl

Differential Revision: https://reviews.llvm.org/D34838

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@307298 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
David Blaikie 2017-07-06 19:00:12 +00:00
parent 812570f6d4
commit 450ef2ab9d
8 changed files with 82 additions and 71 deletions

View File

@ -249,9 +249,8 @@ void annotateValueSite(Module &M, Instruction &Inst,
/// Same as the above interface but using an ArrayRef, as well as \p Sum.
void annotateValueSite(Module &M, Instruction &Inst,
ArrayRef<InstrProfValueData> VDs,
uint64_t Sum, InstrProfValueKind ValueKind,
uint32_t MaxMDCount);
ArrayRef<InstrProfValueData> VDs, uint64_t Sum,
InstrProfValueKind ValueKind, uint32_t MaxMDCount);
/// Extract the value profile data from \p Inst which is annotated with
/// value profile meta data. Return false if there is no value data annotated,
@ -590,26 +589,20 @@ struct InstrProfValueSiteRecord {
/// Profiling information for a single function.
struct InstrProfRecord {
StringRef Name;
uint64_t Hash;
std::vector<uint64_t> Counts;
SoftInstrProfErrors SIPE;
InstrProfRecord() = default;
InstrProfRecord(StringRef Name, uint64_t Hash, std::vector<uint64_t> Counts)
: Name(Name), Hash(Hash), Counts(std::move(Counts)) {}
InstrProfRecord(std::vector<uint64_t> Counts) : Counts(std::move(Counts)) {}
InstrProfRecord(InstrProfRecord &&) = default;
InstrProfRecord(const InstrProfRecord &RHS)
: Name(RHS.Name), Hash(RHS.Hash), Counts(RHS.Counts), SIPE(RHS.SIPE),
: Counts(RHS.Counts),
ValueData(RHS.ValueData
? llvm::make_unique<ValueProfData>(*RHS.ValueData)
: nullptr) {}
InstrProfRecord &operator=(InstrProfRecord &&) = default;
InstrProfRecord &operator=(const InstrProfRecord &RHS) {
Name = RHS.Name;
Hash = RHS.Hash;
Counts = RHS.Counts;
SIPE = RHS.SIPE;
if (!RHS.ValueData) {
ValueData = nullptr;
return *this;
@ -626,7 +619,6 @@ struct InstrProfRecord {
/// Return the number of value profile kinds with non-zero number
/// of profile sites.
inline uint32_t getNumValueKinds() const;
/// Return the number of instrumented sites for ValueKind.
inline uint32_t getNumValueSites(uint32_t ValueKind) const;
@ -744,6 +736,16 @@ private:
void scaleValueProfData(uint32_t ValueKind, uint64_t Weight);
};
/// An InstrProfRecord bundled with the function name and hash that identify
/// it. The counter data itself is held by the InstrProfRecord base; this
/// struct only adds the two key fields.
struct NamedInstrProfRecord : InstrProfRecord {
  /// Name of the function this record belongs to.
  StringRef Name;
  /// Hash of the function. Zero-initialised so that a default-constructed
  /// record never carries an indeterminate value.
  uint64_t Hash = 0;

  NamedInstrProfRecord() = default;

  /// Build a record for the function \p Name / \p Hash, taking ownership of
  /// \p Counts as its counter values.
  NamedInstrProfRecord(StringRef Name, uint64_t Hash,
                       std::vector<uint64_t> Counts)
      : InstrProfRecord(std::move(Counts)), Name(Name), Hash(Hash) {}
};
uint32_t InstrProfRecord::getNumValueKinds() const {
uint32_t NumValueKinds = 0;
for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)

View File

@ -40,9 +40,9 @@ class InstrProfReader;
/// A file format agnostic iterator over profiling data.
class InstrProfIterator : public std::iterator<std::input_iterator_tag,
InstrProfRecord> {
NamedInstrProfRecord> {
InstrProfReader *Reader = nullptr;
InstrProfRecord Record;
value_type Record;
void Increment();
@ -53,12 +53,12 @@ public:
InstrProfIterator &operator++() { Increment(); return *this; }
bool operator==(const InstrProfIterator &RHS) { return Reader == RHS.Reader; }
bool operator!=(const InstrProfIterator &RHS) { return Reader != RHS.Reader; }
InstrProfRecord &operator*() { return Record; }
InstrProfRecord *operator->() { return &Record; }
value_type &operator*() { return Record; }
value_type *operator->() { return &Record; }
};
/// Base class and interface for reading profiling data of any known instrprof
/// format. Provides an iterator over InstrProfRecords.
/// format. Provides an iterator over NamedInstrProfRecords.
class InstrProfReader {
instrprof_error LastError = instrprof_error::success;
@ -70,7 +70,7 @@ public:
virtual Error readHeader() = 0;
/// Read a single record.
virtual Error readNextRecord(InstrProfRecord &Record) = 0;
virtual Error readNextRecord(NamedInstrProfRecord &Record) = 0;
/// Iterator over profile data.
InstrProfIterator begin() { return InstrProfIterator(this); }
@ -161,7 +161,7 @@ public:
Error readHeader() override;
/// Read a single record.
Error readNextRecord(InstrProfRecord &Record) override;
Error readNextRecord(NamedInstrProfRecord &Record) override;
InstrProfSymtab &getSymtab() override {
assert(Symtab.get());
@ -209,7 +209,7 @@ public:
static bool hasFormat(const MemoryBuffer &DataBuffer);
Error readHeader() override;
Error readNextRecord(InstrProfRecord &Record) override;
Error readNextRecord(NamedInstrProfRecord &Record) override;
bool isIRLevelProfile() const override {
return (Version & VARIANT_MASK_IR_PROF) != 0;
@ -243,8 +243,8 @@ private:
return 7 & (sizeof(uint64_t) - SizeInBytes % sizeof(uint64_t));
}
Error readName(InstrProfRecord &Record);
Error readFuncHash(InstrProfRecord &Record);
Error readName(NamedInstrProfRecord &Record);
Error readFuncHash(NamedInstrProfRecord &Record);
Error readRawCounts(InstrProfRecord &Record);
Error readValueProfilingData(InstrProfRecord &Record);
bool atEnd() const { return Data == DataEnd; }
@ -281,7 +281,7 @@ enum class HashT : uint32_t;
/// Trait for lookups into the on-disk hash table for the binary instrprof
/// format.
class InstrProfLookupTrait {
std::vector<InstrProfRecord> DataBuffer;
std::vector<NamedInstrProfRecord> DataBuffer;
IndexedInstrProf::HashT HashType;
unsigned FormatVersion;
// Endianness of the input value profile data.
@ -293,7 +293,7 @@ public:
InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion)
: HashType(HashType), FormatVersion(FormatVersion) {}
using data_type = ArrayRef<InstrProfRecord>;
using data_type = ArrayRef<NamedInstrProfRecord>;
using internal_key_type = StringRef;
using external_key_type = StringRef;
@ -334,11 +334,11 @@ struct InstrProfReaderIndexBase {
// Read all the profile records with the same key pointed to the current
// iterator.
virtual Error getRecords(ArrayRef<InstrProfRecord> &Data) = 0;
virtual Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) = 0;
// Read all the profile records with the key equal to FuncName
virtual Error getRecords(StringRef FuncName,
ArrayRef<InstrProfRecord> &Data) = 0;
ArrayRef<NamedInstrProfRecord> &Data) = 0;
virtual void advanceToNextKey() = 0;
virtual bool atEnd() const = 0;
virtual void setValueProfDataEndianness(support::endianness Endianness) = 0;
@ -364,9 +364,9 @@ public:
IndexedInstrProf::HashT HashType, uint64_t Version);
~InstrProfReaderIndex() override = default;
Error getRecords(ArrayRef<InstrProfRecord> &Data) override;
Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) override;
Error getRecords(StringRef FuncName,
ArrayRef<InstrProfRecord> &Data) override;
ArrayRef<NamedInstrProfRecord> &Data) override;
void advanceToNextKey() override { RecordIterator++; }
bool atEnd() const override {
@ -419,10 +419,9 @@ public:
/// Read the file header.
Error readHeader() override;
/// Read a single record.
Error readNextRecord(InstrProfRecord &Record) override;
Error readNextRecord(NamedInstrProfRecord &Record) override;
/// Return the pointer to InstrProfRecord associated with FuncName
/// and FuncHash
/// Return the InstrProfRecord associated with FuncName and FuncHash
Expected<InstrProfRecord> getInstrProfRecord(StringRef FuncName,
uint64_t FuncHash);

View File

@ -33,7 +33,8 @@ class raw_fd_ostream;
class InstrProfWriter {
public:
using ProfilingData = SmallDenseMap<uint64_t, InstrProfRecord, 1>;
using ProfilingData =
SmallDenseMap<uint64_t, InstrProfRecord, 1>;
enum ProfKind { PF_Unknown = 0, PF_FE, PF_IRLevel };
private:
@ -50,7 +51,7 @@ public:
/// Add function counts for the given function. If there are already counts
/// for this function and the hash and number of counts match, each counter is
/// summed. Optionally scale counts by \p Weight.
Error addRecord(InstrProfRecord &&I, uint64_t Weight = 1);
Error addRecord(NamedInstrProfRecord &&I, uint64_t Weight = 1);
/// Merge existing function counts from the given writer.
Error mergeRecordsFromWriter(InstrProfWriter &&IPW);
@ -62,7 +63,8 @@ public:
Error writeText(raw_fd_ostream &OS);
/// Write \c Record in text format to \c OS
static void writeRecordInText(const InstrProfRecord &Record,
static void writeRecordInText(StringRef Name, uint64_t Hash,
const InstrProfRecord &Counters,
InstrProfSymtab &Symtab, raw_fd_ostream &OS);
/// Write the profile, returning the raw data. For testing.
@ -85,6 +87,8 @@ public:
void setOutputSparse(bool Sparse);
private:
Error addRecord(StringRef Name, uint64_t Hash, InstrProfRecord &&I,
uint64_t Weight = 1);
bool shouldEncodeData(const ProfilingData &PD);
void writeImpl(ProfOStream &OS);
};

View File

@ -17,6 +17,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/ProfileSummary.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/Error.h"
#include <algorithm>
#include <cstdint>
@ -27,8 +28,6 @@
namespace llvm {
struct InstrProfRecord;
namespace sampleprof {
class FunctionSamples;

View File

@ -221,7 +221,7 @@ TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) {
#undef VP_READ_ADVANCE
}
Error TextInstrProfReader::readNextRecord(InstrProfRecord &Record) {
Error TextInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) {
// Skip empty lines and comments.
while (!Line.is_at_end() && (Line->empty() || Line->startswith("#")))
++Line;
@ -377,13 +377,13 @@ Error RawInstrProfReader<IntPtrT>::readHeader(
}
template <class IntPtrT>
Error RawInstrProfReader<IntPtrT>::readName(InstrProfRecord &Record) {
Error RawInstrProfReader<IntPtrT>::readName(NamedInstrProfRecord &Record) {
Record.Name = getName(Data->NameRef);
return success();
}
template <class IntPtrT>
Error RawInstrProfReader<IntPtrT>::readFuncHash(InstrProfRecord &Record) {
Error RawInstrProfReader<IntPtrT>::readFuncHash(NamedInstrProfRecord &Record) {
Record.Hash = swap(Data->FuncHash);
return success();
}
@ -445,7 +445,7 @@ Error RawInstrProfReader<IntPtrT>::readValueProfilingData(
}
template <class IntPtrT>
Error RawInstrProfReader<IntPtrT>::readNextRecord(InstrProfRecord &Record) {
Error RawInstrProfReader<IntPtrT>::readNextRecord(NamedInstrProfRecord &Record) {
if (atEnd())
// At this point, ValueDataStart field points to the next header.
if (Error E = readNextHeader(getNextHeaderPos()))
@ -550,7 +550,7 @@ data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D,
template <typename HashTableImpl>
Error InstrProfReaderIndex<HashTableImpl>::getRecords(
StringRef FuncName, ArrayRef<InstrProfRecord> &Data) {
StringRef FuncName, ArrayRef<NamedInstrProfRecord> &Data) {
auto Iter = HashTable->find(FuncName);
if (Iter == HashTable->end())
return make_error<InstrProfError>(instrprof_error::unknown_function);
@ -564,7 +564,7 @@ Error InstrProfReaderIndex<HashTableImpl>::getRecords(
template <typename HashTableImpl>
Error InstrProfReaderIndex<HashTableImpl>::getRecords(
ArrayRef<InstrProfRecord> &Data) {
ArrayRef<NamedInstrProfRecord> &Data) {
if (atEnd())
return make_error<InstrProfError>(instrprof_error::eof);
@ -644,7 +644,7 @@ IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version,
InstrProfSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
// FIXME: This only computes an empty summary. Need to call addRecord for
// all InstrProfRecords to get the correct summary.
// all NamedInstrProfRecords to get the correct summary.
this->Summary = Builder.getSummary();
return Cur;
}
@ -707,7 +707,7 @@ InstrProfSymtab &IndexedInstrProfReader::getSymtab() {
Expected<InstrProfRecord>
IndexedInstrProfReader::getInstrProfRecord(StringRef FuncName,
uint64_t FuncHash) {
ArrayRef<InstrProfRecord> Data;
ArrayRef<NamedInstrProfRecord> Data;
Error Err = Index->getRecords(FuncName, Data);
if (Err)
return std::move(Err);
@ -732,10 +732,10 @@ Error IndexedInstrProfReader::getFunctionCounts(StringRef FuncName,
return success();
}
Error IndexedInstrProfReader::readNextRecord(InstrProfRecord &Record) {
Error IndexedInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) {
static unsigned RecordIndex = 0;
ArrayRef<InstrProfRecord> Data;
ArrayRef<NamedInstrProfRecord> Data;
Error E = Index->getRecords(Data);
if (E)

View File

@ -176,20 +176,25 @@ void InstrProfWriter::setOutputSparse(bool Sparse) {
this->Sparse = Sparse;
}
Error InstrProfWriter::addRecord(InstrProfRecord &&I, uint64_t Weight) {
auto &ProfileDataMap = FunctionData[I.Name];
/// Add the counters carried by \p I, using the name and hash stored in \p I
/// as the key. \p Weight is handed on unchanged to the name/hash overload,
/// which does the actual work.
Error InstrProfWriter::addRecord(NamedInstrProfRecord &&I, uint64_t Weight) {
  // Copy the key fields out first, then pass the record itself on as an
  // rvalue.
  const auto FuncName = I.Name;
  const auto FuncHash = I.Hash;
  return addRecord(FuncName, FuncHash, std::move(I), Weight);
}
Error InstrProfWriter::addRecord(StringRef Name, uint64_t Hash,
InstrProfRecord &&I, uint64_t Weight) {
auto &ProfileDataMap = FunctionData[Name];
bool NewFunc;
ProfilingData::iterator Where;
std::tie(Where, NewFunc) =
ProfileDataMap.insert(std::make_pair(I.Hash, InstrProfRecord()));
ProfileDataMap.insert(std::make_pair(Hash, InstrProfRecord()));
InstrProfRecord &Dest = Where->second;
if (NewFunc) {
// We've never seen a function with this name and hash, add it.
Dest = std::move(I);
// Fix up the name to avoid dangling reference.
Dest.Name = FunctionData.find(Dest.Name)->getKey();
if (Weight > 1)
Dest.scale(Weight);
} else {
@ -205,7 +210,7 @@ Error InstrProfWriter::addRecord(InstrProfRecord &&I, uint64_t Weight) {
Error InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW) {
for (auto &I : IPW.FunctionData)
for (auto &Func : I.getValue())
if (Error E = addRecord(std::move(Func.second), 1))
if (Error E = addRecord(I.getKey(), Func.first, std::move(Func.second)))
return E;
return Error::success();
}
@ -323,11 +328,12 @@ static const char *ValueProfKindStr[] = {
#include "llvm/ProfileData/InstrProfData.inc"
};
void InstrProfWriter::writeRecordInText(const InstrProfRecord &Func,
void InstrProfWriter::writeRecordInText(StringRef Name, uint64_t Hash,
const InstrProfRecord &Func,
InstrProfSymtab &Symtab,
raw_fd_ostream &OS) {
OS << Func.Name << "\n";
OS << "# Func Hash:\n" << Func.Hash << "\n";
OS << Name << "\n";
OS << "# Func Hash:\n" << Hash << "\n";
OS << "# Num Counters:\n" << Func.Counts.size() << "\n";
OS << "# Counter Values:\n";
for (uint64_t Count : Func.Counts)
@ -375,6 +381,6 @@ Error InstrProfWriter::writeText(raw_fd_ostream &OS) {
for (const auto &I : FunctionData)
if (shouldEncodeData(I.getValue()))
for (const auto &Func : I.getValue())
writeRecordInText(Func.second, Symtab, OS);
writeRecordInText(I.getKey(), Func.first, Func.second, Symtab, OS);
return Error::success();
}

View File

@ -528,7 +528,8 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts,
if (doTextFormatDump) {
InstrProfSymtab &Symtab = Reader->getSymtab();
InstrProfWriter::writeRecordInText(Func, Symtab, OS);
InstrProfWriter::writeRecordInText(Func.Name, Func.Hash, Func, Symtab,
OS);
continue;
}

View File

@ -225,7 +225,7 @@ static const char callee5[] = "callee5";
static const char callee6[] = "callee6";
TEST_P(MaybeSparseInstrProfTest, get_icall_data_read_write) {
InstrProfRecord Record1("caller", 0x1234, {1, 2});
NamedInstrProfRecord Record1("caller", 0x1234, {1, 2});
// 4 value sites.
Record1.reserveSites(IPVK_IndirectCallTarget, 4);
@ -269,7 +269,7 @@ TEST_P(MaybeSparseInstrProfTest, get_icall_data_read_write) {
}
TEST_P(MaybeSparseInstrProfTest, annotate_vp_data) {
InstrProfRecord Record("caller", 0x1234, {1, 2});
NamedInstrProfRecord Record("caller", 0x1234, {1, 2});
Record.reserveSites(IPVK_IndirectCallTarget, 1);
InstrProfValueData VD0[] = {{1000, 1}, {2000, 2}, {3000, 3}, {5000, 5},
{4000, 4}, {6000, 6}};
@ -365,7 +365,7 @@ TEST_P(MaybeSparseInstrProfTest, annotate_vp_data) {
}
TEST_P(MaybeSparseInstrProfTest, get_icall_data_read_write_with_weight) {
InstrProfRecord Record1("caller", 0x1234, {1, 2});
NamedInstrProfRecord Record1("caller", 0x1234, {1, 2});
// 4 value sites.
Record1.reserveSites(IPVK_IndirectCallTarget, 4);
@ -408,7 +408,7 @@ TEST_P(MaybeSparseInstrProfTest, get_icall_data_read_write_with_weight) {
}
TEST_P(MaybeSparseInstrProfTest, get_icall_data_read_write_big_endian) {
InstrProfRecord Record1("caller", 0x1234, {1, 2});
NamedInstrProfRecord Record1("caller", 0x1234, {1, 2});
// 4 value sites.
Record1.reserveSites(IPVK_IndirectCallTarget, 4);
@ -456,8 +456,8 @@ TEST_P(MaybeSparseInstrProfTest, get_icall_data_read_write_big_endian) {
TEST_P(MaybeSparseInstrProfTest, get_icall_data_merge1) {
static const char caller[] = "caller";
InstrProfRecord Record11(caller, 0x1234, {1, 2});
InstrProfRecord Record12(caller, 0x1234, {1, 2});
NamedInstrProfRecord Record11(caller, 0x1234, {1, 2});
NamedInstrProfRecord Record12(caller, 0x1234, {1, 2});
// 5 value sites.
Record11.reserveSites(IPVK_IndirectCallTarget, 5);
@ -577,7 +577,7 @@ TEST_P(MaybeSparseInstrProfTest, get_icall_data_merge1_saturation) {
ASSERT_EQ(InstrProfError::take(std::move(Result3)),
instrprof_error::success);
InstrProfRecord Record4("baz", 0x5678, {3, 4});
NamedInstrProfRecord Record4("baz", 0x5678, {3, 4});
Record4.reserveSites(IPVK_IndirectCallTarget, 1);
InstrProfValueData VD4[] = {{uint64_t(bar), 1}};
Record4.addValueData(IPVK_IndirectCallTarget, 0, VD4, 1, nullptr);
@ -586,7 +586,7 @@ TEST_P(MaybeSparseInstrProfTest, get_icall_data_merge1_saturation) {
instrprof_error::success);
// Verify value data counter overflow.
InstrProfRecord Record5("baz", 0x5678, {5, 6});
NamedInstrProfRecord Record5("baz", 0x5678, {5, 6});
Record5.reserveSites(IPVK_IndirectCallTarget, 1);
InstrProfValueData VD5[] = {{uint64_t(bar), Max}};
Record5.addValueData(IPVK_IndirectCallTarget, 0, VD5, 1, nullptr);
@ -619,8 +619,8 @@ TEST_P(MaybeSparseInstrProfTest, get_icall_data_merge1_saturation) {
TEST_P(MaybeSparseInstrProfTest, get_icall_data_merge_site_trunc) {
static const char caller[] = "caller";
InstrProfRecord Record11(caller, 0x1234, {1, 2});
InstrProfRecord Record12(caller, 0x1234, {1, 2});
NamedInstrProfRecord Record11(caller, 0x1234, {1, 2});
NamedInstrProfRecord Record12(caller, 0x1234, {1, 2});
// 2 value sites.
Record11.reserveSites(IPVK_IndirectCallTarget, 2);
@ -686,12 +686,12 @@ static void addValueProfData(InstrProfRecord &Record) {
}
TEST_P(MaybeSparseInstrProfTest, value_prof_data_read_write) {
InstrProfRecord SrcRecord("caller", 0x1234, {1ULL << 31, 2});
InstrProfRecord SrcRecord({1ULL << 31, 2});
addValueProfData(SrcRecord);
std::unique_ptr<ValueProfData> VPData =
ValueProfData::serializeFrom(SrcRecord);
InstrProfRecord Record("caller", 0x1234, {1ULL << 31, 2});
InstrProfRecord Record({1ULL << 31, 2});
VPData->deserializeTo(Record, nullptr);
// Now read data from Record and sanity check the data
@ -752,12 +752,12 @@ TEST_P(MaybeSparseInstrProfTest, value_prof_data_read_write) {
TEST_P(MaybeSparseInstrProfTest, value_prof_data_read_write_mapping) {
InstrProfRecord SrcRecord("caller", 0x1234, {1ULL << 31, 2});
NamedInstrProfRecord SrcRecord("caller", 0x1234, {1ULL << 31, 2});
addValueProfData(SrcRecord);
std::unique_ptr<ValueProfData> VPData =
ValueProfData::serializeFrom(SrcRecord);
InstrProfRecord Record("caller", 0x1234, {1ULL << 31, 2});
NamedInstrProfRecord Record("caller", 0x1234, {1ULL << 31, 2});
InstrProfSymtab Symtab;
Symtab.mapAddress(uint64_t(callee1), 0x1000ULL);
Symtab.mapAddress(uint64_t(callee2), 0x2000ULL);