[PGO]: Implement Func PGO name string compression

This is part of the effort/preparation to reduce the size of
instr-pgo (object, binary, memory footprint, and raw data).

The functionality is currently off by default and not yet
used by any clients.

llvm-svn: 256667
This commit is contained in:
Xinliang David Li 2015-12-31 07:57:16 +00:00
parent 4ee6871a85
commit 20f66720da
3 changed files with 196 additions and 4 deletions

View File

@ -160,6 +160,29 @@ GlobalVariable *createPGOFuncNameVar(Module &M,
/// the original (static) function name.
StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName, StringRef FileName);
/// Given a vector of strings (function PGO names) \c NameStrs, the
/// method generates a combined string that is ready to be serialized
/// and appends it to \c Result. The generated string is comprised of
/// three fields:
/// The first field is the length of the uncompressed strings, and
/// the second field is the length of the zlib-compressed string.
/// Both fields are encoded in ULEB128. If \c doCompression is false, the
/// third field is the uncompressed strings; otherwise it is the
/// compressed string. When the string compression is off, the
/// second field will have value zero.
/// In the uncompressed strings, each name is followed by a single space.
/// Returns 0 on success and 1 if zlib compression fails.
int collectPGOFuncNameStrings(const std::vector<std::string> &NameStrs,
bool doCompression, std::string &Result);
/// Produce \c Result string with the same format described above. The input
/// is a vector of PGO function name variables that are referenced.
/// The names are compressed whenever zlib is available. Returns 0 on
/// success and a nonzero value on failure.
int collectPGOFuncNameStrings(const std::vector<GlobalVariable *> &NameVars,
std::string &Result);
class InstrProfSymtab;
/// \c NameStrings is a string composed of one or more sub-strings encoded in
/// the format described above. The substrings are separated by 0 or more zero
/// bytes.
/// This method decodes the string and populates the \c Symtab.
/// Returns 0 on success and 1 if the string cannot be decoded.
int readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab);
const std::error_category &instrprof_category();
enum class instrprof_error {
@ -235,6 +258,11 @@ public:
/// This interface is used by reader of CoverageMapping test
/// format.
inline std::error_code create(StringRef D, uint64_t BaseAddr);
/// \c NameStrings is a string composed of one or more sub-strings
/// encoded in the format produced by collectPGOFuncNameStrings. The
/// substrings are separated by 0 or more zero bytes. This method
/// decodes the string and populates the \c Symtab. It returns
/// instrprof_error::malformed if the string cannot be decoded.
inline std::error_code create(StringRef NameStrings);
/// Create InstrProfSymtab from a set of names iterable from
/// \p IterRange. This interface is used by IndexedProfReader.
template <typename NameIterRange> void create(const NameIterRange &IterRange);
@ -255,8 +283,8 @@ public:
AddrToMD5Map.push_back(std::make_pair(Addr, MD5Val));
}
AddrHashMap &getAddrHashMap() { return AddrToMD5Map; }
/// Return function's PGO name from the function name's symabol
/// address in the object file. If an error occurs, Return
/// Return function's PGO name from the function name's symbol
/// address in the object file. If an error occurs, return
/// an empty string.
StringRef getFuncName(uint64_t FuncNameAddress, size_t NameSize);
/// Return function's PGO name from the name's md5 hash value.
@ -270,6 +298,12 @@ std::error_code InstrProfSymtab::create(StringRef D, uint64_t BaseAddr) {
return std::error_code();
}
/// Decode the encoded name strings in \c NameStrings into this symbol table.
/// Any decoding failure is reported as instrprof_error::malformed.
std::error_code InstrProfSymtab::create(StringRef NameStrings) {
  const bool DecodeFailed = readPGOFuncNameStrings(NameStrings, *this) != 0;
  return DecodeFailed ? make_error_code(instrprof_error::malformed)
                      : std::error_code();
}
template <typename NameIterRange>
void InstrProfSymtab::create(const NameIterRange &IterRange) {
for (auto Name : IterRange)

View File

@ -12,12 +12,14 @@
//
//===----------------------------------------------------------------------===//
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/ManagedStatic.h"
using namespace llvm;
@ -162,6 +164,101 @@ GlobalVariable *createPGOFuncNameVar(Function &F, StringRef FuncName) {
return createPGOFuncNameVar(*F.getParent(), F.getLinkage(), FuncName);
}
/// Serialize the PGO function names in \c NameStrs and append the encoding to
/// \c Result.
///
/// Layout of the appended data:
///   1. ULEB128: byte length of the uncompressed name string.
///   2. ULEB128: byte length of the zlib-compressed payload, or 0 when
///      \c doCompression is false.
///   3. The payload: the uncompressed name string, or its compressed form.
/// The uncompressed name string is every name followed by a single ' '.
///
/// \returns 0 on success, or 1 if zlib compression fails (in which case
/// nothing is appended to \c Result).
int collectPGOFuncNameStrings(const std::vector<std::string> &NameStrs,
                              bool doCompression, std::string &Result) {
  // Two ULEB128 fields are written; a 64-bit value needs at most 10 bytes, so
  // reserve 20 bytes to rule out overflowing the header buffer.
  uint8_t Header[20], *P = Header;

  std::string UncompressedNameStrings;
  // Iterate by reference to avoid copying every name.
  for (const auto &NameStr : NameStrs) {
    UncompressedNameStrings += NameStr;
    UncompressedNameStrings += ' ';
  }

  unsigned EncLen = encodeULEB128(UncompressedNameStrings.length(), P);
  P += EncLen;

  if (!doCompression) {
    // A zero compressed-size field marks the payload as uncompressed.
    EncLen = encodeULEB128(0, P);
    P += EncLen;
    Result.append(reinterpret_cast<char *>(&Header[0]), P - &Header[0]);
    Result += UncompressedNameStrings;
    return 0;
  }

  SmallVector<char, 128> CompressedNameStrings;
  zlib::Status Success =
      zlib::compress(StringRef(UncompressedNameStrings), CompressedNameStrings,
                     zlib::BestSizeCompression);
  assert(Success == zlib::StatusOK);
  if (Success != zlib::StatusOK)
    return 1;

  EncLen = encodeULEB128(CompressedNameStrings.size(), P);
  P += EncLen;
  Result.append(reinterpret_cast<char *>(&Header[0]), P - &Header[0]);
  // Append straight from the buffer instead of through a temporary string.
  Result.append(CompressedNameStrings.data(), CompressedNameStrings.size());
  return 0;
}
/// Extract the name stored in the initializer of each PGO function name
/// variable in \c NameVars and append the combined encoding of those names to
/// \c Result. Compression is requested exactly when zlib is available.
///
/// \returns the result of the string-vector overload (0 on success).
int collectPGOFuncNameStrings(const std::vector<GlobalVariable *> &NameVars,
                              std::string &Result) {
  std::vector<std::string> Names;
  for (GlobalVariable *Var : NameVars) {
    auto *Init = cast<ConstantDataArray>(Var->getInitializer());
    // Use the C-string view when the initializer qualifies as one; otherwise
    // take the raw bytes.
    if (Init->isCString())
      Names.push_back(Init->getAsCString().str());
    else
      Names.push_back(Init->getAsString().str());
  }
  const bool UseCompression = zlib::isAvailable();
  return collectPGOFuncNameStrings(Names, UseCompression, Result);
}
/// Return true if a complete ULEB128 value of at most 10 bytes (enough for a
/// 64-bit value) starts at \p P and terminates before \p EndP.
static bool hasCompleteULEB128(const uint8_t *P, const uint8_t *EndP) {
  for (unsigned I = 0; I < 10 && P + I < EndP; ++I)
    if ((P[I] & 0x80) == 0)
      return true;
  return false;
}

/// Decode \c NameStrings, a sequence of one or more encoded name sections
/// separated by zero or more zero bytes, and add every function name found to
/// \c Symtab. Each section starts with two ULEB128 fields (uncompressed size,
/// then compressed size, where zero means the payload is not compressed)
/// followed by the payload. In the decoded payload every name is terminated
/// by a single ' '. A section with an empty payload contributes no names.
///
/// \returns 0 on success (the symtab is finalized), or 1 if the input is
/// truncated, a size field points past the end of the input, decompression
/// fails, or a name lacks its terminating ' '. On failure the symtab is not
/// finalized, though names decoded before the error may already be added.
int readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab) {
  const uint8_t *P = reinterpret_cast<const uint8_t *>(NameStrings.data());
  const uint8_t *EndP = P + NameStrings.size();
  while (P < EndP) {
    uint32_t N;
    // Validate each header field before decoding so a truncated or corrupt
    // buffer can never make the decoder read past EndP.
    if (!hasCompleteULEB128(P, EndP))
      return 1;
    uint64_t UncompressedSize = decodeULEB128(P, &N);
    P += N;
    if (!hasCompleteULEB128(P, EndP))
      return 1;
    uint64_t CompressedSize = decodeULEB128(P, &N);
    P += N;

    const bool IsCompressed = (CompressedSize != 0);
    const uint64_t PayloadSize =
        IsCompressed ? CompressedSize : UncompressedSize;
    // The payload must lie entirely within the remaining input.
    if (PayloadSize > static_cast<uint64_t>(EndP - P))
      return 1;

    SmallString<128> UncompressedNameStrings;
    StringRef Names;
    if (IsCompressed) {
      StringRef CompressedNameStrings(reinterpret_cast<const char *>(P),
                                      CompressedSize);
      if (zlib::uncompress(CompressedNameStrings, UncompressedNameStrings,
                           UncompressedSize) != zlib::StatusOK)
        return 1;
      Names = StringRef(UncompressedNameStrings.data(),
                        UncompressedNameStrings.size());
    } else {
      Names = StringRef(reinterpret_cast<const char *>(P), UncompressedSize);
    }
    P += PayloadSize;

    // Now parse the name strings: each name runs up to the next ' '.
    size_t NameStart = 0;
    while (NameStart < Names.size()) {
      size_t NameStop = Names.find(' ', NameStart);
      if (NameStop == StringRef::npos)
        return 1;
      Symtab.addFuncName(Names.substr(NameStart, NameStop - NameStart));
      NameStart = NameStop + 1;
    }

    // Skip zero padding between sections.
    while (P < EndP && *P == 0)
      P++;
  }
  Symtab.finalizeSymtab();
  return 0;
}
instrprof_error
InstrProfValueSiteRecord::mergeValueData(InstrProfValueSiteRecord &Input,
uint64_t Weight) {

View File

@ -9,6 +9,7 @@
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/ProfileData/InstrProfWriter.h"
#include "llvm/Support/Compression.h"
#include "gtest/gtest.h"
#include <cstdarg>
@ -583,4 +584,64 @@ TEST_F(InstrProfTest, instr_prof_symtab_test) {
ASSERT_EQ(StringRef("bar3"), R);
}
// Round-trip two independently encoded name sets, joined by 0 to 9 zero
// padding bytes, through collectPGOFuncNameStrings and
// InstrProfSymtab::create, with compression both off and (when zlib is
// available) on. Encoding and decoding status is checked explicitly so a
// failure is reported at its source rather than as a name lookup mismatch.
TEST_F(InstrProfTest, instr_prof_symtab_compression_test) {
  std::vector<std::string> FuncNames1;
  std::vector<std::string> FuncNames2;
  for (int I = 0; I < 10 * 1024; I++) {
    std::string str;
    raw_string_ostream OS(str);
    OS << "func_" << I;
    FuncNames1.push_back(OS.str());
    str.clear();
    OS << "fooooooooooooooo_" << I;
    FuncNames1.push_back(OS.str());
    str.clear();
    OS << "BAR_" << I;
    FuncNames2.push_back(OS.str());
    str.clear();
    OS << "BlahblahBlahblahBar_" << I;
    FuncNames2.push_back(OS.str());
  }

  for (int Padding = 0; Padding < 10; Padding++) {
    for (int DoCompression = 0; DoCompression < 2; DoCompression++) {
      const bool Compress = (DoCompression != 0 && zlib::isAvailable());

      // Encode the first name set.
      std::string FuncNameStrings1;
      ASSERT_EQ(0, collectPGOFuncNameStrings(FuncNames1, Compress,
                                             FuncNameStrings1));

      // Encode the second name set.
      std::string FuncNameStrings2;
      ASSERT_EQ(0, collectPGOFuncNameStrings(FuncNames2, Compress,
                                             FuncNameStrings2));

      // Join with paddings:
      std::string FuncNameStrings = FuncNameStrings1;
      FuncNameStrings.append(static_cast<size_t>(Padding), '\0');
      FuncNameStrings += FuncNameStrings2;

      // Now decode (and decompress if needed).
      InstrProfSymtab Symtab;
      std::error_code EC = Symtab.create(StringRef(FuncNameStrings));
      ASSERT_TRUE(!EC);

      // Now check that every name can be found through its hash.
      for (int I = 0; I < 10 * 1024; I++) {
        const std::string *Expected[4] = {
            &FuncNames1[2 * I], &FuncNames1[2 * I + 1], &FuncNames2[2 * I],
            &FuncNames2[2 * I + 1]};
        for (int J = 0; J < 4; J++) {
          StringRef R =
              Symtab.getFuncName(IndexedInstrProf::ComputeHash(*Expected[J]));
          ASSERT_EQ(StringRef(*Expected[J]), R);
        }
      }
    }
  }
}
} // end anonymous namespace