mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-25 13:27:52 +00:00
[PGO]: Implement Func PGO name string compression
This is part of the effort/preparation to reduce the size of instr-pgo (object, binary, memory footprint, and raw data). The functionality is currently off by default and not yet used by any clients. llvm-svn: 256667
This commit is contained in:
parent
4ee6871a85
commit
20f66720da
@ -160,6 +160,29 @@ GlobalVariable *createPGOFuncNameVar(Module &M,
|
||||
/// the original (static) function name.
|
||||
StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName, StringRef FileName);
|
||||
|
||||
/// Given a vector of strings (function PGO names) \c NameStrs, the
|
||||
/// method generates a combined string \c Result that is ready to be
|
||||
/// serialized. The \c Result string is comprised of three fields:
|
||||
/// The first field is the length of the uncompressed strings, and
|
||||
/// the second field is the length of the zlib-compressed string.
|
||||
/// Both fields are encoded in ULEB128. If \c doCompression is false, the
|
||||
/// third field is the uncompressed strings; otherwise it is the
|
||||
/// compressed string. When the string compression is off, the
|
||||
/// second field will have value zero.
|
||||
int collectPGOFuncNameStrings(const std::vector<std::string> &NameStrs,
|
||||
bool doCompression, std::string &Result);
|
||||
/// Produce \c Result string with the same format described above. The input
|
||||
/// is vector of PGO function name variables that are referenced.
|
||||
int collectPGOFuncNameStrings(const std::vector<GlobalVariable *> &NameVars,
|
||||
std::string &Result);
|
||||
class InstrProfSymtab;
|
||||
/// \c NameStrings is a string composed of one or more sub-strings encoded in
|
||||
/// the
|
||||
/// format described above. The substrings are separated by 0 or more zero
|
||||
/// bytes.
|
||||
/// This method decodes the string and populates the \c Symtab.
|
||||
int readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab);
|
||||
|
||||
const std::error_category &instrprof_category();
|
||||
|
||||
enum class instrprof_error {
|
||||
@ -235,6 +258,11 @@ public:
|
||||
/// This interface is used by reader of CoverageMapping test
|
||||
/// format.
|
||||
inline std::error_code create(StringRef D, uint64_t BaseAddr);
|
||||
/// \c NameStrings is a string composed of one or more sub-strings
|
||||
/// encoded in the format described above. The substrings are
|
||||
/// separated by 0 or more zero bytes. This method decodes the
|
||||
/// string and populates the \c Symtab.
|
||||
inline std::error_code create(StringRef NameStrings);
|
||||
/// Create InstrProfSymtab from a set of names iterable from
|
||||
/// \p IterRange. This interface is used by IndexedProfReader.
|
||||
template <typename NameIterRange> void create(const NameIterRange &IterRange);
|
||||
@ -255,8 +283,8 @@ public:
|
||||
AddrToMD5Map.push_back(std::make_pair(Addr, MD5Val));
|
||||
}
|
||||
AddrHashMap &getAddrHashMap() { return AddrToMD5Map; }
|
||||
/// Return function's PGO name from the function name's symabol
|
||||
/// address in the object file. If an error occurs, Return
|
||||
/// Return function's PGO name from the function name's symbol
|
||||
/// address in the object file. If an error occurs, return
|
||||
/// an empty string.
|
||||
StringRef getFuncName(uint64_t FuncNameAddress, size_t NameSize);
|
||||
/// Return function's PGO name from the name's md5 hash value.
|
||||
@ -270,6 +298,12 @@ std::error_code InstrProfSymtab::create(StringRef D, uint64_t BaseAddr) {
|
||||
return std::error_code();
|
||||
}
|
||||
|
||||
/// Populate this symtab from \c NameStrings by handing the encoded buffer to
/// readPGOFuncNameStrings. A non-zero status from the reader is reported as
/// instrprof_error::malformed; otherwise a default (success) error code is
/// returned.
std::error_code InstrProfSymtab::create(StringRef NameStrings) {
  const bool Failed = readPGOFuncNameStrings(NameStrings, *this) != 0;
  return Failed ? make_error_code(instrprof_error::malformed)
                : std::error_code();
}
|
||||
|
||||
template <typename NameIterRange>
|
||||
void InstrProfSymtab::create(const NameIterRange &IterRange) {
|
||||
for (auto Name : IterRange)
|
||||
|
@ -12,12 +12,14 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/ProfileData/InstrProf.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/IR/GlobalVariable.h"
|
||||
#include "llvm/ProfileData/InstrProf.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/Support/Compression.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Support/LEB128.h"
|
||||
#include "llvm/Support/ManagedStatic.h"
|
||||
|
||||
using namespace llvm;
|
||||
@ -162,6 +164,101 @@ GlobalVariable *createPGOFuncNameVar(Function &F, StringRef FuncName) {
|
||||
return createPGOFuncNameVar(*F.getParent(), F.getLinkage(), FuncName);
|
||||
}
|
||||
|
||||
int collectPGOFuncNameStrings(const std::vector<std::string> &NameStrs,
|
||||
bool doCompression, std::string &Result) {
|
||||
uint8_t Header[16], *P = Header;
|
||||
std::string UncompressedNameStrings;
|
||||
|
||||
for (auto NameStr : NameStrs) {
|
||||
UncompressedNameStrings += NameStr;
|
||||
UncompressedNameStrings.append(" ");
|
||||
}
|
||||
unsigned EncLen = encodeULEB128(UncompressedNameStrings.length(), P);
|
||||
P += EncLen;
|
||||
if (!doCompression) {
|
||||
EncLen = encodeULEB128(0, P);
|
||||
P += EncLen;
|
||||
Result.append(reinterpret_cast<char *>(&Header[0]), P - &Header[0]);
|
||||
Result += UncompressedNameStrings;
|
||||
return 0;
|
||||
}
|
||||
SmallVector<char, 128> CompressedNameStrings;
|
||||
zlib::Status Success =
|
||||
zlib::compress(StringRef(UncompressedNameStrings), CompressedNameStrings,
|
||||
zlib::BestSizeCompression);
|
||||
assert(Success == zlib::StatusOK);
|
||||
if (Success != zlib::StatusOK)
|
||||
return 1;
|
||||
EncLen = encodeULEB128(CompressedNameStrings.size(), P);
|
||||
P += EncLen;
|
||||
Result.append(reinterpret_cast<char *>(&Header[0]), P - &Header[0]);
|
||||
Result +=
|
||||
std::string(CompressedNameStrings.data(), CompressedNameStrings.size());
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// Gather the name held in the initializer of every variable in \c NameVars
/// and forward the list to the string-vector overload, which appends the
/// encoded result to \c Result. Compression is requested exactly when
/// zlib::isAvailable() reports true. The status of the forwarded call is
/// returned unchanged.
int collectPGOFuncNameStrings(const std::vector<GlobalVariable *> &NameVars,
                              std::string &Result) {
  std::vector<std::string> Names;
  for (GlobalVariable *Var : NameVars) {
    auto *Init = cast<ConstantDataArray>(Var->getInitializer());
    // Use the C-string accessor when the initializer is a C string, and the
    // raw string accessor otherwise.
    if (Init->isCString())
      Names.push_back(Init->getAsCString().str());
    else
      Names.push_back(Init->getAsString().str());
  }
  const bool UseZlib = zlib::isAvailable();
  return collectPGOFuncNameStrings(Names, UseZlib, Result);
}
|
||||
|
||||
int readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab) {
|
||||
const uint8_t *P = reinterpret_cast<const uint8_t *>(NameStrings.data());
|
||||
const uint8_t *EndP = reinterpret_cast<const uint8_t *>(NameStrings.data() +
|
||||
NameStrings.size());
|
||||
while (P < EndP) {
|
||||
uint32_t N;
|
||||
uint64_t UncompressedSize = decodeULEB128(P, &N);
|
||||
P += N;
|
||||
uint64_t CompressedSize = decodeULEB128(P, &N);
|
||||
P += N;
|
||||
bool isCompressed = (CompressedSize != 0);
|
||||
SmallString<128> UncompressedNameStrings;
|
||||
StringRef NameStrings;
|
||||
if (isCompressed) {
|
||||
StringRef CompressedNameStrings(reinterpret_cast<const char *>(P),
|
||||
CompressedSize);
|
||||
if (zlib::uncompress(CompressedNameStrings, UncompressedNameStrings,
|
||||
UncompressedSize) != zlib::StatusOK)
|
||||
return 1;
|
||||
P += CompressedSize;
|
||||
NameStrings = StringRef(UncompressedNameStrings.data(),
|
||||
UncompressedNameStrings.size());
|
||||
} else {
|
||||
NameStrings =
|
||||
StringRef(reinterpret_cast<const char *>(P), UncompressedSize);
|
||||
P += UncompressedSize;
|
||||
}
|
||||
// Now parse the name strings.
|
||||
size_t NameStart = 0;
|
||||
bool isLast = false;
|
||||
do {
|
||||
size_t NameStop = NameStrings.find(' ', NameStart);
|
||||
if (NameStop == StringRef::npos)
|
||||
return 1;
|
||||
if (NameStop == NameStrings.size() - 1)
|
||||
isLast = true;
|
||||
StringRef Name = NameStrings.substr(NameStart, NameStop - NameStart);
|
||||
Symtab.addFuncName(Name);
|
||||
if (isLast)
|
||||
break;
|
||||
NameStart = NameStop + 1;
|
||||
} while (true);
|
||||
|
||||
while (P < EndP && *P == 0)
|
||||
P++;
|
||||
}
|
||||
Symtab.finalizeSymtab();
|
||||
return 0;
|
||||
}
|
||||
|
||||
instrprof_error
|
||||
InstrProfValueSiteRecord::mergeValueData(InstrProfValueSiteRecord &Input,
|
||||
uint64_t Weight) {
|
||||
|
@ -9,6 +9,7 @@
|
||||
|
||||
#include "llvm/ProfileData/InstrProfReader.h"
|
||||
#include "llvm/ProfileData/InstrProfWriter.h"
|
||||
#include "llvm/Support/Compression.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
#include <cstdarg>
|
||||
@ -583,4 +584,64 @@ TEST_F(InstrProfTest, instr_prof_symtab_test) {
|
||||
ASSERT_EQ(StringRef("bar3"), R);
|
||||
}
|
||||
|
||||
// Round-trips two sets of function names through collectPGOFuncNameStrings
// and InstrProfSymtab::create, with and without compression and with 0 to 9
// zero bytes of padding between the two encoded blocks, then checks that
// every name can be looked up again by its hash.
TEST_F(InstrProfTest, instr_prof_symtab_compression_test) {
  const int NumIters = 10 * 1024;

  // Builds "<Prefix><I>".
  auto MakeName = [](const char *Prefix, int I) {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << Prefix << I;
    return OS.str();
  };

  std::vector<std::string> FuncNames1;
  std::vector<std::string> FuncNames2;
  for (int I = 0; I < NumIters; I++) {
    FuncNames1.push_back(MakeName("func_", I));
    FuncNames1.push_back(MakeName("fooooooooooooooo_", I));
    FuncNames2.push_back(MakeName("BAR_", I));
    FuncNames2.push_back(MakeName("BlahblahBlahblahBar_", I));
  }

  for (int Padding = 0; Padding < 10; Padding++) {
    for (int DoCompression = 0; DoCompression < 2; DoCompression++) {
      // Compression is only requested when zlib is actually available.
      const bool Compress = (DoCompression != 0 && zlib::isAvailable());

      // Encode the first set; a non-zero status means encoding failed.
      std::string FuncNameStrings1;
      ASSERT_EQ(0, collectPGOFuncNameStrings(FuncNames1, Compress,
                                             FuncNameStrings1));

      // Encode the second set.
      std::string FuncNameStrings2;
      ASSERT_EQ(0, collectPGOFuncNameStrings(FuncNames2, Compress,
                                             FuncNameStrings2));

      // Join with paddings:
      std::string FuncNameStrings = FuncNameStrings1;
      for (int P = 0; P < Padding; P++) {
        FuncNameStrings.push_back('\0');
      }
      FuncNameStrings += FuncNameStrings2;

      // Now decode; a set error code means the buffer was rejected.
      InstrProfSymtab Symtab;
      std::error_code EC = Symtab.create(StringRef(FuncNameStrings));
      ASSERT_FALSE(static_cast<bool>(EC));

      // Now check
      for (int I = 0; I < NumIters; I++) {
        std::string N[4];
        N[0] = FuncNames1[2 * I];
        N[1] = FuncNames1[2 * I + 1];
        N[2] = FuncNames2[2 * I];
        N[3] = FuncNames2[2 * I + 1];
        for (int J = 0; J < 4; J++) {
          StringRef R = Symtab.getFuncName(IndexedInstrProf::ComputeHash(N[J]));
          ASSERT_EQ(StringRef(N[J]), R);
        }
      }
    }
  }
}
|
||||
|
||||
} // end anonymous namespace
|
||||
|
Loading…
x
Reference in New Issue
Block a user