Revert "[llvm-profdata] Speed up merging by using a thread pool"

This reverts commit r275921. It broke the ppc64be bot:

  http://lab.llvm.org:8011/builders/clang-ppc64be-linux-multistage/builds/3537

I'm not sure why it broke, but based on the output, it looks like an
off-by-one (one profile left un-merged).

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@275937 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Vedant Kumar 2016-07-19 00:57:09 +00:00
parent 92a8d601a3
commit 40a2c5247c
6 changed files with 21 additions and 193 deletions

View File

@ -106,11 +106,6 @@ OPTIONS
conjunction with -instr. Defaults to false, since it can inhibit compiler
optimization during PGO.
.. option:: -num-threads=N, -j=N
Use N threads to perform profile merging. When N=0, llvm-profdata auto-detects
an appropriate number of threads to use. This is the default.
EXAMPLES
^^^^^^^^
Basic Usage

View File

@ -47,8 +47,6 @@ public:
/// for this function and the hash and number of counts match, each counter is
/// summed. Optionally scale counts by \p Weight.
Error addRecord(InstrProfRecord &&I, uint64_t Weight = 1);
/// Merge existing function counts from the given writer.
Error mergeRecordsFromWriter(InstrProfWriter &&IPW);
/// Write the profile to \c OS
void write(raw_fd_ostream &OS);
/// Write the profile in text format to \c OS

View File

@ -182,14 +182,6 @@ Error InstrProfWriter::addRecord(InstrProfRecord &&I, uint64_t Weight) {
return Dest.takeError();
}
Error InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW) {
for (auto &I : IPW.FunctionData)
for (auto &Func : I.getValue())
if (Error E = addRecord(std::move(Func.second), 1))
return E;
return Error::success();
}
bool InstrProfWriter::shouldEncodeData(const ProfilingData &PD) {
if (!Sparse)
return true;

View File

@ -51,43 +51,3 @@ DISJOINT-2: Block counts: [2, 3]
DISJOINT: Total functions: 2
DISJOINT: Maximum function count: 1
DISJOINT: Maximum internal block count: 3
RUN: llvm-profdata merge %p/Inputs/foo3-1.proftext %p/Inputs/foo3-1.proftext \
RUN: %p/Inputs/foo3-1.proftext %p/Inputs/foo3-1.proftext \
RUN: -num-threads 2 -o %t
RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO4
RUN: llvm-profdata merge %p/Inputs/foo3-1.proftext %p/Inputs/foo3-1.proftext \
RUN: %p/Inputs/foo3-1.proftext %p/Inputs/foo3-1.proftext \
RUN: -j 3 -o %t
RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO4
FOO4: foo:
FOO4: Counters: 3
FOO4: Function count: 4
FOO4: Block counts: [8, 12]
FOO4: Total functions: 1
FOO4: Maximum function count: 4
FOO4: Maximum internal block count: 12
RUN: llvm-profdata merge %p/Inputs/foo3-1.proftext %p/Inputs/foo3-1.proftext \
RUN: %p/Inputs/foo3-1.proftext %p/Inputs/foo3-1.proftext \
RUN: %p/Inputs/foo3-1.proftext -j 2 -o %t
RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO5
RUN: llvm-profdata merge %p/Inputs/foo3-1.proftext %p/Inputs/foo3-1.proftext \
RUN: %p/Inputs/foo3-1.proftext %p/Inputs/foo3-1.proftext \
RUN: %p/Inputs/foo3-1.proftext -j 3 -o %t
RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO5
RUN: llvm-profdata merge %p/Inputs/foo3-1.proftext %p/Inputs/foo3-1.proftext \
RUN: %p/Inputs/foo3-1.proftext %p/Inputs/foo3-1.proftext \
RUN: %p/Inputs/foo3-1.proftext -o %t
RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO5
RUN: llvm-profdata merge %p/Inputs/foo3-1.proftext %p/Inputs/foo3-1.proftext \
RUN: %p/Inputs/foo3-1.proftext %p/Inputs/foo3-1.proftext \
RUN: %p/Inputs/foo3-1.proftext -j 1 -o %t
RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO5
FOO5: foo:
FOO5: Counters: 3
FOO5: Function count: 5
FOO5: Block counts: [10, 15]
FOO5: Total functions: 1
FOO5: Maximum function count: 5
FOO5: Maximum internal block count: 15

View File

@ -29,7 +29,6 @@
#include "llvm/Support/Path.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@ -118,68 +117,9 @@ struct WeightedFile {
};
typedef SmallVector<WeightedFile, 5> WeightedFileVector;
/// Keep track of merged data and reported errors.
struct WriterContext {
std::mutex Lock;
InstrProfWriter Writer;
Error Err;
StringRef ErrWhence;
std::mutex &ErrLock;
SmallSet<instrprof_error, 4> &WriterErrorCodes;
WriterContext(bool IsSparse, std::mutex &ErrLock,
SmallSet<instrprof_error, 4> &WriterErrorCodes)
: Lock(), Writer(IsSparse), Err(Error::success()), ErrWhence(""),
ErrLock(ErrLock), WriterErrorCodes(WriterErrorCodes) {}
};
/// Load an input into a writer context.
static void loadInput(const WeightedFile &Input, WriterContext *WC) {
std::unique_lock<std::mutex> CtxGuard{WC->Lock};
// If there's a pending hard error, don't do more work.
if (WC->Err)
return;
WC->ErrWhence = Input.Filename;
auto ReaderOrErr = InstrProfReader::create(Input.Filename);
if ((WC->Err = ReaderOrErr.takeError()))
return;
auto Reader = std::move(ReaderOrErr.get());
bool IsIRProfile = Reader->isIRLevelProfile();
if (WC->Writer.setIsIRLevelProfile(IsIRProfile)) {
WC->Err = make_error<StringError>(
"Merge IR generated profile with Clang generated profile.",
std::error_code());
return;
}
for (auto &I : *Reader) {
if (Error E = WC->Writer.addRecord(std::move(I), Input.Weight)) {
// Only show hint the first time an error occurs.
instrprof_error IPE = InstrProfError::take(std::move(E));
std::unique_lock<std::mutex> ErrGuard{WC->ErrLock};
bool firstTime = WC->WriterErrorCodes.insert(IPE).second;
handleMergeWriterError(make_error<InstrProfError>(IPE), Input.Filename,
I.Name, firstTime);
}
}
if (Reader->hasError())
WC->Err = Reader->getError();
}
/// Merge the \p Src writer context into \p Dst.
static void mergeWriterContexts(WriterContext *Dst, WriterContext *Src) {
if (Error E = Dst->Writer.mergeRecordsFromWriter(std::move(Src->Writer)))
Dst->Err = std::move(E);
}
static void mergeInstrProfile(const WeightedFileVector &Inputs,
StringRef OutputFilename,
ProfileFormat OutputFormat, bool OutputSparse,
unsigned NumThreads) {
ProfileFormat OutputFormat, bool OutputSparse) {
if (OutputFilename.compare("-") == 0)
exitWithError("Cannot write indexed profdata format to stdout.");
@ -191,57 +131,30 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs,
if (EC)
exitWithErrorCode(EC, OutputFilename);
std::mutex ErrorLock;
InstrProfWriter Writer(OutputSparse);
SmallSet<instrprof_error, 4> WriterErrorCodes;
for (const auto &Input : Inputs) {
auto ReaderOrErr = InstrProfReader::create(Input.Filename);
if (Error E = ReaderOrErr.takeError())
exitWithError(std::move(E), Input.Filename);
// If NumThreads is not specified, auto-detect a good default.
if (NumThreads == 0)
NumThreads = std::max(1U, std::min(std::thread::hardware_concurrency(),
unsigned(Inputs.size() / 2)));
auto Reader = std::move(ReaderOrErr.get());
bool IsIRProfile = Reader->isIRLevelProfile();
if (Writer.setIsIRLevelProfile(IsIRProfile))
exitWithError("Merge IR generated profile with Clang generated profile.");
// Initialize the writer contexts.
SmallVector<std::unique_ptr<WriterContext>, 4> Contexts;
for (unsigned I = 0; I < NumThreads; ++I)
Contexts.emplace_back(llvm::make_unique<WriterContext>(
OutputSparse, ErrorLock, WriterErrorCodes));
if (NumThreads == 1) {
for (const auto &Input : Inputs)
loadInput(Input, Contexts[0].get());
} else {
ThreadPool Pool(NumThreads);
// Load the inputs in parallel (N/NumThreads serial steps).
unsigned Ctx = 0;
for (const auto &Input : Inputs) {
Pool.async(loadInput, Input, Contexts[Ctx].get());
Ctx = (Ctx + 1) % NumThreads;
for (auto &I : *Reader) {
if (Error E = Writer.addRecord(std::move(I), Input.Weight)) {
// Only show hint the first time an error occurs.
instrprof_error IPE = InstrProfError::take(std::move(E));
bool firstTime = WriterErrorCodes.insert(IPE).second;
handleMergeWriterError(make_error<InstrProfError>(IPE), Input.Filename,
I.Name, firstTime);
}
}
Pool.wait();
// Merge the writer contexts together (lg(NumThreads) serial steps).
unsigned Mid = Contexts.size() / 2;
unsigned End = Contexts.size();
assert(Mid > 0 && "Expected more than one context");
do {
for (unsigned I = 0; I < Mid; ++I)
Pool.async(mergeWriterContexts, Contexts[I].get(),
Contexts[I + Mid].get());
if (End & 1)
Pool.async(mergeWriterContexts, Contexts[0].get(),
Contexts[End - 1].get());
Pool.wait();
End = Mid;
Mid /= 2;
} while (Mid > 0);
if (Reader->hasError())
exitWithError(Reader->getError(), Input.Filename);
}
// Handle deferred hard errors encountered during merging.
for (std::unique_ptr<WriterContext> &WC : Contexts)
if (WC->Err)
exitWithError(std::move(WC->Err), WC->ErrWhence);
InstrProfWriter &Writer = Contexts[0]->Writer;
if (OutputFormat == PF_Text)
Writer.writeText(Output);
else
@ -375,11 +288,6 @@ static int merge_main(int argc, const char *argv[]) {
clEnumValEnd));
cl::opt<bool> OutputSparse("sparse", cl::init(false),
cl::desc("Generate a sparse profile (only meaningful for -instr)"));
cl::opt<unsigned> NumThreads(
"num-threads", cl::init(0),
cl::desc("Number of merge threads to use (default: autodetect)"));
cl::alias NumThreadsA("j", cl::desc("Alias for --num-threads"),
cl::aliasopt(NumThreads));
cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n");
@ -406,7 +314,7 @@ static int merge_main(int argc, const char *argv[]) {
if (ProfileKind == instr)
mergeInstrProfile(WeightedInputs, OutputFilename, OutputFormat,
OutputSparse, NumThreads);
OutputSparse);
else
mergeSampleProfile(WeightedInputs, OutputFilename, OutputFormat);

View File

@ -204,31 +204,6 @@ TEST_F(InstrProfTest, get_profile_summary) {
delete PSFromMD;
}
TEST_F(InstrProfTest, test_writer_merge) {
InstrProfRecord Record1("func1", 0x1234, {42});
NoError(Writer.addRecord(std::move(Record1)));
InstrProfWriter Writer2;
InstrProfRecord Record2("func2", 0x1234, {0, 0});
NoError(Writer2.addRecord(std::move(Record2)));
NoError(Writer.mergeRecordsFromWriter(std::move(Writer2)));
auto Profile = Writer.writeBuffer();
readProfile(std::move(Profile));
Expected<InstrProfRecord> R = Reader->getInstrProfRecord("func1", 0x1234);
ASSERT_TRUE(NoError(R.takeError()));
ASSERT_EQ(1U, R->Counts.size());
ASSERT_EQ(42U, R->Counts[0]);
R = Reader->getInstrProfRecord("func2", 0x1234);
ASSERT_TRUE(NoError(R.takeError()));
ASSERT_EQ(2U, R->Counts.size());
ASSERT_EQ(0U, R->Counts[0]);
ASSERT_EQ(0U, R->Counts[1]);
}
static const char callee1[] = "callee1";
static const char callee2[] = "callee2";
static const char callee3[] = "callee3";