Retry^4 "[llvm-profdata] Add option to ingest filepaths from a file"

Changes since the initial commit:
- Use echo instead of printf. This should side-step the character
  escaping issues on Windows.

Differential Revision: http://reviews.llvm.org/D20980

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@272068 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Vedant Kumar 2016-06-07 22:47:31 +00:00
parent 8a560fc113
commit 192404027d
3 changed files with 85 additions and 6 deletions

View File

@ -44,6 +44,9 @@ interpreted as relatively more important than a shorter run. Depending on the
nature of the training runs it may be useful to adjust the weight given to each nature of the training runs it may be useful to adjust the weight given to each
input file by using the ``-weighted-input`` option. input file by using the ``-weighted-input`` option.
Profiles passed in via ``-weighted-input``, ``-input-files``, or via positional
arguments are processed once for each time they are seen.
OPTIONS OPTIONS
^^^^^^^ ^^^^^^^
@ -65,6 +68,12 @@ OPTIONS
Input files specified without using this option are assigned a default Input files specified without using this option are assigned a default
weight of 1. Examples are shown below. weight of 1. Examples are shown below.
.. option:: -input-files=path, -f=path
Specify a file which contains a list of files to merge. The entries in this
file are newline-separated. Leading and trailing whitespace on each line is
ignored, and lines starting with '#' are skipped. Entries may be of the form
<filename> or <weight>,<filename>.
.. option:: -instr (default) .. option:: -instr (default)
Specify that the input profile is an instrumentation-based profile. Specify that the input profile is an instrumentation-based profile.

View File

@ -0,0 +1,17 @@
# Exercise the list-file input mode of llvm-profdata merge: the tool is given
# a file of newline-separated entries and asked to dump the resulting input
# list instead of merging.
#
# Create an input file.
# It holds two comment lines (one indented), two plain entries (one indented)
# and one weighted entry.
RUN: echo '# comment 1' > %t.input
RUN: echo ' # comment 2' >> %t.input
RUN: echo 'bar' >> %t.input
RUN: echo ' baz' >> %t.input
RUN: echo "2,%t.weighted" >> %t.input
# Create the weighted file, since these actually need to exist.
RUN: echo ' ' > %t.weighted
# Run once with the short alias and once with the long option name; both must
# produce the same listing. "foo" is passed positionally on the command line.
RUN: llvm-profdata merge -f %t.input -dump-input-file-list -o /dev/null foo | FileCheck %s
RUN: llvm-profdata merge -input-files %t.input -dump-input-file-list -o /dev/null foo | FileCheck %s
# Expected dump: the positional input first, then the list-file entries in
# file order, with comments dropped and leading whitespace removed.
CHECK: 1,foo
CHECK-NEXT: 1,bar
CHECK-NEXT: 1,baz
CHECK-NEXT: 2,{{.*}}.weighted

View File

@ -223,11 +223,53 @@ static WeightedFile parseWeightedFile(const StringRef &WeightedFilename) {
return WeightedFile(FileName, Weight); return WeightedFile(FileName, Weight);
} }
static std::unique_ptr<MemoryBuffer>
getInputFilenamesFileBuf(const StringRef &InputFilenamesFile) {
if (InputFilenamesFile == "")
return {};
auto BufOrError = MemoryBuffer::getFileOrSTDIN(InputFilenamesFile);
if (!BufOrError)
exitWithErrorCode(BufOrError.getError(), InputFilenamesFile);
return std::move(*BufOrError);
}
/// Append the entries of an input-filenames list to \p WFV.
///
/// The buffer is split on '\n' and every line is stripped of surrounding
/// whitespace (space, tab, vertical tab, form feed, carriage return). Then:
///   - lines that are blank after stripping are ignored;
///   - lines beginning with '#' are treated as comments and ignored;
///   - lines without a comma are added with a weight of 1;
///   - all remaining lines are handed to parseWeightedFile.
///
/// \param Buffer contents of the list file; a null pointer makes this a no-op.
/// \param WFV    vector that receives one entry per accepted line, in file
///               order. Unweighted entries are built from StringRefs that
///               point into \p Buffer.
///               NOTE(review): callers presumably must keep the buffer alive
///               while \p WFV is in use -- confirm against WeightedFile.
static void parseInputFilenamesFile(MemoryBuffer *Buffer,
                                    WeightedFileVector &WFV) {
  if (!Buffer)
    return;

  SmallVector<StringRef, 8> Entries;
  StringRef Data = Buffer->getBuffer();
  Data.split(Entries, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
  for (const StringRef &FileWeightEntry : Entries) {
    StringRef SanitizedEntry = FileWeightEntry.trim(" \t\v\f\r");
    // KeepEmpty=false only drops lines that are empty *before* trimming. A
    // whitespace-only line (for instance the lone '\r' left by a blank line in
    // a CRLF file) becomes empty here and must not be recorded as a profile
    // with an empty filename.
    if (SanitizedEntry.empty())
      continue;
    // Skip comments.
    if (SanitizedEntry.startswith("#"))
      continue;
    // If there's no comma, it's an unweighted profile.
    if (SanitizedEntry.find(',') == StringRef::npos)
      WFV.emplace_back(SanitizedEntry, 1);
    else
      WFV.emplace_back(parseWeightedFile(SanitizedEntry));
  }
}
static int merge_main(int argc, const char *argv[]) { static int merge_main(int argc, const char *argv[]) {
cl::list<std::string> InputFilenames(cl::Positional, cl::list<std::string> InputFilenames(cl::Positional,
cl::desc("<filename...>")); cl::desc("<filename...>"));
cl::list<std::string> WeightedInputFilenames("weighted-input", cl::list<std::string> WeightedInputFilenames("weighted-input",
cl::desc("<weight>,<filename>")); cl::desc("<weight>,<filename>"));
cl::opt<std::string> InputFilenamesFile(
"input-files", cl::init(""),
cl::desc("Path to file containing newline-separated "
"[<weight>,]<filename> entries"));
cl::alias InputFilenamesFileA("f", cl::desc("Alias for --input-files"),
cl::aliasopt(InputFilenamesFile));
cl::opt<bool> DumpInputFileList(
"dump-input-file-list", cl::init(false), cl::Hidden,
cl::desc("Dump the list of input files and their weights, then exit"));
cl::opt<std::string> OutputFilename("output", cl::value_desc("output"), cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
cl::init("-"), cl::Required, cl::init("-"), cl::Required,
cl::desc("Output file")); cl::desc("Output file"));
@ -249,15 +291,26 @@ static int merge_main(int argc, const char *argv[]) {
cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n"); cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n");
if (InputFilenames.empty() && WeightedInputFilenames.empty()) WeightedFileVector WeightedInputs;
for (StringRef Filename : InputFilenames)
WeightedInputs.emplace_back(Filename, 1);
for (StringRef WeightedFilename : WeightedInputFilenames)
WeightedInputs.emplace_back(parseWeightedFile(WeightedFilename));
// Make sure that the file buffer stays alive for the duration of the
// weighted input vector's lifetime.
auto Buffer = getInputFilenamesFileBuf(InputFilenamesFile);
parseInputFilenamesFile(Buffer.get(), WeightedInputs);
if (WeightedInputs.empty())
exitWithError("No input files specified. See " + exitWithError("No input files specified. See " +
sys::path::filename(argv[0]) + " -help"); sys::path::filename(argv[0]) + " -help");
WeightedFileVector WeightedInputs; if (DumpInputFileList) {
for (StringRef Filename : InputFilenames) for (auto &WF : WeightedInputs)
WeightedInputs.push_back(WeightedFile(Filename, 1)); outs() << WF.Weight << "," << WF.Filename << "\n";
for (StringRef WeightedFilename : WeightedInputFilenames) return 0;
WeightedInputs.push_back(parseWeightedFile(WeightedFilename)); }
if (ProfileKind == instr) if (ProfileKind == instr)
mergeInstrProfile(WeightedInputs, OutputFilename, OutputFormat, mergeInstrProfile(WeightedInputs, OutputFilename, OutputFormat,