From 192404027d77263faa8bbaf3261100b0f1338b87 Mon Sep 17 00:00:00 2001 From: Vedant Kumar Date: Tue, 7 Jun 2016 22:47:31 +0000 Subject: [PATCH] Retry^4 "[llvm-profdata] Add option to ingest filepaths from a file" Changes since the initial commit: - Use echo instead of printf. This should side-step the character escaping issues on Windows. Differential Revision: http://reviews.llvm.org/D20980 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@272068 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/CommandGuide/llvm-profdata.rst | 9 +++ test/tools/llvm-profdata/input-filenames.test | 17 +++++ tools/llvm-profdata/llvm-profdata.cpp | 65 +++++++++++++++++-- 3 files changed, 85 insertions(+), 6 deletions(-) create mode 100644 test/tools/llvm-profdata/input-filenames.test diff --git a/docs/CommandGuide/llvm-profdata.rst b/docs/CommandGuide/llvm-profdata.rst index 12f2771bd00..2742fd3d75d 100644 --- a/docs/CommandGuide/llvm-profdata.rst +++ b/docs/CommandGuide/llvm-profdata.rst @@ -44,6 +44,9 @@ interpreted as relatively more important than a shorter run. Depending on the nature of the training runs it may be useful to adjust the weight given to each input file by using the ``-weighted-input`` option. +Profiles passed in via ``-weighted-input``, ``-input-files``, or via positional +arguments are processed once for each time they are seen. + OPTIONS ^^^^^^^ @@ -65,6 +68,12 @@ OPTIONS Input files specified without using this option are assigned a default weight of 1. Examples are shown below. +.. option:: -input-files=path, -f=path + + Specify a file which contains a list of files to merge. The entries in this + file are newline-separated. Lines starting with '#' are skipped. Entries may + be of the form <filename> or <weight>,<filename>. + .. option:: -instr (default) Specify that the input profile is an instrumentation-based profile. 
diff --git a/test/tools/llvm-profdata/input-filenames.test b/test/tools/llvm-profdata/input-filenames.test new file mode 100644 index 00000000000..da0c47bf82a --- /dev/null +++ b/test/tools/llvm-profdata/input-filenames.test @@ -0,0 +1,17 @@ +# Create an input file. +RUN: echo '# comment 1' > %t.input +RUN: echo ' # comment 2' >> %t.input +RUN: echo 'bar' >> %t.input +RUN: echo ' baz' >> %t.input +RUN: echo "2,%t.weighted" >> %t.input + +# Create the weighted file, since these actually need to exist. +RUN: echo ' ' > %t.weighted + +RUN: llvm-profdata merge -f %t.input -dump-input-file-list -o /dev/null foo | FileCheck %s +RUN: llvm-profdata merge -input-files %t.input -dump-input-file-list -o /dev/null foo | FileCheck %s + +CHECK: 1,foo +CHECK-NEXT: 1,bar +CHECK-NEXT: 1,baz +CHECK-NEXT: 2,{{.*}}.weighted diff --git a/tools/llvm-profdata/llvm-profdata.cpp b/tools/llvm-profdata/llvm-profdata.cpp index 20a167226a3..064f36a8dc9 100644 --- a/tools/llvm-profdata/llvm-profdata.cpp +++ b/tools/llvm-profdata/llvm-profdata.cpp @@ -223,11 +223,53 @@ static WeightedFile parseWeightedFile(const StringRef &WeightedFilename) { return WeightedFile(FileName, Weight); } +static std::unique_ptr +getInputFilenamesFileBuf(const StringRef &InputFilenamesFile) { + if (InputFilenamesFile == "") + return {}; + + auto BufOrError = MemoryBuffer::getFileOrSTDIN(InputFilenamesFile); + if (!BufOrError) + exitWithErrorCode(BufOrError.getError(), InputFilenamesFile); + + return std::move(*BufOrError); +} + +static void parseInputFilenamesFile(MemoryBuffer *Buffer, + WeightedFileVector &WFV) { + if (!Buffer) + return; + + SmallVector Entries; + StringRef Data = Buffer->getBuffer(); + Data.split(Entries, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false); + for (const StringRef &FileWeightEntry : Entries) { + StringRef SanitizedEntry = FileWeightEntry.trim(" \t\v\f\r"); + // Skip comments. + if (SanitizedEntry.startswith("#")) + continue; + // If there's no comma, it's an unweighted profile. 
+ else if (SanitizedEntry.find(',') == StringRef::npos) + WFV.emplace_back(SanitizedEntry, 1); + else + WFV.emplace_back(parseWeightedFile(SanitizedEntry)); + } +} + static int merge_main(int argc, const char *argv[]) { cl::list InputFilenames(cl::Positional, cl::desc("")); cl::list WeightedInputFilenames("weighted-input", cl::desc(",")); + cl::opt InputFilenamesFile( + "input-files", cl::init(""), + cl::desc("Path to file containing newline-separated " + "[,] entries")); + cl::alias InputFilenamesFileA("f", cl::desc("Alias for --input-files"), + cl::aliasopt(InputFilenamesFile)); + cl::opt DumpInputFileList( + "dump-input-file-list", cl::init(false), cl::Hidden, + cl::desc("Dump the list of input files and their weights, then exit")); cl::opt OutputFilename("output", cl::value_desc("output"), cl::init("-"), cl::Required, cl::desc("Output file")); @@ -249,15 +291,26 @@ static int merge_main(int argc, const char *argv[]) { cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n"); - if (InputFilenames.empty() && WeightedInputFilenames.empty()) + WeightedFileVector WeightedInputs; + for (StringRef Filename : InputFilenames) + WeightedInputs.emplace_back(Filename, 1); + for (StringRef WeightedFilename : WeightedInputFilenames) + WeightedInputs.emplace_back(parseWeightedFile(WeightedFilename)); + + // Make sure that the file buffer stays alive for the duration of the + // weighted input vector's lifetime. + auto Buffer = getInputFilenamesFileBuf(InputFilenamesFile); + parseInputFilenamesFile(Buffer.get(), WeightedInputs); + + if (WeightedInputs.empty()) exitWithError("No input files specified. 
See " + sys::path::filename(argv[0]) + " -help"); - WeightedFileVector WeightedInputs; - for (StringRef Filename : InputFilenames) - WeightedInputs.push_back(WeightedFile(Filename, 1)); - for (StringRef WeightedFilename : WeightedInputFilenames) - WeightedInputs.push_back(parseWeightedFile(WeightedFilename)); + if (DumpInputFileList) { + for (auto &WF : WeightedInputs) + outs() << WF.Weight << "," << WF.Filename << "\n"; + return 0; + } if (ProfileKind == instr) mergeInstrProfile(WeightedInputs, OutputFilename, OutputFormat,