[llvm-exegesis] Add throughput mode.

Summary:
The new `inverse_throughput` mode simply runs the latency benchmark runner on
snippets produced by the parallel uops snippet generator.

Fixes PR37698.

Reviewers: gchatelet

Subscribers: tschuett, RKSimon, llvm-commits

Differential Revision: https://reviews.llvm.org/D57000

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@352632 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Clement Courbet 2019-01-30 16:02:20 +00:00
parent f067deca74
commit 91921e0980
10 changed files with 63 additions and 29 deletions

View File

@ -10,13 +10,13 @@ DESCRIPTION
-----------
:program:`llvm-exegesis` is a benchmarking tool that uses information available
in LLVM to measure host machine instruction characteristics like latency or port
decomposition.
in LLVM to measure host machine instruction characteristics like latency,
throughput, or port decomposition.
Given an LLVM opcode name and a benchmarking mode, :program:`llvm-exegesis`
generates a code snippet that makes execution as serial (resp. as parallel) as
possible so that we can measure the latency (resp. uop decomposition) of the
instruction.
possible so that we can measure the latency (resp. inverse throughput/uop decomposition)
of the instruction.
The code snippet is jitted and executed on the host subtarget. The time taken
(resp. resource usage) is measured using hardware performance counters. The
result is printed out as YAML to the standard output.
@ -37,11 +37,13 @@ instruction, run:
$ llvm-exegesis -mode=latency -opcode-name=ADD64rr
Measuring the uop decomposition of an instruction works similarly:
Measuring the uop decomposition or inverse throughput of an instruction works similarly:
.. code-block:: bash
$ llvm-exegesis -mode=uops -opcode-name=ADD64rr
$ llvm-exegesis -mode=inverse_throughput -opcode-name=ADD64rr
The output is a YAML document (the default is to write to stdout, but you can
redirect the output to a file using `-benchmarks-file`):
@ -186,7 +188,7 @@ OPTIONS
Specify the custom code snippet to measure. See example 2 for details.
Either `opcode-index`, `opcode-name` or `snippets-file` must be set.
.. option:: -mode=[latency|uops|analysis]
.. option:: -mode=[latency|uops|inverse_throughput|analysis]
Specify the run mode.
@ -197,8 +199,8 @@ OPTIONS
.. option:: -benchmarks-file=</path/to/file>
File to read (`analysis` mode) or write (`latency`/`uops` modes) benchmark
results. "-" uses stdin/stdout.
File to read (`analysis` mode) or write (`latency`/`uops`/`inverse_throughput`
modes) benchmark results. "-" uses stdin/stdout.
.. option:: -analysis-clusters-output-file=</path/to/file>

View File

@ -0,0 +1,8 @@
# RUN: llvm-exegesis -mode=inverse_throughput -opcode-name=ADD32rr | FileCheck %s
CHECK: ---
CHECK-NEXT: mode: inverse_throughput
CHECK-NEXT: key:
CHECK-NEXT: instructions:
CHECK-NEXT: ADD32rr
CHECK: key: inverse_throughput

View File

@ -209,6 +209,8 @@ struct ScalarEnumerationTraits<exegesis::InstructionBenchmark::ModeE> {
Io.enumCase(Value, "", exegesis::InstructionBenchmark::Unknown);
Io.enumCase(Value, "latency", exegesis::InstructionBenchmark::Latency);
Io.enumCase(Value, "uops", exegesis::InstructionBenchmark::Uops);
Io.enumCase(Value, "inverse_throughput",
exegesis::InstructionBenchmark::InverseThroughput);
}
};

View File

@ -57,7 +57,7 @@ struct BenchmarkMeasure {
// The result of an instruction benchmark.
struct InstructionBenchmark {
InstructionBenchmarkKey Key;
enum ModeE { Unknown, Latency, Uops };
enum ModeE { Unknown, Latency, Uops, InverseThroughput };
ModeE Mode;
std::string CpuName;
std::string LLVMTriple;

View File

@ -75,6 +75,7 @@ public:
protected:
const LLVMState &State;
const InstructionBenchmark::ModeE Mode;
private:
virtual llvm::Expected<std::vector<BenchmarkMeasure>>
@ -84,7 +85,6 @@ private:
writeObjectFile(const BenchmarkCode &Configuration,
llvm::ArrayRef<llvm::MCInst> Code) const;
const InstructionBenchmark::ModeE Mode;
const std::unique_ptr<ScratchSpace> Scratch;
};

View File

@ -165,6 +165,14 @@ LatencySnippetGenerator::generateCodeTemplates(const Instruction &Instr) const {
return std::move(Results);
}
// Constructs the runner, forwarding State and Mode to the BenchmarkRunner base.
// Mode must be either Latency or InverseThroughput; any other value fails the
// assertion below ("invalid mode").
LatencyBenchmarkRunner::LatencyBenchmarkRunner(const LLVMState &State,
InstructionBenchmark::ModeE Mode)
: BenchmarkRunner(State, Mode) {
assert((Mode == InstructionBenchmark::Latency ||
Mode == InstructionBenchmark::InverseThroughput) &&
"invalid mode");
}
LatencyBenchmarkRunner::~LatencyBenchmarkRunner() = default;
llvm::Expected<std::vector<BenchmarkMeasure>>
@ -184,8 +192,17 @@ LatencyBenchmarkRunner::runMeasurements(
if (*ExpectedCounterValue < MinValue)
MinValue = *ExpectedCounterValue;
}
std::vector<BenchmarkMeasure> Result = {
BenchmarkMeasure::Create("latency", MinValue)};
std::vector<BenchmarkMeasure> Result;
switch (Mode) {
case InstructionBenchmark::Latency:
Result = {BenchmarkMeasure::Create("latency", MinValue)};
break;
case InstructionBenchmark::InverseThroughput:
Result = {BenchmarkMeasure::Create("inverse_throughput", MinValue)};
break;
default:
break;
}
return std::move(Result);
}

View File

@ -32,8 +32,8 @@ public:
class LatencyBenchmarkRunner : public BenchmarkRunner {
public:
LatencyBenchmarkRunner(const LLVMState &State)
: BenchmarkRunner(State, InstructionBenchmark::Latency) {}
LatencyBenchmarkRunner(const LLVMState &State,
InstructionBenchmark::ModeE Mode);
~LatencyBenchmarkRunner() override;
private:

View File

@ -45,6 +45,7 @@ ExegesisTarget::createSnippetGenerator(InstructionBenchmark::ModeE Mode,
case InstructionBenchmark::Latency:
return createLatencySnippetGenerator(State);
case InstructionBenchmark::Uops:
case InstructionBenchmark::InverseThroughput:
return createUopsSnippetGenerator(State);
}
return nullptr;
@ -57,7 +58,8 @@ ExegesisTarget::createBenchmarkRunner(InstructionBenchmark::ModeE Mode,
case InstructionBenchmark::Unknown:
return nullptr;
case InstructionBenchmark::Latency:
return createLatencyBenchmarkRunner(State);
case InstructionBenchmark::InverseThroughput:
return createLatencyBenchmarkRunner(State, Mode);
case InstructionBenchmark::Uops:
return createUopsBenchmarkRunner(State);
}
@ -74,9 +76,9 @@ ExegesisTarget::createUopsSnippetGenerator(const LLVMState &State) const {
return llvm::make_unique<UopsSnippetGenerator>(State);
}
std::unique_ptr<BenchmarkRunner>
ExegesisTarget::createLatencyBenchmarkRunner(const LLVMState &State) const {
return llvm::make_unique<LatencyBenchmarkRunner>(State);
std::unique_ptr<BenchmarkRunner> ExegesisTarget::createLatencyBenchmarkRunner(
const LLVMState &State, InstructionBenchmark::ModeE Mode) const {
return llvm::make_unique<LatencyBenchmarkRunner>(State, Mode);
}
std::unique_ptr<BenchmarkRunner>

View File

@ -130,7 +130,7 @@ private:
std::unique_ptr<SnippetGenerator> virtual createUopsSnippetGenerator(
const LLVMState &State) const;
std::unique_ptr<BenchmarkRunner> virtual createLatencyBenchmarkRunner(
const LLVMState &State) const;
const LLVMState &State, InstructionBenchmark::ModeE Mode) const;
std::unique_ptr<BenchmarkRunner> virtual createUopsBenchmarkRunner(
const LLVMState &State) const;

View File

@ -56,16 +56,19 @@ static cl::opt<std::string> SnippetsFile("snippets-file",
static cl::opt<std::string> BenchmarkFile("benchmarks-file", cl::desc(""),
cl::init(""));
static cl::opt<exegesis::InstructionBenchmark::ModeE>
BenchmarkMode("mode", cl::desc("the mode to run"),
cl::values(clEnumValN(exegesis::InstructionBenchmark::Latency,
"latency", "Instruction Latency"),
clEnumValN(exegesis::InstructionBenchmark::Uops,
"uops", "Uop Decomposition"),
// When not asking for a specific benchmark mode,
// we'll analyse the results.
clEnumValN(exegesis::InstructionBenchmark::Unknown,
"analysis", "Analysis")));
static cl::opt<exegesis::InstructionBenchmark::ModeE> BenchmarkMode(
"mode", cl::desc("the mode to run"),
cl::values(clEnumValN(exegesis::InstructionBenchmark::Latency, "latency",
"Instruction Latency"),
clEnumValN(exegesis::InstructionBenchmark::InverseThroughput,
"inverse_throughput",
"Instruction Inverse Throughput"),
clEnumValN(exegesis::InstructionBenchmark::Uops, "uops",
"Uop Decomposition"),
// When not asking for a specific benchmark mode,
// we'll analyse the results.
clEnumValN(exegesis::InstructionBenchmark::Unknown, "analysis",
"Analysis")));
static cl::opt<unsigned>
NumRepetitions("num-repetitions",