[llvm-exegesis][NFC] Split BenchmarkRunner class

Summary:
The snippet-generation part goes to the SnippetGenerator class.

This will allow benchmarking arbitrary code (see PR38437).

Reviewers: gchatelet

Subscribers: mgorny, tschuett, llvm-commits

Differential Revision: https://reviews.llvm.org/D51979

llvm-svn: 342117
This commit is contained in:
Clement Courbet 2018-09-13 07:40:53 +00:00
parent 4375b6f72e
commit 1931b181a5
15 changed files with 399 additions and 243 deletions

View File

@ -0,0 +1,38 @@
//===-- BenchmarkCode.h -----------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TOOLS_LLVM_EXEGESIS_BENCHMARKCODE_H
#define LLVM_TOOLS_LLVM_EXEGESIS_BENCHMARKCODE_H
#include "llvm/MC/MCInst.h"
#include <string>
#include <vector>
namespace exegesis {
// A collection of instructions that are to be assembled, executed and measured.
// Plain aggregate: it carries the snippet itself together with the register
// information listed below.
struct BenchmarkCode {
// The sequence of instructions that are to be repeated.
std::vector<llvm::MCInst> Instructions;
// Before the code is executed some instructions are added to set up the
// registers' initial values. These are the registers that need such a setup.
std::vector<unsigned> RegsToDef;
// We also need to provide the registers that are live on entry for the
// assembler to generate proper prologue/epilogue.
std::vector<unsigned> LiveIns;
// Information about how this configuration was built.
std::string Info;
};
} // namespace exegesis
#endif // LLVM_TOOLS_LLVM_EXEGESIS_BENCHMARKCODE_H

View File

@ -17,7 +17,6 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Program.h"
@ -28,37 +27,10 @@ BenchmarkFailure::BenchmarkFailure(const llvm::Twine &S)
BenchmarkRunner::BenchmarkRunner(const LLVMState &State,
InstructionBenchmark::ModeE Mode)
: State(State), RATC(State.getRegInfo(),
getFunctionReservedRegs(State.getTargetMachine())),
Mode(Mode), Scratch(llvm::make_unique<ScratchSpace>()) {}
: State(State), Mode(Mode), Scratch(llvm::make_unique<ScratchSpace>()) {}
BenchmarkRunner::~BenchmarkRunner() = default;
llvm::Expected<std::vector<InstructionBenchmark>>
BenchmarkRunner::run(unsigned Opcode, unsigned NumRepetitions) {
const llvm::MCInstrDesc &InstrDesc = State.getInstrInfo().get(Opcode);
// Ignore instructions that we cannot run.
if (InstrDesc.isPseudo())
return llvm::make_error<BenchmarkFailure>("Unsupported opcode: isPseudo");
if (InstrDesc.isBranch() || InstrDesc.isIndirectBranch())
return llvm::make_error<BenchmarkFailure>(
"Unsupported opcode: isBranch/isIndirectBranch");
if (InstrDesc.isCall() || InstrDesc.isReturn())
return llvm::make_error<BenchmarkFailure>(
"Unsupported opcode: isCall/isReturn");
llvm::Expected<std::vector<BenchmarkCode>> ConfigurationOrError =
generateConfigurations(Opcode);
if (llvm::Error E = ConfigurationOrError.takeError())
return std::move(E);
std::vector<InstructionBenchmark> InstrBenchmarks;
for (const BenchmarkCode &Conf : ConfigurationOrError.get())
InstrBenchmarks.push_back(runConfiguration(Conf, NumRepetitions));
return InstrBenchmarks;
}
// Repeat the snippet until there are at least NumInstructions in the resulting
// code.
static std::vector<llvm::MCInst>
@ -122,74 +94,6 @@ BenchmarkRunner::runConfiguration(const BenchmarkCode &BC,
return InstrBenchmark;
}
llvm::Expected<std::vector<BenchmarkCode>>
BenchmarkRunner::generateConfigurations(unsigned Opcode) const {
if (auto E = generateCodeTemplate(Opcode)) {
CodeTemplate &CT = E.get();
std::vector<BenchmarkCode> Output;
// TODO: Generate as many BenchmarkCode as needed.
{
BenchmarkCode BC;
BC.Info = CT.Info;
for (InstructionBuilder &IB : CT.Instructions) {
IB.randomizeUnsetVariables(
CT.ScratchSpacePointerInReg
? RATC.getRegister(CT.ScratchSpacePointerInReg).aliasedBits()
: RATC.emptyRegisters());
BC.Instructions.push_back(IB.build());
}
if (CT.ScratchSpacePointerInReg)
BC.LiveIns.push_back(CT.ScratchSpacePointerInReg);
BC.RegsToDef = computeRegsToDef(CT.Instructions);
Output.push_back(std::move(BC));
}
return Output;
} else
return E.takeError();
}
std::vector<unsigned> BenchmarkRunner::computeRegsToDef(
const std::vector<InstructionBuilder> &Instructions) const {
// Collect all register uses and create an assignment for each of them.
// Ignore memory operands which are handled separately.
// Loop invariant: DefinedRegs[i] is true iif it has been set at least once
// before the current instruction.
llvm::BitVector DefinedRegs = RATC.emptyRegisters();
std::vector<unsigned> RegsToDef;
for (const InstructionBuilder &IB : Instructions) {
// Returns the register that this Operand sets or uses, or 0 if this is not
// a register.
const auto GetOpReg = [&IB](const Operand &Op) -> unsigned {
if (Op.IsMem)
return 0;
if (Op.ImplicitReg)
return *Op.ImplicitReg;
if (Op.IsExplicit && IB.getValueFor(Op).isReg())
return IB.getValueFor(Op).getReg();
return 0;
};
// Collect used registers that have never been def'ed.
for (const Operand &Op : IB.Instr.Operands) {
if (!Op.IsDef) {
const unsigned Reg = GetOpReg(Op);
if (Reg > 0 && !DefinedRegs.test(Reg)) {
RegsToDef.push_back(Reg);
DefinedRegs.set(Reg);
}
}
}
// Mark defs as having been def'ed.
for (const Operand &Op : IB.Instr.Operands) {
if (Op.IsDef) {
const unsigned Reg = GetOpReg(Op);
if (Reg > 0)
DefinedRegs.set(Reg);
}
}
}
return RegsToDef;
}
llvm::Expected<std::string>
BenchmarkRunner::writeObjectFile(const BenchmarkCode &BC,
llvm::ArrayRef<llvm::MCInst> Code) const {
@ -204,32 +108,4 @@ BenchmarkRunner::writeObjectFile(const BenchmarkCode &BC,
return ResultPath.str();
}
llvm::Expected<CodeTemplate> BenchmarkRunner::generateSelfAliasingCodeTemplate(
const Instruction &Instr) const {
const AliasingConfigurations SelfAliasing(Instr, Instr);
if (SelfAliasing.empty()) {
return llvm::make_error<BenchmarkFailure>("empty self aliasing");
}
CodeTemplate CT;
InstructionBuilder IB(Instr);
if (SelfAliasing.hasImplicitAliasing()) {
CT.Info = "implicit Self cycles, picking random values.";
} else {
CT.Info = "explicit self cycles, selecting one aliasing Conf.";
// This is a self aliasing instruction so defs and uses are from the same
// instance, hence twice IB in the following call.
setRandomAliasing(SelfAliasing, IB, IB);
}
CT.Instructions.push_back(std::move(IB));
return std::move(CT);
}
llvm::Expected<CodeTemplate>
BenchmarkRunner::generateUnconstrainedCodeTemplate(const Instruction &Instr,
llvm::StringRef Msg) const {
CodeTemplate CT;
CT.Info = llvm::formatv("{0}, repeating an unconstrained assignment", Msg);
CT.Instructions.emplace_back(Instr);
return std::move(CT);
}
} // namespace exegesis

View File

@ -17,10 +17,10 @@
#define LLVM_TOOLS_LLVM_EXEGESIS_BENCHMARKRUNNER_H
#include "Assembler.h"
#include "BenchmarkCode.h"
#include "BenchmarkResult.h"
#include "LlvmState.h"
#include "MCInstrDescView.h"
#include "RegisterAliasing.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/Error.h"
#include <cstdlib>
@ -36,23 +36,6 @@ public:
BenchmarkFailure(const llvm::Twine &S);
};
// A collection of instructions that are to be assembled, executed and measured.
struct BenchmarkCode {
// The sequence of instructions that are to be repeated.
std::vector<llvm::MCInst> Instructions;
// Before the code is executed some instructions are added to setup the
// registers initial values.
std::vector<unsigned> RegsToDef;
// We also need to provide the registers that are live on entry for the
// assembler to generate proper prologue/epilogue.
std::vector<unsigned> LiveIns;
// Informations about how this configuration was built.
std::string Info;
};
// Common code for all benchmark modes.
class BenchmarkRunner {
public:
@ -61,12 +44,8 @@ public:
virtual ~BenchmarkRunner();
llvm::Expected<std::vector<InstructionBenchmark>>
run(unsigned Opcode, unsigned NumRepetitions);
// Given a snippet, computes which registers the setup code needs to define.
std::vector<unsigned>
computeRegsToDef(const std::vector<InstructionBuilder> &Snippet) const;
InstructionBenchmark runConfiguration(const BenchmarkCode &Configuration,
unsigned NumRepetitions) const;
// Scratch space to run instructions that touch memory.
struct ScratchSpace {
@ -87,33 +66,12 @@ public:
protected:
const LLVMState &State;
const RegisterAliasingTrackerCache RATC;
// Generates a single code template that has a self-dependency.
llvm::Expected<CodeTemplate>
generateSelfAliasingCodeTemplate(const Instruction &Instr) const;
// Generates a single code template without assignment constraints.
llvm::Expected<CodeTemplate>
generateUnconstrainedCodeTemplate(const Instruction &Instr,
llvm::StringRef Msg) const;
private:
// API to be implemented by subclasses.
virtual llvm::Expected<CodeTemplate>
generateCodeTemplate(unsigned Opcode) const = 0;
virtual std::vector<BenchmarkMeasure>
runMeasurements(const ExecutableFunction &EF, ScratchSpace &Scratch,
const unsigned NumRepetitions) const = 0;
// Internal helpers.
InstructionBenchmark runConfiguration(const BenchmarkCode &Configuration,
unsigned NumRepetitions) const;
// Calls generateCodeTemplate and expands it into one or more BenchmarkCode.
llvm::Expected<std::vector<BenchmarkCode>>
generateConfigurations(unsigned Opcode) const;
llvm::Expected<std::string>
writeObjectFile(const BenchmarkCode &Configuration,
llvm::ArrayRef<llvm::MCInst> Code) const;

View File

@ -19,6 +19,7 @@ add_library(LLVMExegesis
MCInstrDescView.cpp
PerfHelper.cpp
RegisterAliasing.cpp
SnippetGenerator.cpp
Target.cpp
Uops.cpp
)

View File

@ -29,9 +29,9 @@ static bool hasMemoryOperand(const llvm::MCOperandInfo &OpInfo) {
return OpInfo.OperandType == llvm::MCOI::OPERAND_MEMORY;
}
LatencyBenchmarkRunner::~LatencyBenchmarkRunner() = default;
LatencySnippetGenerator::~LatencySnippetGenerator() = default;
llvm::Error LatencyBenchmarkRunner::isInfeasible(
llvm::Error LatencySnippetGenerator::isInfeasible(
const llvm::MCInstrDesc &MCInstrDesc) const {
if (llvm::any_of(MCInstrDesc.operands(), hasUnknownOperand))
return llvm::make_error<BenchmarkFailure>(
@ -43,7 +43,7 @@ llvm::Error LatencyBenchmarkRunner::isInfeasible(
}
llvm::Expected<CodeTemplate>
LatencyBenchmarkRunner::generateTwoInstructionPrototype(
LatencySnippetGenerator::generateTwoInstructionPrototype(
const Instruction &Instr) const {
std::vector<unsigned> Opcodes;
Opcodes.resize(State.getInstrInfo().getNumOpcodes());
@ -80,7 +80,7 @@ LatencyBenchmarkRunner::generateTwoInstructionPrototype(
}
llvm::Expected<CodeTemplate>
LatencyBenchmarkRunner::generateCodeTemplate(unsigned Opcode) const {
LatencySnippetGenerator::generateCodeTemplate(unsigned Opcode) const {
const auto &InstrDesc = State.getInstrInfo().get(Opcode);
if (auto E = isInfeasible(InstrDesc))
return std::move(E);
@ -105,6 +105,8 @@ const char *LatencyBenchmarkRunner::getCounterName() const {
return CounterName;
}
LatencyBenchmarkRunner::~LatencyBenchmarkRunner() = default;
std::vector<BenchmarkMeasure>
LatencyBenchmarkRunner::runMeasurements(const ExecutableFunction &Function,
ScratchSpace &Scratch,

View File

@ -17,14 +17,14 @@
#include "BenchmarkRunner.h"
#include "MCInstrDescView.h"
#include "SnippetGenerator.h"
namespace exegesis {
class LatencyBenchmarkRunner : public BenchmarkRunner {
class LatencySnippetGenerator : public SnippetGenerator {
public:
LatencyBenchmarkRunner(const LLVMState &State)
: BenchmarkRunner(State, InstructionBenchmark::Latency) {}
~LatencyBenchmarkRunner() override;
LatencySnippetGenerator(const LLVMState &State) : SnippetGenerator(State) {}
~LatencySnippetGenerator() override;
llvm::Expected<CodeTemplate>
generateCodeTemplate(unsigned Opcode) const override;
@ -34,14 +34,21 @@ private:
llvm::Expected<CodeTemplate>
generateTwoInstructionPrototype(const Instruction &Instr) const;
};
// Benchmark runner for the latency mode: it constructs the BenchmarkRunner
// base with InstructionBenchmark::Latency and provides the measurement hook.
class LatencyBenchmarkRunner : public BenchmarkRunner {
public:
LatencyBenchmarkRunner(const LLVMState &State)
: BenchmarkRunner(State, InstructionBenchmark::Latency) {}
~LatencyBenchmarkRunner() override;
private:
// Overrides the BenchmarkRunner measurement hook for the function `EF`.
std::vector<BenchmarkMeasure>
runMeasurements(const ExecutableFunction &EF, ScratchSpace &Scratch,
const unsigned NumRepetitions) const override;
// Returns the name of the counter; virtual, so subclasses may override it.
virtual const char *getCounterName() const;
};
} // namespace exegesis
#endif // LLVM_TOOLS_LLVM_EXEGESIS_LATENCY_H

View File

@ -0,0 +1,130 @@
//===-- SnippetGenerator.cpp ------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include <array>
#include <string>
#include "Assembler.h"
#include "MCInstrDescView.h"
#include "SnippetGenerator.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Program.h"
namespace exegesis {
// Builds a failure carrying the message `S`. The error uses
// llvm::inconvertibleErrorCode(), i.e. it has no meaningful std::error_code.
SnippetGeneratorFailure::SnippetGeneratorFailure(const llvm::Twine &S)
: llvm::StringError(S, llvm::inconvertibleErrorCode()) {}
// Keeps a reference to `State` and builds the register aliasing tracker cache
// from the target's register info and the registers returned by
// getFunctionReservedRegs() for the target machine.
SnippetGenerator::SnippetGenerator(const LLVMState &State)
: State(State), RATC(State.getRegInfo(),
getFunctionReservedRegs(State.getTargetMachine())) {}
// Out-of-line defaulted destructor.
SnippetGenerator::~SnippetGenerator() = default;
// Asks the subclass for a code template for `Opcode` and expands it into
// BenchmarkCode configurations. Returns the template-generation error
// unchanged when no template could be produced.
llvm::Expected<std::vector<BenchmarkCode>>
SnippetGenerator::generateConfigurations(unsigned Opcode) const {
  auto TemplateOrErr = generateCodeTemplate(Opcode);
  if (!TemplateOrErr)
    return TemplateOrErr.takeError();

  CodeTemplate &Template = TemplateOrErr.get();
  std::vector<BenchmarkCode> Output;
  // TODO: Generate as many BenchmarkCode as needed.
  BenchmarkCode Code;
  Code.Info = Template.Info;
  for (InstructionBuilder &Builder : Template.Instructions) {
    // Variables left unset by the template get random values; when a register
    // holds the scratch space pointer, it and its aliases are forbidden.
    if (Template.ScratchSpacePointerInReg)
      Builder.randomizeUnsetVariables(
          RATC.getRegister(Template.ScratchSpacePointerInReg).aliasedBits());
    else
      Builder.randomizeUnsetVariables(RATC.emptyRegisters());
    Code.Instructions.push_back(Builder.build());
  }
  // The scratch space pointer register, if any, is live on entry.
  if (Template.ScratchSpacePointerInReg)
    Code.LiveIns.push_back(Template.ScratchSpacePointerInReg);
  Code.RegsToDef = computeRegsToDef(Template.Instructions);
  Output.push_back(std::move(Code));
  return Output;
}
// Walks the snippet in order and returns, in first-use order, every register
// that is read before any instruction of the snippet has written it. Memory
// operands are ignored here since they are handled separately.
std::vector<unsigned> SnippetGenerator::computeRegsToDef(
    const std::vector<InstructionBuilder> &Instructions) const {
  // Invariant: AlreadyDefined[R] is true iff R has been set at least once
  // before the instruction currently being visited.
  llvm::BitVector AlreadyDefined = RATC.emptyRegisters();
  std::vector<unsigned> NeedsSetup;
  for (const InstructionBuilder &Builder : Instructions) {
    // Register set or used by `Op`, or 0 when `Op` is not a register.
    const auto RegisterOf = [&Builder](const Operand &Op) -> unsigned {
      if (Op.IsMem)
        return 0;
      if (Op.ImplicitReg)
        return *Op.ImplicitReg;
      if (!Op.IsExplicit)
        return 0;
      const auto &Value = Builder.getValueFor(Op);
      return Value.isReg() ? Value.getReg() : 0;
    };

    // First pass: uses of registers that nothing has defined yet.
    for (const Operand &Op : Builder.Instr.Operands) {
      if (Op.IsDef)
        continue;
      const unsigned Reg = RegisterOf(Op);
      if (Reg == 0 || AlreadyDefined.test(Reg))
        continue;
      NeedsSetup.push_back(Reg);
      AlreadyDefined.set(Reg);
    }

    // Second pass: registers written by this instruction are defined for all
    // the following ones.
    for (const Operand &Op : Builder.Instr.Operands) {
      if (!Op.IsDef)
        continue;
      const unsigned Reg = RegisterOf(Op);
      if (Reg > 0)
        AlreadyDefined.set(Reg);
    }
  }
  return NeedsSetup;
}
// Builds a one-instruction code template in which `Instr` depends on itself.
// Fails with a SnippetGeneratorFailure when `Instr` has no self-aliasing
// configuration at all.
llvm::Expected<CodeTemplate> SnippetGenerator::generateSelfAliasingCodeTemplate(
    const Instruction &Instr) const {
  const AliasingConfigurations Aliasing(Instr, Instr);
  if (Aliasing.empty())
    return llvm::make_error<SnippetGeneratorFailure>("empty self aliasing");

  InstructionBuilder Builder(Instr);
  CodeTemplate Template;
  if (!Aliasing.hasImplicitAliasing()) {
    Template.Info = "explicit self cycles, selecting one aliasing Conf.";
    // Defs and uses come from the very same instruction instance, which is
    // why the builder is passed twice.
    setRandomAliasing(Aliasing, Builder, Builder);
  } else {
    Template.Info = "implicit Self cycles, picking random values.";
  }
  Template.Instructions.push_back(std::move(Builder));
  return std::move(Template);
}
// Builds a code template made of a single instance of `Instr` with no
// assignment constraints. `Msg` is prepended to the template's Info string.
llvm::Expected<CodeTemplate>
SnippetGenerator::generateUnconstrainedCodeTemplate(const Instruction &Instr,
                                                    llvm::StringRef Msg) const {
  CodeTemplate Template;
  Template.Instructions.emplace_back(Instr);
  Template.Info =
      llvm::formatv("{0}, repeating an unconstrained assignment", Msg).str();
  return std::move(Template);
}
} // namespace exegesis

View File

@ -0,0 +1,74 @@
//===-- SnippetGenerator.h --------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// Defines the abstract SnippetGenerator class for generating code that allows
/// measuring a certain property of instructions (e.g. latency).
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_TOOLS_LLVM_EXEGESIS_SNIPPETGENERATOR_H
#define LLVM_TOOLS_LLVM_EXEGESIS_SNIPPETGENERATOR_H
#include "Assembler.h"
#include "BenchmarkCode.h"
#include "LlvmState.h"
#include "MCInstrDescView.h"
#include "RegisterAliasing.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/Error.h"
#include <cstdlib>
#include <memory>
#include <vector>
namespace exegesis {
// A class representing failures that happened during snippet generation; they
// are used to report information to the user.
class SnippetGeneratorFailure : public llvm::StringError {
public:
// `S` is the message describing the failure.
SnippetGeneratorFailure(const llvm::Twine &S);
};
// Common snippet-generation code for all benchmark modes. Subclasses implement
// generateCodeTemplate(); this class expands the resulting template into
// BenchmarkCode configurations.
class SnippetGenerator {
public:
explicit SnippetGenerator(const LLVMState &State);
virtual ~SnippetGenerator();
// Calls generateCodeTemplate and expands it into one or more BenchmarkCode.
// Returns the error from generateCodeTemplate when it fails.
llvm::Expected<std::vector<BenchmarkCode>>
generateConfigurations(unsigned Opcode) const;
// Given a snippet, computes which registers the setup code needs to define.
std::vector<unsigned>
computeRegsToDef(const std::vector<InstructionBuilder> &Snippet) const;
protected:
// Held by reference.
const LLVMState &State;
// Register aliasing information, built once in the constructor.
const RegisterAliasingTrackerCache RATC;
// Generates a single code template that has a self-dependency.
llvm::Expected<CodeTemplate>
generateSelfAliasingCodeTemplate(const Instruction &Instr) const;
// Generates a single code template without assignment constraints.
// `Msg` is included in the Info string of the generated template.
llvm::Expected<CodeTemplate>
generateUnconstrainedCodeTemplate(const Instruction &Instr,
llvm::StringRef Msg) const;
private:
// API to be implemented by subclasses.
virtual llvm::Expected<CodeTemplate>
generateCodeTemplate(unsigned Opcode) const = 0;
};
} // namespace exegesis
#endif // LLVM_TOOLS_LLVM_EXEGESIS_SNIPPETGENERATOR_H

View File

@ -36,6 +36,20 @@ void ExegesisTarget::registerTarget(ExegesisTarget *Target) {
FirstTarget = Target;
}
// Dispatches on `Mode` to the matching per-mode factory. Returns nullptr for
// InstructionBenchmark::Unknown and for any value outside the enumeration.
std::unique_ptr<SnippetGenerator>
ExegesisTarget::createSnippetGenerator(InstructionBenchmark::ModeE Mode,
                                       const LLVMState &State) const {
  switch (Mode) {
  case InstructionBenchmark::Latency:
    return createLatencySnippetGenerator(State);
  case InstructionBenchmark::Uops:
    return createUopsSnippetGenerator(State);
  case InstructionBenchmark::Unknown:
    // No generator exists for this mode; fall out to the common nullptr.
    break;
  }
  return nullptr;
}
std::unique_ptr<BenchmarkRunner>
ExegesisTarget::createBenchmarkRunner(InstructionBenchmark::ModeE Mode,
const LLVMState &State) const {
@ -50,6 +64,16 @@ ExegesisTarget::createBenchmarkRunner(InstructionBenchmark::ModeE Mode,
return nullptr;
}
// Default latency snippet generator. This factory is virtual, so targets may
// override it to supply their own generator.
std::unique_ptr<SnippetGenerator>
ExegesisTarget::createLatencySnippetGenerator(const LLVMState &State) const {
  std::unique_ptr<SnippetGenerator> Generator =
      llvm::make_unique<LatencySnippetGenerator>(State);
  return Generator;
}
// Default uops snippet generator. This factory is virtual, so targets may
// override it to supply their own generator.
std::unique_ptr<SnippetGenerator>
ExegesisTarget::createUopsSnippetGenerator(const LLVMState &State) const {
  std::unique_ptr<SnippetGenerator> Generator =
      llvm::make_unique<UopsSnippetGenerator>(State);
  return Generator;
}
std::unique_ptr<BenchmarkRunner>
ExegesisTarget::createLatencyBenchmarkRunner(const LLVMState &State) const {
return llvm::make_unique<LatencyBenchmarkRunner>(State);

View File

@ -20,6 +20,7 @@
#include "BenchmarkResult.h"
#include "BenchmarkRunner.h"
#include "LlvmState.h"
#include "SnippetGenerator.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/CallingConv.h"
@ -61,6 +62,10 @@ public:
// matter as long as it's large enough.
virtual unsigned getMaxMemoryAccessSize() const { return 0; }
// Creates a snippet generator for the given mode.
std::unique_ptr<SnippetGenerator>
createSnippetGenerator(InstructionBenchmark::ModeE Mode,
const LLVMState &State) const;
// Creates a benchmark runner for the given mode.
std::unique_ptr<BenchmarkRunner>
createBenchmarkRunner(InstructionBenchmark::ModeE Mode,
@ -79,8 +84,12 @@ public:
private:
virtual bool matchesArch(llvm::Triple::ArchType Arch) const = 0;
// Targets can implement their own Latency/Uops benchmarks runners by
// Targets can implement their own snippet generators/benchmarks runners by
// implementing these.
std::unique_ptr<SnippetGenerator> virtual createLatencySnippetGenerator(
const LLVMState &State) const;
std::unique_ptr<SnippetGenerator> virtual createUopsSnippetGenerator(
const LLVMState &State) const;
std::unique_ptr<BenchmarkRunner> virtual createLatencyBenchmarkRunner(
const LLVMState &State) const;
std::unique_ptr<BenchmarkRunner> virtual createUopsBenchmarkRunner(

View File

@ -86,7 +86,7 @@ static bool hasUnknownOperand(const llvm::MCOperandInfo &OpInfo) {
}
llvm::Error
UopsBenchmarkRunner::isInfeasible(const llvm::MCInstrDesc &MCInstrDesc) const {
UopsSnippetGenerator::isInfeasible(const llvm::MCInstrDesc &MCInstrDesc) const {
if (llvm::any_of(MCInstrDesc.operands(), hasUnknownOperand))
return llvm::make_error<BenchmarkFailure>(
"Infeasible : has unknown operands");
@ -123,8 +123,9 @@ static void remove(llvm::BitVector &a, const llvm::BitVector &b) {
}
UopsBenchmarkRunner::~UopsBenchmarkRunner() = default;
UopsSnippetGenerator::~UopsSnippetGenerator() = default;
void UopsBenchmarkRunner::instantiateMemoryOperands(
void UopsSnippetGenerator::instantiateMemoryOperands(
const unsigned ScratchSpacePointerInReg,
std::vector<InstructionBuilder> &Instructions) const {
if (ScratchSpacePointerInReg == 0)
@ -144,11 +145,12 @@ void UopsBenchmarkRunner::instantiateMemoryOperands(
++I;
Instructions.push_back(std::move(IB));
}
assert(I * MemStep < ScratchSpace::kSize && "not enough scratch space");
assert(I * MemStep < BenchmarkRunner::ScratchSpace::kSize &&
"not enough scratch space");
}
llvm::Expected<CodeTemplate>
UopsBenchmarkRunner::generateCodeTemplate(unsigned Opcode) const {
UopsSnippetGenerator::generateCodeTemplate(unsigned Opcode) const {
const auto &InstrDesc = State.getInstrInfo().get(Opcode);
if (auto E = isInfeasible(InstrDesc))
return std::move(E);
@ -285,6 +287,6 @@ UopsBenchmarkRunner::runMeasurements(const ExecutableFunction &Function,
return Result;
}
constexpr const size_t UopsBenchmarkRunner::kMinNumDifferentAddresses;
constexpr const size_t UopsSnippetGenerator::kMinNumDifferentAddresses;
} // namespace exegesis

View File

@ -16,14 +16,14 @@
#define LLVM_TOOLS_LLVM_EXEGESIS_UOPS_H
#include "BenchmarkRunner.h"
#include "SnippetGenerator.h"
namespace exegesis {
class UopsBenchmarkRunner : public BenchmarkRunner {
class UopsSnippetGenerator : public SnippetGenerator {
public:
UopsBenchmarkRunner(const LLVMState &State)
: BenchmarkRunner(State, InstructionBenchmark::Uops) {}
~UopsBenchmarkRunner() override;
UopsSnippetGenerator(const LLVMState &State) : SnippetGenerator(State) {}
~UopsSnippetGenerator() override;
llvm::Expected<CodeTemplate>
generateCodeTemplate(unsigned Opcode) const override;
@ -33,10 +33,6 @@ public:
private:
llvm::Error isInfeasible(const llvm::MCInstrDesc &MCInstrDesc) const;
std::vector<BenchmarkMeasure>
runMeasurements(const ExecutableFunction &EF, ScratchSpace &Scratch,
const unsigned NumRepetitions) const override;
// Instantiates memory operands within a snippet.
// To make computations as parallel as possible, we generate independent
// memory locations for instructions that load and store. If there are less
@ -65,6 +61,20 @@ private:
std::vector<InstructionBuilder> &Snippet) const;
};
// Benchmark runner for the uops mode: it constructs the BenchmarkRunner base
// with InstructionBenchmark::Uops and provides the measurement hook.
class UopsBenchmarkRunner : public BenchmarkRunner {
public:
UopsBenchmarkRunner(const LLVMState &State)
: BenchmarkRunner(State, InstructionBenchmark::Uops) {}
~UopsBenchmarkRunner() override;
// NOTE(review): the snippet generation code and the unit tests refer to
// UopsSnippetGenerator::kMinNumDifferentAddresses; confirm whether this copy
// on the runner is still needed.
static constexpr const size_t kMinNumDifferentAddresses = 6;
private:
// Overrides the BenchmarkRunner measurement hook for the function `EF`.
std::vector<BenchmarkMeasure>
runMeasurements(const ExecutableFunction &EF, ScratchSpace &Scratch,
const unsigned NumRepetitions) const override;
};
} // namespace exegesis
#endif // LLVM_TOOLS_LLVM_EXEGESIS_UOPS_H

View File

@ -22,7 +22,7 @@ namespace exegesis {
namespace {
// Common code for X86 Uops and Latency runners.
template <typename Impl> class X86BenchmarkRunner : public Impl {
template <typename Impl> class X86SnippetGenerator : public Impl {
using Impl::Impl;
llvm::Expected<CodeTemplate>
@ -71,21 +71,23 @@ template <typename Impl> class X86BenchmarkRunner : public Impl {
}
};
class X86LatencyImpl : public LatencyBenchmarkRunner {
class X86LatencyImpl : public LatencySnippetGenerator {
protected:
using Base = LatencyBenchmarkRunner;
using Base = LatencySnippetGenerator;
using Base::Base;
llvm::Expected<CodeTemplate> handleCompareFP(const Instruction &Instr) const {
return llvm::make_error<BenchmarkFailure>("Unsupported x87 CompareFP");
return llvm::make_error<SnippetGeneratorFailure>(
"Unsupported x87 CompareFP");
}
llvm::Expected<CodeTemplate> handleCondMovFP(const Instruction &Instr) const {
return llvm::make_error<BenchmarkFailure>("Unsupported x87 CondMovFP");
return llvm::make_error<SnippetGeneratorFailure>(
"Unsupported x87 CondMovFP");
}
};
class X86UopsImpl : public UopsBenchmarkRunner {
class X86UopsImpl : public UopsSnippetGenerator {
protected:
using Base = UopsBenchmarkRunner;
using Base = UopsSnippetGenerator;
using Base::Base;
// We can compute uops for any FP instruction that does not grow or shrink the
// stack (either do not touch the stack or push as much as they pop).
@ -193,14 +195,14 @@ class ExegesisX86Target : public ExegesisTarget {
return {};
}
std::unique_ptr<BenchmarkRunner>
createLatencyBenchmarkRunner(const LLVMState &State) const override {
return llvm::make_unique<X86BenchmarkRunner<X86LatencyImpl>>(State);
std::unique_ptr<SnippetGenerator>
createLatencySnippetGenerator(const LLVMState &State) const override {
return llvm::make_unique<X86SnippetGenerator<X86LatencyImpl>>(State);
}
std::unique_ptr<BenchmarkRunner>
createUopsBenchmarkRunner(const LLVMState &State) const override {
return llvm::make_unique<X86BenchmarkRunner<X86UopsImpl>>(State);
std::unique_ptr<SnippetGenerator>
createUopsSnippetGenerator(const LLVMState &State) const override {
return llvm::make_unique<X86SnippetGenerator<X86UopsImpl>>(State);
}
bool matchesArch(llvm::Triple::ArchType Arch) const override {

View File

@ -119,6 +119,30 @@ getBenchmarkResultContext(const LLVMState &State) {
return Ctx;
}
// Generates code snippets for opcode `Opcode`, using the snippet generator
// that the target provides for the current BenchmarkMode.
// Aborts through report_fatal_error when no generator can be created, and
// returns a BenchmarkFailure for opcodes that cannot be run (pseudo, branch,
// call or return instructions). `NumRepetitions` is not used by the body.
llvm::Expected<std::vector<BenchmarkCode>>
generateSnippets(const LLVMState &State, unsigned Opcode,
                 unsigned NumRepetitions) {
  const std::unique_ptr<SnippetGenerator> SnippetGen =
      State.getExegesisTarget().createSnippetGenerator(BenchmarkMode, State);
  if (SnippetGen == nullptr)
    llvm::report_fatal_error("cannot create snippet generator");

  const llvm::MCInstrDesc &Desc = State.getInstrInfo().get(Opcode);
  // Ignore instructions that we cannot run; the first matching category
  // determines the reported reason.
  const char *Unsupported = nullptr;
  if (Desc.isPseudo())
    Unsupported = "Unsupported opcode: isPseudo";
  else if (Desc.isBranch() || Desc.isIndirectBranch())
    Unsupported = "Unsupported opcode: isBranch/isIndirectBranch";
  else if (Desc.isCall() || Desc.isReturn())
    Unsupported = "Unsupported opcode: isCall/isReturn";
  if (Unsupported != nullptr)
    return llvm::make_error<BenchmarkFailure>(Unsupported);

  return SnippetGen->generateConfigurations(Opcode);
}
void benchmarkMain() {
if (exegesis::pfm::pfmInitialize())
llvm::report_fatal_error("cannot initialize libpfm");
@ -140,6 +164,10 @@ void benchmarkMain() {
return;
}
// FIXME: Allow arbitrary code.
const std::vector<BenchmarkCode> Configurations =
ExitOnErr(generateSnippets(State, Opcode, NumRepetitions));
const std::unique_ptr<BenchmarkRunner> Runner =
State.getExegesisTarget().createBenchmarkRunner(BenchmarkMode, State);
if (!Runner) {
@ -154,11 +182,12 @@ void benchmarkMain() {
BenchmarkFile = "-";
const BenchmarkResultContext Context = getBenchmarkResultContext(State);
std::vector<InstructionBenchmark> Results =
ExitOnErr(Runner->run(Opcode, NumRepetitions));
for (InstructionBenchmark &Result : Results)
ExitOnErr(Result.writeYaml(Context, BenchmarkFile));
for (const BenchmarkCode &Conf : Configurations) {
InstructionBenchmark Result =
Runner->runConfiguration(Conf, NumRepetitions);
ExitOnErr(Result.writeYaml(Context, BenchmarkFile));
}
exegesis::pfm::pfmTerminate();
}

View File

@ -52,26 +52,27 @@ protected:
const llvm::MCRegisterInfo &MCRegisterInfo;
};
template <typename BenchmarkRunner>
template <typename SnippetGeneratorT>
class SnippetGeneratorTest : public X86SnippetGeneratorTest {
protected:
SnippetGeneratorTest() : Runner(State) {}
SnippetGeneratorTest() : Generator(State) {}
CodeTemplate checkAndGetCodeTemplate(unsigned Opcode) {
randomGenerator().seed(0); // Initialize seed.
auto CodeTemplateOrError = Runner.generateCodeTemplate(Opcode);
auto CodeTemplateOrError = Generator.generateCodeTemplate(Opcode);
EXPECT_FALSE(CodeTemplateOrError.takeError()); // Valid configuration.
return std::move(CodeTemplateOrError.get());
}
BenchmarkRunner Runner;
SnippetGeneratorT Generator;
};
using LatencyBenchmarkRunnerTest = SnippetGeneratorTest<LatencyBenchmarkRunner>;
using LatencySnippetGeneratorTest =
SnippetGeneratorTest<LatencySnippetGenerator>;
using UopsBenchmarkRunnerTest = SnippetGeneratorTest<UopsBenchmarkRunner>;
using UopsSnippetGeneratorTest = SnippetGeneratorTest<UopsSnippetGenerator>;
TEST_F(LatencyBenchmarkRunnerTest, ImplicitSelfDependency) {
TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependency) {
// ADC16i16 self alias because of implicit use and def.
// explicit use 0 : imm
@ -93,7 +94,7 @@ TEST_F(LatencyBenchmarkRunnerTest, ImplicitSelfDependency) {
EXPECT_THAT(IB.VariableValues[0], IsInvalid()) << "Immediate is not set";
}
TEST_F(LatencyBenchmarkRunnerTest, ExplicitSelfDependency) {
TEST_F(LatencySnippetGeneratorTest, ExplicitSelfDependency) {
// ADD16ri self alias because Op0 and Op1 are tied together.
// explicit def 0 : reg RegClass=GR16
@ -112,7 +113,7 @@ TEST_F(LatencyBenchmarkRunnerTest, ExplicitSelfDependency) {
EXPECT_THAT(IB.VariableValues[1], IsInvalid()) << "Operand 2 is not set";
}
TEST_F(LatencyBenchmarkRunnerTest, DependencyThroughOtherOpcode) {
TEST_F(LatencySnippetGeneratorTest, DependencyThroughOtherOpcode) {
// CMP64rr
// explicit use 0 : reg RegClass=GR64
// explicit use 1 : reg RegClass=GR64
@ -131,7 +132,7 @@ TEST_F(LatencyBenchmarkRunnerTest, DependencyThroughOtherOpcode) {
// TODO: check that the two instructions alias each other.
}
TEST_F(LatencyBenchmarkRunnerTest, LAHF) {
TEST_F(LatencySnippetGeneratorTest, LAHF) {
const unsigned Opcode = llvm::X86::LAHF;
const CodeTemplate CT = checkAndGetCodeTemplate(Opcode);
EXPECT_THAT(CT.Info, HasSubstr("cycle through"));
@ -141,7 +142,7 @@ TEST_F(LatencyBenchmarkRunnerTest, LAHF) {
ASSERT_THAT(IB.VariableValues, SizeIs(0));
}
TEST_F(UopsBenchmarkRunnerTest, ParallelInstruction) {
TEST_F(UopsSnippetGeneratorTest, ParallelInstruction) {
// BNDCL32rr is parallel no matter what.
// explicit use 0 : reg RegClass=BNDR
@ -158,7 +159,7 @@ TEST_F(UopsBenchmarkRunnerTest, ParallelInstruction) {
EXPECT_THAT(IB.VariableValues[1], IsInvalid());
}
TEST_F(UopsBenchmarkRunnerTest, SerialInstruction) {
TEST_F(UopsSnippetGeneratorTest, SerialInstruction) {
// CDQ is serial no matter what.
// implicit def : EAX
@ -173,7 +174,7 @@ TEST_F(UopsBenchmarkRunnerTest, SerialInstruction) {
ASSERT_THAT(IB.VariableValues, SizeIs(0));
}
TEST_F(UopsBenchmarkRunnerTest, StaticRenaming) {
TEST_F(UopsSnippetGeneratorTest, StaticRenaming) {
// CMOVA32rr has tied variables, we enumerate the possible values to execute
// as many in parallel as possible.
@ -195,7 +196,7 @@ TEST_F(UopsBenchmarkRunnerTest, StaticRenaming) {
<< "Each instruction writes to a different register";
}
TEST_F(UopsBenchmarkRunnerTest, NoTiedVariables) {
TEST_F(UopsSnippetGeneratorTest, NoTiedVariables) {
// CMOV_GR32 has no tied variables, we make sure def and use are different
// from each other.
@ -218,13 +219,13 @@ TEST_F(UopsBenchmarkRunnerTest, NoTiedVariables) {
EXPECT_THAT(IB.VariableValues[3], IsInvalid());
}
TEST_F(UopsBenchmarkRunnerTest, MemoryUse) {
TEST_F(UopsSnippetGeneratorTest, MemoryUse) {
// MOV32rm reads from memory.
const unsigned Opcode = llvm::X86::MOV32rm;
const CodeTemplate CT = checkAndGetCodeTemplate(Opcode);
EXPECT_THAT(CT.Info, HasSubstr("no tied variables"));
ASSERT_THAT(CT.Instructions,
SizeIs(UopsBenchmarkRunner::kMinNumDifferentAddresses));
SizeIs(UopsSnippetGenerator::kMinNumDifferentAddresses));
const InstructionBuilder &IB = CT.Instructions[0];
EXPECT_THAT(IB.getOpcode(), Opcode);
ASSERT_THAT(IB.VariableValues, SizeIs(6));
@ -234,18 +235,17 @@ TEST_F(UopsBenchmarkRunnerTest, MemoryUse) {
EXPECT_EQ(IB.VariableValues[5].getReg(), 0u);
}
TEST_F(UopsBenchmarkRunnerTest, MemoryUse_Movsb) {
TEST_F(UopsSnippetGeneratorTest, MemoryUse_Movsb) {
// MOVSB writes to scratch memory register.
const unsigned Opcode = llvm::X86::MOVSB;
auto Error = Runner.generateCodeTemplate(Opcode).takeError();
auto Error = Generator.generateCodeTemplate(Opcode).takeError();
EXPECT_TRUE((bool)Error);
llvm::consumeError(std::move(Error));
}
class FakeBenchmarkRunner : public BenchmarkRunner {
class FakeSnippetGenerator : public SnippetGenerator {
public:
FakeBenchmarkRunner(const LLVMState &State)
: BenchmarkRunner(State, InstructionBenchmark::Unknown) {}
FakeSnippetGenerator(const LLVMState &State) : SnippetGenerator(State) {}
Instruction createInstruction(unsigned Opcode) {
return Instruction(State.getInstrInfo().get(Opcode), RATC);
@ -257,15 +257,9 @@ private:
return llvm::make_error<llvm::StringError>("not implemented",
llvm::inconvertibleErrorCode());
}
std::vector<BenchmarkMeasure>
runMeasurements(const ExecutableFunction &EF, ScratchSpace &Scratch,
const unsigned NumRepetitions) const override {
return {};
}
};
using FakeSnippetGeneratorTest = SnippetGeneratorTest<FakeBenchmarkRunner>;
using FakeSnippetGeneratorTest = SnippetGeneratorTest<FakeSnippetGenerator>;
TEST_F(FakeSnippetGeneratorTest, ComputeRegsToDefAdd16ri) {
// ADD16ri:
@ -273,12 +267,12 @@ TEST_F(FakeSnippetGeneratorTest, ComputeRegsToDefAdd16ri) {
// explicit use 1 : reg RegClass=GR16 | TIED_TO:0
// explicit use 2 : imm
// implicit def : EFLAGS
InstructionBuilder IB(Runner.createInstruction(llvm::X86::ADD16ri));
InstructionBuilder IB(Generator.createInstruction(llvm::X86::ADD16ri));
IB.getValueFor(IB.Instr.Variables[0]) =
llvm::MCOperand::createReg(llvm::X86::AX);
std::vector<InstructionBuilder> Snippet;
Snippet.push_back(std::move(IB));
const auto RegsToDef = Runner.computeRegsToDef(Snippet);
const auto RegsToDef = Generator.computeRegsToDef(Snippet);
EXPECT_THAT(RegsToDef, UnorderedElementsAre(llvm::X86::AX));
}
@ -289,14 +283,14 @@ TEST_F(FakeSnippetGeneratorTest, ComputeRegsToDefAdd64rr) {
// -> only rbx needs defining.
std::vector<InstructionBuilder> Snippet;
{
InstructionBuilder Mov(Runner.createInstruction(llvm::X86::MOV64ri));
InstructionBuilder Mov(Generator.createInstruction(llvm::X86::MOV64ri));
Mov.getValueFor(Mov.Instr.Variables[0]) =
llvm::MCOperand::createReg(llvm::X86::RAX);
Mov.getValueFor(Mov.Instr.Variables[1]) = llvm::MCOperand::createImm(42);
Snippet.push_back(std::move(Mov));
}
{
InstructionBuilder Add(Runner.createInstruction(llvm::X86::ADD64rr));
InstructionBuilder Add(Generator.createInstruction(llvm::X86::ADD64rr));
Add.getValueFor(Add.Instr.Variables[0]) =
llvm::MCOperand::createReg(llvm::X86::RAX);
Add.getValueFor(Add.Instr.Variables[1]) =
@ -304,7 +298,7 @@ TEST_F(FakeSnippetGeneratorTest, ComputeRegsToDefAdd64rr) {
Snippet.push_back(std::move(Add));
}
const auto RegsToDef = Runner.computeRegsToDef(Snippet);
const auto RegsToDef = Generator.computeRegsToDef(Snippet);
EXPECT_THAT(RegsToDef, UnorderedElementsAre(llvm::X86::RBX));
}