mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-26 14:16:12 +00:00
[llvm-exegesis] Add partial X87 support.
Summary: This enables the X86-specific X86FloatingPointStackifierPass, and allows llvm-exegesis to generate and measure X87 latency/uops for some FP ops. Reviewers: gchatelet Subscribers: tschuett, llvm-commits Differential Revision: https://reviews.llvm.org/D48592 llvm-svn: 335815
This commit is contained in:
parent
77f58a0ada
commit
68d7181227
@ -196,4 +196,25 @@ BenchmarkRunner::writeObjectFile(const BenchmarkConfiguration::Setup &Setup,
|
||||
return ResultPath.str();
|
||||
}
|
||||
|
||||
/// Builds a single-instruction snippet prototype for \p Instr using the
/// aliasing configurations of the instruction against itself.
///
/// \returns the prototype, or a BenchmarkFailure ("empty self aliasing") when
/// the self-aliasing configuration set is empty.
llvm::Expected<SnippetPrototype>
BenchmarkRunner::generateSelfAliasingPrototype(const Instruction &Instr) const {
  const AliasingConfigurations SelfAliasing(Instr, Instr);
  if (SelfAliasing.empty())
    return llvm::make_error<BenchmarkFailure>("empty self aliasing");

  SnippetPrototype Result;
  InstructionInstance Instance(Instr);
  if (!SelfAliasing.hasImplicitAliasing()) {
    Result.Explanation = "explicit self cycles, selecting one aliasing Conf.";
    // Defs and uses both come from the one instance of this self-aliasing
    // instruction, which is why it is passed twice below.
    setRandomAliasing(SelfAliasing, Instance, Instance);
  } else {
    Result.Explanation = "implicit Self cycles, picking random values.";
  }

  Result.Snippet.push_back(std::move(Instance));
  // Explicit move retained, as in the original implementation.
  return std::move(Result);
}
|
||||
|
||||
} // namespace exegesis
|
||||
|
@ -69,6 +69,9 @@ protected:
|
||||
const LLVMState &State;
|
||||
const RegisterAliasingTrackerCache RATC;
|
||||
|
||||
llvm::Expected<SnippetPrototype> generateSelfAliasingPrototype(
|
||||
const Instruction &Instr) const;
|
||||
|
||||
private:
|
||||
// API to be implemented by subclasses.
|
||||
virtual llvm::Expected<SnippetPrototype>
|
||||
|
@ -42,29 +42,9 @@ llvm::Error LatencyBenchmarkRunner::isInfeasible(
|
||||
return llvm::Error::success();
|
||||
}
|
||||
|
||||
/// Builds a single-instruction snippet prototype for \p Instr from the
/// already-computed \p SelfAliasing configurations.
///
/// This overload does not check whether \p SelfAliasing is empty.
llvm::Expected<SnippetPrototype>
LatencyBenchmarkRunner::generateSelfAliasingPrototype(
    const Instruction &Instr,
    const AliasingConfigurations &SelfAliasing) const {
  SnippetPrototype Result;
  InstructionInstance Instance(Instr);
  if (!SelfAliasing.hasImplicitAliasing()) {
    Result.Explanation = "explicit self cycles, selecting one aliasing Conf.";
    // Defs and uses both come from the one instance of this self-aliasing
    // instruction, which is why it is passed twice below.
    setRandomAliasing(SelfAliasing, Instance, Instance);
  } else {
    Result.Explanation = "implicit Self cycles, picking random values.";
  }

  Result.Snippet.push_back(std::move(Instance));
  // Explicit move retained, as in the original implementation.
  return std::move(Result);
}
|
||||
|
||||
llvm::Expected<SnippetPrototype>
|
||||
LatencyBenchmarkRunner::generateTwoInstructionPrototype(
|
||||
const Instruction &Instr,
|
||||
const AliasingConfigurations &SelfAliasing) const {
|
||||
const Instruction &Instr) const {
|
||||
std::vector<unsigned> Opcodes;
|
||||
Opcodes.resize(State.getInstrInfo().getNumOpcodes());
|
||||
std::iota(Opcodes.begin(), Opcodes.end(), 0U);
|
||||
@ -89,8 +69,9 @@ LatencyBenchmarkRunner::generateTwoInstructionPrototype(
|
||||
if (!Back.hasImplicitAliasing())
|
||||
setRandomAliasing(Back, OtherII, ThisII);
|
||||
SnippetPrototype Prototype;
|
||||
Prototype.Explanation = llvm::formatv("creating cycle through {0}.",
|
||||
State.getInstrInfo().getName(OtherOpcode));
|
||||
Prototype.Explanation =
|
||||
llvm::formatv("creating cycle through {0}.",
|
||||
State.getInstrInfo().getName(OtherOpcode));
|
||||
Prototype.Snippet.push_back(std::move(ThisII));
|
||||
Prototype.Snippet.push_back(std::move(OtherII));
|
||||
return std::move(Prototype);
|
||||
@ -105,13 +86,12 @@ LatencyBenchmarkRunner::generatePrototype(unsigned Opcode) const {
|
||||
if (auto E = isInfeasible(InstrDesc))
|
||||
return std::move(E);
|
||||
const Instruction Instr(InstrDesc, RATC);
|
||||
const AliasingConfigurations SelfAliasing(Instr, Instr);
|
||||
if (SelfAliasing.empty()) {
|
||||
// No self aliasing, trying to create a dependency through another opcode.
|
||||
return generateTwoInstructionPrototype(Instr, SelfAliasing);
|
||||
} else {
|
||||
return generateSelfAliasingPrototype(Instr, SelfAliasing);
|
||||
}
|
||||
if (auto SelfAliasingPrototype = generateSelfAliasingPrototype(Instr))
|
||||
return SelfAliasingPrototype;
|
||||
else
|
||||
llvm::consumeError(SelfAliasingPrototype.takeError());
|
||||
// No self aliasing, trying to create a dependency through another opcode.
|
||||
return generateTwoInstructionPrototype(Instr);
|
||||
}
|
||||
|
||||
std::vector<BenchmarkMeasure>
|
||||
|
@ -32,13 +32,8 @@ public:
|
||||
private:
|
||||
llvm::Error isInfeasible(const llvm::MCInstrDesc &MCInstrDesc) const;
|
||||
|
||||
llvm::Expected<SnippetPrototype> generateSelfAliasingPrototype(
|
||||
const Instruction &Instr,
|
||||
const AliasingConfigurations &SelfAliasing) const;
|
||||
|
||||
llvm::Expected<SnippetPrototype> generateTwoInstructionPrototype(
|
||||
const Instruction &Instr,
|
||||
const AliasingConfigurations &SelfAliasing) const;
|
||||
const Instruction &Instr) const;
|
||||
|
||||
std::vector<BenchmarkMeasure>
|
||||
runMeasurements(const ExecutableFunction &EF,
|
||||
|
@ -10,6 +10,7 @@
|
||||
|
||||
#include "../Latency.h"
|
||||
#include "../Uops.h"
|
||||
#include "MCTargetDesc/X86BaseInfo.h"
|
||||
#include "MCTargetDesc/X86MCTargetDesc.h"
|
||||
#include "X86.h"
|
||||
#include "X86RegisterInfo.h"
|
||||
@ -17,43 +18,107 @@
|
||||
|
||||
namespace exegesis {
|
||||
|
||||
// Test whether we can generate a snippet for this instruction.
|
||||
/// Checks whether a snippet may be generated for \p Opcode.
///
/// \returns a BenchmarkFailure when the opcode name starts with "POPF",
/// "PUSHF" or "ADJCALLSTACK", and success otherwise.
static llvm::Error shouldRun(const LLVMState &State, const unsigned Opcode) {
  // Name prefixes of the opcodes that are rejected.
  static const char *const RejectedPrefixes[] = {"POPF", "PUSHF",
                                                 "ADJCALLSTACK"};

  const auto Name = State.getInstrInfo().getName(Opcode);
  for (const char *Prefix : RejectedPrefixes) {
    if (Name.startswith(Prefix))
      return llvm::make_error<BenchmarkFailure>(
          "Unsupported opcode: Push/Pop/AdjCallStack");
  }
  return llvm::Error::success();
}
|
||||
|
||||
namespace {
|
||||
|
||||
class X86LatencyBenchmarkRunner : public LatencyBenchmarkRunner {
|
||||
private:
|
||||
using LatencyBenchmarkRunner::LatencyBenchmarkRunner;
|
||||
// Common code for X86 Uops and Latency runners.
|
||||
template <typename Impl> class X86BenchmarkRunner : public Impl {
|
||||
using Impl::Impl;
|
||||
|
||||
llvm::Expected<SnippetPrototype>
|
||||
generatePrototype(unsigned Opcode) const override {
|
||||
if (llvm::Error E = shouldRun(State, Opcode)) {
|
||||
return std::move(E);
|
||||
// Test whether we can generate a snippet for this instruction.
|
||||
const auto &InstrInfo = this->State.getInstrInfo();
|
||||
const auto OpcodeName = InstrInfo.getName(Opcode);
|
||||
if (OpcodeName.startswith("POPF") || OpcodeName.startswith("PUSHF") ||
|
||||
OpcodeName.startswith("ADJCALLSTACK")) {
|
||||
return llvm::make_error<BenchmarkFailure>(
|
||||
"Unsupported opcode: Push/Pop/AdjCallStack");
|
||||
}
|
||||
return LatencyBenchmarkRunner::generatePrototype(Opcode);
|
||||
|
||||
// Handle X87.
|
||||
const auto &InstrDesc = InstrInfo.get(Opcode);
|
||||
const unsigned FPInstClass = InstrDesc.TSFlags & llvm::X86II::FPTypeMask;
|
||||
const Instruction Instr(InstrDesc, this->RATC);
|
||||
switch (FPInstClass) {
|
||||
case llvm::X86II::NotFP:
|
||||
break;
|
||||
case llvm::X86II::ZeroArgFP:
|
||||
return Impl::handleZeroArgFP(Instr);
|
||||
case llvm::X86II::OneArgFP:
|
||||
return Impl::handleOneArgFP(Instr); // fstp ST(0)
|
||||
case llvm::X86II::OneArgFPRW:
|
||||
case llvm::X86II::TwoArgFP: {
|
||||
// These are instructions like
|
||||
// - `ST(0) = fsqrt(ST(0))` (OneArgFPRW)
|
||||
// - `ST(0) = ST(0) + ST(i)` (TwoArgFP)
|
||||
// They are intrinsically serial and do not modify the state of the stack.
|
||||
// We generate the same code for latency and uops.
|
||||
return this->generateSelfAliasingPrototype(Instr);
|
||||
}
|
||||
case llvm::X86II::CompareFP:
|
||||
return Impl::handleCompareFP(Instr);
|
||||
case llvm::X86II::CondMovFP:
|
||||
return Impl::handleCondMovFP(Instr);
|
||||
case llvm::X86II::SpecialFP:
|
||||
return Impl::handleSpecialFP(Instr);
|
||||
default:
|
||||
llvm_unreachable("Unknown FP Type!");
|
||||
}
|
||||
|
||||
// Fallback to generic implementation.
|
||||
return Impl::Base::generatePrototype(Opcode);
|
||||
}
|
||||
};
|
||||
|
||||
class X86UopsBenchmarkRunner : public UopsBenchmarkRunner {
|
||||
private:
|
||||
using UopsBenchmarkRunner::UopsBenchmarkRunner;
|
||||
|
||||
class X86LatencyImpl : public LatencyBenchmarkRunner {
|
||||
protected:
|
||||
using Base = LatencyBenchmarkRunner;
|
||||
using Base::Base;
|
||||
llvm::Expected<SnippetPrototype>
|
||||
generatePrototype(unsigned Opcode) const override {
|
||||
if (llvm::Error E = shouldRun(State, Opcode)) {
|
||||
return std::move(E);
|
||||
}
|
||||
return UopsBenchmarkRunner::generatePrototype(Opcode);
|
||||
handleZeroArgFP(const Instruction &Instr) const {
|
||||
return llvm::make_error<BenchmarkFailure>("Unsupported x87 ZeroArgFP");
|
||||
}
|
||||
llvm::Expected<SnippetPrototype>
|
||||
handleOneArgFP(const Instruction &Instr) const {
|
||||
return llvm::make_error<BenchmarkFailure>("Unsupported x87 OneArgFP");
|
||||
}
|
||||
llvm::Expected<SnippetPrototype>
|
||||
handleCompareFP(const Instruction &Instr) const {
|
||||
return llvm::make_error<BenchmarkFailure>("Unsupported x87 CompareFP");
|
||||
}
|
||||
llvm::Expected<SnippetPrototype>
|
||||
handleCondMovFP(const Instruction &Instr) const {
|
||||
return llvm::make_error<BenchmarkFailure>("Unsupported x87 CondMovFP");
|
||||
}
|
||||
llvm::Expected<SnippetPrototype>
|
||||
handleSpecialFP(const Instruction &Instr) const {
|
||||
return llvm::make_error<BenchmarkFailure>("Unsupported x87 SpecialFP");
|
||||
}
|
||||
};
|
||||
|
||||
class X86UopsImpl : public UopsBenchmarkRunner {
|
||||
protected:
|
||||
using Base = UopsBenchmarkRunner;
|
||||
using Base::Base;
|
||||
llvm::Expected<SnippetPrototype>
|
||||
handleZeroArgFP(const Instruction &Instr) const {
|
||||
return llvm::make_error<BenchmarkFailure>("Unsupported x87 ZeroArgFP");
|
||||
}
|
||||
llvm::Expected<SnippetPrototype>
|
||||
handleOneArgFP(const Instruction &Instr) const {
|
||||
return llvm::make_error<BenchmarkFailure>("Unsupported x87 OneArgFP");
|
||||
}
|
||||
llvm::Expected<SnippetPrototype>
|
||||
handleCompareFP(const Instruction &Instr) const {
|
||||
return llvm::make_error<BenchmarkFailure>("Unsupported x87 CompareFP");
|
||||
}
|
||||
llvm::Expected<SnippetPrototype>
|
||||
handleCondMovFP(const Instruction &Instr) const {
|
||||
return llvm::make_error<BenchmarkFailure>("Unsupported x87 CondMovFP");
|
||||
}
|
||||
llvm::Expected<SnippetPrototype>
|
||||
handleSpecialFP(const Instruction &Instr) const {
|
||||
return llvm::make_error<BenchmarkFailure>("Unsupported x87 SpecialFP");
|
||||
}
|
||||
};
|
||||
|
||||
@ -62,15 +127,11 @@ class ExegesisX86Target : public ExegesisTarget {
|
||||
// Lowers FP pseudo-instructions, e.g. ABS_Fp32 -> ABS_F.
|
||||
// FIXME: Enable when the exegesis assembler no longer does
|
||||
// Properties.reset(TracksLiveness);
|
||||
// PM.add(llvm::createX86FloatingPointStackifierPass());
|
||||
PM.add(llvm::createX86FloatingPointStackifierPass());
|
||||
}
|
||||
|
||||
std::vector<llvm::MCInst>
|
||||
setRegToConstant(unsigned Reg) const override {
|
||||
// FIXME: Handle FP stack:
|
||||
// llvm::X86::RFP32RegClass
|
||||
// llvm::X86::RFP64RegClass
|
||||
// llvm::X86::RFP80RegClass
|
||||
if (llvm::X86::GR8RegClass.contains(Reg)) {
|
||||
return {llvm::MCInstBuilder(llvm::X86::MOV8ri).addReg(Reg).addImm(1)};
|
||||
}
|
||||
@ -92,17 +153,23 @@ class ExegesisX86Target : public ExegesisTarget {
|
||||
if (llvm::X86::VR512RegClass.contains(Reg)) {
|
||||
return setVectorRegToConstant(Reg, 64, llvm::X86::VMOVDQU64Zrm);
|
||||
}
|
||||
if (llvm::X86::RFP32RegClass.contains(Reg) ||
|
||||
llvm::X86::RFP64RegClass.contains(Reg) ||
|
||||
llvm::X86::RFP80RegClass.contains(Reg)) {
|
||||
return setVectorRegToConstant(Reg, 8, llvm::X86::LD_Fp64m);
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
std::unique_ptr<BenchmarkRunner>
|
||||
createLatencyBenchmarkRunner(const LLVMState &State) const override {
|
||||
return llvm::make_unique<X86LatencyBenchmarkRunner>(State);
|
||||
return llvm::make_unique<X86BenchmarkRunner<X86LatencyImpl>>(
|
||||
State);
|
||||
}
|
||||
|
||||
std::unique_ptr<BenchmarkRunner>
|
||||
createUopsBenchmarkRunner(const LLVMState &State) const override {
|
||||
return llvm::make_unique<X86UopsBenchmarkRunner>(State);
|
||||
return llvm::make_unique<X86BenchmarkRunner<X86UopsImpl>>(State);
|
||||
}
|
||||
|
||||
bool matchesArch(llvm::Triple::ArchType Arch) const override {
|
||||
|
Loading…
x
Reference in New Issue
Block a user