[llvm-exegesis] Computing Latency configuration upfront so we can generate many CodeTemplates at once.

Summary: LatencyGenerator now computes all possible modes of serial execution for an Instruction upfront and generates CodeTemplates for the ones that give the best results (e.g. no need to generate a two-instruction snippet when repeating a single one would do). The next step is to generate even more configurations in some cases (e.g. for XOR we should generate both "XOR EAX, EAX, EAX" and "XOR EAX, EAX, EBX").

Reviewers: courbet

Reviewed By: courbet

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D53320

llvm-svn: 344689
This commit is contained in:
Guillaume Chatelet 2018-10-17 11:37:28 +00:00
parent 7472803a23
commit ee936f9a64
9 changed files with 427 additions and 113 deletions

View File

@ -65,4 +65,54 @@ llvm::MCInst InstructionTemplate::build() const {
return Result;
}
// Returns true when exactly one bit of Execution is set, i.e. when it names a
// single ExecutionMode flag rather than a combination of flags.
// ExecutionMode::UNKNOWN has no bit set and therefore yields false.
bool isEnumValue(ExecutionMode Execution) {
  const auto RawBits = static_cast<uint32_t>(Execution);
  return llvm::isPowerOf2_32(RawBits);
}
// Returns the enumerator's identifier as a string for a single ExecutionMode
// flag. Bit must satisfy isEnumValue (checked by the assert below); values
// combining several flags are not supported.
llvm::StringRef getName(ExecutionMode Bit) {
assert(isEnumValue(Bit) && "Bit must be a power of two");
switch (Bit) {
// NOTE(review): UNKNOWN is 0, which isEnumValue rejects, so this case looks
// reachable only when asserts are compiled out. It presumably exists to keep
// the switch exhaustive over the enumerators - confirm.
case ExecutionMode::UNKNOWN:
return "UNKNOWN";
case ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS:
return "ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS";
case ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS:
return "ALWAYS_SERIAL_TIED_REGS_ALIAS";
case ExecutionMode::SERIAL_VIA_MEMORY_INSTR:
return "SERIAL_VIA_MEMORY_INSTR";
case ExecutionMode::SERIAL_VIA_EXPLICIT_REGS:
return "SERIAL_VIA_EXPLICIT_REGS";
case ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR:
return "SERIAL_VIA_NON_MEMORY_INSTR";
case ExecutionMode::ALWAYS_PARALLEL_MISSING_USE_OR_DEF:
return "ALWAYS_PARALLEL_MISSING_USE_OR_DEF";
case ExecutionMode::PARALLEL_VIA_EXPLICIT_REGS:
return "PARALLEL_VIA_EXPLICIT_REGS";
}
// Only reached when Bit matches none of the cases above.
llvm_unreachable("Missing enum case");
}
// Every single-bit ExecutionMode flag, listed from the lowest bit to the
// highest. ExecutionMode::UNKNOWN is not listed since it has no bit set.
static const ExecutionMode kAllExecutionModeBits[] = {
    ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS,
    ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS,
    ExecutionMode::SERIAL_VIA_MEMORY_INSTR,
    ExecutionMode::SERIAL_VIA_EXPLICIT_REGS,
    ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR,
    ExecutionMode::ALWAYS_PARALLEL_MISSING_USE_OR_DEF,
    ExecutionMode::PARALLEL_VIA_EXPLICIT_REGS,
};

// Exposes the table of flags as a non-owning view over the static array.
llvm::ArrayRef<ExecutionMode> getAllExecutionBits() {
  return llvm::ArrayRef<ExecutionMode>(kAllExecutionModeBits);
}
// Splits a combination of flags into the individual flags it contains. The
// flags are reported in the order in which getAllExecutionBits() lists them;
// an Execution with no flag set produces an empty vector.
llvm::SmallVector<ExecutionMode, 4>
getExecutionModeBits(ExecutionMode Execution) {
  llvm::SmallVector<ExecutionMode, 4> SetBits;
  for (const ExecutionMode Candidate : getAllExecutionBits()) {
    const bool IsSet = (Execution & Candidate) == Candidate;
    if (!IsSet)
      continue;
    SetBits.push_back(Candidate);
  }
  return SetBits;
}
} // namespace exegesis

View File

@ -17,6 +17,7 @@
#define LLVM_TOOLS_LLVM_EXEGESIS_CODETEMPLATE_H
#include "MCInstrDescView.h"
#include "llvm/ADT/BitmaskEnum.h"
namespace exegesis {
@ -45,9 +46,65 @@ struct InstructionTemplate {
llvm::SmallVector<llvm::MCOperand, 4> VariableValues;
};
// Describes the ways repeated execution of an instruction can be serial or
// parallel. Every enumerator other than UNKNOWN occupies its own bit so that
// several modes can be combined into a single value with bitwise operators.
enum class ExecutionMode : uint8_t {
// No flag set. This is the default value of CodeTemplate::Execution.
UNKNOWN = 0U,
// The instruction is always serial because implicit Use and Def alias.
// e.g. AAA (alias via EFLAGS)
ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS = 1u << 0,
// The instruction is always serial because one Def is tied to a Use.
// e.g. AND32ri (alias via tied GR32)
ALWAYS_SERIAL_TIED_REGS_ALIAS = 1u << 1,
// The execution can be made serial by inserting a second instruction that
// clobbers/reads memory.
// e.g. MOV8rm
SERIAL_VIA_MEMORY_INSTR = 1u << 2,
// The execution can be made serial by picking one Def that aliases with one
// Use.
// e.g. VXORPSrr XMM1, XMM1, XMM2
SERIAL_VIA_EXPLICIT_REGS = 1u << 3,
// The execution can be made serial by inserting a second instruction that
// uses one of the Defs and defs one of the Uses.
// e.g.
// 1st instruction: MMX_PMOVMSKBrr ECX, MM7
// 2nd instruction: MMX_MOVD64rr MM7, ECX
// or instruction: MMX_MOVD64to64rr MM7, ECX
// or instruction: MMX_PINSRWrr MM7, MM7, ECX, 1
SERIAL_VIA_NON_MEMORY_INSTR = 1u << 4,
// The execution is always parallel because the instruction is missing Use or
// Def operands.
ALWAYS_PARALLEL_MISSING_USE_OR_DEF = 1u << 5,
// The execution can be made parallel by repeating the same instruction but
// making sure that Defs of one instruction do not alias with Uses of the
// second one.
PARALLEL_VIA_EXPLICIT_REGS = 1u << 6,
// Marks this enum as a bitmask enum (see llvm/ADT/BitmaskEnum.h). The
// argument names the largest individual flag declared above.
LLVM_MARK_AS_BITMASK_ENUM(/*Largest*/ PARALLEL_VIA_EXPLICIT_REGS)
};
// Returns whether Execution has exactly one flag set, i.e. its value is a
// power of two. This is false for ExecutionMode::UNKNOWN (no flag set) and for
// any combination of several flags.
bool isEnumValue(ExecutionMode Execution);
// Returns a human readable string for a single flag of the enum.
llvm::StringRef getName(ExecutionMode Execution);
// Returns a sequence of increasing powers of two corresponding to all the
// Execution flags.
llvm::ArrayRef<ExecutionMode> getAllExecutionBits();
// Decomposes Execution into individual set bits.
llvm::SmallVector<ExecutionMode, 4> getExecutionModeBits(ExecutionMode);
// Makes the bitwise operators for bitmask enums available in this namespace
// (see llvm/ADT/BitmaskEnum.h).
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
// A CodeTemplate is a set of InstructionTemplates that may not be fully
// specified (i.e. some variables are not yet set). This allows the
// BenchmarkRunner to instantiate it many times with specific values to study
// SnippetGenerator to instantiate it many times with specific values to study
// their impact on instruction's performance.
struct CodeTemplate {
CodeTemplate() = default;
@ -57,6 +114,7 @@ struct CodeTemplate {
CodeTemplate(const CodeTemplate &) = delete;
CodeTemplate &operator=(const CodeTemplate &) = delete;
ExecutionMode Execution = ExecutionMode::UNKNOWN;
// Some information about how this template has been created.
std::string Info;
// The list of the instructions for this template.

View File

@ -20,53 +20,148 @@
namespace exegesis {
LatencySnippetGenerator::~LatencySnippetGenerator() = default;
// A group of execution modes that share one description.
struct ExecutionClass {
  // Union of the ExecutionMode flags belonging to this class.
  ExecutionMode Mask;
  // Text recorded in CodeTemplate::Info for templates built from this class.
  const char *Description;
};

// The classes are listed starting with the ones that only need a single
// repeated instruction and ending with the one that needs two instructions.
static const ExecutionClass kExecutionClasses[] = {
    {ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS |
         ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS,
     "Repeating a single implicitly serial instruction"},
    {ExecutionMode::SERIAL_VIA_EXPLICIT_REGS,
     "Repeating a single explicitly serial instruction"},
    {ExecutionMode::SERIAL_VIA_MEMORY_INSTR |
         ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR,
     "Repeating two instructions"},
};
llvm::Expected<std::vector<CodeTemplate>>
generateTwoInstructionPrototypes(const LLVMState &State,
const Instruction &Instr) {
static constexpr size_t kMaxAliasingInstructions = 10;
static std::vector<Instruction>
computeAliasingInstructions(const LLVMState &State, const Instruction &Instr,
size_t MaxAliasingInstructions) {
// Randomly iterate the set of instructions.
std::vector<unsigned> Opcodes;
Opcodes.resize(State.getInstrInfo().getNumOpcodes());
std::iota(Opcodes.begin(), Opcodes.end(), 0U);
std::shuffle(Opcodes.begin(), Opcodes.end(), randomGenerator());
std::vector<Instruction> AliasingInstructions;
for (const unsigned OtherOpcode : Opcodes) {
if (OtherOpcode == Instr.Description->Opcode)
if (OtherOpcode == Instr.Description->getOpcode())
continue;
const Instruction OtherInstr(State, OtherOpcode);
if (OtherInstr.hasMemoryOperands())
continue;
const AliasingConfigurations Forward(Instr, OtherInstr);
const AliasingConfigurations Back(OtherInstr, Instr);
if (Forward.empty() || Back.empty())
continue;
InstructionTemplate ThisIT(Instr);
InstructionTemplate OtherIT(OtherInstr);
if (!Forward.hasImplicitAliasing())
setRandomAliasing(Forward, ThisIT, OtherIT);
if (!Back.hasImplicitAliasing())
setRandomAliasing(Back, OtherIT, ThisIT);
CodeTemplate CT;
CT.Info = llvm::formatv("creating cycle through {0}.",
State.getInstrInfo().getName(OtherOpcode));
CT.Instructions.push_back(std::move(ThisIT));
CT.Instructions.push_back(std::move(OtherIT));
return getSingleton(CT);
if (Instr.hasAliasingRegistersThrough(OtherInstr))
AliasingInstructions.push_back(std::move(OtherInstr));
if (AliasingInstructions.size() >= MaxAliasingInstructions)
break;
}
return llvm::make_error<BenchmarkFailure>(
"Infeasible : Didn't find any scheme to make the instruction serial");
return AliasingInstructions;
}
// Computes, as a combination of ExecutionMode flags, every way in which
// repeated execution of Instr is or can be made serial:
//  - ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS if implicit registers alias,
//  - ALWAYS_SERIAL_TIED_REGS_ALIAS if the instruction has tied registers,
//  - SERIAL_VIA_MEMORY_INSTR if it has memory operands,
//  - otherwise SERIAL_VIA_EXPLICIT_REGS and/or SERIAL_VIA_NON_MEMORY_INSTR
//    depending on its register operands.
// Returns ExecutionMode::UNKNOWN when none of the above applies.
static ExecutionMode getExecutionModes(const Instruction &Instr) {
  // Start from the empty set. The flags below are OR-ed into EM, so leaving it
  // uninitialized would build the result on top of an indeterminate value.
  ExecutionMode EM = ExecutionMode::UNKNOWN;
  if (Instr.hasAliasingImplicitRegisters())
    EM |= ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS;
  if (Instr.hasTiedRegisters())
    EM |= ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS;
  if (Instr.hasMemoryOperands())
    EM |= ExecutionMode::SERIAL_VIA_MEMORY_INSTR;
  else {
    // The register based strategies are only considered for instructions
    // without memory operands.
    if (Instr.hasAliasingRegisters())
      EM |= ExecutionMode::SERIAL_VIA_EXPLICIT_REGS;
    if (Instr.hasOneUseOrOneDef())
      EM |= ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR;
  }
  return EM;
}
static void appendCodeTemplates(const LLVMState &State,
const Instruction &Instr,
ExecutionMode ExecutionModeBit,
llvm::StringRef ExecutionClassDescription,
std::vector<CodeTemplate> &CodeTemplates) {
assert(isEnumValue(ExecutionModeBit) && "Bit must be a power of two");
switch (ExecutionModeBit) {
case ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS:
// Nothing to do, the instruction is always serial.
LLVM_FALLTHROUGH;
case ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS: {
// Picking whatever value for the tied variable will make the instruction
// serial.
CodeTemplate CT;
CT.Execution = ExecutionModeBit;
CT.Info = ExecutionClassDescription;
CT.Instructions.push_back(Instr);
CodeTemplates.push_back(std::move(CT));
return;
}
case ExecutionMode::SERIAL_VIA_MEMORY_INSTR: {
// Select back-to-back memory instruction.
// TODO: Implement me.
return;
}
case ExecutionMode::SERIAL_VIA_EXPLICIT_REGS: {
// Making the execution of this instruction serial by selecting one def
// register to alias with one use register.
const AliasingConfigurations SelfAliasing(Instr, Instr);
assert(!SelfAliasing.empty() && !SelfAliasing.hasImplicitAliasing() &&
"Instr must alias itself explicitly");
InstructionTemplate IT(Instr);
// This is a self aliasing instruction so defs and uses are from the same
// instance, hence twice IT in the following call.
setRandomAliasing(SelfAliasing, IT, IT);
CodeTemplate CT;
CT.Execution = ExecutionModeBit;
CT.Info = ExecutionClassDescription;
CT.Instructions.push_back(std::move(IT));
CodeTemplates.push_back(std::move(CT));
return;
}
case ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR: {
// Select back-to-back non-memory instruction.
for (const auto OtherInstr :
computeAliasingInstructions(State, Instr, kMaxAliasingInstructions)) {
const AliasingConfigurations Forward(Instr, OtherInstr);
const AliasingConfigurations Back(OtherInstr, Instr);
InstructionTemplate ThisIT(Instr);
InstructionTemplate OtherIT(OtherInstr);
if (!Forward.hasImplicitAliasing())
setRandomAliasing(Forward, ThisIT, OtherIT);
if (!Back.hasImplicitAliasing())
setRandomAliasing(Back, OtherIT, ThisIT);
CodeTemplate CT;
CT.Execution = ExecutionModeBit;
CT.Info = ExecutionClassDescription;
CT.Instructions.push_back(std::move(ThisIT));
CT.Instructions.push_back(std::move(OtherIT));
CodeTemplates.push_back(std::move(CT));
}
return;
}
default:
llvm_unreachable("Unhandled enum value");
}
}
LatencySnippetGenerator::~LatencySnippetGenerator() = default;
llvm::Expected<std::vector<CodeTemplate>>
LatencySnippetGenerator::generateCodeTemplates(const Instruction &Instr) const {
if (Instr.hasMemoryOperands())
std::vector<CodeTemplate> Results;
const ExecutionMode EM = getExecutionModes(Instr);
for (const auto EC : kExecutionClasses) {
for (const auto ExecutionModeBit : getExecutionModeBits(EM & EC.Mask))
appendCodeTemplates(State, Instr, ExecutionModeBit, EC.Description,
Results);
if (!Results.empty())
break;
}
if (Results.empty())
return llvm::make_error<BenchmarkFailure>(
"Infeasible : has memory operands");
return llvm::handleExpected( //
generateSelfAliasingCodeTemplates(Instr),
[this, &Instr]() {
return generateTwoInstructionPrototypes(State, Instr);
},
[](const BenchmarkFailure &) { /*Consume Error*/ });
"No strategy found to make the execution serial");
return std::move(Results);
}
const char *LatencyBenchmarkRunner::getCounterName() const {

View File

@ -27,7 +27,14 @@ unsigned Variable::getPrimaryOperandIndex() const {
return TiedOperands[0];
}
bool Variable::hasTiedOperands() const { return TiedOperands.size() > 1; }
// Returns true when this variable groups more than one operand, i.e. when a
// Def operand and a Use operand are tied together.
bool Variable::hasTiedOperands() const {
  const auto NumOperands = TiedOperands.size();
  // By definition only Use and Def operands can be tied together.
  // TiedOperands[0] is the Def operand (LLVM stores defs first).
  // TiedOperands[1] is the Use operand.
  assert(NumOperands <= 2 &&
         "No more than two operands can be tied together");
  return NumOperands > 1;
}
unsigned Operand::getIndex() const {
assert(Index >= 0 && "Index must be set");
@ -197,6 +204,10 @@ bool Instruction::hasAliasingRegisters() const {
return AllDefRegs.anyCommon(AllUseRegs);
}
// Returns true when at least one register is set in AllDefRegs or in
// AllUseRegs.
bool Instruction::hasOneUseOrOneDef() const {
  // any() can stop at the first set bit, whereas count() tallies every bit
  // just to be compared against zero.
  return AllDefRegs.any() || AllUseRegs.any();
}
void Instruction::dump(const llvm::MCRegisterInfo &RegInfo,
llvm::raw_ostream &Stream) const {
Stream << "- " << Name << "\n";
@ -288,8 +299,7 @@ bool AliasingConfigurations::hasImplicitAliasing() const {
}
AliasingConfigurations::AliasingConfigurations(
const Instruction &DefInstruction, const Instruction &UseInstruction)
: DefInstruction(DefInstruction), UseInstruction(UseInstruction) {
const Instruction &DefInstruction, const Instruction &UseInstruction) {
if (UseInstruction.AllUseRegs.anyCommon(DefInstruction.AllDefRegs)) {
auto CommonRegisters = UseInstruction.AllUseRegs;
CommonRegisters &= DefInstruction.AllDefRegs;

View File

@ -125,6 +125,11 @@ struct Instruction {
// reads or write the same memory region.
bool hasMemoryOperands() const;
// Returns whether this instruction has at least one use or one def.
// Repeating this instruction may execute sequentially by adding an
// instruction that aliases one of these.
bool hasOneUseOrOneDef() const;
// Convenient function to help with debugging.
void dump(const llvm::MCRegisterInfo &RegInfo,
llvm::raw_ostream &Stream) const;
@ -174,10 +179,7 @@ struct AliasingConfigurations {
bool empty() const; // True if no aliasing configuration is found.
bool hasImplicitAliasing() const;
void setExplicitAliasing() const;
const Instruction &DefInstruction;
const Instruction &UseInstruction;
llvm::SmallVector<AliasingRegisterOperands, 32> Configurations;
};

View File

@ -22,7 +22,7 @@
namespace exegesis {
std::vector<CodeTemplate> getSingleton(CodeTemplate &CT) {
std::vector<CodeTemplate> getSingleton(CodeTemplate &&CT) {
std::vector<CodeTemplate> Result;
Result.push_back(std::move(CT));
return Result;

View File

@ -30,7 +30,7 @@
namespace exegesis {
std::vector<CodeTemplate> getSingleton(CodeTemplate &CT);
std::vector<CodeTemplate> getSingleton(CodeTemplate &&CT);
// Generates code templates that has a self-dependency.
llvm::Expected<std::vector<CodeTemplate>>

View File

@ -153,13 +153,13 @@ UopsSnippetGenerator::generateCodeTemplates(const Instruction &Instr) const {
CT.Info = "instruction is parallel, repeating a random one.";
CT.Instructions.push_back(std::move(IT));
instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions);
return getSingleton(CT);
return getSingleton(std::move(CT));
}
if (SelfAliasing.hasImplicitAliasing()) {
CT.Info = "instruction is serial, repeating a random one.";
CT.Instructions.push_back(std::move(IT));
instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions);
return getSingleton(CT);
return getSingleton(std::move(CT));
}
const auto TiedVariables = getVariablesWithTiedOperands(Instr);
if (!TiedVariables.empty()) {
@ -181,7 +181,7 @@ UopsSnippetGenerator::generateCodeTemplates(const Instruction &Instr) const {
CT.Instructions.push_back(std::move(TmpIT));
}
instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions);
return getSingleton(CT);
return getSingleton(std::move(CT));
}
const auto &ReservedRegisters = State.getRATC().reservedRegisters();
// No tied variables, we pick random values for defs.
@ -218,7 +218,7 @@ UopsSnippetGenerator::generateCodeTemplates(const Instruction &Instr) const {
"instruction has no tied variables picking Uses different from defs";
CT.Instructions.push_back(std::move(IT));
instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions);
return getSingleton(CT);
return getSingleton(std::move(CT));
}
std::vector<BenchmarkMeasure>

View File

@ -25,6 +25,7 @@ namespace {
using testing::AnyOf;
using testing::ElementsAre;
using testing::Gt;
using testing::HasSubstr;
using testing::Not;
using testing::SizeIs;
@ -57,14 +58,12 @@ class SnippetGeneratorTest : public X86SnippetGeneratorTest {
protected:
SnippetGeneratorTest() : Generator(State) {}
CodeTemplate checkAndGetCodeTemplate(unsigned Opcode) {
std::vector<CodeTemplate> checkAndGetCodeTemplates(unsigned Opcode) {
randomGenerator().seed(0); // Initialize seed.
const Instruction Instr(State, Opcode);
auto CodeTemplateOrError = Generator.generateCodeTemplates(Instr);
EXPECT_FALSE(CodeTemplateOrError.takeError()); // Valid configuration.
auto &CodeTemplate = CodeTemplateOrError.get();
EXPECT_EQ(CodeTemplate.size(), 1U);
return std::move(CodeTemplate.front());
return std::move(CodeTemplateOrError.get());
}
SnippetGeneratorT Generator;
@ -75,21 +74,25 @@ using LatencySnippetGeneratorTest =
using UopsSnippetGeneratorTest = SnippetGeneratorTest<UopsSnippetGenerator>;
TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependency) {
// ADC16i16 self alias because of implicit use and def.
// explicit use 0 : imm
// implicit def : AX
// implicit def : EFLAGS
// implicit use : AX
// implicit use : EFLAGS
TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependencyThroughImplicitReg) {
// - ADC16i16
// - Op0 Explicit Use Immediate
// - Op1 Implicit Def Reg(AX)
// - Op2 Implicit Def Reg(EFLAGS)
// - Op3 Implicit Use Reg(AX)
// - Op4 Implicit Use Reg(EFLAGS)
// - Var0 [Op0]
// - hasAliasingImplicitRegisters (execution is always serial)
// - hasAliasingRegisters
const unsigned Opcode = llvm::X86::ADC16i16;
EXPECT_THAT(MCInstrInfo.get(Opcode).getImplicitDefs()[0], llvm::X86::AX);
EXPECT_THAT(MCInstrInfo.get(Opcode).getImplicitDefs()[1], llvm::X86::EFLAGS);
EXPECT_THAT(MCInstrInfo.get(Opcode).getImplicitUses()[0], llvm::X86::AX);
EXPECT_THAT(MCInstrInfo.get(Opcode).getImplicitUses()[1], llvm::X86::EFLAGS);
const CodeTemplate CT = checkAndGetCodeTemplate(Opcode);
EXPECT_THAT(CT.Info, HasSubstr("implicit"));
const auto CodeTemplates = checkAndGetCodeTemplates(Opcode);
ASSERT_THAT(CodeTemplates, SizeIs(1));
const auto &CT = CodeTemplates[0];
EXPECT_THAT(CT.Execution, ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS);
ASSERT_THAT(CT.Instructions, SizeIs(1));
const InstructionTemplate &IT = CT.Instructions[0];
EXPECT_THAT(IT.getOpcode(), Opcode);
@ -97,63 +100,105 @@ TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependency) {
EXPECT_THAT(IT.VariableValues[0], IsInvalid()) << "Immediate is not set";
}
TEST_F(LatencySnippetGeneratorTest, ExplicitSelfDependency) {
// ADD16ri self alias because Op0 and Op1 are tied together.
// explicit def 0 : reg RegClass=GR16
// explicit use 1 : reg RegClass=GR16 | TIED_TO:0
// explicit use 2 : imm
// implicit def : EFLAGS
TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependencyThroughTiedRegs) {
// - ADD16ri
// - Op0 Explicit Def RegClass(GR16)
// - Op1 Explicit Use RegClass(GR16) TiedToOp0
// - Op2 Explicit Use Immediate
// - Op3 Implicit Def Reg(EFLAGS)
// - Var0 [Op0,Op1]
// - Var1 [Op2]
// - hasTiedRegisters (execution is always serial)
// - hasAliasingRegisters
const unsigned Opcode = llvm::X86::ADD16ri;
EXPECT_THAT(MCInstrInfo.get(Opcode).getImplicitDefs()[0], llvm::X86::EFLAGS);
const CodeTemplate CT = checkAndGetCodeTemplate(Opcode);
EXPECT_THAT(CT.Info, HasSubstr("explicit"));
const auto CodeTemplates = checkAndGetCodeTemplates(Opcode);
ASSERT_THAT(CodeTemplates, SizeIs(1));
const auto &CT = CodeTemplates[0];
EXPECT_THAT(CT.Execution, ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS);
ASSERT_THAT(CT.Instructions, SizeIs(1));
const InstructionTemplate &IT = CT.Instructions[0];
EXPECT_THAT(IT.getOpcode(), Opcode);
ASSERT_THAT(IT.VariableValues, SizeIs(2));
EXPECT_THAT(IT.VariableValues[0], IsReg()) << "Operand 0 and 1";
EXPECT_THAT(IT.VariableValues[0], IsInvalid()) << "Operand 1 is not set";
EXPECT_THAT(IT.VariableValues[1], IsInvalid()) << "Operand 2 is not set";
}
TEST_F(LatencySnippetGeneratorTest, DependencyThroughOtherOpcode) {
// CMP64rr
// explicit use 0 : reg RegClass=GR64
// explicit use 1 : reg RegClass=GR64
// implicit def : EFLAGS
const unsigned Opcode = llvm::X86::CMP64rr;
const CodeTemplate CT = checkAndGetCodeTemplate(Opcode);
EXPECT_THAT(CT.Info, HasSubstr("cycle through"));
ASSERT_THAT(CT.Instructions, SizeIs(2));
TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependencyThroughExplicitRegs) {
// - VXORPSrr
// - Op0 Explicit Def RegClass(VR128)
// - Op1 Explicit Use RegClass(VR128)
// - Op2 Explicit Use RegClass(VR128)
// - Var0 [Op0]
// - Var1 [Op1]
// - Var2 [Op2]
// - hasAliasingRegisters
const unsigned Opcode = llvm::X86::VXORPSrr;
const auto CodeTemplates = checkAndGetCodeTemplates(Opcode);
ASSERT_THAT(CodeTemplates, SizeIs(1));
const auto &CT = CodeTemplates[0];
EXPECT_THAT(CT.Execution, ExecutionMode::SERIAL_VIA_EXPLICIT_REGS);
ASSERT_THAT(CT.Instructions, SizeIs(1));
const InstructionTemplate &IT = CT.Instructions[0];
EXPECT_THAT(IT.getOpcode(), Opcode);
ASSERT_THAT(IT.VariableValues, SizeIs(2));
EXPECT_THAT(IT.VariableValues, AnyOf(ElementsAre(IsReg(), IsInvalid()),
ElementsAre(IsInvalid(), IsReg())));
EXPECT_THAT(CT.Instructions[1].getOpcode(), Not(Opcode));
// TODO: check that the two instructions alias each other.
ASSERT_THAT(IT.VariableValues, SizeIs(3));
EXPECT_THAT(IT.VariableValues,
AnyOf(ElementsAre(IsReg(), IsInvalid(), IsReg()),
ElementsAre(IsReg(), IsReg(), IsInvalid())))
<< "Op0 is either set to Op1 or to Op2";
}
// Checks the latency templates generated for CMP64rr: more than one template
// is expected, each tagged SERIAL_VIA_NON_MEMORY_INSTR and made of two
// instructions, the first being CMP64rr and the second a different opcode.
TEST_F(LatencySnippetGeneratorTest, DependencyThroughOtherOpcode) {
// - CMP64rr
// - Op0 Explicit Use RegClass(GR64)
// - Op1 Explicit Use RegClass(GR64)
// - Op2 Implicit Def Reg(EFLAGS)
// - Var0 [Op0]
// - Var1 [Op1]
const unsigned Opcode = llvm::X86::CMP64rr;
const auto CodeTemplates = checkAndGetCodeTemplates(Opcode);
ASSERT_THAT(CodeTemplates, SizeIs(Gt(1U))) << "Many templates are available";
for (const auto &CT : CodeTemplates) {
EXPECT_THAT(CT.Execution, ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR);
ASSERT_THAT(CT.Instructions, SizeIs(2));
const InstructionTemplate &IT = CT.Instructions[0];
EXPECT_THAT(IT.getOpcode(), Opcode);
ASSERT_THAT(IT.VariableValues, SizeIs(2));
// Exactly one of the two explicit use variables is expected to have been
// assigned a register; the other one must still be unset.
EXPECT_THAT(IT.VariableValues, AnyOf(ElementsAre(IsReg(), IsInvalid()),
ElementsAre(IsInvalid(), IsReg())));
EXPECT_THAT(CT.Instructions[1].getOpcode(), Not(Opcode));
// TODO: check that the two instructions alias each other.
}
}
TEST_F(LatencySnippetGeneratorTest, LAHF) {
// - LAHF
// - Op0 Implicit Def Reg(AH)
// - Op1 Implicit Use Reg(EFLAGS)
const unsigned Opcode = llvm::X86::LAHF;
const CodeTemplate CT = checkAndGetCodeTemplate(Opcode);
EXPECT_THAT(CT.Info, HasSubstr("cycle through"));
ASSERT_THAT(CT.Instructions, SizeIs(2));
const InstructionTemplate &IT = CT.Instructions[0];
EXPECT_THAT(IT.getOpcode(), Opcode);
ASSERT_THAT(IT.VariableValues, SizeIs(0));
const auto CodeTemplates = checkAndGetCodeTemplates(Opcode);
ASSERT_THAT(CodeTemplates, SizeIs(Gt(1U))) << "Many templates are available";
for (const auto &CT : CodeTemplates) {
EXPECT_THAT(CT.Execution, ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR);
ASSERT_THAT(CT.Instructions, SizeIs(2));
const InstructionTemplate &IT = CT.Instructions[0];
EXPECT_THAT(IT.getOpcode(), Opcode);
ASSERT_THAT(IT.VariableValues, SizeIs(0));
}
}
TEST_F(UopsSnippetGeneratorTest, ParallelInstruction) {
// BNDCL32rr is parallel no matter what.
// explicit use 0 : reg RegClass=BNDR
// explicit use 1 : reg RegClass=GR32
// - BNDCL32rr
// - Op0 Explicit Use RegClass(BNDR)
// - Op1 Explicit Use RegClass(GR32)
// - Var0 [Op0]
// - Var1 [Op1]
const unsigned Opcode = llvm::X86::BNDCL32rr;
const CodeTemplate CT = checkAndGetCodeTemplate(Opcode);
const auto CodeTemplates = checkAndGetCodeTemplates(Opcode);
ASSERT_THAT(CodeTemplates, SizeIs(1));
const auto &CT = CodeTemplates[0];
EXPECT_THAT(CT.Info, HasSubstr("parallel"));
EXPECT_THAT(CT.Execution, ExecutionMode::UNKNOWN);
ASSERT_THAT(CT.Instructions, SizeIs(1));
const InstructionTemplate &IT = CT.Instructions[0];
EXPECT_THAT(IT.getOpcode(), Opcode);
@ -163,14 +208,18 @@ TEST_F(UopsSnippetGeneratorTest, ParallelInstruction) {
}
TEST_F(UopsSnippetGeneratorTest, SerialInstruction) {
// CDQ is serial no matter what.
// implicit def : EAX
// implicit def : EDX
// implicit use : EAX
// - CDQ
// - Op0 Implicit Def Reg(EAX)
// - Op1 Implicit Def Reg(EDX)
// - Op2 Implicit Use Reg(EAX)
// - hasAliasingImplicitRegisters (execution is always serial)
// - hasAliasingRegisters
const unsigned Opcode = llvm::X86::CDQ;
const CodeTemplate CT = checkAndGetCodeTemplate(Opcode);
const auto CodeTemplates = checkAndGetCodeTemplates(Opcode);
ASSERT_THAT(CodeTemplates, SizeIs(1));
const auto &CT = CodeTemplates[0];
EXPECT_THAT(CT.Info, HasSubstr("serial"));
EXPECT_THAT(CT.Execution, ExecutionMode::UNKNOWN);
ASSERT_THAT(CT.Instructions, SizeIs(1));
const InstructionTemplate &IT = CT.Instructions[0];
EXPECT_THAT(IT.getOpcode(), Opcode);
@ -181,13 +230,21 @@ TEST_F(UopsSnippetGeneratorTest, StaticRenaming) {
// CMOVA32rr has tied variables, we enumerate the possible values to execute
// as many in parallel as possible.
// explicit def 0 : reg RegClass=GR32
// explicit use 1 : reg RegClass=GR32 | TIED_TO:0
// explicit use 2 : reg RegClass=GR32
// implicit use : EFLAGS
// - CMOVA32rr
// - Op0 Explicit Def RegClass(GR32)
// - Op1 Explicit Use RegClass(GR32) TiedToOp0
// - Op2 Explicit Use RegClass(GR32)
// - Op3 Implicit Use Reg(EFLAGS)
// - Var0 [Op0,Op1]
// - Var1 [Op2]
// - hasTiedRegisters (execution is always serial)
// - hasAliasingRegisters
const unsigned Opcode = llvm::X86::CMOVA32rr;
const CodeTemplate CT = checkAndGetCodeTemplate(Opcode);
const auto CodeTemplates = checkAndGetCodeTemplates(Opcode);
ASSERT_THAT(CodeTemplates, SizeIs(1));
const auto &CT = CodeTemplates[0];
EXPECT_THAT(CT.Info, HasSubstr("static renaming"));
EXPECT_THAT(CT.Execution, ExecutionMode::UNKNOWN);
constexpr const unsigned kInstructionCount = 15;
ASSERT_THAT(CT.Instructions, SizeIs(kInstructionCount));
std::unordered_set<unsigned> AllDefRegisters;
@ -203,14 +260,23 @@ TEST_F(UopsSnippetGeneratorTest, NoTiedVariables) {
// CMOV_GR32 has no tied variables, we make sure def and use are different
// from each other.
// explicit def 0 : reg RegClass=GR32
// explicit use 1 : reg RegClass=GR32
// explicit use 2 : reg RegClass=GR32
// explicit use 3 : imm
// implicit use : EFLAGS
// - CMOV_GR32
// - Op0 Explicit Def RegClass(GR32)
// - Op1 Explicit Use RegClass(GR32)
// - Op2 Explicit Use RegClass(GR32)
// - Op3 Explicit Use Immediate
// - Op4 Implicit Use Reg(EFLAGS)
// - Var0 [Op0]
// - Var1 [Op1]
// - Var2 [Op2]
// - Var3 [Op3]
// - hasAliasingRegisters
const unsigned Opcode = llvm::X86::CMOV_GR32;
const CodeTemplate CT = checkAndGetCodeTemplate(Opcode);
const auto CodeTemplates = checkAndGetCodeTemplates(Opcode);
ASSERT_THAT(CodeTemplates, SizeIs(1));
const auto &CT = CodeTemplates[0];
EXPECT_THAT(CT.Info, HasSubstr("no tied variables"));
EXPECT_THAT(CT.Execution, ExecutionMode::UNKNOWN);
ASSERT_THAT(CT.Instructions, SizeIs(1));
const InstructionTemplate &IT = CT.Instructions[0];
EXPECT_THAT(IT.getOpcode(), Opcode);
@ -224,9 +290,27 @@ TEST_F(UopsSnippetGeneratorTest, NoTiedVariables) {
TEST_F(UopsSnippetGeneratorTest, MemoryUse) {
// Mov32rm reads from memory.
// - MOV32rm
// - Op0 Explicit Def RegClass(GR32)
// - Op1 Explicit Use Memory RegClass(GR8)
// - Op2 Explicit Use Memory
// - Op3 Explicit Use Memory RegClass(GRH8)
// - Op4 Explicit Use Memory
// - Op5 Explicit Use Memory RegClass(SEGMENT_REG)
// - Var0 [Op0]
// - Var1 [Op1]
// - Var2 [Op2]
// - Var3 [Op3]
// - Var4 [Op4]
// - Var5 [Op5]
// - hasMemoryOperands
// - hasAliasingRegisters
const unsigned Opcode = llvm::X86::MOV32rm;
const CodeTemplate CT = checkAndGetCodeTemplate(Opcode);
const auto CodeTemplates = checkAndGetCodeTemplates(Opcode);
ASSERT_THAT(CodeTemplates, SizeIs(1));
const auto &CT = CodeTemplates[0];
EXPECT_THAT(CT.Info, HasSubstr("no tied variables"));
EXPECT_THAT(CT.Execution, ExecutionMode::UNKNOWN);
ASSERT_THAT(CT.Instructions,
SizeIs(UopsSnippetGenerator::kMinNumDifferentAddresses));
const InstructionTemplate &IT = CT.Instructions[0];
@ -240,6 +324,21 @@ TEST_F(UopsSnippetGeneratorTest, MemoryUse) {
TEST_F(UopsSnippetGeneratorTest, MemoryUse_Movsb) {
// MOVSB writes to scratch memory register.
// - MOVSB
// - Op0 Explicit Use Memory RegClass(GR8)
// - Op1 Explicit Use Memory RegClass(GR8)
// - Op2 Explicit Use Memory RegClass(SEGMENT_REG)
// - Op3 Implicit Def Reg(EDI)
// - Op4 Implicit Def Reg(ESI)
// - Op5 Implicit Use Reg(EDI)
// - Op6 Implicit Use Reg(ESI)
// - Op7 Implicit Use Reg(DF)
// - Var0 [Op0]
// - Var1 [Op1]
// - Var2 [Op2]
// - hasMemoryOperands
// - hasAliasingImplicitRegisters (execution is always serial)
// - hasAliasingRegisters
const unsigned Opcode = llvm::X86::MOVSB;
const Instruction Instr(State, Opcode);
auto Error = Generator.generateCodeTemplates(Instr).takeError();