AVX128: Implement support for VPCLMULQDQ

This is just the 128-bit version twice.
This commit is contained in:
Ryan Houdek 2024-06-21 05:03:30 -07:00 committed by Alyssa Rosenzweig
parent 34272fc134
commit 7069643ae6
3 changed files with 31 additions and 1 deletions

View File

@ -5404,13 +5404,15 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() {
if (CTX->HostFeatures.SupportsSVE256) {
InstallToTable(FEXCore::X86Tables::VEXTableOps, AVXTable);
InstallToTable(FEXCore::X86Tables::VEXTableGroupOps, VEXTableGroupOps);
if (CTX->HostFeatures.SupportsPMULL_128Bit) {
InstallToTable(FEXCore::X86Tables::VEXTableOps, VEX_PCLMUL);
}
} else if (CTX->HostFeatures.SupportsAVX) {
InstallAVX128Handlers();
}
if (CTX->HostFeatures.SupportsPMULL_128Bit) {
InstallToTable(FEXCore::X86Tables::H0F3ATableOps, H0F3A_PCLMUL);
InstallToTable(FEXCore::X86Tables::VEXTableOps, VEX_PCLMUL);
}
Initialized = true;
}

View File

@ -1208,6 +1208,8 @@ public:
void AVX128_VPERMD(OpcodeArgs);
void AVX128_VPCLMULQDQ(OpcodeArgs);
// End of AVX 128-bit implementation
void InvalidOp(OpcodeArgs);

View File

@ -402,6 +402,12 @@ void OpDispatchBuilder::InstallAVX128Handlers() {
};
#undef OPD
#define OPD(map_select, pp, opcode) (((map_select - 1) << 10) | (pp << 8) | (opcode))
constexpr std::tuple<uint16_t, uint8_t, FEXCore::X86Tables::OpDispatchPtr> VEX128_PCLMUL[] = {
{OPD(3, 0b01, 0x44), 1, &OpDispatchBuilder::AVX128_VPCLMULQDQ},
};
#undef OPD
auto InstallToTable = [](auto& FinalTable, auto& LocalTable) {
for (auto Op : LocalTable) {
auto OpNum = std::get<0>(Op);
@ -415,6 +421,10 @@ void OpDispatchBuilder::InstallAVX128Handlers() {
InstallToTable(FEXCore::X86Tables::VEXTableOps, AVX128Table);
InstallToTable(FEXCore::X86Tables::VEXTableGroupOps, VEX128TableGroupOps);
if (CTX->HostFeatures.SupportsPMULL_128Bit) {
InstallToTable(FEXCore::X86Tables::VEXTableOps, VEX128_PCLMUL);
}
SaveAVXStateFunc = &OpDispatchBuilder::AVX128_SaveAVXState;
RestoreAVXStateFunc = &OpDispatchBuilder::AVX128_RestoreAVXState;
DefaultAVXStateFunc = &OpDispatchBuilder::AVX128_DefaultAVXState;
@ -2238,4 +2248,20 @@ void OpDispatchBuilder::AVX128_VPERMD(OpcodeArgs) {
AVX128_StoreResult_WithOpSize(Op, Op->Dest, Result);
}
void OpDispatchBuilder::AVX128_VPCLMULQDQ(OpcodeArgs) {
const auto Size = GetDstSize(Op);
const auto Is128Bit = Size == Core::CPUState::XMM_SSE_REG_SIZE;
const auto Selector = static_cast<uint8_t>(Op->Src[2].Literal());
auto Src1 = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, !Is128Bit);
auto Src2 = AVX128_LoadSource_WithOpSize(Op, Op->Src[1], Op->Flags, !Is128Bit);
RefPair Result {};
Result.Low = _PCLMUL(OpSize::i128Bit, Src1.Low, Src2.Low, Selector);
if (!Is128Bit) {
Result.High = _PCLMUL(OpSize::i128Bit, Src1.High, Src2.High, Selector);
}
AVX128_StoreResult_WithOpSize(Op, Op->Dest, Result);
}
} // namespace FEXCore::IR