mirror of
https://github.com/FEX-Emu/FEX.git
synced 2024-12-12 16:46:23 +00:00
AVX256: Initial fixes just to get my unittest working
This is the initial split to decouple AVX256 composed operations from their MMX/SSE counterparts. This is to work around the subtle differences with AVX/SSE zext/insert behaviour.
This commit is contained in:
parent
3d90d1ab4f
commit
7e8d734e43
@ -4475,7 +4475,7 @@ void OpDispatchBuilder::StoreResult_WithOpSize(FEXCore::IR::RegisterClassType Cl
|
||||
_StoreContext(OpSize, Class, Src, offsetof(FEXCore::Core::CPUState, mm[gpr - FEXCore::X86State::REG_MM_0]));
|
||||
} else if (gpr >= FEXCore::X86State::REG_XMM_0) {
|
||||
const auto gprIndex = gpr - X86State::REG_XMM_0;
|
||||
const auto VectorSize = (CTX->HostFeatures.SupportsSVE256 && CTX->HostFeatures.SupportsAVX) ? 32 : 16;
|
||||
const auto VectorSize = GetGuestVectorLength();
|
||||
|
||||
auto Result = Src;
|
||||
if (OpSize != VectorSize) {
|
||||
@ -5145,7 +5145,7 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() {
|
||||
{OPD(1, 0b01, 0x6B), 1, &OpDispatchBuilder::VPACKSSOp<4>},
|
||||
{OPD(1, 0b01, 0x6C), 1, &OpDispatchBuilder::VPUNPCKLOp<8>},
|
||||
{OPD(1, 0b01, 0x6D), 1, &OpDispatchBuilder::VPUNPCKHOp<8>},
|
||||
{OPD(1, 0b01, 0x6E), 1, &OpDispatchBuilder::MOVBetweenGPR_FPR},
|
||||
{OPD(1, 0b01, 0x6E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVBetweenGPR_FPR, OpDispatchBuilder::VectorOpType::AVX>},
|
||||
|
||||
{OPD(1, 0b01, 0x6F), 1, &OpDispatchBuilder::VMOVAPS_VMOVAPDOp},
|
||||
{OPD(1, 0b10, 0x6F), 1, &OpDispatchBuilder::VMOVUPS_VMOVUPDOp},
|
||||
@ -5165,8 +5165,8 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() {
|
||||
{OPD(1, 0b01, 0x7D), 1, &OpDispatchBuilder::VHSUBPOp<8>},
|
||||
{OPD(1, 0b11, 0x7D), 1, &OpDispatchBuilder::VHSUBPOp<4>},
|
||||
|
||||
{OPD(1, 0b01, 0x7E), 1, &OpDispatchBuilder::MOVBetweenGPR_FPR},
|
||||
{OPD(1, 0b10, 0x7E), 1, &OpDispatchBuilder::MOVQOp},
|
||||
{OPD(1, 0b01, 0x7E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVBetweenGPR_FPR, OpDispatchBuilder::VectorOpType::AVX>},
|
||||
{OPD(1, 0b10, 0x7E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVQOp, OpDispatchBuilder::VectorOpType::AVX>},
|
||||
|
||||
{OPD(1, 0b01, 0x7F), 1, &OpDispatchBuilder::VMOVAPS_VMOVAPDOp},
|
||||
{OPD(1, 0b10, 0x7F), 1, &OpDispatchBuilder::VMOVUPS_VMOVUPDOp},
|
||||
@ -5190,7 +5190,7 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() {
|
||||
{OPD(1, 0b01, 0xD3), 1, &OpDispatchBuilder::VPSRLDOp<8>},
|
||||
{OPD(1, 0b01, 0xD4), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VADD, 8>},
|
||||
{OPD(1, 0b01, 0xD5), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VMUL, 2>},
|
||||
{OPD(1, 0b01, 0xD6), 1, &OpDispatchBuilder::MOVQOp},
|
||||
{OPD(1, 0b01, 0xD6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVQOp, OpDispatchBuilder::VectorOpType::AVX>},
|
||||
{OPD(1, 0b01, 0xD7), 1, &OpDispatchBuilder::MOVMSKOpOne},
|
||||
|
||||
{OPD(1, 0b01, 0xD8), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VUQSUB, 1>},
|
||||
@ -5602,9 +5602,9 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) {
|
||||
|
||||
{0x3F, 1, &OpDispatchBuilder::ThunkOp},
|
||||
{0x40, 16, &OpDispatchBuilder::CMOVOp},
|
||||
{0x6E, 1, &OpDispatchBuilder::MOVBetweenGPR_FPR},
|
||||
{0x6E, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVBetweenGPR_FPR, OpDispatchBuilder::VectorOpType::MMX>},
|
||||
{0x6F, 1, &OpDispatchBuilder::MOVQMMXOp},
|
||||
{0x7E, 1, &OpDispatchBuilder::MOVBetweenGPR_FPR},
|
||||
{0x7E, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVBetweenGPR_FPR, OpDispatchBuilder::VectorOpType::MMX>},
|
||||
{0x7F, 1, &OpDispatchBuilder::MOVQMMXOp},
|
||||
{0x80, 16, &OpDispatchBuilder::CondJUMPOp},
|
||||
{0x90, 16, &OpDispatchBuilder::SETccOp},
|
||||
@ -5884,7 +5884,7 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) {
|
||||
{0x5F, 1, &OpDispatchBuilder::VectorScalarInsertALUOp<IR::OP_VFMAXSCALARINSERT, 4>},
|
||||
{0x6F, 1, &OpDispatchBuilder::MOVVectorUnalignedOp},
|
||||
{0x70, 1, &OpDispatchBuilder::PSHUFWOp<false>},
|
||||
{0x7E, 1, &OpDispatchBuilder::MOVQOp},
|
||||
{0x7E, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVQOp, OpDispatchBuilder::VectorOpType::SSE>},
|
||||
{0x7F, 1, &OpDispatchBuilder::MOVVectorUnalignedOp},
|
||||
{0xB8, 1, &OpDispatchBuilder::PopcountOp},
|
||||
{0xBC, 1, &OpDispatchBuilder::TZCNT},
|
||||
@ -5964,7 +5964,7 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) {
|
||||
{0x6B, 1, &OpDispatchBuilder::PACKSSOp<4>},
|
||||
{0x6C, 1, &OpDispatchBuilder::PUNPCKLOp<8>},
|
||||
{0x6D, 1, &OpDispatchBuilder::PUNPCKHOp<8>},
|
||||
{0x6E, 1, &OpDispatchBuilder::MOVBetweenGPR_FPR},
|
||||
{0x6E, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVBetweenGPR_FPR, OpDispatchBuilder::VectorOpType::SSE>},
|
||||
{0x6F, 1, &OpDispatchBuilder::MOVVectorAlignedOp},
|
||||
{0x70, 1, &OpDispatchBuilder::PSHUFDOp},
|
||||
|
||||
@ -5974,7 +5974,7 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) {
|
||||
{0x78, 1, nullptr}, // GROUP 17
|
||||
{0x7C, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFADDP, 8>},
|
||||
{0x7D, 1, &OpDispatchBuilder::HSUBP<8>},
|
||||
{0x7E, 1, &OpDispatchBuilder::MOVBetweenGPR_FPR},
|
||||
{0x7E, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVBetweenGPR_FPR, OpDispatchBuilder::VectorOpType::SSE>},
|
||||
{0x7F, 1, &OpDispatchBuilder::MOVVectorAlignedOp},
|
||||
{0xC2, 1, &OpDispatchBuilder::VFCMPOp<8>},
|
||||
{0xC4, 1, &OpDispatchBuilder::PINSROp<2>},
|
||||
@ -5987,7 +5987,7 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) {
|
||||
{0xD3, 1, &OpDispatchBuilder::PSRLDOp<8>},
|
||||
{0xD4, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VADD, 8>},
|
||||
{0xD5, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VMUL, 2>},
|
||||
{0xD6, 1, &OpDispatchBuilder::MOVQOp},
|
||||
{0xD6, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVQOp, OpDispatchBuilder::VectorOpType::SSE>},
|
||||
{0xD7, 1, &OpDispatchBuilder::MOVMSKOpOne}, // PMOVMSKB
|
||||
{0xD8, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUQSUB, 1>},
|
||||
{0xD9, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUQSUB, 2>},
|
||||
|
@ -432,6 +432,11 @@ public:
|
||||
void SGDTOp(OpcodeArgs);
|
||||
void SMSWOp(OpcodeArgs);
|
||||
|
||||
// Tag passed to handlers that are shared between several opcode tables.
// The table entries in this change bind MOVBetweenGPR_FPR and MOVQOp with one
// of these values through Bind<>, so the handler knows which table it was
// dispatched from. The *_WithAVXInsert store helpers compare against SSE only;
// MMX and AVX take their plain store path.
enum class VectorOpType {
|
||||
// Bound by the handler-table entries that sit next to MOVQMMXOp.
MMX,
|
||||
// Bound by the non-VEX table entries; the only value for which the
// *_WithAVXInsert helpers perform a load + insert before storing.
SSE,
|
||||
// Bound by the OPD(...) entries in InstallHostSpecificOpcodeHandlers.
AVX,
|
||||
};
|
||||
// SSE
|
||||
void MOVLPOp(OpcodeArgs);
|
||||
void MOVHPDOp(OpcodeArgs);
|
||||
@ -445,7 +450,7 @@ public:
|
||||
template<FEXCore::IR::IROps IROp, size_t ElementSize>
|
||||
void VectorUnaryDuplicateOp(OpcodeArgs);
|
||||
|
||||
void MOVQOp(OpcodeArgs);
|
||||
void MOVQOp(OpcodeArgs, VectorOpType VectorType);
|
||||
void MOVQMMXOp(OpcodeArgs);
|
||||
template<size_t ElementSize>
|
||||
void MOVMSKOp(OpcodeArgs);
|
||||
@ -489,7 +494,7 @@ public:
|
||||
template<size_t SrcElementSize, bool Narrow, bool HostRoundingMode>
|
||||
void XMM_To_MMX_Vector_CVT_Float_To_Int(OpcodeArgs);
|
||||
void MASKMOVOp(OpcodeArgs);
|
||||
void MOVBetweenGPR_FPR(OpcodeArgs);
|
||||
void MOVBetweenGPR_FPR(OpcodeArgs, VectorOpType VectorType);
|
||||
void TZCNT(OpcodeArgs);
|
||||
void LZCNT(OpcodeArgs);
|
||||
template<size_t ElementSize>
|
||||
@ -1171,6 +1176,36 @@ public:
|
||||
void AVX128_VCVTPS2PH(OpcodeArgs);
|
||||
|
||||
// End of AVX 128-bit implementation
|
||||
|
||||
// AVX 256-bit operations
|
||||
// Stores Value to the destination operand of Op, special-casing SSE-typed
// writes to an XMM register when the guest vector length equals
// Core::CPUState::XMM_AVX_REG_SIZE.
//
// Type       - which handler table the calling handler was bound for.
// Class      - register class forwarded to StoreResult on the fallback path.
// Op         - decoded instruction; only Op->Dest is inspected here.
// Value      - the value to store (reassigned locally on the insert path).
// Align      - forwarded unchanged to StoreResult.
// AccessType - forwarded unchanged to StoreResult.
//
// In every case that does not match the condition below (non-register
// destination, register outside REG_XMM_0..REG_XMM_15, a different guest
// vector length, or Type being MMX/AVX) this is exactly StoreResult(...).
void StoreResult_WithAVXInsert(VectorOpType Type, FEXCore::IR::RegisterClassType Class, FEXCore::X86Tables::DecodedOp Op, Ref Value,
|
||||
int8_t Align, MemoryAccessType AccessType = MemoryAccessType::DEFAULT) {
|
||||
// Insert path: destination is one of XMM0..XMM15, the guest vector length is
// the AVX register size, and the handler was bound as SSE.
if (Op->Dest.IsGPR() && Op->Dest.Data.GPR.GPR >= X86State::REG_XMM_0 && Op->Dest.Data.GPR.GPR <= X86State::REG_XMM_15 &&
|
||||
GetGuestVectorLength() == Core::CPUState::XMM_AVX_REG_SIZE && Type == VectorOpType::SSE) {
|
||||
const auto gpr = Op->Dest.Data.GPR.GPR;
|
||||
// Convert the register enum value into a zero-based XMM index.
const auto gprIndex = gpr - X86State::REG_XMM_0;
|
||||
// Read the current contents of the destination register so the new value
// can be merged into it instead of replacing it outright.
auto DestVector = LoadXMMRegister(gprIndex);
|
||||
// Merge a 128-bit element of Value into DestVector at index 0.
// NOTE(review): argument order is presumably (register size, element size,
// dest index, src index, dest vector, src vector) - confirm against the
// _VInsElement IR definition.
Value = _VInsElement(GetGuestVectorLength(), OpSize::i128Bit, 0, 0, DestVector, Value);
|
||||
StoreXMMRegister(gprIndex, Value);
|
||||
return;
|
||||
}
|
||||
|
||||
// Fallback: ordinary result store with the caller's arguments.
StoreResult(Class, Op, Value, Align, AccessType);
|
||||
}
|
||||
|
||||
// Stores Value into XMM register number XMM, special-casing SSE-typed writes
// when the guest vector length equals Core::CPUState::XMM_AVX_REG_SIZE.
//
// Type  - which handler table the calling handler was bound for.
// XMM   - zero-based XMM register index passed to Load/StoreXMMRegister.
// Value - the value to store (reassigned locally on the insert path).
//
// For Type MMX/AVX, or any other guest vector length, this is exactly
// StoreXMMRegister(XMM, Value).
void StoreXMMRegister_WithAVXInsert(VectorOpType Type, uint32_t XMM, Ref Value) {
|
||||
if (GetGuestVectorLength() == Core::CPUState::XMM_AVX_REG_SIZE && Type == VectorOpType::SSE) {
|
||||
///< SSE vector stores need to insert in the low 128-bit lane of the 256-bit register.
|
||||
// Read the current register contents to use as the base of the insert.
auto DestVector = LoadXMMRegister(XMM);
|
||||
// NOTE(review): argument order is presumably (register size, element size,
// dest index, src index, dest vector, src vector) - confirm against the
// _VInsElement IR definition.
Value = _VInsElement(GetGuestVectorLength(), OpSize::i128Bit, 0, 0, DestVector, Value);
|
||||
StoreXMMRegister(XMM, Value);
|
||||
return;
|
||||
}
|
||||
// Non-SSE callers, or a guest vector length other than the AVX size: plain store.
StoreXMMRegister(XMM, Value);
|
||||
}
|
||||
|
||||
// End of AVX 256-bit implementation
|
||||
|
||||
void InvalidOp(OpcodeArgs);
|
||||
|
||||
void SetPackedRFLAG(bool Lower8, Ref Src);
|
||||
|
@ -681,7 +681,7 @@ void OpDispatchBuilder::VectorUnaryDuplicateOp(OpcodeArgs) {
|
||||
template void OpDispatchBuilder::VectorUnaryDuplicateOp<IR::OP_VFRSQRT, 4>(OpcodeArgs);
|
||||
template void OpDispatchBuilder::VectorUnaryDuplicateOp<IR::OP_VFRECP, 4>(OpcodeArgs);
|
||||
|
||||
void OpDispatchBuilder::MOVQOp(OpcodeArgs) {
|
||||
void OpDispatchBuilder::MOVQOp(OpcodeArgs, VectorOpType VectorType) {
|
||||
const auto SrcSize = Op->Src[0].IsGPR() ? 16U : GetSrcSize(Op);
|
||||
Ref Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags);
|
||||
// This instruction is a bit special that if the destination is a register then it'll ZEXT the 64bit source to 128bit
|
||||
@ -690,7 +690,7 @@ void OpDispatchBuilder::MOVQOp(OpcodeArgs) {
|
||||
const auto gprIndex = gpr - X86State::REG_XMM_0;
|
||||
|
||||
auto Reg = _VMov(8, Src);
|
||||
StoreXMMRegister(gprIndex, Reg);
|
||||
StoreXMMRegister_WithAVXInsert(VectorType, gprIndex, Reg);
|
||||
} else {
|
||||
// This is simple, just store the result
|
||||
StoreResult(FPRClass, Op, Src, -1);
|
||||
@ -2327,19 +2327,20 @@ void OpDispatchBuilder::VPMASKMOVOp(OpcodeArgs) {
|
||||
template void OpDispatchBuilder::VPMASKMOVOp<false>(OpcodeArgs);
|
||||
template void OpDispatchBuilder::VPMASKMOVOp<true>(OpcodeArgs);
|
||||
|
||||
void OpDispatchBuilder::MOVBetweenGPR_FPR(OpcodeArgs) {
|
||||
void OpDispatchBuilder::MOVBetweenGPR_FPR(OpcodeArgs, VectorOpType VectorType) {
|
||||
if (Op->Dest.IsGPR() && Op->Dest.Data.GPR.GPR >= FEXCore::X86State::REG_XMM_0) {
|
||||
Ref Result {};
|
||||
if (Op->Src[0].IsGPR()) {
|
||||
// Loading from GPR and moving to Vector.
|
||||
Ref Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], CTX->GetGPRSize(), Op->Flags);
|
||||
// zext to 128bit
|
||||
auto Converted = _VCastFromGPR(16, GetSrcSize(Op), Src);
|
||||
StoreResult(FPRClass, Op, Op->Dest, Converted, -1);
|
||||
Result = _VCastFromGPR(16, GetSrcSize(Op), Src);
|
||||
} else {
|
||||
// Loading from Memory as a scalar. Zero extend
|
||||
Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
|
||||
StoreResult(FPRClass, Op, Op->Dest, Src, -1);
|
||||
Result = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
|
||||
}
|
||||
|
||||
StoreResult_WithAVXInsert(VectorType, FPRClass, Op, Result, -1);
|
||||
} else {
|
||||
Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user