OpcodeDispatcher: Handle load variants of VMASKMOVP{D, S}

This commit is contained in:
Lioncache 2023-03-27 21:21:06 -04:00
parent eb8626c1f7
commit 25960fe6b1
6 changed files with 148 additions and 2 deletions

View File

@ -5895,6 +5895,8 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() {
{OPD(2, 0b01, 0x29), 1, &OpDispatchBuilder::AVXVectorALUOp<IR::OP_VCMPEQ, 8>},
{OPD(2, 0b01, 0x2A), 1, &OpDispatchBuilder::VMOVVectorNTOp},
{OPD(2, 0b01, 0x2B), 1, &OpDispatchBuilder::VPACKUSOp<4>},
{OPD(2, 0b01, 0x2C), 1, &OpDispatchBuilder::VMASKMOVOp<4>},
{OPD(2, 0b01, 0x2D), 1, &OpDispatchBuilder::VMASKMOVOp<8>},
{OPD(2, 0b01, 0x30), 1, &OpDispatchBuilder::AVXExtendVectorElements<1, 2, false>},
{OPD(2, 0b01, 0x31), 1, &OpDispatchBuilder::AVXExtendVectorElements<1, 4, false>},

View File

@ -459,6 +459,9 @@ public:
void VINSERTOp(OpcodeArgs);
void VINSERTPSOp(OpcodeArgs);
// Opcode handler for the load forms of VMASKMOVPS/VMASKMOVPD.
// ElementSize is 4 for the VMASKMOVPS form and 8 for the VMASKMOVPD form.
template <size_t ElementSize>
void VMASKMOVOp(OpcodeArgs);
void VMOVAPS_VMOVAPD_Op(OpcodeArgs);
void VMOVUPS_VMOVUPD_Op(OpcodeArgs);
@ -906,6 +909,10 @@ private:
const X86Tables::DecodedOperand& Src2,
const X86Tables::DecodedOperand& Imm);
// Shared implementation for the VMASKMOV load handlers.
// Builds a masked vector load using MaskOp as the mask operand and MemoryOp
// as the operand that supplies the address, and returns the resulting node.
OrderedNode* VMASKMOVOpImpl(OpcodeArgs, size_t ElementSize,
const X86Tables::DecodedOperand& MaskOp,
const X86Tables::DecodedOperand& MemoryOp);
void VMOVScalarOpImpl(OpcodeArgs, size_t ElementSize);
OrderedNode* VFCMPOpImpl(OpcodeArgs, size_t ElementSize, bool Scalar,

View File

@ -2261,6 +2261,27 @@ void OpDispatchBuilder::MASKMOVOp(OpcodeArgs) {
}
}
// Emits the IR for the load form of VMASKMOVPS/VMASKMOVPD and returns the
// node produced by the masked vector load.
//
// ElementSize is forwarded to the masked load as the per-element size.
// MaskOp is the operand that supplies the vector mask.
// MemoryOp is the operand that supplies the address to load from.
OrderedNode* OpDispatchBuilder::VMASKMOVOpImpl(OpcodeArgs, size_t ElementSize,
                                               const X86Tables::DecodedOperand& MaskOp,
                                               const X86Tables::DecodedOperand& MemoryOp) {
  // The load is as wide as the instruction's destination.
  const auto VectorSize = GetDstSize(Op);

  // The mask is fetched as a vector (FPR class) value.
  OrderedNode *MaskVector = LoadSource(FPRClass, Op, MaskOp, Op->Flags, -1);

  // Fetch the memory operand as a GPR-sized value.
  // NOTE(review): the trailing `false` presumably asks for the address itself
  // rather than the data it points at -- confirm against LoadSource_WithOpSize.
  const auto AddressSize = CTX->GetGPRSize();
  OrderedNode *Address = LoadSource_WithOpSize(GPRClass, Op, MemoryOp, AddressSize, Op->Flags, -1, false);

  // Apply any segment offset selected by the instruction's flags.
  Address = AppendSegmentOffset(Address, Op->Flags);

  return _VLoadVectorMasked(VectorSize, ElementSize, MaskVector, Address, Invalid(), MEM_OFFSET_SXTX, 1);
}
// Opcode handler for the load forms of VMASKMOVPS (ElementSize == 4) and
// VMASKMOVPD (ElementSize == 8): performs the masked load and writes the
// loaded vector to the instruction's destination.
template <size_t ElementSize>
void OpDispatchBuilder::VMASKMOVOp(OpcodeArgs) {
  // First source is the mask, second source is the memory operand.
  const auto& MaskOperand = Op->Src[0];
  const auto& MemoryOperand = Op->Src[1];

  StoreResult(FPRClass, Op, VMASKMOVOpImpl(Op, ElementSize, MaskOperand, MemoryOperand), -1);
}
// Explicit instantiations for the two element sizes referenced by the opcode
// handler table (4 and 8).
template void OpDispatchBuilder::VMASKMOVOp<4>(OpcodeArgs);
template void OpDispatchBuilder::VMASKMOVOp<8>(OpcodeArgs);
void OpDispatchBuilder::MOVBetweenGPR_FPR(OpcodeArgs) {
if (Op->Dest.IsGPR() &&
Op->Dest.Data.GPR.GPR >= FEXCore::X86State::REG_XMM_0) {

View File

@ -299,8 +299,8 @@ void InitializeVEXTables() {
{OPD(2, 0b01, 0x29), 1, X86InstInfo{"VPCMPEQQ", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0, nullptr}},
{OPD(2, 0b01, 0x2A), 1, X86InstInfo{"VMOVNTDQA", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_XMM_FLAGS, 0, nullptr}},
{OPD(2, 0b01, 0x2B), 1, X86InstInfo{"VPACKUSDW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0, nullptr}},
{OPD(2, 0b01, 0x2C), 1, X86InstInfo{"VMASKMOVPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
{OPD(2, 0b01, 0x2D), 1, X86InstInfo{"VMASKMOVPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
{OPD(2, 0b01, 0x2C), 1, X86InstInfo{"VMASKMOVPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0, nullptr}},
{OPD(2, 0b01, 0x2D), 1, X86InstInfo{"VMASKMOVPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0, nullptr}},
{OPD(2, 0b01, 0x2E), 1, X86InstInfo{"VMASKMOVPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
{OPD(2, 0b01, 0x2F), 1, X86InstInfo{"VMASKMOVPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},

View File

@ -0,0 +1,58 @@
%ifdef CONFIG
{
"HostFeatures": ["AVX"],
"RegData": {
"XMM0": ["0x8868C3F30AED56E0", "0x10FCE9E284E6E6DE", "0x1DDDDDDD8DDDDDDD", "0x8CCCCCCC0CCCCCCC"],
"XMM1": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
"XMM2": ["0x8000000080000000", "0x8000000080000000", "0x8000000080000000", "0x8000000080000000"],
"XMM3": ["0xA76C4F06A12BFCE0", "0x0000000000000000", "0x0000000000000000", "0xEEEEEEEEEEEEEEEE"],
"XMM4": ["0xA76C4F06A12BFCE0", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
"XMM5": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
"XMM6": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
"XMM7": ["0xA76C4F06A12BFCE0", "0x9B80767F1E6A060F", "0xFFFFFFFFFFFFFFFF", "0xEEEEEEEEEEEEEEEE"],
"XMM8": ["0xA76C4F06A12BFCE0", "0x9B80767F1E6A060F", "0x0000000000000000", "0x0000000000000000"]
}
}
%endif
; Test body: masked loads with VMASKMOVPD using 256-bit and 128-bit
; destinations and three different masks.
; rdx points at .data: 32 bytes of source data followed by three 32-byte masks.
lea rdx, [rel .data]
; ymm0 = mixed mask, ymm1 = all-zero mask, ymm2 = select-all mask
vmovaps ymm0, [rdx + 32]
vmovaps ymm1, [rdx + 64]
vmovaps ymm2, [rdx + 96]
; Mixed mask: the 256-bit form is expected to load elements 0 and 3 and zero
; the rest; the 128-bit form is expected to load element 0 only.
vmaskmovpd ymm3, ymm0, [rdx]
vmaskmovpd xmm4, xmm0, [rdx]
; All-zero mask: both results are expected to be all zero.
vmaskmovpd ymm5, ymm1, [rdx]
vmaskmovpd xmm6, xmm1, [rdx]
; Select-all mask: the 256-bit form is expected to load all four elements, the
; 128-bit form the low two with the upper half of the register zero.
vmaskmovpd ymm7, ymm2, [rdx]
vmaskmovpd xmm8, xmm2, [rdx]
hlt
align 32
.data:
; Source data read by the masked loads above
dq 0xA76C4F06A12BFCE0
dq 0x9B80767F1E6A060F
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE
; Disastrously organized mask (sign mask [1, 0, 0, 1]).
; Only the top bit of each 64-bit element selects; the lower bits are noise.
dq 0x8868C3F30AED56E0
dq 0x10FCE9E284E6E6DE
dq 0x1DDDDDDD8DDDDDDD
dq 0x8CCCCCCC0CCCCCCC
; All-zero mask: no elements are selected. Should not touch memory at all.
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
; Select all elements (the sign bit of every 64-bit element is set)
dq 0x8000000080000000
dq 0x8000000080000000
dq 0x8000000080000000
dq 0x8000000080000000

View File

@ -0,0 +1,58 @@
%ifdef CONFIG
{
"HostFeatures": ["AVX"],
"RegData": {
"XMM0": ["0x0868C3F30AED56E0", "0x80FCE9E284E6E6DE", "0x8DDDDDDD8DDDDDDD", "0x0CCCCCCC0CCCCCCC"],
"XMM1": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
"XMM2": ["0x8000000080000000", "0x8000000080000000", "0x8000000080000000", "0x8000000080000000"],
"XMM3": ["0x0000000000000000", "0x9B80767F1E6A060F", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000"],
"XMM4": ["0x0000000000000000", "0x9B80767F1E6A060F", "0x0000000000000000", "0x0000000000000000"],
"XMM5": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
"XMM6": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
"XMM7": ["0xA76C4F06A12BFCE0", "0x9B80767F1E6A060F", "0xFFFFFFFFFFFFFFFF", "0xEEEEEEEEEEEEEEEE"],
"XMM8": ["0xA76C4F06A12BFCE0", "0x9B80767F1E6A060F", "0x0000000000000000", "0x0000000000000000"]
}
}
%endif
; Test body: masked loads with VMASKMOVPS using 256-bit and 128-bit
; destinations and three different masks.
; rdx points at .data: 32 bytes of source data followed by three 32-byte masks.
lea rdx, [rel .data]
; ymm0 = mixed mask, ymm1 = all-zero mask, ymm2 = select-all mask
vmovaps ymm0, [rdx + 32]
vmovaps ymm1, [rdx + 64]
vmovaps ymm2, [rdx + 96]
; Mixed mask: the 256-bit form is expected to load 32-bit elements 2..5 and
; zero the rest; the 128-bit form is expected to load elements 2 and 3 only.
vmaskmovps ymm3, ymm0, [rdx]
vmaskmovps xmm4, xmm0, [rdx]
; All-zero mask: both results are expected to be all zero.
vmaskmovps ymm5, ymm1, [rdx]
vmaskmovps xmm6, xmm1, [rdx]
; Select-all mask: the 256-bit form is expected to load all eight elements, the
; 128-bit form the low four with the upper half of the register zero.
vmaskmovps ymm7, ymm2, [rdx]
vmaskmovps xmm8, xmm2, [rdx]
hlt
align 32
.data:
; Source data read by the masked loads above
dq 0xA76C4F06A12BFCE0
dq 0x9B80767F1E6A060F
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE
; Disastrously organized mask (sign mask [0, 0, 1, 1, 1, 1, 0, 0]).
; Only the top bit of each 32-bit element selects; the lower bits are noise.
dq 0x0868C3F30AED56E0
dq 0x80FCE9E284E6E6DE
dq 0x8DDDDDDD8DDDDDDD
dq 0x0CCCCCCC0CCCCCCC
; All-zero mask: no elements are selected. Should not touch memory at all.
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
; Select all elements (the sign bit of every 32-bit element is set)
dq 0x8000000080000000
dq 0x8000000080000000
dq 0x8000000080000000
dq 0x8000000080000000