OpcodeDispatcher: Handle VZEROUPPER/VZEROALL

This commit is contained in:
lioncash 2022-11-29 14:59:10 +00:00
parent b35c6c6d22
commit 3e80416eb6
6 changed files with 140 additions and 2 deletions

View File

@ -5834,7 +5834,7 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() {
{OPD(1, 0b01, 0x74), 3, &OpDispatchBuilder::UnimplementedOp},
{OPD(1, 0b00, 0x77), 1, &OpDispatchBuilder::UnimplementedOp},
{OPD(1, 0b00, 0x77), 1, &OpDispatchBuilder::VZEROOp},
{OPD(1, 0b01, 0x7E), 1, &OpDispatchBuilder::MOVBetweenGPR_FPR},
{OPD(1, 0b10, 0x7E), 1, &OpDispatchBuilder::MOVQOp},

View File

@ -419,6 +419,8 @@ public:
void VMOVVectorNTOp(OpcodeArgs);
void VZEROOp(OpcodeArgs);
// X87 Ops
template<size_t width>
void FLD(OpcodeArgs);

View File

@ -2701,4 +2701,28 @@ void OpDispatchBuilder::MPSADBWOp(OpcodeArgs) {
StoreResult(FPRClass, Op, Result, -1);
}
// Shared handler for the VZERO* opcode (VZEROALL and VZEROUPPER).
//
// The two forms are told apart by the decoded destination size: a size equal
// to Core::CPUState::XMM_AVX_REG_SIZE selects VZEROALL, anything else selects
// VZEROUPPER. Only the first 16 vector registers are touched in 64-bit mode,
// and only the first 8 otherwise.
void OpDispatchBuilder::VZEROOp(OpcodeArgs) {
  const auto OpSize = GetDstSize(Op);
  const auto RegisterCount = CTX->Config.Is64BitMode ? 16U : 8U;
  const bool ZeroUpperOnly = OpSize != Core::CPUState::XMM_AVX_REG_SIZE;

  if (ZeroUpperOnly) {
    // VZEROUPPER: round-trip every register through a 16-byte vector move.
    // NOTE(review): presumably _VMov(16, ...) keeps the low 128 bits and
    // clears everything above them -- confirm against the IR definition.
    for (uint32_t Index = 0; Index < RegisterCount; ++Index) {
      OrderedNode* Current = LoadXMMRegister(Index);
      OrderedNode* LowerLane = _VMov(16, Current);
      StoreXMMRegister(Index, LowerLane);
    }
    return;
  }

  // VZEROALL: a single zero vector is created once and stored to every
  // register. Despite the "ALL" in the name, this never goes beyond the first
  // 16 registers (even on AVX-512, where 32 registers exist).
  OrderedNode* Zero = _VectorZero(OpSize);
  for (uint32_t Index = 0; Index < RegisterCount; ++Index) {
    StoreXMMRegister(Index, Zero);
  }
}
}

View File

@ -95,7 +95,7 @@ void InitializeVEXTables() {
{OPD(1, 0b01, 0x75), 1, X86InstInfo{"VPCMPEQW", TYPE_INST, FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}},
{OPD(1, 0b01, 0x76), 1, X86InstInfo{"VPCMPEQD", TYPE_INST, FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}},
{OPD(1, 0b00, 0x77), 1, X86InstInfo{"VZERO*", TYPE_INST, FLAGS_NONE, 0, nullptr}},
{OPD(1, 0b00, 0x77), 1, X86InstInfo{"VZERO*", TYPE_INST, GenFlagsDstSize(SIZE_128BIT), 0, nullptr}},
{OPD(1, 0b00, 0xC2), 1, X86InstInfo{"VCMPccPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
{OPD(1, 0b01, 0xC2), 1, X86InstInfo{"VCMPccPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},

View File

@ -0,0 +1,56 @@
%ifdef CONFIG
{
"HostFeatures": ["AVX"],
"RegData": {
"XMM0": ["0", "0", "0", "0"],
"XMM1": ["0", "0", "0", "0"],
"XMM2": ["0", "0", "0", "0"],
"XMM3": ["0", "0", "0", "0"],
"XMM4": ["0", "0", "0", "0"],
"XMM5": ["0", "0", "0", "0"],
"XMM6": ["0", "0", "0", "0"],
"XMM7": ["0", "0", "0", "0"],
"XMM8": ["0", "0", "0", "0"],
"XMM9": ["0", "0", "0", "0"],
"XMM10": ["0", "0", "0", "0"],
"XMM11": ["0", "0", "0", "0"],
"XMM12": ["0", "0", "0", "0"],
"XMM13": ["0", "0", "0", "0"],
"XMM14": ["0", "0", "0", "0"],
"XMM15": ["0", "0", "0", "0"]
},
"MemoryRegions": {
"0x100000000": "4096"
}
}
%endif
; VZEROALL test body.
; Fills ymm0-ymm15 with a non-zero 256-bit pattern, executes vzeroall, and
; halts. The RegData section of the config header above expects every
; register to read back as all zeroes.

; rdx <- address of the 32-byte test pattern defined at the end of the file.
lea rdx, [rel .data]

; Load the same 256-bit pattern into every register so that each one holds
; non-zero data in both its lower and upper 128 bits before the instruction
; under test runs.
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx]
vmovapd ymm2, [rdx]
vmovapd ymm3, [rdx]
vmovapd ymm4, [rdx]
vmovapd ymm5, [rdx]
vmovapd ymm6, [rdx]
vmovapd ymm7, [rdx]
vmovapd ymm8, [rdx]
vmovapd ymm9, [rdx]
vmovapd ymm10, [rdx]
vmovapd ymm11, [rdx]
vmovapd ymm12, [rdx]
vmovapd ymm13, [rdx]
vmovapd ymm14, [rdx]
vmovapd ymm15, [rdx]

; Instruction under test.
vzeroall

; NOTE(review): presumably hlt is where the test harness stops execution and
; compares register state against RegData -- confirm against the runner.
hlt

; The aligned vmovapd loads above require a 32-byte-aligned memory operand.
align 32
.data:
; Four quadwords = one 256-bit value; the first two form the lower 128 bits.
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x6162636465666768
dq 0x7172737475767778

View File

@ -0,0 +1,56 @@
%ifdef CONFIG
{
"HostFeatures": ["AVX"],
"RegData": {
"XMM0": ["0x4142434445464748", "0x5152535455565758", "0", "0"],
"XMM1": ["0x4142434445464748", "0x5152535455565758", "0", "0"],
"XMM2": ["0x4142434445464748", "0x5152535455565758", "0", "0"],
"XMM3": ["0x4142434445464748", "0x5152535455565758", "0", "0"],
"XMM4": ["0x4142434445464748", "0x5152535455565758", "0", "0"],
"XMM5": ["0x4142434445464748", "0x5152535455565758", "0", "0"],
"XMM6": ["0x4142434445464748", "0x5152535455565758", "0", "0"],
"XMM7": ["0x4142434445464748", "0x5152535455565758", "0", "0"],
"XMM8": ["0x4142434445464748", "0x5152535455565758", "0", "0"],
"XMM9": ["0x4142434445464748", "0x5152535455565758", "0", "0"],
"XMM10": ["0x4142434445464748", "0x5152535455565758", "0", "0"],
"XMM11": ["0x4142434445464748", "0x5152535455565758", "0", "0"],
"XMM12": ["0x4142434445464748", "0x5152535455565758", "0", "0"],
"XMM13": ["0x4142434445464748", "0x5152535455565758", "0", "0"],
"XMM14": ["0x4142434445464748", "0x5152535455565758", "0", "0"],
"XMM15": ["0x4142434445464748", "0x5152535455565758", "0", "0"]
},
"MemoryRegions": {
"0x100000000": "4096"
}
}
%endif
; VZEROUPPER test body.
; Fills ymm0-ymm15 with a non-zero 256-bit pattern, executes vzeroupper, and
; halts. The RegData section of the config header above expects the two low
; quadwords of every register to be preserved and the two high quadwords to
; read back as zero.

; rdx <- address of the 32-byte test pattern defined at the end of the file.
lea rdx, [rel .data]

; Load the same 256-bit pattern into every register so that the upper 128
; bits are known to be non-zero before the instruction under test runs.
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx]
vmovapd ymm2, [rdx]
vmovapd ymm3, [rdx]
vmovapd ymm4, [rdx]
vmovapd ymm5, [rdx]
vmovapd ymm6, [rdx]
vmovapd ymm7, [rdx]
vmovapd ymm8, [rdx]
vmovapd ymm9, [rdx]
vmovapd ymm10, [rdx]
vmovapd ymm11, [rdx]
vmovapd ymm12, [rdx]
vmovapd ymm13, [rdx]
vmovapd ymm14, [rdx]
vmovapd ymm15, [rdx]

; Instruction under test.
vzeroupper

; NOTE(review): presumably hlt is where the test harness stops execution and
; compares register state against RegData -- confirm against the runner.
hlt

; The aligned vmovapd loads above require a 32-byte-aligned memory operand.
align 32
.data:
; Lower 128 bits: expected to survive vzeroupper.
dq 0x4142434445464748
dq 0x5152535455565758
; Upper 128 bits: expected to be cleared by vzeroupper.
dq 0x6162636465666768
dq 0x7172737475767778