OpcodeDispatcher: Handle VCMPPS

This commit is contained in:
lioncash 2022-12-21 05:35:19 +00:00
parent 2c5aceb9b6
commit 10a6b5794b
6 changed files with 159 additions and 1 deletions

View File

@ -5919,6 +5919,8 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() {
{OPD(1, 0b01, 0x7F), 1, &OpDispatchBuilder::VMOVAPS_VMOVAPD_Op},
{OPD(1, 0b10, 0x7F), 1, &OpDispatchBuilder::VMOVUPS_VMOVUPD_Op},
{OPD(1, 0b00, 0xC2), 1, &OpDispatchBuilder::AVXVFCMPOp<4, false>},
{OPD(1, 0b01, 0xC5), 1, &OpDispatchBuilder::PExtrOp<2>},
{OPD(1, 0b01, 0xD1), 1, &OpDispatchBuilder::VPSRLDOp<2>},

View File

@ -419,6 +419,9 @@ public:
template <size_t SrcElementSize, bool Widen>
void AVXVector_CVT_Int_To_Float(OpcodeArgs);
// AVX (VEX-encoded) floating-point compare handler. ElementSize and Scalar
// are forwarded unchanged to the shared compare implementation; the VCMPPS
// opcode is registered with the <4, false> instantiation.
template <size_t ElementSize, bool Scalar>
void AVXVFCMPOp(OpcodeArgs);
template <size_t ElementSize>
void VADDSUBPOp(OpcodeArgs);

View File

@ -2008,6 +2008,27 @@ void OpDispatchBuilder::VFCMPOp<8, false>(OpcodeArgs);
template
void OpDispatchBuilder::VFCMPOp<8, true>(OpcodeArgs);
// Dispatch handler for the VEX-encoded floating-point compare instructions.
//
// Template parameters:
//   ElementSize - forwarded to VFCMPOpImpl as the per-element size.
//   Scalar      - forwarded to VFCMPOpImpl to select scalar vs. packed handling.
//
// Operand layout as consumed here:
//   Src[0] - first comparison operand, loaded at the destination's size.
//   Src[1] - second comparison operand.
//   Src[2] - comparison predicate; must be a literal (asserted below).
template <size_t ElementSize, bool Scalar>
void OpDispatchBuilder::AVXVFCMPOp(OpcodeArgs) {
  const auto OperationSize = GetDstSize(Op);

  // The predicate arrives as the instruction's immediate operand.
  LOGMAN_THROW_A_FMT(Op->Src[2].IsLiteral(), "Src[2] needs to be literal");
  const uint8_t Predicate = Op->Src[2].Data.Literal.Value;

  // Load order is kept as-is: first operand (explicitly sized), then the second.
  OrderedNode *Lhs = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], OperationSize, Op->Flags, -1);
  OrderedNode *Rhs = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags, -1);

  OrderedNode *Mask = VFCMPOpImpl(Op, ElementSize, Scalar, Lhs, Rhs, Predicate);

  // For a 128-bit destination the result goes through a 16-byte vector move
  // before being stored.
  // NOTE(review): presumably this clears the upper lane as VEX.128 requires - confirm.
  if (OperationSize == Core::CPUState::XMM_SSE_REG_SIZE) {
    Mask = _VMov(16, Mask);
  }

  StoreResult(FPRClass, Op, Mask, -1);
}

// Explicit instantiation used by the VCMPPS opcode table entry.
template void OpDispatchBuilder::AVXVFCMPOp<4, false>(OpcodeArgs);
void OpDispatchBuilder::FXSaveOp(OpcodeArgs) {
OrderedNode *Mem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, -1, false);
Mem = AppendSegmentOffset(Mem, Op->Flags);

View File

@ -97,7 +97,7 @@ void InitializeVEXTables() {
{OPD(1, 0b00, 0x77), 1, X86InstInfo{"VZERO*", TYPE_INST, GenFlagsDstSize(SIZE_128BIT), 0, nullptr}},
{OPD(1, 0b00, 0xC2), 1, X86InstInfo{"VCMPccPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
{OPD(1, 0b00, 0xC2), 1, X86InstInfo{"VCMPccPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(1, 0b01, 0xC2), 1, X86InstInfo{"VCMPccPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
{OPD(1, 0b10, 0xC2), 1, X86InstInfo{"VCMPccSS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
{OPD(1, 0b11, 0xC2), 1, X86InstInfo{"VCMPccSD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},

View File

@ -0,0 +1,66 @@
%ifdef CONFIG
{
"HostFeatures": ["AVX"],
"RegData": {
"XMM2": ["0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
"XMM3": ["0x0000000000000000", "0xFFFFFFFF00000000", "0x0000000000000000", "0x0000000000000000"],
"XMM4": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFF00000000", "0x0000000000000000", "0x0000000000000000"],
"XMM5": ["0x0000000000000000", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000"],
"XMM6": ["0xFFFFFFFFFFFFFFFF", "0x00000000FFFFFFFF", "0x0000000000000000", "0x0000000000000000"],
"XMM7": ["0x0000000000000000", "0x00000000FFFFFFFF", "0x0000000000000000", "0x0000000000000000"],
"XMM10": ["0xFFFFFFFF00000000", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000"],
"XMM11": ["0x00000000FFFFFFFF", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"]
},
"MemoryRegions": {
"0x100000000": "4096"
}
}
%endif
; 128-bit (xmm) VCMPPS test. Each compare below uses a different immediate
; predicate (0x00-0x07) and leaves a per-32-bit-lane mask in its destination.
; The expected masks are the RegData entries in the CONFIG block, which also
; expect the upper 128 bits of every destination to be zero.
lea rdx, [rel .data]
; Operands for the predicates that do not involve NaN. Only the low four
; floats take part in the xmm compares (element 0 listed first):
;   ymm0 = [2.0, 1.0, 4.0, 2.0]
;   ymm1 = [2.0, 1.0, 1.0, 5.0]
vmovapd ymm0, [rdx + 32 * 0]
vmovapd ymm1, [rdx + 32 * 1]
vcmpps xmm2, xmm0, xmm1, 0x00 ; EQ
vcmpps xmm3, xmm0, xmm1, 0x01 ; LT
vcmpps xmm4, xmm0, xmm1, 0x02 ; LTE
vcmpps xmm5, xmm0, xmm1, 0x04 ; NEQ
vcmpps xmm6, xmm0, xmm1, 0x05 ; NLT
vcmpps xmm7, xmm0, xmm1, 0x06 ; NLTE
; Unordered and Ordered tests need to be special cased
; (they use a second operand pair that contains NaN values)
vmovapd ymm8, [rdx + 32 * 2]
vmovapd ymm9, [rdx + 32 * 3]
; Unordered will return true when either input is nan
; [0.0, 0.0, nan, nan] unord [0.0, nan, 0.0, nan] = [0, 1, 1, 1]
vcmpps xmm10, xmm8, xmm9, 0x03 ; Unordered
; Ordered will return true when both inputs are NOT nan
; [0.0, 0.0, nan, nan] ord [0.0, nan, 0.0, nan] = [1, 0, 0, 0]
vcmpps xmm11, xmm8, xmm9, 0x07 ; Ordered
hlt
; Operand table: four 32-byte rows, aligned for the vmovapd loads above.
; Each dq packs two floats; the low 32 bits are the lower-numbered element.
; 0x3F800000 = 1.0, 0x40000000 = 2.0, 0x40800000 = 4.0, 0x40A00000 = 5.0,
; 0x7FC00000 = NaN.
align 32
.data:
; Row 0 (ymm0): [2.0, 1.0, 4.0, 2.0] repeated in both 128-bit halves
dq 0x3F80000040000000
dq 0x4000000040800000
dq 0x3F80000040000000
dq 0x4000000040800000
; Row 1 (ymm1): [2.0, 1.0, 1.0, 5.0] repeated in both 128-bit halves
dq 0x3F80000040000000
dq 0x40A000003F800000
dq 0x3F80000040000000
dq 0x40A000003F800000
; Row 2 (ymm8): [0.0, 0.0, nan, nan] repeated in both 128-bit halves
dq 0x0000000000000000
dq 0x7FC000007FC00000
dq 0x0000000000000000
dq 0x7FC000007FC00000
; Row 3 (ymm9): [0.0, nan, 0.0, nan] repeated in both 128-bit halves
dq 0x7FC0000000000000
dq 0x7FC0000000000000
dq 0x7FC0000000000000
dq 0x7FC0000000000000

View File

@ -0,0 +1,66 @@
%ifdef CONFIG
{
"HostFeatures": ["AVX"],
"RegData": {
"XMM2": ["0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000"],
"XMM3": ["0x0000000000000000", "0xFFFFFFFF00000000", "0x0000000000000000", "0xFFFFFFFF00000000"],
"XMM4": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFF00000000", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFF00000000"],
"XMM5": ["0x0000000000000000", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0xFFFFFFFFFFFFFFFF"],
"XMM6": ["0xFFFFFFFFFFFFFFFF", "0x00000000FFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0x00000000FFFFFFFF"],
"XMM7": ["0x0000000000000000", "0x00000000FFFFFFFF", "0x0000000000000000", "0x00000000FFFFFFFF"],
"XMM10": ["0xFFFFFFFF00000000", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFF00000000", "0xFFFFFFFFFFFFFFFF"],
"XMM11": ["0x00000000FFFFFFFF", "0x0000000000000000", "0x00000000FFFFFFFF", "0x0000000000000000"]
},
"MemoryRegions": {
"0x100000000": "4096"
}
}
%endif
; 256-bit (ymm) VCMPPS test. Each compare below uses a different immediate
; predicate (0x00-0x07) and leaves a per-32-bit-lane mask in its destination.
; The expected masks are the RegData entries in the CONFIG block; because the
; operand rows repeat the same four floats in both 128-bit halves, the
; expected upper half of each result equals the lower half.
lea rdx, [rel .data]
; Operands for the predicates that do not involve NaN (element 0 listed first):
;   ymm0 = [2.0, 1.0, 4.0, 2.0, 2.0, 1.0, 4.0, 2.0]
;   ymm1 = [2.0, 1.0, 1.0, 5.0, 2.0, 1.0, 1.0, 5.0]
vmovapd ymm0, [rdx + 32 * 0]
vmovapd ymm1, [rdx + 32 * 1]
vcmpps ymm2, ymm0, ymm1, 0x00 ; EQ
vcmpps ymm3, ymm0, ymm1, 0x01 ; LT
vcmpps ymm4, ymm0, ymm1, 0x02 ; LTE
vcmpps ymm5, ymm0, ymm1, 0x04 ; NEQ
vcmpps ymm6, ymm0, ymm1, 0x05 ; NLT
vcmpps ymm7, ymm0, ymm1, 0x06 ; NLTE
; Unordered and Ordered tests need to be special cased
; (they use a second operand pair that contains NaN values)
vmovapd ymm8, [rdx + 32 * 2]
vmovapd ymm9, [rdx + 32 * 3]
; Unordered will return true when either input is nan
; [0.0, 0.0, nan, nan] unord [0.0, nan, 0.0, nan] = [0, 1, 1, 1]
vcmpps ymm10, ymm8, ymm9, 0x03 ; Unordered
; Ordered will return true when both inputs are NOT nan
; [0.0, 0.0, nan, nan] ord [0.0, nan, 0.0, nan] = [1, 0, 0, 0]
vcmpps ymm11, ymm8, ymm9, 0x07 ; Ordered
hlt
; Operand table: four 32-byte rows, aligned for the vmovapd loads above.
; Each dq packs two floats; the low 32 bits are the lower-numbered element.
; 0x3F800000 = 1.0, 0x40000000 = 2.0, 0x40800000 = 4.0, 0x40A00000 = 5.0,
; 0x7FC00000 = NaN.
align 32
.data:
; Row 0 (ymm0): [2.0, 1.0, 4.0, 2.0] repeated in both 128-bit halves
dq 0x3F80000040000000
dq 0x4000000040800000
dq 0x3F80000040000000
dq 0x4000000040800000
; Row 1 (ymm1): [2.0, 1.0, 1.0, 5.0] repeated in both 128-bit halves
dq 0x3F80000040000000
dq 0x40A000003F800000
dq 0x3F80000040000000
dq 0x40A000003F800000
; Row 2 (ymm8): [0.0, 0.0, nan, nan] repeated in both 128-bit halves
dq 0x0000000000000000
dq 0x7FC000007FC00000
dq 0x0000000000000000
dq 0x7FC000007FC00000
; Row 3 (ymm9): [0.0, nan, 0.0, nan] repeated in both 128-bit halves
dq 0x7FC0000000000000
dq 0x7FC0000000000000
dq 0x7FC0000000000000
dq 0x7FC0000000000000