OpcodeDispatcher: Handle VSQRTPD

This commit is contained in:
lioncash 2022-12-07 19:35:47 +00:00
parent 2125949d6d
commit 25428cb28c
3 changed files with 96 additions and 1 deletions

View File

@ -5827,6 +5827,7 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() {
{OPD(1, 0b01, 0x2F), 1, &OpDispatchBuilder::UCOMISxOp<8>},
{OPD(1, 0b00, 0x51), 1, &OpDispatchBuilder::AVXVectorUnaryOp<IR::OP_VFSQRT, 4, false>},
{OPD(1, 0b01, 0x51), 1, &OpDispatchBuilder::AVXVectorUnaryOp<IR::OP_VFSQRT, 8, false>},
{OPD(1, 0b00, 0x54), 1, &OpDispatchBuilder::AVXVectorALUOp<IR::OP_VAND, 16>},
{OPD(1, 0b01, 0x54), 1, &OpDispatchBuilder::AVXVectorALUOp<IR::OP_VAND, 16>},

View File

@ -52,7 +52,7 @@ void InitializeVEXTables() {
{OPD(1, 0b01, 0x50), 1, X86InstInfo{"VMOVMSKPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
{OPD(1, 0b00, 0x51), 1, X86InstInfo{"VSQRTPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}},
{OPD(1, 0b01, 0x51), 1, X86InstInfo{"VSQRTPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
{OPD(1, 0b01, 0x51), 1, X86InstInfo{"VSQRTPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}},
{OPD(1, 0b10, 0x51), 1, X86InstInfo{"VSQRTSS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
{OPD(1, 0b11, 0x51), 1, X86InstInfo{"VSQRTSD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},

View File

@ -0,0 +1,94 @@
%ifdef CONFIG
{
"HostFeatures": ["AVX"],
"RegData": {
"XMM0": ["0x3FF0000000000000", "0x3FF0000000000000", "0x0000000000000000", "0x0000000000000000"],
"XMM1": ["0x4000000000000000", "0x4000000000000000", "0x0000000000000000", "0x0000000000000000"],
"XMM2": ["0x4008000000000000", "0x4008000000000000", "0x0000000000000000", "0x0000000000000000"],
"XMM3": ["0x4010000000000000", "0x4010000000000000", "0x0000000000000000", "0x0000000000000000"],
"XMM4": ["0x3FF0000000000000", "0x3FF0000000000000", "0x0000000000000000", "0x0000000000000000"],
"XMM5": ["0x4000000000000000", "0x4000000000000000", "0x0000000000000000", "0x0000000000000000"],
"XMM6": ["0x4008000000000000", "0x4008000000000000", "0x0000000000000000", "0x0000000000000000"],
"XMM7": ["0x4010000000000000", "0x4010000000000000", "0x0000000000000000", "0x0000000000000000"],
"XMM8": ["0x3FF0000000000000", "0x3FF0000000000000", "0x3FF0000000000000", "0x3FF0000000000000"],
"XMM9": ["0x4000000000000000", "0x4000000000000000", "0x4000000000000000", "0x4000000000000000"],
"XMM10": ["0x4008000000000000", "0x4008000000000000", "0x4008000000000000", "0x4008000000000000"],
"XMM11": ["0x4010000000000000", "0x4010000000000000", "0x4010000000000000", "0x4010000000000000"],
"XMM12": ["0x3FF0000000000000", "0x3FF0000000000000", "0x3FF0000000000000", "0x3FF0000000000000"],
"XMM13": ["0x4000000000000000", "0x4000000000000000", "0x4000000000000000", "0x4000000000000000"],
"XMM14": ["0x4008000000000000", "0x4008000000000000", "0x4008000000000000", "0x4008000000000000"],
"XMM15": ["0x4010000000000000", "0x4010000000000000", "0x4010000000000000", "0x4010000000000000"]
},
"MemoryRegions": {
"0x100000000": "4096"
}
}
%endif
; VSQRTPD (packed double-precision square root) test.
;
; Four operand forms are exercised, four registers each:
;   xmm0-xmm3   : 128-bit, register source
;   xmm4-xmm7   : 128-bit, memory source
;   ymm8-ymm11  : 256-bit, register source
;   ymm12-ymm15 : 256-bit, memory source
;
; The inputs are the 32-byte rows of the .data table below (1.0, 4.0, 9.0,
; 16.0 replicated across all four lanes), so the expected results in the
; CONFIG block are 1.0, 2.0, 3.0 and 4.0. For the 128-bit forms the CONFIG
; block expects the upper two 64-bit lanes of the destination to be zero.

; rdx = base address of the constant table.
lea rdx, [rel .data]

; Load full 256-bit inputs into ymm0-ymm3. The upper lanes are non-zero here,
; so the 128-bit register forms below must clear them to match the expected
; register state.
vmovapd ymm0, [rdx + 32 * 0]
vmovapd ymm1, [rdx + 32 * 1]
vmovapd ymm2, [rdx + 32 * 2]
vmovapd ymm3, [rdx + 32 * 3]

; Seed ymm4-ymm7 with row 4 (25.0 in every lane). These registers are only
; used as destinations of the 128-bit memory forms below, so this value is
; never an input.
; NOTE(review): presumably a deliberate non-zero sentinel so that a stale
; upper half would be detected -- confirm.
vmovapd ymm4, [rdx + 32 * 4]
vmovapd ymm5, [rdx + 32 * 4]
vmovapd ymm6, [rdx + 32 * 4]
vmovapd ymm7, [rdx + 32 * 4]

; Load full 256-bit inputs for the 256-bit register forms.
vmovapd ymm8, [rdx + 32 * 0]
vmovapd ymm9, [rdx + 32 * 1]
vmovapd ymm10, [rdx + 32 * 2]
vmovapd ymm11, [rdx + 32 * 3]

; 128-bit register source, in place: lower two lanes become the square roots,
; upper two lanes are expected to end up zero.
vsqrtpd xmm0, xmm0
vsqrtpd xmm1, xmm1
vsqrtpd xmm2, xmm2
vsqrtpd xmm3, xmm3

; 256-bit register source, in place: all four lanes become the square roots.
vsqrtpd ymm8, ymm8
vsqrtpd ymm9, ymm9
vsqrtpd ymm10, ymm10
vsqrtpd ymm11, ymm11

; 128-bit memory source: reads the first 16 bytes of each row and overwrites
; the 25.0 seed values loaded above.
vsqrtpd xmm4, [rdx + 32 * 0]
vsqrtpd xmm5, [rdx + 32 * 1]
vsqrtpd xmm6, [rdx + 32 * 2]
vsqrtpd xmm7, [rdx + 32 * 3]

; 256-bit memory source: reads a whole 32-byte row. ymm12-ymm15 are not
; initialised beforehand; every lane is written by the instruction.
vsqrtpd ymm12, [rdx + 32 * 0]
vsqrtpd ymm13, [rdx + 32 * 1]
vsqrtpd ymm14, [rdx + 32 * 2]
vsqrtpd ymm15, [rdx + 32 * 3]

; End of test.
; NOTE(review): the harness presumably compares the register state against
; the CONFIG RegData once execution stops here -- confirm.
hlt

; Constant table: five rows of four identical doubles (32 bytes per row),
; indexed above as [rdx + 32 * row]. Aligned to 32 bytes to satisfy the
; alignment requirement of the 256-bit vmovapd loads.
align 32
.data:
; Row 0
dq 0x3FF0000000000000 ; 1.0
dq 0x3FF0000000000000
dq 0x3FF0000000000000
dq 0x3FF0000000000000
; Row 1
dq 0x4010000000000000 ; 4.0
dq 0x4010000000000000
dq 0x4010000000000000
dq 0x4010000000000000
; Row 2
dq 0x4022000000000000 ; 9.0
dq 0x4022000000000000
dq 0x4022000000000000
dq 0x4022000000000000
; Row 3
dq 0x4030000000000000 ; 16.0
dq 0x4030000000000000
dq 0x4030000000000000
dq 0x4030000000000000
; Row 4 -- only used to seed ymm4-ymm7; never passed to vsqrtpd.
dq 0x4039000000000000 ; 25.0
dq 0x4039000000000000
dq 0x4039000000000000
dq 0x4039000000000000