Merge pull request #2230 from lioncash/div

OpcodeDispatcher: Handle VDIVP{D, S}/VDIVS{D, S}
This commit is contained in:
Ryan Houdek 2022-12-12 20:40:26 -08:00 committed by GitHub
commit a07a533640
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 243 additions and 4 deletions

View File

@ -5878,6 +5878,11 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() {
{OPD(1, 0b10, 0x5D), 1, &OpDispatchBuilder::AVXVectorScalarALUOp<IR::OP_VFMIN, 4>},
{OPD(1, 0b11, 0x5D), 1, &OpDispatchBuilder::AVXVectorScalarALUOp<IR::OP_VFMIN, 8>},
{OPD(1, 0b00, 0x5E), 1, &OpDispatchBuilder::AVXVectorALUOp<IR::OP_VFDIV, 4>},
{OPD(1, 0b01, 0x5E), 1, &OpDispatchBuilder::AVXVectorALUOp<IR::OP_VFDIV, 8>},
{OPD(1, 0b10, 0x5E), 1, &OpDispatchBuilder::AVXVectorScalarALUOp<IR::OP_VFDIV, 4>},
{OPD(1, 0b11, 0x5E), 1, &OpDispatchBuilder::AVXVectorScalarALUOp<IR::OP_VFDIV, 8>},
{OPD(1, 0b00, 0x5F), 1, &OpDispatchBuilder::AVXVectorALUOp<IR::OP_VFMAX, 4>},
{OPD(1, 0b01, 0x5F), 1, &OpDispatchBuilder::AVXVectorALUOp<IR::OP_VFMAX, 8>},
{OPD(1, 0b10, 0x5F), 1, &OpDispatchBuilder::AVXVectorScalarALUOp<IR::OP_VFMAX, 4>},

View File

@ -443,6 +443,10 @@ void OpDispatchBuilder::AVXVectorALUOp<IR::OP_VFADD, 4>(OpcodeArgs);
template
void OpDispatchBuilder::AVXVectorALUOp<IR::OP_VFADD, 8>(OpcodeArgs);
// Explicit instantiations for the packed divide handlers: the opcode table
// takes the address of AVXVectorALUOp<IR::OP_VFDIV, 4> for VDIVPS and
// AVXVectorALUOp<IR::OP_VFDIV, 8> for VDIVPD.
// NOTE(review): the second template argument is presumably the element size
// in bytes (4 = single, 8 = double precision) - confirm against the template.
template
void OpDispatchBuilder::AVXVectorALUOp<IR::OP_VFDIV, 4>(OpcodeArgs);
template
void OpDispatchBuilder::AVXVectorALUOp<IR::OP_VFDIV, 8>(OpcodeArgs);
template
void OpDispatchBuilder::AVXVectorALUOp<IR::OP_VFMAX, 4>(OpcodeArgs);
template
void OpDispatchBuilder::AVXVectorALUOp<IR::OP_VFMAX, 8>(OpcodeArgs);
@ -568,6 +572,10 @@ void OpDispatchBuilder::AVXVectorScalarALUOp<IR::OP_VFADD, 4>(OpcodeArgs);
template
void OpDispatchBuilder::AVXVectorScalarALUOp<IR::OP_VFADD, 8>(OpcodeArgs);
// Explicit instantiations for the scalar divide handlers: the opcode table
// takes the address of AVXVectorScalarALUOp<IR::OP_VFDIV, 4> for VDIVSS and
// AVXVectorScalarALUOp<IR::OP_VFDIV, 8> for VDIVSD.
// NOTE(review): the second template argument is presumably the element size
// in bytes (4 = single, 8 = double precision) - confirm against the template.
template
void OpDispatchBuilder::AVXVectorScalarALUOp<IR::OP_VFDIV, 4>(OpcodeArgs);
template
void OpDispatchBuilder::AVXVectorScalarALUOp<IR::OP_VFDIV, 8>(OpcodeArgs);
template
void OpDispatchBuilder::AVXVectorScalarALUOp<IR::OP_VFMAX, 4>(OpcodeArgs);
template
void OpDispatchBuilder::AVXVectorScalarALUOp<IR::OP_VFMAX, 8>(OpcodeArgs);

View File

@ -160,10 +160,10 @@ void InitializeVEXTables() {
{OPD(1, 0b10, 0x5D), 1, X86InstInfo{"VMINSS", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0, nullptr}},
{OPD(1, 0b11, 0x5D), 1, X86InstInfo{"VMINSD", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0, nullptr}},
{OPD(1, 0b00, 0x5E), 1, X86InstInfo{"VDIVPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
{OPD(1, 0b01, 0x5E), 1, X86InstInfo{"VDIVPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
{OPD(1, 0b10, 0x5E), 1, X86InstInfo{"VDIVSS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
{OPD(1, 0b11, 0x5E), 1, X86InstInfo{"VDIVSD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
{OPD(1, 0b00, 0x5E), 1, X86InstInfo{"VDIVPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0, nullptr}},
{OPD(1, 0b01, 0x5E), 1, X86InstInfo{"VDIVPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0, nullptr}},
{OPD(1, 0b10, 0x5E), 1, X86InstInfo{"VDIVSS", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0, nullptr}},
{OPD(1, 0b11, 0x5E), 1, X86InstInfo{"VDIVSD", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0, nullptr}},
{OPD(1, 0b00, 0x5F), 1, X86InstInfo{"VMAXPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0, nullptr}},
{OPD(1, 0b01, 0x5F), 1, X86InstInfo{"VMAXPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0, nullptr}},

View File

@ -0,0 +1,43 @@
%ifdef CONFIG
{
"HostFeatures": ["AVX"],
"RegData": {
"XMM0": ["0x3FF0000000000000", "0x3FF0000000000000", "0x3FF0000000000000", "0x3FF0000000000000"],
"XMM1": ["0x4000000000000000", "0x4000000000000000", "0x4000000000000000", "0x4000000000000000"],
"XMM2": ["0x3FE0000000000000", "0x3FE0000000000000", "0x0000000000000000", "0x0000000000000000"],
"XMM3": ["0x3FE0000000000000", "0x3FE0000000000000", "0x0000000000000000", "0x0000000000000000"],
"XMM4": ["0x3FE0000000000000", "0x3FE0000000000000", "0x3FE0000000000000", "0x3FE0000000000000"],
"XMM5": ["0x4000000000000000", "0x4000000000000000", "0x4000000000000000", "0x4000000000000000"]
},
"MemoryRegions": {
"0x100000000": "4096"
}
}
%endif
; Exercises VDIVPD (packed double-precision divide) at 128-bit and 256-bit
; widths, with the divisor supplied from memory and from a register.
; RegData in the CONFIG header lists the value each register should end with,
; as four 64-bit words, lowest word first.
lea rdx, [rel .data]
vmovapd ymm0, [rdx] ; ymm0 = 1.0 in all four lanes
vmovapd ymm1, [rdx + 32] ; ymm1 = 2.0 in all four lanes
; Memory operand
vdivpd xmm2, xmm0, [rdx + 32] ; 1.0 / 2.0 = 0.5 in the low two lanes; RegData expects the upper 128 bits to be zero
vdivpd ymm4, ymm0, [rdx + 32] ; 0.5 in all four lanes
; Register only
vdivpd xmm3, xmm0, xmm1 ; same result as xmm2, divisor taken from a register
vdivpd ymm5, ymm1, ymm0 ; operands swapped: 2.0 / 1.0 = 2.0 in all four lanes
hlt ; NOTE(review): presumably marks the end of the test for the harness - confirm
align 32 ; the ymm vmovapd loads above need a 32-byte aligned address
.data:
dq 0x3FF0000000000000 ; 1.0
dq 0x3FF0000000000000 ; 1.0
dq 0x3FF0000000000000 ; 1.0
dq 0x3FF0000000000000 ; 1.0
dq 0x4000000000000000 ; 2.0
dq 0x4000000000000000 ; 2.0
dq 0x4000000000000000 ; 2.0
dq 0x4000000000000000 ; 2.0

View File

@ -0,0 +1,43 @@
%ifdef CONFIG
{
"HostFeatures": ["AVX"],
"RegData": {
"XMM0": ["0x400000003F800000", "0x4080000040400000", "0x400000003F800000", "0x4080000040400000"],
"XMM1": ["0x40C0000040A00000", "0x4100000040E00000", "0x40C0000040A00000", "0x4100000040E00000"],
"XMM2": ["0x3EAAAAAB3E4CCCCD", "0x3F0000003EDB6DB7", "0x0000000000000000", "0x0000000000000000"],
"XMM3": ["0x3EAAAAAB3E4CCCCD", "0x3F0000003EDB6DB7", "0x0000000000000000", "0x0000000000000000"],
"XMM4": ["0x3EAAAAAB3E4CCCCD", "0x3F0000003EDB6DB7", "0x3EAAAAAB3E4CCCCD", "0x3F0000003EDB6DB7"],
"XMM5": ["0x4040000040A00000", "0x4000000040155555", "0x4040000040A00000", "0x4000000040155555"]
},
"MemoryRegions": {
"0x100000000": "4096"
}
}
%endif
; Exercises VDIVPS (packed single-precision divide) at 128-bit and 256-bit
; widths, with the divisor supplied from memory and from a register.
; RegData in the CONFIG header lists the value each register should end with,
; as four 64-bit words, lowest word first (two floats per word).
lea rdx, [rel .data]
vmovapd ymm0, [rdx] ; raw 256-bit load: floats {1, 2, 3, 4} in each 128-bit half
vmovapd ymm1, [rdx + 32] ; raw 256-bit load: floats {5, 6, 7, 8} in each 128-bit half
; Memory operand
vdivps xmm2, xmm0, [rdx + 32] ; {1/5, 2/6, 3/7, 4/8}; RegData expects the upper 128 bits to be zero
vdivps ymm4, ymm0, [rdx + 32] ; the same four quotients in both 128-bit halves
; Register only
vdivps xmm3, xmm0, xmm1 ; same result as xmm2, divisor taken from a register
vdivps ymm5, ymm1, ymm0 ; operands swapped: {5/1, 6/2, 7/3, 8/4} in both halves
hlt ; NOTE(review): presumably marks the end of the test for the harness - confirm
align 32 ; the ymm vmovapd loads above need a 32-byte aligned address
.data:
dq 0x400000003F800000 ; 2, 1
dq 0x4080000040400000 ; 4, 3
dq 0x400000003F800000 ; 2, 1
dq 0x4080000040400000 ; 4, 3
dq 0x40C0000040A00000 ; 6, 5
dq 0x4100000040E00000 ; 8, 7
dq 0x40C0000040A00000 ; 6, 5
dq 0x4100000040E00000 ; 8, 7

View File

@ -0,0 +1,70 @@
%ifdef CONFIG
{
"HostFeatures": ["AVX"],
"RegData": {
"XMM0": ["0x3FD0000000000000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
"XMM1": ["0x4010000000000000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"],
"XMM2": ["0x3FE2000000000000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
"XMM3": ["0x4030000000000000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"],
"XMM4": ["0x4039000000000000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
"XMM5": ["0x4019000000000000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
"XMM7": ["0x3FE47AE147AE147B", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
"XMM8": ["0x4030000000000000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"],
"XMM9": ["0x4039000000000000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"]
},
"MemoryRegions": {
"0x100000000": "4096"
}
}
%endif
; Exercises VDIVSD (scalar double-precision divide). Only the low 64 bits are
; divided; bits 64-127 of the result are copied from the first source, and
; RegData expects bits 128-255 of every destination to be zero.
; Each 32-byte row of .data holds one double followed by the filler pattern
; 0x5152535455565758, which makes the copied/zeroed lanes visible in RegData.
lea rdx, [rel .data]
vmovapd ymm0, [rdx + 32 * 0] ; low lane 1.0
vmovapd ymm1, [rdx + 32 * 1] ; low lane 4.0
vmovapd ymm2, [rdx + 32 * 2] ; low lane 9.0
vmovapd ymm3, [rdx + 32 * 3] ; low lane 16.0
vmovapd ymm4, [rdx + 32 * 4] ; low lane 25.0
; Register only
vdivsd xmm0, xmm0, xmm1 ; 1.0 / 4.0 = 0.25; xmm1 must keep its full 256-bit contents
vdivsd xmm2, xmm2, xmm3 ; 9.0 / 16.0 = 0.5625; xmm3 must keep its full 256-bit contents
; Memory operand
vdivsd xmm5, xmm4, [rdx + 32 * 1] ; 25.0 / 4.0 = 6.25, written to a register that is not a source
vdivsd xmm4, xmm4, [rdx + 32 * 0] ; 25.0 / 1.0 = 25.0, destination is also the first source
; Merging different src into destination
vpxor xmm7, xmm7, xmm7 ; destination starts as zero, so the filler expected in bits 64-127 has to come from a source
vmovapd ymm8, [rdx + 32 * 3] ; low lane 16.0
vmovapd ymm9, [rdx + 32 * 4] ; low lane 25.0
vdivsd xmm7, xmm8, xmm9 ; 16.0 / 25.0 = 0.64; xmm8 and xmm9 must be left unchanged
hlt ; NOTE(review): presumably marks the end of the test for the harness - confirm
align 32 ; the ymm vmovapd loads above need a 32-byte aligned address
.data:
dq 0x3FF0000000000000 ; 1.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x4010000000000000 ; 4.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x4022000000000000 ; 9.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x4030000000000000 ; 16.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x4039000000000000 ; 25.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

View File

@ -0,0 +1,70 @@
%ifdef CONFIG
{
"HostFeatures": ["AVX"],
"RegData": {
"XMM0": ["0x414243443E800000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
"XMM1": ["0x4142434440800000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"],
"XMM2": ["0x414243443F100000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
"XMM3": ["0x4142434441800000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"],
"XMM4": ["0x4142434441C80000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
"XMM5": ["0x4142434440C80000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
"XMM7": ["0x414243443F23D70A", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
"XMM8": ["0x4142434441800000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"],
"XMM9": ["0x4142434441C80000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"]
},
"MemoryRegions": {
"0x100000000": "4096"
}
}
%endif
; Exercises VDIVSS (scalar single-precision divide). Only the low 32 bits are
; divided; bits 32-127 of the result are copied from the first source, and
; RegData expects bits 128-255 of every destination to be zero.
; Each 32-byte row of .data holds one float in its low 32 bits; the filler
; 0x41424344 / 0x5152535455565758 above it makes the copied/zeroed bits
; visible in RegData.
lea rdx, [rel .data]
vmovapd ymm0, [rdx + 32 * 0] ; low float 1.0
vmovapd ymm1, [rdx + 32 * 1] ; low float 4.0
vmovapd ymm2, [rdx + 32 * 2] ; low float 9.0
vmovapd ymm3, [rdx + 32 * 3] ; low float 16.0
vmovapd ymm4, [rdx + 32 * 4] ; low float 25.0
; Register only
vdivss xmm0, xmm0, xmm1 ; 1.0 / 4.0 = 0.25 (0x3E800000); xmm1 must keep its full 256-bit contents
vdivss xmm2, xmm2, xmm3 ; 9.0 / 16.0 = 0.5625 (0x3F100000); xmm3 must keep its full 256-bit contents
; Memory operand
vdivss xmm5, xmm4, [rdx + 32 * 1] ; 25.0 / 4.0 = 6.25 (0x40C80000), written to a register that is not a source
vdivss xmm4, xmm4, [rdx + 32 * 0] ; 25.0 / 1.0 = 25.0 (0x41C80000), destination is also the first source
; Merging different src into destination
vpxor xmm7, xmm7, xmm7 ; destination starts as zero, so the filler expected in bits 32-127 has to come from a source
vmovapd ymm8, [rdx + 32 * 3] ; low float 16.0
vmovapd ymm9, [rdx + 32 * 4] ; low float 25.0
vdivss xmm7, xmm8, xmm9 ; 16.0 / 25.0 = 0.64 (0x3F23D70A); xmm8 and xmm9 must be left unchanged
hlt ; NOTE(review): presumably marks the end of the test for the harness - confirm
align 32 ; the ymm vmovapd loads above need a 32-byte aligned address
.data:
dq 0x414243443F800000 ; 1.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x4142434440800000 ; 4.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x4142434441100000 ; 9.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x4142434441800000 ; 16.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x4142434441C80000 ; 25.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758