OpcodeDispatcher: Handle VPACKSSWB

This commit is contained in:
lioncash 2022-12-17 02:16:15 +00:00
parent 4ac031df59
commit f57debeb29
5 changed files with 70 additions and 1 deletions

View File

@ -5891,6 +5891,7 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() {
{OPD(1, 0b10, 0x5F), 1, &OpDispatchBuilder::AVXVectorScalarALUOp<IR::OP_VFMAX, 4>},
{OPD(1, 0b11, 0x5F), 1, &OpDispatchBuilder::AVXVectorScalarALUOp<IR::OP_VFMAX, 8>},
{OPD(1, 0b01, 0x63), 1, &OpDispatchBuilder::VPACKSSOp<2>},
{OPD(1, 0b01, 0x64), 1, &OpDispatchBuilder::AVXVectorALUOp<IR::OP_VCMPGT, 1>},
{OPD(1, 0b01, 0x65), 1, &OpDispatchBuilder::AVXVectorALUOp<IR::OP_VCMPGT, 2>},
{OPD(1, 0b01, 0x66), 1, &OpDispatchBuilder::AVXVectorALUOp<IR::OP_VCMPGT, 4>},

View File

@ -441,6 +441,9 @@ public:
void VMOVVectorNTOp(OpcodeArgs);
template <size_t ElementSize>
void VPACKSSOp(OpcodeArgs);
void VPERM2Op(OpcodeArgs);
void VPERMQOp(OpcodeArgs);

View File

@ -2129,6 +2129,28 @@ void OpDispatchBuilder::PACKSSOp<2>(OpcodeArgs);
template
void OpDispatchBuilder::PACKSSOp<4>(OpcodeArgs);
// Emits IR for the VEX-encoded signed-saturating pack instructions.
//
// Both source operands are loaded as vector registers and handed to
// PACKSSOpImpl together with ElementSize. The packed value is then fixed up
// according to the destination size before being stored:
//   - 128-bit destination: the value is passed through a 16-byte _VMov.
//   - otherwise: 64-bit elements 1 and 2 of the packed value are exchanged.
template<size_t ElementSize>
void OpDispatchBuilder::VPACKSSOp(OpcodeArgs) {
  const auto OperandSize = GetDstSize(Op);

  OrderedNode *FirstSrc = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, -1);
  OrderedNode *SecondSrc = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags, -1);
  OrderedNode *Packed = PACKSSOpImpl(Op, ElementSize, FirstSrc, SecondSrc);

  if (OperandSize == Core::CPUState::XMM_SSE_REG_SIZE) {
    // NOTE(review): the 16-byte move presumably clears everything above the
    // low 128 bits, as a VEX xmm destination requires -- confirm against _VMov.
    StoreResult(FPRClass, Op, _VMov(16, Packed), -1);
    return;
  }

  // Exchange the two middle 64-bit elements so the packed halves of each
  // source end up interleaved in the destination.
  // NOTE(review): assumes _VInsElement takes (RegisterSize, ElementSize,
  // DestIdx, SrcIdx, DestVector, SrcVector) -- confirm against the IR definition.
  // Step 1: copy element 1 of the packed value into slot 2.
  OrderedNode *Partial = _VInsElement(OperandSize, 8, 2, 1, Packed, Packed);
  // Step 2: copy the original element 2 into slot 1 of that intermediate.
  OrderedNode *Interleaved = _VInsElement(OperandSize, 8, 1, 2, Partial, Packed);

  StoreResult(FPRClass, Op, Interleaved, -1);
}
// Explicit instantiation for ElementSize == 2, the variant the AVX opcode
// table installs for opcode 0x63 (VPACKSSWB).
template
void OpDispatchBuilder::VPACKSSOp<2>(OpcodeArgs);
template<size_t ElementSize, bool Signed>
void OpDispatchBuilder::PMULLOp(OpcodeArgs) {
static_assert(ElementSize == sizeof(uint32_t),

View File

@ -77,7 +77,7 @@ void InitializeVEXTables() {
{OPD(1, 0b01, 0x60), 1, X86InstInfo{"VPUNPCKLBW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
{OPD(1, 0b01, 0x61), 1, X86InstInfo{"VPUNPCKLWD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
{OPD(1, 0b01, 0x62), 1, X86InstInfo{"VPUNPCKLDQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
{OPD(1, 0b01, 0x63), 1, X86InstInfo{"VPACKSSWB", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
{OPD(1, 0b01, 0x63), 1, X86InstInfo{"VPACKSSWB", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0, nullptr}},
{OPD(1, 0b01, 0x64), 1, X86InstInfo{"VPCMPGTB", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0, nullptr}},
{OPD(1, 0b01, 0x65), 1, X86InstInfo{"VPCMPGTW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0, nullptr}},
{OPD(1, 0b01, 0x66), 1, X86InstInfo{"VPCMPGTD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0, nullptr}},

View File

@ -0,0 +1,43 @@
%ifdef CONFIG
{
"HostFeatures": ["AVX"],
"RegData": {
"XMM2": ["0x00807F4100807F41", "0x00FF7F4100FF7F41", "0x0000000000000000", "0x0000000000000000"],
"XMM3": ["0x00807F4100807F41", "0x00FF7F4100FF7F41", "0x0000000000000000", "0x0000000000000000"],
"XMM4": ["0x00807F4100807F41", "0x00FF7F4100FF7F41", "0x00807F4100807F41", "0x00FF7F4100FF7F41"],
"XMM5": ["0x00807F4100807F41", "0x00FF7F4100FF7F41", "0x00807F4100807F41", "0x00FF7F4100FF7F41"]
},
"MemoryRegions": {
"0x100000000": "4096"
}
}
%endif
; Exercises VPACKSSWB in all four forms: 128-bit and 256-bit destinations,
; each with a memory and a register second source. The expected register
; contents are listed in the RegData section of the CONFIG header.

; rdx = address of the test vectors below.
lea rdx, [rel .data]
; 16bit signed -> 8bit signed (saturated)
; input > 0x7F(SCHAR_MAX, 127) = 0x7F(SCHAR_MAX, 127)
; input < 0x80(SCHAR_MIN, -128) = 0x80(SCHAR_MIN, -128)
; ymm0 = first 32 bytes of .data, ymm1 = second 32 bytes.
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]
; 128-bit forms: RegData expects the upper 128 bits of ymm2/ymm3 to be zero.
vpacksswb xmm2, xmm0, [rdx + 32]
vpacksswb xmm3, xmm0, xmm1
; 256-bit forms: RegData expects the same packed pattern in both 128-bit lanes.
vpacksswb ymm4, ymm0, [rdx + 32]
vpacksswb ymm5, ymm0, ymm1
hlt
align 32
.data:
; First source: words 0x0041, 0x007F, 0x8000, 0x0000 per qword.
; 0x8000 (-32768) saturates to 0x80; the others fit in a signed byte unchanged.
dq 0x00008000007F0041
dq 0x00008000007F0041
dq 0x00008000007F0041
dq 0x00008000007F0041
; Second source: words 0x0041, 0x007F, 0xFFFF, 0x0000 per qword.
; 0xFFFF (-1) is in range and packs to 0xFF.
dq 0x0000FFFF007F0041
dq 0x0000FFFF007F0041
dq 0x0000FFFF007F0041
dq 0x0000FFFF007F0041