mirror of
https://github.com/FEX-Emu/FEX.git
synced 2025-02-15 12:28:36 +00:00
OpcodeDispatcher: Handle VPACKSSWB
This commit is contained in:
parent
4ac031df59
commit
f57debeb29
@ -5891,6 +5891,7 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() {
|
||||
{OPD(1, 0b10, 0x5F), 1, &OpDispatchBuilder::AVXVectorScalarALUOp<IR::OP_VFMAX, 4>},
|
||||
{OPD(1, 0b11, 0x5F), 1, &OpDispatchBuilder::AVXVectorScalarALUOp<IR::OP_VFMAX, 8>},
|
||||
|
||||
{OPD(1, 0b01, 0x63), 1, &OpDispatchBuilder::VPACKSSOp<2>},
|
||||
{OPD(1, 0b01, 0x64), 1, &OpDispatchBuilder::AVXVectorALUOp<IR::OP_VCMPGT, 1>},
|
||||
{OPD(1, 0b01, 0x65), 1, &OpDispatchBuilder::AVXVectorALUOp<IR::OP_VCMPGT, 2>},
|
||||
{OPD(1, 0b01, 0x66), 1, &OpDispatchBuilder::AVXVectorALUOp<IR::OP_VCMPGT, 4>},
|
||||
|
@ -441,6 +441,9 @@ public:
|
||||
|
||||
void VMOVVectorNTOp(OpcodeArgs);
|
||||
|
||||
template <size_t ElementSize>
|
||||
void VPACKSSOp(OpcodeArgs);
|
||||
|
||||
void VPERM2Op(OpcodeArgs);
|
||||
void VPERMQOp(OpcodeArgs);
|
||||
|
||||
|
@ -2129,6 +2129,28 @@ void OpDispatchBuilder::PACKSSOp<2>(OpcodeArgs);
|
||||
template
|
||||
void OpDispatchBuilder::PACKSSOp<4>(OpcodeArgs);
|
||||
|
||||
template<size_t ElementSize>
|
||||
void OpDispatchBuilder::VPACKSSOp(OpcodeArgs) {
|
||||
const auto DstSize = GetDstSize(Op);
|
||||
const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
|
||||
OrderedNode *Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, -1);
|
||||
OrderedNode *Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags, -1);
|
||||
OrderedNode *Result = PACKSSOpImpl(Op, ElementSize, Src1, Src2);
|
||||
|
||||
if (Is128Bit) {
|
||||
Result = _VMov(16, Result);
|
||||
} else {
|
||||
// We do a little cheeky 64-bit swapping to interleave the result.
|
||||
OrderedNode* Swapped = _VInsElement(DstSize, 8, 2, 1, Result, Result);
|
||||
Result = _VInsElement(DstSize, 8, 1, 2, Swapped, Result);
|
||||
}
|
||||
StoreResult(FPRClass, Op, Result, -1);
|
||||
}
|
||||
|
||||
template
|
||||
void OpDispatchBuilder::VPACKSSOp<2>(OpcodeArgs);
|
||||
|
||||
template<size_t ElementSize, bool Signed>
|
||||
void OpDispatchBuilder::PMULLOp(OpcodeArgs) {
|
||||
static_assert(ElementSize == sizeof(uint32_t),
|
||||
|
@ -77,7 +77,7 @@ void InitializeVEXTables() {
|
||||
{OPD(1, 0b01, 0x60), 1, X86InstInfo{"VPUNPCKLBW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
|
||||
{OPD(1, 0b01, 0x61), 1, X86InstInfo{"VPUNPCKLWD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
|
||||
{OPD(1, 0b01, 0x62), 1, X86InstInfo{"VPUNPCKLDQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
|
||||
{OPD(1, 0b01, 0x63), 1, X86InstInfo{"VPACKSSWB", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
|
||||
{OPD(1, 0b01, 0x63), 1, X86InstInfo{"VPACKSSWB", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0, nullptr}},
|
||||
{OPD(1, 0b01, 0x64), 1, X86InstInfo{"VPCMPGTB", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0, nullptr}},
|
||||
{OPD(1, 0b01, 0x65), 1, X86InstInfo{"VPCMPGTW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0, nullptr}},
|
||||
{OPD(1, 0b01, 0x66), 1, X86InstInfo{"VPCMPGTD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0, nullptr}},
|
||||
|
43
unittests/ASM/VEX/vpacksswb.asm
Normal file
43
unittests/ASM/VEX/vpacksswb.asm
Normal file
@ -0,0 +1,43 @@
|
||||
%ifdef CONFIG
|
||||
{
|
||||
"HostFeatures": ["AVX"],
|
||||
"RegData": {
|
||||
"XMM2": ["0x00807F4100807F41", "0x00FF7F4100FF7F41", "0x0000000000000000", "0x0000000000000000"],
|
||||
"XMM3": ["0x00807F4100807F41", "0x00FF7F4100FF7F41", "0x0000000000000000", "0x0000000000000000"],
|
||||
"XMM4": ["0x00807F4100807F41", "0x00FF7F4100FF7F41", "0x00807F4100807F41", "0x00FF7F4100FF7F41"],
|
||||
"XMM5": ["0x00807F4100807F41", "0x00FF7F4100FF7F41", "0x00807F4100807F41", "0x00FF7F4100FF7F41"]
|
||||
},
|
||||
"MemoryRegions": {
|
||||
"0x100000000": "4096"
|
||||
}
|
||||
}
|
||||
%endif
|
||||
|
||||
lea rdx, [rel .data]
|
||||
|
||||
; 16bit signed -> 8bit signed (saturated)
|
||||
; input > 0x7F(SCHAR_MAX, 127) = 0x7F(SCHAR_MAX, 127)
|
||||
; input < 0x80(SCHAR_MIN, -128) = 0x80(SCHAR_MIN, -128)
|
||||
|
||||
vmovapd ymm0, [rdx]
|
||||
vmovapd ymm1, [rdx + 32]
|
||||
|
||||
vpacksswb xmm2, xmm0, [rdx + 32]
|
||||
vpacksswb xmm3, xmm0, xmm1
|
||||
|
||||
vpacksswb ymm4, ymm0, [rdx + 32]
|
||||
vpacksswb ymm5, ymm0, ymm1
|
||||
|
||||
hlt
|
||||
|
||||
align 32
|
||||
.data:
|
||||
dq 0x00008000007F0041
|
||||
dq 0x00008000007F0041
|
||||
dq 0x00008000007F0041
|
||||
dq 0x00008000007F0041
|
||||
|
||||
dq 0x0000FFFF007F0041
|
||||
dq 0x0000FFFF007F0041
|
||||
dq 0x0000FFFF007F0041
|
||||
dq 0x0000FFFF007F0041
|
Loading…
x
Reference in New Issue
Block a user