OpcodeDispatcher: Narrow memory access with scalar rounding operations

These should only access a 32-bit or 64-bit portion of memory,
depending on whether the single- or double-precision variant is used.
Previously we'd be doing a full 128-bit load.
This commit is contained in:
lioncash 2022-12-15 19:02:01 +00:00
parent 4b891d6147
commit ba5fafcd7f
3 changed files with 6 additions and 6 deletions

View File

@ -2689,7 +2689,7 @@ void OpDispatchBuilder::VectorRound(OpcodeArgs) {
if constexpr (Scalar) {
// Insert the lower bits
OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags, -1);
OrderedNode *Dest = LoadSource_WithOpSize(FPRClass, Op, Op->Dest, Size, Op->Flags, -1);
auto Result = _VInsElement(Size, ElementSize, 0, 0, Dest, Src);
StoreResult(FPRClass, Op, Result, -1);
} else {
@ -2735,7 +2735,7 @@ void OpDispatchBuilder::AVXVectorRound(OpcodeArgs) {
if constexpr (Scalar) {
// Insert the lower bits
OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, -1);
OrderedNode *Dest = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], Size, Op->Flags, -1);
Result = _VInsElement(Size, ElementSize, 0, 0, Dest, Result);
}
if (Is128Bit) {

View File

@ -24,8 +24,8 @@ void InitializeH0F3ATables(Context::OperatingMode Mode) {
{OPD(0, PF_3A_NONE, 0x0F), 1, X86InstInfo{"PALIGNR", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX, 1, nullptr}},
{OPD(0, PF_3A_66, 0x08), 1, X86InstInfo{"ROUNDPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(0, PF_3A_66, 0x09), 1, X86InstInfo{"ROUNDPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(0, PF_3A_66, 0x0A), 1, X86InstInfo{"ROUNDSS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(0, PF_3A_66, 0x0B), 1, X86InstInfo{"ROUNDSD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(0, PF_3A_66, 0x0A), 1, X86InstInfo{"ROUNDSS", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(0, PF_3A_66, 0x0B), 1, X86InstInfo{"ROUNDSD", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(0, PF_3A_66, 0x0C), 1, X86InstInfo{"BLENDPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(0, PF_3A_66, 0x0D), 1, X86InstInfo{"BLENDPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(0, PF_3A_66, 0x0E), 1, X86InstInfo{"PBLENDW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},

View File

@ -415,8 +415,8 @@ void InitializeVEXTables() {
{OPD(3, 0b01, 0x08), 1, X86InstInfo{"VROUNDPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(3, 0b01, 0x09), 1, X86InstInfo{"VROUNDPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(3, 0b01, 0x0A), 1, X86InstInfo{"VROUNDSS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(3, 0b01, 0x0B), 1, X86InstInfo{"VROUNDSD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(3, 0b01, 0x0A), 1, X86InstInfo{"VROUNDSS", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(3, 0b01, 0x0B), 1, X86InstInfo{"VROUNDSD", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(3, 0b01, 0x0C), 1, X86InstInfo{"VBLENDPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
{OPD(3, 0b01, 0x0D), 1, X86InstInfo{"VBLENDPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
{OPD(3, 0b01, 0x0E), 1, X86InstInfo{"VBLENDW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},