From 3599438e57e638726a492825435ae46fea99e5af Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Sat, 9 Aug 2025 23:11:32 -0500 Subject: [PATCH] Common: Switch movss/d to auto SSE/AVX --- common/emitter/instructions.h | 14 ++++++----- common/emitter/simd.cpp | 23 +++++++++++++------ .../common/x86emitter/codegen_tests_main.cpp | 16 +++++++++++++ 3 files changed, 40 insertions(+), 13 deletions(-) diff --git a/common/emitter/instructions.h b/common/emitter/instructions.h index bfe3b734af..162ed5739c 100644 --- a/common/emitter/instructions.h +++ b/common/emitter/instructions.h @@ -459,13 +459,15 @@ namespace x86Emitter extern void xMOVQZX(const xRegisterSSE& dst, const xIndirectVoid& src); extern void xMOVQZX(const xRegisterSSE& dst, const xRegisterSSE& src); - extern void xMOVSS(const xRegisterSSE& to, const xRegisterSSE& from); - extern void xMOVSS(const xIndirectVoid& to, const xRegisterSSE& from); - extern void xMOVSD(const xRegisterSSE& to, const xRegisterSSE& from); - extern void xMOVSD(const xIndirectVoid& to, const xRegisterSSE& from); + extern void xMOVSS(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2); + static void xMOVSS(const xRegisterSSE& dst, const xRegisterSSE& src) { xMOVSS(dst, dst, src); } + extern void xMOVSS(const xIndirectVoid& dst, const xRegisterSSE& src); + extern void xMOVSD(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2); + static void xMOVSD(const xRegisterSSE& dst, const xRegisterSSE& src) { xMOVSD(dst, dst, src); } + extern void xMOVSD(const xIndirectVoid& dst, const xRegisterSSE& src); - extern void xMOVSSZX(const xRegisterSSE& to, const xIndirectVoid& from); - extern void xMOVSDZX(const xRegisterSSE& to, const xIndirectVoid& from); + extern void xMOVSSZX(const xRegisterSSE& dst, const xIndirectVoid& src); + extern void xMOVSDZX(const xRegisterSSE& dst, const xIndirectVoid& src); extern void xMOVNTDQA(const xRegisterSSE& to, const xIndirectVoid& from); extern void xMOVNTDQA(const xIndirectVoid& to, const xRegisterSSE& from); diff --git a/common/emitter/simd.cpp b/common/emitter/simd.cpp index a255e90096..f27e5d5612 100644 --- a/common/emitter/simd.cpp +++ b/common/emitter/simd.cpp @@ -809,16 +809,25 @@ namespace x86Emitter // #define IMPLEMENT_xMOVS(ssd, prefix) \ - __fi void xMOV##ssd(const xRegisterSSE& to, const xRegisterSSE& from) \ + __fi void xMOV##ssd(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2) \ { \ - if (to != from) \ - xOpWrite0F(prefix, 0x10, to, from); \ + if (src1 == src2) \ + return xMOVAPS(dst, src1); \ + SIMDInstructionInfo op = SIMDInstructionInfo(0x10).prefix(); \ + const xRegisterSSE* psrc = &src2; \ + const xRegisterSSE* pdst = &dst; \ + if (x86Emitter::use_avx && src2.IsExtended() && !dst.IsExtended()) \ + { \ + op.opcode = 0x11; \ + std::swap(psrc, pdst); \ + } \ + EmitSIMD(op, *pdst, src1, *psrc); \ } \ - __fi void xMOV##ssd##ZX(const xRegisterSSE& to, const xIndirectVoid& from) { xOpWrite0F(prefix, 0x10, to, from); } \ - __fi void xMOV##ssd(const xIndirectVoid& to, const xRegisterSSE& from) { xOpWrite0F(prefix, 0x11, from, to); } + __fi void xMOV##ssd##ZX(const xRegisterSSE& dst, const xIndirectVoid& src) { EmitSIMD(SIMDInstructionInfo(0x10).prefix().mov(), dst, dst, src); } \ + __fi void xMOV##ssd (const xIndirectVoid& dst, const xRegisterSSE& src) { EmitSIMD(SIMDInstructionInfo(0x11).prefix().mov(), src, src, dst); } - IMPLEMENT_xMOVS(SS, 0xf3) - IMPLEMENT_xMOVS(SD, 0xf2) + IMPLEMENT_xMOVS(SS, pf3) + IMPLEMENT_xMOVS(SD, pf2) ////////////////////////////////////////////////////////////////////////////////////////// // Non-temporal movs only support a register as a target (ie, load form only, no stores) diff --git a/tests/ctest/common/x86emitter/codegen_tests_main.cpp b/tests/ctest/common/x86emitter/codegen_tests_main.cpp index 9cefdce336..2db635e4e6 100644 --- a/tests/ctest/common/x86emitter/codegen_tests_main.cpp +++ b/tests/ctest/common/x86emitter/codegen_tests_main.cpp @@ -401,6 +401,14 @@ TEST(CodegenTests, SSETest) CODEGEN_TEST(xMOVDZX(xmm9, ptr[r9]), "66 45 0f 6e 09"); CODEGEN_TEST(xMOVQZX(xmm9, xmm4), "f3 44 0f 7e cc"); CODEGEN_TEST(xMOVQZX(xmm9, ptr[r8]), "f3 45 0f 7e 08"); + + CODEGEN_TEST(xMOVSS(xmm1, xmm1), ""); + CODEGEN_TEST(xMOVSS(xmm1, xmm4), "f3 0f 10 cc"); + CODEGEN_TEST(xMOVSS(ptr[rax], xmm8), "f3 44 0f 11 00"); + CODEGEN_TEST(xMOVSSZX(xmm8, ptr[r8]), "f3 45 0f 10 00"); + CODEGEN_TEST(xMOVSD(xmm4, xmm8), "f2 41 0f 10 e0"); + CODEGEN_TEST(xMOVSD(ptr[rcx], xmm3), "f2 0f 11 19"); + CODEGEN_TEST(xMOVSDZX(xmm2, ptr[r9]), "f2 41 0f 10 11"); } TEST(CodegenTests, AVXTest) @@ -668,6 +676,14 @@ TEST(CodegenTests, AVXTest) CODEGEN_TEST(xMOVQZX(xmm9, xmm4), "c5 7a 7e cc"); CODEGEN_TEST(xMOVQZX(xmm9, ptr[r8]), "c4 41 7a 7e 08"); + CODEGEN_TEST(xMOVSS(xmm1, xmm1), ""); + CODEGEN_TEST(xMOVSS(xmm1, xmm4), "c5 f2 10 cc"); + CODEGEN_TEST(xMOVSS(ptr[rax], xmm8), "c5 7a 11 00"); + CODEGEN_TEST(xMOVSSZX(xmm8, ptr[r8]), "c4 41 7a 10 00"); + CODEGEN_TEST(xMOVSD(xmm4, xmm8), "c5 5b 11 c4"); + CODEGEN_TEST(xMOVSD(ptr[rcx], xmm3), "c5 fb 11 19"); + CODEGEN_TEST(xMOVSDZX(xmm2, ptr[r9]), "c4 c1 7b 10 11"); + CODEGEN_TEST(xVMOVAPS(xmm0, xmm1), "c5 f8 28 c1"); CODEGEN_TEST(xVMOVAPS(xmm0, ptr32[rdi]), "c5 f8 28 07"); CODEGEN_TEST(xVMOVAPS(ptr32[rdi], xmm0), "c5 f8 29 07");