x86emitter: route SHUF.PS/PD, INSERTPS and EXTRACTPS through EmitSIMD

 * xImplSimd_Shuffle::PS/PD and xINSERTPS gain three-operand
   (dst, src1, src2) overloads. The existing two-operand forms are kept as
   inline forwarders that pass dst as src1.
 * The register form of xEXTRACTPS now takes xRegister32 instead of
   xRegister32or64.
 * SHUF.PD keeps masking the selector to its low two bits before emitting,
   exactly as the replaced xOpWrite0F calls did, so the reserved upper bits
   are never encoded regardless of what _selector_assertion_check does with
   a bad selector.
 * Adds SSE and AVX codegen tests for the converted instructions and moves
   the existing xEXTRACTPS(ptr32[base], ...) SSE test next to them.

NOTE(review): leading indentation and the spacing inside context comments
were reconstructed (tabs assumed). If the context does not match the tree,
apply with `git apply --ignore-whitespace`. The `index` line for simd.cpp
still carries the post-image hash of the patch before the mask was restored.

diff --git a/common/emitter/implement/simd_shufflepack.h b/common/emitter/implement/simd_shufflepack.h
index efd894ce56..7e36239284 100644
--- a/common/emitter/implement/simd_shufflepack.h
+++ b/common/emitter/implement/simd_shufflepack.h
@@ -13,11 +13,15 @@ namespace x86Emitter
 	{
 		inline void _selector_assertion_check(u8 selector) const;
 
-		void PS(const xRegisterSSE& to, const xRegisterSSE& from, u8 selector) const;
-		void PS(const xRegisterSSE& to, const xIndirectVoid& from, u8 selector) const;
+		void PS(const xRegisterSSE& dst, const xRegisterSSE& src, u8 selector) const { PS(dst, dst, src, selector); }
+		void PS(const xRegisterSSE& dst, const xIndirectVoid& src, u8 selector) const { PS(dst, dst, src, selector); }
+		void PS(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2, u8 selector) const;
+		void PS(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2, u8 selector) const;
 
-		void PD(const xRegisterSSE& to, const xRegisterSSE& from, u8 selector) const;
-		void PD(const xRegisterSSE& to, const xIndirectVoid& from, u8 selector) const;
+		void PD(const xRegisterSSE& dst, const xRegisterSSE& src, u8 selector) const { PD(dst, dst, src, selector); }
+		void PD(const xRegisterSSE& dst, const xIndirectVoid& src, u8 selector) const { PD(dst, dst, src, selector); }
+		void PD(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2, u8 selector) const;
+		void PD(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2, u8 selector) const;
 	};
 
 	// --------------------------------------------------------------------------------------
diff --git a/common/emitter/instructions.h b/common/emitter/instructions.h
index fc0515b9fe..3e5fff270b 100644
--- a/common/emitter/instructions.h
+++ b/common/emitter/instructions.h
@@ -508,11 +508,13 @@ namespace x86Emitter
 	extern const xImplSimd_DestRegSSE xMOVSLDUP;
 	extern const xImplSimd_DestRegSSE xMOVSHDUP;
 
-	extern void xINSERTPS(const xRegisterSSE& to, const xRegisterSSE& from, u8 imm8);
-	extern void xINSERTPS(const xRegisterSSE& to, const xIndirect32& from, u8 imm8);
+	extern void xINSERTPS(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2, u8 imm8);
+	extern void xINSERTPS(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirect32& src2, u8 imm8);
+	static void xINSERTPS(const xRegisterSSE& dst, const xRegisterSSE& src, u8 imm8) { xINSERTPS(dst, dst, src, imm8); }
+	static void xINSERTPS(const xRegisterSSE& dst, const xIndirect32& src, u8 imm8) { xINSERTPS(dst, dst, src, imm8); }
 
-	extern void xEXTRACTPS(const xRegister32or64& to, const xRegisterSSE& from, u8 imm8);
-	extern void xEXTRACTPS(const xIndirect32& dest, const xRegisterSSE& from, u8 imm8);
+	extern void xEXTRACTPS(const xRegister32& dst, const xRegisterSSE& src, u8 imm8);
+	extern void xEXTRACTPS(const xIndirect32& dst, const xRegisterSSE& src, u8 imm8);
 
 	// ------------------------------------------------------------------------
 
diff --git a/common/emitter/simd.cpp b/common/emitter/simd.cpp
index b4af0c52d6..eb9b8e7b64 100644
--- a/common/emitter/simd.cpp
+++ b/common/emitter/simd.cpp
@@ -540,26 +540,26 @@ namespace x86Emitter
 			"Invalid immediate operand on SSE Shuffle: Upper 6 bits of the SSE Shuffle-PD Selector are reserved and must be zero.");
 	}
 
-	void xImplSimd_Shuffle::PS(const xRegisterSSE& to, const xRegisterSSE& from, u8 selector) const
+	void xImplSimd_Shuffle::PS(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2, u8 selector) const
 	{
-		xOpWrite0F(0xc6, to, from, selector);
+		EmitSIMD(SIMDInstructionInfo(0xc6), dst, src1, src2, selector);
 	}
 
-	void xImplSimd_Shuffle::PS(const xRegisterSSE& to, const xIndirectVoid& from, u8 selector) const
+	void xImplSimd_Shuffle::PS(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2, u8 selector) const
 	{
-		xOpWrite0F(0xc6, to, from, selector);
+		EmitSIMD(SIMDInstructionInfo(0xc6), dst, src1, src2, selector);
 	}
 
-	void xImplSimd_Shuffle::PD(const xRegisterSSE& to, const xRegisterSSE& from, u8 selector) const
+	void xImplSimd_Shuffle::PD(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2, u8 selector) const
 	{
 		_selector_assertion_check(selector);
-		xOpWrite0F(0x66, 0xc6, to, from, selector & 0x3);
+		EmitSIMD(SIMDInstructionInfo(0xc6).d().p66(), dst, src1, src2, static_cast<u8>(selector & 0x3)); // upper 6 selector bits are reserved; never emit them
 	}
 
-	void xImplSimd_Shuffle::PD(const xRegisterSSE& to, const xIndirectVoid& from, u8 selector) const
+	void xImplSimd_Shuffle::PD(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2, u8 selector) const
 	{
 		_selector_assertion_check(selector);
-		xOpWrite0F(0x66, 0xc6, to, from, selector & 0x3);
+		EmitSIMD(SIMDInstructionInfo(0xc6).d().p66(), dst, src1, src2, static_cast<u8>(selector & 0x3)); // upper 6 selector bits are reserved; never emit them
 	}
 
 	void xImplSimd_PInsert::B(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const { xOpWrite0F(0x66, 0x203a, to, from, imm8); }
@@ -866,15 +866,15 @@ namespace x86Emitter
 	// * ZMASK: Each bit of Imm8[3:0] selects a dword element in dest to be written
 	// with 0.0 if set to 1.
 	//
-	__emitinline void xINSERTPS(const xRegisterSSE& to, const xRegisterSSE& from, u8 imm8) { xOpWrite0F(0x66, 0x213a, to, from, imm8); }
-	__emitinline void xINSERTPS(const xRegisterSSE& to, const xIndirect32& from, u8 imm8) { xOpWrite0F(0x66, 0x213a, to, from, imm8); }
+	__emitinline void xINSERTPS(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2, u8 imm8) { EmitSIMD(SIMDInstructionInfo(0x21).p66().m0f3a(), dst, src1, src2, imm8); }
+	__emitinline void xINSERTPS(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirect32& src2, u8 imm8) { EmitSIMD(SIMDInstructionInfo(0x21).p66().m0f3a(), dst, src1, src2, imm8); }
 
 	// [SSE-4.1] Extract a single-precision floating-point value from src at an offset
 	// determined by imm8[1-0]*32. The extracted single precision floating-point value
 	// is stored into the low 32-bits of dest (or at a 32-bit memory pointer).
 	//
-	__emitinline void xEXTRACTPS(const xRegister32or64& to, const xRegisterSSE& from, u8 imm8) { xOpWrite0F(0x66, 0x173a, to, from, imm8); }
-	__emitinline void xEXTRACTPS(const xIndirect32& dest, const xRegisterSSE& from, u8 imm8) { xOpWrite0F(0x66, 0x173a, from, dest, imm8); }
+	__emitinline void xEXTRACTPS(const xRegister32& dst, const xRegisterSSE& src, u8 imm8) { EmitSIMD(SIMDInstructionInfo(0x17).mov().p66().m0f3a(), src, src, dst, imm8); }
+	__emitinline void xEXTRACTPS(const xIndirect32& dst, const xRegisterSSE& src, u8 imm8) { EmitSIMD(SIMDInstructionInfo(0x17).mov().p66().m0f3a(), src, src, dst, imm8); }
 
 
 	// =====================================================================================================
diff --git a/tests/ctest/common/x86emitter/codegen_tests_main.cpp b/tests/ctest/common/x86emitter/codegen_tests_main.cpp
index 35db6a627a..157f8bdb40 100644
--- a/tests/ctest/common/x86emitter/codegen_tests_main.cpp
+++ b/tests/ctest/common/x86emitter/codegen_tests_main.cpp
@@ -282,6 +282,16 @@ TEST(CodegenTests, SSETest)
 	CODEGEN_TEST(xPMAX.UW(xmm4, xmm9), "66 41 0f 38 3e e1");
 	CODEGEN_TEST(xPMAX.UD(xmm2, ptr[r10]), "66 41 0f 38 3f 12");
 
+	CODEGEN_TEST(xSHUF.PS(xmm0, xmm8, 0x33), "41 0f c6 c0 33");
+	CODEGEN_TEST(xSHUF.PS(xmm0, ptr[r8], 0), "41 0f c6 00 00");
+	CODEGEN_TEST(xSHUF.PD(xmm3, ptr[rcx], 0), "66 0f c6 19 00");
+	CODEGEN_TEST(xSHUF.PD(xmm3, xmm2, 2), "66 0f c6 da 02");
+	CODEGEN_TEST(xINSERTPS(xmm1, xmm2, 0x87), "66 0f 3a 21 ca 87");
+	CODEGEN_TEST(xINSERTPS(xmm1, ptr32[r8], 0x87), "66 41 0f 3a 21 08 87");
+	CODEGEN_TEST(xEXTRACTPS(eax, xmm2, 2), "66 0f 3a 17 d0 02");
+	CODEGEN_TEST(xEXTRACTPS(ptr32[r9], xmm3, 3), "66 41 0f 3a 17 19 03");
+	CODEGEN_TEST(xEXTRACTPS(ptr32[base], xmm1, 2), "66 0f 3a 17 0d f6 ff ff ff 02");
+
 	CODEGEN_TEST(xMOVAPS(xmm0, xmm1), "0f 28 c1");
 	CODEGEN_TEST(xMOVAPS(xmm8, xmm9), "45 0f 28 c1");
 	CODEGEN_TEST(xMOVUPS(xmm8, ptr128[r8+r9]), "47 0f 10 04 08");
@@ -290,7 +300,6 @@ TEST(CodegenTests, SSETest)
 	CODEGEN_TEST(xBLEND.PD(xmm8, xmm9, 0xaa), "66 45 0f 3a 0d c1 aa");
 	CODEGEN_TEST(xPBLEND.W(xmm0, xmm1, 0x55), "66 0f 3a 0e c1 55");
 	CODEGEN_TEST(xPBLEND.VB(xmm1, xmm2), "66 0f 38 10 ca");
-	CODEGEN_TEST(xEXTRACTPS(ptr32[base], xmm1, 2), "66 0f 3a 17 0d f6 ff ff ff 02");
 	CODEGEN_TEST(xMOVD(eax, xmm1), "66 0f 7e c8");
 	CODEGEN_TEST(xMOVD(eax, xmm10), "66 44 0f 7e d0");
 	CODEGEN_TEST(xMOVD(rax, xmm1), "66 48 0f 7e c8");
@@ -445,6 +454,16 @@ TEST(CodegenTests, AVXTest)
 	CODEGEN_TEST(xPMAX.UW(xmm4, xmm9), "c4 c2 59 3e e1");
 	CODEGEN_TEST(xPMAX.UD(xmm2, ptr[r10]), "c4 c2 69 3f 12");
 
+	CODEGEN_TEST(xSHUF.PS(xmm0, xmm8, 0x33), "c4 c1 78 c6 c0 33");
+	CODEGEN_TEST(xSHUF.PS(xmm0, ptr[r8], 0), "c4 c1 78 c6 00 00");
+	CODEGEN_TEST(xSHUF.PD(xmm3, ptr[rcx], 0), "c5 e1 c6 19 00");
+	CODEGEN_TEST(xSHUF.PD(xmm3, xmm2, 2), "c5 e1 c6 da 02");
+	CODEGEN_TEST(xINSERTPS(xmm1, xmm2, 0x87), "c4 e3 71 21 ca 87");
+	CODEGEN_TEST(xINSERTPS(xmm1, ptr32[r8], 0x87), "c4 c3 71 21 08 87");
+	CODEGEN_TEST(xEXTRACTPS(eax, xmm2, 2), "c4 e3 79 17 d0 02");
+	CODEGEN_TEST(xEXTRACTPS(ptr32[r9], xmm3, 3), "c4 c3 79 17 19 03");
+	CODEGEN_TEST(xEXTRACTPS(ptr32[base], xmm1, 2), "c4 e3 79 17 0d f6 ff ff ff 02");
+
 	CODEGEN_TEST(xVMOVAPS(xmm0, xmm1), "c5 f8 28 c1");
 	CODEGEN_TEST(xVMOVAPS(xmm0, ptr32[rdi]), "c5 f8 28 07");
 	CODEGEN_TEST(xVMOVAPS(ptr32[rdi], xmm0), "c5 f8 29 07");