diff --git a/common/emitter/implement/simd_helpers.h b/common/emitter/implement/simd_helpers.h index 55eb34c7bb..f2a8812865 100644 --- a/common/emitter/implement/simd_helpers.h +++ b/common/emitter/implement/simd_helpers.h @@ -149,7 +149,8 @@ namespace x86Emitter // struct xImplSimd_4ArgBlend { - SIMDInstructionInfo info; + SIMDInstructionInfo sse; + SIMDInstructionInfo avx; void operator()(const xRegisterSSE& dst, const xRegisterSSE& src) const { (*this)(dst, dst, src, xmm0); } void operator()(const xRegisterSSE& dst, const xIndirectVoid& src) const { (*this)(dst, dst, src, xmm0); } diff --git a/common/emitter/implement/simd_moremovs.h b/common/emitter/implement/simd_moremovs.h index 3b6205d2ae..7be49cb32b 100644 --- a/common/emitter/implement/simd_moremovs.h +++ b/common/emitter/implement/simd_moremovs.h @@ -76,7 +76,7 @@ namespace x86Emitter // If a mask bit is 1, then the corresponding dword in the source operand is copied // to dest, else the dword element in dest is left unchanged. // - xImplSimd_DestRegImmSSE PS; + xImplSimd_3ArgImm PS; // [SSE-4.1] Conditionally copies quadword values from src to dest, depending on the // mask bits in the immediate operand (bits [1:0]). Each mask bit corresponds to a @@ -85,7 +85,7 @@ namespace x86Emitter // If a mask bit is 1, then the corresponding dword in the source operand is copied // to dest, else the dword element in dest is left unchanged. // - xImplSimd_DestRegImmSSE PD; + xImplSimd_3ArgImm PD; // [SSE-4.1] Conditionally copies dword values from src to dest, depending on the // mask (bits [3:0]) in XMM0 (yes, the fixed register). Each mask bit corresponds @@ -94,7 +94,7 @@ namespace x86Emitter // If a mask bit is 1, then the corresponding dword in the source operand is copied // to dest, else the dword element in dest is left unchanged. // - xImplSimd_DestRegSSE VPS; + xImplSimd_4ArgBlend VPS; // [SSE-4.1] Conditionally copies quadword values from src to dest, depending on the // mask (bits [1:0]) in XMM0 (yes, the fixed register). Each mask bit corresponds @@ -103,13 +103,13 @@ namespace x86Emitter // If a mask bit is 1, then the corresponding dword in the source operand is copied // to dest, else the dword element in dest is left unchanged. // - xImplSimd_DestRegSSE VPD; + xImplSimd_4ArgBlend VPD; }; struct xImplSimd_PBlend { - xImplSimd_DestRegImmSSE W; - xImplSimd_DestRegSSE VB; + xImplSimd_3ArgImm W; + xImplSimd_4ArgBlend VB; }; // -------------------------------------------------------------------------------------- diff --git a/common/emitter/simd.cpp b/common/emitter/simd.cpp index 689ed3acfd..1451062e05 100644 --- a/common/emitter/simd.cpp +++ b/common/emitter/simd.cpp @@ -285,8 +285,8 @@ namespace x86Emitter void xImplSimd_3ArgImm::operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2, u8 imm) const { EmitSIMD(info, dst, src1, src2, imm); } void xImplSimd_3ArgCmp::operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2, SSE2_ComparisonType imm) const { EmitSIMD(info, dst, src1, src2, imm); } void xImplSimd_3ArgCmp::operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2, SSE2_ComparisonType imm) const { EmitSIMD(info, dst, src1, src2, imm); } - void xImplSimd_4ArgBlend::operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2, const xRegisterSSE& src3) const { EmitSIMD(info, dst, src1, src2, src3); } - void xImplSimd_4ArgBlend::operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2, const xRegisterSSE& src3) const { EmitSIMD(info, dst, src1, src2, src3); } + void xImplSimd_4ArgBlend::operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2, const xRegisterSSE& src3) const { EmitSIMD(x86Emitter::use_avx ? avx : sse, dst, src1, src2, src3); } + void xImplSimd_4ArgBlend::operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2, const xRegisterSSE& src3) const { EmitSIMD(x86Emitter::use_avx ? avx : sse, dst, src1, src2, src3); } void xImplSimd_DestRegSSE::operator()(const xRegisterSSE& to, const xRegisterSSE& from) const { OpWriteSSE(Prefix, Opcode); } void xImplSimd_DestRegSSE::operator()(const xRegisterSSE& to, const xIndirectVoid& from) const { OpWriteSSE(Prefix, Opcode); } @@ -761,16 +761,16 @@ namespace x86Emitter const xImplSimd_PBlend xPBLEND = { - {0x66, 0x0e3a}, // W - {0x66, 0x1038}, // VB + {SIMDInstructionInfo(0x0e).i().p66().m0f3a()}, // W + {SIMDInstructionInfo(0x10).i().p66().m0f38(), SIMDInstructionInfo(0x4c).i().p66().m0f3a()}, // VB }; const xImplSimd_Blend xBLEND = { - {0x66, 0x0c3a}, // PS - {0x66, 0x0d3a}, // PD - {0x66, 0x1438}, // VPS - {0x66, 0x1538}, // VPD + {SIMDInstructionInfo(0x0c).p66().f().m0f3a()}, // PS + {SIMDInstructionInfo(0x0d).p66().d().m0f3a()}, // PD + {SIMDInstructionInfo(0x14).p66().f().m0f38(), SIMDInstructionInfo(0x4a).f().p66().m0f3a()}, // VPS + {SIMDInstructionInfo(0x15).p66().d().m0f38(), SIMDInstructionInfo(0x4b).d().p66().m0f3a()}, // VPD }; const xImplSimd_PMove xPMOVSX = {0x2038}; diff --git a/tests/ctest/common/x86emitter/codegen_tests_main.cpp b/tests/ctest/common/x86emitter/codegen_tests_main.cpp index bb678e0dab..638dd99acf 100644 --- a/tests/ctest/common/x86emitter/codegen_tests_main.cpp +++ b/tests/ctest/common/x86emitter/codegen_tests_main.cpp @@ -365,8 +365,11 @@ TEST(CodegenTests, SSETest) CODEGEN_TEST(xBLEND.PS(xmm0, xmm1, 0x55), "66 0f 3a 0c c1 55"); CODEGEN_TEST(xBLEND.PD(xmm8, xmm9, 0xaa), "66 45 0f 3a 0d c1 aa"); + CODEGEN_TEST(xBLEND.VPS(xmm8, ptr[r8]), "66 45 0f 38 14 00"); + CODEGEN_TEST(xBLEND.VPD(xmm1, ptr[base]), "66 0f 38 15 0d f7 ff ff ff"); CODEGEN_TEST(xPBLEND.W(xmm0, xmm1, 0x55), "66 0f 3a 0e c1 55"); - CODEGEN_TEST(xPBLEND.VB(xmm1, xmm2), "66 0f 38 10 ca"); + CODEGEN_TEST(xPBLEND.VB(xmm1, xmm2), "66 0f 38 10 ca"); + CODEGEN_TEST(xMOVD(eax, xmm1), "66 0f 7e c8"); CODEGEN_TEST(xMOVD(eax, xmm10), "66 44 0f 7e d0"); CODEGEN_TEST(xMOVD(rax, xmm1), "66 48 0f 7e c8"); @@ -601,6 +604,13 @@ TEST(CodegenTests, AVXTest) CODEGEN_TEST(xMOVDQU(xmm7, ptr[rcx]), "c5 fa 6f 39"); #endif + CODEGEN_TEST(xBLEND.PS(xmm0, xmm1, 0x55), "c4 e3 79 0c c1 55"); + CODEGEN_TEST(xBLEND.PD(xmm8, xmm9, 0xaa), "c4 43 39 0d c1 aa"); + CODEGEN_TEST(xBLEND.VPS(xmm8, ptr[r8]), "c4 43 39 4a 00 00"); + CODEGEN_TEST(xBLEND.VPD(xmm1, ptr[base]), "c4 e3 71 4b 0d f6 ff ff ff 00"); + CODEGEN_TEST(xPBLEND.W(xmm0, xmm1, 0x55), "c4 e3 79 0e c1 55"); + CODEGEN_TEST(xPBLEND.VB(xmm1, xmm2), "c4 e3 71 4c ca 00"); + CODEGEN_TEST(xVMOVAPS(xmm0, xmm1), "c5 f8 28 c1"); CODEGEN_TEST(xVMOVAPS(xmm0, ptr32[rdi]), "c5 f8 28 07"); CODEGEN_TEST(xVMOVAPS(ptr32[rdi], xmm0), "c5 f8 29 07");