mirror of
https://github.com/PCSX2/pcsx2.git
synced 2026-01-31 01:15:24 +01:00
Common: Switch blend to auto SSE/AVX
This commit is contained in:
committed by
TellowKrinkle
parent
a045c917e7
commit
7eeb6c7aca
@@ -149,7 +149,8 @@ namespace x86Emitter
|
||||
//
|
||||
struct xImplSimd_4ArgBlend
|
||||
{
|
||||
SIMDInstructionInfo info;
|
||||
SIMDInstructionInfo sse;
|
||||
SIMDInstructionInfo avx;
|
||||
|
||||
void operator()(const xRegisterSSE& dst, const xRegisterSSE& src) const { (*this)(dst, dst, src, xmm0); }
|
||||
void operator()(const xRegisterSSE& dst, const xIndirectVoid& src) const { (*this)(dst, dst, src, xmm0); }
|
||||
|
||||
@@ -76,7 +76,7 @@ namespace x86Emitter
|
||||
// If a mask bit is 1, then the corresponding dword in the source operand is copied
|
||||
// to dest, else the dword element in dest is left unchanged.
|
||||
//
|
||||
xImplSimd_DestRegImmSSE PS;
|
||||
xImplSimd_3ArgImm PS;
|
||||
|
||||
// [SSE-4.1] Conditionally copies quadword values from src to dest, depending on the
|
||||
// mask bits in the immediate operand (bits [1:0]). Each mask bit corresponds to a
|
||||
@@ -85,7 +85,7 @@ namespace x86Emitter
|
||||
// If a mask bit is 1, then the corresponding quadword in the source operand is copied
|
||||
// to dest, else the quadword element in dest is left unchanged.
|
||||
//
|
||||
xImplSimd_DestRegImmSSE PD;
|
||||
xImplSimd_3ArgImm PD;
|
||||
|
||||
// [SSE-4.1] Conditionally copies dword values from src to dest, depending on the
|
||||
// mask (the sign bit of each dword) in XMM0 (yes, the fixed register). Each mask bit corresponds
|
||||
@@ -94,7 +94,7 @@ namespace x86Emitter
|
||||
// If a mask bit is 1, then the corresponding dword in the source operand is copied
|
||||
// to dest, else the dword element in dest is left unchanged.
|
||||
//
|
||||
xImplSimd_DestRegSSE VPS;
|
||||
xImplSimd_4ArgBlend VPS;
|
||||
|
||||
// [SSE-4.1] Conditionally copies quadword values from src to dest, depending on the
|
||||
// mask (the sign bit of each quadword) in XMM0 (yes, the fixed register). Each mask bit corresponds
|
||||
@@ -103,13 +103,13 @@ namespace x86Emitter
|
||||
// If a mask bit is 1, then the corresponding quadword in the source operand is copied
|
||||
// to dest, else the quadword element in dest is left unchanged.
|
||||
//
|
||||
xImplSimd_DestRegSSE VPD;
|
||||
xImplSimd_4ArgBlend VPD;
|
||||
};
|
||||
|
||||
struct xImplSimd_PBlend
|
||||
{
|
||||
xImplSimd_DestRegImmSSE W;
|
||||
xImplSimd_DestRegSSE VB;
|
||||
xImplSimd_3ArgImm W;
|
||||
xImplSimd_4ArgBlend VB;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------------------------
|
||||
|
||||
@@ -285,8 +285,8 @@ namespace x86Emitter
|
||||
void xImplSimd_3ArgImm::operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2, u8 imm) const { EmitSIMD(info, dst, src1, src2, imm); }
|
||||
void xImplSimd_3ArgCmp::operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2, SSE2_ComparisonType imm) const { EmitSIMD(info, dst, src1, src2, imm); }
|
||||
void xImplSimd_3ArgCmp::operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2, SSE2_ComparisonType imm) const { EmitSIMD(info, dst, src1, src2, imm); }
|
||||
void xImplSimd_4ArgBlend::operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2, const xRegisterSSE& src3) const { EmitSIMD(info, dst, src1, src2, src3); }
|
||||
void xImplSimd_4ArgBlend::operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2, const xRegisterSSE& src3) const { EmitSIMD(info, dst, src1, src2, src3); }
|
||||
void xImplSimd_4ArgBlend::operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2, const xRegisterSSE& src3) const { EmitSIMD(x86Emitter::use_avx ? avx : sse, dst, src1, src2, src3); }
|
||||
void xImplSimd_4ArgBlend::operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2, const xRegisterSSE& src3) const { EmitSIMD(x86Emitter::use_avx ? avx : sse, dst, src1, src2, src3); }
|
||||
|
||||
void xImplSimd_DestRegSSE::operator()(const xRegisterSSE& to, const xRegisterSSE& from) const { OpWriteSSE(Prefix, Opcode); }
|
||||
void xImplSimd_DestRegSSE::operator()(const xRegisterSSE& to, const xIndirectVoid& from) const { OpWriteSSE(Prefix, Opcode); }
|
||||
@@ -761,16 +761,16 @@ namespace x86Emitter
|
||||
|
||||
const xImplSimd_PBlend xPBLEND =
|
||||
{
|
||||
{0x66, 0x0e3a}, // W
|
||||
{0x66, 0x1038}, // VB
|
||||
{SIMDInstructionInfo(0x0e).i().p66().m0f3a()}, // W
|
||||
{SIMDInstructionInfo(0x10).i().p66().m0f38(), SIMDInstructionInfo(0x4c).i().p66().m0f3a()}, // VB
|
||||
};
|
||||
|
||||
const xImplSimd_Blend xBLEND =
|
||||
{
|
||||
{0x66, 0x0c3a}, // PS
|
||||
{0x66, 0x0d3a}, // PD
|
||||
{0x66, 0x1438}, // VPS
|
||||
{0x66, 0x1538}, // VPD
|
||||
{SIMDInstructionInfo(0x0c).p66().f().m0f3a()}, // PS
|
||||
{SIMDInstructionInfo(0x0d).p66().d().m0f3a()}, // PD
|
||||
{SIMDInstructionInfo(0x14).p66().f().m0f38(), SIMDInstructionInfo(0x4a).f().p66().m0f3a()}, // VPS
|
||||
{SIMDInstructionInfo(0x15).p66().d().m0f38(), SIMDInstructionInfo(0x4b).d().p66().m0f3a()}, // VPD
|
||||
};
|
||||
|
||||
const xImplSimd_PMove xPMOVSX = {0x2038};
|
||||
|
||||
@@ -365,8 +365,11 @@ TEST(CodegenTests, SSETest)
|
||||
|
||||
CODEGEN_TEST(xBLEND.PS(xmm0, xmm1, 0x55), "66 0f 3a 0c c1 55");
|
||||
CODEGEN_TEST(xBLEND.PD(xmm8, xmm9, 0xaa), "66 45 0f 3a 0d c1 aa");
|
||||
CODEGEN_TEST(xBLEND.VPS(xmm8, ptr[r8]), "66 45 0f 38 14 00");
|
||||
CODEGEN_TEST(xBLEND.VPD(xmm1, ptr[base]), "66 0f 38 15 0d f7 ff ff ff");
|
||||
CODEGEN_TEST(xPBLEND.W(xmm0, xmm1, 0x55), "66 0f 3a 0e c1 55");
|
||||
CODEGEN_TEST(xPBLEND.VB(xmm1, xmm2), "66 0f 38 10 ca");
|
||||
CODEGEN_TEST(xPBLEND.VB(xmm1, xmm2), "66 0f 38 10 ca");
|
||||
|
||||
CODEGEN_TEST(xMOVD(eax, xmm1), "66 0f 7e c8");
|
||||
CODEGEN_TEST(xMOVD(eax, xmm10), "66 44 0f 7e d0");
|
||||
CODEGEN_TEST(xMOVD(rax, xmm1), "66 48 0f 7e c8");
|
||||
@@ -601,6 +604,13 @@ TEST(CodegenTests, AVXTest)
|
||||
CODEGEN_TEST(xMOVDQU(xmm7, ptr[rcx]), "c5 fa 6f 39");
|
||||
#endif
|
||||
|
||||
CODEGEN_TEST(xBLEND.PS(xmm0, xmm1, 0x55), "c4 e3 79 0c c1 55");
|
||||
CODEGEN_TEST(xBLEND.PD(xmm8, xmm9, 0xaa), "c4 43 39 0d c1 aa");
|
||||
CODEGEN_TEST(xBLEND.VPS(xmm8, ptr[r8]), "c4 43 39 4a 00 00");
|
||||
CODEGEN_TEST(xBLEND.VPD(xmm1, ptr[base]), "c4 e3 71 4b 0d f6 ff ff ff 00");
|
||||
CODEGEN_TEST(xPBLEND.W(xmm0, xmm1, 0x55), "c4 e3 79 0e c1 55");
|
||||
CODEGEN_TEST(xPBLEND.VB(xmm1, xmm2), "c4 e3 71 4c ca 00");
|
||||
|
||||
CODEGEN_TEST(xVMOVAPS(xmm0, xmm1), "c5 f8 28 c1");
|
||||
CODEGEN_TEST(xVMOVAPS(xmm0, ptr32[rdi]), "c5 f8 28 07");
|
||||
CODEGEN_TEST(xVMOVAPS(ptr32[rdi], xmm0), "c5 f8 29 07");
|
||||
|
||||
Reference in New Issue
Block a user