Common: Switch blend to auto SSE/AVX

This commit is contained in:
TellowKrinkle
2025-08-09 20:49:07 -05:00
committed by TellowKrinkle
parent a045c917e7
commit 7eeb6c7aca
4 changed files with 27 additions and 16 deletions

View File

@@ -149,7 +149,8 @@ namespace x86Emitter
//
struct xImplSimd_4ArgBlend
{
SIMDInstructionInfo info;
SIMDInstructionInfo sse;
SIMDInstructionInfo avx;
void operator()(const xRegisterSSE& dst, const xRegisterSSE& src) const { (*this)(dst, dst, src, xmm0); }
void operator()(const xRegisterSSE& dst, const xIndirectVoid& src) const { (*this)(dst, dst, src, xmm0); }

View File

@@ -76,7 +76,7 @@ namespace x86Emitter
// If a mask bit is 1, then the corresponding dword in the source operand is copied
// to dest, else the dword element in dest is left unchanged.
//
xImplSimd_DestRegImmSSE PS;
xImplSimd_3ArgImm PS;
// [SSE-4.1] Conditionally copies quadword values from src to dest, depending on the
// mask bits in the immediate operand (bits [1:0]). Each mask bit corresponds to a
@@ -85,7 +85,7 @@ namespace x86Emitter
// If a mask bit is 1, then the corresponding quadword in the source operand is copied
// to dest, else the quadword element in dest is left unchanged.
//
xImplSimd_DestRegImmSSE PD;
xImplSimd_3ArgImm PD;
// [SSE-4.1] Conditionally copies dword values from src to dest, depending on the
// mask in XMM0 (yes, the fixed register): the most significant bit of each dword. Each mask bit corresponds
@@ -94,7 +94,7 @@ namespace x86Emitter
// If a mask bit is 1, then the corresponding dword in the source operand is copied
// to dest, else the dword element in dest is left unchanged.
//
xImplSimd_DestRegSSE VPS;
xImplSimd_4ArgBlend VPS;
// [SSE-4.1] Conditionally copies quadword values from src to dest, depending on the
// mask in XMM0 (yes, the fixed register): the most significant bit of each quadword. Each mask bit corresponds
@@ -103,13 +103,13 @@ namespace x86Emitter
// If a mask bit is 1, then the corresponding quadword in the source operand is copied
// to dest, else the quadword element in dest is left unchanged.
//
xImplSimd_DestRegSSE VPD;
xImplSimd_4ArgBlend VPD;
};
struct xImplSimd_PBlend
{
xImplSimd_DestRegImmSSE W;
xImplSimd_DestRegSSE VB;
xImplSimd_3ArgImm W;
xImplSimd_4ArgBlend VB;
};
// --------------------------------------------------------------------------------------

View File

@@ -285,8 +285,8 @@ namespace x86Emitter
void xImplSimd_3ArgImm::operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2, u8 imm) const { EmitSIMD(info, dst, src1, src2, imm); }
void xImplSimd_3ArgCmp::operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2, SSE2_ComparisonType imm) const { EmitSIMD(info, dst, src1, src2, imm); }
void xImplSimd_3ArgCmp::operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2, SSE2_ComparisonType imm) const { EmitSIMD(info, dst, src1, src2, imm); }
void xImplSimd_4ArgBlend::operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2, const xRegisterSSE& src3) const { EmitSIMD(info, dst, src1, src2, src3); }
void xImplSimd_4ArgBlend::operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2, const xRegisterSSE& src3) const { EmitSIMD(info, dst, src1, src2, src3); }
void xImplSimd_4ArgBlend::operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2, const xRegisterSSE& src3) const { EmitSIMD(x86Emitter::use_avx ? avx : sse, dst, src1, src2, src3); }
void xImplSimd_4ArgBlend::operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2, const xRegisterSSE& src3) const { EmitSIMD(x86Emitter::use_avx ? avx : sse, dst, src1, src2, src3); }
void xImplSimd_DestRegSSE::operator()(const xRegisterSSE& to, const xRegisterSSE& from) const { OpWriteSSE(Prefix, Opcode); }
void xImplSimd_DestRegSSE::operator()(const xRegisterSSE& to, const xIndirectVoid& from) const { OpWriteSSE(Prefix, Opcode); }
@@ -761,16 +761,16 @@ namespace x86Emitter
const xImplSimd_PBlend xPBLEND =
{
{0x66, 0x0e3a}, // W
{0x66, 0x1038}, // VB
{SIMDInstructionInfo(0x0e).i().p66().m0f3a()}, // W
{SIMDInstructionInfo(0x10).i().p66().m0f38(), SIMDInstructionInfo(0x4c).i().p66().m0f3a()}, // VB
};
const xImplSimd_Blend xBLEND =
{
{0x66, 0x0c3a}, // PS
{0x66, 0x0d3a}, // PD
{0x66, 0x1438}, // VPS
{0x66, 0x1538}, // VPD
{SIMDInstructionInfo(0x0c).p66().f().m0f3a()}, // PS
{SIMDInstructionInfo(0x0d).p66().d().m0f3a()}, // PD
{SIMDInstructionInfo(0x14).p66().f().m0f38(), SIMDInstructionInfo(0x4a).f().p66().m0f3a()}, // VPS
{SIMDInstructionInfo(0x15).p66().d().m0f38(), SIMDInstructionInfo(0x4b).d().p66().m0f3a()}, // VPD
};
const xImplSimd_PMove xPMOVSX = {0x2038};