Common: Add auto-switching AVX/SSE emitter functions

This commit is contained in:
TellowKrinkle
2025-06-01 16:26:59 -05:00
committed by TellowKrinkle
parent 5561884126
commit de022ab68d
6 changed files with 503 additions and 1 deletion

View File

@@ -10,6 +10,153 @@ namespace x86Emitter
// xImpl_SIMD Types (template free!)
// =====================================================================================================
struct SIMDInstructionInfo {
/// The prefix byte of a simd instruction. These match up with their (E)VEX encodings.
enum class Prefix : u32 {
None = 0,
P66 = 1,
PF3 = 2,
PF2 = 3,
};
/// The opcode map of a simd instruction. These match up with their (E)VEX encodings.
enum class Map : u32 {
M0F = 1,
M0F38 = 2,
M0F3A = 3,
};
/// Whether an operation operates on float (ss, ps), integer (b, w, d, q), or double (sd, pd) data.
/// May be used to choose an appropriate mov instruction if one is needed.
enum class Type : u32 {
Float, Integer, Double
};
// All fields below are bitfields so the whole instruction description packs into one u32,
// letting it be passed around by value and built at compile time.
/// The main opcode
u32 opcode : 8;
/// Prefix byte
Prefix prefix : 2;
/// Opcode map
Map map : 5;
/// Information about the data this operation operates on. Ignored for instructions where the SSE4 and AVX versions have the same number of arguments.
Type type : 2;
/// For instructions like pslld, the data that should go into the reg field in place of the first src
u32 ext : 3;
/// If true, the two inputs to the function can be swapped without changing its result.
u32 is_commutative : 1;
/// If true, the dst and src1 must be the same in AVX (e.g. mov instructions, pshufd)
u32 is_mov : 1;
/// If true, get `W` from dst register instead of `w_bit`
u32 dst_w : 1;
/// If true, get `W` from src register instead of `w_bit`
u32 src_w : 1;
/// If true, the instruction has the VEX W bit set
u32 w_bit : 1;
/// Defaults: no mandatory prefix, 0F map, Float data, all flags clear.
constexpr SIMDInstructionInfo(u8 opcode_, u8 ext_ = 0)
: opcode(opcode_), prefix(Prefix::None), map(Map::M0F), type(Type::Float), ext(ext_)
, is_commutative(false), is_mov(false), dst_w(false), src_w(false), w_bit(false)
{
}
// For configuration using in a builder-style
// Each method returns a modified copy so descriptions can be chained in a
// constexpr initializer, e.g. SIMDInstructionInfo(0x58).p66().commutative().
constexpr SIMDInstructionInfo p66() const { SIMDInstructionInfo copy = *this; copy.prefix = Prefix::P66; return copy; }
constexpr SIMDInstructionInfo pf3() const { SIMDInstructionInfo copy = *this; copy.prefix = Prefix::PF3; return copy; }
constexpr SIMDInstructionInfo pf2() const { SIMDInstructionInfo copy = *this; copy.prefix = Prefix::PF2; return copy; }
constexpr SIMDInstructionInfo m0f38() const { SIMDInstructionInfo copy = *this; copy.map = Map::M0F38; return copy; }
constexpr SIMDInstructionInfo m0f3a() const { SIMDInstructionInfo copy = *this; copy.map = Map::M0F3A; return copy; }
constexpr SIMDInstructionInfo f() const { SIMDInstructionInfo copy = *this; copy.type = Type::Float; return copy; }
constexpr SIMDInstructionInfo i() const { SIMDInstructionInfo copy = *this; copy.type = Type::Integer; return copy; }
constexpr SIMDInstructionInfo d() const { SIMDInstructionInfo copy = *this; copy.type = Type::Double; return copy; }
constexpr SIMDInstructionInfo w() const { SIMDInstructionInfo copy = *this; copy.w_bit = true; return copy; }
constexpr SIMDInstructionInfo dstw() const { SIMDInstructionInfo copy = *this; copy.dst_w = true; return copy; }
constexpr SIMDInstructionInfo srcw() const { SIMDInstructionInfo copy = *this; copy.src_w = true; return copy; }
constexpr SIMDInstructionInfo commutative() const { SIMDInstructionInfo copy = *this; copy.is_commutative = true; return copy; }
constexpr SIMDInstructionInfo mov() const { SIMDInstructionInfo copy = *this; copy.is_mov = true; return copy; }
};
// ------------------------------------------------------------------------
// For implementing SSE/AVX logic operations that have two arguments in both SSE and AVX
// like MOVAPS, CVTPS2DQ, etc
//
struct xImplSimd_2Arg
{
SIMDInstructionInfo info;
// Forces is_mov: two-arg ops have no second source, so the AVX path leaves vvvv unused.
constexpr xImplSimd_2Arg(SIMDInstructionInfo info_): info(info_.mov()) {}
void operator()(const xRegisterSSE& dst, const xRegisterSSE& src) const;
void operator()(const xRegisterSSE& dst, const xIndirectVoid& src) const;
};
// ------------------------------------------------------------------------
// For implementing SSE/AVX logic operations that have two arguments in both SSE and AVX, plus an immediate
// like PSHUFD
//
struct xImplSimd_2ArgImm
{
SIMDInstructionInfo info;
// Forces is_mov: two-arg ops have no second source, so the AVX path leaves vvvv unused.
constexpr xImplSimd_2ArgImm(SIMDInstructionInfo info_): info(info_.mov()) {}
void operator()(const xRegisterSSE& dst, const xRegisterSSE& src, u8 imm) const;
void operator()(const xRegisterSSE& dst, const xIndirectVoid& src, u8 imm) const;
};
// ------------------------------------------------------------------------
// For implementing SSE/AVX logic operations that have three arguments in AVX and two in SSE
// like ANDPS, ANDPD, etc
//
struct xImplSimd_3Arg
{
SIMDInstructionInfo info;
// Two-arg convenience overloads: destructive form, dst doubles as the first source.
void operator()(const xRegisterSSE& dst, const xRegisterSSE& src) const { (*this)(dst, dst, src); }
void operator()(const xRegisterSSE& dst, const xIndirectVoid& src) const { (*this)(dst, dst, src); }
void operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2) const;
void operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2) const;
};
// ------------------------------------------------------------------------
// For implementing SSE/AVX logic operations that have three arguments in AVX and two in SSE, plus an immediate
// like SHUFPS, INSERTPS, etc
//
struct xImplSimd_3ArgImm
{
SIMDInstructionInfo info;
// Two-arg convenience overloads: destructive form, dst doubles as the first source.
void operator()(const xRegisterSSE& dst, const xRegisterSSE& src, u8 imm) const { (*this)(dst, dst, src, imm); }
void operator()(const xRegisterSSE& dst, const xIndirectVoid& src, u8 imm) const { (*this)(dst, dst, src, imm); }
void operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2, u8 imm) const;
void operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2, u8 imm) const;
};
// ------------------------------------------------------------------------
// For implementing SSE/AVX compare operations that have three arguments in AVX and two
// in SSE and take an SSE2_ComparisonType immediate, like CMPPS, CMPSD, etc
//
struct xImplSimd_3ArgCmp
{
SIMDInstructionInfo info;
// Two-arg convenience overloads: destructive form, dst doubles as the first source.
void operator()(const xRegisterSSE& dst, const xRegisterSSE& src, SSE2_ComparisonType imm) const { (*this)(dst, dst, src, imm); }
void operator()(const xRegisterSSE& dst, const xIndirectVoid& src, SSE2_ComparisonType imm) const { (*this)(dst, dst, src, imm); }
void operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2, SSE2_ComparisonType imm) const;
void operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2, SSE2_ComparisonType imm) const;
};
// ------------------------------------------------------------------------
// For implementing SSE/AVX logic operations that have four arguments AVX and two in SSE (with an implicit xmm0)
// like PBLENDVB, BLENDVPS, etc
//
struct xImplSimd_4ArgBlend
{
SIMDInstructionInfo info;
// Two-arg convenience overloads supply xmm0 as the selector, matching the SSE4 implicit operand.
void operator()(const xRegisterSSE& dst, const xRegisterSSE& src) const { (*this)(dst, dst, src, xmm0); }
void operator()(const xRegisterSSE& dst, const xIndirectVoid& src) const { (*this)(dst, dst, src, xmm0); }
void operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2, const xRegisterSSE& src3) const;
void operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2, const xRegisterSSE& src3) const;
};
// ------------------------------------------------------------------------
// For implementing SSE-only logic operations that have xmmreg,xmmreg/rm forms only,
// like ANDPS/ANDPD

View File

@@ -10,6 +10,7 @@ namespace x86Emitter
{
#define OpWriteSSE(pre, op) xOpWrite0F(pre, op, to, from)
#define OpWriteSIMDMovOp(op) EmitSIMD(op.mov(), to, to, from)
extern void SimdPrefix(u8 prefix, u16 opcode);
extern void EmitSibMagic(uint regfield, const void* address, int extraRIPOffset = 0);
@@ -25,6 +26,9 @@ namespace x86Emitter
extern void EmitRex(const xRegisterBase& reg1, const xRegisterBase& reg2);
extern void EmitRex(const xRegisterBase& reg1, const void* src);
extern void EmitRex(const xRegisterBase& reg1, const xIndirectVoid& sib);
extern void EmitRex(SIMDInstructionInfo info, u32 reg1, const xRegisterBase& reg2);
extern void EmitRex(SIMDInstructionInfo info, const xRegisterBase& reg1, const xRegisterBase& reg2);
extern void EmitRex(SIMDInstructionInfo info, const xRegisterBase& reg1, const xIndirectVoid& sib);
extern void _xMovRtoR(const xRegisterInt& to, const xRegisterInt& from);
@@ -171,4 +175,44 @@ namespace x86Emitter
xWrite8(opcode);
EmitSibMagic(param1, param3);
}
// VEX-encoded (AVX) emission. src1 is the raw register id that goes into VEX.vvvv.
// extraRipOffset is the number of bytes following the ModRM/displacement (e.g. a
// trailing immediate) so RIP-relative displacements can be adjusted.
void EmitVEX(SIMDInstructionInfo info, const xRegisterBase& dst, u8 src1, const xRegisterBase& src2, int extraRipOffset = 0);
void EmitVEX(SIMDInstructionInfo info, const xRegisterBase& dst, u8 src1, const xIndirectVoid& src2, int extraRipOffset = 0);
// Opcode-extension variant: ext fills the ModRM reg field, dst's id goes in vvvv.
void EmitVEX(SIMDInstructionInfo info, u32 ext, u8 dst, const xRegisterBase& src2, int extraRipOffset = 0);
// Convenience overload taking src1 as a register instead of a raw id.
template <typename S2>
__emitinline static void EmitVEX(SIMDInstructionInfo info, const xRegisterBase& dst, const xRegisterBase& src1, const S2& src2, int extraRipOffset = 0)
{
EmitVEX(info, dst, src1.GetId(), src2, extraRipOffset);
}
// Emitter helpers for SIMD operations
// These will dispatch to either SSE or AVX implementations
void EmitSIMDImpl(SIMDInstructionInfo info, const xRegisterBase& dst, const xRegisterBase& src1, int extraRipOffset);
void EmitSIMDImpl(SIMDInstructionInfo info, const xRegisterBase& dst, const xRegisterBase& src1, const xRegisterBase& src2, int extraRipOffset);
void EmitSIMDImpl(SIMDInstructionInfo info, const xRegisterBase& dst, const xRegisterBase& src1, const xIndirectVoid& src2, int extraRipOffset);
// Four-operand (blend-style) forms; src3 is the extra register source.
void EmitSIMD(SIMDInstructionInfo info, const xRegisterBase& dst, const xRegisterBase& src1, const xRegisterBase& src2, const xRegisterBase& src3);
void EmitSIMD(SIMDInstructionInfo info, const xRegisterBase& dst, const xRegisterBase& src1, const xIndirectVoid& src2, const xRegisterBase& src3);
// Two-operand form (uses info.ext in the ModRM reg field).
__emitinline static void EmitSIMD(SIMDInstructionInfo info, const xRegisterBase& dst, const xRegisterBase& src1)
{
EmitSIMDImpl(info, dst, src1, 0);
}
// Two-operand form with trailing immediate; the 1 accounts for the imm byte when
// RIP-relative displacements are computed.
__emitinline static void EmitSIMD(SIMDInstructionInfo info, const xRegisterBase& dst, const xRegisterBase& src1, u8 imm)
{
EmitSIMDImpl(info, dst, src1, 1);
xWrite8(imm);
}
// Three-operand form (src2 may be a register or memory operand).
template <typename S2>
__emitinline static void EmitSIMD(SIMDInstructionInfo info, const xRegisterBase& dst, const xRegisterBase& src1, const S2& src2)
{
EmitSIMDImpl(info, dst, src1, src2, 0);
}
// Three-operand form with trailing immediate.
template <typename S2>
__emitinline static void EmitSIMD(SIMDInstructionInfo info, const xRegisterBase& dst, const xRegisterBase& src1, const S2& src2, u8 imm)
{
EmitSIMDImpl(info, dst, src1, src2, 1);
xWrite8(imm);
}
} // namespace x86Emitter

View File

@@ -6,6 +6,171 @@
namespace x86Emitter
{
/// Translate a (E)VEX prefix enum into the legacy SSE mandatory-prefix byte.
/// Prefix::None has no legacy byte; asking for it is a caller error.
__emitinline static u8 getSSE(SIMDInstructionInfo::Prefix prefix)
{
	using Prefix = SIMDInstructionInfo::Prefix;
	if (prefix == Prefix::P66)
		return 0x66;
	if (prefix == Prefix::PF3)
		return 0xf3;
	if (prefix == Prefix::PF2)
		return 0xf2;
	pxAssert(0); // Prefix::None (or an invalid value) has no SSE prefix byte
	return 0;
}
/// Translate an opcode-map enum into the two escape bytes for legacy SSE encoding.
/// Values are byte-swapped for a little-endian xWrite16 (0F first in memory).
/// M0F is handled separately by callers and is a caller error here.
__emitinline static u16 getSSE(SIMDInstructionInfo::Map map)
{
	using Map = SIMDInstructionInfo::Map;
	if (map == Map::M0F38)
		return 0x380f; // emits 0F 38
	if (map == Map::M0F3A)
		return 0x3a0f; // emits 0F 3A
	pxAssert(0); // M0F (or an invalid value) should never reach here
	return 0;
}
/// Pick a full-register mov instruction description for copying src1 into dst when
/// the destructive SSE path needs an explicit copy first.
__emitinline static SIMDInstructionInfo getMov(SIMDInstructionInfo::Type type)
{
switch (type) {
#ifndef ALWAYS_USE_MOVAPS
// Type-matched movs; presumably to keep data in its execution domain —
// the ALWAYS_USE_MOVAPS escape hatch suggests this is an optional tuning.
case SIMDInstructionInfo::Type::Integer:
return SIMDInstructionInfo(0x6f).p66().mov(); // movdqa
case SIMDInstructionInfo::Type::Double:
return SIMDInstructionInfo(0x28).p66().mov(); // movapd
#endif
default:
case SIMDInstructionInfo::Type::Float:
return SIMDInstructionInfo(0x28).mov(); // movaps
}
}
/// Emit a legacy (non-VEX) SSE instruction: [mandatory prefix] [REX] escape(s) opcode ModRM.
/// extraRIPOffset is the byte count following the ModRM/displacement (e.g. a trailing
/// immediate), forwarded to EmitSibMagic to correct RIP-relative displacements.
template <typename T1, typename T2>
__emitinline static void xOpWrite0F(SIMDInstructionInfo info, T1 dst, const T2& src, int extraRIPOffset)
{
// Mandatory prefix (66/F3/F2) must come before REX
if (info.prefix != SIMDInstructionInfo::Prefix::None)
xWrite8(getSSE(info.prefix));
pxAssert(!info.w_bit); // Only used by AVX
EmitRex(info, dst, src);
if (info.map == SIMDInstructionInfo::Map::M0F)
{
// Single 0F escape: write 0F then the opcode in one little-endian 16-bit store
xWrite16(0x0F | (info.opcode << 8));
}
else
{
// Two-byte escape (0F 38 / 0F 3A) followed by the opcode
xWrite16(getSSE(info.map));
xWrite8(info.opcode);
}
EmitSibMagic(dst, src, extraRIPOffset);
}
/// Core dispatcher: emit `dst = op(src1, src2)` as a single VEX instruction when AVX is
/// enabled, otherwise as destructive SSE, inserting a mov from src1 to dst if needed.
/// Callers must already have resolved the case where that mov would clobber src2.
template <typename S2>
__emitinline static void EmitSimdOp(SIMDInstructionInfo info, const xRegisterBase& dst, const xRegisterBase& src1, const S2& src2, int extraRIPOffset)
{
if (x86Emitter::use_avx)
{
// Mov-style ops have no merge source; pass 0 so vvvv encodes "unused"
EmitVEX(info, dst, info.is_mov ? 0 : src1.GetId(), src2, extraRIPOffset);
}
else
{
if (dst.GetId() != src1.GetId())
{
pxAssert(!info.is_mov);
// Generate a mov to copy from src1 to dst
xOpWrite0F(getMov(info.type), dst, src1, 0);
}
// SSE form is destructive: dst now doubles as the first source
xOpWrite0F(info, dst, src2, extraRIPOffset);
}
}
/// Two-operand form where the ModRM reg field carries an opcode extension (info.ext),
/// e.g. shift-by-immediate instructions like pslld. dst is written, src1 is the input.
void EmitSIMDImpl(SIMDInstructionInfo info, const xRegisterBase& dst, const xRegisterBase& src1, int extraRipOffset)
{
pxAssert(!info.is_mov);
if (x86Emitter::use_avx)
{
// VEX form: ext in the reg field, dst's id in vvvv, src1 in r/m
EmitVEX(info, info.ext, dst.GetId(), src1, extraRipOffset);
}
else
{
if (dst.GetId() != src1.GetId())
{
// Generate a mov to copy from src1 to dst
xOpWrite0F(getMov(info.type), dst, src1, 0);
}
xOpWrite0F(info, info.ext, dst, extraRipOffset);
}
}
/// Three-operand register form. Handles the destructive-dst semantics of the SSE
/// fallback and an encoding-size optimization for the AVX path.
void EmitSIMDImpl(SIMDInstructionInfo info, const xRegisterBase& dst, const xRegisterBase& src1, const xRegisterBase& src2, int extraRipOffset)
{
// Mov-style ops have no separate first source; dst must equal src1
pxAssert(!info.is_mov || dst.GetId() == src1.GetId());
const xRegisterBase* ps1 = &src1;
const xRegisterBase* ps2 = &src2;
if (x86Emitter::use_avx)
{
if (info.is_commutative && info.map == SIMDInstructionInfo::Map::M0F && src2.IsExtended() && !src1.IsExtended())
{
// We can use a C5 op instead of a C4 op if we swap the inputs
// (the extended register moves into vvvv, clearing the B bit that forces C4)
std::swap(ps1, ps2);
}
}
else if (dst.GetId() != src1.GetId() && dst.GetId() == src2.GetId())
{
// The SSE path's mov of src1 into dst would clobber src2; swap if the op allows
if (info.is_commutative)
std::swap(ps1, ps2);
else
pxAssertRel(0, "SSE4 auto mov would destroy the second source!");
}
EmitSimdOp(info, dst, *ps1, *ps2, extraRipOffset);
}
/// Three-operand form with a memory second source. For commutative ops on the SSE
/// path, loads the memory operand into dst first to save the register-copy mov.
void EmitSIMDImpl(SIMDInstructionInfo info, const xRegisterBase& dst, const xRegisterBase& src1, const xIndirectVoid& src2, int extraRipOffset)
{
pxAssert(!info.is_mov || dst.GetId() == src1.GetId());
if (!x86Emitter::use_avx && info.is_commutative && dst.GetId() != src1.GetId())
{
// Do load, op instead of mov, op+load
// No processors differentiate between loads, so always use movaps
EmitSimdOp(getMov(SIMDInstructionInfo::Type::Float), dst, dst, src2, 0);
EmitSimdOp(info, dst, dst, src1, extraRipOffset);
}
else
{
EmitSimdOp(info, dst, src1, src2, extraRipOffset);
}
}
/// Four-operand blend form (dst = blend(src1, src2) selected by src3).
/// AVX encodes src3 as a VEX /is4 operand: its register id goes in the HIGH nibble
/// (imm8[7:4]) of a trailing immediate byte — see Intel SDM VBLENDVPS/VPBLENDVB.
/// SSE4 has no fourth operand and implicitly uses xmm0, so src3 must be xmm0 there.
void EmitSIMD(SIMDInstructionInfo info, const xRegisterBase& dst, const xRegisterBase& src1, const xRegisterBase& src2, const xRegisterBase& src3)
{
	pxAssert(!info.is_mov);
	pxAssertMsg(!info.is_commutative, "I don't think any blend instructions are commutative...");
	if (x86Emitter::use_avx)
	{
		EmitSimdOp(info, dst, src1, src2, 1);
		// /is4: the register specifier belongs in imm8[7:4]; writing the raw id
		// would land it in the (ignored) low nibble and always select xmm0
		xWrite8(src3.GetId() << 4);
	}
	else
	{
		pxAssertRel(src3.GetId() == 0, "SSE4 requires the third source to be xmm0!");
		if (dst.GetId() != src1.GetId() && dst.GetId() == src2.GetId())
			pxAssertRel(0, "SSE4 auto mov would destroy the second source!");
		EmitSimdOp(info, dst, src1, src2, 0);
	}
}
/// Four-operand blend form with a memory second source.
/// AVX encodes src3 as a VEX /is4 operand: its register id goes in the HIGH nibble
/// (imm8[7:4]) of a trailing immediate byte — see Intel SDM VBLENDVPS/VPBLENDVB.
/// SSE4 has no fourth operand and implicitly uses xmm0, so src3 must be xmm0 there.
void EmitSIMD(SIMDInstructionInfo info, const xRegisterBase& dst, const xRegisterBase& src1, const xIndirectVoid& src2, const xRegisterBase& src3)
{
	pxAssert(!info.is_mov);
	pxAssertMsg(!info.is_commutative, "I don't think any blend instructions are commutative...");
	if (x86Emitter::use_avx)
	{
		EmitSimdOp(info, dst, src1, src2, 1);
		// /is4: the register specifier belongs in imm8[7:4], not the low nibble
		xWrite8(src3.GetId() << 4);
	}
	else
	{
		pxAssertRel(src3.GetId() == 0, "SSE4 requires the third source to be xmm0!");
		EmitSimdOp(info, dst, src1, src2, 0);
	}
}
// ------------------------------------------------------------------------
// SimdPrefix - If the lower byte of the opcode is 0x38 or 0x3a, then the opcode is
@@ -107,6 +272,19 @@ namespace x86Emitter
// ------------------------------------------------------------------------
// Trampolines from the xImplSimd_* helper structs into the auto-switching SSE/AVX
// emitters above. Two-argument forms pass dst as both dst and src1 (destructive form).
void xImplSimd_2Arg::operator()(const xRegisterSSE& dst, const xRegisterSSE& src) const { EmitSIMD(info, dst, dst, src); }
void xImplSimd_2Arg::operator()(const xRegisterSSE& dst, const xIndirectVoid& src) const { EmitSIMD(info, dst, dst, src); }
void xImplSimd_2ArgImm::operator()(const xRegisterSSE& dst, const xRegisterSSE& src, u8 imm) const { EmitSIMD(info, dst, dst, src, imm); }
void xImplSimd_2ArgImm::operator()(const xRegisterSSE& dst, const xIndirectVoid& src, u8 imm) const { EmitSIMD(info, dst, dst, src, imm); }
void xImplSimd_3Arg::operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2) const { EmitSIMD(info, dst, src1, src2); }
void xImplSimd_3Arg::operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2) const { EmitSIMD(info, dst, src1, src2); }
void xImplSimd_3ArgImm::operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2, u8 imm) const { EmitSIMD(info, dst, src1, src2, imm); }
void xImplSimd_3ArgImm::operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2, u8 imm) const { EmitSIMD(info, dst, src1, src2, imm); }
void xImplSimd_3ArgCmp::operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2, SSE2_ComparisonType imm) const { EmitSIMD(info, dst, src1, src2, imm); }
void xImplSimd_3ArgCmp::operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2, SSE2_ComparisonType imm) const { EmitSIMD(info, dst, src1, src2, imm); }
void xImplSimd_4ArgBlend::operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2, const xRegisterSSE& src3) const { EmitSIMD(info, dst, src1, src2, src3); }
void xImplSimd_4ArgBlend::operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2, const xRegisterSSE& src3) const { EmitSIMD(info, dst, src1, src2, src3); }
// These remain SSE-only, emitted through the legacy OpWriteSSE path.
void xImplSimd_DestRegSSE::operator()(const xRegisterSSE& to, const xRegisterSSE& from) const { OpWriteSSE(Prefix, Opcode); }
void xImplSimd_DestRegSSE::operator()(const xRegisterSSE& to, const xIndirectVoid& from) const { OpWriteSSE(Prefix, Opcode); }

View File

@@ -51,6 +51,8 @@
thread_local u8* x86Ptr;
thread_local XMMSSEType g_xmmtypes[iREGCNT_XMM] = {XMMT_INT};
bool x86Emitter::use_avx;
namespace x86Emitter
{
@@ -502,6 +504,46 @@ const xRegister32
EmitRex(w, r, x, b, reg1.IsExtended8Bit());
}
/// Emit a REX prefix (if needed) for a SIMD register-register operation.
/// W comes from operand widths only when the instruction opts in via dst_w/src_w.
void EmitRex(SIMDInstructionInfo info, const xRegisterBase& reg1, const xRegisterBase& reg2)
{
	const bool wide = (info.dst_w && reg1.IsWide()) || (info.src_w && reg2.IsWide());
	const bool r = reg1.IsExtended(); // reg field extension
	const bool b = reg2.IsExtended(); // r/m field extension
	EmitRex(wide, r, /*x=*/false, b, reg2.IsExtended8Bit());
}
/// Emit a REX prefix (if needed) for a SIMD register-memory operation.
void EmitRex(SIMDInstructionInfo info, const xRegisterBase& reg1, const xIndirectVoid& sib)
{
	const bool wide = (info.dst_w && reg1.IsWide()) || (info.src_w && sib.IsWide());
	const bool r = reg1.IsExtended();
	bool x = sib.Index.IsExtended();
	bool b = sib.Base.IsExtended();
	if (!NeedsSibMagic(sib))
	{
		// No SIB byte: the single register is encoded in r/m, so its extension
		// bit becomes B and X must be clear
		b = x;
		x = false;
	}
	EmitRex(wide, r, x, b, reg1.IsExtended8Bit());
}
/// Emit a REX prefix (if needed) when the reg field holds an opcode extension
/// (reg1) rather than a register, so only the r/m operand contributes bits.
void EmitRex(SIMDInstructionInfo info, uint reg1, const xRegisterBase& reg2)
{
	const bool wide = info.src_w && reg2.IsWide();
	EmitRex(wide, /*r=*/false, /*x=*/false, reg2.IsExtended(), reg2.IsExtended8Bit());
}
// For use by instructions that are implicitly wide
void EmitRexImplicitlyWide(const xRegisterBase& reg)
{
@@ -526,6 +568,89 @@ const xRegister32
EmitRex(w, r, x, b);
}
/// RXB bits for the opcode-extension form: the reg field is a constant, so only
/// the B bit (bit 5) can ever be set, from the r/m register.
__emitinline static u8 GetVEXRXB(u32 ext, const xRegisterBase& src2)
{
	return src2.IsExtended() ? 0x20 : 0;
}
/// RXB bits (in their VEX byte positions: R=0x80, X=0x40, B=0x20) for a
/// register-memory operand pair.
__emitinline static u8 GetVEXRXB(const xRegisterBase& dst, const xIndirectVoid& src2)
{
	u8 rxb = dst.IsExtended() ? 0x80 : 0;
	if (NeedsSibMagic(src2))
	{
		if (src2.Index.IsExtended())
			rxb |= 0x40;
		if (src2.Base.IsExtended())
			rxb |= 0x20;
	}
	else if (src2.Index.IsExtended())
	{
		// No SIB byte: the lone register is encoded in r/m, so its extension
		// bit is B and X stays clear
		rxb |= 0x20;
	}
	return rxb;
}
/// RXB bits for a register-register operand pair: R (0x80) from the reg-field
/// operand, B (0x20) from the r/m operand; X is never needed.
__emitinline static u8 GetVEXRXB(const xRegisterBase& dst, const xRegisterBase& src2)
{
	u8 rxb = 0;
	if (dst.IsExtended())
		rxb |= 0x80; // R
	if (src2.IsExtended())
		rxb |= 0x20; // B
	return rxb;
}
/// VEX.L bit (bit 2 of the final VEX byte): set for wide (256-bit) SIMD registers.
__emitinline static u8 GetL(const xRegisterBase& arg) { return arg.IsWideSIMD() ? 4 : 0; }
/// Memory operands contribute no L bit; width comes from the register operand.
__emitinline static u8 GetL(const xIndirectVoid& arg) { return 0; }
/// Opcode-extension "operands" have no width.
__emitinline static u8 GetL(u32 ext) { return 0; }
/// VEX.W bit (bit 7 of the final VEX byte): set for 64-bit operands.
__emitinline static u8 GetVEXW(const xRegisterBase& arg) { return arg.GetOperandSize() == 8 ? 0x80 : 0; }
__emitinline static u8 GetVEXW(const xIndirectVoid& arg) { return arg.GetOperandSize() == 8 ? 0x80 : 0; }
__emitinline static u8 GetVEXW(u32 ext) { return 0; }
/// Emit a VEX-encoded instruction: picks the two-byte (C5) form when the encoding
/// allows, otherwise the three-byte (C4) form, then writes the opcode and ModRM.
template <typename D, typename S2>
__emitinline void xOpWriteVEX(SIMDInstructionInfo info, D dst, u8 src1, const S2& src2, int extraRipOffset)
{
u8 m = static_cast<u8>(info.map);
u8 p = static_cast<u8>(info.prefix);
u8 w = 0;
// W either tracks operand sizes (dst_w/src_w) or is a fixed instruction property (w_bit)
if (info.src_w || info.dst_w) {
if (info.dst_w)
w |= GetVEXW(dst);
if (info.src_w)
w |= GetVEXW(src2);
} else {
w = info.w_bit << 7;
}
u8 l = GetL(dst) | GetL(src2); // Needed for 256-bit movemask.
u8 rxb = GetVEXRXB(dst, src2);
// Final VEX byte layout: pp in bits 0-1, L in bit 2, vvvv in bits 3-6
u8 b2 = p | l | (src1 << 3);
// C5 is usable when W=0, the map is 0F, and X/B are clear. R (bit 7) is allowed
// because C5 encodes it itself — hence the 0x7F mask excluding bit 7 from the test.
if (!w && info.map == SIMDInstructionInfo::Map::M0F && !(rxb & 0x7F))
{
// Can use a C5 VEX
u8 b1 = rxb | b2;
xWrite8(0xC5);
xWrite8(b1 ^ 0xF8); // R and vvvv are stored inverted (bits 3-7)
xWrite8(info.opcode);
}
else
{
u8 b1 = rxb | m;
b2 |= w;
xWrite8(0xC4);
xWrite8(b1 ^ 0xE0); // R, X, B stored inverted (bits 5-7)
xWrite8(b2 ^ 0x78); // vvvv stored inverted (bits 3-6)
xWrite8(info.opcode);
}
EmitSibMagic(dst, src2, extraRipOffset);
}
// Out-of-line entry points for the VEX writer, one per operand shape.
void EmitVEX(SIMDInstructionInfo info, const xRegisterBase& dst, u8 src1, const xRegisterBase& src2, int extraRipOffset)
{
xOpWriteVEX(info, dst, src1, src2, extraRipOffset);
}
void EmitVEX(SIMDInstructionInfo info, const xRegisterBase& dst, u8 src1, const xIndirectVoid& src2, int extraRipOffset)
{
xOpWriteVEX(info, dst, src1, src2, extraRipOffset);
}
// Opcode-extension form: ext fills the ModRM reg field, dst's id goes in vvvv.
void EmitVEX(SIMDInstructionInfo info, u32 ext, u8 dst, const xRegisterBase& src2, int extraRipOffset)
{
xOpWriteVEX(info, ext, dst, src2, extraRipOffset);
}
// --------------------------------------------------------------------------------------
// xSetPtr / xAlignPtr / xGetPtr / xAdvancePtr

View File

@@ -29,6 +29,9 @@ namespace x86Emitter
static constexpr int SHADOW_STACK_SIZE = 0;
#endif
/// This will switch all SSE instructions to generate AVX instructions instead
extern bool use_avx;
extern void xWrite8(u8 val);
extern void xWrite16(u16 val);
extern void xWrite32(u32 val);
@@ -1075,4 +1078,4 @@ extern const xRegister32
#include "implement/jmpcall.h"
#include "implement/bmi.h"
#include "implement/avx.h"
#include "implement/avx.h"

View File

@@ -52,6 +52,7 @@
#include "common/StringUtil.h"
#include "common/Threading.h"
#include "common/Timer.h"
#include "common/emitter/x86emitter.h"
#include "IconsFontAwesome6.h"
#include "IconsPromptFont.h"
@@ -391,6 +392,10 @@ bool VMManager::Internal::CPUThreadInitialize()
if (!cpuinfo_initialize())
Console.Error("cpuinfo_initialize() failed.");
#ifdef _M_X86
x86Emitter::use_avx = g_cpu.vectorISA >= ProcessorFeatures::VectorISA::AVX;
#endif
LogCPUCapabilities();
if (!SysMemory::Allocate())