mirror of
https://github.com/PCSX2/pcsx2.git
synced 2026-01-31 01:15:24 +01:00
Common: Add auto-switching AVX/SSE emitter functions
This commit is contained in:
committed by
TellowKrinkle
parent
5561884126
commit
de022ab68d
@@ -10,6 +10,153 @@ namespace x86Emitter
|
||||
// xImpl_SIMD Types (template free!)
|
||||
// =====================================================================================================
|
||||
|
||||
/// Compact, constexpr-buildable description of a SIMD instruction, holding enough
/// information to emit either its legacy SSE encoding or its VEX (AVX) encoding.
struct SIMDInstructionInfo {
	/// The prefix byte of a simd instruction. These match up with their (E)VEX encodings.
	enum class Prefix : u32 {
		None = 0,
		P66 = 1, ///< 0x66
		PF3 = 2, ///< 0xF3
		PF2 = 3, ///< 0xF2
	};
	/// The opcode map of a simd instruction. These match up with their (E)VEX encodings.
	enum class Map : u32 {
		M0F = 1,   ///< 0F escape
		M0F38 = 2, ///< 0F 38 escape
		M0F3A = 3, ///< 0F 3A escape
	};
	/// Whether an operation operates on float (ss, ps), integer (b, w, d, q), or double (sd, pd) data.
	/// May be used to choose an appropriate mov instruction if one is needed.
	enum class Type : u32 {
		Float, Integer, Double
	};

	/// The main opcode
	u32 opcode : 8;
	/// Prefix byte
	Prefix prefix : 2;
	/// Opcode map
	Map map : 5;
	/// Information about the data this operation operates on. Ignored for instructions where the SSE4 and AVX versions have the same number of arguments.
	Type type : 2;
	/// For instructions like pslld, the data that should go into the reg field in place of the first src
	u32 ext : 3;
	/// If true, the two inputs to the function can be swapped without changing its result.
	u32 is_commutative : 1;
	/// If true, the dst and src1 must be the same in AVX (e.g. mov instructions, pshufd)
	u32 is_mov : 1;
	/// If true, get `W` from dst register instead of `w_bit`
	u32 dst_w : 1;
	/// If true, get `W` from src register instead of `w_bit`
	u32 src_w : 1;
	/// If true, the instruction has the VEX W bit set
	u32 w_bit : 1;

	/// Construct from the main opcode byte and an optional reg-field extension.
	/// Defaults: no prefix, 0F map, Float type, all flag bits clear.
	constexpr SIMDInstructionInfo(u8 opcode_, u8 ext_ = 0)
		: opcode(opcode_), prefix(Prefix::None), map(Map::M0F), type(Type::Float), ext(ext_)
		, is_commutative(false), is_mov(false), dst_w(false), src_w(false), w_bit(false)
	{
	}

	// For configuration using in a builder-style.
	// Each method returns a modified copy, so instruction tables can be built
	// as constant expressions, e.g. SIMDInstructionInfo(0x58).p66().commutative().
	constexpr SIMDInstructionInfo p66() const { SIMDInstructionInfo copy = *this; copy.prefix = Prefix::P66; return copy; }
	constexpr SIMDInstructionInfo pf3() const { SIMDInstructionInfo copy = *this; copy.prefix = Prefix::PF3; return copy; }
	constexpr SIMDInstructionInfo pf2() const { SIMDInstructionInfo copy = *this; copy.prefix = Prefix::PF2; return copy; }
	constexpr SIMDInstructionInfo m0f38() const { SIMDInstructionInfo copy = *this; copy.map = Map::M0F38; return copy; }
	constexpr SIMDInstructionInfo m0f3a() const { SIMDInstructionInfo copy = *this; copy.map = Map::M0F3A; return copy; }
	constexpr SIMDInstructionInfo f() const { SIMDInstructionInfo copy = *this; copy.type = Type::Float; return copy; }
	constexpr SIMDInstructionInfo i() const { SIMDInstructionInfo copy = *this; copy.type = Type::Integer; return copy; }
	constexpr SIMDInstructionInfo d() const { SIMDInstructionInfo copy = *this; copy.type = Type::Double; return copy; }
	constexpr SIMDInstructionInfo w() const { SIMDInstructionInfo copy = *this; copy.w_bit = true; return copy; }
	constexpr SIMDInstructionInfo dstw() const { SIMDInstructionInfo copy = *this; copy.dst_w = true; return copy; }
	constexpr SIMDInstructionInfo srcw() const { SIMDInstructionInfo copy = *this; copy.src_w = true; return copy; }
	constexpr SIMDInstructionInfo commutative() const { SIMDInstructionInfo copy = *this; copy.is_commutative = true; return copy; }
	constexpr SIMDInstructionInfo mov() const { SIMDInstructionInfo copy = *this; copy.is_mov = true; return copy; }
};
|
||||
|
||||
// ------------------------------------------------------------------------
// For implementing SSE/AVX logic operations that have two arguments in both SSE and AVX
// like MOVAPS, CVTPS2DQ, etc
//
struct xImplSimd_2Arg
{
	SIMDInstructionInfo info;

	// Forces is_mov: with only two operands, dst also serves as the AVX src1
	// (see SIMDInstructionInfo::is_mov).
	constexpr xImplSimd_2Arg(SIMDInstructionInfo info_): info(info_.mov()) {}

	void operator()(const xRegisterSSE& dst, const xRegisterSSE& src) const;
	void operator()(const xRegisterSSE& dst, const xIndirectVoid& src) const;
};
|
||||
|
||||
// ------------------------------------------------------------------------
// For implementing SSE/AVX logic operations that have two arguments in both SSE and AVX, plus an immediate
// like PSHUFD
//
struct xImplSimd_2ArgImm
{
	SIMDInstructionInfo info;

	// Forces is_mov: with only two register/memory operands, dst also serves as
	// the AVX src1 (see SIMDInstructionInfo::is_mov).
	constexpr xImplSimd_2ArgImm(SIMDInstructionInfo info_): info(info_.mov()) {}

	void operator()(const xRegisterSSE& dst, const xRegisterSSE& src, u8 imm) const;
	void operator()(const xRegisterSSE& dst, const xIndirectVoid& src, u8 imm) const;
};
|
||||
|
||||
// ------------------------------------------------------------------------
// For implementing SSE/AVX logic operations that have three arguments in AVX and two in SSE
// like ANDPS, ANDPD, etc
//
struct xImplSimd_3Arg
{
	SIMDInstructionInfo info;

	// Two-operand convenience forms: dst doubles as the first source.
	void operator()(const xRegisterSSE& dst, const xRegisterSSE& src) const { (*this)(dst, dst, src); }
	void operator()(const xRegisterSSE& dst, const xIndirectVoid& src) const { (*this)(dst, dst, src); }
	void operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2) const;
	void operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2) const;
};
|
||||
|
||||
// ------------------------------------------------------------------------
// For implementing SSE/AVX logic operations that have three arguments in AVX and two in SSE,
// plus a trailing immediate byte, like SHUFPS, INSERTPS, etc
//
struct xImplSimd_3ArgImm
{
	SIMDInstructionInfo info;

	// Two-operand convenience forms: dst doubles as the first source.
	void operator()(const xRegisterSSE& dst, const xRegisterSSE& src, u8 imm) const { (*this)(dst, dst, src, imm); }
	void operator()(const xRegisterSSE& dst, const xIndirectVoid& src, u8 imm) const { (*this)(dst, dst, src, imm); }
	void operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2, u8 imm) const;
	void operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2, u8 imm) const;
};
|
||||
|
||||
// ------------------------------------------------------------------------
// For implementing SSE/AVX compare operations that have three arguments in AVX and two in SSE,
// taking an SSE2_ComparisonType as the immediate (e.g. the CMPPS/CMPPD family)
//
struct xImplSimd_3ArgCmp
{
	SIMDInstructionInfo info;

	// Two-operand convenience forms: dst doubles as the first source.
	void operator()(const xRegisterSSE& dst, const xRegisterSSE& src, SSE2_ComparisonType imm) const { (*this)(dst, dst, src, imm); }
	void operator()(const xRegisterSSE& dst, const xIndirectVoid& src, SSE2_ComparisonType imm) const { (*this)(dst, dst, src, imm); }
	void operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2, SSE2_ComparisonType imm) const;
	void operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2, SSE2_ComparisonType imm) const;
};
|
||||
|
||||
// ------------------------------------------------------------------------
// For implementing SSE/AVX logic operations that have four arguments in AVX and two in SSE
// (with an implicit xmm0 third source), like PBLENDVB, BLENDVPS, etc
//
struct xImplSimd_4ArgBlend
{
	SIMDInstructionInfo info;

	// Two-operand convenience forms: dst doubles as src1, xmm0 is the blend mask.
	void operator()(const xRegisterSSE& dst, const xRegisterSSE& src) const { (*this)(dst, dst, src, xmm0); }
	void operator()(const xRegisterSSE& dst, const xIndirectVoid& src) const { (*this)(dst, dst, src, xmm0); }
	void operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2, const xRegisterSSE& src3) const;
	void operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2, const xRegisterSSE& src3) const;
};
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// For implementing SSE-only logic operations that have xmmreg,xmmreg/rm forms only,
|
||||
// like ANDPS/ANDPD
|
||||
|
||||
@@ -10,6 +10,7 @@ namespace x86Emitter
|
||||
{
|
||||
|
||||
#define OpWriteSSE(pre, op) xOpWrite0F(pre, op, to, from)
|
||||
#define OpWriteSIMDMovOp(op) EmitSIMD(op.mov(), to, to, from)
|
||||
|
||||
extern void SimdPrefix(u8 prefix, u16 opcode);
|
||||
extern void EmitSibMagic(uint regfield, const void* address, int extraRIPOffset = 0);
|
||||
@@ -25,6 +26,9 @@ namespace x86Emitter
|
||||
extern void EmitRex(const xRegisterBase& reg1, const xRegisterBase& reg2);
|
||||
extern void EmitRex(const xRegisterBase& reg1, const void* src);
|
||||
extern void EmitRex(const xRegisterBase& reg1, const xIndirectVoid& sib);
|
||||
extern void EmitRex(SIMDInstructionInfo info, u32 reg1, const xRegisterBase& reg2);
|
||||
extern void EmitRex(SIMDInstructionInfo info, const xRegisterBase& reg1, const xRegisterBase& reg2);
|
||||
extern void EmitRex(SIMDInstructionInfo info, const xRegisterBase& reg1, const xIndirectVoid& sib);
|
||||
|
||||
extern void _xMovRtoR(const xRegisterInt& to, const xRegisterInt& from);
|
||||
|
||||
@@ -171,4 +175,44 @@ namespace x86Emitter
|
||||
xWrite8(opcode);
|
||||
EmitSibMagic(param1, param3);
|
||||
}
|
||||
|
||||
// Low-level VEX emitters. src1 is passed as a raw register id (vvvv field) so
// callers can substitute 0 for ops with no second source (see EmitSimdOp).
void EmitVEX(SIMDInstructionInfo info, const xRegisterBase& dst, u8 src1, const xRegisterBase& src2, int extraRipOffset = 0);
void EmitVEX(SIMDInstructionInfo info, const xRegisterBase& dst, u8 src1, const xIndirectVoid& src2, int extraRipOffset = 0);
void EmitVEX(SIMDInstructionInfo info, u32 ext, u8 dst, const xRegisterBase& src2, int extraRipOffset = 0);

/// Convenience overload: takes src1 as a register and forwards its id.
template <typename S2>
__emitinline static void EmitVEX(SIMDInstructionInfo info, const xRegisterBase& dst, const xRegisterBase& src1, const S2& src2, int extraRipOffset = 0)
{
	EmitVEX(info, dst, src1.GetId(), src2, extraRipOffset);
}
|
||||
|
||||
// Emitter helpers for SIMD operations
// These will dispatch to either SSE or AVX implementations

void EmitSIMDImpl(SIMDInstructionInfo info, const xRegisterBase& dst, const xRegisterBase& src1, int extraRipOffset);
void EmitSIMDImpl(SIMDInstructionInfo info, const xRegisterBase& dst, const xRegisterBase& src1, const xRegisterBase& src2, int extraRipOffset);
void EmitSIMDImpl(SIMDInstructionInfo info, const xRegisterBase& dst, const xRegisterBase& src1, const xIndirectVoid& src2, int extraRipOffset);
void EmitSIMD(SIMDInstructionInfo info, const xRegisterBase& dst, const xRegisterBase& src1, const xRegisterBase& src2, const xRegisterBase& src3);
void EmitSIMD(SIMDInstructionInfo info, const xRegisterBase& dst, const xRegisterBase& src1, const xIndirectVoid& src2, const xRegisterBase& src3);

/// Two-operand form, no immediate.
__emitinline static void EmitSIMD(SIMDInstructionInfo info, const xRegisterBase& dst, const xRegisterBase& src1)
{
	EmitSIMDImpl(info, dst, src1, 0);
}
/// Two-operand form with a trailing immediate; the 1 tells the impl that one
/// extra byte follows the ModRM bytes (presumably for RIP-relative displacement
/// fixup — see EmitSibMagic's extraRIPOffset parameter).
__emitinline static void EmitSIMD(SIMDInstructionInfo info, const xRegisterBase& dst, const xRegisterBase& src1, u8 imm)
{
	EmitSIMDImpl(info, dst, src1, 1);
	xWrite8(imm);
}
/// Three-operand form (register or memory second source), no immediate.
template <typename S2>
__emitinline static void EmitSIMD(SIMDInstructionInfo info, const xRegisterBase& dst, const xRegisterBase& src1, const S2& src2)
{
	EmitSIMDImpl(info, dst, src1, src2, 0);
}
/// Three-operand form with a trailing immediate byte.
template <typename S2>
__emitinline static void EmitSIMD(SIMDInstructionInfo info, const xRegisterBase& dst, const xRegisterBase& src1, const S2& src2, u8 imm)
{
	EmitSIMDImpl(info, dst, src1, src2, 1);
	xWrite8(imm);
}
|
||||
} // namespace x86Emitter
|
||||
|
||||
@@ -6,6 +6,171 @@
|
||||
|
||||
namespace x86Emitter
|
||||
{
|
||||
/// Translate a Prefix enum into the legacy-SSE mandatory prefix byte.
/// Must not be called with Prefix::None (asserts and returns 0).
__emitinline static u8 getSSE(SIMDInstructionInfo::Prefix prefix)
{
	using Prefix = SIMDInstructionInfo::Prefix;
	if (prefix == Prefix::P66)
		return 0x66;
	if (prefix == Prefix::PF3)
		return 0xf3;
	if (prefix == Prefix::PF2)
		return 0xf2;
	pxAssert(0);
	return 0;
}
|
||||
|
||||
/// Translate a Map enum into the two-byte escape sequence written before the
/// opcode (little-endian: 0x380f emits 0F 38, 0x3a0f emits 0F 3A).
/// Must not be called with Map::M0F (asserts and returns 0).
__emitinline static u16 getSSE(SIMDInstructionInfo::Map map)
{
	using Map = SIMDInstructionInfo::Map;
	if (map == Map::M0F38)
		return 0x380f;
	if (map == Map::M0F3A)
		return 0x3a0f;
	pxAssert(0);
	return 0;
}
|
||||
|
||||
/// Pick an aligned register-mov instruction matching an operation's data type,
/// for use when the SSE fallback needs to copy src1 into dst first.
/// Float -> movaps (0F 28); Integer -> movdqa (66 0F 6F); Double -> movapd (66 0F 28).
/// With ALWAYS_USE_MOVAPS defined, movaps is used for all types.
__emitinline static SIMDInstructionInfo getMov(SIMDInstructionInfo::Type type)
{
	switch (type) {
#ifndef ALWAYS_USE_MOVAPS
		case SIMDInstructionInfo::Type::Integer:
			return SIMDInstructionInfo(0x6f).p66().mov();
		case SIMDInstructionInfo::Type::Double:
			return SIMDInstructionInfo(0x28).p66().mov();
#endif
		default:
		case SIMDInstructionInfo::Type::Float:
			return SIMDInstructionInfo(0x28).mov();
	}
}
|
||||
|
||||
/// Emit the legacy (non-VEX) SSE encoding of `info`:
///   [mandatory prefix] [REX] [0F escape(s)] [opcode] [ModRM/SIB/disp]
/// T1 may be a register or a plain uint opcode extension; T2 a register or memory operand.
/// extraRIPOffset is forwarded to EmitSibMagic for bytes emitted after the ModRM group.
template <typename T1, typename T2>
__emitinline static void xOpWrite0F(SIMDInstructionInfo info, T1 dst, const T2& src, int extraRIPOffset)
{
	if (info.prefix != SIMDInstructionInfo::Prefix::None)
		xWrite8(getSSE(info.prefix));
	pxAssert(!info.w_bit); // Only used by AVX
	EmitRex(info, dst, src);
	if (info.map == SIMDInstructionInfo::Map::M0F)
	{
		// Single 0F escape: write escape + opcode with one 16-bit store.
		xWrite16(0x0F | (info.opcode << 8));
	}
	else
	{
		// Two-byte escape (0F 38 / 0F 3A) followed by the opcode.
		xWrite16(getSSE(info.map));
		xWrite8(info.opcode);
	}
	EmitSibMagic(dst, src, extraRIPOffset);
}
|
||||
|
||||
/// Emit one operation, choosing the VEX or legacy SSE encoding based on
/// x86Emitter::use_avx. On the SSE path, destructive ops require dst == src1;
/// if they differ, a type-appropriate mov copies src1 into dst first.
/// NOTE: that mov assumes src2 does not alias dst — callers (EmitSIMDImpl)
/// assert or swap to guarantee this.
template <typename S2>
__emitinline static void EmitSimdOp(SIMDInstructionInfo info, const xRegisterBase& dst, const xRegisterBase& src1, const S2& src2, int extraRIPOffset)
{
	if (x86Emitter::use_avx)
	{
		// is_mov ops have no separate vvvv source: encode 0 (inverted to 1111 later).
		EmitVEX(info, dst, info.is_mov ? 0 : src1.GetId(), src2, extraRIPOffset);
	}
	else
	{
		if (dst.GetId() != src1.GetId())
		{
			pxAssert(!info.is_mov);
			// Generate a mov to copy from src1 to dst
			xOpWrite0F(getMov(info.type), dst, src1, 0);
		}
		xOpWrite0F(info, dst, src2, extraRIPOffset);
	}
}
|
||||
|
||||
/// Two-operand form where the ModRM reg field carries info.ext instead of a
/// register (pslld-style ops). VEX: ext in reg, dst in vvvv, src1 in rm.
/// SSE: a mov unifies dst/src1 first, then ext in reg with dst in rm.
void EmitSIMDImpl(SIMDInstructionInfo info, const xRegisterBase& dst, const xRegisterBase& src1, int extraRipOffset)
{
	pxAssert(!info.is_mov);
	if (x86Emitter::use_avx)
	{
		EmitVEX(info, info.ext, dst.GetId(), src1, extraRipOffset);
	}
	else
	{
		if (dst.GetId() != src1.GetId())
		{
			// Generate a mov to copy from src1 to dst
			xOpWrite0F(getMov(info.type), dst, src1, 0);
		}
		xOpWrite0F(info, info.ext, dst, extraRipOffset);
	}
}
|
||||
/// Three-operand register form. Swaps operands where legal so the SSE
/// fallback's implicit "copy src1 into dst" mov cannot clobber src2, and
/// prefers the shorter 2-byte (C5) VEX encoding for commutative ops.
void EmitSIMDImpl(SIMDInstructionInfo info, const xRegisterBase& dst, const xRegisterBase& src1, const xRegisterBase& src2, int extraRipOffset)
{
	pxAssert(!info.is_mov || dst.GetId() == src1.GetId());
	const xRegisterBase* ps1 = &src1;
	const xRegisterBase* ps2 = &src2;
	if (x86Emitter::use_avx)
	{
		if (info.is_commutative && info.map == SIMDInstructionInfo::Map::M0F && src2.IsExtended() && !src1.IsExtended())
		{
			// We can use a C5 op instead of a C4 op if we swap the inputs
			// (an extended rm register needs VEX.B, which only the 3-byte form has;
			// vvvv can hold an extended register either way).
			std::swap(ps1, ps2);
		}
	}
	else if (dst.GetId() != src1.GetId() && dst.GetId() == src2.GetId())
	{
		if (info.is_commutative)
			std::swap(ps1, ps2); // dst already holds src2: just apply src1 to it.
		else
			pxAssertRel(0, "SSE4 auto mov would destroy the second source!");
	}
	EmitSimdOp(info, dst, *ps1, *ps2, extraRipOffset);
}
|
||||
/// Three-operand form with a memory second source. For commutative SSE ops
/// where dst != src1, loads the memory operand into dst and applies src1,
/// saving the separate register mov.
void EmitSIMDImpl(SIMDInstructionInfo info, const xRegisterBase& dst, const xRegisterBase& src1, const xIndirectVoid& src2, int extraRipOffset)
{
	pxAssert(!info.is_mov || dst.GetId() == src1.GetId());
	if (!x86Emitter::use_avx && info.is_commutative && dst.GetId() != src1.GetId())
	{
		// Do load, op instead of mov, op+load
		// No processors differentiate between loads, so always use movaps
		EmitSimdOp(getMov(SIMDInstructionInfo::Type::Float), dst, dst, src2, 0);
		EmitSimdOp(info, dst, dst, src1, extraRipOffset);
	}
	else
	{
		EmitSimdOp(info, dst, src1, src2, extraRipOffset);
	}
}
|
||||
/// Four-operand blend (PBLENDVB/BLENDVPS family). AVX encodes the fourth
/// register in the /is4 immediate; SSE4 uses an implicit xmm0 (asserted).
void EmitSIMD(SIMDInstructionInfo info, const xRegisterBase& dst, const xRegisterBase& src1, const xRegisterBase& src2, const xRegisterBase& src3)
{
	pxAssert(!info.is_mov);
	pxAssertMsg(!info.is_commutative, "I don't think any blend instructions are commutative...");
	if (x86Emitter::use_avx)
	{
		EmitSimdOp(info, dst, src1, src2, 1);
		// /is4 immediate: the fourth register goes in imm8[7:4], not the low
		// nibble (writing the raw id would always select xmm0).
		xWrite8(src3.GetId() << 4);
	}
	else
	{
		pxAssertRel(src3.GetId() == 0, "SSE4 requires the third source to be xmm0!");
		if (dst.GetId() != src1.GetId() && dst.GetId() == src2.GetId())
			pxAssertRel(0, "SSE4 auto mov would destroy the second source!");
		EmitSimdOp(info, dst, src1, src2, 0);
	}

}
|
||||
/// Four-operand blend with a memory second source. AVX encodes the fourth
/// register in the /is4 immediate; SSE4 uses an implicit xmm0 (asserted).
void EmitSIMD(SIMDInstructionInfo info, const xRegisterBase& dst, const xRegisterBase& src1, const xIndirectVoid& src2, const xRegisterBase& src3)
{
	pxAssert(!info.is_mov);
	pxAssertMsg(!info.is_commutative, "I don't think any blend instructions are commutative...");
	if (x86Emitter::use_avx)
	{
		EmitSimdOp(info, dst, src1, src2, 1);
		// /is4 immediate: the fourth register goes in imm8[7:4], not the low
		// nibble (writing the raw id would always select xmm0).
		xWrite8(src3.GetId() << 4);
	}
	else
	{
		pxAssertRel(src3.GetId() == 0, "SSE4 requires the third source to be xmm0!");
		EmitSimdOp(info, dst, src1, src2, 0);
	}
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// SimdPrefix - If the lower byte of the opcode is 0x38 or 0x3a, then the opcode is
|
||||
@@ -107,6 +272,19 @@ namespace x86Emitter
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
// Out-of-line bodies for the xImplSimd_* functors declared in the header.
// All forward to the auto-switching EmitSIMD dispatcher; the two-argument
// forms pass dst as src1 (their infos are marked is_mov by the constructors).
void xImplSimd_2Arg::operator()(const xRegisterSSE& dst, const xRegisterSSE& src) const { EmitSIMD(info, dst, dst, src); }
void xImplSimd_2Arg::operator()(const xRegisterSSE& dst, const xIndirectVoid& src) const { EmitSIMD(info, dst, dst, src); }
void xImplSimd_2ArgImm::operator()(const xRegisterSSE& dst, const xRegisterSSE& src, u8 imm) const { EmitSIMD(info, dst, dst, src, imm); }
void xImplSimd_2ArgImm::operator()(const xRegisterSSE& dst, const xIndirectVoid& src, u8 imm) const { EmitSIMD(info, dst, dst, src, imm); }
void xImplSimd_3Arg::operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2) const { EmitSIMD(info, dst, src1, src2); }
void xImplSimd_3Arg::operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2) const { EmitSIMD(info, dst, src1, src2); }
void xImplSimd_3ArgImm::operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2, u8 imm) const { EmitSIMD(info, dst, src1, src2, imm); }
void xImplSimd_3ArgImm::operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2, u8 imm) const { EmitSIMD(info, dst, src1, src2, imm); }
void xImplSimd_3ArgCmp::operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2, SSE2_ComparisonType imm) const { EmitSIMD(info, dst, src1, src2, imm); }
void xImplSimd_3ArgCmp::operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2, SSE2_ComparisonType imm) const { EmitSIMD(info, dst, src1, src2, imm); }
void xImplSimd_4ArgBlend::operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2, const xRegisterSSE& src3) const { EmitSIMD(info, dst, src1, src2, src3); }
void xImplSimd_4ArgBlend::operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2, const xRegisterSSE& src3) const { EmitSIMD(info, dst, src1, src2, src3); }

// Legacy SSE-only path (OpWriteSSE macro, uses the struct's Prefix/Opcode members).
void xImplSimd_DestRegSSE::operator()(const xRegisterSSE& to, const xRegisterSSE& from) const { OpWriteSSE(Prefix, Opcode); }
void xImplSimd_DestRegSSE::operator()(const xRegisterSSE& to, const xIndirectVoid& from) const { OpWriteSSE(Prefix, Opcode); }
|
||||
|
||||
|
||||
@@ -51,6 +51,8 @@
|
||||
thread_local u8* x86Ptr;
|
||||
thread_local XMMSSEType g_xmmtypes[iREGCNT_XMM] = {XMMT_INT};
|
||||
|
||||
bool x86Emitter::use_avx;
|
||||
|
||||
namespace x86Emitter
|
||||
{
|
||||
|
||||
@@ -502,6 +504,46 @@ const xRegister32
|
||||
EmitRex(w, r, x, b, reg1.IsExtended8Bit());
|
||||
}
|
||||
|
||||
/// REX prefix for a SIMD reg,reg operation. W is derived from operand widths
/// when dst_w/src_w request it; R extends the reg field (reg1), B the rm field (reg2).
void EmitRex(SIMDInstructionInfo info, const xRegisterBase& reg1, const xRegisterBase& reg2)
{
	bool w = false;
	if (info.dst_w)
		w |= reg1.IsWide();
	if (info.src_w)
		w |= reg2.IsWide();
	bool r = reg1.IsExtended();
	bool x = false; // no index register in a reg,reg encoding
	bool b = reg2.IsExtended();
	// NOTE(review): this passes reg2.IsExtended8Bit() while the xIndirectVoid
	// overload passes reg1's — confirm the asymmetry is intentional.
	EmitRex(w, r, x, b, reg2.IsExtended8Bit());
}
|
||||
|
||||
/// REX prefix for a SIMD reg,mem operation. If the address needs no SIB byte,
/// the single address register is encoded in the rm field, so its extension
/// bit belongs in B and X must be clear.
void EmitRex(SIMDInstructionInfo info, const xRegisterBase& reg1, const xIndirectVoid& sib)
{
	bool w = false;
	if (info.dst_w)
		w |= reg1.IsWide();
	if (info.src_w)
		w |= sib.IsWide();
	bool r = reg1.IsExtended();
	bool x = sib.Index.IsExtended();
	bool b = sib.Base.IsExtended();
	if (!NeedsSibMagic(sib))
	{
		// Index register is emitted as the base: move its extension into B.
		b = x;
		x = false;
	}
	EmitRex(w, r, x, b, reg1.IsExtended8Bit());
}
|
||||
|
||||
/// REX prefix when the reg field carries a plain opcode extension (pslld-style
/// /ext forms): reg1 is a constant, so R is never set; only reg2 contributes.
void EmitRex(SIMDInstructionInfo info, uint reg1, const xRegisterBase& reg2)
{
	bool w = info.src_w ? reg2.IsWide() : false;
	bool r = false; // opcode extension, not a register — cannot be extended
	bool x = false;
	bool b = reg2.IsExtended();
	EmitRex(w, r, x, b, reg2.IsExtended8Bit());
}
|
||||
|
||||
// For use by instructions that are implicitly wide
|
||||
void EmitRexImplicitlyWide(const xRegisterBase& reg)
|
||||
{
|
||||
@@ -526,6 +568,89 @@ const xRegister32
|
||||
EmitRex(w, r, x, b);
|
||||
}
|
||||
|
||||
/// VEX R/X/B bits (byte positions 7/6/5, pre-inversion) when the reg field
/// holds an opcode extension: only B (from the rm register) can ever be set.
__emitinline static u8 GetVEXRXB(u32 ext, const xRegisterBase& src2)
{
	return src2.IsExtended() << 5;
}
|
||||
|
||||
/// VEX R/X/B bits for a reg,mem operand pair. Mirrors the REX logic: when the
/// address needs no SIB byte, the index register is encoded as the base, so
/// its extension bit moves from X to B.
__emitinline static u8 GetVEXRXB(const xRegisterBase& dst, const xIndirectVoid& src2)
{
	bool r = dst.IsExtended();
	bool x = src2.Index.IsExtended();
	bool b = src2.Base.IsExtended();
	if (!NeedsSibMagic(src2))
	{
		b = x;
		x = false;
	}
	return (r << 7) | (x << 6) | (b << 5);
}
|
||||
|
||||
/// VEX R/X/B bits for a reg,reg operand pair (no index register, so X stays 0).
__emitinline static u8 GetVEXRXB(const xRegisterBase& dst, const xRegisterBase& src2)
{
	return (dst.IsExtended() << 7) | (src2.IsExtended() << 5);
}

// VEX.L (bit 2 of the final VEX byte): set for wide (256-bit) SIMD registers.
__emitinline static u8 GetL(const xRegisterBase& arg) { return arg.IsWideSIMD() ? 4 : 0; }
__emitinline static u8 GetL(const xIndirectVoid& arg) { return 0; } // memory operand carries no vector width
__emitinline static u8 GetL(u32 ext) { return 0; } // opcode extension carries no width

// VEX.W (bit 7): set for 8-byte operands, consulted only when dst_w/src_w are set.
__emitinline static u8 GetVEXW(const xRegisterBase& arg) { return arg.GetOperandSize() == 8 ? 0x80 : 0; }
__emitinline static u8 GetVEXW(const xIndirectVoid& arg) { return arg.GetOperandSize() == 8 ? 0x80 : 0; }
__emitinline static u8 GetVEXW(u32 ext) { return 0; }
|
||||
|
||||
/// Emit a VEX-encoded instruction:
///   2-byte form: [C5] [~R | ~vvvv | L | pp] [opcode]             (needs 0F map, W=0, X=B=0)
///   3-byte form: [C4] [~R | ~X | ~B | map] [W | ~vvvv | L | pp] [opcode]
/// R/X/B and vvvv are stored inverted in the encoding, hence the XOR masks below.
/// D may be a register or a u32 opcode extension; src1 is the raw vvvv register id.
template <typename D, typename S2>
__emitinline void xOpWriteVEX(SIMDInstructionInfo info, D dst, u8 src1, const S2& src2, int extraRipOffset)
{
	u8 m = static_cast<u8>(info.map);
	u8 p = static_cast<u8>(info.prefix);
	u8 w = 0;
	if (info.src_w || info.dst_w) {
		// W follows the actual operand sizes rather than the table's fixed bit.
		if (info.dst_w)
			w |= GetVEXW(dst);
		if (info.src_w)
			w |= GetVEXW(src2);
	} else {
		w = info.w_bit << 7;
	}
	u8 l = GetL(dst) | GetL(src2); // Needed for 256-bit movemask.
	u8 rxb = GetVEXRXB(dst, src2);
	u8 b2 = p | l | (src1 << 3); // pp | L | vvvv — shared by both forms
	if (!w && info.map == SIMDInstructionInfo::Map::M0F && !(rxb & 0x7F))
	{
		// Can use a C5 VEX (X and B clear; R fits in the single payload byte)
		u8 b1 = rxb | b2;
		xWrite8(0xC5);
		xWrite8(b1 ^ 0xF8); // invert R and vvvv
		xWrite8(info.opcode);
	}
	else
	{
		u8 b1 = rxb | m;
		b2 |= w;
		xWrite8(0xC4);
		xWrite8(b1 ^ 0xE0); // invert R, X, B
		xWrite8(b2 ^ 0x78); // invert vvvv
		xWrite8(info.opcode);
	}
	EmitSibMagic(dst, src2, extraRipOffset);
}
|
||||
|
||||
// Non-template entry points for the header-declared EmitVEX overloads;
// all forward to the xOpWriteVEX template above.
void EmitVEX(SIMDInstructionInfo info, const xRegisterBase& dst, u8 src1, const xRegisterBase& src2, int extraRipOffset)
{
	xOpWriteVEX(info, dst, src1, src2, extraRipOffset);
}

void EmitVEX(SIMDInstructionInfo info, const xRegisterBase& dst, u8 src1, const xIndirectVoid& src2, int extraRipOffset)
{
	xOpWriteVEX(info, dst, src1, src2, extraRipOffset);
}

// Opcode-extension form: ext fills the ModRM reg field, dst goes in vvvv.
void EmitVEX(SIMDInstructionInfo info, u32 ext, u8 dst, const xRegisterBase& src2, int extraRipOffset)
{
	xOpWriteVEX(info, ext, dst, src2, extraRipOffset);
}
|
||||
|
||||
|
||||
// --------------------------------------------------------------------------------------
|
||||
// xSetPtr / xAlignPtr / xGetPtr / xAdvancePtr
|
||||
|
||||
@@ -29,6 +29,9 @@ namespace x86Emitter
|
||||
static constexpr int SHADOW_STACK_SIZE = 0;
|
||||
#endif
|
||||
|
||||
/// This will switch all SSE instructions to generate AVX instructions instead
|
||||
extern bool use_avx;
|
||||
|
||||
extern void xWrite8(u8 val);
|
||||
extern void xWrite16(u16 val);
|
||||
extern void xWrite32(u32 val);
|
||||
@@ -1075,4 +1078,4 @@ extern const xRegister32
|
||||
#include "implement/jmpcall.h"
|
||||
|
||||
#include "implement/bmi.h"
|
||||
#include "implement/avx.h"
|
||||
#include "implement/avx.h"
|
||||
|
||||
@@ -52,6 +52,7 @@
|
||||
#include "common/StringUtil.h"
|
||||
#include "common/Threading.h"
|
||||
#include "common/Timer.h"
|
||||
#include "common/emitter/x86emitter.h"
|
||||
|
||||
#include "IconsFontAwesome6.h"
|
||||
#include "IconsPromptFont.h"
|
||||
@@ -391,6 +392,10 @@ bool VMManager::Internal::CPUThreadInitialize()
|
||||
if (!cpuinfo_initialize())
|
||||
Console.Error("cpuinfo_initialize() failed.");
|
||||
|
||||
#ifdef _M_X86
|
||||
x86Emitter::use_avx = g_cpu.vectorISA >= ProcessorFeatures::VectorISA::AVX;
|
||||
#endif
|
||||
|
||||
LogCPUCapabilities();
|
||||
|
||||
if (!SysMemory::Allocate())
|
||||
|
||||
Reference in New Issue
Block a user