From 7aa9664d20c111434a4704d9f67975088986927f Mon Sep 17 00:00:00 2001
From: "Unknown W. Brackets"
Date: Wed, 29 Dec 2021 19:46:11 -0800
Subject: [PATCH] x64jit: Add AVX2-only instructions.

---
 Common/x64Emitter.cpp | 70 +++++++++++++++++++++++++++++++++++++++++++
 Common/x64Emitter.h   | 44 +++++++++++++++++++++++++++++++
 2 files changed, 114 insertions(+)

diff --git a/Common/x64Emitter.cpp b/Common/x64Emitter.cpp
index 1f9cce376d..df934fe61b 100644
--- a/Common/x64Emitter.cpp
+++ b/Common/x64Emitter.cpp
@@ -2201,6 +2201,76 @@ void XEmitter::VZEROUPPER() {
 	Write8(0x77);
 }

+// AVX2 lane extract/insert, blend, broadcast and permute emitters.  Forms that take an
+// immediate pass a trailing 1 to WriteAVX2Op and then write the imm8 themselves via Write8.
+void XEmitter::VEXTRACTI128(OpArg arg, X64Reg regOp1, u8 subreg) { WriteAVX2Op(256, 0x66, 0x3A39, regOp1, arg, 1); Write8(subreg); }
+void XEmitter::VINSERTI128(X64Reg regOp1, X64Reg regOp2, OpArg arg, u8 subreg) { WriteAVX2Op(256, 0x66, 0x3A38, regOp1, regOp2, arg, 1); Write8(subreg); }
+void XEmitter::VPBLENDD(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg, u8 mask) { WriteAVX2Op(bits, 0x66, 0x3A02, regOp1, regOp2, arg, 1); Write8(mask); }
+void XEmitter::VPBROADCASTB(int bits, X64Reg regOp1, OpArg arg) { WriteAVX2Op(bits, 0x66, 0x3878, regOp1, arg); }
+void XEmitter::VPBROADCASTW(int bits, X64Reg regOp1, OpArg arg) { WriteAVX2Op(bits, 0x66, 0x3879, regOp1, arg); }
+void XEmitter::VPBROADCASTD(int bits, X64Reg regOp1, OpArg arg) { WriteAVX2Op(bits, 0x66, 0x3858, regOp1, arg); }
+void XEmitter::VPBROADCASTQ(int bits, X64Reg regOp1, OpArg arg) { WriteAVX2Op(bits, 0x66, 0x3859, regOp1, arg); }
+void XEmitter::VBROADCASTI128(X64Reg regOp1, OpArg arg) {
+	_assert_msg_(!arg.IsSimpleReg(), "VBROADCASTI128 must come from memory");
+	WriteAVX2Op(256, 0x66, 0x385A, regOp1, arg);
+}
+void XEmitter::VPERM2I128(X64Reg regOp1, X64Reg regOp2, OpArg arg, u8 mask) { WriteAVX2Op(256, 0x66, 0x3A46, regOp1, regOp2, arg, 1); Write8(mask); }
+void XEmitter::VPERMD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVX2Op(256, 0x66, 0x3836, regOp1, regOp2, arg); }
+void XEmitter::VPERMPS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVX2Op(256, 0x66, 0x3816, regOp1, regOp2, arg); }
+// NOTE(review): the extra trailing 1 on VPERMPD/VPERMQ is presumably VEX.W=1 (both are W1 encodings) - confirm against WriteAVX2Op.
+void XEmitter::VPERMPD(X64Reg regOp1, OpArg arg, u8 shuffle) { WriteAVX2Op(256, 0x66, 0x3A01, regOp1, arg, 1, 1); Write8(shuffle); }
+void XEmitter::VPERMQ(X64Reg regOp1, OpArg arg, u8 shuffle) { WriteAVX2Op(256, 0x66, 0x3A00, regOp1, arg, 1, 1); Write8(shuffle); }
+
+// Masked loads use opcode 0x8C and masked stores 0x8E; the Q forms add the same trailing 1 as VPERMQ.
+// Store overloads take (mem, mask, data) in assembler order, but the encoding wants data in ModRM.reg and
+// mask in VEX.vvvv.  Assuming WriteAVX2Op maps (regOp1, regOp2) to (reg, vvvv) as the loads imply, swap them.
+void XEmitter::VPMASKMOVD(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVX2Op(bits, 0x66, 0x388C, regOp1, regOp2, arg); }
+void XEmitter::VPMASKMOVD(int bits, OpArg arg, X64Reg regOp1, X64Reg regOp2) { WriteAVX2Op(bits, 0x66, 0x388E, regOp2, regOp1, arg); }
+void XEmitter::VPMASKMOVQ(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVX2Op(bits, 0x66, 0x388C, regOp1, regOp2, arg, 0, 1); }
+void XEmitter::VPMASKMOVQ(int bits, OpArg arg, X64Reg regOp1, X64Reg regOp2) { WriteAVX2Op(bits, 0x66, 0x388E, regOp2, regOp1, arg, 0, 1); }
+
+// Floating point gathers.  Opcode 0x92 is the dword-index form and 0x93 the qword-index form; PS is W0
+// and PD is W1 (the final 1).  Destination, mask and index registers must all be distinct, which the
+// asserts below enforce via IsIndexedReg.
+void XEmitter::VGATHERDPS(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {
+	_assert_msg_(regOp1 != regOp2 && !arg.IsIndexedReg(regOp1) && !arg.IsIndexedReg(regOp2), "VGATHER cannot have overlapped registers");
+	WriteAVX2Op(bits, 0x66, 0x3892, regOp1, regOp2, arg);
+}
+void XEmitter::VGATHERDPD(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {
+	_assert_msg_(regOp1 != regOp2 && !arg.IsIndexedReg(regOp1) && !arg.IsIndexedReg(regOp2), "VGATHER cannot have overlapped registers");
+	WriteAVX2Op(bits, 0x66, 0x3892, regOp1, regOp2, arg, 0, 1);
+}
+void XEmitter::VGATHERQPS(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {
+	_assert_msg_(regOp1 != regOp2 && !arg.IsIndexedReg(regOp1) && !arg.IsIndexedReg(regOp2), "VGATHER cannot have overlapped registers");
+	WriteAVX2Op(bits, 0x66, 0x3893, regOp1, regOp2, arg);
+}
+void XEmitter::VGATHERQPD(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {
+	_assert_msg_(regOp1 != regOp2 && !arg.IsIndexedReg(regOp1) && !arg.IsIndexedReg(regOp2), "VGATHER cannot have overlapped registers");
+	WriteAVX2Op(bits, 0x66, 0x3893, regOp1, regOp2, arg, 0, 1);
+}
+void XEmitter::VGATHERDD(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {
+	_assert_msg_(regOp1 != regOp2 && !arg.IsIndexedReg(regOp1) && !arg.IsIndexedReg(regOp2), "VGATHER cannot have overlapped registers");
+	WriteAVX2Op(bits, 0x66, 0x3890, regOp1, regOp2, arg);  // Integer gathers: 0x90 here and in VGATHERDQ, 0x91 in VGATHERQD/VGATHERQQ.
+}
+void XEmitter::VGATHERQD(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {
+	_assert_msg_(regOp1 != regOp2 && !arg.IsIndexedReg(regOp1) && !arg.IsIndexedReg(regOp2), "VGATHER cannot have overlapped registers");
+	WriteAVX2Op(bits, 0x66, 0x3891, regOp1, regOp2, arg);
+}
+void XEmitter::VGATHERDQ(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {
+	_assert_msg_(regOp1 != regOp2 && !arg.IsIndexedReg(regOp1) && !arg.IsIndexedReg(regOp2), "VGATHER cannot have overlapped registers");
+	WriteAVX2Op(bits, 0x66, 0x3890, regOp1, regOp2, arg, 0, 1);  // Same opcode as VGATHERDD; only the final 1 differs.
+}
+void XEmitter::VGATHERQQ(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {
+	_assert_msg_(regOp1 != regOp2 && !arg.IsIndexedReg(regOp1) && !arg.IsIndexedReg(regOp2), "VGATHER cannot have overlapped registers");
+	WriteAVX2Op(bits, 0x66, 0x3891, regOp1, regOp2, arg, 0, 1);  // Same opcode as VGATHERQD; only the final 1 differs.
+}
+
+void XEmitter::VPSLLVD(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVX2Op(bits, 0x66, 0x3847, regOp1, regOp2, arg); }  // Variable shifts: each Q form reuses its D opcode plus a final 1.
+void XEmitter::VPSLLVQ(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVX2Op(bits, 0x66, 0x3847, regOp1, regOp2, arg, 0, 1); }
+void XEmitter::VPSRAVD(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVX2Op(bits, 0x66, 0x3846, regOp1, regOp2, arg); }  // No Q counterpart is emitted for the arithmetic shift here.
+void XEmitter::VPSRLVD(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVX2Op(bits, 0x66, 0x3845, regOp1, regOp2, arg); }
+void XEmitter::VPSRLVQ(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVX2Op(bits, 0x66, 0x3845, regOp1, regOp2, arg, 0, 1); }
+
 void XEmitter::VFMADD132PS(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(bits, 0x66, 0x3898, regOp1, regOp2, arg); }
 void XEmitter::VFMADD213PS(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(bits, 0x66, 0x38A8, regOp1, regOp2, arg); }
 void XEmitter::VFMADD231PS(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(bits, 0x66, 0x38B8, regOp1, regOp2, arg); }
diff --git a/Common/x64Emitter.h b/Common/x64Emitter.h
index 3bf696c7fa..38856019aa 100644
--- a/Common/x64Emitter.h
+++ b/Common/x64Emitter.h
@@ -189,6 +189,13 @@ struct OpArg
 			return false;
 		return GetSimpleReg() == reg;
 	}
+	// True when this operand's scale marks it as scaled-index addressing and reg is its index register.
+	// NOTE(review): low range widened from 1..4 to 1..8, assuming a scale of 8 exists (40 = 32 + 8 in the high range implies it) - confirm against the scale enum.
+	bool IsIndexedReg(X64Reg reg) const {
+		const bool lowRange = scale >= 1 && scale <= 8;
+		const bool highRange = scale >= 32 && scale <= 40;
+		return (lowRange || highRange) && indexReg == reg;
+	}

 	bool CanDoOpWith(const OpArg &other) const
 	{
@@ -1205,6 +1212,43 @@ public:
 	void VZEROALL();
 	void VZEROUPPER();

+	// AVX2
+	void VEXTRACTI128(OpArg arg, X64Reg regOp1, u8 subreg);
+	void VINSERTI128(X64Reg regOp1, X64Reg regOp2, OpArg arg, u8 subreg);
+	void VPBLENDD(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg, u8 mask);
+	void VPBROADCASTB(int bits, X64Reg regOp1, OpArg arg);
+	void VPBROADCASTW(int bits, X64Reg regOp1, OpArg arg);
+	void VPBROADCASTD(int bits, X64Reg regOp1, OpArg arg);
+	void VPBROADCASTQ(int bits, X64Reg regOp1, OpArg arg);
+	// The source must be a memory operand; the implementation asserts it is not a plain register.
+	void VBROADCASTI128(X64Reg regOp1, OpArg arg);
+	void VPERM2I128(X64Reg regOp1, X64Reg regOp2, OpArg arg, u8 mask);
+	void VPERMD(X64Reg dest, X64Reg shuffle, OpArg src);
+	void VPERMPS(X64Reg dest, X64Reg shuffle, OpArg src);
+	void VPERMPD(X64Reg dest, OpArg src, u8 shuffle);
+	void VPERMQ(X64Reg dest, OpArg src, u8 shuffle);
+
+	void VPMASKMOVD(int bits, X64Reg regOp1, X64Reg mask, OpArg arg);  // Masked load into regOp1.
+	void VPMASKMOVD(int bits, OpArg arg, X64Reg mask, X64Reg regOp2);  // Masked store of regOp2 to arg.
+	void VPMASKMOVQ(int bits, X64Reg regOp1, X64Reg mask, OpArg arg);
+	void VPMASKMOVQ(int bits, OpArg arg, X64Reg mask, X64Reg regOp2);
+
+	// Gathers: use an XMM for the scaled reg in MComplex.  regOp1, regOp2 and that index register must all differ (asserted).
+	void VGATHERDPS(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);  // The implementations assert regOp1 != regOp2 and that neither is the index register of arg.
+	void VGATHERDPD(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
+	void VGATHERQPS(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
+	void VGATHERQPD(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
+	void VGATHERDD(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);  // DD/QD/DQ/QQ carry the same register-overlap assert as the PS/PD gathers.
+	void VGATHERQD(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
+	void VGATHERDQ(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
+	void VGATHERQQ(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
+
+	void VPSLLVD(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg);  // NOTE(review): presumably regOp1 = regOp2 shifted by the per-element counts in arg - confirm operand roles.
+	void VPSLLVQ(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg);
+	void VPSRAVD(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg);  // Only a D form of the arithmetic shift is declared.
+	void VPSRLVD(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg);
+	void VPSRLVQ(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg);
+
 	// FMA3
 	void VFMADD132PS(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg);
 	void VFMADD213PS(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg);