mirror of
https://github.com/PCSX2/pcsx2.git
synced 2026-01-31 01:15:24 +01:00
Common: Switch integer shuffle/insert/extract instructions to auto SSE/AVX
This commit is contained in:
committed by
TellowKrinkle
parent
c9ddab444a
commit
0c8c798051
@@ -31,17 +31,17 @@ namespace x86Emitter
|
||||
{
|
||||
// Copies doublewords from src and inserts them into dest at dword locations selected
|
||||
// with the order operand (8 bit immediate).
|
||||
const xImplSimd_DestRegImmSSE D;
|
||||
const xImplSimd_2ArgImm D;
|
||||
|
||||
// Copies words from the low quadword of src and inserts them into the low quadword
|
||||
// of dest at word locations selected with the order operand (8 bit immediate).
|
||||
// The high quadword of src is copied to the high quadword of dest.
|
||||
const xImplSimd_DestRegImmSSE LW;
|
||||
const xImplSimd_2ArgImm LW;
|
||||
|
||||
// Copies words from the high quadword of src and inserts them into the high quadword
|
||||
// of dest at word locations selected with the order operand (8 bit immediate).
|
||||
// The low quadword of src is copied to the low quadword of dest.
|
||||
const xImplSimd_DestRegImmSSE HW;
|
||||
const xImplSimd_2ArgImm HW;
|
||||
|
||||
// [SSSE-3] Performs in-place shuffles of bytes in dest according to the shuffle
|
||||
// control mask in src. If the most significant bit (bit[7]) of each byte of the
|
||||
@@ -50,42 +50,7 @@ namespace x86Emitter
|
||||
// byte in dest. The value of each index is the least significant 4 bits (128-bit
|
||||
// operation) or 3 bits (64-bit operation) of the shuffle control byte.
|
||||
//
|
||||
const xImplSimd_DestRegEither B;
|
||||
|
||||
// below is my test bed for a new system, free of subclasses. Was supposed to improve intellisense
|
||||
// but it doesn't (makes it worse). Will try again in MSVC 2010. --air
|
||||
|
||||
#if 0
|
||||
// Copies words from src and inserts them into dest at word locations selected with
|
||||
// the order operand (8 bit immediate).
|
||||
|
||||
// Copies doublewords from src and inserts them into dest at dword locations selected
|
||||
// with the order operand (8 bit immediate).
|
||||
void D( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm ) const { xOpWrite0F( 0x66, 0x70, to, from, imm ); }
|
||||
void D( const xRegisterSSE& to, const xIndirectVoid& from, u8 imm ) const { xOpWrite0F( 0x66, 0x70, to, from, imm ); }
|
||||
|
||||
// Copies words from the low quadword of src and inserts them into the low quadword
|
||||
// of dest at word locations selected with the order operand (8 bit immediate).
|
||||
// The high quadword of src is copied to the high quadword of dest.
|
||||
void LW( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm ) const { xOpWrite0F( 0xf2, 0x70, to, from, imm ); }
|
||||
void LW( const xRegisterSSE& to, const xIndirectVoid& from, u8 imm ) const { xOpWrite0F( 0xf2, 0x70, to, from, imm ); }
|
||||
|
||||
// Copies words from the high quadword of src and inserts them into the high quadword
|
||||
// of dest at word locations selected with the order operand (8 bit immediate).
|
||||
// The low quadword of src is copied to the low quadword of dest.
|
||||
void HW( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm ) const { xOpWrite0F( 0xf3, 0x70, to, from, imm ); }
|
||||
void HW( const xRegisterSSE& to, const xIndirectVoid& from, u8 imm ) const { xOpWrite0F( 0xf3, 0x70, to, from, imm ); }
|
||||
|
||||
// [sSSE-3] Performs in-place shuffles of bytes in dest according to the shuffle
|
||||
// control mask in src. If the most significant bit (bit[7]) of each byte of the
|
||||
// shuffle control mask is set, then constant zero is written in the result byte.
|
||||
// Each byte in the shuffle control mask forms an index to permute the corresponding
|
||||
// byte in dest. The value of each index is the least significant 4 bits (128-bit
|
||||
// operation) or 3 bits (64-bit operation) of the shuffle control byte.
|
||||
//
|
||||
void B( const xRegisterSSE& to, const xRegisterSSE& from ) const { OpWriteSSE( 0x66, 0x0038 ); }
|
||||
void B( const xRegisterSSE& to, const xIndirectVoid& from ) const { OpWriteSSE( 0x66, 0x0038 ); }
|
||||
#endif
|
||||
const xImplSimd_3Arg B;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------------------------
|
||||
@@ -183,17 +148,25 @@ namespace x86Emitter
|
||||
//
|
||||
struct xImplSimd_PInsert
|
||||
{
|
||||
void B(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const;
|
||||
void B(const xRegisterSSE& to, const xIndirect32& from, u8 imm8) const;
|
||||
void B(const xRegisterSSE& dst, const xRegister32& src, u8 imm8) const { B(dst, dst, src, imm8); }
|
||||
void B(const xRegisterSSE& dst, const xIndirect8& src, u8 imm8) const { B(dst, dst, src, imm8); }
|
||||
void B(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegister32& src2, u8 imm8) const;
|
||||
void B(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirect8& src2, u8 imm8) const;
|
||||
|
||||
void W(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const;
|
||||
void W(const xRegisterSSE& to, const xIndirect32& from, u8 imm8) const;
|
||||
void W(const xRegisterSSE& dst, const xRegister32& src, u8 imm8) const { W(dst, dst, src, imm8); }
|
||||
void W(const xRegisterSSE& dst, const xIndirect16& src, u8 imm8) const { W(dst, dst, src, imm8); }
|
||||
void W(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegister32& src2, u8 imm8) const;
|
||||
void W(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirect16& src2, u8 imm8) const;
|
||||
|
||||
void D(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const;
|
||||
void D(const xRegisterSSE& to, const xIndirect32& from, u8 imm8) const;
|
||||
void D(const xRegisterSSE& dst, const xRegister32& src, u8 imm8) const { D(dst, dst, src, imm8); }
|
||||
void D(const xRegisterSSE& dst, const xIndirect32& src, u8 imm8) const { D(dst, dst, src, imm8); }
|
||||
void D(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegister32& src2, u8 imm8) const;
|
||||
void D(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirect32& src2, u8 imm8) const;
|
||||
|
||||
void Q(const xRegisterSSE& to, const xRegister64& from, u8 imm8) const;
|
||||
void Q(const xRegisterSSE& to, const xIndirect64& from, u8 imm8) const;
|
||||
void Q(const xRegisterSSE& dst, const xRegister64& src, u8 imm8) const { Q(dst, dst, src, imm8); }
|
||||
void Q(const xRegisterSSE& dst, const xIndirect64& src, u8 imm8) const { Q(dst, dst, src, imm8); }
|
||||
void Q(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegister64& src2, u8 imm8) const;
|
||||
void Q(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirect64& src2, u8 imm8) const;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
@@ -206,8 +179,8 @@ namespace x86Emitter
|
||||
// [SSE-4.1] Copies the byte element specified by imm8 from src to dest. The upper bits
|
||||
// of dest are zero-extended (cleared). This can be used to extract any single packed
|
||||
// byte value from src into an x86 32 bit register.
|
||||
void B(const xRegister32& to, const xRegisterSSE& from, u8 imm8) const;
|
||||
void B(const xIndirect32& dest, const xRegisterSSE& from, u8 imm8) const;
|
||||
void B(const xRegister32& dst, const xRegisterSSE& src, u8 imm8) const;
|
||||
void B(const xIndirect8& dst, const xRegisterSSE& src, u8 imm8) const;
|
||||
|
||||
// Copies the word element specified by imm8 from src to dest. The upper bits
|
||||
// of dest are zero-extended (cleared). This can be used to extract any single packed
|
||||
@@ -215,16 +188,17 @@ namespace x86Emitter
|
||||
//
|
||||
// [SSE-4.1] Note: Indirect memory forms of this instruction are an SSE-4.1 extension!
|
||||
//
|
||||
void W(const xRegister32& to, const xRegisterSSE& from, u8 imm8) const;
|
||||
void W(const xIndirect32& dest, const xRegisterSSE& from, u8 imm8) const;
|
||||
void W(const xRegister32& dst, const xRegisterSSE& src, u8 imm8) const;
|
||||
void W(const xIndirect16& dst, const xRegisterSSE& src, u8 imm8) const;
|
||||
|
||||
// [SSE-4.1] Copies the dword element specified by imm8 from src to dest. This can be
|
||||
// used to extract any single packed dword value from src into an x86 32 bit register.
|
||||
void D(const xRegister32& to, const xRegisterSSE& from, u8 imm8) const;
|
||||
void D(const xIndirect32& dest, const xRegisterSSE& from, u8 imm8) const;
|
||||
void D(const xRegister32& dst, const xRegisterSSE& src, u8 imm8) const;
|
||||
void D(const xIndirect32& dst, const xRegisterSSE& src, u8 imm8) const;
|
||||
|
||||
// Insert a qword integer value from r/m64 into the xmm1 at the destination element specified by imm8.
|
||||
void Q(const xRegister64& to, const xRegisterSSE& from, u8 imm8) const;
|
||||
void Q(const xIndirect64& dest, const xRegisterSSE& from, u8 imm8) const;
|
||||
// [SSE-4.1] Copies the qword element specified by imm8 from src to dest. This can be
|
||||
// used to extract any single packed qword value from src into an x86 64 bit register.
|
||||
void Q(const xRegister64& dst, const xRegisterSSE& src, u8 imm8) const;
|
||||
void Q(const xIndirect64& dst, const xRegisterSSE& src, u8 imm8) const;
|
||||
};
|
||||
} // namespace x86Emitter
|
||||
|
||||
@@ -562,39 +562,38 @@ namespace x86Emitter
|
||||
EmitSIMD(SIMDInstructionInfo(0xc6).d().p66(), dst, src1, src2, selector);
|
||||
}
|
||||
|
||||
void xImplSimd_PInsert::B(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const { xOpWrite0F(0x66, 0x203a, to, from, imm8); }
|
||||
void xImplSimd_PInsert::B(const xRegisterSSE& to, const xIndirect32& from, u8 imm8) const { xOpWrite0F(0x66, 0x203a, to, from, imm8); }
|
||||
void xImplSimd_PInsert::B(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegister32& src2, u8 imm8) const { EmitSIMD(SIMDInstructionInfo(0x20).i().p66().m0f3a(), dst, src1, src2, imm8); }
|
||||
void xImplSimd_PInsert::B(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirect8& src2, u8 imm8) const { EmitSIMD(SIMDInstructionInfo(0x20).i().p66().m0f3a(), dst, src1, src2, imm8); }
|
||||
|
||||
void xImplSimd_PInsert::W(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const { xOpWrite0F(0x66, 0xc4, to, from, imm8); }
|
||||
void xImplSimd_PInsert::W(const xRegisterSSE& to, const xIndirect32& from, u8 imm8) const { xOpWrite0F(0x66, 0xc4, to, from, imm8); }
|
||||
void xImplSimd_PInsert::W(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegister32& src2, u8 imm8) const { EmitSIMD(SIMDInstructionInfo(0xc4).i().p66(), dst, src1, src2, imm8); }
|
||||
void xImplSimd_PInsert::W(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirect16& src2, u8 imm8) const { EmitSIMD(SIMDInstructionInfo(0xc4).i().p66(), dst, src1, src2, imm8); }
|
||||
|
||||
void xImplSimd_PInsert::D(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const { xOpWrite0F(0x66, 0x223a, to, from, imm8); }
|
||||
void xImplSimd_PInsert::D(const xRegisterSSE& to, const xIndirect32& from, u8 imm8) const { xOpWrite0F(0x66, 0x223a, to, from, imm8); }
|
||||
void xImplSimd_PInsert::D(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegister32& src2, u8 imm8) const { EmitSIMD(SIMDInstructionInfo(0x22).i().p66().m0f3a().srcw(), dst, src1, src2, imm8); }
|
||||
void xImplSimd_PInsert::D(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirect32& src2, u8 imm8) const { EmitSIMD(SIMDInstructionInfo(0x22).i().p66().m0f3a().srcw(), dst, src1, src2, imm8); }
|
||||
|
||||
void xImplSimd_PInsert::Q(const xRegisterSSE& to, const xRegister64& from, u8 imm8) const { xOpWrite0F(0x66, 0x223a, to, from, imm8); }
|
||||
void xImplSimd_PInsert::Q(const xRegisterSSE& to, const xIndirect64& from, u8 imm8) const { xOpWrite0F(0x66, 0x223a, to, from, imm8); }
|
||||
void xImplSimd_PInsert::Q(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegister64& src2, u8 imm8) const { EmitSIMD(SIMDInstructionInfo(0x22).i().p66().m0f3a().srcw(), dst, src1, src2, imm8); }
|
||||
void xImplSimd_PInsert::Q(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirect64& src2, u8 imm8) const { EmitSIMD(SIMDInstructionInfo(0x22).i().p66().m0f3a().srcw(), dst, src1, src2, imm8); }
|
||||
|
||||
void SimdImpl_PExtract::B(const xRegister32& to, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0x143a, from, to, imm8); }
|
||||
void SimdImpl_PExtract::B(const xIndirect32& dest, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0x143a, from, dest, imm8); }
|
||||
void SimdImpl_PExtract::B(const xRegister32& dst, const xRegisterSSE& src, u8 imm8) const { EmitSIMD(SIMDInstructionInfo(0x14).mov().p66().m0f3a(), src, src, dst, imm8); }
|
||||
void SimdImpl_PExtract::B(const xIndirect8& dst, const xRegisterSSE& src, u8 imm8) const { EmitSIMD(SIMDInstructionInfo(0x14).mov().p66().m0f3a(), src, src, dst, imm8); }
|
||||
|
||||
void SimdImpl_PExtract::W(const xRegister32& to, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0xc5, from, to, imm8); }
|
||||
void SimdImpl_PExtract::W(const xIndirect32& dest, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0x153a, from, dest, imm8); }
|
||||
void SimdImpl_PExtract::W(const xRegister32& dst, const xRegisterSSE& src, u8 imm8) const { EmitSIMD(SIMDInstructionInfo(0xc5).mov().p66(), dst, dst, src, imm8); }
|
||||
void SimdImpl_PExtract::W(const xIndirect16& dst, const xRegisterSSE& src, u8 imm8) const { EmitSIMD(SIMDInstructionInfo(0x15).mov().p66().m0f3a(), src, src, dst, imm8); }
|
||||
|
||||
void SimdImpl_PExtract::D(const xRegister32& to, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0x163a, from, to, imm8); }
|
||||
void SimdImpl_PExtract::D(const xIndirect32& dest, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0x163a, from, dest, imm8); }
|
||||
void SimdImpl_PExtract::D(const xRegister32& dst, const xRegisterSSE& src, u8 imm8) const { EmitSIMD(SIMDInstructionInfo(0x16).mov().p66().m0f3a().srcw(), src, src, dst, imm8); }
|
||||
void SimdImpl_PExtract::D(const xIndirect32& dst, const xRegisterSSE& src, u8 imm8) const { EmitSIMD(SIMDInstructionInfo(0x16).mov().p66().m0f3a().srcw(), src, src, dst, imm8); }
|
||||
|
||||
void SimdImpl_PExtract::Q(const xRegister64& to, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0x163a, from, to, imm8); }
|
||||
void SimdImpl_PExtract::Q(const xIndirect64& dest, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0x163a, from, dest, imm8); }
|
||||
void SimdImpl_PExtract::Q(const xRegister64& dst, const xRegisterSSE& src, u8 imm8) const { EmitSIMD(SIMDInstructionInfo(0x16).mov().p66().m0f3a().srcw(), src, src, dst, imm8); }
|
||||
void SimdImpl_PExtract::Q(const xIndirect64& dst, const xRegisterSSE& src, u8 imm8) const { EmitSIMD(SIMDInstructionInfo(0x16).mov().p66().m0f3a().srcw(), src, src, dst, imm8); }
|
||||
|
||||
const xImplSimd_Shuffle xSHUF = {};
|
||||
|
||||
const xImplSimd_PShuffle xPSHUF =
|
||||
{
|
||||
{0x66, 0x70}, // D
|
||||
{0xf2, 0x70}, // LW
|
||||
{0xf3, 0x70}, // HW
|
||||
|
||||
{0x66, 0x0038}, // B
|
||||
{
|
||||
{SIMDInstructionInfo(0x70).i().p66()}, // D
|
||||
{SIMDInstructionInfo(0x70).i().pf2()}, // LW
|
||||
{SIMDInstructionInfo(0x70).i().pf3()}, // HW
|
||||
{SIMDInstructionInfo(0x00).i().p66().m0f38()}, // B
|
||||
};
|
||||
|
||||
const SimdImpl_PUnpack xPUNPCK =
|
||||
|
||||
@@ -292,6 +292,27 @@ TEST(CodegenTests, SSETest)
|
||||
CODEGEN_TEST(xEXTRACTPS(ptr32[r9], xmm3, 3), "66 41 0f 3a 17 19 03");
|
||||
CODEGEN_TEST(xEXTRACTPS(ptr32[base], xmm1, 2), "66 0f 3a 17 0d f6 ff ff ff 02");
|
||||
|
||||
CODEGEN_TEST(xPSHUF.D(xmm2, ptr[r8], 0), "66 41 0f 70 10 00");
|
||||
CODEGEN_TEST(xPSHUF.LW(xmm3, xmm8, 1), "f2 41 0f 70 d8 01");
|
||||
CODEGEN_TEST(xPSHUF.HW(xmm4, xmm2, 8), "f3 0f 70 e2 08");
|
||||
CODEGEN_TEST(xPSHUF.B(xmm2, ptr[r8]), "66 41 0f 38 00 10");
|
||||
CODEGEN_TEST(xPINSR.B(xmm1, ebx, 1), "66 0f 3a 20 cb 01");
|
||||
CODEGEN_TEST(xPINSR.W(xmm1, ebx, 1), "66 0f c4 cb 01");
|
||||
CODEGEN_TEST(xPINSR.D(xmm1, ebx, 1), "66 0f 3a 22 cb 01");
|
||||
CODEGEN_TEST(xPINSR.Q(xmm1, rbx, 1), "66 48 0f 3a 22 cb 01");
|
||||
CODEGEN_TEST(xPINSR.B(xmm9, ptr8[rax], 1), "66 44 0f 3a 20 08 01");
|
||||
CODEGEN_TEST(xPINSR.W(xmm9, ptr16[rax], 1), "66 44 0f c4 08 01");
|
||||
CODEGEN_TEST(xPINSR.D(xmm9, ptr32[rax], 1), "66 44 0f 3a 22 08 01");
|
||||
CODEGEN_TEST(xPINSR.Q(xmm9, ptr64[rax], 1), "66 4c 0f 3a 22 08 01");
|
||||
CODEGEN_TEST(xPEXTR.B(ebx, xmm1, 1), "66 0f 3a 14 cb 01");
|
||||
CODEGEN_TEST(xPEXTR.W(ebx, xmm1, 1), "66 0f c5 d9 01");
|
||||
CODEGEN_TEST(xPEXTR.D(ebx, xmm1, 1), "66 0f 3a 16 cb 01");
|
||||
CODEGEN_TEST(xPEXTR.Q(rbx, xmm1, 1), "66 48 0f 3a 16 cb 01");
|
||||
CODEGEN_TEST(xPEXTR.B(ptr8[rax], xmm9, 1), "66 44 0f 3a 14 08 01");
|
||||
CODEGEN_TEST(xPEXTR.W(ptr16[rax], xmm9, 1), "66 44 0f 3a 15 08 01");
|
||||
CODEGEN_TEST(xPEXTR.D(ptr32[rax], xmm9, 1), "66 44 0f 3a 16 08 01");
|
||||
CODEGEN_TEST(xPEXTR.Q(ptr64[rax], xmm9, 1), "66 4c 0f 3a 16 08 01");
|
||||
|
||||
CODEGEN_TEST(xMOVAPS(xmm0, xmm1), "0f 28 c1");
|
||||
CODEGEN_TEST(xMOVAPS(xmm8, xmm9), "45 0f 28 c1");
|
||||
CODEGEN_TEST(xMOVUPS(xmm8, ptr128[r8+r9]), "47 0f 10 04 08");
|
||||
@@ -306,15 +327,6 @@ TEST(CodegenTests, SSETest)
|
||||
CODEGEN_TEST(xMOVD(r10, xmm1), "66 49 0f 7e ca");
|
||||
CODEGEN_TEST(xMOVD(rax, xmm10), "66 4c 0f 7e d0");
|
||||
CODEGEN_TEST(xMOVD(r10, xmm10), "66 4d 0f 7e d2");
|
||||
CODEGEN_TEST(xPINSR.B(xmm0, ebx, 1), "66 0f 3a 20 c3 01");
|
||||
CODEGEN_TEST(xPINSR.W(xmm0, ebx, 1), "66 0f c4 c3 01");
|
||||
CODEGEN_TEST(xPINSR.D(xmm0, ebx, 1), "66 0f 3a 22 c3 01");
|
||||
CODEGEN_TEST(xPINSR.Q(xmm0, rbx, 1), "66 48 0f 3a 22 c3 01");
|
||||
CODEGEN_TEST(xPEXTR.B(ebx, xmm0, 1), "66 0f 3a 14 c3 01");
|
||||
CODEGEN_TEST(xPEXTR.W(ebx, xmm0, 1), "66 0f c5 c3 01");
|
||||
CODEGEN_TEST(xPEXTR.D(ebx, xmm0, 1), "66 0f 3a 16 c3 01");
|
||||
CODEGEN_TEST(xPEXTR.Q(rbx, xmm0, 1), "66 48 0f 3a 16 c3 01");
|
||||
CODEGEN_TEST(xPEXTR.Q(ptr64[rax], xmm0, 1), "66 48 0f 3a 16 00 01");
|
||||
}
|
||||
|
||||
TEST(CodegenTests, AVXTest)
|
||||
@@ -464,6 +476,27 @@ TEST(CodegenTests, AVXTest)
|
||||
CODEGEN_TEST(xEXTRACTPS(ptr32[r9], xmm3, 3), "c4 c3 79 17 19 03");
|
||||
CODEGEN_TEST(xEXTRACTPS(ptr32[base], xmm1, 2), "c4 e3 79 17 0d f6 ff ff ff 02");
|
||||
|
||||
CODEGEN_TEST(xPSHUF.D(xmm2, ptr[r8], 0), "c4 c1 79 70 10 00");
|
||||
CODEGEN_TEST(xPSHUF.LW(xmm3, xmm8, 1), "c4 c1 7b 70 d8 01");
|
||||
CODEGEN_TEST(xPSHUF.HW(xmm4, xmm2, 8), "c5 fa 70 e2 08");
|
||||
CODEGEN_TEST(xPSHUF.B(xmm2, ptr[r8]), "c4 c2 69 00 10");
|
||||
CODEGEN_TEST(xPINSR.B(xmm1, ebx, 1), "c4 e3 71 20 cb 01");
|
||||
CODEGEN_TEST(xPINSR.W(xmm1, ebx, 1), "c5 f1 c4 cb 01");
|
||||
CODEGEN_TEST(xPINSR.D(xmm1, ebx, 1), "c4 e3 71 22 cb 01");
|
||||
CODEGEN_TEST(xPINSR.Q(xmm1, rbx, 1), "c4 e3 f1 22 cb 01");
|
||||
CODEGEN_TEST(xPINSR.B(xmm9, ptr8[rax], 1), "c4 63 31 20 08 01");
|
||||
CODEGEN_TEST(xPINSR.W(xmm9, ptr16[rax], 1), "c5 31 c4 08 01");
|
||||
CODEGEN_TEST(xPINSR.D(xmm9, ptr32[rax], 1), "c4 63 31 22 08 01");
|
||||
CODEGEN_TEST(xPINSR.Q(xmm9, ptr64[rax], 1), "c4 63 b1 22 08 01");
|
||||
CODEGEN_TEST(xPEXTR.B(ebx, xmm1, 1), "c4 e3 79 14 cb 01");
|
||||
CODEGEN_TEST(xPEXTR.W(ebx, xmm1, 1), "c5 f9 c5 d9 01");
|
||||
CODEGEN_TEST(xPEXTR.D(ebx, xmm1, 1), "c4 e3 79 16 cb 01");
|
||||
CODEGEN_TEST(xPEXTR.Q(rbx, xmm1, 1), "c4 e3 f9 16 cb 01");
|
||||
CODEGEN_TEST(xPEXTR.B(ptr8[rax], xmm9, 1), "c4 63 79 14 08 01");
|
||||
CODEGEN_TEST(xPEXTR.W(ptr16[rax], xmm9, 1), "c4 63 79 15 08 01");
|
||||
CODEGEN_TEST(xPEXTR.D(ptr32[rax], xmm9, 1), "c4 63 79 16 08 01");
|
||||
CODEGEN_TEST(xPEXTR.Q(ptr64[rax], xmm9, 1), "c4 63 f9 16 08 01");
|
||||
|
||||
CODEGEN_TEST(xVMOVAPS(xmm0, xmm1), "c5 f8 28 c1");
|
||||
CODEGEN_TEST(xVMOVAPS(xmm0, ptr32[rdi]), "c5 f8 28 07");
|
||||
CODEGEN_TEST(xVMOVAPS(ptr32[rdi], xmm0), "c5 f8 29 07");
|
||||
|
||||
Reference in New Issue
Block a user