From 56557c845aa4ee5a0f9a1b06ad3fc5d6e7e6e4b7 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Tue, 12 Nov 2013 01:01:54 +0000 Subject: [PATCH] [ARM] Fix NEON emitter encodings. --- Source/Core/Common/Src/ArmEmitter.cpp | 181 ++++++++++-------- Source/Core/Common/Src/ArmEmitter.h | 172 ++++++++--------- Source/Core/Core/Src/PowerPC/JitArm32/Jit.cpp | 19 ++ 3 files changed, 201 insertions(+), 171 deletions(-) diff --git a/Source/Core/Common/Src/ArmEmitter.cpp b/Source/Core/Common/Src/ArmEmitter.cpp index 6fa952ea40..6178d2c58c 100644 --- a/Source/Core/Common/Src/ArmEmitter.cpp +++ b/Source/Core/Common/Src/ArmEmitter.cpp @@ -1237,7 +1237,7 @@ void ARMXEmitter::VCVT(ARMReg Dest, ARMReg Source, int flags) } } -void NEONXEmitter::VABA(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +void NEONXEmitter::VABA(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -1248,7 +1248,7 @@ void NEONXEmitter::VABA(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) | (encodedSize(Size) << 20) | EncodeVd(Vd) | (0x71 << 4) | (register_quad << 6) | EncodeVm(Vm)); } -void NEONXEmitter::VABAL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +void NEONXEmitter::VABAL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= Q0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, Vn >= D0 && Vn < Q0, "Pass invalid register to " __FUNCTION__); @@ -1260,7 +1260,7 @@ void NEONXEmitter::VABAL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) | (encodedSize(Size) << 20) | EncodeVd(Vd) | (0x50 << 4) | EncodeVm(Vm)); } -void NEONXEmitter::VABD(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +void NEONXEmitter::VABD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -1273,7 +1273,7 @@ void NEONXEmitter::VABD(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) | (encodedSize(Size) << 20) | EncodeVd(Vd) | (0x70 << 4) | (register_quad << 6) | EncodeVm(Vm)); } -void NEONXEmitter::VABDL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +void NEONXEmitter::VABDL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= Q0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, Vn >= D0 && Vn < Q0, "Pass invalid register to " __FUNCTION__); @@ -1285,7 +1285,7 @@ void NEONXEmitter::VABDL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) | (encodedSize(Size) << 20) | EncodeVd(Vd) | (0x70 << 4) | EncodeVm(Vm)); } -void NEONXEmitter::VABS(NEONElementType Size, ARMReg Vd, ARMReg Vm) +void NEONXEmitter::VABS(u32 Size, ARMReg Vd, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -1327,7 +1327,7 @@ void NEONXEmitter::VACLT(ARMReg Vd, ARMReg Vn, ARMReg Vm) VACGT(Vd, Vn, Vm); } -void NEONXEmitter::VADD(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +void NEONXEmitter::VADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -1341,7 +1341,7 @@ void NEONXEmitter::VADD(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) | (0x8 << 8) | (register_quad << 6) | EncodeVm(Vm)); } -void NEONXEmitter::VADDHN(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +void NEONXEmitter::VADDHN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd < Q0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, Vn >= Q0, "Pass invalid register to " __FUNCTION__); @@ -1353,7 +1353,7 @@ void NEONXEmitter::VADDHN(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) | EncodeVd(Vd) | (0x80 << 4) | EncodeVm(Vm)); } -void NEONXEmitter::VADDL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +void NEONXEmitter::VADDL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= Q0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, Vn >= D0 && Vn < Q0, "Pass invalid register to " __FUNCTION__); @@ -1364,7 +1364,7 @@ void NEONXEmitter::VADDL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) \ | EncodeVd(Vd) | EncodeVm(Vm)); } -void NEONXEmitter::VADDW(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +void NEONXEmitter::VADDW(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= Q0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, Vn >= Q0, "Pass invalid register to " __FUNCTION__); @@ -1420,7 +1420,7 @@ void NEONXEmitter::VBSL(ARMReg Vd, ARMReg Vn, ARMReg Vm) Write32((0xF3 << 24) | (1 << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x11 << 4) | (register_quad << 6) | EncodeVm(Vm)); } -void NEONXEmitter::VCEQ(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +void NEONXEmitter::VCEQ(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -1433,7 +1433,7 @@ void NEONXEmitter::VCEQ(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) | (0x81 << 4) | (register_quad << 6) | EncodeVm(Vm)); } -void NEONXEmitter::VCEQ(NEONElementType Size, ARMReg Vd, ARMReg Vm) +void NEONXEmitter::VCEQ(u32 Size, ARMReg Vd, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -1443,7 +1443,7 @@ void NEONXEmitter::VCEQ(NEONElementType Size, ARMReg Vd, ARMReg Vm) Write32((0xF2 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | (1 << 16) \ | EncodeVd(Vd) | ((Size & F_32 ? 1 : 0) << 10) | (0x10 << 4) | (register_quad << 6) | EncodeVm(Vm)); } -void NEONXEmitter::VCGE(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +void NEONXEmitter::VCGE(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -1455,7 +1455,7 @@ void NEONXEmitter::VCGE(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) \ | (0x31 << 4) | (register_quad << 6) | EncodeVm(Vm)); } -void NEONXEmitter::VCGE(NEONElementType Size, ARMReg Vd, ARMReg Vm) +void NEONXEmitter::VCGE(u32 Size, ARMReg Vd, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -1464,7 +1464,7 @@ void NEONXEmitter::VCGE(NEONElementType Size, ARMReg Vd, ARMReg Vm) Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | (1 << 16) \ | EncodeVd(Vd) | ((Size & F_32 ? 1 : 0) << 10) | (0x8 << 4) | (register_quad << 6) | EncodeVm(Vm)); } -void NEONXEmitter::VCGT(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +void NEONXEmitter::VCGT(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -1476,7 +1476,7 @@ void NEONXEmitter::VCGT(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) \ | (0x30 << 4) | (register_quad << 6) | EncodeVm(Vm)); } -void NEONXEmitter::VCGT(NEONElementType Size, ARMReg Vd, ARMReg Vm) +void NEONXEmitter::VCGT(u32 Size, ARMReg Vd, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -1485,11 +1485,11 @@ void NEONXEmitter::VCGT(NEONElementType Size, ARMReg Vd, ARMReg Vm) Write32((0xF3 << 24) | (0xD << 20) | (encodedSize(Size) << 18) | (1 << 16) \ | EncodeVd(Vd) | ((Size & F_32 ? 1 : 0) << 10) | (register_quad << 6) | EncodeVm(Vm)); } -void NEONXEmitter::VCLE(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +void NEONXEmitter::VCLE(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { VCGE(Size, Vd, Vm, Vn); } -void NEONXEmitter::VCLE(NEONElementType Size, ARMReg Vd, ARMReg Vm) +void NEONXEmitter::VCLE(u32 Size, ARMReg Vd, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -1498,7 +1498,7 @@ void NEONXEmitter::VCLE(NEONElementType Size, ARMReg Vd, ARMReg Vm) Write32((0xF3 << 24) | (0xD << 20) | (encodedSize(Size) << 18) | (1 << 16) \ | EncodeVd(Vd) | ((Size & F_32 ? 1 : 0) << 10) | (3 << 7) | (register_quad << 6) | EncodeVm(Vm)); } -void NEONXEmitter::VCLS(NEONElementType Size, ARMReg Vd, ARMReg Vm) +void NEONXEmitter::VCLS(u32 Size, ARMReg Vd, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -1508,11 +1508,11 @@ void NEONXEmitter::VCLS(NEONElementType Size, ARMReg Vd, ARMReg Vm) Write32((0xF3 << 24) | (0xD << 20) | (encodedSize(Size) << 18) \ | EncodeVd(Vd) | (1 << 10) | (register_quad << 6) | EncodeVm(Vm)); } -void NEONXEmitter::VCLT(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +void NEONXEmitter::VCLT(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { VCGT(Size, Vd, Vm, Vn); } -void NEONXEmitter::VCLT(NEONElementType Size, ARMReg Vd, ARMReg Vm) +void NEONXEmitter::VCLT(u32 Size, ARMReg Vd, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -1521,7 +1521,7 @@ void NEONXEmitter::VCLT(NEONElementType Size, ARMReg Vd, ARMReg Vm) Write32((0xF3 << 24) | (0xD << 20) | (encodedSize(Size) << 18) | (1 << 16) \ | EncodeVd(Vd) | ((Size & F_32 ? 1 : 0) << 10) | (0x20 << 4) | (register_quad << 6) | EncodeVm(Vm)); } -void NEONXEmitter::VCLZ(NEONElementType Size, ARMReg Vd, ARMReg Vm) +void NEONXEmitter::VCLZ(u32 Size, ARMReg Vd, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -1530,7 +1530,7 @@ void NEONXEmitter::VCLZ(NEONElementType Size, ARMReg Vd, ARMReg Vm) Write32((0xF3 << 24) | (0xD << 20) | (encodedSize(Size) << 18) \ | EncodeVd(Vd) | (0x48 << 4) | (register_quad << 6) | EncodeVm(Vm)); } -void NEONXEmitter::VCNT(NEONElementType Size, ARMReg Vd, ARMReg Vm) +void NEONXEmitter::VCNT(u32 Size, ARMReg Vd, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -1540,7 +1540,7 @@ void NEONXEmitter::VCNT(NEONElementType Size, ARMReg Vd, ARMReg Vm) Write32((0xF3 << 24) | (0xD << 20) | (encodedSize(Size) << 18) \ | EncodeVd(Vd) | (0x90 << 4) | (register_quad << 6) | EncodeVm(Vm)); } -void NEONXEmitter::VDUP(NEONElementType Size, ARMReg Vd, ARMReg Vm, u8 index) +void NEONXEmitter::VDUP(u32 Size, ARMReg Vd, ARMReg Vm, u8 index) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -1562,7 +1562,7 @@ void NEONXEmitter::VDUP(NEONElementType Size, ARMReg Vd, ARMReg Vm, u8 index) Write32((0xF3 << 24) | (0xD << 20) | (sizeEncoded << 16) | (indexEncoded << 16) \ | EncodeVd(Vd) | (0xC0 << 4) | (register_quad << 6) | EncodeVm(Vm)); } -void NEONXEmitter::VDUP(NEONElementType Size, ARMReg Vd, ARMReg Rt) +void NEONXEmitter::VDUP(u32 Size, ARMReg Vd, ARMReg Rt) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, Rt < D0, "Pass invalid register to " __FUNCTION__); @@ -1616,7 +1616,7 @@ void NEONXEmitter::VFMS(ARMReg Vd, ARMReg Vn, ARMReg Vm) Write32((0xF2 << 24) | (1 << 21) | EncodeVn(Vn) | EncodeVd(Vd) | (0xC1 << 4) | (register_quad << 6) | EncodeVm(Vm)); } -void NEONXEmitter::VHADD(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +void NEONXEmitter::VHADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -1627,7 +1627,7 @@ void NEONXEmitter::VHADD(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 23) | (encodedSize(Size) << 20) \ | EncodeVn(Vn) | EncodeVd(Vd) | (register_quad << 6) | EncodeVm(Vm)); } -void NEONXEmitter::VHSUB(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +void NEONXEmitter::VHSUB(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -1638,7 +1638,7 @@ void NEONXEmitter::VHSUB(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 23) | (encodedSize(Size) << 20) \ | EncodeVn(Vn) | EncodeVd(Vd) | (1 << 9) | (register_quad << 6) | EncodeVm(Vm)); } -void NEONXEmitter::VMAX(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +void NEONXEmitter::VMAX(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -1651,7 +1651,7 @@ void NEONXEmitter::VMAX(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 23) | (encodedSize(Size) << 20) \ | EncodeVn(Vn) | EncodeVd(Vd) | (0x60 << 4) | (register_quad << 6) | EncodeVm(Vm)); } -void NEONXEmitter::VMIN(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +void NEONXEmitter::VMIN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -1664,7 +1664,7 @@ void NEONXEmitter::VMIN(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 23) | (encodedSize(Size) << 20) \ | EncodeVn(Vn) | EncodeVd(Vd) | (0x61 << 4) | (register_quad << 6) | EncodeVm(Vm)); } -void NEONXEmitter::VMLA(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +void NEONXEmitter::VMLA(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -1676,7 +1676,7 @@ void NEONXEmitter::VMLA(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) else Write32((0xF2 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x90 << 4) | (register_quad << 6) | EncodeVm(Vm)); } -void NEONXEmitter::VMLS(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +void NEONXEmitter::VMLS(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -1688,7 +1688,7 @@ void NEONXEmitter::VMLS(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) else Write32((0xF2 << 24) | (1 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x90 << 4) | (register_quad << 6) | EncodeVm(Vm)); } -void NEONXEmitter::VMLAL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +void NEONXEmitter::VMLAL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= Q0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, Vn >= Q0, "Pass invalid register to " __FUNCTION__); @@ -1699,7 +1699,7 @@ void NEONXEmitter::VMLAL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | (encodedSize(Size) << 20) \ | EncodeVn(Vn) | EncodeVd(Vd) | (0x80 << 4) | EncodeVm(Vm)); } -void NEONXEmitter::VMLSL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +void NEONXEmitter::VMLSL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= Q0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, Vn >= Q0, "Pass invalid register to " __FUNCTION__); @@ -1710,7 +1710,7 @@ void NEONXEmitter::VMLSL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | (encodedSize(Size) << 20) \ | EncodeVn(Vn) | EncodeVd(Vd) | (0xA0 << 4) | EncodeVm(Vm)); } -void NEONXEmitter::VMUL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +void NEONXEmitter::VMUL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -1723,7 +1723,7 @@ void NEONXEmitter::VMUL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) Write32((0xF2 << 24) | ((Size & I_POLYNOMIAL) ? (1 << 24) : 0) | (encodedSize(Size) << 20) | \ EncodeVn(Vn) | EncodeVd(Vd) | (0x91 << 4) | (register_quad << 6) | EncodeVm(Vm)); } -void NEONXEmitter::VMULL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +void NEONXEmitter::VMULL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -1732,7 +1732,7 @@ void NEONXEmitter::VMULL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) Write32((0xF2 << 24) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ (0xC0 << 4) | ((Size & I_POLYNOMIAL) ? 1 << 9 : 0) | EncodeVm(Vm)); } -void NEONXEmitter::VNEG(NEONElementType Size, ARMReg Vd, ARMReg Vm) +void NEONXEmitter::VNEG(u32 Size, ARMReg Vd, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -1760,7 +1760,7 @@ void NEONXEmitter::VORR(ARMReg Vd, ARMReg Vn, ARMReg Vm) Write32((0xF2 << 24) | (2 << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x11 << 4) | (register_quad << 6) | EncodeVm(Vm)); } -void NEONXEmitter::VPADAL(NEONElementType Size, ARMReg Vd, ARMReg Vm) +void NEONXEmitter::VPADAL(u32 Size, ARMReg Vd, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -1771,7 +1771,7 @@ void NEONXEmitter::VPADAL(NEONElementType Size, ARMReg Vd, ARMReg Vm) Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | EncodeVd(Vd) | \ (0x60 << 4) | ((Size & I_UNSIGNED) ? 1 << 7 : 0) | (register_quad << 6) | EncodeVm(Vm)); } -void NEONXEmitter::VPADD(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +void NEONXEmitter::VPADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -1779,10 +1779,10 @@ void NEONXEmitter::VPADD(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) if (Size & F_32) Write32((0xF3 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xD0 << 4) | EncodeVm(Vm)); else - Write32((0xF2 << 24) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ + Write32((0xF2 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ (0xB1 << 4) | EncodeVm(Vm)); } -void NEONXEmitter::VPADDL(NEONElementType Size, ARMReg Vd, ARMReg Vm) +void NEONXEmitter::VPADDL(u32 Size, ARMReg Vd, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -1793,7 +1793,7 @@ void NEONXEmitter::VPADDL(NEONElementType Size, ARMReg Vd, ARMReg Vm) Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | EncodeVd(Vd) | \ (0x20 << 4) | (Size & I_UNSIGNED ? 1 << 7 : 0) | (register_quad << 6) | EncodeVm(Vm)); } -void NEONXEmitter::VPMAX(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +void NEONXEmitter::VPMAX(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -1804,7 +1804,7 @@ void NEONXEmitter::VPMAX(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ (0xA0 << 4) | EncodeVm(Vm)); } -void NEONXEmitter::VPMIN(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +void NEONXEmitter::VPMIN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -1815,7 +1815,7 @@ void NEONXEmitter::VPMIN(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ (0xA1 << 4) | EncodeVm(Vm)); } -void NEONXEmitter::VQABS(NEONElementType Size, ARMReg Vd, ARMReg Vm) +void NEONXEmitter::VQABS(u32 Size, ARMReg Vd, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -1826,7 +1826,7 @@ void NEONXEmitter::VQABS(NEONElementType Size, ARMReg Vd, ARMReg Vm) Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | EncodeVd(Vd) | \ (0x70 << 4) | (register_quad << 6) | EncodeVm(Vm)); } -void NEONXEmitter::VQADD(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +void NEONXEmitter::VQADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -1837,7 +1837,7 @@ void NEONXEmitter::VQADD(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) Write32((0xF2 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ (0x1 << 4) | (register_quad << 6) | EncodeVm(Vm)); } -void NEONXEmitter::VQMLAL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +void NEONXEmitter::VQDMLAL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -1846,7 +1846,7 @@ void NEONXEmitter::VQMLAL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) Write32((0xF2 << 24) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ (0x90 << 4) | EncodeVm(Vm)); } -void NEONXEmitter::VQMLSL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +void NEONXEmitter::VQDMLSL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -1855,7 +1855,7 @@ void NEONXEmitter::VQMLSL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) Write32((0xF2 << 24) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ (0xB0 << 4) | EncodeVm(Vm)); } -void NEONXEmitter::VQDMULH(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +void NEONXEmitter::VQDMULH(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -1864,7 +1864,7 @@ void NEONXEmitter::VQDMULH(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm Write32((0xF2 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ (0xB0 << 4) | EncodeVm(Vm)); } -void NEONXEmitter::VQDMULL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +void NEONXEmitter::VQDMULL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -1873,7 +1873,7 @@ void NEONXEmitter::VQDMULL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm Write32((0xF2 << 24) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ (0xD0 << 4) | EncodeVm(Vm)); } -void NEONXEmitter::VQNEG(NEONElementType Size, ARMReg Vd, ARMReg Vm) +void NEONXEmitter::VQNEG(u32 Size, ARMReg Vd, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -1884,7 +1884,7 @@ void NEONXEmitter::VQNEG(NEONElementType Size, ARMReg Vd, ARMReg Vm) Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | EncodeVd(Vd) | \ (0x78 << 4) | (register_quad << 6) | EncodeVm(Vm)); } -void NEONXEmitter::VQRDMULH(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +void NEONXEmitter::VQRDMULH(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -1893,7 +1893,7 @@ void NEONXEmitter::VQRDMULH(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg V Write32((0xF3 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ (0xB0 << 4) | EncodeVm(Vm)); } -void NEONXEmitter::VQRSHL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +void NEONXEmitter::VQRSHL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -1902,9 +1902,9 @@ void NEONXEmitter::VQRSHL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) bool register_quad = Vd >= Q0; Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ - (0x91 << 4) | (register_quad << 6) | EncodeVm(Vm)); + (0x51 << 4) | (register_quad << 6) | EncodeVm(Vm)); } -void NEONXEmitter::VQSHL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +void NEONXEmitter::VQSHL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -1915,7 +1915,7 @@ void NEONXEmitter::VQSHL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ (0x41 << 4) | (register_quad << 6) | EncodeVm(Vm)); } -void NEONXEmitter::VQSUB(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +void NEONXEmitter::VQSUB(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -1926,35 +1926,46 @@ void NEONXEmitter::VQSUB(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ (0x21 << 4) | (register_quad << 6) | EncodeVm(Vm)); } -void NEONXEmitter::VRADDHN(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +void NEONXEmitter::VRADDHN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); _dbg_assert_msg_(DYNA_REC, !(Size & F_32), __FUNCTION__ " doesn't support float"); - Write32((0xF3 << 24) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ + Write32((0xF3 << 24) | (1 << 23) | ((encodedSize(Size) - 1) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ (0x40 << 4) | EncodeVm(Vm)); } -void NEONXEmitter::VRECPE(NEONElementType Size, ARMReg Vd, ARMReg Vm) +void NEONXEmitter::VRECPE(u32 Size, ARMReg Vd, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); bool register_quad = Vd >= Q0; - Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | (0x3 << 16) | EncodeVd(Vd) | \ + Write32((0xF3 << 24) | (0xB << 20) | (0xB << 16) | EncodeVd(Vd) | \ (0x40 << 4) | (Size & F_32 ? 1 << 8 : 0) | (register_quad << 6) | EncodeVm(Vm)); } -void NEONXEmitter::VRHADD(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +void NEONXEmitter::VRECPS(ARMReg Vd, ARMReg Vn, ARMReg Vm) +{ + _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); + + bool register_quad = Vd >= Q0; + + Write32((0xF2 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xF1 << 4) | (register_quad << 6) | EncodeVm(Vm)); +} +void NEONXEmitter::VRHADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); _dbg_assert_msg_(DYNA_REC, !(Size & F_32), __FUNCTION__ " doesn't support float"); + bool register_quad = Vd >= Q0; + Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ - (0x10 << 4) | EncodeVm(Vm)); + (0x10 << 4) | (register_quad << 6) | EncodeVm(Vm)); } -void NEONXEmitter::VRSHL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +void NEONXEmitter::VRSHL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -1965,8 +1976,11 @@ void NEONXEmitter::VRSHL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ (0x50 << 4) | (register_quad << 6) | EncodeVm(Vm)); } -void NEONXEmitter::VRSQRTE(NEONElementType Size, ARMReg Vd, ARMReg Vm) +void NEONXEmitter::VRSQRTE(u32 Size, ARMReg Vd, ARMReg Vm) { + _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); + bool register_quad = Vd >= Q0; Vd = SubBase(Vd); Vm = SubBase(Vm); @@ -1975,27 +1989,26 @@ void NEONXEmitter::VRSQRTE(NEONElementType Size, ARMReg Vd, ARMReg Vm) | ((Vd & 0xF) << 12) | (9 << 7) | (Size & F_32 ? (1 << 8) : 0) | (register_quad << 6) | ((Vm & 0x10) << 1) | (Vm & 0xF)); } -void NEONXEmitter::VRSQRTS(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +void NEONXEmitter::VRSQRTS(ARMReg Vd, ARMReg Vn, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); - _dbg_assert_msg_(DYNA_REC, Size & F_32, __FUNCTION__ " only supports float"); bool register_quad = Vd >= Q0; Write32((0xF2 << 24) | (1 << 21) | EncodeVn(Vn) | EncodeVd(Vd) | \ (0xF1 << 4) | (register_quad << 6) | EncodeVm(Vm)); } -void NEONXEmitter::VRSUBHN(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +void NEONXEmitter::VRSUBHN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); _dbg_assert_msg_(DYNA_REC, !(Size & F_32), __FUNCTION__ " doesn't support float"); - Write32((0xF3 << 24) | (1 << 21) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ + Write32((0xF3 << 24) | (1 << 23) | ((encodedSize(Size) - 1) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ (0x60 << 4) | EncodeVm(Vm)); } -void NEONXEmitter::VSHL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +void NEONXEmitter::VSHL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -2006,7 +2019,7 @@ void NEONXEmitter::VSHL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ (0x40 << 4) | (register_quad << 6) | EncodeVm(Vm)); } -void NEONXEmitter::VSUB(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +void NEONXEmitter::VSUB(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= Q0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -2020,28 +2033,28 @@ void NEONXEmitter::VSUB(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) Write32((0xF3 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ (0x80 << 4) | (register_quad << 6) | EncodeVm(Vm)); } -void NEONXEmitter::VSUBHN(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +void NEONXEmitter::VSUBHN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= Q0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); - Write32((0xF2 << 24) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ + Write32((0xF2 << 24) | (1 << 23) | ((encodedSize(Size) - 1) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ (0x60 << 4) | EncodeVm(Vm)); } -void NEONXEmitter::VSUBL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +void NEONXEmitter::VSUBL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= Q0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); - Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ + Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ (0x20 << 4) | EncodeVm(Vm)); } -void NEONXEmitter::VSUBW(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +void NEONXEmitter::VSUBW(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= Q0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); - Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ + Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ (0x30 << 4) | EncodeVm(Vm)); } void NEONXEmitter::VSWP(ARMReg Vd, ARMReg Vm) @@ -2054,7 +2067,7 @@ void NEONXEmitter::VSWP(ARMReg Vd, ARMReg Vm) Write32((0xF3 << 24) | (0xB << 20) | (1 << 17) | EncodeVd(Vd) | \ (register_quad << 6) | EncodeVm(Vm)); } -void NEONXEmitter::VTRN(NEONElementType Size, ARMReg Vd, ARMReg Vm) +void NEONXEmitter::VTRN(u32 Size, ARMReg Vd, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= Q0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -2064,7 +2077,7 @@ void NEONXEmitter::VTRN(NEONElementType Size, ARMReg Vd, ARMReg Vm) Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | (1 << 17) | EncodeVd(Vd) | \ (1 << 7) | (register_quad << 6) | EncodeVm(Vm)); } -void NEONXEmitter::VTST(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +void NEONXEmitter::VTST(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= Q0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -2074,7 +2087,7 @@ void NEONXEmitter::VTST(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) Write32((0xF2 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ (0x81 << 4) | (register_quad << 6) | EncodeVm(Vm)); } -void NEONXEmitter::VUZP(NEONElementType Size, ARMReg Vd, ARMReg Vm) +void NEONXEmitter::VUZP(u32 Size, ARMReg Vd, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= Q0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -2084,7 +2097,7 @@ void NEONXEmitter::VUZP(NEONElementType Size, ARMReg Vd, ARMReg Vm) Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | (1 << 17) | EncodeVd(Vd) | \ (0x10 << 4) | (register_quad << 6) | EncodeVm(Vm)); } -void NEONXEmitter::VZIP(NEONElementType Size, ARMReg Vd, ARMReg Vm) +void NEONXEmitter::VZIP(u32 Size, ARMReg Vd, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= Q0, "Pass invalid register to " __FUNCTION__); _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); @@ -2094,7 +2107,7 @@ void NEONXEmitter::VZIP(NEONElementType Size, ARMReg Vd, ARMReg Vm) Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | (1 << 17) | EncodeVd(Vd) | \ (0x18 << 4) | (register_quad << 6) | EncodeVm(Vm)); } -void NEONXEmitter::VLD1(NEONElementType Size, ARMReg Vd, ARMReg Rn, NEONAlignment align, ARMReg Rm) +void NEONXEmitter::VLD1(u32 Size, ARMReg Vd, ARMReg Rn, NEONAlignment align, ARMReg Rm) { u32 spacing = 0x7; // Only support loading to 1 reg // Gets encoded as a double register @@ -2104,7 +2117,7 @@ void NEONXEmitter::VLD1(NEONElementType Size, ARMReg Vd, ARMReg Rn, NEONAlignmen | ((Vd & 0xF) << 12) | (spacing << 8) | (encodedSize(Size) << 6) | (align << 4) | Rm); } -void NEONXEmitter::VLD2(NEONElementType Size, ARMReg Vd, ARMReg Rn, NEONAlignment align, ARMReg Rm) +void NEONXEmitter::VLD2(u32 Size, ARMReg Vd, ARMReg Rn, NEONAlignment align, ARMReg Rm) { u32 spacing = 0x8; // Single spaced registers // Gets encoded as a double register @@ -2114,7 +2127,7 @@ void NEONXEmitter::VLD2(NEONElementType Size, ARMReg Vd, ARMReg Rn, NEONAlignmen | ((Vd & 0xF) << 12) | (spacing << 8) | (encodedSize(Size) << 6) | (align << 4) | Rm); } -void NEONXEmitter::VST1(NEONElementType Size, ARMReg Vd, ARMReg Rn, NEONAlignment align, ARMReg Rm) +void NEONXEmitter::VST1(u32 Size, ARMReg Vd, ARMReg Rn, NEONAlignment align, ARMReg Rm) { u32 spacing = 0x7; // Single spaced registers // Gets encoded as a double register @@ -2125,7 +2138,7 @@ void NEONXEmitter::VST1(NEONElementType Size, ARMReg Vd, ARMReg Rn, NEONAlignmen | (align << 4) | Rm); } -void NEONXEmitter::VREVX(u32 size, NEONElementType Size, ARMReg Vd, ARMReg Vm) +void NEONXEmitter::VREVX(u32 size, u32 Size, ARMReg Vd, ARMReg Vm) { bool register_quad = Vd >= Q0; Vd = SubBase(Vd); @@ -2136,17 +2149,17 @@ void NEONXEmitter::VREVX(u32 size, NEONElementType Size, ARMReg Vd, ARMReg Vm) | (register_quad << 6) | ((Vm & 0x10) << 1) | (Vm & 0xF)); } -void NEONXEmitter::VREV64(NEONElementType Size, ARMReg Vd, ARMReg Vm) +void NEONXEmitter::VREV64(u32 Size, ARMReg Vd, ARMReg Vm) { VREVX(0, Size, Vd, Vm); } -void NEONXEmitter::VREV32(NEONElementType Size, ARMReg Vd, ARMReg Vm) +void NEONXEmitter::VREV32(u32 Size, ARMReg Vd, ARMReg Vm) { VREVX(1, Size, Vd, Vm); } -void NEONXEmitter::VREV16(NEONElementType Size, ARMReg Vd, ARMReg Vm) +void NEONXEmitter::VREV16(u32 Size, ARMReg Vd, ARMReg Vm) { VREVX(2, Size, Vd, Vm); } diff --git a/Source/Core/Common/Src/ArmEmitter.h b/Source/Core/Common/Src/ArmEmitter.h index a2b6884607..99024f3b6f 100644 --- a/Source/Core/Common/Src/ArmEmitter.h +++ b/Source/Core/Common/Src/ArmEmitter.h @@ -338,6 +338,15 @@ struct LiteralPool }; typedef const u8* JumpTarget; +// XXX: Stop polluting the global namespace +const u32 I_8 = (1 << 0); +const u32 I_16 = (1 << 1); +const u32 I_32 = (1 << 2); +const u32 I_64 = (1 << 3); +const u32 I_SIGNED = (1 << 4); +const u32 I_UNSIGNED = (1 << 5); +const u32 F_32 = (1 << 6); +const u32 I_POLYNOMIAL = (1 << 7); // Only used in VMUL/VMULL u32 EncodeVd(ARMReg Vd); u32 EncodeVn(ARMReg Vn); @@ -572,18 +581,6 @@ public: }; // class ARMXEmitter -enum NEONElementType -{ - I_8 = (1 << 0), - I_16 = (1 << 1), - I_32 = (1 << 2), - I_64 = (1 << 3), - I_SIGNED = (1 << 4), - I_UNSIGNED = (1 << 5), - F_32 = (1 << 6), - I_POLYNOMIAL = (1 << 7), // Only used in VMUL/VMULL -}; - enum NEONAlignment { ALIGN_NONE = 0, @@ -614,104 +611,105 @@ private: return 0; } - void VREVX(u32 size, NEONElementType Size, ARMReg Vd, ARMReg Vm); + void VREVX(u32 size, u32 Size, ARMReg Vd, ARMReg Vm); -public: +public: NEONXEmitter(ARMXEmitter *emit) : _emit(emit) {} - void VABA(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VABAL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VABD(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VABDL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VABS(NEONElementType Size, ARMReg Vd, ARMReg Vm); + void VABA(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VABAL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VABD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VABDL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VABS(u32 Size, ARMReg Vd, ARMReg Vm); void VACGE(ARMReg Vd, ARMReg Vn, ARMReg Vm); void VACGT(ARMReg Vd, ARMReg Vn, ARMReg Vm); void VACLE(ARMReg Vd, ARMReg Vn, ARMReg Vm); void VACLT(ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VADD(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VADDHN(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VADDL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VADDW(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VADDHN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VADDL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VADDW(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); void VAND(ARMReg Vd, ARMReg Vn, ARMReg Vm); void VBIC(ARMReg Vd, ARMReg Vn, ARMReg Vm); void VBIF(ARMReg Vd, ARMReg Vn, ARMReg Vm); void VBIT(ARMReg Vd, ARMReg Vn, ARMReg Vm); void VBSL(ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VCEQ(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VCEQ(NEONElementType Size, ARMReg Vd, ARMReg Vm); - void VCGE(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VCGE(NEONElementType Size, ARMReg Vd, ARMReg Vm); - void VCGT(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VCGT(NEONElementType Size, ARMReg Vd, ARMReg Vm); - void VCLE(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VCLE(NEONElementType Size, ARMReg Vd, ARMReg Vm); - void VCLS(NEONElementType Size, ARMReg Vd, ARMReg Vm); - void VCLT(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VCLT(NEONElementType Size, ARMReg Vd, ARMReg Vm); - void VCLZ(NEONElementType Size, ARMReg Vd, ARMReg Vm); - void VCNT(NEONElementType Size, ARMReg Vd, ARMReg Vm); - void VDUP(NEONElementType Size, ARMReg Vd, ARMReg Vm, u8 index); - void VDUP(NEONElementType Size, ARMReg Vd, ARMReg Rt); + void VCEQ(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VCEQ(u32 Size, ARMReg Vd, ARMReg Vm); + void VCGE(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VCGE(u32 Size, ARMReg Vd, ARMReg Vm); + void VCGT(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VCGT(u32 Size, ARMReg Vd, ARMReg Vm); + void VCLE(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VCLE(u32 Size, ARMReg Vd, ARMReg Vm); + void VCLS(u32 Size, ARMReg Vd, ARMReg Vm); + void VCLT(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VCLT(u32 Size, ARMReg Vd, ARMReg Vm); + void VCLZ(u32 Size, ARMReg Vd, ARMReg Vm); + void VCNT(u32 Size, ARMReg Vd, ARMReg Vm); + void VDUP(u32 Size, ARMReg Vd, ARMReg Vm, u8 index); + void VDUP(u32 Size, ARMReg Vd, ARMReg Rt); void VEOR(ARMReg Vd, ARMReg Vn, ARMReg Vm); void VEXT(ARMReg Vd, ARMReg Vn, ARMReg Vm, u8 index); void VFMA(ARMReg Vd, ARMReg Vn, ARMReg Vm); void VFMS(ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VHADD(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VHSUB(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VMAX(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VMIN(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VMLA(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VMLS(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VMLAL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VMLSL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VMUL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VMULL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VNEG(NEONElementType Size, ARMReg Vd, ARMReg Vm); + void VHADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VHSUB(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VMAX(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VMIN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VMLA(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VMLS(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VMLAL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VMLSL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VMUL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VMULL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VNEG(u32 Size, ARMReg Vd, ARMReg Vm); void VORN(ARMReg Vd, ARMReg Vn, ARMReg Vm); void VORR(ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VPADAL(NEONElementType Size, ARMReg Vd, ARMReg Vm); - void VPADD(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VPADDL(NEONElementType Size, ARMReg Vd, ARMReg Vm); - void VPMAX(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VPMIN(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VQABS(NEONElementType Size, ARMReg Vd, ARMReg Vm); - void VQADD(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VQMLAL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VQMLSL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VQDMULH(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VQDMULL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VQNEG(NEONElementType Size, ARMReg Vd, ARMReg Vm); - void VQRDMULH(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VQRSHL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VQSHL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VQSUB(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VRADDHN(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VRECPE(NEONElementType Size, ARMReg Vd, ARMReg Vm); - void VRHADD(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VRSHL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VRSQRTE(NEONElementType Size, ARMReg Vd, ARMReg Vm); - void VRSQRTS(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VRSUBHN(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VSHL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VSUB(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VSUBHN(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VSUBL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VSUBW(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VPADAL(u32 Size, ARMReg Vd, ARMReg Vm); + void VPADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VPADDL(u32 Size, ARMReg Vd, ARMReg Vm); + void VPMAX(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VPMIN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VQABS(u32 Size, ARMReg Vd, ARMReg Vm); + void VQADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VQDMLAL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VQDMLSL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VQDMULH(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VQDMULL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VQNEG(u32 Size, ARMReg Vd, ARMReg Vm); + void VQRDMULH(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VQRSHL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VQSHL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VQSUB(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VRADDHN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VRECPE(u32 Size, ARMReg Vd, ARMReg Vm); + void VRECPS(ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VRHADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VRSHL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VRSQRTE(u32 Size, ARMReg Vd, ARMReg Vm); + void VRSQRTS(ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VRSUBHN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VSHL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VSUB(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VSUBHN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VSUBL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VSUBW(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); void VSWP(ARMReg Vd, ARMReg Vm); - void VTRN(NEONElementType Size, ARMReg Vd, ARMReg Vm); - void VTST(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VUZP(NEONElementType Size, ARMReg Vd, ARMReg Vm); - void VZIP(NEONElementType Size, ARMReg Vd, ARMReg Vm); - void VREV64(NEONElementType Size, ARMReg Vd, ARMReg Vm); - void VREV32(NEONElementType Size, ARMReg Vd, ARMReg Vm); - void VREV16(NEONElementType Size, ARMReg Vd, ARMReg Vm); + void VTRN(u32 Size, ARMReg Vd, ARMReg Vm); + void VTST(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VUZP(u32 Size, ARMReg Vd, ARMReg Vm); + void VZIP(u32 Size, ARMReg Vd, ARMReg Vm); + void VREV64(u32 Size, ARMReg Vd, ARMReg Vm); + void VREV32(u32 Size, ARMReg Vd, ARMReg Vm); + void VREV16(u32 Size, ARMReg Vd, ARMReg Vm); - void VLD1(NEONElementType Size, ARMReg Vd, ARMReg Rn, NEONAlignment align = ALIGN_NONE, ARMReg Rm = _PC); - void VLD2(NEONElementType Size, ARMReg Vd, ARMReg Rn, NEONAlignment align = ALIGN_NONE, ARMReg Rm = _PC); + void VLD1(u32 Size, ARMReg Vd, ARMReg Rn, NEONAlignment align = ALIGN_NONE, ARMReg Rm = _PC); + void VLD2(u32 Size, ARMReg Vd, ARMReg Rn, NEONAlignment align = ALIGN_NONE, ARMReg Rm = _PC); - void VST1(NEONElementType Size, ARMReg Vd, ARMReg Rn, NEONAlignment align = ALIGN_NONE, ARMReg Rm = _PC); + void VST1(u32 Size, ARMReg Vd, ARMReg Rn, NEONAlignment align = ALIGN_NONE, ARMReg Rm = _PC); }; // Everything that needs to generate X86 code should inherit from this. diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/Jit.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/Jit.cpp index 4083a383e9..0b13372079 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/Jit.cpp +++ b/Source/Core/Core/Src/PowerPC/JitArm32/Jit.cpp @@ -358,6 +358,25 @@ const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlo b->checkedEntry = start; b->runCount = 0; + BKPT(1); + NEONXEmitter emit(this); +#if 1 + emit.VNEG(I_8, D15, D15); + emit.VNEG(I_16, D15, D15); + emit.VNEG(I_32, D15, D15); + emit.VNEG(F_32, Q15, Q15); + +#elif 0 + emit.VTRN(I_8, D31, D31); + emit.VTRN(I_8, Q15, Q15); + emit.VTRN(I_16, D31, D31); + emit.VTRN(I_16, Q15, Q15); + emit.VTRN(I_32, D31, D31); + emit.VTRN(I_32, Q15, Q15); +#else + emit.VSWP(D31, D31); + emit.VSWP(Q15, Q15); +#endif // Downcount flag check, Only valid for linked blocks { SetCC(CC_MI);