mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-12-11 15:44:15 +00:00
ArmEmitter: Add VMOV_neon and a Size parameter to VFMA for consistency.
This commit is contained in:
parent
0b3f1e4a5b
commit
8dfadf7b8e
@ -1984,8 +1984,9 @@ void ARMXEmitter::VEXT(ARMReg Vd, ARMReg Vn, ARMReg Vm, u8 index)
|
||||
Write32((0xF2 << 24) | (0xB << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (index & 0xF) \
|
||||
| (register_quad << 6) | EncodeVm(Vm));
|
||||
}
|
||||
void ARMXEmitter::VFMA(ARMReg Vd, ARMReg Vn, ARMReg Vm)
|
||||
void ARMXEmitter::VFMA(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
|
||||
{
|
||||
_dbg_assert_msg_(JIT, Size == F_32, "Passed invalid size to FP-only NEON instruction");
|
||||
_dbg_assert_msg_(JIT, Vd >= D0, "Pass invalid register to " __FUNCTION__);
|
||||
_dbg_assert_msg_(JIT, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it");
|
||||
_dbg_assert_msg_(JIT, cpu_info.bVFPv4, "Can't use " __FUNCTION__ " when CPU doesn't support it");
|
||||
@ -1993,8 +1994,9 @@ void ARMXEmitter::VFMA(ARMReg Vd, ARMReg Vn, ARMReg Vm)
|
||||
|
||||
Write32((0xF2 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xC1 << 4) | (register_quad << 6) | EncodeVm(Vm));
|
||||
}
|
||||
void ARMXEmitter::VFMS(ARMReg Vd, ARMReg Vn, ARMReg Vm)
|
||||
void ARMXEmitter::VFMS(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
|
||||
{
|
||||
_dbg_assert_msg_(JIT, Size == F_32, "Passed invalid size to FP-only NEON instruction");
|
||||
_dbg_assert_msg_(JIT, Vd >= D0, "Pass invalid register to " __FUNCTION__);
|
||||
_dbg_assert_msg_(JIT, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it");
|
||||
_dbg_assert_msg_(JIT, cpu_info.bVFPv4, "Can't use " __FUNCTION__ " when CPU doesn't support it");
|
||||
|
@ -635,8 +635,6 @@ public:
|
||||
void VADDHN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
|
||||
void VADDL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
|
||||
void VADDW(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
|
||||
void VAND(ARMReg Vd, ARMReg Vn, ARMReg Vm);
|
||||
void VBIC(ARMReg Vd, ARMReg Vn, ARMReg Vm);
|
||||
void VBIF(ARMReg Vd, ARMReg Vn, ARMReg Vm);
|
||||
void VBIT(ARMReg Vd, ARMReg Vn, ARMReg Vm);
|
||||
void VBSL(ARMReg Vd, ARMReg Vn, ARMReg Vm);
|
||||
@ -655,10 +653,9 @@ public:
|
||||
void VCNT(u32 Size, ARMReg Vd, ARMReg Vm);
|
||||
void VDUP(u32 Size, ARMReg Vd, ARMReg Vm, u8 index);
|
||||
void VDUP(u32 Size, ARMReg Vd, ARMReg Rt);
|
||||
void VEOR(ARMReg Vd, ARMReg Vn, ARMReg Vm);
|
||||
void VEXT(ARMReg Vd, ARMReg Vn, ARMReg Vm, u8 index);
|
||||
void VFMA(ARMReg Vd, ARMReg Vn, ARMReg Vm);
|
||||
void VFMS(ARMReg Vd, ARMReg Vn, ARMReg Vm);
|
||||
void VFMA(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
|
||||
void VFMS(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
|
||||
void VHADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
|
||||
void VHSUB(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
|
||||
void VMAX(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
|
||||
@ -695,9 +692,17 @@ public:
|
||||
void VQRDMULH_scalar(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
|
||||
*/
|
||||
|
||||
void VNEG(u32 Size, ARMReg Vd, ARMReg Vm);
|
||||
// Vector bitwise. These don't have an element size for obvious reasons.
|
||||
void VAND(ARMReg Vd, ARMReg Vn, ARMReg Vm);
|
||||
void VBIC(ARMReg Vd, ARMReg Vn, ARMReg Vm);
|
||||
void VEOR(ARMReg Vd, ARMReg Vn, ARMReg Vm);
|
||||
void VORN(ARMReg Vd, ARMReg Vn, ARMReg Vm);
|
||||
void VORR(ARMReg Vd, ARMReg Vn, ARMReg Vm);
|
||||
// Copies Src into Dest by emitting VORR with Src as both source operands;
// OR-ing a value with itself leaves it unchanged, so Dest ends up equal to Src.
// NOTE(review): presumably valid for both D and Q registers, as with VORR — confirm against VORR's asserts.
inline void VMOV_neon(ARMReg Dest, ARMReg Src) {
|
||||
VORR(Dest, Src, Src);
|
||||
}
|
||||
|
||||
void VNEG(u32 Size, ARMReg Vd, ARMReg Vm);
|
||||
void VPADAL(u32 Size, ARMReg Vd, ARMReg Vm);
|
||||
void VPADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
|
||||
void VPADDL(u32 Size, ARMReg Vd, ARMReg Vm);
|
||||
|
@ -330,10 +330,10 @@ void VertexDecoderJitCache::Jit_ApplyWeights() {
|
||||
// Krait likes VDUP + VFMA better than VMLA, and it's easy to do here.
|
||||
if (cpu_info.bVFPv4) {
|
||||
VDUP(F_32, Q1, neonWeightRegs[i >> 2], i & 1);
|
||||
VFMA(Q4, Q12, Q1);
|
||||
VFMA(Q5, Q13, Q1);
|
||||
VFMA(Q6, Q14, Q1);
|
||||
VFMA(Q7, Q15, Q1);
|
||||
VFMA(F_32, Q4, Q12, Q1);
|
||||
VFMA(F_32, Q5, Q13, Q1);
|
||||
VFMA(F_32, Q6, Q14, Q1);
|
||||
VFMA(F_32, Q7, Q15, Q1);
|
||||
} else {
|
||||
VMLA_scalar(F_32, Q4, Q12, QScalar(neonWeightRegs[0], 1));
|
||||
VMLA_scalar(F_32, Q5, Q13, QScalar(neonWeightRegs[0], 1));
|
||||
|
Loading…
Reference in New Issue
Block a user