ArmEmitter: Add VMOV_neon and a Size parameter to VFMA/VFMS for consistency.

This commit is contained in:
Henrik Rydgård 2014-03-22 16:27:23 +01:00
parent 0b3f1e4a5b
commit 8dfadf7b8e
3 changed files with 19 additions and 12 deletions

View File

@ -1984,8 +1984,9 @@ void ARMXEmitter::VEXT(ARMReg Vd, ARMReg Vn, ARMReg Vm, u8 index)
Write32((0xF2 << 24) | (0xB << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (index & 0xF) \
| (register_quad << 6) | EncodeVm(Vm));
}
void ARMXEmitter::VFMA(ARMReg Vd, ARMReg Vn, ARMReg Vm)
void ARMXEmitter::VFMA(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
_dbg_assert_msg_(JIT, Size == F_32, "Passed invalid size to FP-only NEON instruction");
_dbg_assert_msg_(JIT, Vd >= D0, "Pass invalid register to " __FUNCTION__);
_dbg_assert_msg_(JIT, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it");
_dbg_assert_msg_(JIT, cpu_info.bVFPv4, "Can't use " __FUNCTION__ " when CPU doesn't support it");
@ -1993,8 +1994,9 @@ void ARMXEmitter::VFMA(ARMReg Vd, ARMReg Vn, ARMReg Vm)
Write32((0xF2 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xC1 << 4) | (register_quad << 6) | EncodeVm(Vm));
}
void ARMXEmitter::VFMS(ARMReg Vd, ARMReg Vn, ARMReg Vm)
void ARMXEmitter::VFMS(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
_dbg_assert_msg_(JIT, Size == F_32, "Passed invalid size to FP-only NEON instruction");
_dbg_assert_msg_(JIT, Vd >= D0, "Pass invalid register to " __FUNCTION__);
_dbg_assert_msg_(JIT, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it");
_dbg_assert_msg_(JIT, cpu_info.bVFPv4, "Can't use " __FUNCTION__ " when CPU doesn't support it");

View File

@ -635,8 +635,6 @@ public:
void VADDHN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VADDL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VADDW(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VAND(ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VBIC(ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VBIF(ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VBIT(ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VBSL(ARMReg Vd, ARMReg Vn, ARMReg Vm);
@ -655,10 +653,9 @@ public:
void VCNT(u32 Size, ARMReg Vd, ARMReg Vm);
void VDUP(u32 Size, ARMReg Vd, ARMReg Vm, u8 index);
void VDUP(u32 Size, ARMReg Vd, ARMReg Rt);
void VEOR(ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VEXT(ARMReg Vd, ARMReg Vn, ARMReg Vm, u8 index);
void VFMA(ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VFMS(ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VFMA(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VFMS(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VHADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VHSUB(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VMAX(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
@ -695,9 +692,17 @@ public:
void VQRDMULH_scalar(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
*/
void VNEG(u32 Size, ARMReg Vd, ARMReg Vm);
// Vector bitwise. These don't have an element size for obvious reasons.
void VAND(ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VBIC(ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VEOR(ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VORN(ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VORR(ARMReg Vd, ARMReg Vn, ARMReg Vm);
// Register-to-register NEON move, implemented by emitting VORR with Src as
// both source operands: OR-ing a value with itself yields the same value,
// so Dest receives a copy of Src. Like the other bitwise ops above, it takes
// no element size.
inline void VMOV_neon(ARMReg Dest, ARMReg Src) {
VORR(Dest, Src, Src);
}
void VNEG(u32 Size, ARMReg Vd, ARMReg Vm);
void VPADAL(u32 Size, ARMReg Vd, ARMReg Vm);
void VPADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
void VPADDL(u32 Size, ARMReg Vd, ARMReg Vm);

View File

@ -330,10 +330,10 @@ void VertexDecoderJitCache::Jit_ApplyWeights() {
// Krait likes VDUP + VFMA better than VMLA, and it's easy to do here.
if (cpu_info.bVFPv4) {
VDUP(F_32, Q1, neonWeightRegs[i >> 2], i & 1);
VFMA(Q4, Q12, Q1);
VFMA(Q5, Q13, Q1);
VFMA(Q6, Q14, Q1);
VFMA(Q7, Q15, Q1);
VFMA(F_32, Q4, Q12, Q1);
VFMA(F_32, Q5, Q13, Q1);
VFMA(F_32, Q6, Q14, Q1);
VFMA(F_32, Q7, Q15, Q1);
} else {
VMLA_scalar(F_32, Q4, Q12, QScalar(neonWeightRegs[0], 1));
VMLA_scalar(F_32, Q5, Q13, QScalar(neonWeightRegs[0], 1));