From 41a988774f29d5d850bc66d66b33a4bb505eae7c Mon Sep 17 00:00:00 2001
From: Henrik Rydgard
Date: Sat, 28 Sep 2013 12:30:28 +0200
Subject: [PATCH] ARM: implement vhdp

---
Review notes (placed after the "---" separator, so they are commentary and
not part of the commit message or the diff payload):

* ARM Comp_VHdp: S0 is seeded with s[0]*t[0]; the loop then uses VMLA for
  the middle elements and, for the last element (i == n - 1), VADD of
  t[i] alone. The in-loop comment "sum += s[i]*t[i]" does not mention that
  last-element case.
* NOTE(review): if GetNumVectorElements(sz) can be 1 for this op, the loop
  body never runs and the result is s[0]*t[0]; confirm that size is never
  encoded for vhdp.
* The tableVFPU7 entry for "vh2f" switches from Comp_Generic to a new
  Comp_Vh2f, which is added as a DISABLE-only stub on ARM, PPC and x86.
* Comp_VDot: only the "applyprefixD" TODO comment is removed; the
  ApplyPrefixD(dregs, V_Single) call right after the VMOV is unchanged.
* android/ab.cmd gains two copy steps: assets\shaders and langregion.ini.
* NOTE(review): line breaks and indentation in this copy were reconstructed
  after whitespace was lost; regenerate the patch from the commit before
  applying it.

 Core/MIPS/ARM/ArmCompVFPU.cpp | 52 +++++++++++++++++++++++++++++++----
 Core/MIPS/ARM/ArmJit.h        |  1 +
 Core/MIPS/MIPSTables.cpp      |  2 +-
 Core/MIPS/PPC/PpcCompVFPU.cpp |  4 +++
 Core/MIPS/PPC/PpcJit.h        |  1 +
 Core/MIPS/x86/CompVFPU.cpp    |  4 +++
 Core/MIPS/x86/Jit.h           |  1 +
 android/ab.cmd                |  2 ++
 8 files changed, 60 insertions(+), 7 deletions(-)

diff --git a/Core/MIPS/ARM/ArmCompVFPU.cpp b/Core/MIPS/ARM/ArmCompVFPU.cpp
index cb5d9d1f3..75a4e516b 100644
--- a/Core/MIPS/ARM/ArmCompVFPU.cpp
+++ b/Core/MIPS/ARM/ArmCompVFPU.cpp
@@ -559,6 +559,48 @@ namespace MIPSComp
 		fpr.ReleaseSpillLocksAndDiscardTemps();
 	}
 
+	void Jit::Comp_VHdp(MIPSOpcode op) {
+		// DISABLE;
+
+		CONDITIONAL_DISABLE;
+		if (js.HasUnknownPrefix() || disablePrefixes) {
+			DISABLE;
+		}
+
+		int vd = _VD;
+		int vs = _VS;
+		int vt = _VT;
+		VectorSize sz = GetVecSize(op);
+
+		// TODO: Force read one of them into regs? probably not.
+		u8 sregs[4], tregs[4], dregs[1];
+		GetVectorRegsPrefixS(sregs, sz, vs);
+		GetVectorRegsPrefixT(tregs, sz, vt);
+		GetVectorRegsPrefixD(dregs, V_Single, vd);
+
+		// TODO: applyprefixST here somehow (shuffle, etc...)
+		fpr.MapRegsAndSpillLockV(sregs, sz, 0);
+		fpr.MapRegsAndSpillLockV(tregs, sz, 0);
+		VMUL(S0, fpr.V(sregs[0]), fpr.V(tregs[0]));
+
+		int n = GetNumVectorElements(sz);
+		for (int i = 1; i < n; i++) {
+			// sum += s[i]*t[i];
+			if (i == n - 1) {
+				VADD(S0, S0, fpr.V(tregs[i]));
+			} else {
+				VMLA(S0, fpr.V(sregs[i]), fpr.V(tregs[i]));
+			}
+		}
+		fpr.ReleaseSpillLocksAndDiscardTemps();
+
+		fpr.MapRegV(dregs[0], MAP_NOINIT | MAP_DIRTY);
+
+		VMOV(fpr.V(dregs[0]), S0);
+		ApplyPrefixD(dregs, V_Single);
+		fpr.ReleaseSpillLocksAndDiscardTemps();
+	}
+
 	void Jit::Comp_VDot(MIPSOpcode op) {
 		CONDITIONAL_DISABLE;
 		if (js.HasUnknownPrefix() || disablePrefixes) {
@@ -590,7 +632,6 @@ namespace MIPSComp
 
 		fpr.MapRegV(dregs[0], MAP_NOINIT | MAP_DIRTY);
 
-		// TODO: applyprefixD here somehow (write mask etc..)
 		VMOV(fpr.V(dregs[0]), S0);
 		ApplyPrefixD(dregs, V_Single);
 		fpr.ReleaseSpillLocksAndDiscardTemps();
@@ -607,11 +648,6 @@ namespace MIPSComp
 		}
 	}
 
-	void Jit::Comp_VHdp(MIPSOpcode op) {
-		// Similar to vdot
-		DISABLE;
-	}
-
 	void Jit::Comp_VecDo3(MIPSOpcode op) {
 		CONDITIONAL_DISABLE;
 
@@ -899,6 +935,10 @@ namespace MIPSComp
 		fpr.ReleaseSpillLocksAndDiscardTemps();
 	}
 
+	void Jit::Comp_Vh2f(MIPSOpcode op) {
+		DISABLE;
+	}
+
 	void Jit::Comp_Vf2i(MIPSOpcode op) {
 		CONDITIONAL_DISABLE;
 		DISABLE;
diff --git a/Core/MIPS/ARM/ArmJit.h b/Core/MIPS/ARM/ArmJit.h
index 2379dd925..8da416e9d 100644
--- a/Core/MIPS/ARM/ArmJit.h
+++ b/Core/MIPS/ARM/ArmJit.h
@@ -215,6 +215,7 @@ public:
 	void Comp_Vx2i(MIPSOpcode op);
 	void Comp_Vf2i(MIPSOpcode op);
 	void Comp_Vi2f(MIPSOpcode op);
+	void Comp_Vh2f(MIPSOpcode op);
 	void Comp_Vcst(MIPSOpcode op);
 	void Comp_Vhoriz(MIPSOpcode op);
 	void Comp_VRot(MIPSOpcode op);
diff --git a/Core/MIPS/MIPSTables.cpp b/Core/MIPS/MIPSTables.cpp
index ab9f76095..e26f2b05d 100644
--- a/Core/MIPS/MIPSTables.cpp
+++ b/Core/MIPS/MIPSTables.cpp
@@ -605,7 +605,7 @@ const MIPSInstruction tableVFPU7[32] = // 110100 00001 xxxxx . ....... . .......
 	INVALID,
 	INVALID,
 	INSTR("vf2h", &Jit::Comp_Generic, Dis_Generic, Int_Vf2h, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
-	INSTR("vh2f", &Jit::Comp_Generic, Dis_Generic, Int_Vh2f, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
+	INSTR("vh2f", &Jit::Comp_Vh2f, Dis_Generic, Int_Vh2f, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
 
 	INVALID,
 	INVALID,
diff --git a/Core/MIPS/PPC/PpcCompVFPU.cpp b/Core/MIPS/PPC/PpcCompVFPU.cpp
index 9b1337ef0..47880db66 100644
--- a/Core/MIPS/PPC/PpcCompVFPU.cpp
+++ b/Core/MIPS/PPC/PpcCompVFPU.cpp
@@ -949,6 +949,10 @@ namespace MIPSComp
 		DISABLE;
 	}
 
+	void Jit::Comp_Vh2f(MIPSOpcode op) {
+		DISABLE;
+	}
+
 	void Jit::Comp_Vcst(MIPSOpcode op) {
 		CONDITIONAL_DISABLE;
 
diff --git a/Core/MIPS/PPC/PpcJit.h b/Core/MIPS/PPC/PpcJit.h
index 7a1227e2d..731d8661e 100644
--- a/Core/MIPS/PPC/PpcJit.h
+++ b/Core/MIPS/PPC/PpcJit.h
@@ -220,6 +220,7 @@ namespace MIPSComp
 		void Comp_Vx2i(MIPSOpcode op);
 		void Comp_Vf2i(MIPSOpcode op);
 		void Comp_Vi2f(MIPSOpcode op);
+		void Comp_Vh2f(MIPSOpcode op);
 		void Comp_Vcst(MIPSOpcode op);
 		void Comp_Vhoriz(MIPSOpcode op);
 		void Comp_VRot(MIPSOpcode op);
diff --git a/Core/MIPS/x86/CompVFPU.cpp b/Core/MIPS/x86/CompVFPU.cpp
index 9529c4812..25e5dd4ab 100644
--- a/Core/MIPS/x86/CompVFPU.cpp
+++ b/Core/MIPS/x86/CompVFPU.cpp
@@ -1083,6 +1083,10 @@ void Jit::Comp_Vi2f(MIPSOpcode op) {
 	fpr.ReleaseSpillLocks();
 }
 
+void Jit::Comp_Vh2f(MIPSOpcode op) {
+	DISABLE;
+}
+
 extern const double mulTableVf2i[32] = {
 	(1ULL<<0),(1ULL<<1),(1ULL<<2),(1ULL<<3),
 	(1ULL<<4),(1ULL<<5),(1ULL<<6),(1ULL<<7),
diff --git a/Core/MIPS/x86/Jit.h b/Core/MIPS/x86/Jit.h
index 7ff4f115d..9c8cfdbe6 100644
--- a/Core/MIPS/x86/Jit.h
+++ b/Core/MIPS/x86/Jit.h
@@ -235,6 +235,7 @@ public:
 	void Comp_Vx2i(MIPSOpcode op);
 	void Comp_Vf2i(MIPSOpcode op);
 	void Comp_Vi2f(MIPSOpcode op);
+	void Comp_Vh2f(MIPSOpcode op);
 	void Comp_Vcst(MIPSOpcode op);
 	void Comp_Vhoriz(MIPSOpcode op);
 	void Comp_VRot(MIPSOpcode op);
diff --git a/android/ab.cmd b/android/ab.cmd
index 1a65efb5f..a6a2cec17 100644
--- a/android/ab.cmd
+++ b/android/ab.cmd
@@ -1,5 +1,7 @@
 xcopy ..\flash0 assets\flash0 /s /y
 xcopy ..\lang assets\lang /s /y
+xcopy ..\assets\shaders assets\shaders /s /y
+copy ..\assets\langregion.ini assets\langregion.ini
 SET NDK=C:\AndroidNDK
 SET NDK_MODULE_PATH=..;..\native\ext
 REM Need to force target-platform to android-9 to get access to OpenSL headers.