Some scaffolding for a future VFPU-on-NEON implementation

This commit is contained in:
Henrik Rydgard 2013-11-19 16:25:38 +01:00
parent 99af10cb09
commit ab3037112f
10 changed files with 253 additions and 35 deletions

View File

@ -679,6 +679,7 @@ if(ARM)
Core/MIPS/ARM/ArmCompFPU.cpp
Core/MIPS/ARM/ArmCompLoadStore.cpp
Core/MIPS/ARM/ArmCompVFPU.cpp
Core/MIPS/ARM/ArmCompVFPUNEON.cpp
Core/MIPS/ARM/ArmJit.cpp
Core/MIPS/ARM/ArmJit.h
Core/MIPS/ARM/ArmRegCache.cpp

View File

@ -518,6 +518,12 @@
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="MIPS\ARM\ArmCompVFPUNEON.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="MIPS\ARM\ArmJit.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>

View File

@ -915,6 +915,9 @@
<ClInclude Include="MIPS\JitCommon\JitState.h">
<Filter>MIPS\JitCommon</Filter>
</ClInclude>
<ClInclude Include="MIPS\ARM\ArmCompVFPUNEON.cpp">
<Filter>MIPS\ARM</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<None Include="CMakeLists.txt" />

View File

@ -38,7 +38,7 @@
// Alternative definition kept for reference: it would make CONDITIONAL_DISABLE
// call Comp_Generic(op) and return instead of expanding to an empty statement.
// #define CONDITIONAL_DISABLE { fpr.ReleaseSpillLocks(); Comp_Generic(op); return; }
#define CONDITIONAL_DISABLE ;
// Calls fpr.ReleaseSpillLocksAndDiscardTemps(), hands `op` to Comp_Generic and
// returns from the enclosing function. Requires `op` to be in scope.
#define DISABLE { fpr.ReleaseSpillLocksAndDiscardTemps(); Comp_Generic(op); return; }
// When jo.useNEONVFPU is set, calls `func(op)` and returns from the enclosing
// function; otherwise does nothing so the caller continues with its own code.
// `func` must be the bare name of a function taking the opcode (do not pass a
// call expression), and `op` must be in scope at the point of use.
#define NEON_IF_AVAILABLE(func) { if (jo.useNEONVFPU) { func(op); return; } }
#define _RS MIPS_GET_RS(op)
#define _RT MIPS_GET_RT(op)
#define _RD MIPS_GET_RD(op)
@ -237,6 +237,7 @@ namespace MIPSComp
}
void Jit::Comp_SV(MIPSOpcode op) {
NEON_IF_AVAILABLE(CompNEON_SV);
CONDITIONAL_DISABLE;
s32 offset = (signed short)(op & 0xFFFC);
@ -343,6 +344,7 @@ namespace MIPSComp
void Jit::Comp_SVQ(MIPSOpcode op)
{
CONDITIONAL_DISABLE;
NEON_IF_AVAILABLE(CompNEON_SVQ);
int imm = (signed short)(op&0xFFFC);
int vt = (((op >> 16) & 0x1f)) | ((op&1) << 5);
@ -454,8 +456,8 @@ namespace MIPSComp
void Jit::Comp_VVectorInit(MIPSOpcode op)
{
NEON_IF_AVAILABLE(CompNEON_VVectorInit);
CONDITIONAL_DISABLE;
// WARNING: No prefix support!
if (js.HasUnknownPrefix()) {
DISABLE;
@ -490,6 +492,7 @@ namespace MIPSComp
}
void Jit::Comp_VIdt(MIPSOpcode op) {
NEON_IF_AVAILABLE(CompNEON_VIdt);
CONDITIONAL_DISABLE;
if (js.HasUnknownPrefix()) {
DISABLE;
@ -527,8 +530,8 @@ namespace MIPSComp
void Jit::Comp_VMatrixInit(MIPSOpcode op)
{
NEON_IF_AVAILABLE(CompNEON_VMatrixInit);
CONDITIONAL_DISABLE;
if (js.HasUnknownPrefix()) {
// Don't think matrix init ops care about prefixes.
// DISABLE;
@ -575,6 +578,7 @@ namespace MIPSComp
}
void Jit::Comp_VHdp(MIPSOpcode op) {
NEON_IF_AVAILABLE(CompNEON_VHdp);
CONDITIONAL_DISABLE;
if (js.HasUnknownPrefix()) {
DISABLE;
@ -651,6 +655,7 @@ namespace MIPSComp
}
void Jit::Comp_VecDo3(MIPSOpcode op) {
NEON_IF_AVAILABLE(CompNEON_VecDo3);
CONDITIONAL_DISABLE;
if (js.HasUnknownPrefix()) {
DISABLE;
@ -765,6 +770,7 @@ namespace MIPSComp
}
void Jit::Comp_VV2Op(MIPSOpcode op) {
NEON_IF_AVAILABLE(CompNEON_VV2Op);
CONDITIONAL_DISABLE;
if (js.HasUnknownPrefix()) {
DISABLE;
@ -890,6 +896,7 @@ namespace MIPSComp
}
void Jit::Comp_Vi2f(MIPSOpcode op) {
NEON_IF_AVAILABLE(CompNEON_Vi2f);
CONDITIONAL_DISABLE;
if (js.HasUnknownPrefix()) {
DISABLE;
@ -936,6 +943,12 @@ namespace MIPSComp
}
void Jit::Comp_Vh2f(MIPSOpcode op) {
// Pass the bare function name, as every other call site does; the macro performs the call itself.
NEON_IF_AVAILABLE(CompNEON_Vh2f);
CONDITIONAL_DISABLE;
if (js.HasUnknownPrefix()) {
DISABLE;
}
if (!cpu_info.bNEON || !cpu_info.bHalf) {
// No hardware support for half-to-float, fallback to interpreter
// TODO: Translate the fast SSE solution to standard integer/VFP stuff
@ -943,11 +956,6 @@ namespace MIPSComp
DISABLE;
}
CONDITIONAL_DISABLE;
if (js.HasUnknownPrefix()) {
DISABLE;
}
u8 sregs[4], dregs[4];
VectorSize sz = GetVecSize(op);
VectorSize outSz;
@ -991,11 +999,13 @@ namespace MIPSComp
}
void Jit::Comp_Vf2i(MIPSOpcode op) {
NEON_IF_AVAILABLE(CompNEON_Vf2i);
CONDITIONAL_DISABLE;
DISABLE;
if (js.HasUnknownPrefix())
if (js.HasUnknownPrefix()) {
DISABLE;
}
DISABLE;
VectorSize sz = GetVecSize(op);
int n = GetNumVectorElements(sz);
@ -1060,6 +1070,7 @@ namespace MIPSComp
void Jit::Comp_Mftv(MIPSOpcode op) {
CONDITIONAL_DISABLE;
NEON_IF_AVAILABLE(CompNEON_Mftv);
int imm = op & 0xFF;
MIPSGPReg rt = _RT;
@ -1127,6 +1138,7 @@ namespace MIPSComp
}
void Jit::Comp_Vmtvc(MIPSOpcode op) {
NEON_IF_AVAILABLE(CompNEON_Vmtvc);
CONDITIONAL_DISABLE;
int vs = _VS;
@ -1148,10 +1160,10 @@ namespace MIPSComp
}
void Jit::Comp_Vmmov(MIPSOpcode op) {
NEON_IF_AVAILABLE(CompNEON_Vmmov);
CONDITIONAL_DISABLE;
// This probably ignores prefixes for all sane intents and purposes.
if (_VS == _VD) {
// A lot of these no-op matrix moves in Wipeout... Just drop the instruction entirely.
return;
@ -1186,6 +1198,7 @@ namespace MIPSComp
}
void Jit::Comp_VScl(MIPSOpcode op) {
NEON_IF_AVAILABLE(CompNEON_VScl);
CONDITIONAL_DISABLE;
if (js.HasUnknownPrefix()) {
DISABLE;
@ -1238,6 +1251,7 @@ namespace MIPSComp
if (js.HasUnknownPrefix()) {
DISABLE;
}
NEON_IF_AVAILABLE(CompNEON_Vmmul);
// TODO: This probably ignores prefixes?
@ -1276,10 +1290,12 @@ namespace MIPSComp
}
// Tries the NEON path first: NEON_IF_AVAILABLE dispatches to CompNEON_Vmscl and
// returns when jo.useNEONVFPU is set. Otherwise there is no dedicated code
// here, and DISABLE hands the op to Comp_Generic.
void Jit::Comp_Vmscl(MIPSOpcode op) {
NEON_IF_AVAILABLE(CompNEON_Vmscl);
DISABLE;
}
void Jit::Comp_Vtfm(MIPSOpcode op) {
NEON_IF_AVAILABLE(CompNEON_Vtfm);
CONDITIONAL_DISABLE;
if (js.HasUnknownPrefix()) {
DISABLE;
@ -1340,23 +1356,27 @@ namespace MIPSComp
}
// Dispatches to CompNEON_VCrs when jo.useNEONVFPU is set (NEON_IF_AVAILABLE
// returns early in that case); otherwise DISABLE routes the op through
// Comp_Generic, since no dedicated code exists in this function.
void Jit::Comp_VCrs(MIPSOpcode op) {
NEON_IF_AVAILABLE(CompNEON_VCrs);
DISABLE;
}
// Dispatches to CompNEON_VDet when jo.useNEONVFPU is set (NEON_IF_AVAILABLE
// returns early in that case); otherwise DISABLE routes the op through
// Comp_Generic, since no dedicated code exists in this function.
void Jit::Comp_VDet(MIPSOpcode op) {
NEON_IF_AVAILABLE(CompNEON_VDet);
DISABLE;
}
// Dispatches to CompNEON_Vi2x when jo.useNEONVFPU is set (NEON_IF_AVAILABLE
// returns early in that case); otherwise DISABLE routes the op through
// Comp_Generic, since no dedicated code exists in this function.
void Jit::Comp_Vi2x(MIPSOpcode op) {
NEON_IF_AVAILABLE(CompNEON_Vi2x);
DISABLE;
}
// Dispatches to CompNEON_Vx2i when jo.useNEONVFPU is set (NEON_IF_AVAILABLE
// returns early in that case); otherwise DISABLE routes the op through
// Comp_Generic, since no dedicated code exists in this function.
void Jit::Comp_Vx2i(MIPSOpcode op) {
NEON_IF_AVAILABLE(CompNEON_Vx2i);
DISABLE;
}
void Jit::Comp_VCrossQuat(MIPSOpcode op) {
NEON_IF_AVAILABLE(CompNEON_VCrossQuat);
// This op does not support prefixes anyway.
CONDITIONAL_DISABLE;
if (js.HasUnknownPrefix())
@ -1404,6 +1424,7 @@ namespace MIPSComp
}
void Jit::Comp_Vcmp(MIPSOpcode op) {
NEON_IF_AVAILABLE(CompNEON_Vcmp);
CONDITIONAL_DISABLE;
if (js.HasUnknownPrefix())
DISABLE;
@ -1592,6 +1613,7 @@ namespace MIPSComp
}
void Jit::Comp_Vcmov(MIPSOpcode op) {
NEON_IF_AVAILABLE(CompNEON_Vcmov);
CONDITIONAL_DISABLE;
if (js.HasUnknownPrefix()) {
DISABLE;
@ -1640,6 +1662,7 @@ namespace MIPSComp
}
void Jit::Comp_Viim(MIPSOpcode op) {
NEON_IF_AVAILABLE(CompNEON_Viim);
CONDITIONAL_DISABLE;
u8 dreg;
@ -1654,12 +1677,12 @@ namespace MIPSComp
}
void Jit::Comp_Vfim(MIPSOpcode op) {
NEON_IF_AVAILABLE(CompNEON_Vfim);
CONDITIONAL_DISABLE;
if (js.HasUnknownPrefix()) {
DISABLE;
}
u8 dreg;
GetVectorRegs(&dreg, V_Single, _VT);
@ -1674,6 +1697,7 @@ namespace MIPSComp
}
void Jit::Comp_Vcst(MIPSOpcode op) {
NEON_IF_AVAILABLE(CompNEON_Vcst);
CONDITIONAL_DISABLE;
if (js.HasUnknownPrefix()) {
DISABLE;
@ -1724,6 +1748,7 @@ namespace MIPSComp
// calling the math library.
// Apparently this may not work on hardfp. I don't think we have any platforms using this though.
void Jit::Comp_VRot(MIPSOpcode op) {
NEON_IF_AVAILABLE(CompNEON_VRot);
// VRot probably doesn't accept prefixes anyway.
CONDITIONAL_DISABLE;
if (js.HasUnknownPrefix()) {
@ -1784,6 +1809,7 @@ namespace MIPSComp
}
void Jit::Comp_Vhoriz(MIPSOpcode op) {
NEON_IF_AVAILABLE(CompNEON_Vhoriz);
DISABLE;
// Do any games use these a noticeable amount?
@ -1796,6 +1822,7 @@ namespace MIPSComp
}
void Jit::Comp_Vsgn(MIPSOpcode op) {
NEON_IF_AVAILABLE(CompNEON_Vsgn);
CONDITIONAL_DISABLE;
if (js.HasUnknownPrefix()) {
DISABLE;
@ -1847,6 +1874,7 @@ namespace MIPSComp
}
void Jit::Comp_Vocp(MIPSOpcode op) {
NEON_IF_AVAILABLE(CompNEON_Vocp);
CONDITIONAL_DISABLE;
if (js.HasUnknownPrefix()) {
DISABLE;

View File

@ -15,22 +15,163 @@
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
// NEON VFPU
// This is where we will create an alternate implementation of the VFPU emulation
// that uses NEON Q registers to cache pairs/tris/quads, and so on.
// Will require major extensions to the reg cache and other things.
#include <cmath>
#include "math/math_util.h"
#include "Common/CPUDetect.h"
#include "Core/MemMap.h"
#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/MIPSAnalyst.h"
#include "Core/MIPS/MIPSCodeUtils.h"
#include "Common/CPUDetect.h"
#include "Core/Config.h"
#include "Core/Reporting.h"
#include "Core/MIPS/ARM/ArmJit.h"
#include "Core/MIPS/ARM/ArmRegCache.h"
// TODO: Somehow #ifdef away on ARMv5eabi, without breaking the linker.
namespace MIPSComp {

// Expands to an empty statement.
#define CONDITIONAL_DISABLE ;

// Calls fpr.ReleaseSpillLocksAndDiscardTemps(), passes `op` to Comp_Generic,
// then returns from the enclosing function. Requires `op` to be in scope.
#define DISABLE \
	{ \
		fpr.ReleaseSpillLocksAndDiscardTemps(); \
		Comp_Generic(op); \
		return; \
	}

}
// NEON back-ends for the VFPU compiler entry points.
// Nothing here is implemented yet: every function below consists solely of
// DISABLE, which calls Comp_Generic(op) and returns. Replace a body with real
// code once the corresponding op gets a NEON implementation.
namespace MIPSComp {

void Jit::CompNEON_SV(MIPSOpcode op) { DISABLE; }
void Jit::CompNEON_SVQ(MIPSOpcode op) { DISABLE; }
void Jit::CompNEON_VVectorInit(MIPSOpcode op) { DISABLE; }
void Jit::CompNEON_VMatrixInit(MIPSOpcode op) { DISABLE; }
void Jit::CompNEON_VDot(MIPSOpcode op) { DISABLE; }
void Jit::CompNEON_VecDo3(MIPSOpcode op) { DISABLE; }
void Jit::CompNEON_VV2Op(MIPSOpcode op) { DISABLE; }
void Jit::CompNEON_Mftv(MIPSOpcode op) { DISABLE; }
void Jit::CompNEON_Vmtvc(MIPSOpcode op) { DISABLE; }
void Jit::CompNEON_Vmmov(MIPSOpcode op) { DISABLE; }
void Jit::CompNEON_VScl(MIPSOpcode op) { DISABLE; }
void Jit::CompNEON_Vmmul(MIPSOpcode op) { DISABLE; }
void Jit::CompNEON_Vmscl(MIPSOpcode op) { DISABLE; }
void Jit::CompNEON_Vtfm(MIPSOpcode op) { DISABLE; }
void Jit::CompNEON_VHdp(MIPSOpcode op) { DISABLE; }
void Jit::CompNEON_VCrs(MIPSOpcode op) { DISABLE; }
void Jit::CompNEON_VDet(MIPSOpcode op) { DISABLE; }
void Jit::CompNEON_Vi2x(MIPSOpcode op) { DISABLE; }
void Jit::CompNEON_Vx2i(MIPSOpcode op) { DISABLE; }
void Jit::CompNEON_Vf2i(MIPSOpcode op) { DISABLE; }
void Jit::CompNEON_Vi2f(MIPSOpcode op) { DISABLE; }
void Jit::CompNEON_Vh2f(MIPSOpcode op) { DISABLE; }
void Jit::CompNEON_Vcst(MIPSOpcode op) { DISABLE; }
void Jit::CompNEON_Vhoriz(MIPSOpcode op) { DISABLE; }
void Jit::CompNEON_VRot(MIPSOpcode op) { DISABLE; }
void Jit::CompNEON_VIdt(MIPSOpcode op) { DISABLE; }
void Jit::CompNEON_Vcmp(MIPSOpcode op) { DISABLE; }
void Jit::CompNEON_Vcmov(MIPSOpcode op) { DISABLE; }
void Jit::CompNEON_Viim(MIPSOpcode op) { DISABLE; }
void Jit::CompNEON_Vfim(MIPSOpcode op) { DISABLE; }
void Jit::CompNEON_VCrossQuat(MIPSOpcode op) { DISABLE; }
void Jit::CompNEON_Vsgn(MIPSOpcode op) { DISABLE; }
void Jit::CompNEON_Vocp(MIPSOpcode op) { DISABLE; }

}  // namespace MIPSComp

View File

@ -57,8 +57,7 @@ void DisassembleArm(const u8 *data, int size) {
namespace MIPSComp
{
ArmJitOptions::ArmJitOptions()
{
ArmJitOptions::ArmJitOptions() {
enableBlocklink = true;
downcountInRegister = true;
useBackJump = false;
@ -70,6 +69,10 @@ ArmJitOptions::ArmJitOptions()
continueBranches = false;
continueJumps = false;
continueMaxInstructions = 300;
useNEONVFPU = false; // true
if (!cpu_info.bNEON)
useNEONVFPU = false;
}
Jit::Jit(MIPSState *mips) : blocks(mips, this), gpr(mips, &jo), fpr(mips), mips_(mips)

View File

@ -36,6 +36,7 @@ struct ArmJitOptions
{
ArmJitOptions();
bool useNEONVFPU;
bool enableBlocklink;
bool downcountInRegister;
bool useBackJump;
@ -134,6 +135,44 @@ public:
void Comp_Vsgn(MIPSOpcode op);
void Comp_Vocp(MIPSOpcode op);
// Non-NEON: VPFX
// NEON implementations of the VFPU ops.
// Each takes the opcode being compiled, like its Comp_* namesake. Presumably
// these are reached only when ArmJitOptions::useNEONVFPU is set; confirm
// against the NEON_IF_AVAILABLE call sites in ArmCompVFPU.cpp.
void CompNEON_SV(MIPSOpcode op);
void CompNEON_SVQ(MIPSOpcode op);
void CompNEON_VVectorInit(MIPSOpcode op);
void CompNEON_VMatrixInit(MIPSOpcode op);
void CompNEON_VDot(MIPSOpcode op);
void CompNEON_VecDo3(MIPSOpcode op);
void CompNEON_VV2Op(MIPSOpcode op);
void CompNEON_Mftv(MIPSOpcode op);
void CompNEON_Vmtvc(MIPSOpcode op);
void CompNEON_Vmmov(MIPSOpcode op);
void CompNEON_VScl(MIPSOpcode op);
void CompNEON_Vmmul(MIPSOpcode op);
void CompNEON_Vmscl(MIPSOpcode op);
void CompNEON_Vtfm(MIPSOpcode op);
void CompNEON_VHdp(MIPSOpcode op);
void CompNEON_VCrs(MIPSOpcode op);
void CompNEON_VDet(MIPSOpcode op);
void CompNEON_Vi2x(MIPSOpcode op);
void CompNEON_Vx2i(MIPSOpcode op);
void CompNEON_Vf2i(MIPSOpcode op);
void CompNEON_Vi2f(MIPSOpcode op);
void CompNEON_Vh2f(MIPSOpcode op);
void CompNEON_Vcst(MIPSOpcode op);
void CompNEON_Vhoriz(MIPSOpcode op);
void CompNEON_VRot(MIPSOpcode op);
void CompNEON_VIdt(MIPSOpcode op);
void CompNEON_Vcmp(MIPSOpcode op);
void CompNEON_Vcmov(MIPSOpcode op);
void CompNEON_Viim(MIPSOpcode op);
void CompNEON_Vfim(MIPSOpcode op);
void CompNEON_VCrossQuat(MIPSOpcode op);
void CompNEON_Vsgn(MIPSOpcode op);
void CompNEON_Vocp(MIPSOpcode op);
JitBlockCache *GetBlockCache() { return &blocks; }
void ClearCache();

View File

@ -79,16 +79,20 @@ public:
// Returns an ARM register containing the requested MIPS register.
ARMReg MapReg(MIPSReg reg, int mapFlags = 0);
void MapInIn(MIPSReg rd, MIPSReg rs);
void MapInInV(int rt, int rs);
void MapDirtyInV(int rd, int rs, bool avoidLoad = true);
void MapDirtyInInV(int rd, int rs, int rt, bool avoidLoad = true);
void MapDirty(MIPSReg rd);
void MapDirtyIn(MIPSReg rd, MIPSReg rs, bool avoidLoad = true);
void MapDirtyInIn(MIPSReg rd, MIPSReg rs, MIPSReg rt, bool avoidLoad = true);
void FlushArmReg(ARMReg r);
void FlushR(MIPSReg r);
void FlushV(MIPSReg r) { FlushR(r + 32); }
void DiscardR(MIPSReg r);
// VFPU register as single ARM VFP registers. Must not be used in the upcoming NEON mode!
void MapRegV(int vreg, int flags = 0);
void LoadToRegV(ARMReg armReg, int vreg);
void MapInInV(int rt, int rs);
void MapDirtyInV(int rd, int rs, bool avoidLoad = true);
void MapDirtyInInV(int rd, int rs, int rt, bool avoidLoad = true);
void FlushV(MIPSReg r) { FlushR(r + 32); }
void DiscardV(MIPSReg r) { DiscardR(r + 32);}
bool IsTempX(ARMReg r) const;
@ -99,14 +103,9 @@ public:
ARMReg R(int preg); // Returns a cached register
// VFPU registers
// VFPU registers as single VFP registers
ARMReg V(int vreg) { return R(vreg + 32); }
void MapRegV(int vreg, int flags = 0);
void LoadToRegV(ARMReg armReg, int vreg);
// NOTE: These require you to release spill locks manually!
void MapRegsAndSpillLockV(int vec, VectorSize vsz, int flags);
void MapRegsAndSpillLockV(const u8 *v, VectorSize vsz, int flags);

View File

@ -410,8 +410,6 @@ void Jit::Comp_SVQ(MIPSOpcode op)
}
break;
default:
DISABLE;
break;

View File

@ -56,12 +56,12 @@ ARCH_FILES := \
$(SRC)/Core/MIPS/ARM/ArmCompFPU.cpp \
$(SRC)/Core/MIPS/ARM/ArmCompLoadStore.cpp \
$(SRC)/Core/MIPS/ARM/ArmCompVFPU.cpp \
$(SRC)/Core/MIPS/ARM/ArmCompVFPUNEON.cpp \
$(SRC)/Core/MIPS/ARM/ArmAsm.cpp \
$(SRC)/Core/MIPS/ARM/ArmJit.cpp \
$(SRC)/Core/MIPS/ARM/ArmRegCache.cpp \
$(SRC)/Core/MIPS/ARM/ArmRegCacheFPU.cpp \
ArmEmitterTest.cpp \
ArmEmitterTest.cpp
endif
ifeq ($(TARGET_ARCH_ABI),armeabi)
@ -74,12 +74,12 @@ ARCH_FILES := \
$(SRC)/Core/MIPS/ARM/ArmCompFPU.cpp \
$(SRC)/Core/MIPS/ARM/ArmCompLoadStore.cpp \
$(SRC)/Core/MIPS/ARM/ArmCompVFPU.cpp \
$(SRC)/Core/MIPS/ARM/ArmCompVFPUNEON.cpp \
$(SRC)/Core/MIPS/ARM/ArmAsm.cpp \
$(SRC)/Core/MIPS/ARM/ArmJit.cpp \
$(SRC)/Core/MIPS/ARM/ArmRegCache.cpp \
$(SRC)/Core/MIPS/ARM/ArmRegCacheFPU.cpp \
ArmEmitterTest.cpp \
ArmEmitterTest.cpp
endif
EXEC_AND_LIB_FILES := \