// Copyright (c) 2012- PPSSPP Project.

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.

// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/

// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.

#pragma once

#include "Common/x64Emitter.h"
#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/MIPSAnalyst.h"
#include "Core/MIPS/MIPSVFPUUtils.h"

#undef MAP_NOINIT

using namespace Gen;

// The 32 FPRs are numbered 0 to 31.
// VFPU regs are numbered 32 to 159.
// Then we have some temp regs for VFPU handling, numbered 160 to 175.

// Temp regs: 4 from the S prefix, 4 from the T prefix, 4 from the D mask, and 4 for work (worst case.)
// But most of the time prefixes aren't used that heavily, so we won't use all of them.

// PLANS FOR PROPER SIMD
// 1, 2, 3, and 4-vectors will be loaded into single XMM registers.
// Matrices will be loaded into pairs, triads, or quads of XMM registers - simply by loading
// the columns or the rows one by one.

// On x86 this means that only one 4x4 matrix can be fully loaded at once, but that's alright.
// We might want to keep "linearized" columns in memory.

// Implement optimized vec/matrix multiplications of all types and transposes that
// take into account in which XMM registers the values are. Fallback: just dump out the values
// and do it the old way.

enum {
	NUM_TEMPS = 16,
	TEMP0 = 32 + 128,
	NUM_MIPS_FPRS = 32 + 128 + NUM_TEMPS,
};
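
// Illustrative sanity checks: the constants above encode the numbering
// described at the top of this file (32 FPRs, then 128 VFPU regs, then temps).
static_assert(TEMP0 == 160, "temps must start right after the VFPU registers");
static_assert(NUM_MIPS_FPRS == TEMP0 + NUM_TEMPS, "temps are the last registers");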

#ifdef _M_X64
#define NUM_X_FPREGS 16
#elif _M_IX86
#define NUM_X_FPREGS 8
#endif

struct X64CachedFPReg {
	union {
		int mipsReg;
		int mipsRegs[4];
	};
	bool dirty;
};

struct MIPSCachedFPReg {
	OpArg location;
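	// Presumably: 0 when not part of a SIMD (VS) mapping, otherwise 1 + the
	// lane index within the XMM register (see the V()/VS() checks below.)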
	int lane;
	bool away;  // value not in source register (memory)
	u8 locked;
	// Only for temp regs.
	bool tempLocked;
};

struct FPURegCacheState {
	MIPSCachedFPReg regs[NUM_MIPS_FPRS];
	X64CachedFPReg xregs[NUM_X_FPREGS];
};

namespace MIPSComp {
	struct JitOptions;
}

enum {
	MAP_DIRTY = 1,
	MAP_NOINIT = 2 | MAP_DIRTY,
	// Only for MapRegsV, MapRegsVS.
	MAP_NOLOCK = 4,
};
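
// Rough semantics, inferred from the flag names and values: MAP_DIRTY marks
// the mapped register as modified so it is written back on flush; MAP_NOINIT
// implies MAP_DIRTY and additionally skips loading the old value, for cases
// where the register is fully overwritten; MAP_NOLOCK skips the spill lock
// that MapRegsV/MapRegsVS would otherwise take.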

// The PSP has 160 FP registers: 32 FPRs + 128 VFPU registers.
// Soon we will support them all.

class FPURegCache
{
public:
	FPURegCache();
	~FPURegCache() {}

	void Start(MIPSState *mips, MIPSAnalyst::AnalysisResults &stats);
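	// Ensures preg lives in an x86 register. Presumably doLoad controls whether
	// the current value is loaded into that register, and makeDirty marks it so
	// it gets written back to memory on flush.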
	void MapReg(int preg, bool doLoad = true, bool makeDirty = true);
	void StoreFromRegister(int preg);
	void StoreFromRegisterV(int preg) {
		StoreFromRegister(preg + 32);
	}
	OpArg GetDefaultLocation(int reg) const;
	void DiscardR(int freg);
	void DiscardV(int vreg) {
		DiscardR(vreg + 32);
	}
	void DiscardVS(int vreg);
	bool IsTempX(X64Reg xreg);
	int GetTempR();
	int GetTempV() {
		return GetTempR() - 32;
	}
	int GetTempVS(u8 *v, VectorSize vsz);

	void SetEmitter(XEmitter *emitter) { emit = emitter; }
	void SetOptions(MIPSComp::JitOptions *jo) { jo_ = jo; }

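	// Presumably writes every dirty value back to its memory location and
	// frees all x86 registers.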
	void Flush();
	int SanityCheck() const;

	const OpArg &R(int freg) const { return regs[freg].location; }
	const OpArg &V(int vreg) const {
		if (vregs[vreg].lane != 0)
			PanicAlert("SIMD reg %d used as V reg (use VS instead)", vreg);
		return vregs[vreg].location;
	}
	const OpArg &VS(const u8 *vs) const {
		if (vregs[vs[0]].lane == 0)
			PanicAlert("V reg %d used as VS reg (use V instead)", vs[0]);
		return vregs[vs[0]].location;
	}

	X64Reg RX(int freg) const {
		if (regs[freg].away && regs[freg].location.IsSimpleReg())
			return regs[freg].location.GetSimpleReg();
		PanicAlert("Not so simple - f%i", freg);
		return (X64Reg)-1;
	}

	X64Reg VX(int vreg) const {
		if (vregs[vreg].lane != 0)
			PanicAlert("SIMD reg %d used as V reg (use VSX instead)", vreg);
		if (vregs[vreg].away && vregs[vreg].location.IsSimpleReg())
			return vregs[vreg].location.GetSimpleReg();
		PanicAlert("Not so simple - v%i", vreg);
		return (X64Reg)-1;
	}

	X64Reg VSX(const u8 *vs) const {
		if (vregs[vs[0]].lane == 0)
			PanicAlert("V reg %d used as VS reg (use VX instead)", vs[0]);
		if (vregs[vs[0]].away && vregs[vs[0]].location.IsSimpleReg())
			return vregs[vs[0]].location.GetSimpleReg();
		PanicAlert("Not so simple - v%i", vs[0]);
		return (X64Reg)-1;
	}
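
	// In short: R/RX address the 32 FPRs by index, V/VX address a single VFPU
	// register, and VS/VSX address a SIMD-mapped group through its register list.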

	// Just to avoid coding mistakes: declared here so a call fails to compile.
	void R(X64Reg r);

	// Register locking: prevents the given registers from being spilled.
	void SpillLock(int p1, int p2 = 0xff, int p3 = 0xff, int p4 = 0xff);
	void ReleaseSpillLock(int mipsreg);
	void ReleaseSpillLocks();

	bool IsMapped(int r) {
		return R(r).IsSimpleReg();
	}
	bool IsMappedV(int v) {
		return vregs[v].lane == 0 && V(v).IsSimpleReg();
	}
	bool IsMappedVS(u8 v) {
		return vregs[v].lane != 0 && VS(&v).IsSimpleReg();
	}
	bool IsMappedVS(const u8 *v, VectorSize vsz);
	bool CanMapVS(const u8 *v, VectorSize vsz);

	void MapRegV(int vreg, int flags);
	void MapRegsV(int vec, VectorSize vsz, int flags);
	void MapRegsV(const u8 *v, VectorSize vsz, int flags);
	void SpillLockV(int vreg) {
		SpillLock(vreg + 32);
	}
	void SpillLockV(const u8 *v, VectorSize vsz);
	void SpillLockV(int vec, VectorSize vsz);
	void ReleaseSpillLockV(int vreg) {
		ReleaseSpillLock(vreg + 32);
	}
	void ReleaseSpillLockV(const u8 *vec, VectorSize sz);

	// TODO: This may trash XMM0/XMM1 some day.
	void MapRegsVS(const u8 *v, VectorSize vsz, int flags);
	bool TryMapRegsVS(const u8 *v, VectorSize vsz, int flags);
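	// Naming sketch, inferred from the signatures: "Dirty" is the output
	// operand (vd), mapped for writing; each "In" is an input operand, mapped
	// for reading; avoidLoad presumably skips loading the destination when it
	// is about to be fully overwritten.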
	bool TryMapDirtyInVS(const u8 *vd, VectorSize vdsz, const u8 *vs, VectorSize vssz, bool avoidLoad = true);
	bool TryMapDirtyInInVS(const u8 *vd, VectorSize vdsz, const u8 *vs, VectorSize vssz, const u8 *vt, VectorSize vtsz, bool avoidLoad = true);
	// TODO: If s/t overlap differently, need read-only copies? Maybe finalize d? Major design flaw...
	// TODO: Matrix versions? Cols/Rows?
	// No MapRegVS, that'd be silly.

	void SimpleRegsV(const u8 *v, VectorSize vsz, int flags);
	void SimpleRegsV(const u8 *v, MatrixSize msz, int flags);
	void SimpleRegV(const u8 v, int flags);

	void GetState(FPURegCacheState &state) const;
	void RestoreState(const FPURegCacheState &state);

	MIPSState *mips;

	void FlushX(X64Reg reg);
	X64Reg GetFreeXReg();
	int GetFreeXRegs(X64Reg *regs, int n, bool spill = true);

	void Invariant() const;

private:
	const int *GetAllocationOrder(int &count);
	void SetupInitialRegs();

	// These are intentionally not public so the interface is "locked" or "unlocked", no levels.
	void ReduceSpillLock(int mreg);
	void ReduceSpillLockV(int vreg) {
		ReduceSpillLock(vreg + 32);
	}
	void ReduceSpillLockV(const u8 *vec, VectorSize sz);

	X64Reg LoadRegsVS(const u8 *v, int n);

	MIPSCachedFPReg regs[NUM_MIPS_FPRS];
	X64CachedFPReg xregs[NUM_X_FPREGS];
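	// Presumably set to regs + 32, so vregs[v] is the cache entry for VFPU
	// reg v (matching DiscardV/StoreFromRegisterV above, which add 32).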
	MIPSCachedFPReg *vregs;

	bool pendingFlush;
	bool initialReady;
	MIPSCachedFPReg regsInitial[NUM_MIPS_FPRS];
	X64CachedFPReg xregsInitial[NUM_X_FPREGS];

	// TEMP0, etc. are swapped in here if necessary (e.g. on x86.)
	static float tempValues[NUM_TEMPS];

	XEmitter *emit;
	MIPSComp::JitOptions *jo_;
};