mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-11-23 21:39:52 +00:00
Merge pull request #707 from unknownbrackets/jit-vfpu
Copy over temp fpu regs, enable VecDo3/VDot
This commit is contained in:
commit
1ffde31328
@ -113,7 +113,10 @@ void ApplyPrefixST(float *v, u32 data, VectorSize size)
|
||||
// Prefix may say "z, z, z, z" but if this is a pair, we force to x.
|
||||
// TODO: But some ops seem to use const 0 instead?
|
||||
if (regnum >= n)
|
||||
{
|
||||
ERROR_LOG(CPU, "Invalid VFPU swizzle: %08x / %d", data, size);
|
||||
regnum = 0;
|
||||
}
|
||||
|
||||
v[i] = origV[regnum];
|
||||
if (abs)
|
||||
@ -1185,7 +1188,11 @@ namespace MIPSInt
|
||||
ReadVector(s, sz, vs);
|
||||
ApplySwizzleS(s, sz);
|
||||
float scale = V(vt);
|
||||
ApplySwizzleT(&scale, V_Single);
|
||||
if (currentMIPS->vfpuCtrl[VFPU_CTRL_TPREFIX] != 0xE4)
|
||||
{
|
||||
WARN_LOG(CPU, "Broken T prefix used with VScl: %08x / %08x", currentMIPS->vfpuCtrl[VFPU_CTRL_TPREFIX], op);
|
||||
ApplySwizzleT(&scale, V_Single);
|
||||
}
|
||||
int n = GetNumVectorElements(sz);
|
||||
for (int i = 0; i < n; i++)
|
||||
{
|
||||
|
@ -49,7 +49,7 @@ namespace MIPSComp
|
||||
|
||||
static const float one = 1.0f;
|
||||
static const float minus_one = -1.0f;
|
||||
static const float zero = -1.0f;
|
||||
static const float zero = 0.0f;
|
||||
|
||||
const u32 GC_ALIGNED16( noSignMask[4] ) = {0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
|
||||
const u32 GC_ALIGNED16( signBitLower[4] ) = {0x80000000, 0, 0, 0};
|
||||
@ -144,6 +144,25 @@ void Jit::ApplyPrefixD(const u8 *vregs, u32 prefix, VectorSize sz, bool onlyWrit
|
||||
}
|
||||
}
|
||||
|
||||
// Vector regs can overlap in all sorts of swizzled ways.
|
||||
// This does allow a single overlap in sregs[i].
|
||||
bool DestRegOverlaps(int dreg, int di, int sn, u8 sregs[], int tn, u8 tregs[])
|
||||
{
|
||||
for (int i = 0; i < sn; ++i)
|
||||
{
|
||||
if (sregs[i] == dreg && i != di)
|
||||
return true;
|
||||
}
|
||||
for (int i = 0; i < tn; ++i)
|
||||
{
|
||||
if (tregs[i] == dreg)
|
||||
return true;
|
||||
}
|
||||
|
||||
// Hurray, no overlap, we can write directly.
|
||||
return false;
|
||||
}
|
||||
|
||||
static u32 GC_ALIGNED16(ssLoadStoreTemp[1]);
|
||||
|
||||
void Jit::Comp_SV(u32 op) {
|
||||
@ -292,7 +311,7 @@ void Jit::Comp_SVQ(u32 op)
|
||||
}
|
||||
|
||||
void Jit::Comp_VDot(u32 op) {
|
||||
DISABLE;
|
||||
CONDITIONAL_DISABLE;
|
||||
|
||||
// WARNING: No prefix support!
|
||||
if (js.MayHavePrefix()) {
|
||||
@ -314,32 +333,42 @@ void Jit::Comp_VDot(u32 op) {
|
||||
|
||||
// TODO: applyprefixST here somehow (shuffle, etc...)
|
||||
|
||||
MOVSS(XMM0, fpr.V(sregs[0]));
|
||||
MULSS(XMM0, fpr.V(tregs[0]));
|
||||
|
||||
int n = GetNumVectorElements(sz);
|
||||
X64Reg tempxreg = XMM0;
|
||||
if (!DestRegOverlaps(dregs[0], 0, n, sregs, n, tregs))
|
||||
{
|
||||
fpr.MapRegsV(dregs, V_Single, MAP_NOINIT);
|
||||
tempxreg = fpr.VX(dregs[0]);
|
||||
}
|
||||
|
||||
MOVSS(tempxreg, M((void *) &zero));
|
||||
MOVSS(XMM1, fpr.V(sregs[0]));
|
||||
MULSS(XMM1, fpr.V(tregs[0]));
|
||||
ADDSS(tempxreg, R(XMM1));
|
||||
|
||||
for (int i = 1; i < n; i++)
|
||||
{
|
||||
// sum += s[i]*t[i];
|
||||
MOVSS(XMM1, fpr.V(sregs[i]));
|
||||
MULSS(XMM1, fpr.V(tregs[i]));
|
||||
ADDSS(XMM0, R(XMM1));
|
||||
ADDSS(tempxreg, R(XMM1));
|
||||
}
|
||||
fpr.ReleaseSpillLocks();
|
||||
|
||||
fpr.MapRegsV(dregs, V_Single, MAP_NOINIT);
|
||||
if (!fpr.V(dregs[0]).IsSimpleReg(tempxreg))
|
||||
{
|
||||
fpr.MapRegsV(dregs, V_Single, MAP_NOINIT);
|
||||
MOVSS(fpr.V(dregs[0]), XMM0);
|
||||
}
|
||||
|
||||
// TODO: applyprefixD here somehow (write mask etc..)
|
||||
|
||||
MOVSS(fpr.V(vd), XMM0);
|
||||
|
||||
fpr.ReleaseSpillLocks();
|
||||
|
||||
js.EatPrefix();
|
||||
}
|
||||
|
||||
void Jit::Comp_VecDo3(u32 op) {
|
||||
DISABLE;
|
||||
CONDITIONAL_DISABLE;
|
||||
|
||||
// WARNING: No prefix support!
|
||||
if (js.MayHavePrefix())
|
||||
@ -394,16 +423,42 @@ void Jit::Comp_VecDo3(u32 op) {
|
||||
}
|
||||
|
||||
int n = GetNumVectorElements(sz);
|
||||
// We need at least n temporaries...
|
||||
if (n > 2)
|
||||
fpr.Flush();
|
||||
|
||||
X64Reg tempxregs[4];
|
||||
for (int i = 0; i < n; ++i)
|
||||
{
|
||||
if (DestRegOverlaps(dregs[i], i, n, sregs, n, tregs))
|
||||
{
|
||||
// On 32-bit we only have 6 xregs for mips regs, use XMM0/XMM1 if possible.
|
||||
if (i < 2)
|
||||
tempxregs[i] = (X64Reg) (XMM0 + i);
|
||||
else
|
||||
{
|
||||
fpr.BindToRegister(TEMP0 + i, false, true);
|
||||
fpr.SpillLock(TEMP0 + i);
|
||||
tempxregs[i] = fpr.RX(TEMP0 + i);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
fpr.MapRegV(dregs[i], (dregs[i] == sregs[i] ? 0 : MAP_NOINIT) | MAP_DIRTY);
|
||||
fpr.SpillLockV(dregs[i]);
|
||||
tempxregs[i] = fpr.VX(dregs[i]);
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < n; ++i)
|
||||
MOVSS((X64Reg) (XMM0 + i), fpr.V(sregs[i]));
|
||||
{
|
||||
if (!fpr.V(sregs[i]).IsSimpleReg(tempxregs[i]))
|
||||
MOVSS(tempxregs[i], fpr.V(sregs[i]));
|
||||
}
|
||||
for (int i = 0; i < n; ++i)
|
||||
(this->*xmmop)((X64Reg) (XMM0 + i), fpr.V(tregs[i]));
|
||||
(this->*xmmop)(tempxregs[i], fpr.V(tregs[i]));
|
||||
for (int i = 0; i < n; ++i)
|
||||
MOVSS(fpr.V(dregs[i]), (X64Reg) (XMM0 + i));
|
||||
{
|
||||
if (!fpr.V(dregs[i]).IsSimpleReg(tempxregs[i]))
|
||||
MOVSS(fpr.V(dregs[i]), tempxregs[i]);
|
||||
}
|
||||
|
||||
fpr.ReleaseSpillLocks();
|
||||
|
||||
|
@ -82,6 +82,8 @@ void FPURegCache::MapRegsV(const u8 *v, VectorSize sz, int flags) {
|
||||
void FPURegCache::ReleaseSpillLocks() {
|
||||
for (int i = 0; i < NUM_MIPS_FPRS; i++)
|
||||
regs[i].locked = false;
|
||||
for (int i = TEMP0; i < TEMP0 + NUM_TEMPS; ++i)
|
||||
DiscardR(i);
|
||||
}
|
||||
|
||||
void FPURegCache::BindToRegister(const int i, bool doLoad, bool makeDirty) {
|
||||
@ -97,7 +99,9 @@ void FPURegCache::BindToRegister(const int i, bool doLoad, bool makeDirty) {
|
||||
if (!regs[i].location.IsImm() && (regs[i].location.offset & 0x3)) {
|
||||
PanicAlert("WARNING - misaligned fp register location %i", i);
|
||||
}
|
||||
emit->MOVSS(xr, regs[i].location);
|
||||
if (i < TEMP0) {
|
||||
emit->MOVSS(xr, regs[i].location);
|
||||
}
|
||||
}
|
||||
regs[i].location = newloc;
|
||||
regs[i].away = true;
|
||||
@ -124,8 +128,27 @@ void FPURegCache::StoreFromRegister(int i) {
|
||||
}
|
||||
}
|
||||
|
||||
void FPURegCache::DiscardR(int i) {
|
||||
_assert_msg_(DYNA_REC, !regs[i].location.IsImm(), "FPU can't handle imm yet.");
|
||||
if (regs[i].away) {
|
||||
X64Reg xr = regs[i].location.GetSimpleReg();
|
||||
_assert_msg_(DYNA_REC, xr < NUM_X_FPREGS, "DiscardR: MipsReg had bad X64Reg");
|
||||
// Note that we DO NOT write it back here. That's the whole point of Discard.
|
||||
xregs[xr].dirty = false;
|
||||
xregs[xr].mipsReg = -1;
|
||||
regs[i].location = GetDefaultLocation(i);
|
||||
regs[i].away = false;
|
||||
} else {
|
||||
// _assert_msg_(DYNA_REC,0,"already stored");
|
||||
}
|
||||
}
|
||||
|
||||
// Reports whether x86 register `xr` is currently owned by a temp register,
// i.e. whether its recorded mipsReg index is TEMP0 or higher.
bool FPURegCache::IsTemp(X64Reg xr) {
	if (xregs[xr].mipsReg < TEMP0) {
		return false;
	}
	return true;
}
|
||||
|
||||
void FPURegCache::Flush() {
|
||||
for (int i = 0; i < NUM_MIPS_FPRS; i++) {
|
||||
for (int i = 0; i < TEMP0; i++) {
|
||||
if (regs[i].locked) {
|
||||
PanicAlert("Somebody forgot to unlock MIPS reg %i.", i);
|
||||
}
|
||||
@ -141,6 +164,9 @@ void FPURegCache::Flush() {
|
||||
}
|
||||
}
|
||||
}
|
||||
for (int i = TEMP0; i < TEMP0 + NUM_TEMPS; ++i) {
|
||||
DiscardR(i);
|
||||
}
|
||||
}
|
||||
|
||||
OpArg FPURegCache::GetDefaultLocation(int reg) const {
|
||||
@ -203,7 +229,7 @@ X64Reg FPURegCache::GetFreeXReg() {
|
||||
return (X64Reg) -1;
|
||||
}
|
||||
|
||||
void FPURegCache::FlushR(X64Reg reg) {
|
||||
void FPURegCache::FlushX(X64Reg reg) {
|
||||
if (reg >= NUM_X_FPREGS)
|
||||
PanicAlert("Flushing non existent reg");
|
||||
if (xregs[reg].mipsReg != -1) {
|
||||
|
@ -26,9 +26,18 @@ using namespace Gen;
|
||||
|
||||
|
||||
// GPRs are numbered 0 to 31
|
||||
// VFPU regs are numbered 32 to 160.
|
||||
// VFPU regs are numbered 32 to 159.
|
||||
// Then we have some temp regs for VFPU handling, starting at 160 (TEMP0); NUM_TEMPS says how many.
|
||||
|
||||
#define NUM_MIPS_FPRS (32 + 128)
|
||||
// Index layout for the FPU register cache. The temps start right after the
// 32 + 128 regular entries, and NUM_MIPS_FPRS includes the temps, so it is the
// total number of cache entries.
//
// NUM_TEMPS is 5 so that every named TEMPn (TEMP0..TEMP4) is a valid index
// below NUM_MIPS_FPRS. With a count of 4, TEMP4 named the index one past the
// end of any array sized by NUM_MIPS_FPRS.
// NOTE(review): any backing storage for the temps that is sized separately
// from NUM_TEMPS must hold the same number of entries - confirm.
enum {
	NUM_TEMPS = 5,
	TEMP0 = 32 + 128,
	TEMP1 = TEMP0 + 1,
	TEMP2 = TEMP0 + 2,
	TEMP3 = TEMP0 + 3,
	TEMP4 = TEMP0 + 4,
	NUM_MIPS_FPRS = 32 + 128 + NUM_TEMPS,
};
|
||||
|
||||
#ifdef _M_X64
|
||||
#define NUM_X_FPREGS 16
|
||||
@ -68,6 +77,11 @@ public:
|
||||
StoreFromRegister(preg + 32);
|
||||
}
|
||||
OpArg GetDefaultLocation(int reg) const;
|
||||
void DiscardR(int freg);
|
||||
// Discards a VFPU register. VFPU registers sit 32 entries into the cache's
// index space, so this forwards to DiscardR with that offset applied.
void DiscardV(int vreg) {
	const int cacheIndex = vreg + 32;
	DiscardR(cacheIndex);
}
|
||||
bool IsTemp(X64Reg xreg);
|
||||
|
||||
void SetEmitter(XEmitter *emitter) {emit = emitter;}
|
||||
|
||||
@ -100,6 +114,9 @@ public:
|
||||
void MapRegV(int vreg, int flags);
|
||||
void MapRegsV(int vec, VectorSize vsz, int flags);
|
||||
void MapRegsV(const u8 *v, VectorSize vsz, int flags);
|
||||
// Spill-locks a single VFPU register. VFPU registers sit 32 entries into the
// cache's index space, so this forwards to SpillLock with that offset applied.
void SpillLockV(int vreg) {
	const int cacheIndex = vreg + 32;
	SpillLock(cacheIndex);
}
|
||||
void SpillLockV(const u8 *v, VectorSize vsz);
|
||||
void SpillLockV(int vec, VectorSize vsz);
|
||||
|
||||
@ -107,7 +124,7 @@ public:
|
||||
|
||||
private:
|
||||
X64Reg GetFreeXReg();
|
||||
void FlushR(X64Reg reg);
|
||||
void FlushX(X64Reg reg);
|
||||
const int *GetAllocationOrder(int &count);
|
||||
|
||||
MIPSCachedFPReg regs[NUM_MIPS_FPRS];
|
||||
|
Loading…
Reference in New Issue
Block a user