mirror of
https://github.com/libretro/ppsspp.git
synced 2025-02-07 17:46:41 +00:00
Merge pull request #7109 from unknownbrackets/jit-simd
x86jit: More fixes / tweaks to simd
This commit is contained in:
commit
811e6aea48
@ -242,7 +242,7 @@ void Jit::Comp_SV(MIPSOpcode op) {
|
||||
{
|
||||
gpr.Lock(rs);
|
||||
gpr.MapReg(rs, true, false);
|
||||
fpr.MapRegV(vt, MAP_NOINIT);
|
||||
fpr.MapRegV(vt, MAP_DIRTY | MAP_NOINIT);
|
||||
|
||||
JitSafeMem safe(this, rs, imm);
|
||||
safe.SetFar();
|
||||
@ -384,7 +384,7 @@ void Jit::Comp_SVQ(MIPSOpcode op)
|
||||
safe.SetFar();
|
||||
OpArg src;
|
||||
if (safe.PrepareRead(src, 16)) {
|
||||
MOVAPS(fpr.VSX(vregs[0]), safe.NextFastAddress(0));
|
||||
MOVAPS(fpr.VSX(vregs), safe.NextFastAddress(0));
|
||||
} else {
|
||||
// Hmm... probably never happens.
|
||||
}
|
||||
@ -433,7 +433,7 @@ void Jit::Comp_SVQ(MIPSOpcode op)
|
||||
safe.SetFar();
|
||||
OpArg dest;
|
||||
if (safe.PrepareWrite(dest, 16)) {
|
||||
MOVAPS(safe.NextFastAddress(0), fpr.VSX(vregs[0]));
|
||||
MOVAPS(safe.NextFastAddress(0), fpr.VSX(vregs));
|
||||
} else {
|
||||
// Hmm... probably never happens.
|
||||
}
|
||||
@ -488,9 +488,9 @@ void Jit::Comp_VVectorInit(MIPSOpcode op) {
|
||||
|
||||
if (fpr.TryMapRegsVS(dregs, sz, MAP_NOINIT | MAP_DIRTY)) {
|
||||
if (type == 6) {
|
||||
XORPS(fpr.VSX(dregs[0]), fpr.VS(dregs[0]));
|
||||
XORPS(fpr.VSX(dregs), fpr.VS(dregs));
|
||||
} else if (type == 7) {
|
||||
MOVAPS(fpr.VSX(dregs[0]), M(&oneOneOneOne));
|
||||
MOVAPS(fpr.VSX(dregs), M(&oneOneOneOne));
|
||||
} else {
|
||||
DISABLE;
|
||||
}
|
||||
@ -531,9 +531,9 @@ void Jit::Comp_VIdt(MIPSOpcode op) {
|
||||
|
||||
u8 dregs[4];
|
||||
GetVectorRegsPrefixD(dregs, sz, _VD);
|
||||
if (sz == V_Quad && fpr.TryMapRegsVS(dregs, sz, MAP_NOINIT | MAP_DIRTY)) {
|
||||
int n = vd & 3;
|
||||
MOVAPD(fpr.VSX(dregs[0]), M(identityMatrix[n]));
|
||||
if (fpr.TryMapRegsVS(dregs, sz, MAP_NOINIT | MAP_DIRTY)) {
|
||||
int row = vd & (n - 1);
|
||||
MOVAPD(fpr.VSX(dregs), M(identityMatrix[row]));
|
||||
ApplyPrefixD(dregs, sz);
|
||||
fpr.ReleaseSpillLocks();
|
||||
return;
|
||||
@ -586,42 +586,42 @@ void Jit::Comp_VDot(MIPSOpcode op) {
|
||||
switch (sz) {
|
||||
case V_Pair:
|
||||
if (cpu_info.bSSE4_1) {
|
||||
MOVAPD(XMM0, fpr.VS(sregs[0]));
|
||||
DPPS(XMM0, fpr.VS(tregs[0]), 0x31);
|
||||
MOVAPD(fpr.VSX(dregs[0]), R(XMM0));
|
||||
MOVAPD(XMM0, fpr.VS(sregs));
|
||||
DPPS(XMM0, fpr.VS(tregs), 0x31);
|
||||
MOVAPD(fpr.VSX(dregs), R(XMM0));
|
||||
} else {
|
||||
MOVAPD(XMM0, fpr.VS(sregs[0]));
|
||||
MULPS(XMM0, fpr.VS(tregs[0]));
|
||||
MOVAPD(XMM0, fpr.VS(sregs));
|
||||
MULPS(XMM0, fpr.VS(tregs));
|
||||
MOVAPD(R(XMM1), XMM0);
|
||||
SHUFPS(XMM1, R(XMM0), _MM_SHUFFLE(1, 1, 1, 1));
|
||||
ADDPS(XMM1, R(XMM0));
|
||||
MOVAPD(fpr.VS(dregs[0]), XMM1);
|
||||
MOVAPD(fpr.VS(dregs), XMM1);
|
||||
}
|
||||
break;
|
||||
case V_Triple:
|
||||
if (cpu_info.bSSE4_1) {
|
||||
MOVAPD(XMM0, fpr.VS(sregs[0]));
|
||||
DPPS(XMM0, fpr.VS(tregs[0]), 0x71);
|
||||
MOVAPD(fpr.VSX(dregs[0]), R(XMM0));
|
||||
MOVAPD(XMM0, fpr.VS(sregs));
|
||||
DPPS(XMM0, fpr.VS(tregs), 0x71);
|
||||
MOVAPD(fpr.VSX(dregs), R(XMM0));
|
||||
} else {
|
||||
MOVAPD(XMM0, fpr.VS(sregs[0]));
|
||||
MULPS(XMM0, fpr.VS(tregs[0]));
|
||||
MOVAPD(XMM0, fpr.VS(sregs));
|
||||
MULPS(XMM0, fpr.VS(tregs));
|
||||
MOVAPD(R(XMM1), XMM0);
|
||||
SHUFPS(XMM1, R(XMM0), _MM_SHUFFLE(3, 2, 1, 1));
|
||||
ADDSS(XMM1, R(XMM0));
|
||||
SHUFPS(XMM0, R(XMM1), _MM_SHUFFLE(3, 2, 2, 2));
|
||||
ADDSS(XMM1, R(XMM0));
|
||||
MOVAPD(fpr.VS(dregs[0]), XMM1);
|
||||
MOVAPD(fpr.VS(dregs), XMM1);
|
||||
}
|
||||
break;
|
||||
case V_Quad:
|
||||
if (cpu_info.bSSE4_1) {
|
||||
MOVAPD(XMM0, fpr.VS(sregs[0]));
|
||||
DPPS(XMM0, fpr.VS(tregs[0]), 0xF1);
|
||||
MOVAPD(fpr.VSX(dregs[0]), R(XMM0));
|
||||
MOVAPD(XMM0, fpr.VS(sregs));
|
||||
DPPS(XMM0, fpr.VS(tregs), 0xF1);
|
||||
MOVAPD(fpr.VSX(dregs), R(XMM0));
|
||||
} else {
|
||||
MOVAPD(XMM0, fpr.VS(sregs[0]));
|
||||
MOVAPD(XMM1, fpr.VS(tregs[0]));
|
||||
MOVAPD(XMM0, fpr.VS(sregs));
|
||||
MOVAPD(XMM1, fpr.VS(tregs));
|
||||
MULPS(XMM0, R(XMM1));
|
||||
MOVAPD(XMM1, R(XMM0));
|
||||
SHUFPS(XMM1, R(XMM1), _MM_SHUFFLE(2, 3, 0, 1));
|
||||
@ -629,7 +629,7 @@ void Jit::Comp_VDot(MIPSOpcode op) {
|
||||
MOVAPD(XMM1, R(XMM0));
|
||||
SHUFPS(XMM1, R(XMM1), _MM_SHUFFLE(0, 1, 2, 3));
|
||||
ADDSS(XMM0, R(XMM1));
|
||||
MOVAPD(fpr.VSX(dregs[0]), R(XMM0));
|
||||
MOVAPD(fpr.VSX(dregs), R(XMM0));
|
||||
}
|
||||
}
|
||||
ApplyPrefixD(dregs, V_Single);
|
||||
@ -640,10 +640,11 @@ void Jit::Comp_VDot(MIPSOpcode op) {
|
||||
// Flush SIMD.
|
||||
fpr.SimpleRegsV(sregs, sz, 0);
|
||||
fpr.SimpleRegsV(tregs, sz, 0);
|
||||
fpr.SimpleRegsV(dregs, V_Single, MAP_DIRTY | MAP_NOINIT);
|
||||
|
||||
X64Reg tempxreg = XMM0;
|
||||
if (IsOverlapSafe(dregs[0], 0, n, sregs, n, tregs)) {
|
||||
fpr.MapRegsV(dregs, V_Single, MAP_NOINIT);
|
||||
fpr.MapRegsV(dregs, V_Single, MAP_DIRTY | MAP_NOINIT);
|
||||
tempxreg = fpr.VX(dregs[0]);
|
||||
}
|
||||
|
||||
@ -659,7 +660,7 @@ void Jit::Comp_VDot(MIPSOpcode op) {
|
||||
}
|
||||
|
||||
if (!fpr.V(dregs[0]).IsSimpleReg(tempxreg)) {
|
||||
fpr.MapRegsV(dregs, V_Single, MAP_NOINIT);
|
||||
fpr.MapRegsV(dregs, V_Single, MAP_DIRTY | MAP_NOINIT);
|
||||
MOVSS(fpr.V(dregs[0]), tempxreg);
|
||||
}
|
||||
|
||||
@ -686,11 +687,12 @@ void Jit::Comp_VHdp(MIPSOpcode op) {
|
||||
// Flush SIMD.
|
||||
fpr.SimpleRegsV(sregs, sz, 0);
|
||||
fpr.SimpleRegsV(tregs, sz, 0);
|
||||
fpr.SimpleRegsV(dregs, V_Single, MAP_DIRTY | MAP_NOINIT);
|
||||
|
||||
X64Reg tempxreg = XMM0;
|
||||
if (IsOverlapSafe(dregs[0], 0, n, sregs, n, tregs))
|
||||
{
|
||||
fpr.MapRegsV(dregs, V_Single, MAP_NOINIT);
|
||||
fpr.MapRegsV(dregs, V_Single, MAP_DIRTY | MAP_NOINIT);
|
||||
tempxreg = fpr.VX(dregs[0]);
|
||||
}
|
||||
|
||||
@ -710,7 +712,7 @@ void Jit::Comp_VHdp(MIPSOpcode op) {
|
||||
}
|
||||
|
||||
if (!fpr.V(dregs[0]).IsSimpleReg(tempxreg)) {
|
||||
fpr.MapRegsV(dregs, V_Single, MAP_NOINIT);
|
||||
fpr.MapRegsV(dregs, V_Single, MAP_DIRTY | MAP_NOINIT);
|
||||
MOVSS(fpr.V(dregs[0]), tempxreg);
|
||||
}
|
||||
|
||||
@ -749,17 +751,16 @@ void Jit::Comp_VCrossQuat(MIPSOpcode op) {
|
||||
);
|
||||
return _mm_shuffle_ps(result, result, _MM_SHUFFLE(3, 0, 2, 1));
|
||||
*/
|
||||
MOVAPS(XMM0, fpr.VS(tregs[0]));
|
||||
MOVAPS(XMM1, fpr.VS(sregs[0]));
|
||||
MOVAPS(XMM0, fpr.VS(tregs));
|
||||
MOVAPS(XMM1, fpr.VS(sregs));
|
||||
SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(3, 0, 2, 1));
|
||||
SHUFPS(XMM1, R(XMM1), _MM_SHUFFLE(3, 0, 2, 1));
|
||||
MULPS(XMM0, fpr.VS(sregs[0]));
|
||||
MULPS(XMM1, fpr.VS(tregs[0]));
|
||||
MULPS(XMM0, fpr.VS(sregs));
|
||||
MULPS(XMM1, fpr.VS(tregs));
|
||||
SUBPS(XMM0, R(XMM1));
|
||||
SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(3, 0, 2, 1));
|
||||
MOVAPS(fpr.VS(dregs[0]), XMM0);
|
||||
MOVAPS(fpr.VS(dregs), XMM0);
|
||||
fpr.ReleaseSpillLocks();
|
||||
NOTICE_LOG(JIT, "Crossprod %08x", js.blockStart);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -991,46 +992,46 @@ void Jit::Comp_VecDo3(MIPSOpcode op) {
|
||||
{
|
||||
case 2: // vmin
|
||||
// TODO: Mishandles NaN.
|
||||
MOVAPS(XMM1, fpr.VS(sregs[0]));
|
||||
MINPS(XMM1, fpr.VS(tregs[0]));
|
||||
MOVAPS(fpr.VSX(dregs[0]), R(XMM1));
|
||||
MOVAPS(XMM1, fpr.VS(sregs));
|
||||
MINPS(XMM1, fpr.VS(tregs));
|
||||
MOVAPS(fpr.VSX(dregs), R(XMM1));
|
||||
break;
|
||||
case 3: // vmax
|
||||
// TODO: Mishandles NaN.
|
||||
MOVAPS(XMM1, fpr.VS(sregs[0]));
|
||||
MAXPS(XMM1, fpr.VS(tregs[0]));
|
||||
MOVAPS(fpr.VSX(dregs[0]), R(XMM1));
|
||||
MOVAPS(XMM1, fpr.VS(sregs));
|
||||
MAXPS(XMM1, fpr.VS(tregs));
|
||||
MOVAPS(fpr.VSX(dregs), R(XMM1));
|
||||
break;
|
||||
case 6: // vsge
|
||||
// TODO: Mishandles NaN.
|
||||
MOVAPS(XMM1, fpr.VS(sregs[0]));
|
||||
CMPPS(XMM1, fpr.VS(tregs[0]), CMP_NLT);
|
||||
MOVAPS(XMM1, fpr.VS(sregs));
|
||||
CMPPS(XMM1, fpr.VS(tregs), CMP_NLT);
|
||||
ANDPS(XMM1, M(&oneOneOneOne));
|
||||
MOVAPS(fpr.VSX(dregs[0]), R(XMM1));
|
||||
MOVAPS(fpr.VSX(dregs), R(XMM1));
|
||||
break;
|
||||
case 7: // vslt
|
||||
MOVAPS(XMM1, fpr.VS(sregs[0]));
|
||||
CMPPS(XMM1, fpr.VS(tregs[0]), CMP_LT);
|
||||
MOVAPS(XMM1, fpr.VS(sregs));
|
||||
CMPPS(XMM1, fpr.VS(tregs), CMP_LT);
|
||||
ANDPS(XMM1, M(&oneOneOneOne));
|
||||
MOVAPS(fpr.VSX(dregs[0]), R(XMM1));
|
||||
MOVAPS(fpr.VSX(dregs), R(XMM1));
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (opFunc != nullptr) {
|
||||
if (fpr.VSX(dregs[0]) != fpr.VSX(tregs[0])) {
|
||||
if (fpr.VSX(dregs[0]) != fpr.VSX(sregs[0])) {
|
||||
MOVAPS(fpr.VSX(dregs[0]), fpr.VS(sregs[0]));
|
||||
if (fpr.VSX(dregs) != fpr.VSX(tregs)) {
|
||||
if (fpr.VSX(dregs) != fpr.VSX(sregs)) {
|
||||
MOVAPS(fpr.VSX(dregs), fpr.VS(sregs));
|
||||
}
|
||||
(this->*opFunc)(fpr.VSX(dregs[0]), fpr.VS(tregs[0]));
|
||||
(this->*opFunc)(fpr.VSX(dregs), fpr.VS(tregs));
|
||||
} else if (symmetric) {
|
||||
// We already know d = t.
|
||||
(this->*opFunc)(fpr.VSX(dregs[0]), fpr.VS(sregs[0]));
|
||||
(this->*opFunc)(fpr.VSX(dregs), fpr.VS(sregs));
|
||||
} else {
|
||||
MOVAPS(XMM1, fpr.VS(sregs[0]));
|
||||
(this->*opFunc)(XMM1, fpr.VS(tregs[0]));
|
||||
MOVAPS(fpr.VSX(dregs[0]), R(XMM1));
|
||||
MOVAPS(XMM1, fpr.VS(sregs));
|
||||
(this->*opFunc)(XMM1, fpr.VS(tregs));
|
||||
MOVAPS(fpr.VSX(dregs), R(XMM1));
|
||||
}
|
||||
}
|
||||
|
||||
@ -1631,7 +1632,7 @@ void Jit::Comp_Vx2i(MIPSOpcode op) {
|
||||
}
|
||||
|
||||
if (fpr.TryMapRegsVS(dregs, outsize, MAP_NOINIT | MAP_DIRTY)) {
|
||||
MOVAPS(fpr.VSX(dregs[0]), R(XMM0));
|
||||
MOVAPS(fpr.VSX(dregs), R(XMM0));
|
||||
} else {
|
||||
// Done! TODO: The rest of this should be possible to extract into a function.
|
||||
fpr.MapRegsV(dregs, outsize, MAP_NOINIT | MAP_DIRTY);
|
||||
@ -1722,6 +1723,7 @@ void Jit::Comp_Vf2i(MIPSOpcode op) {
|
||||
|
||||
// Flush SIMD.
|
||||
fpr.SimpleRegsV(sregs, sz, 0);
|
||||
fpr.SimpleRegsV(dregs, sz, MAP_DIRTY | MAP_NOINIT);
|
||||
|
||||
u8 tempregs[4];
|
||||
for (int i = 0; i < n; ++i) {
|
||||
@ -1790,7 +1792,7 @@ void Jit::Comp_Vcst(MIPSOpcode op) {
|
||||
|
||||
if (fpr.TryMapRegsVS(dregs, sz, MAP_NOINIT | MAP_DIRTY)) {
|
||||
SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0,0,0,0));
|
||||
MOVAPS(fpr.VS(dregs[0]), XMM0);
|
||||
MOVAPS(fpr.VS(dregs), XMM0);
|
||||
fpr.ReleaseSpillLocks();
|
||||
return;
|
||||
}
|
||||
@ -2007,17 +2009,17 @@ void Jit::Comp_VV2Op(MIPSOpcode op) {
|
||||
if (canSIMD && fpr.TryMapDirtyInVS(dregs, sz, sregs, sz)) {
|
||||
switch ((op >> 16) & 0x1f) {
|
||||
case 0: // vmov
|
||||
MOVAPS(fpr.VSX(dregs[0]), fpr.VS(sregs[0]));
|
||||
MOVAPS(fpr.VSX(dregs), fpr.VS(sregs));
|
||||
break;
|
||||
case 1: // vabs
|
||||
if (dregs[0] != sregs[0])
|
||||
MOVAPS(fpr.VSX(dregs[0]), fpr.VS(sregs[0]));
|
||||
ANDPS(fpr.VSX(dregs[0]), M(&noSignMask));
|
||||
MOVAPS(fpr.VSX(dregs), fpr.VS(sregs));
|
||||
ANDPS(fpr.VSX(dregs), M(&noSignMask));
|
||||
break;
|
||||
case 2: // vneg
|
||||
if (dregs[0] != sregs[0])
|
||||
MOVAPS(fpr.VSX(dregs[0]), fpr.VS(sregs[0]));
|
||||
XORPS(fpr.VSX(dregs[0]), M(&signBitAll));
|
||||
MOVAPS(fpr.VSX(dregs), fpr.VS(sregs));
|
||||
XORPS(fpr.VSX(dregs), M(&signBitAll));
|
||||
break;
|
||||
}
|
||||
ApplyPrefixD(dregs, sz);
|
||||
@ -2251,7 +2253,7 @@ void Jit::Comp_Vmfvc(MIPSOpcode op) {
|
||||
int vs = _VS;
|
||||
int imm = op & 0xFF;
|
||||
if (imm >= 128 && imm < 128 + VFPU_CTRL_MAX) {
|
||||
fpr.MapRegV(vs, 0);
|
||||
fpr.MapRegV(vs, MAP_DIRTY | MAP_NOINIT);
|
||||
if (imm - 128 == VFPU_CTRL_CC) {
|
||||
gpr.MapReg(MIPS_REG_VFPUCC, true, false);
|
||||
MOVD_xmm(fpr.VX(vs), gpr.R(MIPS_REG_VFPUCC));
|
||||
@ -2393,13 +2395,13 @@ void Jit::Comp_VScl(MIPSOpcode op) {
|
||||
GetVectorRegsPrefixD(dregs, sz, _VD);
|
||||
|
||||
if (fpr.TryMapDirtyInInVS(dregs, sz, sregs, sz, &scale, V_Single, true)) {
|
||||
MOVSS(XMM0, fpr.VS(scale));
|
||||
MOVSS(XMM0, fpr.VS(&scale));
|
||||
if (sz != V_Single)
|
||||
SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0));
|
||||
if (dregs[0] != sregs[0]) {
|
||||
MOVAPS(fpr.VSX(dregs[0]), fpr.VS(sregs[0]));
|
||||
MOVAPS(fpr.VSX(dregs), fpr.VS(sregs));
|
||||
}
|
||||
MULPS(fpr.VSX(dregs[0]), R(XMM0));
|
||||
MULPS(fpr.VSX(dregs), R(XMM0));
|
||||
ApplyPrefixD(dregs, sz);
|
||||
fpr.ReleaseSpillLocks();
|
||||
return;
|
||||
|
@ -122,11 +122,12 @@ bool FPURegCache::IsMappedVS(const u8 *v, VectorSize vsz) {
|
||||
return false;
|
||||
|
||||
// And make sure the rest are mapped to the same reg in the right positions.
|
||||
X64Reg xr = VSX(v[0]);
|
||||
X64Reg xr = VSX(v);
|
||||
for (int i = 1; i < n; ++i) {
|
||||
if (!IsMappedVS(v[i]) || VSX(v[i]) != xr)
|
||||
u8 vi = v[i];
|
||||
if (!IsMappedVS(vi) || VSX(&vi) != xr)
|
||||
return false;
|
||||
if (vregs[v[i]].lane != i + 1)
|
||||
if (vregs[vi].lane != i + 1)
|
||||
return false;
|
||||
}
|
||||
// TODO: Optimize this case? It happens.
|
||||
@ -203,7 +204,7 @@ bool FPURegCache::TryMapRegsVS(const u8 *v, VectorSize vsz, int flags) {
|
||||
if (IsMappedVS(v, vsz)) {
|
||||
// Already mapped then, perfect. Just mark dirty.
|
||||
if ((flags & MAP_DIRTY) != 0)
|
||||
xregs[VSX(v[0])].dirty = true;
|
||||
xregs[VSX(v)].dirty = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -214,10 +215,8 @@ bool FPURegCache::TryMapRegsVS(const u8 *v, VectorSize vsz, int flags) {
|
||||
// This way V/VS can warn about improper usage properly.
|
||||
MapRegV(v[0], flags);
|
||||
vregs[v[0]].lane = 1;
|
||||
// TODO: Currently all non-simd regs are dirty.
|
||||
xregs[VSX(v[0])].dirty = true;
|
||||
//if ((flags & MAP_DIRTY) != 0)
|
||||
// xregs[VSX(v[0])].dirty = true;
|
||||
if ((flags & MAP_DIRTY) != 0)
|
||||
xregs[VSX(v)].dirty = true;
|
||||
Invariant();
|
||||
return true;
|
||||
}
|
||||
@ -238,14 +237,11 @@ bool FPURegCache::TryMapRegsVS(const u8 *v, VectorSize vsz, int flags) {
|
||||
// Clear the xreg it was in before.
|
||||
X64Reg oldXReg = vr.location.GetSimpleReg();
|
||||
xregs[oldXReg].mipsReg = -1;
|
||||
// TODO: Do this instead, once dirtying is handled well throughout?
|
||||
//if (xregs[oldXReg].dirty) {
|
||||
// // Inherit the "dirtiness" (ultimately set below for all regs.)
|
||||
// dirty = true;
|
||||
// xregs[oldXReg].dirty = false;
|
||||
//}
|
||||
// All non-simd regs are currently always dirty. Ought to be fixed.
|
||||
dirty = true;
|
||||
if (xregs[oldXReg].dirty) {
|
||||
// Inherit the "dirtiness" (ultimately set below for all regs.)
|
||||
dirty = true;
|
||||
xregs[oldXReg].dirty = false;
|
||||
}
|
||||
}
|
||||
xregs[xr].mipsRegs[i] = v[i] + 32;
|
||||
vr.location = newloc;
|
||||
@ -438,7 +434,10 @@ void FPURegCache::SimpleRegsV(const u8 *v, MatrixSize msz, int flags) {
|
||||
void FPURegCache::SimpleRegV(const u8 v, int flags) {
|
||||
MIPSCachedFPReg &vr = vregs[v];
|
||||
// Special optimization: if it's in a single simd, we can keep it there.
|
||||
if (vr.lane == 1 && xregs[VSX(v)].mipsRegs[1] == -1) {
|
||||
if (vr.lane == 1 && xregs[VSX(&v)].mipsRegs[1] == -1) {
|
||||
if (flags & MAP_DIRTY) {
|
||||
xregs[VSX(&v)].dirty = true;
|
||||
}
|
||||
// Just change the lane to 0.
|
||||
vr.lane = 0;
|
||||
} else if (vr.lane != 0) {
|
||||
@ -451,7 +450,9 @@ void FPURegCache::SimpleRegV(const u8 v, int flags) {
|
||||
}
|
||||
} else if (vr.away) {
|
||||
// There are no immediates in the FPR reg file, so we already had this in a register. Make dirty as necessary.
|
||||
xregs[VX(v)].dirty = xregs[VX(v)].dirty || ((flags & MAP_DIRTY) != 0);
|
||||
if (flags & MAP_DIRTY) {
|
||||
xregs[VX(v)].dirty = true;
|
||||
}
|
||||
_assert_msg_(JIT, vr.location.IsSimpleReg(), "not loaded and not simple.");
|
||||
}
|
||||
Invariant();
|
||||
@ -579,7 +580,9 @@ void FPURegCache::StoreFromRegister(int i) {
|
||||
} else {
|
||||
OpArg newLoc = GetDefaultLocation(i);
|
||||
xregs[xr].mipsReg = -1;
|
||||
emit->MOVSS(newLoc, xr);
|
||||
if (xregs[xr].dirty) {
|
||||
emit->MOVSS(newLoc, xr);
|
||||
}
|
||||
regs[i].location = newLoc;
|
||||
}
|
||||
xregs[xr].dirty = false;
|
||||
|
@ -128,10 +128,10 @@ public:
|
||||
PanicAlert("SIMD reg %d used as V reg (use VS instead)", vreg);
|
||||
return vregs[vreg].location;
|
||||
}
|
||||
const OpArg &VS(int vreg) const {
|
||||
if (vregs[vreg].lane == 0)
|
||||
PanicAlert("V reg %d used as VS reg (use V instead)", vreg);
|
||||
return vregs[vreg].location;
|
||||
const OpArg &VS(const u8 *vs) const {
|
||||
if (vregs[vs[0]].lane == 0)
|
||||
PanicAlert("V reg %d used as VS reg (use V instead)", vs[0]);
|
||||
return vregs[vs[0]].location;
|
||||
}
|
||||
|
||||
X64Reg RX(int freg) const {
|
||||
@ -150,12 +150,12 @@ public:
|
||||
return (X64Reg)-1;
|
||||
}
|
||||
|
||||
X64Reg VSX(int vreg) const {
|
||||
if (vregs[vreg].lane == 0)
|
||||
PanicAlert("V reg %d used as VS reg (use VX instead)", vreg);
|
||||
if (vregs[vreg].away && vregs[vreg].location.IsSimpleReg())
|
||||
return vregs[vreg].location.GetSimpleReg();
|
||||
PanicAlert("Not so simple - v%i", vreg);
|
||||
X64Reg VSX(const u8 *vs) const {
|
||||
if (vregs[vs[0]].lane == 0)
|
||||
PanicAlert("V reg %d used as VS reg (use VX instead)", vs[0]);
|
||||
if (vregs[vs[0]].away && vregs[vs[0]].location.IsSimpleReg())
|
||||
return vregs[vs[0]].location.GetSimpleReg();
|
||||
PanicAlert("Not so simple - v%i", vs[0]);
|
||||
return (X64Reg)-1;
|
||||
}
|
||||
|
||||
@ -173,8 +173,8 @@ public:
|
||||
bool IsMappedV(int v) {
|
||||
return vregs[v].lane == 0 && V(v).IsSimpleReg();
|
||||
}
|
||||
bool IsMappedVS(int v) {
|
||||
return vregs[v].lane != 0 && VS(v).IsSimpleReg();
|
||||
bool IsMappedVS(u8 v) {
|
||||
return vregs[v].lane != 0 && VS(&v).IsSimpleReg();
|
||||
}
|
||||
bool IsMappedVS(const u8 *v, VectorSize vsz);
|
||||
bool CanMapVS(const u8 *v, VectorSize vsz);
|
||||
|
Loading…
x
Reference in New Issue
Block a user