mirror of
https://github.com/libretro/ppsspp.git
synced 2024-12-14 20:48:30 +00:00
Optimize VDot and VecDo3 to avoid temporaries.
This commit is contained in:
parent
0bd382c518
commit
1c4c5e718b
@ -145,11 +145,12 @@ void Jit::ApplyPrefixD(const u8 *vregs, u32 prefix, VectorSize sz, bool onlyWrit
|
||||
}
|
||||
|
||||
// Vector regs can overlap in all sorts of swizzled ways.
|
||||
bool DestRegOverlaps(int dreg, int sn, u8 sregs[], int tn, u8 tregs[])
|
||||
// This does allow a single overlap in sregs[i].
|
||||
bool DestRegOverlaps(int dreg, int di, int sn, u8 sregs[], int tn, u8 tregs[])
|
||||
{
|
||||
for (int i = 0; i < sn; ++i)
|
||||
{
|
||||
if (sregs[i] == dreg)
|
||||
if (sregs[i] == dreg && i != di)
|
||||
return true;
|
||||
}
|
||||
for (int i = 0; i < tn; ++i)
|
||||
@ -332,25 +333,34 @@ void Jit::Comp_VDot(u32 op) {
|
||||
|
||||
// TODO: applyprefixST here somehow (shuffle, etc...)
|
||||
|
||||
MOVSS(XMM0, fpr.V(sregs[0]));
|
||||
MULSS(XMM0, fpr.V(tregs[0]));
|
||||
|
||||
int n = GetNumVectorElements(sz);
|
||||
X64Reg tempxreg = XMM0;
|
||||
if (!DestRegOverlaps(dregs[0], 0, n, sregs, n, tregs))
|
||||
{
|
||||
fpr.MapRegsV(dregs, V_Single, MAP_NOINIT);
|
||||
tempxreg = fpr.VX(dregs[0]);
|
||||
}
|
||||
|
||||
if (!fpr.V(sregs[0]).IsSimpleReg(tempxreg))
|
||||
MOVSS(tempxreg, fpr.V(sregs[0]));
|
||||
MULSS(tempxreg, fpr.V(tregs[0]));
|
||||
|
||||
for (int i = 1; i < n; i++)
|
||||
{
|
||||
// sum += s[i]*t[i];
|
||||
MOVSS(XMM1, fpr.V(sregs[i]));
|
||||
MULSS(XMM1, fpr.V(tregs[i]));
|
||||
ADDSS(XMM0, R(XMM1));
|
||||
ADDSS(tempxreg, R(XMM1));
|
||||
}
|
||||
fpr.ReleaseSpillLocks();
|
||||
|
||||
fpr.MapRegsV(dregs, V_Single, MAP_NOINIT);
|
||||
if (!fpr.V(dregs[0]).IsSimpleReg(tempxreg))
|
||||
{
|
||||
fpr.MapRegsV(dregs, V_Single, MAP_NOINIT);
|
||||
MOVSS(fpr.V(dregs[0]), XMM0);
|
||||
}
|
||||
|
||||
// TODO: applyprefixD here somehow (write mask etc..)
|
||||
|
||||
MOVSS(fpr.V(vd), XMM0);
|
||||
|
||||
fpr.ReleaseSpillLocks();
|
||||
|
||||
js.EatPrefix();
|
||||
@ -416,7 +426,7 @@ void Jit::Comp_VecDo3(u32 op) {
|
||||
X64Reg tempxregs[4];
|
||||
for (int i = 0; i < n; ++i)
|
||||
{
|
||||
if (DestRegOverlaps(dregs[i], n, sregs, n, tregs))
|
||||
if (DestRegOverlaps(dregs[i], i, n, sregs, n, tregs))
|
||||
{
|
||||
// On 32-bit we only have 6 xregs for mips regs, use XMM0/XMM1 if possible.
|
||||
if (i < 2)
|
||||
@ -430,19 +440,22 @@ void Jit::Comp_VecDo3(u32 op) {
|
||||
}
|
||||
else
|
||||
{
|
||||
fpr.MapRegV(dregs[i], MAP_NOINIT | MAP_DIRTY);
|
||||
fpr.MapRegV(dregs[i], (dregs[i] == sregs[i] ? 0 : MAP_NOINIT) | MAP_DIRTY);
|
||||
fpr.SpillLockV(dregs[i]);
|
||||
tempxregs[i] = fpr.VX(dregs[i]);
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < n; ++i)
|
||||
MOVSS(tempxregs[i], fpr.V(sregs[i]));
|
||||
{
|
||||
if (!fpr.V(sregs[i]).IsSimpleReg(tempxregs[i]))
|
||||
MOVSS(tempxregs[i], fpr.V(sregs[i]));
|
||||
}
|
||||
for (int i = 0; i < n; ++i)
|
||||
(this->*xmmop)(tempxregs[i], fpr.V(tregs[i]));
|
||||
for (int i = 0; i < n; ++i)
|
||||
{
|
||||
if (!fpr.R(dregs[i]).IsSimpleReg(tempxregs[i]))
|
||||
if (!fpr.V(dregs[i]).IsSimpleReg(tempxregs[i]))
|
||||
MOVSS(fpr.V(dregs[i]), tempxregs[i]);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user