x86 jit: SIMD-ify VFPU register file writebacks where possible

This commit is contained in:
Henrik Rydgard 2014-11-26 01:33:05 +01:00
parent a6eb4c7e73
commit 804de50711
3 changed files with 80 additions and 16 deletions

View File

@ -631,6 +631,25 @@ public:
// SSE/SSE2: Useful alternative to shuffle in some cases.
void MOVDDUP(X64Reg regOp, OpArg arg);
// TODO: Actually implement the emitters declared in the #if 0 region below.
// The region is compiled out, so none of these declarations are visible to callers yet;
// move each one out of the region once its implementation exists.
#if 0
// SSE3: Horizontal operations in SIMD registers. Could be useful for various VFPU things like dot products...
void ADDSUBPS(X64Reg dest, OpArg src);
void ADDSUBPD(X64Reg dest, OpArg src);
void HADDPS(X64Reg dest, OpArg src);
void HADDPD(X64Reg dest, OpArg src);
void HSUBPS(X64Reg dest, OpArg src);
void HSUBPD(X64Reg dest, OpArg src);
// SSE4: Further horizontal operations - dot products. These are weirdly flexible, the arg contains both a read mask and a write "mask".
// (Both masks are packed into the single u8 `arg` immediate.)
void DPPS(X64Reg dest, OpArg src, u8 arg);
void DPPD(X64Reg dest, OpArg src, u8 arg);
// These are probably useful for VFPU emulation.
// NOTE(review): the meaning of `arg` for these two is not documented here - confirm against the instruction reference before enabling.
// Note the operand order: EXTRACTPS takes an OpArg destination and a register source, the reverse of INSERTPS.
void INSERTPS(X64Reg dest, OpArg src, u8 arg);
void EXTRACTPS(OpArg dest, X64Reg src, u8 arg);
#endif
// The declarations below sit outside the #if 0 region, so they are live.
void UNPCKLPS(X64Reg dest, OpArg src);
void UNPCKHPS(X64Reg dest, OpArg src);
void UNPCKLPD(X64Reg dest, OpArg src);

View File

@ -2292,6 +2292,19 @@ void Jit::Comp_VScl(MIPSOpcode op) {
GetVectorRegsPrefixT(&scale, V_Single, _VT);
GetVectorRegsPrefixD(dregs, sz, _VD);
if (fpr.TryMapDirtyInInVS(dregs, sz, sregs, sz, &scale, V_Single, true)) {
MOVSS(XMM0, fpr.VS(scale));
if (sz != V_Single)
SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0));
if (dregs[0] != sregs[0]) {
MOVAPS(fpr.VSX(dregs[0]), fpr.VS(sregs[0]));
}
MULPS(fpr.VSX(dregs[0]), R(XMM0));
ApplyPrefixD(dregs, sz);
fpr.ReleaseSpillLocks();
return;
}
// Flush SIMD.
fpr.SimpleRegsV(sregs, sz, 0);
fpr.SimpleRegsV(&scale, V_Single, 0);

View File

@ -507,29 +507,61 @@ void FPURegCache::StoreFromRegister(int i) {
X64Reg xr = regs[i].location.GetSimpleReg();
_assert_msg_(JIT, xr >= 0 && xr < NUM_X_FPREGS, "WTF - store - invalid reg");
if (regs[i].lane != 0) {
// Store all of them.
// TODO: This could be more optimal. Check if we can MOVUPS/MOVAPS, etc.
for (int j = 0; j < 4; ++j) {
int mr = xregs[xr].mipsRegs[j];
if (mr == -1) {
continue;
const int *mri = xregs[xr].mipsRegs;
int seq = 1;
for (int i = 1; i < 4; ++i) {
if (mri[i] == -1) {
break;
}
if (j != 0 && xregs[xr].dirty) {
emit->SHUFPS(xr, Gen::R(xr), MMShuffleSwapTo0(j));
if (voffset[mri[i] - 32] == voffset[mri[i - 1] - 32] + 1) {
seq++;
} else {
break;
}
}
OpArg newLoc = GetDefaultLocation(mr);
if (xregs[xr].dirty) {
emit->MOVSS(newLoc, xr);
if (seq == 2 || seq == 4) {
OpArg newLoc = GetDefaultLocation(mri[0]);
if (seq == 4)
emit->MOVAPS(newLoc, xr);
else
emit->MOVQ_xmm(newLoc, xr);
for (int j = 0; j < 4; ++j) {
int mr = xregs[xr].mipsRegs[j];
if (mr == -1) {
continue;
}
OpArg newLoc = GetDefaultLocation(mr);
regs[mr].location = newLoc;
regs[mr].away = false;
regs[mr].lane = 0;
xregs[xr].mipsRegs[j] = -1;
}
} else {
// Store all of them.
// TODO: This could be more optimal. Check if we can MOVUPS/MOVAPS, etc.
for (int j = 0; j < 4; ++j) {
int mr = xregs[xr].mipsRegs[j];
if (mr == -1) {
continue;
}
if (j != 0 && xregs[xr].dirty) {
emit->SHUFPS(xr, Gen::R(xr), MMShuffleSwapTo0(j));
}
OpArg newLoc = GetDefaultLocation(mr);
if (xregs[xr].dirty) {
emit->MOVSS(newLoc, xr);
}
regs[mr].location = newLoc;
regs[mr].away = false;
regs[mr].lane = 0;
xregs[xr].mipsRegs[j] = -1;
}
regs[mr].location = newLoc;
regs[mr].away = false;
regs[mr].lane = 0;
xregs[xr].mipsRegs[j] = -1;
}
} else {
xregs[xr].mipsReg = -1;
OpArg newLoc = GetDefaultLocation(i);
xregs[xr].mipsReg = -1;
emit->MOVSS(newLoc, xr);
regs[i].location = newLoc;
}