diff --git a/Common/x64Emitter.h b/Common/x64Emitter.h
index 09d15584de..21cebc8a5d 100644
--- a/Common/x64Emitter.h
+++ b/Common/x64Emitter.h
@@ -631,6 +631,25 @@ public:
 	// SSE/SSE2: Useful alternative to shuffle in some cases.
 	void MOVDDUP(X64Reg regOp, OpArg arg);
 
+	// TODO: Actually implement
+#if 0
+	// SSE3: Horizontal operations in SIMD registers. Could be useful for various VFPU things like dot products...
+	void ADDSUBPS(X64Reg dest, OpArg src);
+	void ADDSUBPD(X64Reg dest, OpArg src);
+	void HADDPS(X64Reg dest, OpArg src);
+	void HADDPD(X64Reg dest, OpArg src);
+	void HSUBPS(X64Reg dest, OpArg src);
+	void HSUBPD(X64Reg dest, OpArg src);
+
+	// SSE4: Further horizontal operations - dot products. These are weirdly flexible, the arg contains both a read mask and a write "mask".
+	void DPPS(X64Reg dest, OpArg src, u8 arg);
+	void DPPD(X64Reg dest, OpArg src, u8 arg);
+
+	// These are probably useful for VFPU emulation.
+	void INSERTPS(X64Reg dest, OpArg src, u8 arg);
+	void EXTRACTPS(OpArg dest, X64Reg src, u8 arg);
+#endif
+
 	void UNPCKLPS(X64Reg dest, OpArg src);
 	void UNPCKHPS(X64Reg dest, OpArg src);
 	void UNPCKLPD(X64Reg dest, OpArg src);
diff --git a/Core/MIPS/x86/CompVFPU.cpp b/Core/MIPS/x86/CompVFPU.cpp
index 7a1d3cc947..7a425b7366 100644
--- a/Core/MIPS/x86/CompVFPU.cpp
+++ b/Core/MIPS/x86/CompVFPU.cpp
@@ -2292,6 +2292,19 @@ void Jit::Comp_VScl(MIPSOpcode op) {
 	GetVectorRegsPrefixT(&scale, V_Single, _VT);
 	GetVectorRegsPrefixD(dregs, sz, _VD);
 
+	if (fpr.TryMapDirtyInInVS(dregs, sz, sregs, sz, &scale, V_Single, true)) {
+		MOVSS(XMM0, fpr.VS(scale));
+		if (sz != V_Single)
+			SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0));
+		if (dregs[0] != sregs[0]) {
+			MOVAPS(fpr.VSX(dregs[0]), fpr.VS(sregs[0]));
+		}
+		MULPS(fpr.VSX(dregs[0]), R(XMM0));
+		ApplyPrefixD(dregs, sz);
+		fpr.ReleaseSpillLocks();
+		return;
+	}
+
 	// Flush SIMD.
 	fpr.SimpleRegsV(sregs, sz, 0);
 	fpr.SimpleRegsV(&scale, V_Single, 0);
diff --git a/Core/MIPS/x86/RegCacheFPU.cpp b/Core/MIPS/x86/RegCacheFPU.cpp
index a7b51931c5..e26bf00ea3 100644
--- a/Core/MIPS/x86/RegCacheFPU.cpp
+++ b/Core/MIPS/x86/RegCacheFPU.cpp
@@ -507,29 +507,69 @@ void FPURegCache::StoreFromRegister(int i) {
 		X64Reg xr = regs[i].location.GetSimpleReg();
 		_assert_msg_(JIT, xr >= 0 && xr < NUM_X_FPREGS, "WTF - store - invalid reg");
 		if (regs[i].lane != 0) {
-			// Store all of them.
+			const int *mri = xregs[xr].mipsRegs;
+			// Count the leading lanes whose VFPU registers are adjacent in voffset order.
+			int seq = 1;
+			for (int j = 1; j < 4; ++j) {
+				if (mri[j] == -1) {
+					break;
+				}
+				if (voffset[mri[j] - 32] == voffset[mri[j - 1] - 32] + 1) {
+					seq++;
+				} else {
+					break;
+				}
+			}
+
+			if (seq == 2 || seq == 4) {
+				// One wide store covers exactly the first seq lanes.  Skip it when
+				// the register is clean, as the per-lane path below does.
+				// NOTE(review): MOVAPS needs a 16-byte aligned destination - confirm.
+				if (xregs[xr].dirty) {
+					OpArg newLoc = GetDefaultLocation(mri[0]);
+					if (seq == 4)
+						emit->MOVAPS(newLoc, xr);
+					else
+						emit->MOVQ_xmm(newLoc, xr);
+				}
+				for (int j = 0; j < seq; ++j) {
+					int mr = xregs[xr].mipsRegs[j];
+					if (mr == -1) {
+						continue;
+					}
+					regs[mr].location = GetDefaultLocation(mr);
+					regs[mr].away = false;
+					regs[mr].lane = 0;
+					xregs[xr].mipsRegs[j] = -1;
+				}
+			} else {
+				// No run of 2 or 4: every lane goes through the per-lane path.
+				seq = 0;
+			}
+
+			// Store the lanes the wide store did not cover, one at a time.
 			// TODO: This could be more optimal. Check if we can MOVUPS/MOVAPS, etc.
-			for (int j = 0; j < 4; ++j) {
+			for (int j = seq; j < 4; ++j) {
 				int mr = xregs[xr].mipsRegs[j];
 				if (mr == -1) {
 					continue;
 				}
 				if (j != 0 && xregs[xr].dirty) {
 					emit->SHUFPS(xr, Gen::R(xr), MMShuffleSwapTo0(j));
 				}
 
 				OpArg newLoc = GetDefaultLocation(mr);
 				if (xregs[xr].dirty) {
 					emit->MOVSS(newLoc, xr);
 				}
 				regs[mr].location = newLoc;
 				regs[mr].away = false;
 				regs[mr].lane = 0;
 				xregs[xr].mipsRegs[j] = -1;
 			}
 		} else {
-			xregs[xr].mipsReg = -1;
 			OpArg newLoc = GetDefaultLocation(i);
+			xregs[xr].mipsReg = -1;
 			emit->MOVSS(newLoc, xr);
 			regs[i].location = newLoc;
 		}