From 8ea59990ab8413c54ae83db69d3299f98e4b2d07 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 17 Feb 2013 23:15:16 -0800 Subject: [PATCH] Make applying prefixes mostly automatic. And implement (hopefully) D prefixes. --- Core/MIPS/x86/CompVFPU.cpp | 51 ++++++++++++++++++++++++-------------- Core/MIPS/x86/Jit.h | 13 +++++++++- 2 files changed, 45 insertions(+), 19 deletions(-) diff --git a/Core/MIPS/x86/CompVFPU.cpp b/Core/MIPS/x86/CompVFPU.cpp index d4d689091c..16a59cd9ca 100644 --- a/Core/MIPS/x86/CompVFPU.cpp +++ b/Core/MIPS/x86/CompVFPU.cpp @@ -122,21 +122,42 @@ void Jit::ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz) { } } -void Jit::ApplyPrefixD(const u8 *vregs, u32 prefix, VectorSize sz, bool onlyWriteMask) { +void Jit::GetVectorRegsPrefixD(u8 *regs, VectorSize sz, int vectorReg) { _assert_(js.prefixDFlag & JitState::PREFIX_KNOWN); - if (!prefix || onlyWriteMask) return; + + GetVectorRegs(regs, sz, vectorReg); + if (js.prefixD == 0) + return; int n = GetNumVectorElements(sz); for (int i = 0; i < n; i++) { - int sat = (prefix >> (i * 2)) & 3; + // Hopefully this is rare, we'll just write it into a reg we drop. + if (js.VfpuWriteMask(i)) + regs[i] = fpr.GetTempV(); + } +} + +void Jit::ApplyPrefixD(const u8 *vregs, VectorSize sz) { + _assert_(js.prefixDFlag & JitState::PREFIX_KNOWN); + if (!js.prefixD) return; + + int n = GetNumVectorElements(sz); + for (int i = 0; i < n; i++) + { + if (js.VfpuWriteMask(i)) + continue; + + int sat = (js.prefixD >> (i * 2)) & 3; if (sat == 1) { + fpr.MapRegV(vregs[i], MAP_DIRTY); MAXSS(fpr.VX(vregs[i]), M((void *)&zero)); MINSS(fpr.VX(vregs[i]), M((void *)&one)); } else if (sat == 3) { + fpr.MapRegV(vregs[i], MAP_DIRTY); MAXSS(fpr.VX(vregs[i]), M((void *)&minus_one)); MINSS(fpr.VX(vregs[i]), M((void *)&one)); } @@ -322,12 +343,7 @@ void Jit::Comp_VDot(u32 op) { return; } - // No-op. - if (js.VfpuWriteMask(0)) { - return; - } - - // WARNING: No prefix support! + // WARNING: No prefix support! (maybe soon) if (js.MayHavePrefix()) { Comp_Generic(op); return; @@ -340,11 +356,9 @@ void Jit::Comp_VDot(u32 op) { // TODO: Force read one of them into regs? probably not. u8 sregs[4], tregs[4], dregs[1]; - GetVectorRegs(sregs, sz, vs); - GetVectorRegs(tregs, sz, vt); - GetVectorRegs(dregs, V_Single, vd); - - // TODO: applyprefixST here somehow (shuffle, etc...) + GetVectorRegsPrefixS(sregs, sz, vs); + GetVectorRegsPrefixT(tregs, sz, vt); + GetVectorRegsPrefixD(dregs, V_Single, vd); int n = GetNumVectorElements(sz); X64Reg tempxreg = XMM0; @@ -370,7 +384,7 @@ void Jit::Comp_VDot(u32 op) { MOVSS(fpr.V(dregs[0]), tempxreg); } - // TODO: applyprefixD here somehow (write mask etc..) + ApplyPrefixD(dregs, V_Single); fpr.ReleaseSpillLocks(); } @@ -440,9 +454,10 @@ void Jit::Comp_VecDo3(u32 op) { tempxregs[i] = (X64Reg) (XMM0 + i); else { - fpr.BindToRegister(TEMP0 + i, false, true); - fpr.SpillLock(TEMP0 + i); - tempxregs[i] = fpr.RX(TEMP0 + i); + int reg = fpr.GetTempV(); + fpr.MapRegV(reg, MAP_NOINIT | MAP_DIRTY); + fpr.SpillLockV(reg); + tempxregs[i] = fpr.VX(reg); } } else diff --git a/Core/MIPS/x86/Jit.h b/Core/MIPS/x86/Jit.h index fadc83f9b7..a0f69d180a 100644 --- a/Core/MIPS/x86/Jit.h +++ b/Core/MIPS/x86/Jit.h @@ -184,7 +184,18 @@ public: void Comp_DoNothing(u32 op); void ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz); - void ApplyPrefixD(const u8 *vregs, u32 prefix, VectorSize sz, bool onlyWriteMask = false); + void ApplyPrefixD(const u8 *vregs, VectorSize sz); + void GetVectorRegsPrefixS(u8 *regs, VectorSize sz, int vectorReg) { + _assert_(js.prefixSFlag & JitState::PREFIX_KNOWN); + GetVectorRegs(regs, sz, vectorReg); + ApplyPrefixST(regs, js.prefixS, sz); + } + void GetVectorRegsPrefixT(u8 *regs, VectorSize sz, int vectorReg) { + _assert_(js.prefixTFlag & JitState::PREFIX_KNOWN); + GetVectorRegs(regs, sz, vectorReg); + ApplyPrefixST(regs, js.prefixT, sz); + } + void GetVectorRegsPrefixD(u8 *regs, VectorSize sz, int vectorReg); void EatPrefix() { js.EatPrefix(); } JitBlockCache *GetBlockCache() { return &blocks; }