mirror of
https://github.com/libretro/pcsx2.git
synced 2024-11-25 10:20:09 +00:00
microVU:
- Code refactoring (mostly changing macros to functions/constants...) - Made it so the disable-regAlloc option flushes every 32bit instruction, instead of every 64bit instruction (upper+lower instruction pair) git-svn-id: http://pcsx2.googlecode.com/svn/trunk@3713 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
807bd9659c
commit
009d6ba5e6
@ -647,10 +647,6 @@
|
||||
RelativePath="..\..\x86\microVU_IR.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\x86\microVU_IR.inl"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\x86\microVU_Log.inl"
|
||||
>
|
||||
|
@ -24,7 +24,6 @@
|
||||
#include "microVU_Misc.inl"
|
||||
#include "microVU_Log.inl"
|
||||
#include "microVU_Analyze.inl"
|
||||
#include "microVU_IR.inl"
|
||||
#include "microVU_Alloc.inl"
|
||||
#include "microVU_Upper.inl"
|
||||
#include "microVU_Lower.inl"
|
||||
@ -103,7 +102,7 @@ void microVU::init(uint vuIndex) {
|
||||
dispCache = NULL;
|
||||
cache = NULL;
|
||||
cacheSize = mVUcacheSize;
|
||||
regAlloc = new microRegAlloc(this);
|
||||
regAlloc = new microRegAlloc(index);
|
||||
|
||||
for (u32 i = 0; i < (progSize / 2); i++) {
|
||||
prog.prog[i] = new deque<microProgram*>();
|
||||
|
@ -90,7 +90,7 @@ public:
|
||||
for (int i = 0; i <= listI; i++) {
|
||||
if ((linkI->block->pState.q == pState->q)
|
||||
&& (linkI->block->pState.p == pState->p)
|
||||
&& ((linkI->block->pState.vi15 == pState->vi15) || !CHECK_VU_CONSTPROP)
|
||||
&& ((linkI->block->pState.vi15 == pState->vi15) || !doConstProp)
|
||||
&& (linkI->block->pState.flags == pState->flags)
|
||||
&& (linkI->block->pState.xgkick == pState->xgkick)
|
||||
&& (linkI->block->pState.viBackUp == pState->viBackUp)
|
||||
@ -229,12 +229,6 @@ struct microVU {
|
||||
return (((prog.IRinfo.curPC + 4) + (Imm11() * 2)) & progMemMask) * 4;
|
||||
}
|
||||
|
||||
__ri void loadIreg(const xmm& reg, int xyzw)
|
||||
{
|
||||
xMOVSSZX(reg, ptr32[&getVI(REG_I)]);
|
||||
if (!_XYZWss(xyzw)) xSHUF.PS(reg, reg, 0);
|
||||
}
|
||||
|
||||
void init(uint vuIndex);
|
||||
void reset();
|
||||
void close();
|
||||
|
@ -470,7 +470,7 @@ __fi void mVUanalyzeNormBranch(mV, int It, bool isBAL) {
|
||||
__ri void mVUanalyzeJump(mV, int Is, int It, bool isJALR) {
|
||||
mVUbranchCheck(mVU);
|
||||
mVUlow.branch = (isJALR) ? 10 : 9;
|
||||
if (mVUconstReg[Is].isValid && CHECK_VU_CONSTPROP) {
|
||||
if (mVUconstReg[Is].isValid && doConstProp) {
|
||||
mVUlow.constJump.isValid = 1;
|
||||
mVUlow.constJump.regValue = mVUconstReg[Is].regValue;
|
||||
//DevCon.Status("microVU%d: Constant JR/JALR Address Optimization", mVU->index);
|
||||
|
@ -15,31 +15,19 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// Helper Macros
|
||||
//------------------------------------------------------------------
|
||||
|
||||
#define calcCycles(reg, x) { reg = ((reg > x) ? (reg - x) : 0); }
|
||||
#define optimizeReg(rState) { rState = (rState==1) ? 0 : rState; }
|
||||
#define tCycles(dest, src) { dest = aMax(dest, src); }
|
||||
#define incP() { mVU->p = (mVU->p+1) & 1; }
|
||||
#define incQ() { mVU->q = (mVU->q+1) & 1; }
|
||||
#define doUpperOp() { mVUopU(mVU, 1); mVUdivSet(mVU); }
|
||||
#define doLowerOp() { incPC(-1); mVUopL(mVU, 1); incPC(1); }
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// Messages Called at Execution Time...
|
||||
//------------------------------------------------------------------
|
||||
|
||||
static void __fastcall mVUbadOp0(mV) { Console.Error("microVU0 Warning: Exiting... Block started with illegal opcode. [%04x] [%x]", xPC, mVU->prog.cur); }
|
||||
static void __fastcall mVUbadOp1(mV) { Console.Error("microVU1 Warning: Exiting... Block started with illegal opcode. [%04x] [%x]", xPC, mVU->prog.cur); }
|
||||
static void __fastcall mVUwarning0(mV) { Console.Error("microVU0 Warning: Exiting from Possible Infinite Loop [%04x] [%x]", xPC, mVU->prog.cur); }
|
||||
static void __fastcall mVUwarning1(mV) { Console.Error("microVU1 Warning: Exiting from Possible Infinite Loop [%04x] [%x]", xPC, mVU->prog.cur); }
|
||||
static void __fastcall mVUprintPC1(u32 PC) { Console.WriteLn("Block Start PC = 0x%04x", PC); }
|
||||
static void __fastcall mVUprintPC2(u32 PC) { Console.WriteLn("Block End PC = 0x%04x", PC); }
|
||||
static void __fastcall mVUbadOp0(mV, u32 PC) { Console.Error("microVU0 Warning: Exiting... Block started with illegal opcode. [%04x] [%x]", PC, mVU->prog.cur->idx); }
|
||||
static void __fastcall mVUbadOp1(mV, u32 PC) { Console.Error("microVU1 Warning: Exiting... Block started with illegal opcode. [%04x] [%x]", PC, mVU->prog.cur->idx); }
|
||||
static void __fastcall mVUwarning0(mV) { Console.Error("microVU0 Warning: Exiting from Possible Infinite Loop [%04x] [%x]", mVU->prog.cur->idx); }
|
||||
static void __fastcall mVUwarning1(mV) { Console.Error("microVU1 Warning: Exiting from Possible Infinite Loop [%04x] [%x]", mVU->prog.cur->idx); }
|
||||
static void __fastcall mVUprintPC1(u32 PC) { Console.WriteLn("Block Start PC = 0x%04x", PC); }
|
||||
static void __fastcall mVUprintPC2(u32 PC) { Console.WriteLn("Block End PC = 0x%04x", PC); }
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// Helper Functions
|
||||
// Program Range Checking and Setting up Ranges
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Used by mVUsetupRange
|
||||
@ -106,13 +94,13 @@ static void mVUsetupRange(microVU* mVU, s32 pc, bool isStartPC) {
|
||||
}
|
||||
}
|
||||
|
||||
static __fi void startLoop(mV) {
|
||||
if (curI & _Mbit_) { Console.WriteLn(Color_Green, "microVU%d: M-bit set!", getIndex); }
|
||||
if (curI & _Dbit_) { DevCon.WriteLn (Color_Green, "microVU%d: D-bit set!", getIndex); }
|
||||
if (curI & _Tbit_) { DevCon.WriteLn (Color_Green, "microVU%d: T-bit set!", getIndex); }
|
||||
memzero(mVUinfo);
|
||||
memzero(mVUregsTemp);
|
||||
}
|
||||
//------------------------------------------------------------------
|
||||
// Execute VU Opcode/Instruction (Upper and Lower)
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Upper half of an instruction pair: calls mVUopU(mVU, 1), then mVUdivSet(mVU).
__ri void doUpperOp(mV) { mVUopU(mVU, 1); mVUdivSet(mVU); }
|
||||
// Lower half of an instruction pair: calls mVUopL(mVU, 1) bracketed by incPC(-1) / incPC(1),
// so the PC is the same on exit as on entry.
// NOTE(review): presumably incPC(-1) points at the lower-instruction slot -- confirm against incPC.
__ri void doLowerOp(mV) { incPC(-1); mVUopL(mVU, 1); incPC(1); }
|
||||
// With register allocation switched off (doRegAlloc == 0), calls regAlloc->flushAll();
// with it switched on this is a no-op.
__ri void flushRegs(mV) {
	if (doRegAlloc) return;
	mVU->regAlloc->flushAll();
}
|
||||
|
||||
static void doIbit(mV) {
|
||||
if (mVUup.iBit) {
|
||||
@ -126,7 +114,7 @@ static void doIbit(mV) {
|
||||
}
|
||||
else tempI = curI;
|
||||
|
||||
xMOV(ptr32[&mVU->regs().VI[REG_I].UL], tempI);
|
||||
xMOV(ptr32[&mVU->getVI(REG_I)], tempI);
|
||||
incPC(1);
|
||||
}
|
||||
}
|
||||
@ -150,16 +138,27 @@ static void doSwapOp(mV) {
|
||||
mVU->regAlloc->clearNeeded(t3);
|
||||
|
||||
incPC(1);
|
||||
doUpperOp();
|
||||
doUpperOp(mVU);
|
||||
|
||||
const xmm& t4 = mVU->regAlloc->allocReg(-1, mVUlow.VF_write.reg, 0xf);
|
||||
xMOVAPS(t4, t2);
|
||||
mVU->regAlloc->clearNeeded(t4);
|
||||
mVU->regAlloc->clearNeeded(t2);
|
||||
}
|
||||
else { mVUopL(mVU, 1); incPC(1); doUpperOp(); }
|
||||
else { mVUopL(mVU, 1); incPC(1); flushRegs(mVU); doUpperOp(mVU); }
|
||||
}
|
||||
|
||||
static void mVUexecuteInstruction(mV) {
|
||||
if (mVUlow.isNOP) { incPC(1); doUpperOp(mVU); flushRegs(mVU); doIbit(mVU); }
|
||||
elif(!mVUinfo.swapOps) { incPC(1); doUpperOp(mVU); flushRegs(mVU); doLowerOp(mVU); }
|
||||
else doSwapOp(mVU);
|
||||
flushRegs(mVU);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// Warnings / Errors / Illegal Instructions
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// If 1st op in block is a bad opcode, then don't compile rest of block (Dawn of Mana Level 2)
|
||||
static __fi void mVUcheckBadOp(mV) {
|
||||
if (mVUinfo.isBadOp && mVUcount == 0) {
|
||||
@ -172,6 +171,7 @@ static __fi void mVUcheckBadOp(mV) {
|
||||
static __fi void handleBadOp(mV, int count) {
|
||||
if (mVUinfo.isBadOp && count == 0) {
|
||||
xMOV(gprT2, (uptr)mVU);
|
||||
xMOV(gprT3, xPC);
|
||||
if (!isVU1) xCALL(mVUbadOp0);
|
||||
else xCALL(mVUbadOp1);
|
||||
}
|
||||
@ -211,8 +211,21 @@ static __ri void eBitWarning(mV) {
|
||||
incPC(-2);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// Cycles / Pipeline State / Early Exit from Execution
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Collapses a state value of exactly 1 down to 0; every other value is left as it is.
__fi void optimizeReg(u8& rState) {
	if (rState == 1) rState = 0;
}
|
||||
// Subtracts x from reg, stopping at 0 rather than wrapping below it.
__fi void calcCycles(u8& reg, u8 x) {
	if (reg > x) reg = reg - x;
	else         reg = 0;
}
|
||||
// Stores aMax(dest, src) into dest; src is not written here.
// NOTE(review): aMax is presumably a plain "larger of the two" macro -- confirm its definition.
__fi void tCycles(u8& dest, u8& src) { dest = aMax(dest, src); }
|
||||
// Flips the low bit of mVU->p.
__fi void incP(mV) {
	mVU->p = mVU->p ^ 1;
}
|
||||
// Flips the low bit of mVU->q.
__fi void incQ(mV) {
	mVU->q = mVU->q ^ 1;
}
|
||||
|
||||
// Optimizes the End Pipeline State Removing Unnecessary Info
|
||||
static __fi void mVUoptimizePipeState(mV) {
|
||||
// If the cycles remaining is just '1', we don't have to transfer it to the next block
|
||||
// because mVU automatically decrements this number at the start of its loop,
|
||||
// so essentially '1' will be the same as '0'...
|
||||
static void mVUoptimizePipeState(mV) {
|
||||
for (int i = 0; i < 32; i++) {
|
||||
optimizeReg(mVUregs.VF[i].x);
|
||||
optimizeReg(mVUregs.VF[i].y);
|
||||
@ -222,12 +235,12 @@ static __fi void mVUoptimizePipeState(mV) {
|
||||
for (int i = 0; i < 16; i++) {
|
||||
optimizeReg(mVUregs.VI[i]);
|
||||
}
|
||||
if (mVUregs.q) { optimizeReg(mVUregs.q); if (!mVUregs.q) { incQ(); } }
|
||||
if (mVUregs.p) { optimizeReg(mVUregs.p); if (!mVUregs.p) { incP(); } }
|
||||
if (mVUregs.q) { optimizeReg(mVUregs.q); if (!mVUregs.q) { incQ(mVU); } }
|
||||
if (mVUregs.p) { optimizeReg(mVUregs.p); if (!mVUregs.p) { incP(mVU); } }
|
||||
mVUregs.r = 0; // There are no stalls on the R-reg, so its Safe to discard info
|
||||
}
|
||||
|
||||
__fi void mVUincCycles(mV, int x) {
|
||||
static void mVUincCycles(mV, int x) {
|
||||
mVUcycles += x;
|
||||
for (int z = 31; z > 0; z--) {
|
||||
calcCycles(mVUregs.VF[z].x, x);
|
||||
@ -241,11 +254,11 @@ __fi void mVUincCycles(mV, int x) {
|
||||
if (mVUregs.q) {
|
||||
if (mVUregs.q > 4) { calcCycles(mVUregs.q, x); if (mVUregs.q <= 4) { mVUinfo.doDivFlag = 1; } }
|
||||
else { calcCycles(mVUregs.q, x); }
|
||||
if (!mVUregs.q) { incQ(); }
|
||||
if (!mVUregs.q) { incQ(mVU); }
|
||||
}
|
||||
if (mVUregs.p) {
|
||||
calcCycles(mVUregs.p, x);
|
||||
if (!mVUregs.p || mVUregsTemp.p) { incP(); }
|
||||
if (!mVUregs.p || mVUregsTemp.p) { incP(mVU); }
|
||||
}
|
||||
if (mVUregs.xgkick) {
|
||||
calcCycles(mVUregs.xgkick, x);
|
||||
@ -254,14 +267,13 @@ __fi void mVUincCycles(mV, int x) {
|
||||
calcCycles(mVUregs.r, x);
|
||||
}
|
||||
|
||||
#define cmpVFregs(VFreg1, VFreg2, xVar) { \
|
||||
if (VFreg1.reg == VFreg2.reg) { \
|
||||
if ((VFreg1.x && VFreg2.x) \
|
||||
|| (VFreg1.y && VFreg2.y) \
|
||||
|| (VFreg1.z && VFreg2.z) \
|
||||
|| (VFreg1.w && VFreg2.w)) \
|
||||
{ xVar = 1; } \
|
||||
} \
|
||||
// Helps check if upper/lower ops read/write to same regs...
|
||||
void cmpVFregs(microVFreg& VFreg1, microVFreg& VFreg2, bool& xVar) {
|
||||
if (VFreg1.reg == VFreg2.reg) {
|
||||
if ((VFreg1.x && VFreg2.x) || (VFreg1.y && VFreg2.y)
|
||||
|| (VFreg1.z && VFreg2.z) || (VFreg1.w && VFreg2.w))
|
||||
{ xVar = 1; }
|
||||
}
|
||||
}
|
||||
|
||||
void mVUsetCycles(mV) {
|
||||
@ -299,6 +311,15 @@ void mVUsetCycles(mV) {
|
||||
tCycles(mVUregs.xgkick, mVUregsTemp.xgkick);
|
||||
}
|
||||
|
||||
// Prints Start/End PC of blocks executed, for debugging...
|
||||
static void mVUdebugPrintBlocks(microVU* mVU, bool isEndPC) {
|
||||
if (mVUdebugNow) {
|
||||
xMOV(gprT2, xPC);
|
||||
if (isEndPC) xCALL(mVUprintPC2);
|
||||
else xCALL(mVUprintPC1);
|
||||
}
|
||||
}
|
||||
|
||||
// vu0 is allowed to exit early, so are dev builds (for inf loops)
|
||||
__fi bool doEarlyExit(microVU* mVU) {
|
||||
return IsDevBuild || !isVU1;
|
||||
@ -312,15 +333,6 @@ static __fi void mVUsavePipelineState(microVU* mVU) {
|
||||
}
|
||||
}
|
||||
|
||||
// Prints Start/End PC of blocks executed, for debugging...
|
||||
static void mVUdebugPrintBlocks(microVU* mVU, bool isEndPC) {
|
||||
if (mVUdebugNow) {
|
||||
xMOV(gprT2, xPC);
|
||||
if (isEndPC) xCALL(mVUprintPC2);
|
||||
else xCALL(mVUprintPC1);
|
||||
}
|
||||
}
|
||||
|
||||
// Test cycles to see if we need to exit-early...
|
||||
static void mVUtestCycles(microVU* mVU) {
|
||||
iPC = mVUstartPC;
|
||||
@ -332,8 +344,8 @@ static void mVUtestCycles(microVU* mVU) {
|
||||
// xFowardJZ32 vu0jmp;
|
||||
// xMOV(gprT2, (uptr)mVU);
|
||||
// xCALL(mVUwarning0); // VU0 is allowed early exit for COP2 Interlock Simulation
|
||||
mVUsavePipelineState(mVU);
|
||||
mVUendProgram(mVU, NULL, 0);
|
||||
mVUsavePipelineState(mVU);
|
||||
mVUendProgram(mVU, NULL, 0);
|
||||
// vu0jmp.SetTarget();
|
||||
}
|
||||
else {
|
||||
@ -347,6 +359,19 @@ static void mVUtestCycles(microVU* mVU) {
|
||||
xSUB(ptr32[&mVU->cycles], mVUcycles);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// Initializing
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// This gets run at the start of every loop of mVU's first pass
|
||||
static __fi void startLoop(mV) {
|
||||
if (curI & _Mbit_) { Console.WriteLn(Color_Green, "microVU%d: M-bit set!", getIndex); }
|
||||
if (curI & _Dbit_) { DevCon.WriteLn (Color_Green, "microVU%d: D-bit set!", getIndex); }
|
||||
if (curI & _Tbit_) { DevCon.WriteLn (Color_Green, "microVU%d: T-bit set!", getIndex); }
|
||||
memzero(mVUinfo);
|
||||
memzero(mVUregsTemp);
|
||||
}
|
||||
|
||||
// Initialize VI Constants (vi15 propagates through blocks)
|
||||
static __fi void mVUinitConstValues(microVU* mVU) {
|
||||
for (int i = 0; i < 16; i++) {
|
||||
@ -393,7 +418,7 @@ void* mVUcompile(microVU* mVU, u32 startPC, uptr pState) {
|
||||
|
||||
// First Pass
|
||||
iPC = startPC / 4;
|
||||
mVUsetupRange(mVU, startPC, 1); // Setup Program Bounds/Range
|
||||
mVUsetupRange(mVU, startPC, 1); // Setup Program Bounds/Range
|
||||
mVU->regAlloc->reset(); // Reset regAlloc
|
||||
mVUinitFirstPass(mVU, pState, thisPtr);
|
||||
for (int branch = 0; mVUcount < endCount; mVUcount++) {
|
||||
@ -419,7 +444,7 @@ void* mVUcompile(microVU* mVU, u32 startPC, uptr pState) {
|
||||
}
|
||||
|
||||
// Fix up vi15 const info for propagation through blocks
|
||||
mVUregs.vi15 = (mVUconstReg[15].isValid && CHECK_VU_CONSTPROP) ? ((1<<31) | (mVUconstReg[15].regValue&0xffff)) : 0;
|
||||
mVUregs.vi15 = (mVUconstReg[15].isValid && doConstProp) ? ((1<<31) | (mVUconstReg[15].regValue&0xffff)) : 0;
|
||||
|
||||
mVUsetFlags(mVU, mFC); // Sets Up Flag instances
|
||||
mVUoptimizePipeState(mVU); // Optimize the End Pipeline State for nicer Block Linking
|
||||
@ -434,11 +459,8 @@ void* mVUcompile(microVU* mVU, u32 startPC, uptr pState) {
|
||||
for (; x < endCount; x++) {
|
||||
if (mVUinfo.isEOB) { handleBadOp(mVU, x); x = 0xffff; }
|
||||
if (mVUup.mBit) { xOR(ptr32[&mVU->regs().flags], VUFLAG_MFLAGSET); }
|
||||
if (mVUlow.isNOP) { incPC(1); doUpperOp(); doIbit(mVU); }
|
||||
else if (!mVUinfo.swapOps) { incPC(1); doUpperOp(); doLowerOp(); }
|
||||
else { doSwapOp(mVU); }
|
||||
mVUexecuteInstruction(mVU);
|
||||
if (mVUinfo.doXGKICK) { mVU_XGKICK_DELAY(mVU, 1); }
|
||||
if (!doRegAlloc) { mVU->regAlloc->flushAll(); }
|
||||
if (isEvilBlock) { mVUsetupRange(mVU, xPC, 0); normJumpCompile(mVU, mFC, 1); return thisPtr; }
|
||||
else if (!mVUinfo.isBdelay) { incPC(1); }
|
||||
else {
|
||||
|
@ -286,7 +286,7 @@ void mVUflagPass(mV, u32 startPC, u32 sCount = 0, u32 found = 0) {
|
||||
__fi void mVUsetFlagInfo(mV) {
|
||||
branchType1 { incPC(-1); mVUflagPass(mVU, branchAddr); incPC(1); }
|
||||
branchType2 { // This case can possibly be turned off via a hack for a small speedup...
|
||||
if (!mVUlow.constJump.isValid || !CHECK_VU_CONSTPROP) { mVUregs.needExactMatch |= 0x7; }
|
||||
if (!mVUlow.constJump.isValid || !doConstProp) { mVUregs.needExactMatch |= 0x7; }
|
||||
else { mVUflagPass(mVU, (mVUlow.constJump.regValue*8)&(mVU->microMemSize-8)); }
|
||||
}
|
||||
branchType3 {
|
||||
|
@ -170,39 +170,194 @@ struct microMapXMM {
|
||||
bool isNeeded; // Is needed for current instruction
|
||||
};
|
||||
|
||||
#define xmmTotal 7 // Don't allocate PQ?
|
||||
class microRegAlloc {
|
||||
protected:
|
||||
static const u32 xmmTotal = 7; // Don't allocate PQ?
|
||||
microMapXMM xmmMap[xmmTotal];
|
||||
int counter;
|
||||
microVU* mVU;
|
||||
int counter; // Current allocation count
|
||||
int index; // VU0 or VU1
|
||||
|
||||
// Helper functions to get VU regs
|
||||
VURegs& regs() const { return ::vuRegs[index]; }
|
||||
__fi REG_VI& getVI(uint reg) const { return regs().VI[reg]; }
|
||||
__fi VECTOR& getVF(uint reg) const { return regs().VF[reg]; }
|
||||
|
||||
__ri void loadIreg(const xmm& reg, int xyzw) {
|
||||
xMOVSSZX(reg, ptr32[&getVI(REG_I)]);
|
||||
if (!_XYZWss(xyzw)) xSHUF.PS(reg, reg, 0);
|
||||
}
|
||||
|
||||
int findFreeRegRec(int startIdx);
|
||||
int findFreeReg();
|
||||
int findFreeRegRec(int startIdx) {
|
||||
for (int i = startIdx; i < xmmTotal; i++) {
|
||||
if (!xmmMap[i].isNeeded) {
|
||||
int x = findFreeRegRec(i+1);
|
||||
if (x == -1) return i;
|
||||
return ((xmmMap[i].count < xmmMap[x].count) ? i : x);
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
int findFreeReg() {
|
||||
for (int i = 0; i < xmmTotal; i++) {
|
||||
if (!xmmMap[i].isNeeded && (xmmMap[i].VFreg < 0)) {
|
||||
return i; // Reg is not needed and was a temp reg
|
||||
}
|
||||
}
|
||||
int x = findFreeRegRec(0);
|
||||
pxAssumeDev( x >= 0, "microVU register allocation failure!" );
|
||||
return x;
|
||||
}
|
||||
|
||||
public:
|
||||
microRegAlloc(microVU* _mVU);
|
||||
|
||||
microRegAlloc(int _index) {
|
||||
index = _index;
|
||||
}
|
||||
|
||||
void reset() {
|
||||
for (int i = 0; i < xmmTotal; i++) {
|
||||
clearReg(i);
|
||||
}
|
||||
counter = 0;
|
||||
}
|
||||
|
||||
void flushAll(bool clearState = 1) {
|
||||
for (int i = 0; i < xmmTotal; i++) {
|
||||
writeBackReg(xmm(i));
|
||||
if (clearState) clearReg(i);
|
||||
}
|
||||
}
|
||||
void clearReg(int regId);
|
||||
|
||||
void clearReg(const xmm& reg) { clearReg(reg.Id); }
|
||||
void clearReg(int regId) {
|
||||
microMapXMM& clear( xmmMap[regId] );
|
||||
clear.VFreg = -1;
|
||||
clear.count = 0;
|
||||
clear.xyzw = 0;
|
||||
clear.isNeeded = 0;
|
||||
}
|
||||
|
||||
void clearRegVF(int VFreg) {
|
||||
for (int i = 0; i < xmmTotal; i++) {
|
||||
if (xmmMap[i].VFreg == VFreg) clearReg(i);
|
||||
}
|
||||
}
|
||||
void writeBackReg(const xmm& reg, bool invalidateRegs = 1);
|
||||
void clearNeeded(const xmm& reg);
|
||||
const xmm& allocReg(int vfLoadReg = -1, int vfWriteReg = -1, int xyzw = 0, bool cloneWrite = 1);
|
||||
|
||||
void writeBackReg(const xmm& reg, bool invalidateRegs = 1) {
|
||||
microMapXMM& write( xmmMap[reg.Id] );
|
||||
|
||||
if ((write.VFreg > 0) && write.xyzw) { // Reg was modified and not Temp or vf0
|
||||
if (write.VFreg == 33) xMOVSS(ptr32[&getVI(REG_I)], reg);
|
||||
else if (write.VFreg == 32) mVUsaveReg(reg, ptr[&regs().ACC], write.xyzw, 1);
|
||||
else mVUsaveReg(reg, ptr[&getVF(write.VFreg)], write.xyzw, 1);
|
||||
if (invalidateRegs) {
|
||||
for (int i = 0; i < xmmTotal; i++) {
|
||||
microMapXMM& imap (xmmMap[i]);
|
||||
if ((i == reg.Id) || imap.isNeeded) continue;
|
||||
if (imap.VFreg == write.VFreg) {
|
||||
if (imap.xyzw && imap.xyzw < 0xf) DevCon.Error("microVU Error: writeBackReg() [%d]", imap.VFreg);
|
||||
clearReg(i); // Invalidate any Cached Regs of same vf Reg
|
||||
}
|
||||
}
|
||||
}
|
||||
if (write.xyzw == 0xf) { // Make Cached Reg if All Vectors were Modified
|
||||
write.count = counter;
|
||||
write.xyzw = 0;
|
||||
write.isNeeded = 0;
|
||||
return;
|
||||
}
|
||||
}
|
||||
clearReg(reg); // Clear Reg
|
||||
}
|
||||
|
||||
void clearNeeded(const xmm& reg) {
|
||||
|
||||
if ((reg.Id < 0) || (reg.Id >= xmmTotal)) return;
|
||||
|
||||
microMapXMM& clear (xmmMap[reg.Id]);
|
||||
clear.isNeeded = 0;
|
||||
if (clear.xyzw) { // Reg was modified
|
||||
if (clear.VFreg > 0) {
|
||||
int mergeRegs = 0;
|
||||
if (clear.xyzw < 0xf) { mergeRegs = 1; } // Try to merge partial writes
|
||||
for (int i = 0; i < xmmTotal; i++) { // Invalidate any other read-only regs of same vfReg
|
||||
if (i == reg.Id) continue;
|
||||
microMapXMM& imap (xmmMap[i]);
|
||||
if (imap.VFreg == clear.VFreg) {
|
||||
if (imap.xyzw && imap.xyzw < 0xf) DevCon.Error("microVU Error: clearNeeded() [%d]", imap.VFreg);
|
||||
if (mergeRegs == 1) {
|
||||
mVUmergeRegs(xmm(i), reg, clear.xyzw, 1);
|
||||
imap.xyzw = 0xf;
|
||||
imap.count = counter;
|
||||
mergeRegs = 2;
|
||||
}
|
||||
else clearReg(i);
|
||||
}
|
||||
}
|
||||
if (mergeRegs == 2) clearReg(reg); // Clear Current Reg if Merged
|
||||
else if (mergeRegs) writeBackReg(reg); // Write Back Partial Writes if couldn't merge
|
||||
}
|
||||
else clearReg(reg); // If Reg was temp or vf0, then invalidate itself
|
||||
}
|
||||
}
|
||||
|
||||
const xmm& allocReg(int vfLoadReg = -1, int vfWriteReg = -1, int xyzw = 0, bool cloneWrite = 1) {
|
||||
counter++;
|
||||
if (vfLoadReg >= 0) { // Search For Cached Regs
|
||||
for (int i = 0; i < xmmTotal; i++) {
|
||||
const xmm& xmmi(xmm::GetInstance(i));
|
||||
microMapXMM& imap (xmmMap[i]);
|
||||
if ((imap.VFreg == vfLoadReg) && (!imap.xyzw // Reg Was Not Modified
|
||||
|| (imap.VFreg && (imap.xyzw==0xf)))) { // Reg Had All Vectors Modified and != VF0
|
||||
int z = i;
|
||||
if (vfWriteReg >= 0) { // Reg will be modified
|
||||
if (cloneWrite) { // Clone Reg so as not to use the same Cached Reg
|
||||
z = findFreeReg();
|
||||
const xmm& xmmz(xmm::GetInstance(z));
|
||||
writeBackReg(xmmz);
|
||||
if (z!=i && xyzw==8) xMOVAPS (xmmz, xmmi);
|
||||
else if (xyzw == 4) xPSHUF.D(xmmz, xmmi, 1);
|
||||
else if (xyzw == 2) xPSHUF.D(xmmz, xmmi, 2);
|
||||
else if (xyzw == 1) xPSHUF.D(xmmz, xmmi, 3);
|
||||
else if (z != i) xMOVAPS (xmmz, xmmi);
|
||||
imap.count = counter; // Reg i was used, so update counter
|
||||
}
|
||||
else { // Don't clone reg, but shuffle to adjust for SS ops
|
||||
if ((vfLoadReg != vfWriteReg) || (xyzw != 0xf)) { writeBackReg(xmmi); }
|
||||
if (xyzw == 4) xPSHUF.D(xmmi, xmmi, 1);
|
||||
else if (xyzw == 2) xPSHUF.D(xmmi, xmmi, 2);
|
||||
else if (xyzw == 1) xPSHUF.D(xmmi, xmmi, 3);
|
||||
}
|
||||
xmmMap[z].VFreg = vfWriteReg;
|
||||
xmmMap[z].xyzw = xyzw;
|
||||
}
|
||||
xmmMap[z].count = counter;
|
||||
xmmMap[z].isNeeded = 1;
|
||||
return xmm::GetInstance(z);
|
||||
}
|
||||
}
|
||||
}
|
||||
int x = findFreeReg();
|
||||
const xmm& xmmx = xmm::GetInstance(x);
|
||||
writeBackReg(xmmx);
|
||||
|
||||
if (vfWriteReg >= 0) { // Reg Will Be Modified (allow partial reg loading)
|
||||
if ((vfLoadReg == 0) && !(xyzw & 1)) { xPXOR(xmmx, xmmx); }
|
||||
else if (vfLoadReg == 33) loadIreg (xmmx, xyzw);
|
||||
else if (vfLoadReg == 32) mVUloadReg(xmmx, ptr[&regs().ACC], xyzw);
|
||||
else if (vfLoadReg >= 0) mVUloadReg(xmmx, ptr[&getVF(vfLoadReg)], xyzw);
|
||||
xmmMap[x].VFreg = vfWriteReg;
|
||||
xmmMap[x].xyzw = xyzw;
|
||||
}
|
||||
else { // Reg Will Not Be Modified (always load full reg for caching)
|
||||
if (vfLoadReg == 33) loadIreg(xmmx, 0xf);
|
||||
else if (vfLoadReg == 32) xMOVAPS (xmmx, ptr128[&regs().ACC]);
|
||||
else if (vfLoadReg >= 0) xMOVAPS (xmmx, ptr128[&getVF(vfLoadReg)]);
|
||||
xmmMap[x].VFreg = vfLoadReg;
|
||||
xmmMap[x].xyzw = 0;
|
||||
}
|
||||
xmmMap[x].count = counter;
|
||||
xmmMap[x].isNeeded = 1;
|
||||
return xmmx;
|
||||
}
|
||||
};
|
||||
|
@ -1,165 +0,0 @@
|
||||
/* PCSX2 - PS2 Emulator for PCs
|
||||
* Copyright (C) 2002-2010 PCSX2 Dev Team
|
||||
*
|
||||
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
|
||||
* of the GNU Lesser General Public License as published by the Free Software Found-
|
||||
* ation, either version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
||||
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
* PURPOSE. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with PCSX2.
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
|
||||
int microRegAlloc::findFreeRegRec(int startIdx) {
|
||||
for (int i = startIdx; i < xmmTotal; i++) {
|
||||
if (!xmmMap[i].isNeeded) {
|
||||
int x = findFreeRegRec(i+1);
|
||||
if (x == -1) return i;
|
||||
return ((xmmMap[i].count < xmmMap[x].count) ? i : x);
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
int microRegAlloc::findFreeReg() {
|
||||
for (int i = 0; i < xmmTotal; i++) {
|
||||
if (!xmmMap[i].isNeeded && (xmmMap[i].VFreg < 0)) {
|
||||
return i; // Reg is not needed and was a temp reg
|
||||
}
|
||||
}
|
||||
int x = findFreeRegRec(0);
|
||||
pxAssumeDev( x >= 0, "microVU register allocation failure!" );
|
||||
return x;
|
||||
}
|
||||
|
||||
microRegAlloc::microRegAlloc(microVU* _mVU) {
|
||||
mVU = _mVU;
|
||||
}
|
||||
|
||||
void microRegAlloc::clearReg(int regId) {
|
||||
microMapXMM& clear( xmmMap[regId] );
|
||||
clear.VFreg = -1;
|
||||
clear.count = 0;
|
||||
clear.xyzw = 0;
|
||||
clear.isNeeded = 0;
|
||||
}
|
||||
void microRegAlloc::writeBackReg(const xmm& reg, bool invalidateRegs) {
|
||||
microMapXMM& write( xmmMap[reg.Id] );
|
||||
|
||||
if ((write.VFreg > 0) && write.xyzw) { // Reg was modified and not Temp or vf0
|
||||
if (write.VFreg == 33) xMOVSS(ptr32[&mVU->getVI(REG_I)], reg);
|
||||
else if (write.VFreg == 32) mVUsaveReg(reg, ptr[&mVU->regs().ACC], write.xyzw, 1);
|
||||
else mVUsaveReg(reg, ptr[&mVU->getVF(write.VFreg)], write.xyzw, 1);
|
||||
if (invalidateRegs) {
|
||||
for (int i = 0; i < xmmTotal; i++) {
|
||||
microMapXMM& imap (xmmMap[i]);
|
||||
if ((i == reg.Id) || imap.isNeeded) continue;
|
||||
if (imap.VFreg == write.VFreg) {
|
||||
if (imap.xyzw && imap.xyzw < 0xf) DevCon.Error("microVU Error: writeBackReg() [%d]", imap.VFreg);
|
||||
clearReg(i); // Invalidate any Cached Regs of same vf Reg
|
||||
}
|
||||
}
|
||||
}
|
||||
if (write.xyzw == 0xf) { // Make Cached Reg if All Vectors were Modified
|
||||
write.count = counter;
|
||||
write.xyzw = 0;
|
||||
write.isNeeded = 0;
|
||||
return;
|
||||
}
|
||||
}
|
||||
clearReg(reg); // Clear Reg
|
||||
}
|
||||
void microRegAlloc::clearNeeded(const xmm& reg)
|
||||
{
|
||||
if ((reg.Id < 0) || (reg.Id >= xmmTotal)) return;
|
||||
|
||||
microMapXMM& clear (xmmMap[reg.Id]);
|
||||
clear.isNeeded = 0;
|
||||
if (clear.xyzw) { // Reg was modified
|
||||
if (clear.VFreg > 0) {
|
||||
int mergeRegs = 0;
|
||||
if (clear.xyzw < 0xf) { mergeRegs = 1; } // Try to merge partial writes
|
||||
for (int i = 0; i < xmmTotal; i++) { // Invalidate any other read-only regs of same vfReg
|
||||
if (i == reg.Id) continue;
|
||||
microMapXMM& imap (xmmMap[i]);
|
||||
if (imap.VFreg == clear.VFreg) {
|
||||
if (imap.xyzw && imap.xyzw < 0xf) DevCon.Error("microVU Error: clearNeeded() [%d]", imap.VFreg);
|
||||
if (mergeRegs == 1) {
|
||||
mVUmergeRegs(xmm(i), reg, clear.xyzw, 1);
|
||||
imap.xyzw = 0xf;
|
||||
imap.count = counter;
|
||||
mergeRegs = 2;
|
||||
}
|
||||
else clearReg(i);
|
||||
}
|
||||
}
|
||||
if (mergeRegs == 2) clearReg(reg); // Clear Current Reg if Merged
|
||||
else if (mergeRegs) writeBackReg(reg); // Write Back Partial Writes if couldn't merge
|
||||
}
|
||||
else clearReg(reg); // If Reg was temp or vf0, then invalidate itself
|
||||
}
|
||||
}
|
||||
const xmm& microRegAlloc::allocReg(int vfLoadReg, int vfWriteReg, int xyzw, bool cloneWrite) {
|
||||
counter++;
|
||||
if (vfLoadReg >= 0) { // Search For Cached Regs
|
||||
for (int i = 0; i < xmmTotal; i++) {
|
||||
const xmm& xmmi(xmm::GetInstance(i));
|
||||
microMapXMM& imap (xmmMap[i]);
|
||||
if ((imap.VFreg == vfLoadReg) && (!imap.xyzw // Reg Was Not Modified
|
||||
|| (imap.VFreg && (imap.xyzw==0xf)))) { // Reg Had All Vectors Modified and != VF0
|
||||
int z = i;
|
||||
if (vfWriteReg >= 0) { // Reg will be modified
|
||||
if (cloneWrite) { // Clone Reg so as not to use the same Cached Reg
|
||||
z = findFreeReg();
|
||||
const xmm& xmmz(xmm::GetInstance(z));
|
||||
writeBackReg(xmmz);
|
||||
if (z!=i && xyzw==8) xMOVAPS (xmmz, xmmi);
|
||||
else if (xyzw == 4) xPSHUF.D(xmmz, xmmi, 1);
|
||||
else if (xyzw == 2) xPSHUF.D(xmmz, xmmi, 2);
|
||||
else if (xyzw == 1) xPSHUF.D(xmmz, xmmi, 3);
|
||||
else if (z != i) xMOVAPS (xmmz, xmmi);
|
||||
imap.count = counter; // Reg i was used, so update counter
|
||||
}
|
||||
else { // Don't clone reg, but shuffle to adjust for SS ops
|
||||
if ((vfLoadReg != vfWriteReg) || (xyzw != 0xf)) { writeBackReg(xmmi); }
|
||||
if (xyzw == 4) xPSHUF.D(xmmi, xmmi, 1);
|
||||
else if (xyzw == 2) xPSHUF.D(xmmi, xmmi, 2);
|
||||
else if (xyzw == 1) xPSHUF.D(xmmi, xmmi, 3);
|
||||
}
|
||||
xmmMap[z].VFreg = vfWriteReg;
|
||||
xmmMap[z].xyzw = xyzw;
|
||||
}
|
||||
xmmMap[z].count = counter;
|
||||
xmmMap[z].isNeeded = 1;
|
||||
return xmm::GetInstance(z);
|
||||
}
|
||||
}
|
||||
}
|
||||
int x = findFreeReg();
|
||||
const xmm& xmmx = xmm::GetInstance(x);
|
||||
writeBackReg(xmmx);
|
||||
|
||||
if (vfWriteReg >= 0) { // Reg Will Be Modified (allow partial reg loading)
|
||||
if ((vfLoadReg == 0) && !(xyzw & 1)) { xPXOR(xmmx, xmmx); }
|
||||
else if (vfLoadReg == 33) mVU->loadIreg(xmmx, xyzw);
|
||||
else if (vfLoadReg == 32) mVUloadReg (xmmx, ptr[&mVU->regs().ACC], xyzw);
|
||||
else if (vfLoadReg >= 0) mVUloadReg (xmmx, ptr[&mVU->getVF(vfLoadReg)], xyzw);
|
||||
xmmMap[x].VFreg = vfWriteReg;
|
||||
xmmMap[x].xyzw = xyzw;
|
||||
}
|
||||
else { // Reg Will Not Be Modified (always load full reg for caching)
|
||||
if (vfLoadReg == 33) mVU->loadIreg(xmmx, 0xf);
|
||||
else if (vfLoadReg == 32) xMOVAPS(xmmx, ptr128[&mVU->regs().ACC]);
|
||||
else if (vfLoadReg >= 0) xMOVAPS(xmmx, ptr128[&mVU->getVF(vfLoadReg)]);
|
||||
xmmMap[x].VFreg = vfLoadReg;
|
||||
xmmMap[x].xyzw = 0;
|
||||
}
|
||||
xmmMap[x].count = counter;
|
||||
xmmMap[x].isNeeded = 1;
|
||||
return xmmx;
|
||||
}
|
@ -248,12 +248,14 @@ typedef u32 (__fastcall *mVUCall)(void*, void*);
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Reg Alloc
|
||||
#define doRegAlloc 1 // Set to 0 to flush every 64bit Instruction
|
||||
static const bool doRegAlloc = 1; // Set to 0 to flush every 32bit Instruction
|
||||
// This turns off reg alloc for the most part, but reg alloc will still
|
||||
// be done between Upper/Lower and within instructions...
|
||||
// be done within instructions... Also on doSwapOp() regAlloc is needed between
|
||||
// Lower and Upper instructions, so in this case it flushes after the full
|
||||
// 64bit instruction (lower and upper)
|
||||
|
||||
// No Flag Optimizations
|
||||
#define noFlagOpts 0 // Set to 1 to disable all flag setting optimizations
|
||||
static const bool noFlagOpts = 0; // Set to 1 to disable all flag setting optimizations
|
||||
// Note: The flag optimizations this disables should all be harmless, so
|
||||
// this option is mainly just for debugging... it effectively forces mVU
|
||||
// to always update Mac and Status Flags (both sticky and non-sticky) whenever
|
||||
@ -261,7 +263,7 @@ typedef u32 (__fastcall *mVUCall)(void*, void*);
|
||||
// flag instances between blocks...
|
||||
|
||||
// Constant Propagation
|
||||
#define CHECK_VU_CONSTPROP 0 // Set to 1 to turn on vi15 const propagation
|
||||
static const bool doConstProp = 0; // Set to 1 to turn on vi15 const propagation
|
||||
// Enables Constant Propagation for Jumps based on vi15 'link-register'
|
||||
// allowing us to know many indirect jump target addresses.
|
||||
// Makes GoW a lot slower due to extra recompilation time and extra code-gen!
|
||||
|
Loading…
Reference in New Issue
Block a user