- Code refactoring (mostly changing macros to functions/constants...)
- Made it so the disable-regAlloc option flushes after every 32-bit instruction, instead of after every 64-bit instruction (upper+lower instruction pair)

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@3713 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
cottonvibes 2010-09-01 04:57:10 +00:00
parent 807bd9659c
commit 009d6ba5e6
9 changed files with 258 additions and 255 deletions

View File

@ -647,10 +647,6 @@
RelativePath="..\..\x86\microVU_IR.h"
>
</File>
<File
RelativePath="..\..\x86\microVU_IR.inl"
>
</File>
<File
RelativePath="..\..\x86\microVU_Log.inl"
>

View File

@ -24,7 +24,6 @@
#include "microVU_Misc.inl"
#include "microVU_Log.inl"
#include "microVU_Analyze.inl"
#include "microVU_IR.inl"
#include "microVU_Alloc.inl"
#include "microVU_Upper.inl"
#include "microVU_Lower.inl"
@ -103,7 +102,7 @@ void microVU::init(uint vuIndex) {
dispCache = NULL;
cache = NULL;
cacheSize = mVUcacheSize;
regAlloc = new microRegAlloc(this);
regAlloc = new microRegAlloc(index);
for (u32 i = 0; i < (progSize / 2); i++) {
prog.prog[i] = new deque<microProgram*>();

View File

@ -90,7 +90,7 @@ public:
for (int i = 0; i <= listI; i++) {
if ((linkI->block->pState.q == pState->q)
&& (linkI->block->pState.p == pState->p)
&& ((linkI->block->pState.vi15 == pState->vi15) || !CHECK_VU_CONSTPROP)
&& ((linkI->block->pState.vi15 == pState->vi15) || !doConstProp)
&& (linkI->block->pState.flags == pState->flags)
&& (linkI->block->pState.xgkick == pState->xgkick)
&& (linkI->block->pState.viBackUp == pState->viBackUp)
@ -229,12 +229,6 @@ struct microVU {
return (((prog.IRinfo.curPC + 4) + (Imm11() * 2)) & progMemMask) * 4;
}
__ri void loadIreg(const xmm& reg, int xyzw)
{
xMOVSSZX(reg, ptr32[&getVI(REG_I)]);
if (!_XYZWss(xyzw)) xSHUF.PS(reg, reg, 0);
}
void init(uint vuIndex);
void reset();
void close();

View File

@ -470,7 +470,7 @@ __fi void mVUanalyzeNormBranch(mV, int It, bool isBAL) {
__ri void mVUanalyzeJump(mV, int Is, int It, bool isJALR) {
mVUbranchCheck(mVU);
mVUlow.branch = (isJALR) ? 10 : 9;
if (mVUconstReg[Is].isValid && CHECK_VU_CONSTPROP) {
if (mVUconstReg[Is].isValid && doConstProp) {
mVUlow.constJump.isValid = 1;
mVUlow.constJump.regValue = mVUconstReg[Is].regValue;
//DevCon.Status("microVU%d: Constant JR/JALR Address Optimization", mVU->index);

View File

@ -15,31 +15,19 @@
#pragma once
//------------------------------------------------------------------
// Helper Macros
//------------------------------------------------------------------
#define calcCycles(reg, x) { reg = ((reg > x) ? (reg - x) : 0); }
#define optimizeReg(rState) { rState = (rState==1) ? 0 : rState; }
#define tCycles(dest, src) { dest = aMax(dest, src); }
#define incP() { mVU->p = (mVU->p+1) & 1; }
#define incQ() { mVU->q = (mVU->q+1) & 1; }
#define doUpperOp() { mVUopU(mVU, 1); mVUdivSet(mVU); }
#define doLowerOp() { incPC(-1); mVUopL(mVU, 1); incPC(1); }
//------------------------------------------------------------------
// Messages Called at Execution Time...
//------------------------------------------------------------------
// Runtime warning/error printers, invoked via xCALL from recompiled code.
// NOTE(review): this span interleaves the pre- and post-refactor versions of
// these helpers (diff residue): mVUwarning0/1 and mVUprintPC1/2 each appear
// twice — confirm against the post-commit file before relying on this text.
static void __fastcall mVUbadOp0(mV) { Console.Error("microVU0 Warning: Exiting... Block started with illegal opcode. [%04x] [%x]", xPC, mVU->prog.cur); }
static void __fastcall mVUbadOp1(mV) { Console.Error("microVU1 Warning: Exiting... Block started with illegal opcode. [%04x] [%x]", xPC, mVU->prog.cur); }
static void __fastcall mVUwarning0(mV) { Console.Error("microVU0 Warning: Exiting from Possible Infinite Loop [%04x] [%x]", xPC, mVU->prog.cur); }
static void __fastcall mVUwarning1(mV) { Console.Error("microVU1 Warning: Exiting from Possible Infinite Loop [%04x] [%x]", xPC, mVU->prog.cur); }
static void __fastcall mVUprintPC1(u32 PC) { Console.WriteLn("Block Start PC = 0x%04x", PC); }
static void __fastcall mVUprintPC2(u32 PC) { Console.WriteLn("Block End PC = 0x%04x", PC); }
// Newer variants take the PC explicitly (loaded into gprT3 before the call;
// see handleBadOp) instead of reading the xPC macro.
static void __fastcall mVUbadOp0(mV, u32 PC) { Console.Error("microVU0 Warning: Exiting... Block started with illegal opcode. [%04x] [%x]", PC, mVU->prog.cur->idx); }
static void __fastcall mVUbadOp1(mV, u32 PC) { Console.Error("microVU1 Warning: Exiting... Block started with illegal opcode. [%04x] [%x]", PC, mVU->prog.cur->idx); }
// NOTE(review): the two warning lines below keep two format specifiers
// ([%04x] [%x]) but pass only one argument (mVU->prog.cur->idx) — a
// specifier/argument mismatch; the PC argument appears to have been dropped.
// TODO: fix upstream (either re-add the PC argument or drop one specifier).
static void __fastcall mVUwarning0(mV) { Console.Error("microVU0 Warning: Exiting from Possible Infinite Loop [%04x] [%x]", mVU->prog.cur->idx); }
static void __fastcall mVUwarning1(mV) { Console.Error("microVU1 Warning: Exiting from Possible Infinite Loop [%04x] [%x]", mVU->prog.cur->idx); }
static void __fastcall mVUprintPC1(u32 PC) { Console.WriteLn("Block Start PC = 0x%04x", PC); }
static void __fastcall mVUprintPC2(u32 PC) { Console.WriteLn("Block End PC = 0x%04x", PC); }
//------------------------------------------------------------------
// Helper Functions
// Program Range Checking and Setting up Ranges
//------------------------------------------------------------------
// Used by mVUsetupRange
@ -106,13 +94,13 @@ static void mVUsetupRange(microVU* mVU, s32 pc, bool isStartPC) {
}
}
// Per-instruction setup for mVU's first (analysis) pass: logs the M/D/T
// control bits found on the current instruction word, then clears the
// per-instruction analysis info and temporary pipeline-register state.
static __fi void startLoop(mV) {
if (curI & _Mbit_) { Console.WriteLn(Color_Green, "microVU%d: M-bit set!", getIndex); }
if (curI & _Dbit_) { DevCon.WriteLn (Color_Green, "microVU%d: D-bit set!", getIndex); }
if (curI & _Tbit_) { DevCon.WriteLn (Color_Green, "microVU%d: T-bit set!", getIndex); }
// Reset per-opcode info and temp pipeline-register state for this iteration
memzero(mVUinfo);
memzero(mVUregsTemp);
}
//------------------------------------------------------------------
// Execute VU Opcode/Instruction (Upper and Lower)
//------------------------------------------------------------------
// Compile the upper (first 32-bit half) instruction, then run mVUdivSet().
__ri void doUpperOp(mV) { mVUopU(mVU, 1); mVUdivSet(mVU); }
// Compile the lower instruction; PC is rewound one slot, then restored.
__ri void doLowerOp(mV) { incPC(-1); mVUopL(mVU, 1); incPC(1); }
// When reg-alloc is disabled (doRegAlloc == 0), write back and clear all
// cached xmm registers; otherwise this is a no-op.
__ri void flushRegs(mV) { if (!doRegAlloc) mVU->regAlloc->flushAll(); }
static void doIbit(mV) {
if (mVUup.iBit) {
@ -126,7 +114,7 @@ static void doIbit(mV) {
}
else tempI = curI;
xMOV(ptr32[&mVU->regs().VI[REG_I].UL], tempI);
xMOV(ptr32[&mVU->getVI(REG_I)], tempI);
incPC(1);
}
}
@ -150,16 +138,27 @@ static void doSwapOp(mV) {
mVU->regAlloc->clearNeeded(t3);
incPC(1);
doUpperOp();
doUpperOp(mVU);
const xmm& t4 = mVU->regAlloc->allocReg(-1, mVUlow.VF_write.reg, 0xf);
xMOVAPS(t4, t2);
mVU->regAlloc->clearNeeded(t4);
mVU->regAlloc->clearNeeded(t2);
}
else { mVUopL(mVU, 1); incPC(1); doUpperOp(); }
else { mVUopL(mVU, 1); incPC(1); flushRegs(mVU); doUpperOp(mVU); }
}
// Emits one full VU instruction pair (upper + lower 32-bit halves).
// flushRegs() only acts when reg-alloc is disabled, in which case the
// allocator is flushed between the two halves and again after the pair
// (per the commit note: flush per 32-bit instruction, except in doSwapOp
// where regs are needed across both halves).
static void mVUexecuteInstruction(mV) {
// Lower half is a NOP: just do the upper op (plus any I-bit handling)
if (mVUlow.isNOP) { incPC(1); doUpperOp(mVU); flushRegs(mVU); doIbit(mVU); }
// Normal ordering: upper op first, then lower op
elif(!mVUinfo.swapOps) { incPC(1); doUpperOp(mVU); flushRegs(mVU); doLowerOp(mVU); }
// Ops need special ordering: handled by doSwapOp()
else doSwapOp(mVU);
flushRegs(mVU);
}
//------------------------------------------------------------------
// Warnings / Errors / Illegal Instructions
//------------------------------------------------------------------
// If 1st op in block is a bad opcode, then don't compile rest of block (Dawn of Mana Level 2)
static __fi void mVUcheckBadOp(mV) {
if (mVUinfo.isBadOp && mVUcount == 0) {
@ -172,6 +171,7 @@ static __fi void mVUcheckBadOp(mV) {
static __fi void handleBadOp(mV, int count) {
if (mVUinfo.isBadOp && count == 0) {
xMOV(gprT2, (uptr)mVU);
xMOV(gprT3, xPC);
if (!isVU1) xCALL(mVUbadOp0);
else xCALL(mVUbadOp1);
}
@ -211,8 +211,21 @@ static __ri void eBitWarning(mV) {
incPC(-2);
}
//------------------------------------------------------------------
// Cycles / Pipeline State / Early Exit from Execution
//------------------------------------------------------------------
// Pipeline-state helpers (former macros, now typed inline functions).

// A remaining stall count of exactly 1 carries no information into the next
// block (mVU decrements it at loop start anyway), so collapse it to 0.
__fi void optimizeReg(u8& rState) {
	if (rState == 1) rState = 0;
}
// Subtract x cycles from a stall counter, clamping at zero.
__fi void calcCycles(u8& reg, u8 x) {
	if (reg > x) reg -= x;
	else         reg  = 0;
}
// Keep the larger of the two cycle counts in dest.
__fi void tCycles(u8& dest, u8& src) {
	dest = aMax(dest, src);
}
// Toggle the P instance bit between 0 and 1.
__fi void incP(mV) { mVU->p = (mVU->p + 1) & 1; }
// Toggle the Q instance bit between 0 and 1.
__fi void incQ(mV) { mVU->q = (mVU->q + 1) & 1; }
// Optimizes the End Pipeline State Removing Unnecessary Info
static __fi void mVUoptimizePipeState(mV) {
// If the cycles remaining is just '1', we don't have to transfer it to the next block
// because mVU automatically decrements this number at the start of its loop,
// so essentially '1' will be the same as '0'...
static void mVUoptimizePipeState(mV) {
for (int i = 0; i < 32; i++) {
optimizeReg(mVUregs.VF[i].x);
optimizeReg(mVUregs.VF[i].y);
@ -222,12 +235,12 @@ static __fi void mVUoptimizePipeState(mV) {
for (int i = 0; i < 16; i++) {
optimizeReg(mVUregs.VI[i]);
}
if (mVUregs.q) { optimizeReg(mVUregs.q); if (!mVUregs.q) { incQ(); } }
if (mVUregs.p) { optimizeReg(mVUregs.p); if (!mVUregs.p) { incP(); } }
if (mVUregs.q) { optimizeReg(mVUregs.q); if (!mVUregs.q) { incQ(mVU); } }
if (mVUregs.p) { optimizeReg(mVUregs.p); if (!mVUregs.p) { incP(mVU); } }
mVUregs.r = 0; // There are no stalls on the R-reg, so its Safe to discard info
}
__fi void mVUincCycles(mV, int x) {
static void mVUincCycles(mV, int x) {
mVUcycles += x;
for (int z = 31; z > 0; z--) {
calcCycles(mVUregs.VF[z].x, x);
@ -241,11 +254,11 @@ __fi void mVUincCycles(mV, int x) {
if (mVUregs.q) {
if (mVUregs.q > 4) { calcCycles(mVUregs.q, x); if (mVUregs.q <= 4) { mVUinfo.doDivFlag = 1; } }
else { calcCycles(mVUregs.q, x); }
if (!mVUregs.q) { incQ(); }
if (!mVUregs.q) { incQ(mVU); }
}
if (mVUregs.p) {
calcCycles(mVUregs.p, x);
if (!mVUregs.p || mVUregsTemp.p) { incP(); }
if (!mVUregs.p || mVUregsTemp.p) { incP(mVU); }
}
if (mVUregs.xgkick) {
calcCycles(mVUregs.xgkick, x);
@ -254,14 +267,13 @@ __fi void mVUincCycles(mV, int x) {
calcCycles(mVUregs.r, x);
}
#define cmpVFregs(VFreg1, VFreg2, xVar) { \
if (VFreg1.reg == VFreg2.reg) { \
if ((VFreg1.x && VFreg2.x) \
|| (VFreg1.y && VFreg2.y) \
|| (VFreg1.z && VFreg2.z) \
|| (VFreg1.w && VFreg2.w)) \
{ xVar = 1; } \
} \
// Sets xVar when the two VF operands overlap: same register index and at
// least one vector field (x/y/z/w) accessed by both. Never clears xVar.
void cmpVFregs(microVFreg& VFreg1, microVFreg& VFreg2, bool& xVar) {
	if (VFreg1.reg != VFreg2.reg) return; // different registers: no overlap
	const bool fieldsOverlap = (VFreg1.x && VFreg2.x)
							|| (VFreg1.y && VFreg2.y)
							|| (VFreg1.z && VFreg2.z)
							|| (VFreg1.w && VFreg2.w);
	if (fieldsOverlap) { xVar = 1; }
}
void mVUsetCycles(mV) {
@ -299,6 +311,15 @@ void mVUsetCycles(mV) {
tCycles(mVUregs.xgkick, mVUregsTemp.xgkick);
}
// Prints Start/End PC of blocks executed, for debugging...
static void mVUdebugPrintBlocks(microVU* mVU, bool isEndPC) {
// Only emit the debug call when block tracing (mVUdebugNow) is enabled
if (mVUdebugNow) {
// Pass the current PC in gprT2, then call the start- or end-PC printer
xMOV(gprT2, xPC);
if (isEndPC) xCALL(mVUprintPC2);
else xCALL(mVUprintPC1);
}
}
// vu0 is allowed to exit early, so are dev builds (for inf loops)
__fi bool doEarlyExit(microVU* mVU) {
return IsDevBuild || !isVU1;
@ -312,15 +333,6 @@ static __fi void mVUsavePipelineState(microVU* mVU) {
}
}
// Prints Start/End PC of blocks executed, for debugging...
static void mVUdebugPrintBlocks(microVU* mVU, bool isEndPC) {
if (mVUdebugNow) {
xMOV(gprT2, xPC);
if (isEndPC) xCALL(mVUprintPC2);
else xCALL(mVUprintPC1);
}
}
// Test cycles to see if we need to exit-early...
static void mVUtestCycles(microVU* mVU) {
iPC = mVUstartPC;
@ -332,8 +344,8 @@ static void mVUtestCycles(microVU* mVU) {
// xFowardJZ32 vu0jmp;
// xMOV(gprT2, (uptr)mVU);
// xCALL(mVUwarning0); // VU0 is allowed early exit for COP2 Interlock Simulation
mVUsavePipelineState(mVU);
mVUendProgram(mVU, NULL, 0);
mVUsavePipelineState(mVU);
mVUendProgram(mVU, NULL, 0);
// vu0jmp.SetTarget();
}
else {
@ -347,6 +359,19 @@ static void mVUtestCycles(microVU* mVU) {
xSUB(ptr32[&mVU->cycles], mVUcycles);
}
//------------------------------------------------------------------
// Initializing
//------------------------------------------------------------------
// This gets run at the start of every loop of mVU's first pass
static __fi void startLoop(mV) {
// Log the M/D/T control bits found on the current instruction word
if (curI & _Mbit_) { Console.WriteLn(Color_Green, "microVU%d: M-bit set!", getIndex); }
if (curI & _Dbit_) { DevCon.WriteLn (Color_Green, "microVU%d: D-bit set!", getIndex); }
if (curI & _Tbit_) { DevCon.WriteLn (Color_Green, "microVU%d: T-bit set!", getIndex); }
// Clear per-instruction analysis info and temp pipeline-register state
memzero(mVUinfo);
memzero(mVUregsTemp);
}
// Initialize VI Constants (vi15 propagates through blocks)
static __fi void mVUinitConstValues(microVU* mVU) {
for (int i = 0; i < 16; i++) {
@ -393,7 +418,7 @@ void* mVUcompile(microVU* mVU, u32 startPC, uptr pState) {
// First Pass
iPC = startPC / 4;
mVUsetupRange(mVU, startPC, 1); // Setup Program Bounds/Range
mVUsetupRange(mVU, startPC, 1); // Setup Program Bounds/Range
mVU->regAlloc->reset(); // Reset regAlloc
mVUinitFirstPass(mVU, pState, thisPtr);
for (int branch = 0; mVUcount < endCount; mVUcount++) {
@ -419,7 +444,7 @@ void* mVUcompile(microVU* mVU, u32 startPC, uptr pState) {
}
// Fix up vi15 const info for propagation through blocks
mVUregs.vi15 = (mVUconstReg[15].isValid && CHECK_VU_CONSTPROP) ? ((1<<31) | (mVUconstReg[15].regValue&0xffff)) : 0;
mVUregs.vi15 = (mVUconstReg[15].isValid && doConstProp) ? ((1<<31) | (mVUconstReg[15].regValue&0xffff)) : 0;
mVUsetFlags(mVU, mFC); // Sets Up Flag instances
mVUoptimizePipeState(mVU); // Optimize the End Pipeline State for nicer Block Linking
@ -434,11 +459,8 @@ void* mVUcompile(microVU* mVU, u32 startPC, uptr pState) {
for (; x < endCount; x++) {
if (mVUinfo.isEOB) { handleBadOp(mVU, x); x = 0xffff; }
if (mVUup.mBit) { xOR(ptr32[&mVU->regs().flags], VUFLAG_MFLAGSET); }
if (mVUlow.isNOP) { incPC(1); doUpperOp(); doIbit(mVU); }
else if (!mVUinfo.swapOps) { incPC(1); doUpperOp(); doLowerOp(); }
else { doSwapOp(mVU); }
mVUexecuteInstruction(mVU);
if (mVUinfo.doXGKICK) { mVU_XGKICK_DELAY(mVU, 1); }
if (!doRegAlloc) { mVU->regAlloc->flushAll(); }
if (isEvilBlock) { mVUsetupRange(mVU, xPC, 0); normJumpCompile(mVU, mFC, 1); return thisPtr; }
else if (!mVUinfo.isBdelay) { incPC(1); }
else {

View File

@ -286,7 +286,7 @@ void mVUflagPass(mV, u32 startPC, u32 sCount = 0, u32 found = 0) {
__fi void mVUsetFlagInfo(mV) {
branchType1 { incPC(-1); mVUflagPass(mVU, branchAddr); incPC(1); }
branchType2 { // This case can possibly be turned off via a hack for a small speedup...
if (!mVUlow.constJump.isValid || !CHECK_VU_CONSTPROP) { mVUregs.needExactMatch |= 0x7; }
if (!mVUlow.constJump.isValid || !doConstProp) { mVUregs.needExactMatch |= 0x7; }
else { mVUflagPass(mVU, (mVUlow.constJump.regValue*8)&(mVU->microMemSize-8)); }
}
branchType3 {

View File

@ -170,39 +170,194 @@ struct microMapXMM {
bool isNeeded; // Is needed for current instruction
};
#define xmmTotal 7 // Don't allocate PQ?
class microRegAlloc {
protected:
static const u32 xmmTotal = 7; // Don't allocate PQ?
microMapXMM xmmMap[xmmTotal];
int counter;
microVU* mVU;
int counter; // Current allocation count
int index; // VU0 or VU1
// Helper functions to get VU regs
VURegs& regs() const { return ::vuRegs[index]; }
__fi REG_VI& getVI(uint reg) const { return regs().VI[reg]; }
__fi VECTOR& getVF(uint reg) const { return regs().VF[reg]; }
__ri void loadIreg(const xmm& reg, int xyzw) {
xMOVSSZX(reg, ptr32[&getVI(REG_I)]);
if (!_XYZWss(xyzw)) xSHUF.PS(reg, reg, 0);
}
int findFreeRegRec(int startIdx);
int findFreeReg();
int findFreeRegRec(int startIdx) {
for (int i = startIdx; i < xmmTotal; i++) {
if (!xmmMap[i].isNeeded) {
int x = findFreeRegRec(i+1);
if (x == -1) return i;
return ((xmmMap[i].count < xmmMap[x].count) ? i : x);
}
}
return -1;
}
int findFreeReg() {
for (int i = 0; i < xmmTotal; i++) {
if (!xmmMap[i].isNeeded && (xmmMap[i].VFreg < 0)) {
return i; // Reg is not needed and was a temp reg
}
}
int x = findFreeRegRec(0);
pxAssumeDev( x >= 0, "microVU register allocation failure!" );
return x;
}
public:
microRegAlloc(microVU* _mVU);
microRegAlloc(int _index) {
index = _index;
}
void reset() {
for (int i = 0; i < xmmTotal; i++) {
clearReg(i);
}
counter = 0;
}
void flushAll(bool clearState = 1) {
for (int i = 0; i < xmmTotal; i++) {
writeBackReg(xmm(i));
if (clearState) clearReg(i);
}
}
void clearReg(int regId);
void clearReg(const xmm& reg) { clearReg(reg.Id); }
void clearReg(int regId) {
microMapXMM& clear( xmmMap[regId] );
clear.VFreg = -1;
clear.count = 0;
clear.xyzw = 0;
clear.isNeeded = 0;
}
void clearRegVF(int VFreg) {
for (int i = 0; i < xmmTotal; i++) {
if (xmmMap[i].VFreg == VFreg) clearReg(i);
}
}
void writeBackReg(const xmm& reg, bool invalidateRegs = 1);
void clearNeeded(const xmm& reg);
const xmm& allocReg(int vfLoadReg = -1, int vfWriteReg = -1, int xyzw = 0, bool cloneWrite = 1);
void writeBackReg(const xmm& reg, bool invalidateRegs = 1) {
microMapXMM& write( xmmMap[reg.Id] );
if ((write.VFreg > 0) && write.xyzw) { // Reg was modified and not Temp or vf0
if (write.VFreg == 33) xMOVSS(ptr32[&getVI(REG_I)], reg);
else if (write.VFreg == 32) mVUsaveReg(reg, ptr[&regs().ACC], write.xyzw, 1);
else mVUsaveReg(reg, ptr[&getVF(write.VFreg)], write.xyzw, 1);
if (invalidateRegs) {
for (int i = 0; i < xmmTotal; i++) {
microMapXMM& imap (xmmMap[i]);
if ((i == reg.Id) || imap.isNeeded) continue;
if (imap.VFreg == write.VFreg) {
if (imap.xyzw && imap.xyzw < 0xf) DevCon.Error("microVU Error: writeBackReg() [%d]", imap.VFreg);
clearReg(i); // Invalidate any Cached Regs of same vf Reg
}
}
}
if (write.xyzw == 0xf) { // Make Cached Reg if All Vectors were Modified
write.count = counter;
write.xyzw = 0;
write.isNeeded = 0;
return;
}
}
clearReg(reg); // Clear Reg
}
void clearNeeded(const xmm& reg) {
if ((reg.Id < 0) || (reg.Id >= xmmTotal)) return;
microMapXMM& clear (xmmMap[reg.Id]);
clear.isNeeded = 0;
if (clear.xyzw) { // Reg was modified
if (clear.VFreg > 0) {
int mergeRegs = 0;
if (clear.xyzw < 0xf) { mergeRegs = 1; } // Try to merge partial writes
for (int i = 0; i < xmmTotal; i++) { // Invalidate any other read-only regs of same vfReg
if (i == reg.Id) continue;
microMapXMM& imap (xmmMap[i]);
if (imap.VFreg == clear.VFreg) {
if (imap.xyzw && imap.xyzw < 0xf) DevCon.Error("microVU Error: clearNeeded() [%d]", imap.VFreg);
if (mergeRegs == 1) {
mVUmergeRegs(xmm(i), reg, clear.xyzw, 1);
imap.xyzw = 0xf;
imap.count = counter;
mergeRegs = 2;
}
else clearReg(i);
}
}
if (mergeRegs == 2) clearReg(reg); // Clear Current Reg if Merged
else if (mergeRegs) writeBackReg(reg); // Write Back Partial Writes if couldn't merge
}
else clearReg(reg); // If Reg was temp or vf0, then invalidate itself
}
}
const xmm& allocReg(int vfLoadReg = -1, int vfWriteReg = -1, int xyzw = 0, bool cloneWrite = 1) {
counter++;
if (vfLoadReg >= 0) { // Search For Cached Regs
for (int i = 0; i < xmmTotal; i++) {
const xmm& xmmi(xmm::GetInstance(i));
microMapXMM& imap (xmmMap[i]);
if ((imap.VFreg == vfLoadReg) && (!imap.xyzw // Reg Was Not Modified
|| (imap.VFreg && (imap.xyzw==0xf)))) { // Reg Had All Vectors Modified and != VF0
int z = i;
if (vfWriteReg >= 0) { // Reg will be modified
if (cloneWrite) { // Clone Reg so as not to use the same Cached Reg
z = findFreeReg();
const xmm& xmmz(xmm::GetInstance(z));
writeBackReg(xmmz);
if (z!=i && xyzw==8) xMOVAPS (xmmz, xmmi);
else if (xyzw == 4) xPSHUF.D(xmmz, xmmi, 1);
else if (xyzw == 2) xPSHUF.D(xmmz, xmmi, 2);
else if (xyzw == 1) xPSHUF.D(xmmz, xmmi, 3);
else if (z != i) xMOVAPS (xmmz, xmmi);
imap.count = counter; // Reg i was used, so update counter
}
else { // Don't clone reg, but shuffle to adjust for SS ops
if ((vfLoadReg != vfWriteReg) || (xyzw != 0xf)) { writeBackReg(xmmi); }
if (xyzw == 4) xPSHUF.D(xmmi, xmmi, 1);
else if (xyzw == 2) xPSHUF.D(xmmi, xmmi, 2);
else if (xyzw == 1) xPSHUF.D(xmmi, xmmi, 3);
}
xmmMap[z].VFreg = vfWriteReg;
xmmMap[z].xyzw = xyzw;
}
xmmMap[z].count = counter;
xmmMap[z].isNeeded = 1;
return xmm::GetInstance(z);
}
}
}
int x = findFreeReg();
const xmm& xmmx = xmm::GetInstance(x);
writeBackReg(xmmx);
if (vfWriteReg >= 0) { // Reg Will Be Modified (allow partial reg loading)
if ((vfLoadReg == 0) && !(xyzw & 1)) { xPXOR(xmmx, xmmx); }
else if (vfLoadReg == 33) loadIreg (xmmx, xyzw);
else if (vfLoadReg == 32) mVUloadReg(xmmx, ptr[&regs().ACC], xyzw);
else if (vfLoadReg >= 0) mVUloadReg(xmmx, ptr[&getVF(vfLoadReg)], xyzw);
xmmMap[x].VFreg = vfWriteReg;
xmmMap[x].xyzw = xyzw;
}
else { // Reg Will Not Be Modified (always load full reg for caching)
if (vfLoadReg == 33) loadIreg(xmmx, 0xf);
else if (vfLoadReg == 32) xMOVAPS (xmmx, ptr128[&regs().ACC]);
else if (vfLoadReg >= 0) xMOVAPS (xmmx, ptr128[&getVF(vfLoadReg)]);
xmmMap[x].VFreg = vfLoadReg;
xmmMap[x].xyzw = 0;
}
xmmMap[x].count = counter;
xmmMap[x].isNeeded = 1;
return xmmx;
}
};

View File

@ -1,165 +0,0 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2010 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
// Scans xmmMap[startIdx..] for registers that are not currently needed and
// returns the one with the lowest use-count; ties resolve to the higher
// index (matching the original recursive formulation). Returns -1 if every
// register is needed.
int microRegAlloc::findFreeRegRec(int startIdx) {
	int best = -1;
	for (int i = startIdx; i < xmmTotal; i++) {
		if (xmmMap[i].isNeeded) continue;
		if ((best == -1) || (xmmMap[i].count <= xmmMap[best].count))
			best = i;
	}
	return best;
}
// Picks an xmm register to hand out. First preference: a register that is
// neither needed nor mapped to any VF reg (a free temp). Otherwise fall back
// to the least-recently-used non-needed register via findFreeRegRec().
int microRegAlloc::findFreeReg() {
	for (int i = 0; i < xmmTotal; i++) {
		const bool isTemp = (xmmMap[i].VFreg < 0);
		if (isTemp && !xmmMap[i].isNeeded)
			return i; // unneeded temp reg: reuse immediately
	}
	const int lru = findFreeRegRec(0);
	pxAssumeDev( lru >= 0, "microVU register allocation failure!" );
	return lru;
}
// Binds this allocator to its owning microVU instance.
microRegAlloc::microRegAlloc(microVU* _mVU)
	: mVU(_mVU)
{
}
void microRegAlloc::clearReg(int regId) {
microMapXMM& clear( xmmMap[regId] );
clear.VFreg = -1;
clear.count = 0;
clear.xyzw = 0;
clear.isNeeded = 0;
}
// Writes a cached xmm register's modified vectors back to VU state
// (VFreg 33 = the I reg, 32 = ACC, otherwise VF[n]). When invalidateRegs is
// set, any other non-needed xmm slot caching the same VF reg is cleared.
// A fully-modified reg (xyzw == 0xf) stays mapped as a clean cached copy;
// anything else is cleared after write-back.
void microRegAlloc::writeBackReg(const xmm& reg, bool invalidateRegs) {
microMapXMM& write( xmmMap[reg.Id] );
if ((write.VFreg > 0) && write.xyzw) { // Reg was modified and not Temp or vf0
// Store back only the modified vectors of the mapped register
if (write.VFreg == 33) xMOVSS(ptr32[&mVU->getVI(REG_I)], reg);
else if (write.VFreg == 32) mVUsaveReg(reg, ptr[&mVU->regs().ACC], write.xyzw, 1);
else mVUsaveReg(reg, ptr[&mVU->getVF(write.VFreg)], write.xyzw, 1);
if (invalidateRegs) {
for (int i = 0; i < xmmTotal; i++) {
microMapXMM& imap (xmmMap[i]);
if ((i == reg.Id) || imap.isNeeded) continue;
if (imap.VFreg == write.VFreg) {
// A second partially-modified copy of the same VF reg shouldn't exist
if (imap.xyzw && imap.xyzw < 0xf) DevCon.Error("microVU Error: writeBackReg() [%d]", imap.VFreg);
clearReg(i); // Invalidate any Cached Regs of same vf Reg
}
}
}
if (write.xyzw == 0xf) { // Make Cached Reg if All Vectors were Modified
write.count = counter;
write.xyzw = 0; // xyzw == 0 marks the cached copy as unmodified
write.isNeeded = 0;
return;
}
}
clearReg(reg); // Clear Reg
}
// Releases an xmm register once the current instruction is done with it.
// If the reg holds modified vectors of a real VF reg: partial writes are
// merged into another cached copy of the same VF reg when one exists,
// otherwise written back; read-only duplicates of that VF reg are cleared.
// Temp regs and vf0 are simply invalidated.
void microRegAlloc::clearNeeded(const xmm& reg)
{
if ((reg.Id < 0) || (reg.Id >= xmmTotal)) return;
microMapXMM& clear (xmmMap[reg.Id]);
clear.isNeeded = 0;
if (clear.xyzw) { // Reg was modified
if (clear.VFreg > 0) {
// mergeRegs: 0 = no merge, 1 = try to merge, 2 = merged successfully
int mergeRegs = 0;
if (clear.xyzw < 0xf) { mergeRegs = 1; } // Try to merge partial writes
for (int i = 0; i < xmmTotal; i++) { // Invalidate any other read-only regs of same vfReg
if (i == reg.Id) continue;
microMapXMM& imap (xmmMap[i]);
if (imap.VFreg == clear.VFreg) {
if (imap.xyzw && imap.xyzw < 0xf) DevCon.Error("microVU Error: clearNeeded() [%d]", imap.VFreg);
if (mergeRegs == 1) {
// Fold our modified vectors into the other cached copy
mVUmergeRegs(xmm(i), reg, clear.xyzw, 1);
imap.xyzw = 0xf; // the other copy now holds all modified vectors
imap.count = counter;
mergeRegs = 2;
}
else clearReg(i);
}
}
if (mergeRegs == 2) clearReg(reg); // Clear Current Reg if Merged
else if (mergeRegs) writeBackReg(reg); // Write Back Partial Writes if couldn't merge
}
else clearReg(reg); // If Reg was temp or vf0, then invalidate itself
}
}
// Allocates an xmm register for a VU operand.
//   vfLoadReg:  register to load (-1 = none, 32 = ACC, 33 = the I reg)
//   vfWriteReg: register the instruction will write (-1 = read-only use)
//   xyzw:       vector-field write mask for the write reg
//   cloneWrite: copy a cached reg before modifying it, so the clean cached
//               copy remains usable by later instructions
// First tries to reuse a cached copy of vfLoadReg; otherwise picks a free
// reg via findFreeReg() and loads from memory. The returned reg is marked
// isNeeded until clearNeeded() releases it.
const xmm& microRegAlloc::allocReg(int vfLoadReg, int vfWriteReg, int xyzw, bool cloneWrite) {
counter++;
if (vfLoadReg >= 0) { // Search For Cached Regs
for (int i = 0; i < xmmTotal; i++) {
const xmm& xmmi(xmm::GetInstance(i));
microMapXMM& imap (xmmMap[i]);
if ((imap.VFreg == vfLoadReg) && (!imap.xyzw // Reg Was Not Modified
|| (imap.VFreg && (imap.xyzw==0xf)))) { // Reg Had All Vectors Modified and != VF0
int z = i;
if (vfWriteReg >= 0) { // Reg will be modified
if (cloneWrite) { // Clone Reg so as not to use the same Cached Reg
z = findFreeReg();
const xmm& xmmz(xmm::GetInstance(z));
writeBackReg(xmmz);
// Shuffle single-scalar (SS) cases so the wanted field lands in slot x
if (z!=i && xyzw==8) xMOVAPS (xmmz, xmmi);
else if (xyzw == 4) xPSHUF.D(xmmz, xmmi, 1);
else if (xyzw == 2) xPSHUF.D(xmmz, xmmi, 2);
else if (xyzw == 1) xPSHUF.D(xmmz, xmmi, 3);
else if (z != i) xMOVAPS (xmmz, xmmi);
imap.count = counter; // Reg i was used, so update counter
}
else { // Don't clone reg, but shuffle to adjust for SS ops
if ((vfLoadReg != vfWriteReg) || (xyzw != 0xf)) { writeBackReg(xmmi); }
if (xyzw == 4) xPSHUF.D(xmmi, xmmi, 1);
else if (xyzw == 2) xPSHUF.D(xmmi, xmmi, 2);
else if (xyzw == 1) xPSHUF.D(xmmi, xmmi, 3);
}
xmmMap[z].VFreg = vfWriteReg;
xmmMap[z].xyzw = xyzw;
}
xmmMap[z].count = counter;
xmmMap[z].isNeeded = 1;
return xmm::GetInstance(z);
}
}
}
// No usable cached copy: claim a free register and load from VU memory
int x = findFreeReg();
const xmm& xmmx = xmm::GetInstance(x);
writeBackReg(xmmx);
if (vfWriteReg >= 0) { // Reg Will Be Modified (allow partial reg loading)
if ((vfLoadReg == 0) && !(xyzw & 1)) { xPXOR(xmmx, xmmx); }
else if (vfLoadReg == 33) mVU->loadIreg(xmmx, xyzw);
else if (vfLoadReg == 32) mVUloadReg (xmmx, ptr[&mVU->regs().ACC], xyzw);
else if (vfLoadReg >= 0) mVUloadReg (xmmx, ptr[&mVU->getVF(vfLoadReg)], xyzw);
xmmMap[x].VFreg = vfWriteReg;
xmmMap[x].xyzw = xyzw;
}
else { // Reg Will Not Be Modified (always load full reg for caching)
if (vfLoadReg == 33) mVU->loadIreg(xmmx, 0xf);
else if (vfLoadReg == 32) xMOVAPS(xmmx, ptr128[&mVU->regs().ACC]);
else if (vfLoadReg >= 0) xMOVAPS(xmmx, ptr128[&mVU->getVF(vfLoadReg)]);
xmmMap[x].VFreg = vfLoadReg;
xmmMap[x].xyzw = 0; // full clean load: nothing marked modified
}
xmmMap[x].count = counter;
xmmMap[x].isNeeded = 1;
return xmmx;
}

View File

@ -248,12 +248,14 @@ typedef u32 (__fastcall *mVUCall)(void*, void*);
//------------------------------------------------------------------
// Reg Alloc
#define doRegAlloc 1 // Set to 0 to flush every 64bit Instruction
static const bool doRegAlloc = 1; // Set to 0 to flush every 32bit Instruction
// This turns off reg alloc for the most part, but reg alloc will still
// be done between Upper/Lower and within instructions...
// be done within instructions... Also on doSwapOp() regAlloc is needed between
// Lower and Upper instructions, so in this case it flushes after the full
// 64bit instruction (lower and upper)
// No Flag Optimizations
#define noFlagOpts 0 // Set to 1 to disable all flag setting optimizations
static const bool noFlagOpts = 0; // Set to 1 to disable all flag setting optimizations
// Note: The flag optimizations this disables should all be harmless, so
// this option is mainly just for debugging... it effectively forces mVU
// to always update Mac and Status Flags (both sticky and non-sticky) whenever
@ -261,7 +263,7 @@ typedef u32 (__fastcall *mVUCall)(void*, void*);
// flag instances between blocks...
// Constant Propagation
#define CHECK_VU_CONSTPROP 0 // Set to 1 to turn on vi15 const propagation
static const bool doConstProp = 0; // Set to 1 to turn on vi15 const propagation
// Enables Constant Propagation for Jumps based on vi15 'link-register'
// allowing us to know many indirect jump target addresses.
// Makes GoW a lot slower due to extra recompilation time and extra code-gen!