mirror of
https://github.com/libretro/pcsx2.git
synced 2024-12-23 02:08:27 +00:00
microVU:
- Tried a complex optimization with status flag updating. It didn't seem to work out as well as I thought. I need some benchmarks of mVU (w/o hacks) before this update and after this update to see how effective it is. If it's not much faster I might just revert the change (because I'm sure no one is going to understand how it works this way). git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1300 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
e85124dff5
commit
ebff9ec1c5
@ -98,9 +98,10 @@ struct microProgManager {
|
||||
#define mVUcacheSize (0x2000000 / ((vuIndex) ? 1 : 4))
|
||||
struct microVU {
|
||||
|
||||
PCSX2_ALIGNED16(u32 macFlag[4]); // 4 instances of mac flag (used in execution)
|
||||
PCSX2_ALIGNED16(u32 clipFlag[4]); // 4 instances of clip flag (used in execution)
|
||||
PCSX2_ALIGNED16(u32 xmmPQb[4]); // Backup for xmmPQ
|
||||
PCSX2_ALIGNED16(u32 statusFlag[4]); // 4 instances of status flag (used in execution)
|
||||
PCSX2_ALIGNED16(u32 macFlag[4]); // 4 instances of mac flag (used in execution)
|
||||
PCSX2_ALIGNED16(u32 clipFlag[4]); // 4 instances of clip flag (used in execution)
|
||||
PCSX2_ALIGNED16(u32 xmmPQb[4]); // Backup for xmmPQ
|
||||
|
||||
u32 index; // VU Index (VU0 or VU1)
|
||||
u32 microSize; // VU Micro Memory Size
|
||||
|
@ -634,28 +634,13 @@ microVUt(void) mVUallocFMAC26b(mV, int& ACCw, int& ACCr) {
|
||||
// Flag Allocators
|
||||
//------------------------------------------------------------------
|
||||
|
||||
#define getFlagReg(regX, fInst) { \
|
||||
switch (fInst) { \
|
||||
case 0: regX = gprF0; break; \
|
||||
case 1: regX = gprF1; break; \
|
||||
case 2: regX = gprF2; break; \
|
||||
case 3: regX = gprF3; break; \
|
||||
default: \
|
||||
Console::Error("microVU: Flag Instance Error (fInst = %d)", params fInst); \
|
||||
regX = gprF0; \
|
||||
break; \
|
||||
} \
|
||||
microVUt(void) mVUallocSFLAGa(mV, int reg, int fInstance) {
|
||||
MOVZX32M16toR(reg, (uptr)&mVU->statusFlag[fInstance]);
|
||||
}
|
||||
|
||||
microVUt(void) mVUallocSFLAGa(int reg, int fInstance) {
|
||||
getFlagReg(fInstance, fInstance);
|
||||
MOVZX32R16toR(reg, fInstance);
|
||||
}
|
||||
|
||||
microVUt(void) mVUallocSFLAGb(int reg, int fInstance) {
|
||||
getFlagReg(fInstance, fInstance);
|
||||
microVUt(void) mVUallocSFLAGb(mV, int reg, int fInstance) {
|
||||
//AND32ItoR(reg, 0xffff);
|
||||
MOV32RtoR(fInstance, reg);
|
||||
MOV32RtoM((uptr)&mVU->statusFlag[fInstance], reg);
|
||||
}
|
||||
|
||||
microVUt(void) mVUallocMFLAGa(mV, int reg, int fInstance) {
|
||||
|
@ -62,7 +62,8 @@
|
||||
}
|
||||
|
||||
microVUt(void) mVUanalyzeFMAC1(mV, int Fd, int Fs, int Ft) {
|
||||
sFLAG.doFlag = 1;
|
||||
mVUup.doFlags = 1;
|
||||
sFLAG.doSticky = 1;
|
||||
analyzeReg1(Fs);
|
||||
analyzeReg1(Ft);
|
||||
analyzeReg2(Fd, 0);
|
||||
@ -91,7 +92,8 @@ microVUt(void) mVUanalyzeFMAC2(mV, int Fs, int Ft) {
|
||||
}
|
||||
|
||||
microVUt(void) mVUanalyzeFMAC3(mV, int Fd, int Fs, int Ft) {
|
||||
sFLAG.doFlag = 1;
|
||||
mVUup.doFlags = 1;
|
||||
sFLAG.doSticky = 1;
|
||||
analyzeReg1(Fs);
|
||||
analyzeReg3(Ft);
|
||||
analyzeReg2(Fd, 0);
|
||||
@ -269,6 +271,15 @@ microVUt(void) mVUanalyzeR2(mV, int Ft, bool canBeNOP) {
|
||||
// Sflag - Status Flag Opcodes
|
||||
//------------------------------------------------------------------
|
||||
|
||||
#define setFlagInst(xDoFlag) { \
|
||||
int curPC = iPC; \
|
||||
for (int i = mVUcount, j = 0; i > 0; i--, j++) { \
|
||||
incPC2(-2); \
|
||||
if (mVUup.doFlags) { xDoFlag = 1; if (j >= 3) { break; } } \
|
||||
} \
|
||||
iPC = curPC; \
|
||||
}
|
||||
|
||||
microVUt(void) mVUanalyzeSflag(mV, int It) {
|
||||
if (!It) { mVUlow.isNOP = 1; }
|
||||
else {
|
||||
@ -279,17 +290,15 @@ microVUt(void) mVUanalyzeSflag(mV, int It) {
|
||||
// Note: useSflag is used for status flag optimizations when a FSSET instruction is called.
|
||||
// Due to stalls, it can only be set one instruction prior to the status flag read instruction
|
||||
// if we were guaranteed that no stalls would happen, it could be set 4 instructions prior.
|
||||
setFlagInst(sFLAG.doFlag);
|
||||
}
|
||||
analyzeVIreg3(It, 1);
|
||||
}
|
||||
|
||||
microVUt(void) mVUanalyzeFSSET(mV) {
|
||||
mVUlow.isFSSET = 1;
|
||||
// mVUinfo &= ~_doStatus;
|
||||
// Note: I'm not entirely sure if the non-sticky flags
|
||||
// should be taken from the current upper instruction
|
||||
// or if they should be taken from the previous instruction
|
||||
// Uncomment the above line if the latter-case is true
|
||||
mVUinfo.swapOps = 1;
|
||||
mVUlow.isFSSET = 1;
|
||||
sFLAG.doSticky = 0;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
@ -301,12 +310,7 @@ microVUt(void) mVUanalyzeMflag(mV, int Is, int It) {
|
||||
else { // Need set _doMac for 4 previous Ops (need to do all 4 because stalls could change the result needed)
|
||||
mVUinfo.swapOps = 1;
|
||||
if (mVUcount < 4) { mVUpBlock->pState.needExactMatch |= 0xf << (/*mVUcount +*/ 4); }
|
||||
int curPC = iPC;
|
||||
for (int i = mVUcount, j = 0; i > 0; i--, j++) {
|
||||
incPC2(-2);
|
||||
if (sFLAG.doFlag) { mFLAG.doFlag = 1; if (j >= 3) { break; } }
|
||||
}
|
||||
iPC = curPC;
|
||||
setFlagInst(mFLAG.doFlag);
|
||||
}
|
||||
analyzeVIreg1(Is);
|
||||
analyzeVIreg3(It, 1);
|
||||
|
@ -59,7 +59,7 @@
|
||||
#define tCycles(dest, src) { dest = aMax(dest, src); }
|
||||
#define incP() { mVU->p = (mVU->p+1) & 1; }
|
||||
#define incQ() { mVU->q = (mVU->q+1) & 1; }
|
||||
#define doUpperOp() { mVUopU(mVU, 1); mVUdivSet(mVU); }
|
||||
#define doUpperOp() { mVUdivSet(mVU); mVUopU(mVU, 1); }
|
||||
#define doLowerOp() { incPC(-1); mVUopL(mVU, 1); incPC(1); }
|
||||
#define doIbit() { if (mVUup.iBit) { incPC(-1); MOV32ItoM((uptr)&mVU->regs->VI[REG_I].UL, curI); incPC(1); } }
|
||||
|
||||
@ -169,8 +169,7 @@ microVUt(void) mVUendProgram(mV, int qInst, int pInst, int fStatus, int fMac, in
|
||||
|
||||
// Save Flag Instances
|
||||
if (!mVUflagHack) {
|
||||
getFlagReg(fStatus, fStatus);
|
||||
MOV32RtoM((uptr)&mVU->regs->VI[REG_STATUS_FLAG].UL, fStatus);
|
||||
MOV32RtoM((uptr)&mVU->regs->VI[REG_STATUS_FLAG].UL, gprST);
|
||||
}
|
||||
mVUallocMFLAGa(mVU, gprT1, fMac);
|
||||
mVUallocCFLAGa(mVU, gprT2, fClip);
|
||||
@ -200,7 +199,6 @@ microVUt(void) mVUtestCycles(mV) {
|
||||
MOV32ItoR(gprT2, xPC);
|
||||
if (!isVU1) CALLFunc((uptr)mVUwarning0);
|
||||
else CALLFunc((uptr)mVUwarning1);
|
||||
MOV32ItoR(gprR, Roffset); // Restore gprR
|
||||
mVUendProgram(mVU, 0, 0, sI, 0, cI);
|
||||
x86SetJ8(jmp8);
|
||||
}
|
||||
@ -322,7 +320,7 @@ microVUf(void*) __fastcall mVUcompile(u32 startPC, uptr pState) {
|
||||
|
||||
mVUbackupRegs(mVU);
|
||||
MOV32MtoR(gprT2, (uptr)&mVU->branch); // Get startPC (ECX first argument for __fastcall)
|
||||
MOV32ItoR(gprR, (u32)&pBlock->pStateEnd); // Get pState (EDX second argument for __fastcall)
|
||||
MOV32ItoR(gprT3, (u32)&pBlock->pStateEnd); // Get pState (EDX second argument for __fastcall)
|
||||
|
||||
if (!isVU1) CALLFunc((uptr)mVUcompileVU0); //(u32 startPC, uptr pState)
|
||||
else CALLFunc((uptr)mVUcompileVU1);
|
||||
@ -375,12 +373,12 @@ eBitTemination:
|
||||
memset(&mVUinfo, 0, sizeof(mVUinfo));
|
||||
incCycles(100); // Ensures Valid P/Q instances (And sets all cycle data to 0)
|
||||
mVUcycles -= 100;
|
||||
if (mVUinfo.doDivFlag) {
|
||||
/*if (mVUinfo.doDivFlag) {
|
||||
int flagReg;
|
||||
getFlagReg(flagReg, lStatus);
|
||||
AND32ItoR (flagReg, 0x0fcf);
|
||||
OR32MtoR (flagReg, (uptr)&mVU->divFlag);
|
||||
}
|
||||
}*/
|
||||
if (mVUinfo.doXGKICK) { mVU_XGKICK_DELAY(mVU, 1); }
|
||||
|
||||
// Do E-bit end stuff here
|
||||
|
@ -41,11 +41,11 @@ microVUt(void) mVUdispatcherA(mV) {
|
||||
|
||||
// Load Regs
|
||||
MOV32ItoR(gprR, Roffset); // Load VI Reg Offset
|
||||
MOV32MtoR(gprF0, (uptr)&mVU->regs->VI[REG_STATUS_FLAG].UL);
|
||||
AND32ItoR(gprF0, 0xffff);
|
||||
MOV32RtoR(gprF1, gprF0);
|
||||
MOV32RtoR(gprF2, gprF0);
|
||||
MOV32RtoR(gprF3, gprF0);
|
||||
MOV32MtoR(gprST, (uptr)&mVU->regs->VI[REG_STATUS_FLAG].UL);
|
||||
/*AND32ItoR(gprST, 0xffff);
|
||||
MOV32RtoR(gprF1, gprST);
|
||||
MOV32RtoR(gprF2, gprST);
|
||||
MOV32RtoR(gprF3, gprST);*/
|
||||
|
||||
SSE_MOVAPS_M128_to_XMM(xmmT1, (uptr)&mVU->regs->VI[REG_MAC_FLAG].UL);
|
||||
SSE_SHUFPS_XMM_to_XMM (xmmT1, xmmT1, 0);
|
||||
|
@ -20,12 +20,9 @@
|
||||
|
||||
// Sets FDIV Flags at the proper time
|
||||
microVUt(void) mVUdivSet(mV) {
|
||||
int flagReg1, flagReg2;
|
||||
if (mVUinfo.doDivFlag) {
|
||||
getFlagReg(flagReg1, sFLAG.write);
|
||||
if (!sFLAG.doFlag) { getFlagReg(flagReg2, sFLAG.lastWrite); MOV32RtoR(flagReg1, flagReg2); }
|
||||
AND32ItoR(flagReg1, 0x0fcf);
|
||||
OR32MtoR (flagReg1, (uptr)&mVU->divFlag);
|
||||
AND32ItoR(gprST, 0xfffcffff); // Clear D/I bits
|
||||
OR32MtoR (gprST, (uptr)&mVU->divFlag); // Set DS/IS/D/I bits
|
||||
}
|
||||
}
|
||||
|
||||
@ -34,18 +31,19 @@ microVUt(void) mVUstatusFlagOp(mV) {
|
||||
int curPC = iPC;
|
||||
int i = mVUcount;
|
||||
bool runLoop = 1;
|
||||
if (sFLAG.doFlag) { mVUlow.useSflag = 1; }
|
||||
if (mVUup.doFlags) { mVUlow.useSflag = 1; }
|
||||
else {
|
||||
for (; i > 0; i--) {
|
||||
incPC2(-2);
|
||||
if (mVUlow.useSflag) { runLoop = 0; break; }
|
||||
if (sFLAG.doFlag) { mVUlow.useSflag = 1; break; }
|
||||
if (mVUup.doFlags) { mVUlow.useSflag = 1; break; }
|
||||
}
|
||||
}
|
||||
if (runLoop) {
|
||||
for (; i > 0; i--) {
|
||||
incPC2(-2);
|
||||
if (mVUlow.useSflag) break;
|
||||
sFLAG.doSticky = 0;
|
||||
sFLAG.doFlag = 0;
|
||||
}
|
||||
}
|
||||
@ -79,7 +77,11 @@ microVUt(int) mVUsetFlags(mV, int* xStatus, int* xMac, int* xClip) {
|
||||
|
||||
// Ensure last ~4+ instructions update mac flags (if next block's first 4 instructions will read them)
|
||||
for (int i = mVUcount; i > 0; i--, aCount++) {
|
||||
if (sFLAG.doFlag) { if (__Mac) { mFLAG.doFlag = 1; } if (aCount >= 4) { break; } }
|
||||
if (mVUup.doFlags) {
|
||||
if (__Status) { sFLAG.doFlag = 1; }
|
||||
if (__Mac) { mFLAG.doFlag = 1; }
|
||||
if (aCount >= 4) { break; }
|
||||
}
|
||||
incPC2(-2);
|
||||
}
|
||||
|
||||
@ -142,13 +144,13 @@ microVUt(int) mVUsetFlags(mV, int* xStatus, int* xMac, int* xClip) {
|
||||
incPC2(2);
|
||||
}
|
||||
|
||||
mVUregs.flags = ((__Clip) ? 0 : (xC << 2)) | ((__Status) ? 0 : xS);
|
||||
mVUregs.flags = ((__Clip) ? 0 : (xC << 2)) /*| ((__Status) ? 0 : xS)*/;
|
||||
return cycles;
|
||||
}
|
||||
|
||||
#define getFlagReg1(x) ((x == 3) ? gprF3 : ((x == 2) ? gprF2 : ((x == 1) ? gprF1 : gprF0)))
|
||||
#define shuffleMac ((bMac [3]<<6)|(bMac [2]<<4)|(bMac [1]<<2)|bMac [0])
|
||||
#define shuffleClip ((bClip[3]<<6)|(bClip[2]<<4)|(bClip[1]<<2)|bClip[0])
|
||||
#define shuffleStatus ((bStatus[3]<<6)|(bStatus[2]<<4)|(bStatus[1]<<2)|bStatus[0])
|
||||
#define shuffleMac ((bMac [3]<<6)|(bMac [2]<<4)|(bMac [1]<<2)|bMac [0])
|
||||
#define shuffleClip ((bClip [3]<<6)|(bClip [2]<<4)|(bClip [1]<<2)|bClip [0])
|
||||
|
||||
// Recompiles Code for Proper Flags on Block Linkings
|
||||
microVUt(void) mVUsetupFlags(mV, int* xStatus, int* xMac, int* xClip, int cycles) {
|
||||
@ -156,14 +158,9 @@ microVUt(void) mVUsetupFlags(mV, int* xStatus, int* xMac, int* xClip, int cycles
|
||||
if (__Status && !mVUflagHack) {
|
||||
int bStatus[4];
|
||||
sortFlag(xStatus, bStatus, cycles);
|
||||
MOV32RtoR(gprT1, getFlagReg1(bStatus[0]));
|
||||
MOV32RtoR(gprT2, getFlagReg1(bStatus[1]));
|
||||
MOV32RtoR(gprR, getFlagReg1(bStatus[2]));
|
||||
MOV32RtoR(gprF3, getFlagReg1(bStatus[3]));
|
||||
MOV32RtoR(gprF0, gprT1);
|
||||
MOV32RtoR(gprF1, gprT2);
|
||||
MOV32RtoR(gprF2, gprR);
|
||||
MOV32ItoR(gprR, Roffset); // Restore gprR
|
||||
SSE_MOVAPS_M128_to_XMM(xmmT1, (uptr)mVU->statusFlag);
|
||||
SSE_SHUFPS_XMM_to_XMM (xmmT1, xmmT1, shuffleStatus);
|
||||
SSE_MOVAPS_XMM_to_M128((uptr)mVU->statusFlag, xmmT1);
|
||||
}
|
||||
|
||||
if (__Mac) {
|
||||
|
@ -68,6 +68,7 @@ struct microBlock {
|
||||
struct microUpperOp {
|
||||
bool eBit; // Has E-bit set
|
||||
bool iBit; // Has I-bit set
|
||||
bool doFlags; // This instruction updates Status/Mac Flags
|
||||
};
|
||||
|
||||
struct microLowerOp {
|
||||
@ -83,7 +84,8 @@ struct microLowerOp {
|
||||
};
|
||||
|
||||
struct microFlagInst {
|
||||
bool doFlag; // Update Flag on this Instruction
|
||||
bool doSticky; // Update Sticky Flags (Status Flag Only)
|
||||
bool doFlag; // Update Flag on this Instruction (For Status Flag, this means non-sticky bits)
|
||||
u8 write; // Points to the instance that should be written to (s-stage write)
|
||||
u8 lastWrite; // Points to the instance that was last written to (most up-to-date flag)
|
||||
u8 read; // Points to the instance that should be read by a lower instruction (t-stage read)
|
||||
|
@ -37,7 +37,7 @@
|
||||
SSE_MOVMSKPS_XMM_to_R32(gprTemp, xmmReg); \
|
||||
TEST32ItoR(gprTemp, 1); /* Check sign bit */ \
|
||||
aJump = JZ8(0); /* Skip if positive */ \
|
||||
MOV32ItoM((uptr)&mVU->divFlag, 0x410); /* Set Invalid Flags */ \
|
||||
MOV32ItoM((uptr)&mVU->divFlag, 0x410000); /* Set Invalid Flags */ \
|
||||
SSE_ANDPS_M128_to_XMM(xmmReg, (uptr)mVU_absclip); /* Abs(xmmReg) */ \
|
||||
x86SetJ8(aJump); \
|
||||
}
|
||||
@ -54,10 +54,10 @@ mVUop(mVU_DIV) {
|
||||
|
||||
testZero(xmmFs, xmmT1, gprT1); // Test if Fs is zero
|
||||
ajmp = JZ8(0);
|
||||
MOV32ItoM((uptr)&mVU->divFlag, 0x410); // Set invalid flag (0/0)
|
||||
MOV32ItoM((uptr)&mVU->divFlag, 0x410000); // Set invalid flag (0/0)
|
||||
bjmp = JMP8(0);
|
||||
x86SetJ8(ajmp);
|
||||
MOV32ItoM((uptr)&mVU->divFlag, 0x820); // Zero divide (only when not 0/0)
|
||||
MOV32ItoM((uptr)&mVU->divFlag, 0x820000); // Zero divide (only when not 0/0)
|
||||
x86SetJ8(bjmp);
|
||||
|
||||
SSE_XORPS_XMM_to_XMM(xmmFs, xmmFt);
|
||||
@ -112,10 +112,10 @@ mVUop(mVU_RSQRT) {
|
||||
|
||||
testZero(xmmFs, xmmT1, gprT1); // Test if Fs is zero
|
||||
bjmp = JZ8(0); // Skip if none are
|
||||
MOV32ItoM((uptr)&mVU->divFlag, 0x410); // Set invalid flag (0/0)
|
||||
MOV32ItoM((uptr)&mVU->divFlag, 0x410000); // Set invalid flag (0/0)
|
||||
cjmp = JMP8(0);
|
||||
x86SetJ8(bjmp);
|
||||
MOV32ItoM((uptr)&mVU->divFlag, 0x820); // Zero divide flag (only when not 0/0)
|
||||
MOV32ItoM((uptr)&mVU->divFlag, 0x820000); // Zero divide flag (only when not 0/0)
|
||||
x86SetJ8(cjmp);
|
||||
|
||||
SSE_ANDPS_M128_to_XMM(xmmFs, (uptr)mVU_signbit);
|
||||
@ -522,7 +522,7 @@ mVUop(mVU_FMOR) {
|
||||
mVUop(mVU_FSAND) {
|
||||
pass1 { mVUanalyzeSflag(mVU, _It_); }
|
||||
pass2 {
|
||||
mVUallocSFLAGa(gprT1, sFLAG.read);
|
||||
mVUallocSFLAGa(mVU, gprT1, sFLAG.read);
|
||||
AND16ItoR(gprT1, _Imm12_);
|
||||
mVUallocVIb(mVU, gprT1, _It_);
|
||||
}
|
||||
@ -533,7 +533,7 @@ mVUop(mVU_FSAND) {
|
||||
mVUop(mVU_FSEQ) {
|
||||
pass1 { mVUanalyzeSflag(mVU, _It_); }
|
||||
pass2 {
|
||||
mVUallocSFLAGa(gprT1, sFLAG.read);
|
||||
mVUallocSFLAGa(mVU, gprT1, sFLAG.read);
|
||||
XOR16ItoR(gprT1, _Imm12_);
|
||||
SUB16ItoR(gprT1, 1);
|
||||
SHR16ItoR(gprT1, 15);
|
||||
@ -546,7 +546,7 @@ mVUop(mVU_FSEQ) {
|
||||
mVUop(mVU_FSOR) {
|
||||
pass1 { mVUanalyzeSflag(mVU, _It_); }
|
||||
pass2 {
|
||||
mVUallocSFLAGa(gprT1, sFLAG.read);
|
||||
mVUallocSFLAGa(mVU, gprT1, sFLAG.read);
|
||||
OR16ItoR(gprT1, _Imm12_);
|
||||
mVUallocVIb(mVU, gprT1, _It_);
|
||||
}
|
||||
@ -557,11 +557,13 @@ mVUop(mVU_FSOR) {
|
||||
mVUop(mVU_FSSET) {
|
||||
pass1 { mVUanalyzeFSSET(mVU); }
|
||||
pass2 {
|
||||
int flagReg1, flagReg2;
|
||||
getFlagReg(flagReg1, sFLAG.write);
|
||||
if (!(sFLAG.doFlag||mVUinfo.doDivFlag)) { getFlagReg(flagReg2, sFLAG.lastWrite); MOV32RtoR(flagReg1, flagReg2); } // Get status result from last status setting instruction
|
||||
AND32ItoR(flagReg1, 0x03f);
|
||||
OR32ItoR (flagReg1, (_Imm12_ & 0xfc0));
|
||||
int mask = 0; // gprST bits selected by _Imm12_; must start at zero because it is only ever OR-ed into and then tested with if (mask)
|
||||
if (_Imm12_ & 0x800) mask |= 0x800000;
|
||||
if (_Imm12_ & 0x400) mask |= 0x400000;
|
||||
if (_Imm12_ & 0x080) mask |= 0x0000f0;
|
||||
if (_Imm12_ & 0xc40) mask |= 0x00000f;
|
||||
AND32ItoR(gprST, 0x30000);
|
||||
if (mask) OR32ItoR(gprST, mask);
|
||||
}
|
||||
pass3 { mVUlog("FSSET $%x", _Imm12_); }
|
||||
pass4 { mVUsFlagHack = 0; }
|
||||
@ -966,23 +968,22 @@ mVUop(mVU_RNEXT) {
|
||||
pass1 { mVUanalyzeR2(mVU, _Ft_, 0); }
|
||||
pass2 {
|
||||
// algorithm from www.project-fao.org
|
||||
MOV32MtoR(gprR, Rmem);
|
||||
MOV32RtoR(gprT1, gprR);
|
||||
MOV32MtoR(gprT3, Rmem);
|
||||
MOV32RtoR(gprT1, gprT3);
|
||||
SHR32ItoR(gprT1, 4);
|
||||
AND32ItoR(gprT1, 1);
|
||||
|
||||
MOV32RtoR(gprT2, gprR);
|
||||
MOV32RtoR(gprT2, gprT3);
|
||||
SHR32ItoR(gprT2, 22);
|
||||
AND32ItoR(gprT2, 1);
|
||||
|
||||
SHL32ItoR(gprR, 1);
|
||||
SHL32ItoR(gprT3, 1);
|
||||
XOR32RtoR(gprT1, gprT2);
|
||||
XOR32RtoR(gprR, gprT1);
|
||||
AND32ItoR(gprR, 0x007fffff);
|
||||
OR32ItoR (gprR, 0x3f800000);
|
||||
MOV32RtoM(Rmem, gprR);
|
||||
mVU_RGET_(mVU, gprR);
|
||||
MOV32ItoR(gprR, Roffset); // Restore gprR
|
||||
XOR32RtoR(gprT3, gprT1);
|
||||
AND32ItoR(gprT3, 0x007fffff);
|
||||
OR32ItoR (gprT3, 0x3f800000);
|
||||
MOV32RtoM(Rmem, gprT3);
|
||||
mVU_RGET_(mVU, gprT3);
|
||||
}
|
||||
pass3 { mVUlog("RNEXT.%s vf%02d, R", _XYZW_String, _Ft_); }
|
||||
}
|
||||
|
@ -131,12 +131,22 @@ declareAllVariables
|
||||
|
||||
#define gprT1 0 // Temp Reg
|
||||
#define gprT2 1 // Temp Reg
|
||||
#define gprR 2 // VI Reg Offset
|
||||
#define gprF0 3 // Status Flag 0
|
||||
#define gprT3 2 // Temp Reg
|
||||
#define gprESP 4 // Don't use?
|
||||
#define gprF1 5 // Status Flag 1
|
||||
#define gprF2 6 // Status Flag 2
|
||||
#define gprF3 7 // Status Flag 3
|
||||
#define gprT4 5 // Temp?
|
||||
#define gprT5 6 // Temp?
|
||||
#define gprR 7 // VI Reg Offset
|
||||
#define gprST 3 // Status Sticky Flag
|
||||
// gprST's Info is Stored as follows:
|
||||
//-----------------------------------------------------------------------------
|
||||
//|23 22 21 20||19 18 17 16||15 14 13 12||11 10 09 08||07 06 05 04||03 02 01 00|
|
||||
//|DS|IS| || | D| I|| OS || US || SS || ZS |
|
||||
//-----------------------------------------------------------------------------
|
||||
// Storing Flags this way eliminates Jumps when updating sticky flags.
|
||||
//
|
||||
// When a Status Flag is about to be read, gprST is combined with
|
||||
// the current status flag result in mVUupdateFlags, and the complete
|
||||
// Status flag instance is stored in memory (mVU->statusFlag[instance])
|
||||
|
||||
// Function Params
|
||||
#define mP microVU* mVU, int recPass
|
||||
@ -172,15 +182,13 @@ declareAllVariables
|
||||
#define mVUregsTemp mVUallocInfo.regsTemp
|
||||
#define iPC mVUallocInfo.curPC
|
||||
#define mVUsFlagHack mVUallocInfo.sFlagHack
|
||||
|
||||
#define mVUinfo mVUallocInfo.info[iPC / 2]
|
||||
#define mVUstall mVUinfo.stall
|
||||
#define mVUup mVUinfo.uOp
|
||||
#define mVUlow mVUinfo.lOp
|
||||
#define sFLAG mVUinfo.sFlag
|
||||
#define mFLAG mVUinfo.mFlag
|
||||
#define cFLAG mVUinfo.cFlag
|
||||
|
||||
#define mVUinfo mVUallocInfo.info[iPC/2] // IR info for current 64bit instruction
|
||||
#define mVUstall mVUinfo.stall // Stall info for current instruction
|
||||
#define mVUup mVUinfo.uOp // Upper Instruction Info
|
||||
#define mVUlow mVUinfo.lOp // Lower Instruction Info
|
||||
#define sFLAG mVUinfo.sFlag // Status Flag info for cur instruction
|
||||
#define mFLAG mVUinfo.mFlag // Mac Flag info for cur instruction
|
||||
#define cFLAG mVUinfo.cFlag // Clip Flag info for cur instruction
|
||||
#define mVUstartPC mVUallocInfo.startPC
|
||||
#define mVUflagInfo mVUregs.needExactMatch
|
||||
#define mVUflagHack (mVUcurProg.sFlagHack)
|
||||
@ -268,6 +276,5 @@ declareAllVariables
|
||||
MOV32ItoR(gprT2, xPC); \
|
||||
if (isEndPC) { CALLFunc((uptr)mVUprintPC2); } \
|
||||
else { CALLFunc((uptr)mVUprintPC1); } \
|
||||
MOV32ItoR(gprR, Roffset); \
|
||||
} \
|
||||
}
|
||||
|
@ -292,7 +292,6 @@ microVUt(void) mVUrestoreRegs(mV) {
|
||||
SSE_MOVAPS_M128_to_XMM(xmmPQ, (uptr)&mVU->xmmPQb[0]);
|
||||
SSE_MOVAPS_M128_to_XMM(xmmMax, (uptr)mVU_maxvals);
|
||||
SSE_MOVAPS_M128_to_XMM(xmmMin, (uptr)mVU_minvals);
|
||||
MOV32ItoR(gprR, Roffset); // Restore gprR
|
||||
}
|
||||
|
||||
// Reads entire microProgram and finds out if Status Flag is Used
|
||||
|
@ -28,21 +28,16 @@
|
||||
|
||||
// Note: If modXYZW is true, then it adjusts XYZW for Single Scalar operations
|
||||
microVUt(void) mVUupdateFlags(mV, int reg, int regT1, int regT2, int xyzw, bool modXYZW) {
|
||||
int sReg, mReg = gprT1;
|
||||
int sReg = gprT3, mReg = gprT1;
|
||||
static u8 *pjmp, *pjmp2;
|
||||
static const u16 flipMask[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15};
|
||||
|
||||
//SysPrintf("Status = %d; Mac = %d\n", sFLAG.doFlag, mFLAG.doFlag);
|
||||
if (mVUsFlagHack) { sFLAG.doFlag = 0; }
|
||||
if (!sFLAG.doFlag && !mFLAG.doFlag) { return; }
|
||||
if (!mFLAG.doFlag || (_XYZW_SS && modXYZW)) { regT1 = reg; }
|
||||
if (mVUsFlagHack) { sFLAG.doSticky = 0; sFLAG.doFlag = 0; }
|
||||
if (!mVUup.doFlags || (!sFLAG.doSticky && !sFLAG.doFlag && !mFLAG.doFlag)) { return; }
|
||||
if (!mFLAG.doFlag || (_XYZW_SS && modXYZW)) { regT1 = reg; }
|
||||
else { SSE2_PSHUFD_XMM_to_XMM(regT1, reg, 0x1B); } // Flip wzyx to xyzw
|
||||
if (sFLAG.doFlag) {
|
||||
getFlagReg(sReg, sFLAG.write); // Set sReg to valid GPR by Cur Flag Instance
|
||||
mVUallocSFLAGa(sReg, sFLAG.lastWrite); // Get Prev Status Flag
|
||||
AND32ItoR(sReg, 0xff0); // Keep Sticky and D/I flags
|
||||
}
|
||||
|
||||
if (sFLAG.doFlag) { XOR32RtoR(sReg, sReg); }
|
||||
//-------------------------Check for Signed flags------------------------------
|
||||
|
||||
// The following code makes sure the Signed Bit isn't set with Negative Zero
|
||||
@ -55,24 +50,41 @@ microVUt(void) mVUupdateFlags(mV, int reg, int regT1, int regT2, int xyzw, bool
|
||||
|
||||
AND32ItoR(mReg, AND_XYZW); // Grab "Is Signed" bits from the previous calculation
|
||||
if (sFLAG.doFlag) pjmp = JZ8(0); // Skip if none are
|
||||
if (mFLAG.doFlag) SHL32ItoR(mReg, 4 + ADD_XYZW);
|
||||
if (sFLAG.doFlag) OR32ItoR(sReg, 0x82); // SS, S flags
|
||||
if (sFLAG.doFlag && _XYZW_SS) pjmp2 = JMP8(0); // If negative and not Zero, we can skip the Zero Flag checking
|
||||
if (mFLAG.doFlag || sFLAG.doSticky) SHL32ItoR(mReg, 4 + ADD_XYZW);
|
||||
if (sFLAG.doFlag) OR32ItoR (sReg, 0x82); // SS, S flags
|
||||
if (sFLAG.doFlag && _XYZW_SS) pjmp2 = JMP8(0); // If negative and not Zero, we can skip the Zero Flag checking
|
||||
if (sFLAG.doFlag) x86SetJ8(pjmp);
|
||||
|
||||
//-------------------------Check for Zero flags------------------------------
|
||||
|
||||
AND32ItoR(gprT2, AND_XYZW); // Grab "Is Zero" bits from the previous calculation
|
||||
if (sFLAG.doFlag) pjmp = JZ8(0); // Skip if none are
|
||||
if (mFLAG.doFlag) { SHIFT_XYZW(gprT2); OR32RtoR(mReg, gprT2); }
|
||||
if (sFLAG.doFlag) { OR32ItoR(sReg, 0x41); } // ZS, Z flags
|
||||
if (mFLAG.doFlag) { SHIFT_XYZW(gprT2); OR32RtoR(mReg, gprT2); }
|
||||
if (sFLAG.doSticky && !mFLAG.doFlag) { OR32RtoR(mReg, gprT2); }
|
||||
if (sFLAG.doFlag) { OR32ItoR(sReg, 0x41); } // ZS, Z flags
|
||||
if (sFLAG.doFlag) x86SetJ8(pjmp);
|
||||
|
||||
//-------------------------Write back flags------------------------------
|
||||
|
||||
if (sFLAG.doFlag && _XYZW_SS) x86SetJ8(pjmp2); // If we skipped the Zero Flag Checking, return here
|
||||
|
||||
if (mFLAG.doFlag) mVUallocMFLAGb(mVU, mReg, mFLAG.write); // Set Mac Flag
|
||||
|
||||
if (sFLAG.doSticky) OR32RtoR(gprST, mReg); // Set Sticky Register (gprST)
|
||||
if (mFLAG.doFlag) mVUallocMFLAGb(mVU, mReg, mFLAG.write); // Set Mac Flag
|
||||
if (sFLAG.doFlag) { // Attach Sticky Register With sReg
|
||||
TEST32ItoR(gprST, 0x0f);
|
||||
pjmp = JZ8(0); // Set Z bit?
|
||||
OR32ItoR(sReg, 0x40);
|
||||
x86SetJ8(pjmp);
|
||||
TEST32ItoR(gprST, 0xf0);
|
||||
pjmp = JZ8(0); // Set S bit?
|
||||
OR32ItoR(sReg, 0x80);
|
||||
x86SetJ8(pjmp);
|
||||
MOV32RtoR(mReg, gprST); // Backup gprST
|
||||
AND32ItoR(mReg, 0xc30000); // Get D/I Bits
|
||||
SHR32ItoR(mReg, 12); // Shift D/I Bits to proper position
|
||||
OR32RtoR (sReg, mReg); // Set D/I Bits
|
||||
mVUallocSFLAGb(mVU, sReg, sFLAG.write); // Set Status Flag
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
@ -438,9 +450,9 @@ microVUt(void) mVUupdateFlags(mV, int reg, int regT1, int regT2, int xyzw, bool
|
||||
}
|
||||
|
||||
// FMAC27~29 - MAX/MINI FMAC Opcodes
|
||||
#define mVU_FMAC27(operation, OPname) { mVU_FMAC1 (operation, OPname); pass1 { sFLAG.doFlag = 0; } }
|
||||
#define mVU_FMAC28(operation, OPname) { mVU_FMAC6 (operation, OPname); pass1 { sFLAG.doFlag = 0; } }
|
||||
#define mVU_FMAC29(operation, OPname) { mVU_FMAC3 (operation, OPname); pass1 { sFLAG.doFlag = 0; } }
|
||||
#define mVU_FMAC27(operation, OPname) { mVU_FMAC1 (operation, OPname); pass1 { mVUup.doFlags = 0; } }
|
||||
#define mVU_FMAC28(operation, OPname) { mVU_FMAC6 (operation, OPname); pass1 { mVUup.doFlags = 0; } }
|
||||
#define mVU_FMAC29(operation, OPname) { mVU_FMAC3 (operation, OPname); pass1 { mVUup.doFlags = 0; } }
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// Micro VU Micromode Upper instructions
|
||||
|
Loading…
Reference in New Issue
Block a user