- Tried a complex optimization with status flag updating.
Didn't seem to work out as well as I thought.

I need some benchmarks of mVU (w/o hacks) from before and after this update to see how effective it is.
If it's not much faster, I might just revert the change (because I'm sure no one is going to understand how it works this way).

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1300 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
cottonvibes 2009-06-01 01:21:01 +00:00
parent e85124dff5
commit ebff9ec1c5
11 changed files with 134 additions and 128 deletions

View File

@ -98,6 +98,7 @@ struct microProgManager {
#define mVUcacheSize (0x2000000 / ((vuIndex) ? 1 : 4))
struct microVU {
PCSX2_ALIGNED16(u32 statusFlag[4]); // 4 instances of status flag (used in execution)
PCSX2_ALIGNED16(u32 macFlag[4]); // 4 instances of mac flag (used in execution)
PCSX2_ALIGNED16(u32 clipFlag[4]); // 4 instances of clip flag (used in execution)
PCSX2_ALIGNED16(u32 xmmPQb[4]); // Backup for xmmPQ

View File

@ -634,28 +634,13 @@ microVUt(void) mVUallocFMAC26b(mV, int& ACCw, int& ACCr) {
// Flag Allocators
//------------------------------------------------------------------
#define getFlagReg(regX, fInst) { \
switch (fInst) { \
case 0: regX = gprF0; break; \
case 1: regX = gprF1; break; \
case 2: regX = gprF2; break; \
case 3: regX = gprF3; break; \
default: \
Console::Error("microVU: Flag Instance Error (fInst = %d)", params fInst); \
regX = gprF0; \
break; \
} \
microVUt(void) mVUallocSFLAGa(mV, int reg, int fInstance) {
MOVZX32M16toR(reg, (uptr)&mVU->statusFlag[fInstance]);
}
microVUt(void) mVUallocSFLAGa(int reg, int fInstance) {
getFlagReg(fInstance, fInstance);
MOVZX32R16toR(reg, fInstance);
}
microVUt(void) mVUallocSFLAGb(int reg, int fInstance) {
getFlagReg(fInstance, fInstance);
microVUt(void) mVUallocSFLAGb(mV, int reg, int fInstance) {
//AND32ItoR(reg, 0xffff);
MOV32RtoR(fInstance, reg);
MOV32RtoM((uptr)&mVU->statusFlag[fInstance], reg);
}
microVUt(void) mVUallocMFLAGa(mV, int reg, int fInstance) {

View File

@ -62,7 +62,8 @@
}
microVUt(void) mVUanalyzeFMAC1(mV, int Fd, int Fs, int Ft) {
sFLAG.doFlag = 1;
mVUup.doFlags = 1;
sFLAG.doSticky = 1;
analyzeReg1(Fs);
analyzeReg1(Ft);
analyzeReg2(Fd, 0);
@ -91,7 +92,8 @@ microVUt(void) mVUanalyzeFMAC2(mV, int Fs, int Ft) {
}
microVUt(void) mVUanalyzeFMAC3(mV, int Fd, int Fs, int Ft) {
sFLAG.doFlag = 1;
mVUup.doFlags = 1;
sFLAG.doSticky = 1;
analyzeReg1(Fs);
analyzeReg3(Ft);
analyzeReg2(Fd, 0);
@ -269,6 +271,15 @@ microVUt(void) mVUanalyzeR2(mV, int Ft, bool canBeNOP) {
// Sflag - Status Flag Opcodes
//------------------------------------------------------------------
#define setFlagInst(xDoFlag) { \
int curPC = iPC; \
for (int i = mVUcount, j = 0; i > 0; i--, j++) { \
incPC2(-2); \
if (mVUup.doFlags) { xDoFlag = 1; if (j >= 3) { break; } } \
} \
iPC = curPC; \
}
microVUt(void) mVUanalyzeSflag(mV, int It) {
if (!It) { mVUlow.isNOP = 1; }
else {
@ -279,17 +290,15 @@ microVUt(void) mVUanalyzeSflag(mV, int It) {
// Note: useSflag is used for status flag optimizations when a FSSET instruction is called.
// Due to stalls, it can only be set one instruction prior to the status flag read instruction;
// if we were guaranteed that no stalls would happen, it could be set 4 instructions prior.
setFlagInst(sFLAG.doFlag);
}
analyzeVIreg3(It, 1);
}
microVUt(void) mVUanalyzeFSSET(mV) {
mVUinfo.swapOps = 1;
mVUlow.isFSSET = 1;
// mVUinfo &= ~_doStatus;
// Note: I'm not entirely sure if the non-sticky flags
// should be taken from the current upper instruction
// or if they should be taken from the previous instruction
// Uncomment the above line if the latter-case is true
sFLAG.doSticky = 0;
}
//------------------------------------------------------------------
@ -301,12 +310,7 @@ microVUt(void) mVUanalyzeMflag(mV, int Is, int It) {
else { // Need set _doMac for 4 previous Ops (need to do all 4 because stalls could change the result needed)
mVUinfo.swapOps = 1;
if (mVUcount < 4) { mVUpBlock->pState.needExactMatch |= 0xf << (/*mVUcount +*/ 4); }
int curPC = iPC;
for (int i = mVUcount, j = 0; i > 0; i--, j++) {
incPC2(-2);
if (sFLAG.doFlag) { mFLAG.doFlag = 1; if (j >= 3) { break; } }
}
iPC = curPC;
setFlagInst(mFLAG.doFlag);
}
analyzeVIreg1(Is);
analyzeVIreg3(It, 1);

View File

@ -59,7 +59,7 @@
#define tCycles(dest, src) { dest = aMax(dest, src); }
#define incP() { mVU->p = (mVU->p+1) & 1; }
#define incQ() { mVU->q = (mVU->q+1) & 1; }
#define doUpperOp() { mVUopU(mVU, 1); mVUdivSet(mVU); }
#define doUpperOp() { mVUdivSet(mVU); mVUopU(mVU, 1); }
#define doLowerOp() { incPC(-1); mVUopL(mVU, 1); incPC(1); }
#define doIbit() { if (mVUup.iBit) { incPC(-1); MOV32ItoM((uptr)&mVU->regs->VI[REG_I].UL, curI); incPC(1); } }
@ -169,8 +169,7 @@ microVUt(void) mVUendProgram(mV, int qInst, int pInst, int fStatus, int fMac, in
// Save Flag Instances
if (!mVUflagHack) {
getFlagReg(fStatus, fStatus);
MOV32RtoM((uptr)&mVU->regs->VI[REG_STATUS_FLAG].UL, fStatus);
MOV32RtoM((uptr)&mVU->regs->VI[REG_STATUS_FLAG].UL, gprST);
}
mVUallocMFLAGa(mVU, gprT1, fMac);
mVUallocCFLAGa(mVU, gprT2, fClip);
@ -200,7 +199,6 @@ microVUt(void) mVUtestCycles(mV) {
MOV32ItoR(gprT2, xPC);
if (!isVU1) CALLFunc((uptr)mVUwarning0);
else CALLFunc((uptr)mVUwarning1);
MOV32ItoR(gprR, Roffset); // Restore gprR
mVUendProgram(mVU, 0, 0, sI, 0, cI);
x86SetJ8(jmp8);
}
@ -322,7 +320,7 @@ microVUf(void*) __fastcall mVUcompile(u32 startPC, uptr pState) {
mVUbackupRegs(mVU);
MOV32MtoR(gprT2, (uptr)&mVU->branch); // Get startPC (ECX first argument for __fastcall)
MOV32ItoR(gprR, (u32)&pBlock->pStateEnd); // Get pState (EDX second argument for __fastcall)
MOV32ItoR(gprT3, (u32)&pBlock->pStateEnd); // Get pState (EDX second argument for __fastcall)
if (!isVU1) CALLFunc((uptr)mVUcompileVU0); //(u32 startPC, uptr pState)
else CALLFunc((uptr)mVUcompileVU1);
@ -375,12 +373,12 @@ eBitTemination:
memset(&mVUinfo, 0, sizeof(mVUinfo));
incCycles(100); // Ensures Valid P/Q instances (And sets all cycle data to 0)
mVUcycles -= 100;
if (mVUinfo.doDivFlag) {
/*if (mVUinfo.doDivFlag) {
int flagReg;
getFlagReg(flagReg, lStatus);
AND32ItoR (flagReg, 0x0fcf);
OR32MtoR (flagReg, (uptr)&mVU->divFlag);
}
}*/
if (mVUinfo.doXGKICK) { mVU_XGKICK_DELAY(mVU, 1); }
// Do E-bit end stuff here

View File

@ -41,11 +41,11 @@ microVUt(void) mVUdispatcherA(mV) {
// Load Regs
MOV32ItoR(gprR, Roffset); // Load VI Reg Offset
MOV32MtoR(gprF0, (uptr)&mVU->regs->VI[REG_STATUS_FLAG].UL);
AND32ItoR(gprF0, 0xffff);
MOV32RtoR(gprF1, gprF0);
MOV32RtoR(gprF2, gprF0);
MOV32RtoR(gprF3, gprF0);
MOV32MtoR(gprST, (uptr)&mVU->regs->VI[REG_STATUS_FLAG].UL);
/*AND32ItoR(gprST, 0xffff);
MOV32RtoR(gprF1, gprST);
MOV32RtoR(gprF2, gprST);
MOV32RtoR(gprF3, gprST);*/
SSE_MOVAPS_M128_to_XMM(xmmT1, (uptr)&mVU->regs->VI[REG_MAC_FLAG].UL);
SSE_SHUFPS_XMM_to_XMM (xmmT1, xmmT1, 0);

View File

@ -20,12 +20,9 @@
// Sets FDIV Flags at the proper time
microVUt(void) mVUdivSet(mV) {
int flagReg1, flagReg2;
if (mVUinfo.doDivFlag) {
getFlagReg(flagReg1, sFLAG.write);
if (!sFLAG.doFlag) { getFlagReg(flagReg2, sFLAG.lastWrite); MOV32RtoR(flagReg1, flagReg2); }
AND32ItoR(flagReg1, 0x0fcf);
OR32MtoR (flagReg1, (uptr)&mVU->divFlag);
AND32ItoR(gprST, 0xfffcffff); // Clear D/I bits
OR32MtoR (gprST, (uptr)&mVU->divFlag); // Set DS/IS/D/I bits
}
}
@ -34,18 +31,19 @@ microVUt(void) mVUstatusFlagOp(mV) {
int curPC = iPC;
int i = mVUcount;
bool runLoop = 1;
if (sFLAG.doFlag) { mVUlow.useSflag = 1; }
if (mVUup.doFlags) { mVUlow.useSflag = 1; }
else {
for (; i > 0; i--) {
incPC2(-2);
if (mVUlow.useSflag) { runLoop = 0; break; }
if (sFLAG.doFlag) { mVUlow.useSflag = 1; break; }
if (mVUup.doFlags) { mVUlow.useSflag = 1; break; }
}
}
if (runLoop) {
for (; i > 0; i--) {
incPC2(-2);
if (mVUlow.useSflag) break;
sFLAG.doSticky = 0;
sFLAG.doFlag = 0;
}
}
@ -79,7 +77,11 @@ microVUt(int) mVUsetFlags(mV, int* xStatus, int* xMac, int* xClip) {
// Ensure last ~4+ instructions update mac flags (if next block's first 4 instructions will read them)
for (int i = mVUcount; i > 0; i--, aCount++) {
if (sFLAG.doFlag) { if (__Mac) { mFLAG.doFlag = 1; } if (aCount >= 4) { break; } }
if (mVUup.doFlags) {
if (__Status) { sFLAG.doFlag = 1; }
if (__Mac) { mFLAG.doFlag = 1; }
if (aCount >= 4) { break; }
}
incPC2(-2);
}
@ -142,11 +144,11 @@ microVUt(int) mVUsetFlags(mV, int* xStatus, int* xMac, int* xClip) {
incPC2(2);
}
mVUregs.flags = ((__Clip) ? 0 : (xC << 2)) | ((__Status) ? 0 : xS);
mVUregs.flags = ((__Clip) ? 0 : (xC << 2)) /*| ((__Status) ? 0 : xS)*/;
return cycles;
}
#define getFlagReg1(x) ((x == 3) ? gprF3 : ((x == 2) ? gprF2 : ((x == 1) ? gprF1 : gprF0)))
#define shuffleStatus ((bStatus[3]<<6)|(bStatus[2]<<4)|(bStatus[1]<<2)|bStatus[0])
#define shuffleMac ((bMac [3]<<6)|(bMac [2]<<4)|(bMac [1]<<2)|bMac [0])
#define shuffleClip ((bClip [3]<<6)|(bClip [2]<<4)|(bClip [1]<<2)|bClip [0])
@ -156,14 +158,9 @@ microVUt(void) mVUsetupFlags(mV, int* xStatus, int* xMac, int* xClip, int cycles
if (__Status && !mVUflagHack) {
int bStatus[4];
sortFlag(xStatus, bStatus, cycles);
MOV32RtoR(gprT1, getFlagReg1(bStatus[0]));
MOV32RtoR(gprT2, getFlagReg1(bStatus[1]));
MOV32RtoR(gprR, getFlagReg1(bStatus[2]));
MOV32RtoR(gprF3, getFlagReg1(bStatus[3]));
MOV32RtoR(gprF0, gprT1);
MOV32RtoR(gprF1, gprT2);
MOV32RtoR(gprF2, gprR);
MOV32ItoR(gprR, Roffset); // Restore gprR
SSE_MOVAPS_M128_to_XMM(xmmT1, (uptr)mVU->statusFlag);
SSE_SHUFPS_XMM_to_XMM (xmmT1, xmmT1, shuffleStatus);
SSE_MOVAPS_XMM_to_M128((uptr)mVU->statusFlag, xmmT1);
}
if (__Mac) {

View File

@ -68,6 +68,7 @@ struct microBlock {
struct microUpperOp {
bool eBit; // Has E-bit set
bool iBit; // Has I-bit set
bool doFlags; // This instruction updates Status/Mac Flags
};
struct microLowerOp {
@ -83,7 +84,8 @@ struct microLowerOp {
};
struct microFlagInst {
bool doFlag; // Update Flag on this Instruction
bool doSticky; // Update Sticky Flags (Status Flag Only)
bool doFlag; // Update Flag on this Instruction (For Status Flag, this means non-sticky bits)
u8 write; // Points to the instance that should be written to (s-stage write)
u8 lastWrite; // Points to the instance that was last written to (most up-to-date flag)
u8 read; // Points to the instance that should be read by a lower instruction (t-stage read)

View File

@ -37,7 +37,7 @@
SSE_MOVMSKPS_XMM_to_R32(gprTemp, xmmReg); \
TEST32ItoR(gprTemp, 1); /* Check sign bit */ \
aJump = JZ8(0); /* Skip if positive */ \
MOV32ItoM((uptr)&mVU->divFlag, 0x410); /* Set Invalid Flags */ \
MOV32ItoM((uptr)&mVU->divFlag, 0x410000); /* Set Invalid Flags */ \
SSE_ANDPS_M128_to_XMM(xmmReg, (uptr)mVU_absclip); /* Abs(xmmReg) */ \
x86SetJ8(aJump); \
}
@ -54,10 +54,10 @@ mVUop(mVU_DIV) {
testZero(xmmFs, xmmT1, gprT1); // Test if Fs is zero
ajmp = JZ8(0);
MOV32ItoM((uptr)&mVU->divFlag, 0x410); // Set invalid flag (0/0)
MOV32ItoM((uptr)&mVU->divFlag, 0x410000); // Set invalid flag (0/0)
bjmp = JMP8(0);
x86SetJ8(ajmp);
MOV32ItoM((uptr)&mVU->divFlag, 0x820); // Zero divide (only when not 0/0)
MOV32ItoM((uptr)&mVU->divFlag, 0x820000); // Zero divide (only when not 0/0)
x86SetJ8(bjmp);
SSE_XORPS_XMM_to_XMM(xmmFs, xmmFt);
@ -112,10 +112,10 @@ mVUop(mVU_RSQRT) {
testZero(xmmFs, xmmT1, gprT1); // Test if Fs is zero
bjmp = JZ8(0); // Skip if none are
MOV32ItoM((uptr)&mVU->divFlag, 0x410); // Set invalid flag (0/0)
MOV32ItoM((uptr)&mVU->divFlag, 0x410000); // Set invalid flag (0/0)
cjmp = JMP8(0);
x86SetJ8(bjmp);
MOV32ItoM((uptr)&mVU->divFlag, 0x820); // Zero divide flag (only when not 0/0)
MOV32ItoM((uptr)&mVU->divFlag, 0x820000); // Zero divide flag (only when not 0/0)
x86SetJ8(cjmp);
SSE_ANDPS_M128_to_XMM(xmmFs, (uptr)mVU_signbit);
@ -522,7 +522,7 @@ mVUop(mVU_FMOR) {
mVUop(mVU_FSAND) {
pass1 { mVUanalyzeSflag(mVU, _It_); }
pass2 {
mVUallocSFLAGa(gprT1, sFLAG.read);
mVUallocSFLAGa(mVU, gprT1, sFLAG.read);
AND16ItoR(gprT1, _Imm12_);
mVUallocVIb(mVU, gprT1, _It_);
}
@ -533,7 +533,7 @@ mVUop(mVU_FSAND) {
mVUop(mVU_FSEQ) {
pass1 { mVUanalyzeSflag(mVU, _It_); }
pass2 {
mVUallocSFLAGa(gprT1, sFLAG.read);
mVUallocSFLAGa(mVU, gprT1, sFLAG.read);
XOR16ItoR(gprT1, _Imm12_);
SUB16ItoR(gprT1, 1);
SHR16ItoR(gprT1, 15);
@ -546,7 +546,7 @@ mVUop(mVU_FSEQ) {
mVUop(mVU_FSOR) {
pass1 { mVUanalyzeSflag(mVU, _It_); }
pass2 {
mVUallocSFLAGa(gprT1, sFLAG.read);
mVUallocSFLAGa(mVU, gprT1, sFLAG.read);
OR16ItoR(gprT1, _Imm12_);
mVUallocVIb(mVU, gprT1, _It_);
}
@ -557,11 +557,13 @@ mVUop(mVU_FSOR) {
mVUop(mVU_FSSET) {
pass1 { mVUanalyzeFSSET(mVU); }
pass2 {
int flagReg1, flagReg2;
getFlagReg(flagReg1, sFLAG.write);
if (!(sFLAG.doFlag||mVUinfo.doDivFlag)) { getFlagReg(flagReg2, sFLAG.lastWrite); MOV32RtoR(flagReg1, flagReg2); } // Get status result from last status setting instruction
AND32ItoR(flagReg1, 0x03f);
OR32ItoR (flagReg1, (_Imm12_ & 0xfc0));
int mask;
if (_Imm12_ & 0x800) mask |= 0x800000;
if (_Imm12_ & 0x400) mask |= 0x400000;
if (_Imm12_ & 0x080) mask |= 0x0000f0;
if (_Imm12_ & 0xc40) mask |= 0x00000f;
AND32ItoR(gprST, 0x30000);
if (mask) OR32ItoR(gprST, mask);
}
pass3 { mVUlog("FSSET $%x", _Imm12_); }
pass4 { mVUsFlagHack = 0; }
@ -966,23 +968,22 @@ mVUop(mVU_RNEXT) {
pass1 { mVUanalyzeR2(mVU, _Ft_, 0); }
pass2 {
// algorithm from www.project-fao.org
MOV32MtoR(gprR, Rmem);
MOV32RtoR(gprT1, gprR);
MOV32MtoR(gprT3, Rmem);
MOV32RtoR(gprT1, gprT3);
SHR32ItoR(gprT1, 4);
AND32ItoR(gprT1, 1);
MOV32RtoR(gprT2, gprR);
MOV32RtoR(gprT2, gprT3);
SHR32ItoR(gprT2, 22);
AND32ItoR(gprT2, 1);
SHL32ItoR(gprR, 1);
SHL32ItoR(gprT3, 1);
XOR32RtoR(gprT1, gprT2);
XOR32RtoR(gprR, gprT1);
AND32ItoR(gprR, 0x007fffff);
OR32ItoR (gprR, 0x3f800000);
MOV32RtoM(Rmem, gprR);
mVU_RGET_(mVU, gprR);
MOV32ItoR(gprR, Roffset); // Restore gprR
XOR32RtoR(gprT3, gprT1);
AND32ItoR(gprT3, 0x007fffff);
OR32ItoR (gprT3, 0x3f800000);
MOV32RtoM(Rmem, gprT3);
mVU_RGET_(mVU, gprT3);
}
pass3 { mVUlog("RNEXT.%s vf%02d, R", _XYZW_String, _Ft_); }
}

View File

@ -131,12 +131,22 @@ declareAllVariables
#define gprT1 0 // Temp Reg
#define gprT2 1 // Temp Reg
#define gprR 2 // VI Reg Offset
#define gprF0 3 // Status Flag 0
#define gprT3 2 // Temp Reg
#define gprESP 4 // Don't use?
#define gprF1 5 // Status Flag 1
#define gprF2 6 // Status Flag 2
#define gprF3 7 // Status Flag 3
#define gprT4 5 // Temp?
#define gprT5 6 // Temp?
#define gprR 7 // VI Reg Offset
#define gprST 3 // Status Sticky Flag
// gprST's Info is Stored as follows:
//-----------------------------------------------------------------------------
//|23 22 21 20||19 18 17 16||15 14 13 12||11 10 09 08||07 06 05 04||03 02 01 00|
//|DS|IS| || | D| I|| OS || US || SS || ZS |
//-----------------------------------------------------------------------------
// Storing Flags this way eliminates Jumps when updating sticky flags.
//
// When a Status Flag is going to be read, mVUupdateFlags combines gprST with
// the current status flag result, and the complete Status Flag instance
// is stored in memory (mVU->statusFlag[instance])
// Function Params
#define mP microVU* mVU, int recPass
@ -172,15 +182,13 @@ declareAllVariables
#define mVUregsTemp mVUallocInfo.regsTemp
#define iPC mVUallocInfo.curPC
#define mVUsFlagHack mVUallocInfo.sFlagHack
#define mVUinfo mVUallocInfo.info[iPC / 2]
#define mVUstall mVUinfo.stall
#define mVUup mVUinfo.uOp
#define mVUlow mVUinfo.lOp
#define sFLAG mVUinfo.sFlag
#define mFLAG mVUinfo.mFlag
#define cFLAG mVUinfo.cFlag
#define mVUinfo mVUallocInfo.info[iPC/2] // IR info for current 64bit instruction
#define mVUstall mVUinfo.stall // Stall info for current instruction
#define mVUup mVUinfo.uOp // Upper Instruction Info
#define mVUlow mVUinfo.lOp // Lower Instruction Info
#define sFLAG mVUinfo.sFlag // Status Flag info for cur instruction
#define mFLAG mVUinfo.mFlag // Mac Flag info for cur instruction
#define cFLAG mVUinfo.cFlag // Clip Flag info for cur instruction
#define mVUstartPC mVUallocInfo.startPC
#define mVUflagInfo mVUregs.needExactMatch
#define mVUflagHack (mVUcurProg.sFlagHack)
@ -268,6 +276,5 @@ declareAllVariables
MOV32ItoR(gprT2, xPC); \
if (isEndPC) { CALLFunc((uptr)mVUprintPC2); } \
else { CALLFunc((uptr)mVUprintPC1); } \
MOV32ItoR(gprR, Roffset); \
} \
}

View File

@ -292,7 +292,6 @@ microVUt(void) mVUrestoreRegs(mV) {
SSE_MOVAPS_M128_to_XMM(xmmPQ, (uptr)&mVU->xmmPQb[0]);
SSE_MOVAPS_M128_to_XMM(xmmMax, (uptr)mVU_maxvals);
SSE_MOVAPS_M128_to_XMM(xmmMin, (uptr)mVU_minvals);
MOV32ItoR(gprR, Roffset); // Restore gprR
}
// Reads entire microProgram and finds out if Status Flag is Used

View File

@ -28,21 +28,16 @@
// Note: If modXYZW is true, then it adjusts XYZW for Single Scalar operations
microVUt(void) mVUupdateFlags(mV, int reg, int regT1, int regT2, int xyzw, bool modXYZW) {
int sReg, mReg = gprT1;
int sReg = gprT3, mReg = gprT1;
static u8 *pjmp, *pjmp2;
static const u16 flipMask[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15};
//SysPrintf("Status = %d; Mac = %d\n", sFLAG.doFlag, mFLAG.doFlag);
if (mVUsFlagHack) { sFLAG.doFlag = 0; }
if (!sFLAG.doFlag && !mFLAG.doFlag) { return; }
if (mVUsFlagHack) { sFLAG.doSticky = 0; sFLAG.doFlag = 0; }
if (!mVUup.doFlags || (!sFLAG.doSticky && !sFLAG.doFlag && !mFLAG.doFlag)) { return; }
if (!mFLAG.doFlag || (_XYZW_SS && modXYZW)) { regT1 = reg; }
else { SSE2_PSHUFD_XMM_to_XMM(regT1, reg, 0x1B); } // Flip wzyx to xyzw
if (sFLAG.doFlag) {
getFlagReg(sReg, sFLAG.write); // Set sReg to valid GPR by Cur Flag Instance
mVUallocSFLAGa(sReg, sFLAG.lastWrite); // Get Prev Status Flag
AND32ItoR(sReg, 0xff0); // Keep Sticky and D/I flags
}
if (sFLAG.doFlag) { XOR32RtoR(sReg, sReg); }
//-------------------------Check for Signed flags------------------------------
// The following code makes sure the Signed Bit isn't set with Negative Zero
@ -55,7 +50,7 @@ microVUt(void) mVUupdateFlags(mV, int reg, int regT1, int regT2, int xyzw, bool
AND32ItoR(mReg, AND_XYZW); // Grab "Is Signed" bits from the previous calculation
if (sFLAG.doFlag) pjmp = JZ8(0); // Skip if none are
if (mFLAG.doFlag) SHL32ItoR(mReg, 4 + ADD_XYZW);
if (mFLAG.doFlag || sFLAG.doSticky) SHL32ItoR(mReg, 4 + ADD_XYZW);
if (sFLAG.doFlag) OR32ItoR (sReg, 0x82); // SS, S flags
if (sFLAG.doFlag && _XYZW_SS) pjmp2 = JMP8(0); // If negative and not Zero, we can skip the Zero Flag checking
if (sFLAG.doFlag) x86SetJ8(pjmp);
@ -65,6 +60,7 @@ microVUt(void) mVUupdateFlags(mV, int reg, int regT1, int regT2, int xyzw, bool
AND32ItoR(gprT2, AND_XYZW); // Grab "Is Zero" bits from the previous calculation
if (sFLAG.doFlag) pjmp = JZ8(0); // Skip if none are
if (mFLAG.doFlag) { SHIFT_XYZW(gprT2); OR32RtoR(mReg, gprT2); }
if (sFLAG.doSticky && !mFLAG.doFlag) { OR32RtoR(mReg, gprT2); }
if (sFLAG.doFlag) { OR32ItoR(sReg, 0x41); } // ZS, Z flags
if (sFLAG.doFlag) x86SetJ8(pjmp);
@ -72,7 +68,23 @@ microVUt(void) mVUupdateFlags(mV, int reg, int regT1, int regT2, int xyzw, bool
if (sFLAG.doFlag && _XYZW_SS) x86SetJ8(pjmp2); // If we skipped the Zero Flag Checking, return here
if (sFLAG.doSticky) OR32RtoR(gprST, mReg); // Set Sticky Register (gprST)
if (mFLAG.doFlag) mVUallocMFLAGb(mVU, mReg, mFLAG.write); // Set Mac Flag
if (sFLAG.doFlag) { // Attach Sticky Register With sReg
TEST32ItoR(gprST, 0x0f);
pjmp = JZ8(0); // Set Z bit?
OR32ItoR(sReg, 0x40);
x86SetJ8(pjmp);
TEST32ItoR(gprST, 0xf0);
pjmp = JZ8(0); // Set S bit?
OR32ItoR(sReg, 0x80);
x86SetJ8(pjmp);
MOV32RtoR(mReg, gprST); // Backup gprST
AND32ItoR(mReg, 0xc30000); // Get D/I Bits
SHR32ItoR(mReg, 12); // Shift D/I Bits to proper position
OR32RtoR (sReg, mReg); // Set D/I Bits
mVUallocSFLAGb(mVU, sReg, sFLAG.write); // Set Status Flag
}
}
//------------------------------------------------------------------
@ -438,9 +450,9 @@ microVUt(void) mVUupdateFlags(mV, int reg, int regT1, int regT2, int xyzw, bool
}
// FMAC27~29 - MAX/MINI FMAC Opcodes
#define mVU_FMAC27(operation, OPname) { mVU_FMAC1 (operation, OPname); pass1 { sFLAG.doFlag = 0; } }
#define mVU_FMAC28(operation, OPname) { mVU_FMAC6 (operation, OPname); pass1 { sFLAG.doFlag = 0; } }
#define mVU_FMAC29(operation, OPname) { mVU_FMAC3 (operation, OPname); pass1 { sFLAG.doFlag = 0; } }
#define mVU_FMAC27(operation, OPname) { mVU_FMAC1 (operation, OPname); pass1 { mVUup.doFlags = 0; } }
#define mVU_FMAC28(operation, OPname) { mVU_FMAC6 (operation, OPname); pass1 { mVUup.doFlags = 0; } }
#define mVU_FMAC29(operation, OPname) { mVU_FMAC3 (operation, OPname); pass1 { mVUup.doFlags = 0; } }
//------------------------------------------------------------------
// Micro VU Micromode Upper instructions