diff --git a/pcsx2/Gif_Unit.h b/pcsx2/Gif_Unit.h index 8f9df90ec7..fbab6c90ea 100644 --- a/pcsx2/Gif_Unit.h +++ b/pcsx2/Gif_Unit.h @@ -568,8 +568,14 @@ struct Gif_Unit } if (curSize >= size) return size; - if (gifTag.tag.EOP) + if(!EmuConfig.Cpu.Recompiler.EnableVU1 && pathIdx == GIF_PATH_1) + { + return curSize | ((u32)gifTag.tag.EOP << 31); + } + if (gifTag.tag.EOP ) + { return curSize; + } } } diff --git a/pcsx2/VU.h b/pcsx2/VU.h index caef1a9519..1c3e592196 100644 --- a/pcsx2/VU.h +++ b/pcsx2/VU.h @@ -110,6 +110,7 @@ struct fmacPipe { int enable; int reg; + int flagreg; int xyzw; u32 sCycle; u32 Cycle; @@ -171,6 +172,12 @@ struct __aligned16 VURegs u8* Mem; u8* Micro; + u32 xgkickenable; + u32 xgkickaddr; + u32 xgkickdiff; + u32 xgkicksizeremaining; + u32 xgkicklastcycle; + u32 xgkickendpacket; u32 ebit; u8 VIBackupCycles; diff --git a/pcsx2/VU0micro.cpp b/pcsx2/VU0micro.cpp index 64ec95569b..6004812fb0 100644 --- a/pcsx2/VU0micro.cpp +++ b/pcsx2/VU0micro.cpp @@ -44,6 +44,8 @@ void __fastcall vu0ExecMicro(u32 addr) { // Need to copy the clip flag back to the interpreter in case COP2 has edited it VU0.clipflag = VU0.VI[REG_CLIP_FLAG].UL; + VU0.macflag = VU0.VI[REG_MAC_FLAG].UL; + VU0.statusflag = VU0.VI[REG_STATUS_FLAG].UL; VU0.VI[REG_VPU_STAT].UL &= ~0xFF; VU0.VI[REG_VPU_STAT].UL |= 0x01; VU0.cycle = cpuRegs.cycle; diff --git a/pcsx2/VU0microInterp.cpp b/pcsx2/VU0microInterp.cpp index 77ea99e13f..def0bf4584 100644 --- a/pcsx2/VU0microInterp.cpp +++ b/pcsx2/VU0microInterp.cpp @@ -19,6 +19,8 @@ #include "VUmicro.h" +#include + extern void _vuFlushAll(VURegs* VU); static void _vu0ExecUpper(VURegs* VU, u32 *ptr) { @@ -82,6 +84,8 @@ static void _vu0Exec(VURegs* VU) /* check upper flags */ if (ptr[1] & 0x80000000) { /* I flag */ + _vuTestPipes(VU); + _vu0ExecUpper(VU, ptr); VU->VI[REG_I].UL = ptr[0]; @@ -92,7 +96,7 @@ static void _vu0Exec(VURegs* VU) #ifndef INT_VUSTALLHACK _vuTestLowerStalls(VU, &lregs); #endif - + _vuTestPipes(VU); vu0branch = lregs.pipe == VUPIPE_BRANCH; vfreg = 0; vireg = 0; @@ -146,8 +150,6 @@ static void _vu0Exec(VURegs* VU) if (!(ptr[1] & 0x80000000)) _vuAddLowerStalls(VU, &lregs); - _vuTestPipes(VU); - if(VU->VIBackupCycles > 0) VU->VIBackupCycles--; @@ -179,8 +181,8 @@ static void _vu0Exec(VURegs* VU) void vu0Exec(VURegs* VU) { VU0.VI[REG_TPC].UL &= VU0_PROGMASK; - _vu0Exec(VU); VU->cycle++; + _vu0Exec(VU); if (VU->VI[0].UL != 0) DbgCon.Error("VI[0] != 0!!!!\n"); if (VU->VF[0].f.x != 0.0f) DbgCon.Error("VF[0].x != 0.0!!!!\n"); @@ -210,6 +212,9 @@ void InterpVU0::Step() void InterpVU0::Execute(u32 cycles) { + const int originalRounding = fegetround(); + fesetround(g_sseVUMXCSR.RoundingControl << 8); + VU0.VI[REG_TPC].UL <<= 3; VU0.flags &= ~VUFLAG_MFLAGSET; for (int i = (int)cycles; i > 0; i--) { @@ -222,4 +227,6 @@ void InterpVU0::Execute(u32 cycles) vu0Exec(&VU0); } VU0.VI[REG_TPC].UL >>= 3; + + fesetround(originalRounding); } diff --git a/pcsx2/VU1microInterp.cpp b/pcsx2/VU1microInterp.cpp index efc731befa..109f35bd22 100644 --- a/pcsx2/VU1microInterp.cpp +++ b/pcsx2/VU1microInterp.cpp @@ -18,17 +18,24 @@ #include "Common.h" #include "VUmicro.h" +#include "GS.h" +#include "Gif_Unit.h" #include "MTVU.h" -extern void _vuFlushAll(VURegs* VU); +#include -void _vu1ExecUpper(VURegs* VU, u32 *ptr) { +extern void _vuFlushAll(VURegs* VU); +extern void _vuXGKICKFlush(VURegs* VU); + +void _vu1ExecUpper(VURegs* VU, u32* ptr) +{ VU->code = ptr[1]; - //IdebugUPPER(VU1); + IdebugUPPER(VU1); VU1_UPPER_OPCODE[VU->code & 0x3f](); } -void _vu1ExecLower(VURegs* VU, u32 *ptr) { +void _vu1ExecLower(VURegs* VU, u32* ptr) +{ VU->code = ptr[0]; IdebugLOWER(VU1); VU1_LOWER_OPCODE[VU->code >> 25](); @@ -44,78 +51,89 @@ static void _vu1Exec(VURegs* VU) VECTOR _VFc; REG_VI _VI; REG_VI _VIc; - u32 *ptr; + u32* ptr; int vfreg; int vireg; - int discard=0; + int discard = 0; ptr = (u32*)&VU->Micro[VU->VI[REG_TPC].UL]; - VU->VI[REG_TPC].UL+=8; + VU->VI[REG_TPC].UL += 8; - if (ptr[1] & 0x40000000) { /* E flag */ + if (ptr[1] & 0x40000000) + { /* E flag */ VU->ebit = 2; } - if (ptr[1] & 0x10000000) { /* D flag */ - if (VU0.VI[REG_FBRST].UL & 0x400) { - VU0.VI[REG_VPU_STAT].UL|= 0x200; + if (ptr[1] & 0x10000000) + { /* D flag */ + if (VU0.VI[REG_FBRST].UL & 0x400) + { + VU0.VI[REG_VPU_STAT].UL |= 0x200; hwIntcIrq(INTC_VU1); VU->ebit = 1; } - } - if (ptr[1] & 0x08000000) { /* T flag */ - if (VU0.VI[REG_FBRST].UL & 0x800) { - VU0.VI[REG_VPU_STAT].UL|= 0x400; + if (ptr[1] & 0x08000000) + { /* T flag */ + if (VU0.VI[REG_FBRST].UL & 0x800) + { + VU0.VI[REG_VPU_STAT].UL |= 0x400; hwIntcIrq(INTC_VU1); VU->ebit = 1; } - } //VUM_LOG("VU->cycle = %d (flags st=%x;mac=%x;clip=%x,q=%f)", VU->cycle, VU->statusflag, VU->macflag, VU->clipflag, VU->q.F); VU->code = ptr[1]; VU1regs_UPPER_OPCODE[VU->code & 0x3f](&uregs); -#ifndef INT_VUSTALLHACK _vuTestUpperStalls(VU, &uregs); -#endif /* check upper flags */ - if (ptr[1] & 0x80000000) { /* I flag */ + if (ptr[1] & 0x80000000) + { /* I flag */ + _vuTestPipes(VU); + _vu1ExecUpper(VU, ptr); VU->VI[REG_I].UL = ptr[0]; //Lower not used, set to 0 to fill in the FMAC stall gap //Could probably get away with just running upper stalls, but lets not tempt fate. - memset(&lregs, 0, sizeof(lregs)); - } else { + memset(&lregs, 0, sizeof(lregs)); + } + else + { VU->code = ptr[0]; VU1regs_LOWER_OPCODE[VU->code >> 25](&lregs); -#ifndef INT_VUSTALLHACK + _vuTestLowerStalls(VU, &lregs); -#endif + _vuTestPipes(VU); - vu1branch = lregs.pipe == VUPIPE_BRANCH; - - vfreg = 0; vireg = 0; - if (uregs.VFwrite) { - if (lregs.VFwrite == uregs.VFwrite) { -// Console.Warning("*PCSX2*: Warning, VF write to the same reg in both lower/upper cycle"); + vfreg = 0; + vireg = 0; + if (uregs.VFwrite) + { + if (lregs.VFwrite == uregs.VFwrite) + { + //Console.Warning("*PCSX2*: Warning, VF write to the same reg in both lower/upper cycle"); discard = 1; } if (lregs.VFread0 == uregs.VFwrite || - lregs.VFread1 == uregs.VFwrite) { -// Console.WriteLn("saving reg %d at pc=%x", i, VU->VI[REG_TPC].UL); + lregs.VFread1 == uregs.VFwrite) + { + //Console.WriteLn("saving reg %d at pc=%x", i, VU->VI[REG_TPC].UL); _VF = VU->VF[uregs.VFwrite]; vfreg = uregs.VFwrite; } } - if (uregs.VIread & (1 << REG_CLIP_FLAG)) { - if (lregs.VIwrite & (1 << REG_CLIP_FLAG)) { + if (uregs.VIwrite & (1 << REG_CLIP_FLAG)) + { + if (lregs.VIwrite & (1 << REG_CLIP_FLAG)) + { Console.Warning("*PCSX2*: Warning, VI write to the same reg in both lower/upper cycle"); discard = 1; } - if (lregs.VIread & (1 << REG_CLIP_FLAG)) { + if (lregs.VIread & (1 << REG_CLIP_FLAG)) + { _VI = VU->VI[REG_CLIP_FLAG]; vireg = REG_CLIP_FLAG; } @@ -123,69 +141,120 @@ static void _vu1Exec(VURegs* VU) _vu1ExecUpper(VU, ptr); - if (discard == 0) { - if (vfreg) { + if (discard == 0) + { + if (vfreg) + { _VFc = VU->VF[vfreg]; VU->VF[vfreg] = _VF; } - if (vireg) { + if (vireg) + { _VIc = VU->VI[vireg]; VU->VI[vireg] = _VI; } _vu1ExecLower(VU, ptr); - if (vfreg) { + if (vfreg) + { VU->VF[vfreg] = _VFc; } - if (vireg) { + if (vireg) + { VU->VI[vireg] = _VIc; } } } _vuAddUpperStalls(VU, &uregs); - _vuAddLowerStalls(VU, &lregs); - _vuTestPipes(VU); - - if(VU->VIBackupCycles > 0) + //if (!(ptr[1] & 0x80000000)) + _vuAddLowerStalls(VU, &lregs); + + if (VU->VIBackupCycles > 0) VU->VIBackupCycles--; - - if (VU->branch > 0) { - if (VU->branch-- == 1) { + + if (VU->branch > 0) + { + if (VU->branch-- == 1) + { VU->VI[REG_TPC].UL = VU->branchpc; - if(VU->takedelaybranch) - { + if (VU->takedelaybranch) + { VU->branch = 1; - //DevCon.Warning("VU1 - Branch/Jump in Delay Slot"); + //DevCon.Warning("VU1 - Branch/Jump in Delay Slot"); VU->branchpc = VU->delaybranchpc; VU->delaybranchpc = 0; VU->takedelaybranch = false; - } + } } } - if( VU->ebit > 0 ) { - if( VU->ebit-- == 1 ) { + if (VU->ebit > 0) + { + if (VU->ebit-- == 1) + { VU->VIBackupCycles = 0; _vuFlushAll(VU); VU0.VI[REG_VPU_STAT].UL &= ~0x100; vif1Regs.stat.VEW = false; } } + if (VU->xgkickenable && (VU1.cycle - VU->xgkicklastcycle) >= 2) + { + if (VU->xgkicksizeremaining == 0) + { + IPU_LOG("Banana Reading next packet from %x", VU->xgkickaddr); + u32 size = gifUnit.GetGSPacketSize(GIF_PATH_1, VU->Mem, VU->xgkickaddr); + VU->xgkicksizeremaining = size & 0xFFFF; + VU->xgkickendpacket = size >> 31; + if (VU->xgkicksizeremaining == 0) + VU->xgkickenable = 0; + IPU_LOG("Banana New packet size %x", VU->xgkicksizeremaining); + } + u32 transfersize = std::min(VU->xgkicksizeremaining / 0x10, (VU1.cycle - VU->xgkicklastcycle) / 2); + if (transfersize) + { + IPU_LOG("Banana Transferring %x bytes from %x size left %x", transfersize * 0x10, VU->xgkickaddr, VU->xgkicksizeremaining); + if ((transfersize * 0x10) > VU->xgkicksizeremaining) + gifUnit.gifPath[GIF_PATH_1].CopyGSPacketData(&VU->Mem[VU->xgkickaddr], transfersize * 0x10, true); + else + gifUnit.TransferGSPacketData(GIF_TRANS_XGKICK, &VU->Mem[VU->xgkickaddr], transfersize * 0x10, true); + + VU->xgkickaddr = (VU->xgkickaddr + (transfersize * 0x10)) & 0x3FFF; + VU->xgkicksizeremaining -= (transfersize * 0x10); + VU->xgkickdiff = 0x4000 - VU->xgkickaddr; + IPU_LOG("Banana next addr %x left size %x EOP %d", VU->xgkickaddr, VU->xgkicksizeremaining, VU->xgkickendpacket); + VU->xgkicklastcycle += std::max(transfersize * 2, 2U); + + if (VU->xgkicksizeremaining || !VU->xgkickendpacket) + VU->xgkickenable = 1; + else + { + + VU->xgkickenable = 0; + IPU_LOG("Banana transfer finished"); + } + } + } } void vu1Exec(VURegs* VU) { - _vu1Exec(VU); VU->cycle++; + _vu1Exec(VU); - if (VU->VI[0].UL != 0) DbgCon.Error("VI[0] != 0!!!!\n"); - if (VU->VF[0].f.x != 0.0f) DbgCon.Error("VF[0].x != 0.0!!!!\n"); - if (VU->VF[0].f.y != 0.0f) DbgCon.Error("VF[0].y != 0.0!!!!\n"); - if (VU->VF[0].f.z != 0.0f) DbgCon.Error("VF[0].z != 0.0!!!!\n"); - if (VU->VF[0].f.w != 1.0f) DbgCon.Error("VF[0].w != 1.0!!!!\n"); + if (VU->VI[0].UL != 0) + DbgCon.Error("VI[0] != 0!!!!\n"); + if (VU->VF[0].f.x != 0.0f) + DbgCon.Error("VF[0].x != 0.0!!!!\n"); + if (VU->VF[0].f.y != 0.0f) + DbgCon.Error("VF[0].y != 0.0!!!!\n"); + if (VU->VF[0].f.z != 0.0f) + DbgCon.Error("VF[0].z != 0.0!!!!\n"); + if (VU->VF[0].f.w != 1.0f) + DbgCon.Error("VF[0].w != 1.0!!!!\n"); } InterpVU1::InterpVU1() @@ -194,11 +263,13 @@ InterpVU1::InterpVU1() IsInterpreter = true; } -void InterpVU1::Reset() { +void InterpVU1::Reset() +{ vu1Thread.WaitVU(); } -void InterpVU1::Shutdown() noexcept { +void InterpVU1::Shutdown() noexcept +{ vu1Thread.WaitVU(); } @@ -210,15 +281,21 @@ void InterpVU1::SetStartPC(u32 startPC) void InterpVU1::Step() { VU1.VI[REG_TPC].UL &= VU1_PROGMASK; - vu1Exec( &VU1 ); + vu1Exec(&VU1); } void InterpVU1::Execute(u32 cycles) { + const int originalRounding = fegetround(); + fesetround(g_sseVUMXCSR.RoundingControl << 8); + VU1.VI[REG_TPC].UL <<= 3; - for (int i = (int)cycles; i > 0; i--) { - if (!(VU0.VI[REG_VPU_STAT].UL & 0x100)) { - if (VU1.branch || VU1.ebit) { + for (int i = (int)cycles; i > 0; i--) + { + if (!(VU0.VI[REG_VPU_STAT].UL & 0x100)) + { + if (VU1.branch || VU1.ebit) + { Step(); // run branch delay slot? } break; @@ -226,5 +303,6 @@ void InterpVU1::Execute(u32 cycles) Step(); } VU1.VI[REG_TPC].UL >>= 3; -} + fesetround(originalRounding); +} diff --git a/pcsx2/VUflags.cpp b/pcsx2/VUflags.cpp index 5294bfcb85..1b5f6ea7ef 100644 --- a/pcsx2/VUflags.cpp +++ b/pcsx2/VUflags.cpp @@ -25,22 +25,6 @@ /* NEW FLAGS */ //By asadr. Thnkx F|RES :p /*****************************************/ - -void vuUpdateDI(VURegs * VU) { -// u32 Flag_S = 0; -// u32 Flag_I = 0; -// u32 Flag_D = 0; -// -// /* -// FLAG D - I -// */ -// Flag_I = (VU->statusflag >> 4) & 0x1; -// Flag_D = (VU->statusflag >> 5) & 0x1; -// -// VU->statusflag|= (Flag_I | (VU0.VI[REG_STATUS_FLAG].US[0] >> 4)) << 10; -// VU->statusflag|= (Flag_D | (VU0.VI[REG_STATUS_FLAG].US[0] >> 5)) << 11; -} - static __ri u32 VU_MAC_UPDATE( int shift, VURegs * VU, float f ) { u32 v = *(u32*)&f; @@ -64,7 +48,7 @@ static __ri u32 VU_MAC_UPDATE( int shift, VURegs * VU, float f ) VU->macflag = (VU->macflag&~(0x1000<macflag = (VU->macflag&~(0x0100<macflag = (VU->macflag&~(0x0101<macflag = (VU->macflag & ~(0x1101<macflag & 0x00F0) newflag |= 0x2; if (VU->macflag & 0x0F00) newflag |= 0x4; if (VU->macflag & 0xF000) newflag |= 0x8; - VU->statusflag = (VU->statusflag&0xc30)|newflag|((VU->statusflag&0xf)<<6); + // Save old sticky flags and D/I settings, everthing else is the new flags only + VU->statusflag = (VU->statusflag&0xff0)| newflag | (newflag<<6); } diff --git a/pcsx2/VUmicro.cpp b/pcsx2/VUmicro.cpp index ceadf5a155..1346cb56d5 100644 --- a/pcsx2/VUmicro.cpp +++ b/pcsx2/VUmicro.cpp @@ -17,6 +17,8 @@ #include "Common.h" #include "VUmicro.h" #include "MTVU.h" +#include "GS.h" +#include "Gif_Unit.h" // Executes a Block based on EE delta time void BaseVUmicroCPU::ExecuteBlock(bool startUp) @@ -32,7 +34,49 @@ void BaseVUmicroCPU::ExecuteBlock(bool startUp) } if (!(stat & test)) + { + if (m_Idx == 1) + { + if (VU1.xgkickenable && (cpuRegs.cycle - VU1.xgkicklastcycle) >= 2) + { + if (VU1.xgkicksizeremaining == 0) + { + IPU_LOG("Banana Reading next packet from %x", VU1.xgkickaddr); + u32 size = gifUnit.GetGSPacketSize(GIF_PATH_1, VU1.Mem, VU1.xgkickaddr); + VU1.xgkicksizeremaining = size & 0xFFFF; + VU1.xgkickendpacket = size >> 31; + IPU_LOG("Banana New packet size %x", VU1.xgkicksizeremaining); + } + u32 transfersize = std::min(VU1.xgkicksizeremaining / 0x10, (cpuRegs.cycle - VU1.xgkicklastcycle) / 2); + if (transfersize) + { + IPU_LOG("Banana Transferring %x bytes from %x size left %x", transfersize * 0x10, VU1.xgkickaddr, VU1.xgkicksizeremaining); + if ((transfersize * 0x10) > VU1.xgkicksizeremaining) + gifUnit.gifPath[GIF_PATH_1].CopyGSPacketData(&VU1.Mem[VU1.xgkickaddr], transfersize * 0x10, true); + else + gifUnit.TransferGSPacketData(GIF_TRANS_XGKICK, &VU1.Mem[VU1.xgkickaddr], transfersize * 0x10, true); + + VU1.xgkickaddr = (VU1.xgkickaddr + (transfersize * 0x10)) & 0x3FFF; + VU1.xgkicksizeremaining -= (transfersize * 0x10); + VU1.xgkickdiff = 0x4000 - VU1.xgkickaddr; + IPU_LOG("Banana next addr %x left size %x EOP %d", VU1.xgkickaddr, VU1.xgkicksizeremaining, VU1.xgkickendpacket); + VU1.xgkicklastcycle += std::max(transfersize * 2, 2U); + + if (VU1.xgkicksizeremaining || !VU1.xgkickendpacket) + VU1.xgkickenable = 1; + else + { + + VU1.xgkickenable = 0; + IPU_LOG("Banana transfer finished"); + } + } + else + VU1.xgkickenable = 1; + } + } return; + } if (startUp && s) // Start Executing a microprogram (When kickstarted) { diff --git a/pcsx2/VUops.cpp b/pcsx2/VUops.cpp index bdac364857..8f47ea87fc 100644 --- a/pcsx2/VUops.cpp +++ b/pcsx2/VUops.cpp @@ -21,7 +21,7 @@ #include "MTVU.h" #include - +u32 laststall = 0; //Lower/Upper instructions can use that.. #define _Ft_ ((VU->code >> 16) & 0x1F) // The rt part of the instruction register #define _Fs_ ((VU->code >> 11) & 0x1F) // The rd part of the instruction register @@ -47,44 +47,99 @@ static __aligned16 VECTOR RDzero; -static __ri void _vuFMACflush(VURegs * VU) { - int i; +static __ri bool _vuFMACflush(VURegs * VU) { + bool didflush = false; + int startpos = 0; + u32 cycle = 9999; - for (i=0; i<8; i++) { - if (VU->fmac[i].enable == 0) continue; + for (int startpipe = 0; startpipe < 8; startpipe++) + { + if (VU->fmac[startpipe].enable == 0) continue; - if ((VU->cycle - VU->fmac[i].sCycle) >= VU->fmac[i].Cycle) { - VUM_LOG("flushing FMAC pipe[%d] (macflag=%x)", i, VU->fmac[i].macflag); - - VU->fmac[i].enable = 0; - VU->VI[REG_MAC_FLAG].UL = VU->fmac[i].macflag; - VU->VI[REG_STATUS_FLAG].UL = VU->fmac[i].statusflag; - VU->VI[REG_CLIP_FLAG].UL = VU->fmac[i].clipflag; + if ((VU->fmac[startpipe].sCycle + VU->fmac[startpipe].Cycle) <= VU->cycle) + { + if ((VU->cycle - (VU->fmac[startpipe].sCycle + VU->fmac[startpipe].Cycle)) < cycle) + { + VUM_LOG("Setting start pos to %d", startpipe); + cycle = (s32)(VU->cycle - (VU->fmac[startpipe].sCycle + VU->fmac[startpipe].Cycle)); + startpos = startpipe; + } + else + VUM_LOG("Nope for pipe %d, cycle %d greater than %d", startpipe, (s32)(VU->cycle - (VU->fmac[startpipe].sCycle + VU->fmac[startpipe].Cycle)), cycle); } } + + VUM_LOG("Flushing FMAC starting at %d", startpos); + + cycle = VU->fmac[startpos].sCycle + VU->fmac[startpos].Cycle; + + for (int i=0; i<8; i++) { + int currentpipe = (i + startpos) % 8; + + if (VU->fmac[currentpipe].enable == 0) continue; + + if ((VU->cycle - VU->fmac[currentpipe].sCycle) >= VU->fmac[currentpipe].Cycle) { + + + VU->fmac[currentpipe].enable = 0; + + if ((VU->fmac[currentpipe].sCycle + VU->fmac[currentpipe].Cycle) >= cycle) + { + VUM_LOG("Writing back flags"); + if (VU->fmac[currentpipe].flagreg & (1 << REG_STATUS_FLAG)) + { + VUM_LOG("flushing FMAC Status Write pipe[%d] (status=%x) Cur Cycle %d Expected cycle %d", currentpipe, (VU->VI[REG_STATUS_FLAG].UL & 0xF30) | (VU->fmac[currentpipe].statusflag & 0x3CF), VU->cycle, VU->fmac[currentpipe].sCycle + VU->fmac[currentpipe].Cycle); + VU->VI[REG_STATUS_FLAG].UL = (VU->VI[REG_STATUS_FLAG].UL & 0x3F) | (VU->fmac[currentpipe].statusflag & 0xFC0); + } + else if (VU->fmac[currentpipe].flagreg & (1 << REG_CLIP_FLAG)) + { + VUM_LOG("flushing FMAC Clip Write pipe[%d] (clip=%x) Cur Cycle %d Expected cycle %d", currentpipe, VU->fmac[currentpipe].clipflag, VU->cycle, VU->fmac[currentpipe].sCycle + VU->fmac[currentpipe].Cycle); + VU->VI[REG_CLIP_FLAG].UL = VU->fmac[currentpipe].clipflag; + } + else + { + VUM_LOG("flushing FMAC pipe[%d] (macflag=%x status=%x) Cur Cycle %d Expected cycle %d", currentpipe, VU->fmac[currentpipe].macflag, (VU->VI[REG_STATUS_FLAG].UL & 0xF30) | (VU->fmac[currentpipe].statusflag & 0x3CF), VU->cycle, VU->fmac[currentpipe].sCycle + VU->fmac[currentpipe].Cycle); + // FMAC only affectx Z/S/I/O + VU->VI[REG_STATUS_FLAG].UL = (VU->VI[REG_STATUS_FLAG].UL & 0xFF0) | (VU->fmac[currentpipe].statusflag & 0x3CF); + VU->VI[REG_MAC_FLAG].UL = VU->fmac[currentpipe].macflag; + + } + } + didflush = true; + } + } + return didflush; } -static __ri void _vuFDIVflush(VURegs * VU) { - if (VU->fdiv.enable == 0) return; +static __ri bool _vuFDIVflush(VURegs * VU) { + if (VU->fdiv.enable == 0) return false; if ((VU->cycle - VU->fdiv.sCycle) >= VU->fdiv.Cycle) { VUM_LOG("flushing FDIV pipe"); VU->fdiv.enable = 0; VU->VI[REG_Q].UL = VU->fdiv.reg.UL; - VU->VI[REG_STATUS_FLAG].UL = VU->fdiv.statusflag; + // FDIV only affects D/I + VU->VI[REG_STATUS_FLAG].UL = (VU->VI[REG_STATUS_FLAG].UL & 0xFCF) | (VU->fdiv.statusflag & 0xC30); + + return true; } + return false; } -static __ri void _vuEFUflush(VURegs * VU) { - if (VU->efu.enable == 0) return; +static __ri bool _vuEFUflush(VURegs * VU) { + if (VU->efu.enable == 0) return false; if ((VU->cycle - VU->efu.sCycle) >= VU->efu.Cycle) { // VUM_LOG("flushing EFU pipe"); VU->efu.enable = 0; VU->VI[REG_P].UL = VU->efu.reg.UL; + + return true; } + + return false; } // called at end of program @@ -92,21 +147,51 @@ void _vuFlushAll(VURegs* VU) { int nRepeat = 1, i; + u32 startpos = 0; + s32 cycle = 9999; + + // Calculate lowest active FMAC pipe + for (int startpipe = 0; startpipe < 8; startpipe++) + { + if (VU->fmac[startpipe].enable == 0) continue; + + if ((VU->fmac[startpipe].sCycle + VU->fmac[startpipe].Cycle) <= VU->cycle) + { + if ((VU->cycle - (VU->fmac[startpipe].sCycle + VU->fmac[startpipe].Cycle)) < cycle) + { + VUM_LOG("Setting start pos to %d", startpipe); + cycle = (s32)(VU->cycle - (VU->fmac[startpipe].sCycle + VU->fmac[startpipe].Cycle)); + startpos = startpipe; + } + else + VUM_LOG("Nope for pipe %d, cycle %d greater than %d", startpipe, (s32)(VU->cycle - (VU->fmac[startpipe].sCycle + VU->fmac[startpipe].Cycle)), cycle); + } + } + do { nRepeat = 0; for (i=0; i<8; i++) { - if (VU->fmac[i].enable == 0) continue; + int currentpipe = (i + startpos) % 8; + + if (VU->fmac[currentpipe].enable == 0) continue; nRepeat = 1; - if ((VU->cycle - VU->fmac[i].sCycle) >= VU->fmac[i].Cycle) { - VUM_LOG("flushing FMAC pipe[%d] (macflag=%x)", i, VU->fmac[i].macflag); + if ((VU->cycle - VU->fmac[currentpipe].sCycle) >= VU->fmac[currentpipe].Cycle) { + VUM_LOG("flushing FMAC pipe[%d] (macflag=%x)", i, VU->fmac[currentpipe].macflag); - VU->fmac[i].enable = 0; - VU->VI[REG_MAC_FLAG].UL = VU->fmac[i].macflag; - VU->VI[REG_STATUS_FLAG].UL = VU->fmac[i].statusflag; - VU->VI[REG_CLIP_FLAG].UL = VU->fmac[i].clipflag; + VU->fmac[currentpipe].enable = 0; + if (VU->fmac[currentpipe].flagreg & (1 << REG_STATUS_FLAG)) + VU->VI[REG_STATUS_FLAG].UL = (VU->VI[REG_STATUS_FLAG].UL & 0x3F) | (VU->fmac[currentpipe].statusflag & 0xFC0); + else if (VU->fmac[currentpipe].flagreg & (1 << REG_CLIP_FLAG)) + VU->VI[REG_CLIP_FLAG].UL = VU->fmac[currentpipe].clipflag; + else + { + // FMAC only affectx Z/S/I/O + VU->VI[REG_STATUS_FLAG].UL = (VU->VI[REG_STATUS_FLAG].UL & 0xFF0) | (VU->fmac[currentpipe].statusflag & 0x3CF); + VU->VI[REG_MAC_FLAG].UL = VU->fmac[currentpipe].macflag; + } } } @@ -119,7 +204,7 @@ void _vuFlushAll(VURegs* VU) VU->fdiv.enable = 0; VU->VI[REG_Q].UL = VU->fdiv.reg.UL; - VU->VI[REG_STATUS_FLAG].UL = VU->fdiv.statusflag; + VU->VI[REG_STATUS_FLAG].UL = (VU->VI[REG_STATUS_FLAG].UL & 0xFCF) | (VU->fdiv.statusflag & 0xC30); } } @@ -141,9 +226,13 @@ void _vuFlushAll(VURegs* VU) } __fi void _vuTestPipes(VURegs * VU) { - _vuFMACflush(VU); - _vuFDIVflush(VU); - _vuEFUflush(VU); + bool flushed; + do { + flushed = false; + flushed |= _vuFMACflush(VU); + flushed |= _vuFDIVflush(VU); + flushed |= _vuEFUflush(VU); + } while (flushed == true); } static void __fastcall _vuFMACTestStall(VURegs * VU, int reg, int xyzw) { @@ -151,26 +240,25 @@ static void __fastcall _vuFMACTestStall(VURegs * VU, int reg, int xyzw) { for (i=0; i<8; i++) { if (VU->fmac[i].enable == 0) continue; + if ((VU->cycle - VU->fmac[i].sCycle) >= VU->fmac[i].Cycle) continue; if (VU->fmac[i].reg == reg && VU->fmac[i].xyzw & xyzw) break; } if (i == 8) return; - VU->fmac[i].enable = 0; - VU->VI[REG_MAC_FLAG].UL = VU->fmac[i].macflag; - VU->VI[REG_STATUS_FLAG].UL = VU->fmac[i].statusflag; - VU->VI[REG_CLIP_FLAG].UL = VU->fmac[i].clipflag; - u32 newCycle = VU->fmac[i].Cycle + VU->fmac[i].sCycle + 1; // HACK: add 1 delay (fixes segaclassics bad geom) + u32 newCycle = VU->fmac[i].Cycle + VU->fmac[i].sCycle; + VUM_LOG("FMAC[%d] stall %d", i, newCycle - VU->cycle); + if(newCycle > VU->cycle) + VU->cycle = newCycle; - VU->cycle = newCycle; - _vuTestPipes(VU); } - -static __ri void __fastcall _vuFMACAdd(VURegs * VU, int reg, int xyzw) { +u32 regcycle = 0; +static __ri void __fastcall _vuFMACAdd(VURegs * VU, _VURegsNum* VUregsn) { int i; + //If it's an FMAC which doesn't modify FMAC flags, just exit, no need to queue /* find a free fmac pipe */ for (i=0; i<8; i++) { if (VU->fmac[i].enable == 1) continue; @@ -178,18 +266,18 @@ static __ri void __fastcall _vuFMACAdd(VURegs * VU, int reg, int xyzw) { } if (i < 8) { - VUM_LOG("adding FMAC pipe[%d]; xyzw=%x", i, xyzw); - + VUM_LOG("adding FMAC pipe[%d]; reg=%x xyzw=%x flagreg=%x target=%x current %x", i, VUregsn->VFwrite, VUregsn->VFwxyzw, VUregsn->VIwrite, VU->cycle + 4, VU->cycle); VU->fmac[i].enable = 1; VU->fmac[i].sCycle = VU->cycle; - VU->fmac[i].Cycle = 3; - VU->fmac[i].reg = reg; - VU->fmac[i].xyzw = xyzw; + VU->fmac[i].Cycle = 4; + VU->fmac[i].reg = VUregsn->VFwrite; + VU->fmac[i].xyzw = VUregsn->VFwxyzw; VU->fmac[i].macflag = VU->macflag; VU->fmac[i].statusflag = VU->statusflag; VU->fmac[i].clipflag = VU->clipflag; + VU->fmac[i].flagreg = VUregsn->VIwrite; } else { - //Console.Error("*PCSX2*: error , out of fmacs %d", VU->cycle); + Console.Error("*PCSX2*: error , out of fmacs %d", VU->cycle); } } @@ -204,7 +292,7 @@ static __ri void __fastcall _vuFDIVAdd(VURegs * VU, int cycles) { } static __ri void __fastcall _vuEFUAdd(VURegs * VU, int cycles) { -// VUM_LOG("adding EFU pipe\n"); + VUM_LOG("adding EFU pipe for %d cycles\n", cycles); VU->efu.enable = 1; VU->efu.sCycle = VU->cycle; @@ -213,26 +301,34 @@ static __ri void __fastcall _vuEFUAdd(VURegs * VU, int cycles) { } static __ri void __fastcall _vuFlushFDIV(VURegs * VU) { - if (VU->fdiv.enable == 0) - return; - u32 newCycle = VU->fdiv.Cycle + VU->fdiv.sCycle; - VUM_LOG("waiting FDIV pipe %d", newCycle - VU->cycle); - - VU->fdiv.enable = 0; - VU->cycle = newCycle; - - VU->VI[REG_Q].UL = VU->fdiv.reg.UL; - VU->VI[REG_STATUS_FLAG].UL = VU->fdiv.statusflag; + if (VU->fdiv.enable != 0) + { + u32 newCycle = VU->fdiv.Cycle + VU->fdiv.sCycle; + VUM_LOG("waiting FDIV pipe %d", newCycle - VU->cycle); + if (newCycle > VU->cycle) + VU->cycle = newCycle; + } } -static __ri void __fastcall _vuFlushEFU(VURegs * VU) { +static __ri void __fastcall _vuFlushEFU(VURegs * VU, bool isWait = false) { if (VU->efu.enable == 0) return; - VU->efu.enable = 0; - VU->cycle = VU->efu.Cycle + VU->efu.sCycle; - VU->VI[REG_P].UL = VU->efu.reg.UL; + if (isWait) + { + VU->cycle = VU->efu.Cycle - 1; + VU->efu.sCycle = VU->cycle; + VU->efu.Cycle = 1; + } + else + { + u32 newCycle = (VU->efu.Cycle + VU->efu.sCycle); + VUM_LOG("waiting EFU pipe %d", newCycle - VU->cycle); + if (newCycle > VU->cycle) + VU->cycle = newCycle; + } + } static __fi void _vuTestFMACStalls(VURegs * VU, _VURegsNum *VUregsn) { @@ -245,18 +341,12 @@ static __fi void _vuTestFMACStalls(VURegs * VU, _VURegsNum *VUregsn) { } static __fi void _vuAddFMACStalls(VURegs * VU, _VURegsNum *VUregsn) { - if (VUregsn->VFwrite) { - _vuFMACAdd(VU, VUregsn->VFwrite, VUregsn->VFwxyzw); - } else - if (VUregsn->VIwrite & (1 << REG_CLIP_FLAG)) { - _vuFMACAdd(VU, -REG_CLIP_FLAG, 0); - } else { - _vuFMACAdd(VU, 0, 0); - } + _vuFMACAdd(VU, VUregsn); } static __fi void _vuTestFDIVStalls(VURegs * VU, _VURegsNum *VUregsn) { // _vuTestFMACStalls(VURegs * VU, _VURegsNum *VUregsn); + _vuTestFMACStalls(VU, VUregsn); _vuFlushFDIV(VU); } @@ -269,7 +359,8 @@ static __fi void _vuAddFDIVStalls(VURegs * VU, _VURegsNum *VUregsn) { static __fi void _vuTestEFUStalls(VURegs * VU, _VURegsNum *VUregsn) { // _vuTestFMACStalls(VURegs * VU, _VURegsNum *VUregsn); - _vuFlushEFU(VU); + _vuTestFMACStalls(VU, VUregsn); + _vuFlushEFU(VU, VUregsn->cycles == 0); } static __fi void _vuAddEFUStalls(VURegs * VU, _VURegsNum *VUregsn) { @@ -306,6 +397,21 @@ __fi void _vuAddLowerStalls(VURegs * VU, _VURegsNum *VUregsn) { } } +__fi void _vuBackupVI(VURegs* VU, int reg) +{ +#ifdef VI_BACKUP + if (VU->VIBackupCycles && reg == VU->VIRegNumber) + { + //On repeat writes we need to remember the value from before the chain + VU->VIBackupCycles = 2; + return; + } + + VU->VIBackupCycles = 2; + VU->VIRegNumber = reg; + VU->VIOldValue = VU->VI[reg].US[0]; +#endif +} /******************************/ /* VU Upper instructions */ @@ -372,7 +478,7 @@ void _vuABS(VURegs * VU) { if (_Y){ VU->VF[_Ft_].f.y = fabs(vuDouble(VU->VF[_Fs_].i.y)); } if (_Z){ VU->VF[_Ft_].f.z = fabs(vuDouble(VU->VF[_Fs_].i.z)); } if (_W){ VU->VF[_Ft_].f.w = fabs(vuDouble(VU->VF[_Fs_].i.w)); } -}/*Reworked from define to function. asadr*/ +} static __fi void _vuADD(VURegs * VU) { @@ -385,7 +491,7 @@ static __fi void _vuADD(VURegs * VU) { if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + vuDouble(VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + vuDouble(VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); -}/*Reworked from define to function. asadr*/ +} static __fi void _vuADDi(VURegs * VU) { @@ -408,7 +514,7 @@ static __fi void _vuADDi(VURegs * VU) { VU_STAT_UPDATE(VU); } -}/*Reworked from define to function. asadr*/ +} static __fi void _vuADDq(VURegs * VU) { VECTOR * dst; @@ -420,7 +526,7 @@ static __fi void _vuADDq(VURegs * VU) { if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + vuDouble(VU->VI[REG_Q].UL)); } else VU_MACz_CLEAR(VU); if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + vuDouble(VU->VI[REG_Q].UL)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); -}/*Reworked from define to function. asadr*/ +} static __fi void _vuADDx(VURegs * VU) { @@ -435,7 +541,7 @@ static __fi void _vuADDx(VURegs * VU) { if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + ftx); } else VU_MACz_CLEAR(VU); if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + ftx); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); -}/*Reworked from define to function. asadr*/ +} static __fi void _vuADDy(VURegs * VU) { float fty; @@ -449,7 +555,7 @@ static __fi void _vuADDy(VURegs * VU) { if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + fty);} else VU_MACz_CLEAR(VU); if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + fty);} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); -}/*Reworked from define to function. asadr*/ +} static __fi void _vuADDz(VURegs * VU) { float ftz; @@ -463,7 +569,7 @@ static __fi void _vuADDz(VURegs * VU) { if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + ftz); } else VU_MACz_CLEAR(VU); if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + ftz); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); -}/*Reworked from define to function. asadr*/ +} static __fi void _vuADDw(VURegs * VU) { float ftw; @@ -477,7 +583,7 @@ static __fi void _vuADDw(VURegs * VU) { if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + ftw); } else VU_MACz_CLEAR(VU); if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + ftw); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); -}/*Reworked from define to function. asadr*/ +} static __fi void _vuADDA(VURegs * VU) { if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); @@ -485,7 +591,7 @@ static __fi void _vuADDA(VURegs * VU) { if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + vuDouble(VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + vuDouble(VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); -}/*Reworked from define to function. asadr*/ +} static __fi void _vuADDAi(VURegs * VU) { float ti = vuDouble(VU->VI[REG_I].UL); @@ -495,7 +601,7 @@ static __fi void _vuADDAi(VURegs * VU) { if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + ti); } else VU_MACz_CLEAR(VU); if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + ti); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); -}/*Reworked from define to function. asadr*/ +} static __fi void _vuADDAq(VURegs * VU) { float tf = vuDouble(VU->VI[REG_Q].UL); @@ -505,7 +611,7 @@ static __fi void _vuADDAq(VURegs * VU) { if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + tf); } else VU_MACz_CLEAR(VU); if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + tf); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); -}/*Reworked from define to function. asadr*/ +} static __fi void _vuADDAx(VURegs * VU) { float tx = vuDouble(VU->VF[_Ft_].i.x); @@ -515,7 +621,7 @@ static __fi void _vuADDAx(VURegs * VU) { if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + tx); } else VU_MACz_CLEAR(VU); if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + tx); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); -}/*Reworked from define to function. asadr*/ +} static __fi void _vuADDAy(VURegs * VU) { float ty = vuDouble(VU->VF[_Ft_].i.y); @@ -525,7 +631,7 @@ static __fi void _vuADDAy(VURegs * VU) { if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + ty); } else VU_MACz_CLEAR(VU); if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + ty); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); -}/*Reworked from define to function. asadr*/ +} static __fi void _vuADDAz(VURegs * VU) { float tz = vuDouble(VU->VF[_Ft_].i.z); @@ -535,7 +641,7 @@ static __fi void _vuADDAz(VURegs * VU) { if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + tz); } else VU_MACz_CLEAR(VU); if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + tz); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); -}/*Reworked from define to function. asadr*/ +} static __fi void _vuADDAw(VURegs * VU) { float tw = vuDouble(VU->VF[_Ft_].i.w); @@ -545,7 +651,7 @@ static __fi void _vuADDAw(VURegs * VU) { if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + tw); } else VU_MACz_CLEAR(VU); if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + tw); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); -}/*Reworked from define to function. asadr*/ +} static __fi void _vuSUB(VURegs * VU) { @@ -717,7 +823,6 @@ static __fi void _vuMUL(VURegs * VU) { VU_STAT_UPDATE(VU); } -/* No need to presave I reg in ti. asadr */ static __fi void _vuMULi(VURegs * VU) { VECTOR * dst; if (_Fd_ == 0) dst = &RDzero; @@ -808,7 +913,6 @@ static __fi void _vuMULA(VURegs * VU) { VU_STAT_UPDATE(VU); } -/* No need to presave I reg in ti. asadr */ static __fi void _vuMULAi(VURegs * VU) { if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VI[REG_I].UL)); } else VU_MACx_CLEAR(VU); if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VI[REG_I].UL)); } else VU_MACy_CLEAR(VU); @@ -817,7 +921,6 @@ static __fi void _vuMULAi(VURegs * VU) { VU_STAT_UPDATE(VU); } -/* No need to presave Q reg in ti. asadr */ static __fi void _vuMULAq(VURegs * VU) { if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VI[REG_Q].UL)); } else VU_MACx_CLEAR(VU); if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VI[REG_Q].UL)); } else VU_MACy_CLEAR(VU); @@ -826,7 +929,6 @@ static __fi void _vuMULAq(VURegs * VU) { VU_STAT_UPDATE(VU); } -/* No need to presave X reg in ti. asadr */ static __fi void _vuMULAx(VURegs * VU) { if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.x)); } else VU_MACy_CLEAR(VU); @@ -884,7 +986,6 @@ static __fi void _vuMADDi(VURegs * VU) { VU_STAT_UPDATE(VU); } -/* No need to presave . asadr */ static __fi void _vuMADDq(VURegs * VU) { VECTOR * dst; if (_Fd_ == 0) dst = &RDzero; @@ -1190,7 +1291,6 @@ static __fi void _vuMAX(VURegs * VU) { if (_Fd_ == 0) return; - /* ft is bc */ if (_X) VU->VF[_Fd_].i.x = fp_max(VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x); if (_Y) VU->VF[_Fd_].i.y = fp_max(VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y); if (_Z) VU->VF[_Fd_].i.z = fp_max(VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z); @@ -1201,7 +1301,6 @@ static __fi void _vuMAXi(VURegs * VU) { if (_Fd_ == 0) return; - /* ft is bc */ if (_X) VU->VF[_Fd_].i.x = fp_max(VU->VF[_Fs_].i.x, VU->VI[REG_I].UL); if (_Y) VU->VF[_Fd_].i.y = fp_max(VU->VF[_Fs_].i.y, VU->VI[REG_I].UL); if (_Z) VU->VF[_Fd_].i.z = fp_max(VU->VF[_Fs_].i.z, VU->VI[REG_I].UL); @@ -1414,7 +1513,6 @@ static __fi void _vuITOF15(VURegs * VU) { if (_W) VU->VF[_Ft_].f.w = int15_to_float(VU->VF[_Fs_].SL[3]); } -/* Different type of clipping by presaving w. asadr */ static __fi void _vuCLIP(VURegs * VU) { float value = fabs(vuDouble(VU->VF[_Ft_].i.w)); @@ -1426,11 +1524,7 @@ static __fi void _vuCLIP(VURegs * VU) { if ( vuDouble(VU->VF[_Fs_].i.z) > +value ) VU->clipflag|= 0x10; if ( vuDouble(VU->VF[_Fs_].i.z) < -value ) VU->clipflag|= 0x20; VU->clipflag = VU->clipflag & 0xFFFFFF; - //VU->VI[REG_CLIP_FLAG].UL = VU->clipflag; //Shouldn't need to do this, let the pipes sort it out. - - -}/*last update 16/07/05 refraction - Needs checking */ - +} /******************************/ /* VU Lower instructions */ @@ -1440,7 +1534,7 @@ static __fi void _vuDIV(VURegs * VU) { float ft = vuDouble(VU->VF[_Ft_].UL[_Ftf_]); float fs = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); - VU->statusflag = (VU->statusflag&0xfcf)|((VU->statusflag&0x30)<<6); + VU->statusflag &= ~0x30; if (ft == 0.0) { if (fs == 0.0) { @@ -1463,21 +1557,19 @@ static __fi void _vuDIV(VURegs * VU) { static __fi void _vuSQRT(VURegs * VU) { float ft = vuDouble(VU->VF[_Ft_].UL[_Ftf_]); - VU->statusflag = (VU->statusflag&0xfcf)|((VU->statusflag&0x30)<<6); + VU->statusflag &= ~0x30; if (ft < 0.0 ) VU->statusflag |= 0x10; VU->q.F = sqrt(fabs(ft)); VU->q.F = vuDouble(VU->q.UL); } -/* Eminent Bug - Dvisior == 0 Check Missing ( D Flag Not Set ) */ -/* REFIXED....ASADR; rerefixed....zerofrog */ static __fi void _vuRSQRT(VURegs * VU) { float ft = vuDouble(VU->VF[_Ft_].UL[_Ftf_]); float fs = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); float temp; - VU->statusflag = (VU->statusflag&0xfcf)|((VU->statusflag&0x30)<<6); + VU->statusflag &= ~0x30; if ( ft == 0.0 ) { VU->statusflag |= 0x20; @@ -1517,23 +1609,15 @@ static __fi void _vuIADDI(VURegs * VU) { imm = ((imm & 0x10 ? 0xfff0 : 0) | (imm & 0xf)); if(_It_ == 0) return; -#ifdef VI_BACKUP - VU->VIBackupCycles = 2; - VU->VIRegNumber = _It_; - VU->VIOldValue = VU->VI[_It_].US[0]; -#endif + _vuBackupVI(VU, _It_); VU->VI[_It_].SS[0] = VU->VI[_Is_].SS[0] + imm; -}//last checked 17/05/03 shadow NOTE: not quite sure about that +} static __fi void _vuIADDIU(VURegs * VU) { if(_It_ == 0) return; -#ifdef VI_BACKUP - VU->VIBackupCycles = 2; - VU->VIRegNumber = _It_; - VU->VIOldValue = VU->VI[_It_].US[0]; -#endif + _vuBackupVI(VU, _It_); VU->VI[_It_].SS[0] = VU->VI[_Is_].SS[0] + (((VU->code >> 10) & 0x7800) | (VU->code & 0x7ff)); } @@ -1541,11 +1625,7 @@ static __fi void _vuIADDIU(VURegs * VU) { static __fi void _vuIADD(VURegs * VU) { if(_Id_ == 0) return; -#ifdef VI_BACKUP - VU->VIBackupCycles = 2; - VU->VIRegNumber = _It_; - VU->VIOldValue = VU->VI[_Id_].US[0]; -#endif + _vuBackupVI(VU, _Id_); VU->VI[_Id_].SS[0] = VU->VI[_Is_].SS[0] + VU->VI[_It_].SS[0]; } @@ -1553,11 +1633,7 @@ static __fi void _vuIADD(VURegs * VU) { static __fi void _vuIAND(VURegs * VU) { if(_Id_ == 0) return; -#ifdef VI_BACKUP - VU->VIBackupCycles = 2; - VU->VIRegNumber = _It_; - VU->VIOldValue = VU->VI[_Id_].US[0]; -#endif + _vuBackupVI(VU, _Id_); VU->VI[_Id_].US[0] = VU->VI[_Is_].US[0] & VU->VI[_It_].US[0]; } @@ -1565,32 +1641,24 @@ static __fi void _vuIAND(VURegs * VU) { static __fi void _vuIOR(VURegs * VU) { if(_Id_ == 0) return; -#ifdef VI_BACKUP - VU->VIBackupCycles = 2; - VU->VIRegNumber = _It_; - VU->VIOldValue = VU->VI[_Id_].US[0]; -#endif + _vuBackupVI(VU, _Id_); VU->VI[_Id_].US[0] = VU->VI[_Is_].US[0] | VU->VI[_It_].US[0]; } static __fi void _vuISUB(VURegs * VU) { if(_Id_ == 0) return; -#ifdef VI_BACKUP - VU->VIBackupCycles = 2; - VU->VIRegNumber = _It_; - VU->VIOldValue = VU->VI[_Id_].US[0]; -#endif + + _vuBackupVI(VU, _Id_); + VU->VI[_Id_].SS[0] = VU->VI[_Is_].SS[0] - VU->VI[_It_].SS[0]; } static __fi void _vuISUBIU(VURegs * VU) { if(_It_ == 0) return; -#ifdef VI_BACKUP - VU->VIBackupCycles = 2; - VU->VIRegNumber = _It_; - VU->VIOldValue = VU->VI[_It_].US[0]; -#endif + + _vuBackupVI(VU, _It_); + VU->VI[_It_].SS[0] = VU->VI[_Is_].SS[0] - (((VU->code >> 10) & 0x7800) | (VU->code & 0x7ff)); } @@ -1612,14 +1680,11 @@ static __fi void _vuMFIR(VURegs * VU) { if (_W) VU->VF[_Ft_].SL[3] = (s32)VU->VI[_Is_].SS[0]; } -// Big bug!!! mov from fs to ft not ft to fs. asadr static __fi void _vuMTIR(VURegs * VU) { if(_It_ == 0) return; -#ifdef VI_BACKUP - VU->VIBackupCycles = 2; - VU->VIRegNumber = _It_; - VU->VIOldValue = VU->VI[_It_].US[0]; -#endif + + _vuBackupVI(VU, _It_); + VU->VI[_It_].US[0] = *(u16*)&VU->VF[_Fs_].F[_Fsf_]; } @@ -1662,6 +1727,7 @@ static __ri void _vuLQ(VURegs * VU) { } static __ri void _vuLQD( VURegs * VU ) { + _vuBackupVI(VU, _Is_); if (_Is_ != 0) VU->VI[_Is_].US[0]--; if (_Ft_ == 0) return; @@ -1674,6 +1740,7 @@ static __ri void _vuLQD( VURegs * VU ) { } static __ri void _vuLQI(VURegs * VU) { + _vuBackupVI(VU, _Is_); if (_Ft_) { u32 addr = (VU->VI[_Is_].US[0] * 16); u32* ptr = (u32*)GET_VU_MEM(VU, addr); @@ -1685,7 +1752,6 @@ static __ri void _vuLQI(VURegs * VU) { if (_Fs_ != 0) VU->VI[_Is_].US[0]++; } -/* addr is now signed. Asadr */ static __ri void _vuSQ(VURegs * VU) { s16 imm = (VU->code & 0x400) ? (VU->code & 0x3ff) | 0xfc00 : (VU->code & 0x3ff); u16 addr = ((imm + VU->VI[_It_].SS[0]) * 16); @@ -1697,6 +1763,7 @@ static __ri void _vuSQ(VURegs * VU) { } static __ri void _vuSQD(VURegs * VU) { + _vuBackupVI(VU, _It_); if(_Ft_ != 0) VU->VI[_It_].US[0]--; u32 addr = (VU->VI[_It_].US[0] * 16); u32* ptr = (u32*)GET_VU_MEM(VU, addr); @@ -1707,6 +1774,7 @@ static __ri void _vuSQD(VURegs * VU) { } static __ri void _vuSQI(VURegs * VU) { + _vuBackupVI(VU, _It_); u32 addr = (VU->VI[_It_].US[0] * 16); u32* ptr = (u32*)GET_VU_MEM(VU, addr); if (_X) ptr[0] = VU->VF[_Fs_].UL[0]; @@ -1716,7 +1784,6 @@ static __ri void _vuSQI(VURegs * VU) { if(_Ft_ != 0) VU->VI[_It_].US[0]++; } -/* addr now signed. asadr */ static __ri void _vuILW(VURegs * VU) { if (_It_ == 0) return; @@ -1724,11 +1791,7 @@ static __ri void _vuILW(VURegs * VU) { u16 addr = ((imm + VU->VI[_Is_].SS[0]) * 16); u16* ptr = (u16*)GET_VU_MEM(VU, addr); -#ifdef VI_BACKUP - VU->VIBackupCycles = 2; - VU->VIRegNumber = _It_; - VU->VIOldValue = VU->VI[_It_].US[0]; -#endif + _vuBackupVI(VU, _It_); if (_X) VU->VI[_It_].US[0] = ptr[0]; if (_Y) VU->VI[_It_].US[0] = ptr[2]; @@ -1752,11 +1815,7 @@ static __ri void _vuILWR(VURegs * VU) { u32 addr = (VU->VI[_Is_].US[0] * 16); u16* ptr = (u16*)GET_VU_MEM(VU, addr); -#ifdef VI_BACKUP - VU->VIBackupCycles = 2; - VU->VIRegNumber = _It_; - VU->VIOldValue = VU->VI[_It_].US[0]; -#endif + _vuBackupVI(VU, _It_); if (_X) VU->VI[_It_].US[0] = ptr[0]; if (_Y) VU->VI[_It_].US[0] = ptr[2]; @@ -1832,6 +1891,7 @@ static __ri void _vuWAITQ(VURegs * VU) { static __ri void _vuFSAND(VURegs * VU) { u16 imm; + imm = (((VU->code >> 21 ) & 0x1) << 11) | (VU->code & 0x7ff); if(_It_ == 0) return; VU->VI[_It_].US[0] = (VU->VI[REG_STATUS_FLAG].US[0] & 0xFFF) & imm; @@ -1839,6 +1899,7 @@ static __ri void _vuFSAND(VURegs * VU) { static __ri void _vuFSEQ(VURegs * VU) { u16 imm; + imm = (((VU->code >> 21 ) & 0x1) << 11) | (VU->code & 0x7ff); if(_It_ == 0) return; if((VU->VI[REG_STATUS_FLAG].US[0] & 0xFFF) == imm) VU->VI[_It_].US[0] = 1; @@ -1847,6 +1908,7 @@ static __ri void _vuFSEQ(VURegs * VU) { static __ri void _vuFSOR(VURegs * VU) { u16 imm; + imm = (((VU->code >> 21 ) & 0x1) << 11) | (VU->code & 0x7ff); if(_It_ == 0) return; VU->VI[_It_].US[0] = (VU->VI[REG_STATUS_FLAG].US[0] & 0xFFF) | imm; @@ -1854,23 +1916,27 @@ static __ri void _vuFSOR(VURegs * VU) { static __ri void _vuFSSET(VURegs * VU) { u16 imm = 0; + imm = (((VU->code >> 21 ) & 0x1) << 11) | (VU->code & 0x7FF); - VU->statusflag = (imm & 0xFC0) | (VU->VI[REG_STATUS_FLAG].US[0] & 0x3F); + VU->statusflag = (imm & 0xFC0) | (VU->statusflag & 0x3F); } static __ri void _vuFMAND(VURegs * VU) { if(_It_ == 0) return; + VU->VI[_It_].US[0] = VU->VI[_Is_].US[0] & (VU->VI[REG_MAC_FLAG].UL & 0xFFFF); } static __fi void _vuFMEQ(VURegs * VU) { if(_It_ == 0) return; + if((VU->VI[REG_MAC_FLAG].UL & 0xFFFF) == VU->VI[_Is_].US[0]){ VU->VI[_It_].US[0] =1;} else { VU->VI[_It_].US[0] =0; } } static __fi void _vuFMOR(VURegs * VU) { if(_It_ == 0) return; + VU->VI[_It_].US[0] = (VU->VI[REG_MAC_FLAG].UL & 0xFFFF) | VU->VI[_Is_].US[0]; } @@ -1892,11 +1958,11 @@ static __fi void _vuFCOR(VURegs * VU) { static __fi void _vuFCSET(VURegs * VU) { VU->clipflag = (u32) (VU->code & 0xFFFFFF); - VU->VI[REG_CLIP_FLAG].UL = (u32) (VU->code & 0xFFFFFF); } static __fi void _vuFCGET(VURegs * VU) { if(_It_ == 0) return; + VU->VI[_It_].US[0] = VU->VI[REG_CLIP_FLAG].UL & 0x0FFF; } @@ -1908,14 +1974,13 @@ s32 _branchAddr(VURegs * VU) { static __fi void _setBranch(VURegs * VU, u32 bpc) { if(VU->branch == 1) - { + { //DevCon.Warning("Branch in Branch Delay slot!"); VU->delaybranchpc = bpc; VU->takedelaybranch = true; } else { - VU->branch = 2; VU->branchpc = bpc; } @@ -1929,10 +1994,12 @@ static __ri void _vuIBEQ(VURegs * VU) { { if(VU->VIRegNumber == _It_) { + //DevCon.Warning("Banana IBEQ It backup used"); dest = VU->VIOldValue; } if(VU->VIRegNumber == _Is_) { + //DevCon.Warning("Banana IBEQ Is backup used"); src = VU->VIOldValue; } } @@ -1950,6 +2017,7 @@ static __ri void _vuIBGEZ(VURegs * VU) { { if(VU->VIRegNumber == _Is_) { + //DevCon.Warning("Banana IBGEZ Is backup used"); src = VU->VIOldValue; } } @@ -1967,6 +2035,7 @@ static __ri void _vuIBGTZ(VURegs * VU) { { if(VU->VIRegNumber == _Is_) { + //DevCon.Warning("Banana IBGTZ Is backup used"); src = VU->VIOldValue; } } @@ -1985,6 +2054,7 @@ static __ri void _vuIBLEZ(VURegs * VU) { { if(VU->VIRegNumber == _Is_) { + //DevCon.Warning("Banana IBLEZ Is backup used"); src = VU->VIOldValue; } } @@ -2002,6 +2072,7 @@ static __ri void _vuIBLTZ(VURegs * VU) { { if(VU->VIRegNumber == _Is_) { + //DevCon.Warning("Banana IBLTZ Is backup used"); src = VU->VIOldValue; } } @@ -2021,9 +2092,11 @@ static __ri void _vuIBNE(VURegs * VU) { if(VU->VIRegNumber == _It_) { dest = VU->VIOldValue; + //DevCon.Warning("Banana IBNE It backup used"); } if(VU->VIRegNumber == _Is_) { + //DevCon.Warning("Banana IBNE Is backup used"); src = VU->VIOldValue; } } @@ -2045,7 +2118,7 @@ static __ri void _vuBAL(VURegs * VU) { if(_It_) { - //If we are in the branch delay slot, the instruction after the first + //If we are in the branch delay slot, the instruction after the first //instruction in the first branches target becomes the linked reg. if(VU->branch == 1) { @@ -2071,7 +2144,7 @@ static __ri void _vuJALR(VURegs * VU) { if(_It_) { - //If we are in the branch delay slot, the instruction after the first + //If we are in the branch delay slot, the instruction after the first //instruction in the first branches target becomes the linked reg. if(VU->branch == 1) { @@ -2099,19 +2172,21 @@ static __ri void _vuWAITP(VURegs * VU) { } static __ri void _vuESADD(VURegs * VU) { + DevCon.Warning("ESADD"); float p = vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Fs_].i.z); VU->p.F = p; } static __ri void _vuERSADD(VURegs * VU) { + DevCon.Warning("ERSADD"); float p = (vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Fs_].i.x)) + (vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Fs_].i.y)) + (vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Fs_].i.z)); if (p != 0.0) p = 1.0f / p; VU->p.F = p; } -/* Fixed. Could have caused crash due to value being -ve for sqrt *asadr */ static __ri void _vuELENG(VURegs * VU) { + DevCon.Warning("ELENG"); float p = vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Fs_].i.z); if(p >= 0){ p = sqrt(p); @@ -2119,8 +2194,8 @@ static __ri void _vuELENG(VURegs * VU) { VU->p.F = p; } -/* Fixed. Could have caused crash due to divisor being = 0 *asadr */ static __ri void _vuERLENG(VURegs * VU) { + DevCon.Warning("ERLENG"); float p = vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Fs_].i.z); if (p >= 0) { p = sqrt(p); @@ -2131,8 +2206,8 @@ static __ri void _vuERLENG(VURegs * VU) { VU->p.F = p; } -/* Fixed. Could have caused crash due to divisor being = 0 *asadr */ static __ri void _vuEATANxy(VURegs * VU) { + DevCon.Warning("EATANxy"); float p = 0; if(vuDouble(VU->VF[_Fs_].i.x) != 0) { p = atan2(vuDouble(VU->VF[_Fs_].i.y), vuDouble(VU->VF[_Fs_].i.x)); @@ -2140,8 +2215,8 @@ static __ri void _vuEATANxy(VURegs * VU) { VU->p.F = p; } -/* Fixed. Could have caused crash due to divisor being = 0 *asadr */ static __ri void _vuEATANxz(VURegs * VU) { + DevCon.Warning("EATANxz"); float p = 0; if(vuDouble(VU->VF[_Fs_].i.x) != 0) { p = atan2(vuDouble(VU->VF[_Fs_].i.z), vuDouble(VU->VF[_Fs_].i.x)); @@ -2150,11 +2225,11 @@ static __ri void _vuEATANxz(VURegs * VU) { } static __ri void _vuESUM(VURegs * VU) { + DevCon.Warning("ESUM"); float p = vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VF[_Fs_].i.z) + vuDouble(VU->VF[_Fs_].i.w); VU->p.F = p; } -/* Fixed. Could have caused crash due to divisor being = 0 *asadr */ static __ri void _vuERCPR(VURegs * VU) { float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); if (p != 0){ @@ -2163,8 +2238,8 @@ static __ri void _vuERCPR(VURegs * VU) { VU->p.F = p; } -/* Fixed. Could have caused crash due to Value being -ve for sqrt *asadr */ static __ri void _vuESQRT(VURegs * VU) { + DevCon.Warning("ESQRT"); float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); if (p >= 0){ p = sqrt(p); @@ -2172,8 +2247,8 @@ static __ri void _vuESQRT(VURegs * VU) { VU->p.F = p; } -/* Fixed. Could have caused crash due to divisor being = 0 *asadr */ static __ri void _vuERSQRT(VURegs * VU) { + DevCon.Warning("ERSQRT"); float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); if (p >= 0) { p = sqrt(p); @@ -2185,16 +2260,19 @@ static __ri void _vuERSQRT(VURegs * VU) { } static __ri void _vuESIN(VURegs * VU) { + DevCon.Warning("ESIN"); float p = sin(vuDouble(VU->VF[_Fs_].UL[_Fsf_])); VU->p.F = p; } static __ri void _vuEATAN(VURegs * VU) { + DevCon.Warning("EATAN"); float p = atan(vuDouble(VU->VF[_Fs_].UL[_Fsf_])); VU->p.F = p; } static __ri void _vuEEXP(VURegs * VU) { + DevCon.Warning("EEXP"); float p = exp(-(vuDouble(VU->VF[_Fs_].UL[_Fsf_]))); VU->p.F = p; } @@ -2205,22 +2283,63 @@ static __ri void _vuXITOP(VURegs * VU) { else VU->VI[_It_].US[0] = VU->GetVifRegs().itop; } +void _vuXGKICKFlush(VURegs* VU) +{ + while (!VU->xgkickendpacket || VU->xgkicksizeremaining > 0) + { + if (VU->xgkicksizeremaining == 0) + { + IPU_LOG("Banana Reading next packet"); + u32 size = gifUnit.GetGSPacketSize(GIF_PATH_1, VU->Mem, VU->xgkickaddr); + VU->xgkicksizeremaining = size & 0xFFFF; + VU->xgkickendpacket = size >> 31; + VU->xgkickdiff = 0x4000 - VU->xgkickaddr; + } + + IPU_LOG("Banana Force Transferring %x bytes from %x size left %x", VU->xgkicksizeremaining, VU->xgkickaddr, VU->xgkicksizeremaining); + + if (VU->xgkicksizeremaining > VU->xgkickdiff) { + IPU_LOG( "VU1 Int: XGkick Wrap!"); + gifUnit.gifPath[GIF_PATH_1].CopyGSPacketData(&VU->Mem[VU->xgkickaddr], VU->xgkickdiff, true); + gifUnit.TransferGSPacketData(GIF_TRANS_XGKICK, &VU->Mem[0], VU->xgkicksizeremaining - VU->xgkickdiff, true); + } + else { + gifUnit.TransferGSPacketData(GIF_TRANS_XGKICK, &VU->Mem[VU->xgkickaddr], VU->xgkicksizeremaining, true); + } + + VU->xgkickaddr = (VU->xgkickaddr + VU->xgkicksizeremaining) & 0x3fff; + + if (VU0.VI[REG_VPU_STAT].UL & 0x100) + VU->cycle += VU->xgkicksizeremaining / 2; + + VU->xgkicksizeremaining = 0; + + IPU_LOG("Banana next addr %x left size %x EOP %d", VU->xgkickaddr, VU->xgkicksizeremaining, VU->xgkickendpacket); + + } + VU->xgkickenable = 0; + _vuTestPipes(VU); +} + static __ri void _vuXGKICK(VURegs * VU) { - // flush all pipelines first (in the right order) - _vuFlushAll(VU); + if (VU->xgkickenable) + { + _vuXGKICKFlush(VU); + } + u32 addr = (VU->VI[_Is_].US[0] & 0x3ff) * 16; u32 diff = 0x4000 - addr; - u32 size = gifUnit.GetGSPacketSize(GIF_PATH_1, VU->Mem, addr); - if (size > diff) { - //DevCon.WriteLn(Color_Green, "VU1 Int: XGkick Wrap!"); - gifUnit.gifPath[GIF_PATH_1].CopyGSPacketData( &VU->Mem[addr], diff,true); - gifUnit.TransferGSPacketData(GIF_TRANS_XGKICK, &VU->Mem[0],size-diff,true); - } - else { - gifUnit.TransferGSPacketData(GIF_TRANS_XGKICK, &VU->Mem[addr], size, true); - } + VU->xgkickenable = 1; + VU->xgkickaddr = addr; + VU->xgkickdiff = diff; + VU->xgkicksizeremaining = 0; + VU->xgkickendpacket = 0; + VU->xgkicklastcycle = VU->cycle; + IPU_LOG("Banana XGKick size %x EOP %d addr %x", VU->xgkicksizeremaining, VU->xgkickendpacket, addr); + //_vuXGKICKFlush(VU); + //VU->cycle = oldcycle; } static __ri void _vuXTOP(VURegs * VU) { @@ -2437,18 +2556,6 @@ VUREGS_ACCFSFTw(SUBAw, 0); #define VUREGS_FDFSFTxyzw_MUL(OP, ACC, xyzw) \ static __ri void _vuRegs##OP(const VURegs* VU, _VURegsNum *VUregsn) { \ - if( _Ft_ == 0 && (xyzw) > 1 && _XYZW == 0xf ) { /* resetting to 0 */ \ - VUregsn->pipe = VUPIPE_FMAC; \ - VUregsn->VFwrite = (ACC)?0:_Fd_; \ - VUregsn->VFwxyzw = _XYZW; \ - VUregsn->VFread0 = 0; \ - VUregsn->VFr0xyzw= _XYZW; \ - VUregsn->VFread1 = 0; \ - VUregsn->VFr1xyzw= xyzw; \ - VUregsn->VIwrite = ((ACC)?(1<VIread = ((ACC)&&(_XYZW!=15))?(1<pipe = VUPIPE_FMAC; \ VUregsn->VFwrite = (ACC)?0:_Fd_; \ VUregsn->VFwxyzw = _XYZW; \ @@ -2458,7 +2565,6 @@ static __ri void _vuRegs##OP(const VURegs* VU, _VURegsNum *VUregsn) { \ VUregsn->VFr1xyzw= xyzw; \ VUregsn->VIwrite = ((ACC)?(1<VIread = GET_VF0_FLAG(_Fs_)|(((ACC)&&(_XYZW!=15))?(1<VIread &= ~(1<VIread &= ~(1<VIread &= ~(1<VIread &= ~(1<VFr1xyzw= 1 << (3-_Ftf_); VUregsn->VIwrite = 1 << REG_Q; VUregsn->VIread = GET_VF0_FLAG(_Fs_)|GET_VF0_FLAG(_Ft_); - VUregsn->cycles = 6; + VUregsn->cycles = 7; } static __ri void _vuRegsSQRT(const VURegs* VU, _VURegsNum *VUregsn) { @@ -2643,7 +2745,7 @@ static __ri void _vuRegsSQRT(const VURegs* VU, _VURegsNum *VUregsn) { VUregsn->VFr1xyzw = 1 << (3-_Ftf_); VUregsn->VIwrite = 1 << REG_Q; VUregsn->VIread = GET_VF0_FLAG(_Ft_); - VUregsn->cycles = 6; + VUregsn->cycles = 7; } static __ri void _vuRegsRSQRT(const VURegs* VU, _VURegsNum *VUregsn) { @@ -2655,7 +2757,7 @@ static __ri void _vuRegsRSQRT(const VURegs* VU, _VURegsNum *VUregsn) { VUregsn->VFr1xyzw= 1 << (3-_Ftf_); VUregsn->VIwrite = 1 << REG_Q; VUregsn->VIread = GET_VF0_FLAG(_Fs_)|GET_VF0_FLAG(_Ft_); - VUregsn->cycles = 12; + VUregsn->cycles = 13; } VUREGS_ITIS(IADDI); @@ -2693,7 +2795,7 @@ static __ri void _vuRegsMR32(const VURegs* VU, _VURegsNum *VUregsn) { VUregsn->VFwrite = _Ft_; VUregsn->VFwxyzw = _XYZW; VUregsn->VFread0 = _Fs_; - VUregsn->VFr0xyzw= (_XYZW >> 1) | ((_XYZW << 3) & 0xf); //rotate + VUregsn->VFr0xyzw= (_XYZW >> 1) | ((_XYZW << 3) & 0x8); //rotate VUregsn->VFread1 = 0; VUregsn->VFr1xyzw = 0xff; VUregsn->VIwrite = 0; @@ -2767,7 +2869,7 @@ static __ri void _vuRegsILW(const VURegs* VU, _VURegsNum *VUregsn) { VUregsn->VFread1 = 0; VUregsn->VIwrite = 1 << _It_; VUregsn->VIread = 1 << _Is_; - VUregsn->cycles = 3; + VUregsn->cycles = 4; } static __ri void _vuRegsISW(const VURegs* VU, _VURegsNum *VUregsn) { @@ -2786,7 +2888,7 @@ static __ri void _vuRegsILWR(const VURegs* VU, _VURegsNum *VUregsn) { VUregsn->VFread1 = 0; VUregsn->VIwrite = (1 << _It_); VUregsn->VIread = (1 << _Is_); - VUregsn->cycles = 3; + VUregsn->cycles = 4; } static __ri void _vuRegsISWR(const VURegs* VU, _VURegsNum *VUregsn) { @@ -3062,21 +3164,22 @@ static __ri void _vuRegsWAITP(const VURegs* VU, _VURegsNum *VUregsn) { VUregsn->VFread1 = 0; VUregsn->VIwrite = 0; VUregsn->VIread = 0; + VUregsn->cycles = 0; } -VUREGS_PFS_xyzw(ESADD, 10); -VUREGS_PFS_xyzw(ERSADD, 17); -VUREGS_PFS_xyzw(ELENG, 17); -VUREGS_PFS_xyzw(ERLENG, 23); -VUREGS_PFS_xyzw(EATANxy, 53); -VUREGS_PFS_xyzw(EATANxz, 53); -VUREGS_PFS_xyzw(ESUM, 11); -VUREGS_PFS_fsf(ERCPR, 11); -VUREGS_PFS_fsf(ESQRT, 11); -VUREGS_PFS_fsf(ERSQRT, 17); -VUREGS_PFS_fsf(ESIN, 28); -VUREGS_PFS_fsf(EATAN, 53); -VUREGS_PFS_fsf(EEXP, 43); +VUREGS_PFS_xyzw(ESADD, 11); +VUREGS_PFS_xyzw(ERSADD, 18); +VUREGS_PFS_xyzw(ELENG, 18); +VUREGS_PFS_xyzw(ERLENG, 24); +VUREGS_PFS_xyzw(EATANxy, 54); +VUREGS_PFS_xyzw(EATANxz, 54); +VUREGS_PFS_xyzw(ESUM, 12); +VUREGS_PFS_fsf(ERCPR, 12); +VUREGS_PFS_fsf(ESQRT, 12); +VUREGS_PFS_fsf(ERSQRT, 18); +VUREGS_PFS_fsf(ESIN, 29); +VUREGS_PFS_fsf(EATAN, 54); +VUREGS_PFS_fsf(EEXP, 44); static __ri void _vuRegsXITOP(const VURegs* VU, _VURegsNum *VUregsn) { VUregsn->pipe = VUPIPE_IALU;