From ab1774b28a6a7cbdf52e9114ef323bee067e387c Mon Sep 17 00:00:00 2001 From: "memberTwo.mb2" Date: Fri, 14 Nov 2008 22:33:48 +0000 Subject: [PATCH] DC fix/hack: 1) now a GP-watchdog thread on core 2 locks CPU in gatherpipe (TODO better). 2) Video_SendFifoData send full fifo to GP (should be faster by avoiding the decoder to stall). git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@1178 8ced0084-cf51-0410-be5f-012b33b47a6e --- Source/Core/Core/Src/HW/CommandProcessor.cpp | 121 +++++++++---------- Source/Core/Core/Src/HW/CommandProcessor.h | 3 + Source/Core/Core/Src/HW/PixelEngine.cpp | 1 + Source/Core/Core/Src/Plugins/Plugin_Video.h | 2 +- Source/Core/VideoCommon/Src/Fifo.cpp | 74 ++++++++---- Source/PluginSpecs/pluginspecs_video.h | 11 +- 6 files changed, 118 insertions(+), 94 deletions(-) diff --git a/Source/Core/Core/Src/HW/CommandProcessor.cpp b/Source/Core/Core/Src/HW/CommandProcessor.cpp index 45be3701af..10bb2dd90c 100644 --- a/Source/Core/Core/Src/HW/CommandProcessor.cpp +++ b/Source/Core/Core/Src/HW/CommandProcessor.cpp @@ -26,22 +26,32 @@ // - Animal Crossing: PEfinish at start but there's a bug... // There's tons of HiWmk/LoWmk ping pong -> Another sync fashion? +// *What I guess (thx to asynchronous DualCore mode): +// PPC have a frame-finish watchdog. Handled by system timming stuff like the decrementer. +// (DualCore mode): I have observed, after ZTP logos, a fifo-recovery start when DECREMENTER_EXCEPTION is throwned. +// The frame setting (by GP) took too much time and didn't finish properly due to this watchdog. +// Faster GX plugins required, indeed :p + // * BPs are needed for some game GP/CPU sync. // But it could slowdown (MP1 at least) because our GP in DC is faster than "expected" in some area. // eg: in movie-menus in MP1, BP are reached quickly. // The bad thing is that involve too much PPC work (int ack, lock GP, reset BP, new BP addr, unlock BP...) hence the slowdown. -// Anyway, emulation is more accurate like this and it emulate some sort of better load balancing. +// Anyway, emulation should more accurate like this and it emulate some sort of better load balancing. // Eather way in those area a more accurate GP timing could be done by slowing down the GP or something less stupid. +// Not functional and not used atm (breaks MP2). // * funny, in revs before those with this note, BP irq wasn't cleared (a bug indeed) and MP1 menus was faster. // BP irq was raised and ack just once but never cleared. However it's sufficient for MP1 to work. -// I see a possible ugly hack there :p +// This hack is used atm. Known BPs handling doesn't work well (btw, BP irq clearing might be done by CPIntEnable raising edge). +// The hack seems to be responsible of the movie stutering in MP1 menus. // TODO (mb2): -// * raise watermark Ov/Un irq: DONE but not commited because useless(?) and it slowdown DC. -// TODO: commit #ifdef'ed?... if I'm begged :p -// * raise ReadIdle/CmdIdle irq and observe behaviour of MP1 & ZTP (at least) -// * find who command a complete fifo flush (could be some sort of timeout + wait for ReadIdle irq) +// * raise watermark Ov/Un irq: POINTLESS since emulated GP timings can't be accuratly set. +// Only 3 choises IMHO for a correct emulated load balancing in DC mode: +// - make our own GP watchdog hack that can lock CPU if GP too slow. +// - hack directly something in PPC timings (dunno how) +// - boost GP so we can consider it as infinitly fast compared to CPU. +// * raise ReadIdle/CmdIdle flags and observe behaviour of MP1 & ZTP (at least) // * investigate VI and PI for fixing the DC ZTP bug. // * Clean useless comments and debug stuff in Read16, Write16, GatherPipeBursted when sync will be fixed for DC // * (reminder) do the same in: @@ -160,6 +170,16 @@ inline u16 ReadHigh (u32 _reg) {return (u16)(_reg >> 16);} int et_UpdateInterrupts; +// for GP watchdog hack +void IncrementGPWDToken() +{ +#ifdef WIN32 + InterlockedIncrement((LONG*)&fifo.Fake_GPWDToken); +#else + //TODO +#endif +} + void UpdateInterrupts_Wrapper(u64 userdata, int cyclesLate) { UpdateInterrupts(); @@ -180,17 +200,14 @@ void Init() m_tokenReg = 0; - fifo.bFF_GPReadEnable = FALSE; - fifo.bFF_GPLinkEnable = FALSE; - fifo.bFF_BPEnable = FALSE; - + fifo.bFF_GPReadEnable = FALSE; + fifo.bFF_GPLinkEnable = FALSE; + fifo.bFF_BPEnable = FALSE; + // for GP watchdog hack + fifo.Fake_GPWDInterrupt = FALSE; + fifo.Fake_GPWDToken = 0; et_UpdateInterrupts = CoreTiming::RegisterEvent("UpdateInterrupts", UpdateInterrupts_Wrapper); -#ifdef _WIN32 - //InitializeCriticalSection(&fifo.sync); -#else - // fifo.sync = new Common::CriticalSection(0); -#endif } void Shutdown() @@ -250,6 +267,9 @@ void Read16(u16& _rReturnValue, const u32 _Address) case FIFO_BP_LO: _rReturnValue = ReadLow (fifo.CPBreakpoint); return; case FIFO_BP_HI: _rReturnValue = ReadHigh(fifo.CPBreakpoint); return; +// case 0x42: // first metric reg (I guess) read in case of "fifo unknown state" +// Crash(); +// return; // case 0x64: // return 4; //Number of clocks per vertex.. todo: calculate properly @@ -313,11 +333,6 @@ void Write16(const u16 _Value, const u32 _Address) ct++; if (ct) {LOG(COMMANDPROCESSOR, "(Write16): %lu cycles for nothing :[ ", ct);} } - #ifdef _WIN32 - //EnterCriticalSection(&fifo.sync); - #else - // fifo.sync->Enter(); - #endif } switch (_Address & 0xFFF) @@ -451,30 +466,18 @@ void Write16(const u16 _Value, const u32 _Address) case FIFO_RW_DISTANCE_LO: LOG(COMMANDPROCESSOR,"try to write to %s : %04x",((_Address & 0xFFF) == FIFO_RW_DISTANCE_HI) ? "FIFO_RW_DISTANCE_HI" : "FIFO_RW_DISTANCE_LO", _Value); _dbg_assert_msg_(COMMANDPROCESSOR, _Value==0, "WTF? attempt to overwrite fifo CPReadWriteDistance with a value(%04x) != 0 ",_Value); - // TODO: break this hack - // hack: if fifo is not empty and readable then spin until flushed - // sleeping here is not a perf issue since GXSetFifo shouldn't be called too much often. - while((fifo.CPReadWriteDistance>0) && (fifo.bFF_GPReadEnable==1)) - Common::SleepCurrentThread(1); - _dbg_assert_msg_(COMMANDPROCESSOR, !(fifo.CPReadWriteDistance!=0 && fifo.bFF_GPReadEnable==0), "Is that bad? WARNING: reset a not empty and not readable fifo.\n CPReadWriteDistance: %08x | fifo.bFF_GPReadEnable: %i",fifo.CPReadWriteDistance,fifo.bFF_GPReadEnable); + + // This SHOULD NOT be a problem to overwrite here unless this msg appears: + _dbg_assert_msg_(COMMANDPROCESSOR, fifo.CPReadWriteDistance==0, "Is that bad? WARNING: reset a non empty fifo.\n CPReadWriteDistance: %08x | fifo.bFF_GPReadEnable: %i",fifo.CPReadWriteDistance,fifo.bFF_GPReadEnable); + //_dbg_assert_msg_(COMMANDPROCESSOR, !(fifo.CPReadWriteDistance!=0 && fifo.bFF_GPReadEnable==0), "Is that bad? WARNING: reset a not empty and not readable fifo.\n CPReadWriteDistance: %08x | fifo.bFF_GPReadEnable: %i",fifo.CPReadWriteDistance,fifo.bFF_GPReadEnable); + #ifdef _WIN32 - InterlockedExchange((LONG*)&fifo.CPReadWriteDistance, 0); + //InterlockedExchange((LONG*)&fifo.CPReadWriteDistance, 0); #else - Common::InterlockedExchange((int*)&fifo.CPReadWriteDistance, 0); + //Common::InterlockedExchange((int*)&fifo.CPReadWriteDistance, 0); #endif break; } - - // (mb2) very dangerous to UpdateFifoRegister() that way in DC when there no criticalsections anymore - // ... completely useless btw :p - // update the registers and run the fifo - //UpdateFifoRegister(); - //if (Core::g_CoreStartupParameter.bUseDualCore) -#ifdef _WIN32 - //LeaveCriticalSection(&fifo.sync); -#else - // fifo.sync->Leave(); -#endif } void Read32(u32& _rReturnValue, const u32 _Address) @@ -496,6 +499,17 @@ void GatherPipeBursted() if (Core::g_CoreStartupParameter.bUseDualCore) { + // for GP watchdog hack + if (fifo.Fake_GPWDInterrupt) + { + // Wait for GPU to catch up + while (fifo.CPReadWriteDistance > 0) // TOCHECK: more checks could be needed + ; + InterlockedExchange((LONG*)&fifo.Fake_GPWDInterrupt, 0); + LOGV(COMMANDPROCESSOR, 2, "!!! Fake_GPWDInterrupt raised. Fake_GPWDToken %i",fifo.Fake_GPWDToken); + } + + // update the fifo-pointer fifo.CPWritePointer += GPFifo::GATHER_PIPE_SIZE; if (fifo.CPWritePointer >= fifo.CPEnd) @@ -506,25 +520,13 @@ void GatherPipeBursted() Common::InterlockedExchangeAdd((int*)&fifo.CPReadWriteDistance, GPFifo::GATHER_PIPE_SIZE); #endif - // Wait for GPU to catch up -#if 0 - // High watermark overflow handling: hacked way - u32 ct=0; - while (!(fifo.bFF_BPEnable && fifo.bFF_Breakpoint) && fifo.CPReadWriteDistance > (s32)fifo.CPHiWatermark) - { - ct++; - // dunno if others threads (like the audio thread) really need a forced context switch here - //Common::SleepCurrentThread(1); - } - if (ct) {LOG(COMMANDPROCESSOR, "(GatherPipeBursted): %lu cycles for nothing :[ ", ct);} -#else - // High watermark overflow handling: less hacked way + // High watermark overflow handling (hacked way) u32 ct=0; if (fifo.CPReadWriteDistance > (s32)fifo.CPHiWatermark) { // we should raise an Ov interrupt for an accurate fifo emulation and let PPC deal with it. // But it slowdown things because of if(interrupt blah blah){} blocks for each 32B fifo transactions. - // CPU would be a bit more loaded too by interrupt handling... + // CPU would be a bit more loaded too by its interrupt handling... // Eather way, CPU would have the ability to resume another thread. // To be clear: this spin loop is like a critical section spin loop in the emulated GX thread hence "hacked way" @@ -538,6 +540,7 @@ void GatherPipeBursted() // - CPU can write to fifo // - disable Underflow interrupt + // Wait for GPU to catch up //while (!(fifo.bFF_BPEnable && fifo.bFF_Breakpoint) && fifo.CPReadWriteDistance > (s32)fifo.CPLoWatermark) while (fifo.CPReadWriteDistance > (s32)fifo.CPLoWatermark) { @@ -545,21 +548,13 @@ void GatherPipeBursted() // dunno if others threads (like the audio thread) really need a forced context switch here //Common::SleepCurrentThread(1); } - if (ct) {LOG(COMMANDPROCESSOR, "(GatherPipeBursted): %lu cycles for nothing :[ ", ct);} + if (ct) {LOG(COMMANDPROCESSOR, "(GatherPipeBursted): %lu cycles for nothing :[", ct);} /**/ } -#endif // check if we are in sync _assert_msg_(COMMANDPROCESSOR, fifo.CPWritePointer == CPeripheralInterface::Fifo_CPUWritePointer, "FIFOs linked but out of sync"); _assert_msg_(COMMANDPROCESSOR, fifo.CPBase == CPeripheralInterface::Fifo_CPUBase, "FIFOs linked but out of sync"); _assert_msg_(COMMANDPROCESSOR, fifo.CPEnd == CPeripheralInterface::Fifo_CPUEnd, "FIFOs linked but out of sync"); - - // no not from CPU. Only GP can throw BP irq - /*if (fifo.bFF_Breakpoint) - { - m_CPStatusReg.Breakpoint = 1; - UpdateInterrupts(); - }/**/ } else { @@ -606,7 +601,7 @@ void CatchUpGPU() // We are going to do FP math on the main thread so have to save the current state SaveSSEState(); LoadDefaultSSEState(); - PluginVideo::Video_SendFifoData(ptr); + PluginVideo::Video_SendFifoData(ptr,32); LoadSSEState(); fifo.CPReadWriteDistance -= 32; diff --git a/Source/Core/Core/Src/HW/CommandProcessor.h b/Source/Core/Core/Src/HW/CommandProcessor.h index 79d27fb39b..f8bb69c7f6 100644 --- a/Source/Core/Core/Src/HW/CommandProcessor.h +++ b/Source/Core/Core/Src/HW/CommandProcessor.h @@ -79,6 +79,9 @@ void UpdateInterruptsFromVideoPlugin(); bool AllowIdleSkipping(); +// for GP watchdog hack +void IncrementGPWDToken(); + } // end of namespace CommandProcessor #endif diff --git a/Source/Core/Core/Src/HW/PixelEngine.cpp b/Source/Core/Core/Src/HW/PixelEngine.cpp index 07be296864..f46e88f967 100644 --- a/Source/Core/Core/Src/HW/PixelEngine.cpp +++ b/Source/Core/Core/Src/HW/PixelEngine.cpp @@ -177,6 +177,7 @@ void SetFinish_OnMainThread(u64 userdata, int cyclesLate) { g_bSignalFinishInterrupt = 1; UpdateInterrupts(); + CommandProcessor::IncrementGPWDToken(); } // SetToken diff --git a/Source/Core/Core/Src/Plugins/Plugin_Video.h b/Source/Core/Core/Src/Plugins/Plugin_Video.h index 05992425ac..60f3dd4d9d 100644 --- a/Source/Core/Core/Src/Plugins/Plugin_Video.h +++ b/Source/Core/Core/Src/Plugins/Plugin_Video.h @@ -38,7 +38,7 @@ typedef void (__cdecl* TDllDebugger)(HWND); typedef void (__cdecl* TVideo_Initialize)(SVideoInitialize*); typedef void (__cdecl* TVideo_Prepare)(); typedef void (__cdecl* TVideo_Shutdown)(); -typedef void (__cdecl* TVideo_SendFifoData)(u8*); +typedef void (__cdecl* TVideo_SendFifoData)(u8*,u32); typedef void (__cdecl* TVideo_UpdateXFB)(u8*, u32, u32, s32); typedef BOOL (__cdecl* TVideo_Screenshot)(TCHAR*); typedef void (__cdecl* TVideo_EnterLoop)(); diff --git a/Source/Core/VideoCommon/Src/Fifo.cpp b/Source/Core/VideoCommon/Src/Fifo.cpp index 3ba946b273..c1b1009d48 100644 --- a/Source/Core/VideoCommon/Src/Fifo.cpp +++ b/Source/Core/VideoCommon/Src/Fifo.cpp @@ -67,12 +67,9 @@ u8* FAKE_GetFifoEndPtr() return &videoBuffer[size]; } -void Video_SendFifoData(u8* _uData) +void Video_SendFifoData(u8* _uData, u32 len) { - // TODO (mb2): unrolled loop faster than memcpy here? - memcpy(videoBuffer + size, _uData, 32); - size += 32; - if (size + 32 >= FIFO_SIZE) + if (size + len >= FIFO_SIZE) { int pos = (int)(g_pVideoData-videoBuffer); if (size-pos > pos) @@ -83,12 +80,38 @@ void Video_SendFifoData(u8* _uData) size -= pos; g_pVideoData = FAKE_GetFifoStartPtr(); } + memcpy(videoBuffer + size, _uData, len); + size += len; OpcodeDecoder_Run(); } +// for GP watchdog hack +THREAD_RETURN GPWatchdogThread(void *pArg) +{ + const SCPFifoStruct &_fifo = *(SCPFifoStruct*)pArg; + u32 token = 0; + + Common::SetCurrentThreadName("GPWatchdogThread"); + Common::Thread::SetCurrentThreadAffinity(2); //Force to second core + //int i=30*1000/16; + while (1) + { + Common::SleepCurrentThread(8); // about 1s/60/2 (less than twice a frame should be enough) + //if (!_fifo.bFF_GPReadIdle) + + // TODO (mb2): FIX this !!! + //if (token == _fifo.Fake_GPWDToken) + { + InterlockedExchange((LONG*)&_fifo.Fake_GPWDInterrupt, 1); + //__Log(LogTypes::VIDEO,"!!! Watchdog hit",_fifo.CPReadWriteDistance); + } + token = _fifo.Fake_GPWDToken; + //i--; + } + return 0; +} + -//TODO - turn inside out, have the "reader" ask for bytes instead -// See Core.cpp for threading idea void Fifo_EnterLoop(const SVideoInitialize &video_initialize) { SCPFifoStruct &_fifo = *video_initialize.pCPFifo; @@ -97,6 +120,10 @@ void Fifo_EnterLoop(const SVideoInitialize &video_initialize) if (hEventOnIdle==NULL) PanicAlert("Fifo_EnterLoop() -> EventOnIdle NULL"); #endif + // for GP watchdog hack + Common::Thread *watchdogThread = NULL; + watchdogThread = new Common::Thread(GPWatchdogThread, (void*)&_fifo); + #ifdef _WIN32 // TODO(ector): Don't peek so often! while (video_initialize.pPeekMessages()) @@ -123,7 +150,7 @@ void Fifo_EnterLoop(const SVideoInitialize &video_initialize) #if defined(THREAD_VIDEO_WAKEUP_ONIDLE) && defined(_WIN32) while(_fifo.CPReadWriteDistance > 0) #else - while(_fifo.bFF_GPReadEnable && (_fifo.CPReadWriteDistance > 0) )// && count) + while(_fifo.bFF_GPReadEnable && (_fifo.CPReadWriteDistance > 0) ) #endif { // check if we are on a breakpoint @@ -144,28 +171,27 @@ void Fifo_EnterLoop(const SVideoInitialize &video_initialize) // read the data and send it to the VideoPlugin u8 *uData = video_initialize.pGetMemoryPointer(_fifo.CPReadPointer); -#ifdef _WIN32 - //EnterCriticalSection(&_fifo.sync); -#else - // _fifo.sync->Enter(); -#endif - Video_SendFifoData(uData); - // increase the ReadPtr - u32 readPtr = _fifo.CPReadPointer+32; - if (readPtr >= _fifo.CPEnd) - readPtr = _fifo.CPBase; + + u32 dist = _fifo.CPReadWriteDistance; + u32 readPtr = _fifo.CPReadPointer; + if ( (dist+readPtr) > _fifo.CPEnd) // TODO: better + { + dist =_fifo.CPEnd - readPtr; + readPtr = _fifo.CPBase; + } + else + readPtr += dist; + + Video_SendFifoData(uData, dist); #ifdef _WIN32 InterlockedExchange((LONG*)&_fifo.CPReadPointer, readPtr); - InterlockedExchangeAdd((LONG*)&_fifo.CPReadWriteDistance, -32); - //LeaveCriticalSection(&_fifo.sync); + InterlockedExchangeAdd((LONG*)&_fifo.CPReadWriteDistance, -dist); #else Common::InterlockedExchange((int*)&_fifo.CPReadPointer, readPtr); - Common::InterlockedExchangeAdd((int*)&_fifo.CPReadWriteDistance, -32); - // _fifo.sync->Leave(); + Common::InterlockedExchangeAdd((int*)&_fifo.CPReadWriteDistance, -dist); #endif - } + } } - } #if defined(THREAD_VIDEO_WAKEUP_ONIDLE) && defined(_WIN32) CloseHandle(hEventOnIdle); diff --git a/Source/PluginSpecs/pluginspecs_video.h b/Source/PluginSpecs/pluginspecs_video.h index 086916b4c7..6bd9972f56 100644 --- a/Source/PluginSpecs/pluginspecs_video.h +++ b/Source/PluginSpecs/pluginspecs_video.h @@ -39,11 +39,10 @@ typedef struct volatile BOOL bFF_BPEnable; volatile BOOL bFF_GPLinkEnable; volatile BOOL bFF_Breakpoint; -#ifdef _WIN32 -// CRITICAL_SECTION sync; -#else - Common::CriticalSection *sync; -#endif + + // for GP watchdog hack + volatile BOOL Fake_GPWDInterrupt; + volatile u32 Fake_GPWDToken; // cicular incrementer } SCPFifoStruct; typedef struct @@ -137,7 +136,7 @@ EXPORT void CALL Video_Shutdown(void); // input: a data-byte (i know we have to optimize this ;-)) // output: none // -EXPORT void CALL Video_SendFifoData(u8* _uData); +EXPORT void CALL Video_SendFifoData(u8* _uData, u32 len); // __________________________________________________________________________________________________ // Function: Video_UpdateXFB