Add a simple message event system to the GE.

With this, the CPU thread feature now mostly works.
2025-02-03 12:12:06 +00:00 · 2013-08-07 23:59:32 -07:00 · 2013-08-07 23:59:32 -07:00 · 02e301e5fe
commit 02e301e5fe
parent 52ca93aab2
7 changed files with 169 additions and 19 deletions
--- a/Core/System.cpp
+++ b/Core/System.cpp
@ -274,8 +274,11 @@ void PSP_RunLoopUntil(u64 globalticks) {
 	if (g_Config.bUseCPUThread) {
 		cpuThreadUntil = globalticks;
 		if (CPU_NextState(CPU_THREAD_RUNNING, CPU_THREAD_EXECUTE)) {
-			// TODO: Run GPU here.
-			CPU_WaitStatus(&CPU_IsReady);
+			// The CPU doesn't actually respect cpuThreadUntil well, especially when skipping frames.
+			// TODO: Something smarter?  Or force CPU to bail periodically?
+			while (!CPU_IsReady()) {
+				gpu->RunEventsUntil(CoreTiming::GetTicks() + msToCycles(100));
+			}
 		} else {
 			ERROR_LOG(CPU, "Unable to execute CPU run loop, unexpected state: %d", cpuThreadState);
 		}
--- a/GPU/GLES/DisplayListInterpreter.cpp
+++ b/GPU/GLES/DisplayListInterpreter.cpp
@ -244,6 +244,10 @@ void GLES_GPU::DeviceLost() {
 }

 void GLES_GPU::InitClear() {
+	// Does no GL work itself: queues GPU_EVENT_INIT_CLEAR, which ProcessEvent()
+	// routes to InitClearInternal().
+	ScheduleEvent(GPU_EVENT_INIT_CLEAR);
+}
+
+void GLES_GPU::InitClearInternal() {
 	bool useBufferedRendering = g_Config.iRenderingMode != 0 ? 1 : 0;
 	if (!useBufferedRendering) {
 		glstate.depthWrite.set(GL_TRUE);
@ -259,6 +263,10 @@ void GLES_GPU::DumpNextFrame() {
 }

 void GLES_GPU::BeginFrame() {
+	// Does no frame setup itself: queues GPU_EVENT_BEGIN_FRAME, which
+	// ProcessEvent() routes to BeginFrameInternal().
+	ScheduleEvent(GPU_EVENT_BEGIN_FRAME);
+}
+
+void GLES_GPU::BeginFrameInternal() {
 	// Turn off vsync when unthrottled
 	int desiredVSyncInterval = g_Config.bVSync ? 1 : 0;
 	if ((PSP_CoreParameter().unthrottle) || (PSP_CoreParameter().fpsLimit == 1))
@ -303,6 +311,10 @@ bool GLES_GPU::FramebufferDirty() {
 }

 void GLES_GPU::CopyDisplayToOutput() {
+	// Does no copying itself: queues GPU_EVENT_COPY_DISPLAY_TO_OUTPUT, which
+	// ProcessEvent() routes to CopyDisplayToOutputInternal().
+	ScheduleEvent(GPU_EVENT_COPY_DISPLAY_TO_OUTPUT);
+}
+
+void GLES_GPU::CopyDisplayToOutputInternal() {
 	glstate.depthWrite.set(GL_TRUE);
 	glstate.colorMask.set(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);

@ -342,6 +354,33 @@ void GLES_GPU::FastRunLoop(DisplayList &list) {
 	}
 }

+// Handles the GLES-specific GPU events by calling the matching *Internal()
+// method. GPUCommon::RunEventsUntil() forwards every event type it does not
+// handle itself to this function.
+//
+// ev: the dequeued event; the invalidate_cache payload is only read for
+//     GPU_EVENT_INVALIDATE_CACHE.
+void GLES_GPU::ProcessEvent(GPUEvent ev) {
+	switch (ev.type) {
+	case GPU_EVENT_INIT_CLEAR:
+		InitClearInternal();
+		break;
+
+	case GPU_EVENT_BEGIN_FRAME:
+		BeginFrameInternal();
+		break;
+
+	case GPU_EVENT_COPY_DISPLAY_TO_OUTPUT:
+		CopyDisplayToOutputInternal();
+		break;
+
+	case GPU_EVENT_INVALIDATE_CACHE:
+		InvalidateCacheInternal(ev.invalidate_cache.addr, ev.invalidate_cache.size, ev.invalidate_cache.type);
+		break;
+
+	case GPU_EVENT_FLUSH:
+		FlushInternal();
+		break;
+
+	default:
+		// Log the numeric event type. Handing the whole GPUEvent struct to a
+		// %d conversion does not match the format and is undefined behavior.
+		ERROR_LOG(G3D, "Unexpected GPU event type: %d", static_cast<int>(ev.type));
+		break;
+	}
+}
+
 inline void GLES_GPU::CheckFlushOp(int cmd, u32 diff) {
 	if (flushBeforeCommand_[cmd] == 1 || (diff && flushBeforeCommand_[cmd] == 2))
 	{
@ -1055,6 +1094,14 @@ void GLES_GPU::DoBlockTransfer() {
 }

 void GLES_GPU::InvalidateCache(u32 addr, int size, GPUInvalidationType type) {
+	// Pack the arguments into a GPU_EVENT_INVALIDATE_CACHE event and queue it.
+	// ProcessEvent() unpacks them again and calls InvalidateCacheInternal().
+	GPUEvent ev(GPU_EVENT_INVALIDATE_CACHE);
+	ev.invalidate_cache.addr = addr;
+	ev.invalidate_cache.size = size;
+	ev.invalidate_cache.type = type;
+	ScheduleEvent(ev);
+}
+
+void GLES_GPU::InvalidateCacheInternal(u32 addr, int size, GPUInvalidationType type) {
 	if (size > 0)
 		textureCache_.Invalidate(addr, size, type);
 	else
@ -1074,6 +1121,10 @@ void GLES_GPU::ClearCacheNextFrame() {


 void GLES_GPU::Flush() {
+	// Does not flush directly: queues GPU_EVENT_FLUSH, which ProcessEvent()
+	// routes to FlushInternal().
+	ScheduleEvent(GPU_EVENT_FLUSH);
+}
+
+// Performs the flush requested through Flush(): called by ProcessEvent() for
+// GPU_EVENT_FLUSH and forwards to transformDraw_.Flush().
+void GLES_GPU::FlushInternal() {
 	transformDraw_.Flush();
 }

--- a/GPU/GLES/DisplayListInterpreter.h
+++ b/GPU/GLES/DisplayListInterpreter.h
@ -67,12 +67,18 @@ public:

 protected:
 	virtual void FastRunLoop(DisplayList &list);
+	virtual void ProcessEvent(GPUEvent ev);

 private:
 	void DoBlockTransfer();
 	void ApplyDrawState(int prim);
 	void CheckFlushOp(int cmd, u32 diff);
 	void BuildReportingInfo();
+	void InitClearInternal();
+	void BeginFrameInternal();
+	void CopyDisplayToOutputInternal();
+	void InvalidateCacheInternal(u32 addr, int size, GPUInvalidationType type);
+	void FlushInternal();

 	FramebufferManager framebufferManager_;
 	TextureCache textureCache_;
--- a/GPU/GPUCommon.cpp
+++ b/GPU/GPUCommon.cpp
@ -470,8 +470,7 @@ void GPUCommon::SlowRunLoop(DisplayList &list)
 }

 // The newPC parameter is used for jumps, we don't count cycles between.
-inline void GPUCommon::UpdatePC(u32 currentPC, u32 newPC)
-{
+inline void GPUCommon::UpdatePC(u32 currentPC, u32 newPC) {
 	// Rough estimate, 2 CPU ticks (it's double the clock rate) per GPU instruction.
 	cyclesExecuted += 2 * (currentPC - cycleLastPC) / 4;
 	gpuStats.otherGPUCycles += 2 * (currentPC - cycleLastPC) / 4;
@ -481,8 +480,11 @@ inline void GPUCommon::UpdatePC(u32 currentPC, u32 newPC)
 	downcount = 0;
 }

-void GPUCommon::ReapplyGfxState()
-{
+// Queues GPU_EVENT_REAPPLY_GFX_STATE instead of re-executing the state
+// commands directly; RunEventsUntil() handles that event by calling
+// ReapplyGfxStateInternal().
+void GPUCommon::ReapplyGfxState() {
+	ScheduleEvent(GPU_EVENT_REAPPLY_GFX_STATE);
+}
+
+void GPUCommon::ReapplyGfxStateInternal() {
 	// ShaderManager_DirtyShader();
 	// The commands are embedded in the command memory so we can just reexecute the words. Convenient.
 	// To be safe we pass 0xFFFFFFF as the diff.
@ -498,36 +500,82 @@ void GPUCommon::ReapplyGfxState()
 	ExecuteOp(gstate.cmdmem[GE_CMD_SCISSOR2], 0xFFFFFFFF);
 	*/

-	for (int i = GE_CMD_VERTEXTYPE; i < GE_CMD_BONEMATRIXNUMBER; i++)
-	{
-		if (i != GE_CMD_ORIGIN)
+	for (int i = GE_CMD_VERTEXTYPE; i < GE_CMD_BONEMATRIXNUMBER; i++) {
+		if (i != GE_CMD_ORIGIN) {
 			ExecuteOp(gstate.cmdmem[i], 0xFFFFFFFF);
+		}
 	}

 	// Can't write to bonematrixnumber here

-	for (int i = GE_CMD_MORPHWEIGHT0; i < GE_CMD_PATCHFACING; i++)
-	{
+	for (int i = GE_CMD_MORPHWEIGHT0; i < GE_CMD_PATCHFACING; i++) {
 		ExecuteOp(gstate.cmdmem[i], 0xFFFFFFFF);
 	}

 	// There are a few here in the middle that we shouldn't execute...

-	for (int i = GE_CMD_VIEWPORTX1; i < GE_CMD_TRANSFERSTART; i++)
-	{
+	for (int i = GE_CMD_VIEWPORTX1; i < GE_CMD_TRANSFERSTART; i++) {
 		ExecuteOp(gstate.cmdmem[i], 0xFFFFFFFF);
 	}

 	// TODO: there's more...
 }

-inline void GPUCommon::UpdateState(GPUState state)
-{
+inline void GPUCommon::UpdateState(GPUState state) {
 	gpuState = state;
 	if (state != GPUSTATE_RUNNING)
 		downcount = 0;
 }

+// Removes and returns the oldest queued event while holding eventsLock.
+// When the queue is empty, GPU_EVENT_INVALID is returned instead (converted
+// to a GPUEvent by its constructor). eventsCond is signalled after an event
+// has been removed.
+GPUEvent GPUCommon::GetNextEvent() {
+	lock_guard guard(eventsLock);
+	if (!events.empty()) {
+		GPUEvent next = events.front();
+		events.pop_front();
+		eventsCond.notify_one();
+		return next;
+	}
+
+	return GPU_EVENT_INVALID;
+}
+
+// Appends ev to the event queue and signals eventsCond.
+// When g_Config.bUseCPUThread is off, the queue is drained right away on the
+// calling thread, so the event has been processed by the time this returns.
+void GPUCommon::ScheduleEvent(GPUEvent ev) {
+	lock_guard guard(eventsLock);
+	events.push_back(ev);
+	eventsCond.notify_one();
+
+	if (!g_Config.bUseCPUThread) {
+		// eventsLock is still held here; GetNextEvent() locks it again, which
+		// relies on eventsLock being declared as a recursive_mutex.
+		// A deadline of 0 means "drain what is queued, don't keep polling".
+		RunEventsUntil(0);
+	}
+}
+
+// Drains the GPU event queue, then keeps polling for new events until
+// CoreTiming::GetTicks() reaches globalticks or coreState leaves CORE_RUNNING.
+// GPU_EVENT_PROCESS_QUEUE and GPU_EVENT_REAPPLY_GFX_STATE are handled here;
+// every other event type is passed to the subclass through ProcessEvent().
+// The queue is always drained at least once, even if globalticks has already
+// passed (ScheduleEvent() relies on this when it calls RunEventsUntil(0)).
+void GPUCommon::RunEventsUntil(u64 globalticks) {
+	do {
+		for (GPUEvent ev = GetNextEvent(); ev.type != GPU_EVENT_INVALID; ev = GetNextEvent()) {
+			switch (ev.type) {
+			case GPU_EVENT_PROCESS_QUEUE:
+				ProcessDLQueueInternal();
+				break;
+
+			case GPU_EVENT_REAPPLY_GFX_STATE:
+				ReapplyGfxStateInternal();
+				break;
+
+			default:
+				ProcessEvent(ev);
+				break;
+			}
+		}
+
+		// Quit the loop if the queue is drained and coreState has tripped.
+		if (coreState != CORE_RUNNING) {
+			return;
+		}
+
+		// If the deadline has already been reached, the loop would exit right
+		// after the wait below without looking at the queue again, so the wait
+		// would only add latency. This is always the case for RunEventsUntil(0).
+		if (CoreTiming::GetTicks() >= globalticks) {
+			return;
+		}
+
+		// coreState changes won't wake us, so recheck periodically.
+		// NOTE(review): eventsLock is not locked by this function, so callers
+		// other than ScheduleEvent() reach this wait without holding it.
+		// Confirm that condition_variable::wait_for permits that.
+		eventsCond.wait_for(eventsLock, 1);
+	} while (CoreTiming::GetTicks() < globalticks);
+}
+
 int GPUCommon::GetNextListIndex() {
 	lock_guard guard(listLock);
 	auto iter = dlQueue.begin();
@ -539,19 +587,24 @@ int GPUCommon::GetNextListIndex() {
 }

 bool GPUCommon::ProcessDLQueue() {
+	// Queues GPU_EVENT_PROCESS_QUEUE; RunEventsUntil() handles it by calling
+	// ProcessDLQueueInternal(). The result of that call is not reported back:
+	// this function always returns true.
+	ScheduleEvent(GPU_EVENT_PROCESS_QUEUE);
+	return true;
+}
+
+void GPUCommon::ProcessDLQueueInternal() {
 	startingTicks = CoreTiming::GetTicks();
 	cyclesExecuted = 0;

 	if (startingTicks < busyTicks) {
 		DEBUG_LOG(HLE, "Can't execute a list yet, still busy for %lld ticks", busyTicks - startingTicks);
-		return false;
+		return;
 	}

 	for (int listIndex = GetNextListIndex(); listIndex != -1; listIndex = GetNextListIndex()) {
 		DisplayList &l = dls[listIndex];
 		DEBUG_LOG(G3D, "Okay, starting DL execution at %08x - stall = %08x", l.pc, l.stall);
 		if (!InterpretList(l)) {
-			return false;
+			return;
 		} else {
 			lock_guard guard(listLock);
 			// At the end, we can remove it from the queue and continue.
@ -565,8 +618,6 @@ bool GPUCommon::ProcessDLQueue() {
 	drawCompleteTicks = startingTicks + cyclesExecuted;
 	busyTicks = std::max(busyTicks, drawCompleteTicks);
 	__GeTriggerSync(WAITTYPE_GEDRAWSYNC, 1, drawCompleteTicks);
-
-	return true; //no more lists!
 }

 void GPUCommon::PreExecuteOp(u32 op, u32 diff) {
--- a/GPU/GPUCommon.h
+++ b/GPU/GPUCommon.h
@ -2,6 +2,7 @@

 #include "native/base/mutex.h"
 #include "GPU/GPUInterface.h"
+#include <deque>

 class GPUCommon : public GPUInterface
 {
@ -9,6 +10,8 @@ public:
 	GPUCommon();
 	virtual ~GPUCommon() {}

+	virtual void RunEventsUntil(u64 globalticks);
+
 	virtual void InterruptStart(int listid);
 	virtual void InterruptEnd(int listid);
 	virtual void SyncEnd(WaitType waitType, int listid, bool wokeThreads);
@ -40,6 +43,11 @@ protected:
 	void PopDLQueue();
 	void CheckDrawSync();
 	int  GetNextListIndex();
+	GPUEvent GetNextEvent();
+	void ScheduleEvent(GPUEvent ev);
+	void ProcessDLQueueInternal();
+	void ReapplyGfxStateInternal();
+	virtual void ProcessEvent(GPUEvent ev) = 0;

 	typedef std::list<int> DisplayListQueue;

@ -48,6 +56,10 @@ protected:
 	DisplayListQueue dlQueue;
 	recursive_mutex listLock;

+	std::deque<GPUEvent> events;
+	recursive_mutex eventsLock;
+	condition_variable eventsCond;
+
 	bool interruptRunning;
 	GPUState gpuState;
 	bool isbreak;
--- a/GPU/GPUInterface.h
+++ b/GPU/GPUInterface.h
@ -140,6 +140,30 @@ enum GPUInvalidationType {
 	GPU_INVALIDATE_SAFE,
 };

+// Kinds of work that can be queued for the GPU through GPUCommon::ScheduleEvent().
+enum GPUEventType {
+	// Not a real event: GPUCommon::GetNextEvent() returns it when the queue is empty.
+	GPU_EVENT_INVALID,
+	// Handled in GPUCommon::RunEventsUntil() by ProcessDLQueueInternal().
+	GPU_EVENT_PROCESS_QUEUE,
+	// Queued by InitClear(); passed on to ProcessEvent().
+	GPU_EVENT_INIT_CLEAR,
+	// Queued by BeginFrame(); passed on to ProcessEvent().
+	GPU_EVENT_BEGIN_FRAME,
+	// Queued by CopyDisplayToOutput(); passed on to ProcessEvent().
+	GPU_EVENT_COPY_DISPLAY_TO_OUTPUT,
+	// Handled in GPUCommon::RunEventsUntil() by ReapplyGfxStateInternal().
+	GPU_EVENT_REAPPLY_GFX_STATE,
+	// Queued by InvalidateCache(); carries the GPUEvent::invalidate_cache payload.
+	GPU_EVENT_INVALIDATE_CACHE,
+	// Queued by Flush(); passed on to ProcessEvent().
+	GPU_EVENT_FLUSH,
+};
+
+// One entry in the GPU event queue: an event type plus an optional payload.
+struct GPUEvent {
+	// Not explicit: callers pass a bare GPUEventType where a GPUEvent is
+	// expected (e.g. ScheduleEvent(GPU_EVENT_FLUSH)) and rely on the implicit
+	// conversion. The payload union is left uninitialized by this constructor.
+	GPUEvent(GPUEventType t) : type(t) {}
+	GPUEventType type;
+	// Payload; which member is meaningful depends on `type`.
+	union {
+		// GPU_EVENT_INVALIDATE_CACHE
+		struct {
+			u32 addr;
+			int size;
+			GPUInvalidationType type;
+		} invalidate_cache;
+	};
+};
+
 class GPUInterface
 {
 public:
@ -150,6 +174,8 @@ public:
 	// Initialization
 	virtual void InitClear() = 0;

+	virtual void RunEventsUntil(u64 globalticks) = 0;
+
 	// Draw queue management
 	virtual DisplayList* getList(int listid) = 0;
 	// TODO: Much of this should probably be shared between the different GPU implementations.
--- a/GPU/Null/NullGpu.h
+++ b/GPU/Null/NullGpu.h
@ -50,4 +50,5 @@ public:

 protected:
 	virtual void FastRunLoop(DisplayList &list);
+	// Empty handler: any event passed here is ignored.
+	virtual void ProcessEvent(GPUEvent ev) {}
 };