Refactor the framedata/GPU thread management.

2024-11-22 21:09:52 +00:00 · 2022-10-03 15:56:30 +02:00 · 2022-10-03 15:56:30 +02:00 · b62572a78f
commit b62572a78f
parent a410ea5535
11 changed files with 130 additions and 221 deletions
--- a/Common/GPU/OpenGL/GLQueueRunner.h
+++ b/Common/GPU/OpenGL/GLQueueRunner.h
@ -2,6 +2,7 @@

 #include <cstdint>
 #include <vector>
+#include <set>
 #include <unordered_map>

 #include "Common/GPU/OpenGL/GLCommon.h"
--- a/Common/GPU/OpenGL/GLRenderManager.cpp
+++ b/Common/GPU/OpenGL/GLRenderManager.cpp
@ -112,6 +112,8 @@ void GLDeleter::Perform(GLRenderManager *renderManager, bool skipGLCalls) {
 }

 GLRenderManager::~GLRenderManager() {
+	_dbg_assert_(!run_);
+
 	for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) {
 		_assert_(frameData_[i].deleter.IsEmpty());
 		_assert_(frameData_[i].deleter_prev.IsEmpty());
@ -123,7 +125,6 @@ GLRenderManager::~GLRenderManager() {

 void GLRenderManager::ThreadStart(Draw::DrawContext *draw) {
 	queueRunner_.CreateDeviceObjects();
-	threadFrame_ = threadInitFrame_;
 	renderThreadId = std::this_thread::get_id();

 	if (newInflightFrames_ != -1) {
@ -170,7 +171,6 @@ void GLRenderManager::ThreadEnd() {
 	INFO_LOG(G3D, "ThreadEnd");

 	// Wait for any shutdown to complete in StopThread().
-	std::unique_lock<std::mutex> lock(mutex_);
 	queueRunner_.DestroyDeviceObjects();
 	VLOG("PULL: Quitting");

@ -179,14 +179,8 @@ void GLRenderManager::ThreadEnd() {
 		// Since we're in shutdown, we should skip the GL calls on Android.
 		frameData_[i].deleter.Perform(this, skipGLCalls_);
 		frameData_[i].deleter_prev.Perform(this, skipGLCalls_);
-		for (int j = 0; j < (int)frameData_[i].steps.size(); j++) {
-			delete frameData_[i].steps[j];
-		}
-		frameData_[i].steps.clear();
-		frameData_[i].initSteps.clear();
 	}
 	deleter_.Perform(this, skipGLCalls_);
-
 	for (int i = 0; i < (int)steps_.size(); i++) {
 		delete steps_[i];
 	}
@ -194,102 +188,50 @@ void GLRenderManager::ThreadEnd() {
 	initSteps_.clear();
 }

+// Unlike in Vulkan, this isn't a full independent function, instead it gets called every frame.
+// This also mean that we can poll the renderThreadQueue using a regular mutex, instead of
+// blocking on it using a condition variable.
 bool GLRenderManager::ThreadFrame() {
-	std::unique_lock<std::mutex> lock(mutex_);
 	if (!run_)
 		return false;

-	// In case of syncs or other partial completion, we keep going until we complete a frame.
-	do {
-		if (nextFrame) {
-			threadFrame_++;
-			if (threadFrame_ >= inflightFrames_)
-				threadFrame_ = 0;
-		}
-		FrameData &frameData = frameData_[threadFrame_];
-		{
-			std::unique_lock<std::mutex> lock(frameData.pull_mutex);
-			while (!frameData.readyForRun && run_) {
-				VLOG("PULL: Waiting for frame[%d].readyForRun", threadFrame_);
-				frameData.pull_condVar.wait(lock);
-			}
-			if (!frameData.readyForRun && !run_) {
-				// This means we're out of frames to render and run_ is false, so bail.
-				return false;
-			}
-			VLOG("PULL: Setting frame[%d].readyForRun = false", threadFrame_);
-			frameData.readyForRun = false;
-			frameData.deleter_prev.Perform(this, skipGLCalls_);
-			frameData.deleter_prev.Take(frameData.deleter);
-			// Previously we had a quick exit here that avoided calling Run() if run_ was suddenly false,
-			// but that created a race condition where frames could end up not finished properly on resize etc.
+	GLRRenderThreadTask task;

-			// Only increment next time if we're done.
-			nextFrame = frameData.type == GLRRunType::END;
-			_assert_(frameData.type == GLRRunType::END || frameData.type == GLRRunType::SYNC);
+	// In case of syncs or other partial completion, we keep going until we complete a frame.
+	while (true) {
+		// Pop a task of the queue and execute it.
+		{
+			std::unique_lock<std::mutex> lock(pushMutex_);
+			if (renderThreadQueue_.empty()) {
+				break;
+			}
+			task = renderThreadQueue_.front();
+			renderThreadQueue_.pop();
 		}
-		VLOG("PULL: Running frame %d", threadFrame_);
-		if (firstFrame) {
-			INFO_LOG(G3D, "Running first frame (%d)", threadFrame_);
-			firstFrame = false;
+
+		// We got a task! We can now have pushMutex_ unlocked, allowing the host to
+		// push more work when it feels like it, and just start working.
+		if (task.runType == GLRRunType::EXIT) {
+			// Oh, host wanted out. Let's leave, and also let's notify the host.
+			// This is unlike Vulkan too which can just block on the thread existing.
+			std::unique_lock<std::mutex> lock(syncMutex_);
+			syncCondVar_.notify_one();
+			syncDone_ = true;
+			break;
 		}

 		// Render the scene.
-		Run(threadFrame_);
-
-		VLOG("PULL: Finished frame %d", threadFrame_);
-	} while (!nextFrame);
+		Run(task);
+	};

 	return true;
 }

 void GLRenderManager::StopThread() {
-	// Since we don't control the thread directly, this will only pause the thread.
+	// There's not really a lot to do here anymore.

 	if (run_) {
 		run_ = false;
-		for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) {
-			auto &frameData = frameData_[i];
-			{
-				std::unique_lock<std::mutex> lock(frameData.push_mutex);
-				frameData.push_condVar.notify_all();
-			}
-			{
-				std::unique_lock<std::mutex> lock(frameData.pull_mutex);
-				frameData.pull_condVar.notify_all();
-			}
-		}
-
-		// Wait until we've definitely stopped the threadframe.
-		std::unique_lock<std::mutex> lock(mutex_);
-
-		INFO_LOG(G3D, "GL submission thread paused. Frame=%d", curFrame_);
-
-		// Eat whatever has been queued up for this frame if anything.
-		Wipe();
-
-		// Wait for any fences to finish and be resignaled, so we don't have sync issues.
-		// Also clean out any queued data, which might refer to things that might not be valid
-		// when we restart...
-		for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) {
-			auto &frameData = frameData_[i];
-			std::unique_lock<std::mutex> lock(frameData.push_mutex);
-			if (frameData.readyForRun || frameData.steps.size() != 0) {
-				Crash();
-			}
-			frameData.readyForRun = false;
-			frameData.readyForSubmit = false;
-			for (size_t i = 0; i < frameData.steps.size(); i++) {
-				delete frameData.steps[i];
-			}
-			frameData.steps.clear();
-			frameData.initSteps.clear();
-
-			while (!frameData.readyForFence) {
-				VLOG("PUSH: Waiting for frame[%d].readyForFence = 1 (stop)", i);
-				frameData.push_condVar.wait(lock);
-			}
-		}
 	} else {
 		INFO_LOG(G3D, "GL submission thread was already paused.");
 	}
@ -452,58 +394,54 @@ void GLRenderManager::CopyImageToMemorySync(GLRTexture *texture, int mipLevel, i
 void GLRenderManager::BeginFrame() {
 	VLOG("BeginFrame");

+	// TODO: Why is this debug-only?
 #ifdef _DEBUG
 	curProgram_ = nullptr;
 #endif

 	int curFrame = GetCurFrame();
-	FrameData &frameData = frameData_[curFrame];

-	// Make sure the very last command buffer from the frame before the previous has been fully executed.
+	FrameData &frameData = frameData_[curFrame_];
 	{
-		std::unique_lock<std::mutex> lock(frameData.push_mutex);
+		std::unique_lock<std::mutex> lock(frameData.fenceMutex);
 		while (!frameData.readyForFence) {
-			VLOG("PUSH: Waiting for frame[%d].readyForFence = 1", curFrame);
-			frameData.push_condVar.wait(lock);
+			frameData.fenceCondVar.wait(lock);
 		}
 		frameData.readyForFence = false;
-		frameData.readyForSubmit = true;
 	}

-	VLOG("PUSH: Fencing %d", curFrame);
-
-	// glFenceSync(&frameData.fence...)
-
 	// Must be after the fence - this performs deletes.
 	VLOG("PUSH: BeginFrame %d", curFrame);
+
 	if (!run_) {
 		WARN_LOG(G3D, "BeginFrame while !run_!");
 	}

-	// vulkan_->BeginFrame();
-	// In GL, we have to do deletes on the submission thread.
-
 	insideFrame_ = true;
 }

 void GLRenderManager::Finish() {
 	curRenderStep_ = nullptr;
+
 	int curFrame = GetCurFrame();
 	FrameData &frameData = frameData_[curFrame];
 	{
-		std::unique_lock<std::mutex> lock(frameData.pull_mutex);
-		VLOG("PUSH: Frame[%d].readyForRun = true, notifying pull", curFrame);
-		frameData.steps = std::move(steps_);
-		steps_.clear();
-		frameData.initSteps = std::move(initSteps_);
-		initSteps_.clear();
-		frameData.readyForRun = true;
-		frameData.type = GLRRunType::END;
-		frameData_[curFrame_].deleter.Take(deleter_);
-	}
+		VLOG("PUSH: Frame[%d]", curFrame);
+		GLRRenderThreadTask task;
+		task.frame = curFrame;
+		task.runType = GLRRunType::END;

-	// Notify calls do not in fact need to be done with the mutex locked.
-	frameData.pull_condVar.notify_all();
+		frameData_[curFrame_].deleter.Take(deleter_);
+
+		std::unique_lock<std::mutex> lock(pushMutex_);
+		renderThreadQueue_.push(task);
+		renderThreadQueue_.back().initSteps = std::move(initSteps_);
+		renderThreadQueue_.back().steps = std::move(steps_);
+		initSteps_.clear();
+		steps_.clear();
+
+		pushCondVar_.notify_one();
+	}

 	curFrame_++;
 	if (curFrame_ >= inflightFrames_)
@ -513,17 +451,17 @@ void GLRenderManager::Finish() {
 }

 // Render thread
-void GLRenderManager::Run(int frame) {
-	FrameData &frameData = frameData_[frame];
+void GLRenderManager::Run(GLRRenderThreadTask &task) {
+	FrameData &frameData = frameData_[task.frame];
 	if (!frameData.hasBegun) {
 		frameData.hasBegun = true;
+
+		frameData.deleter_prev.Perform(this, skipGLCalls_);
+		frameData.deleter_prev.Take(frameData.deleter);
 	}

-	auto &stepsOnThread = frameData_[frame].steps;
-	auto &initStepsOnThread = frameData_[frame].initSteps;
 	// queueRunner_.LogSteps(stepsOnThread);
-	queueRunner_.RunInitSteps(initStepsOnThread, skipGLCalls_);
-	initStepsOnThread.clear();
+	queueRunner_.RunInitSteps(task.initSteps, skipGLCalls_);

 	// Run this after RunInitSteps so any fresh GLRBuffers for the pushbuffers can get created.
 	if (!skipGLCalls_) {
@ -537,13 +475,12 @@ void GLRenderManager::Run(int frame) {
 		int passes = GetVRPassesCount();
 		for (int i = 0; i < passes; i++) {
 			PreVRFrameRender(i);
-			queueRunner_.RunSteps(stepsOnThread, skipGLCalls_, i < passes - 1, true);
+			queueRunner_.RunSteps(task.steps, skipGLCalls_, i < passes - 1, true);
 			PostVRFrameRender();
 		}
 	} else {
-		queueRunner_.RunSteps(stepsOnThread, skipGLCalls_, false, false);
+		queueRunner_.RunSteps(task.steps, skipGLCalls_, false, false);
 	}
-	stepsOnThread.clear();

 	if (!skipGLCalls_) {
 		for (auto iter : frameData.activePushBuffers) {
@ -551,19 +488,16 @@ void GLRenderManager::Run(int frame) {
 		}
 	}

-	switch (frameData.type) {
+	switch (task.runType) {
 	case GLRRunType::END:
 		frameData.hasBegun = false;

 		VLOG("PULL: Frame %d.readyForFence = true", frame);
-
 		{
-			std::unique_lock<std::mutex> lock(frameData.push_mutex);
-			_assert_(frameData.readyForSubmit);
+			std::lock_guard<std::mutex> lock(frameData.fenceMutex);
 			frameData.readyForFence = true;
-			frameData.readyForSubmit = false;
-			frameData.push_condVar.notify_all();
-		}
+			frameData.fenceCondVar.notify_one();
+		}{}

 		if (!frameData.skipSwap) {
 			if (swapIntervalChanged_) {
@ -572,6 +506,7 @@ void GLRenderManager::Run(int frame) {
 					swapIntervalFunction_(swapInterval_);
 				}
 			}
+			// This is the swapchain framebuffer flip.
 			if (swapFunction_) {
 				swapFunction_();
 			}
@ -581,13 +516,14 @@ void GLRenderManager::Run(int frame) {
 		break;

 	case GLRRunType::SYNC:
+		frameData.hasBegun = false;
+
 		// glFinish is not actually necessary here, and won't be unless we start using
 		// glBufferStorage. Then we need to use fences.
 		{
-			std::unique_lock<std::mutex> lock(frameData.push_mutex);
-			frameData.readyForFence = true;
-			frameData.readyForSubmit = true;
-			frameData.push_condVar.notify_all();
+			std::unique_lock<std::mutex> lock(syncMutex_);
+			syncDone_ = true;
+			syncCondVar_.notify_one();
 		}
 		break;

@ -599,51 +535,29 @@ void GLRenderManager::Run(int frame) {
 }

 void GLRenderManager::FlushSync() {
-	int curFrame = curFrame_;
-	FrameData &frameData = frameData_[curFrame];
 	{
-		std::unique_lock<std::mutex> lock(frameData.pull_mutex);
 		VLOG("PUSH: Frame[%d].readyForRun = true (sync)", curFrame);
-		frameData.initSteps = std::move(initSteps_);
-		initSteps_.clear();
-		frameData.steps = std::move(steps_);
+
+		GLRRenderThreadTask task;
+		task.frame = curFrame_;
+		task.runType = GLRRunType::SYNC;
+
+		std::unique_lock<std::mutex> lock(pushMutex_);
+		renderThreadQueue_.push(task);
+		renderThreadQueue_.back().initSteps = std::move(initSteps_);
+		renderThreadQueue_.back().steps = std::move(steps_);
+		pushCondVar_.notify_one();
 		steps_.clear();
-		frameData.readyForRun = true;
-		_assert_(frameData.readyForFence == false);
-		frameData.type = GLRRunType::SYNC;
-		frameData.pull_condVar.notify_all();
 	}
+
 	{
-		std::unique_lock<std::mutex> lock(frameData.push_mutex);
+		std::unique_lock<std::mutex> lock(syncMutex_);
 		// Wait for the flush to be hit, since we're syncing.
-		while (!frameData.readyForFence) {
+		while (!syncDone_) {
 			VLOG("PUSH: Waiting for frame[%d].readyForFence = 1 (sync)", curFrame);
-			frameData.push_condVar.wait(lock);
-		}
-		frameData.readyForFence = false;
-		frameData.readyForSubmit = true;
-	}
-}
-
-void GLRenderManager::Wipe() {
-	initSteps_.clear();
-	for (auto step : steps_) {
-		delete step;
-	}
-	steps_.clear();
-}
-
-void GLRenderManager::WaitUntilQueueIdle() {
-	// Just wait for all frames to be ready.
-	for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) {
-		FrameData &frameData = frameData_[i];
-
-		std::unique_lock<std::mutex> lock(frameData.push_mutex);
-		// Ignore unsubmitted frames.
-		while (!frameData.readyForFence && frameData.readyForRun) {
-			VLOG("PUSH: Waiting for frame[%d].readyForFence = 1 (wait idle)", i);
-			frameData.push_condVar.wait(lock);
+			syncCondVar_.wait(lock);
 		}
+		syncDone_ = false;
 	}
 }

--- a/Common/GPU/OpenGL/GLRenderManager.h
+++ b/Common/GPU/OpenGL/GLRenderManager.h
@ -7,6 +7,7 @@
 #include <set>
 #include <string>
 #include <mutex>
+#include <queue>
 #include <condition_variable>

 #include "Common/GPU/OpenGL/GLCommon.h"
@ -349,11 +350,29 @@ private:
 	GLBufferStrategy strategy_ = GLBufferStrategy::SUBDATA;
 };

+class GLRInputLayout {
+public:
+	struct Entry {
+		int location;
+		int count;
+		GLenum type;
+		GLboolean normalized;
+		int stride;
+		intptr_t offset;
+	};
+	std::vector<Entry> entries;
+	int semanticsMask_ = 0;
+};
+
 enum class GLRRunType {
 	END,
 	SYNC,
+	EXIT,
 };

+class GLRenderManager;
+class GLPushBuffer;
+
 class GLDeleter {
 public:
 	void Perform(GLRenderManager *renderManager, bool skipGLCalls);
@ -373,18 +392,14 @@ public:
 	std::vector<GLPushBuffer *> pushBuffers;
 };

-class GLRInputLayout {
-public:
-	struct Entry {
-		int location;
-		int count;
-		GLenum type;
-		GLboolean normalized;
-		int stride;
-		intptr_t offset;
-	};
-	std::vector<Entry> entries;
-	int semanticsMask_ = 0;
+// These are enqueued from the main thread,
+// and the render thread pops them off
+struct GLRRenderThreadTask {
+	std::vector<GLRStep *> steps;
+	std::vector<GLRInitStep> initSteps;
+
+	int frame;
+	GLRRunType runType;
 };

 // Note: The GLRenderManager is created and destroyed on the render thread, and the latter
@ -416,13 +431,7 @@ public:
 	void BeginFrame();
 	// Can run on a different thread!
 	void Finish();
-	void Run(int frame);
-
-	// Zaps queued up commands. Use if you know there's a risk you've queued up stuff that has already been deleted. Can happen during in-game shutdown.
-	void Wipe();
-
-	// Wait until no frames are pending.  Use during shutdown before freeing pointers.
-	void WaitUntilQueueIdle();
+	void Run(GLRRenderThreadTask &task);

 	// Creation commands. These were not needed in Vulkan since there we can do that on the main thread.
 	// We pass in width/height here even though it's not strictly needed until we support glTextureStorage
@ -1027,26 +1036,14 @@ private:

 	// Per-frame data, round-robin so we can overlap submission with execution of the previous frame.
 	struct FrameData {
-		std::mutex push_mutex;
-		std::condition_variable push_condVar;
-
-		std::mutex pull_mutex;
-		std::condition_variable pull_condVar;
-
-		bool readyForFence = true;
-		bool readyForRun = false;
-		bool readyForSubmit = false;
-
 		bool skipSwap = false;
-		GLRRunType type = GLRRunType::END;

-		// GLuint fence; For future AZDO stuff?
-		std::vector<GLRStep *> steps;
-		std::vector<GLRInitStep> initSteps;
+		std::mutex fenceMutex;
+		std::condition_variable fenceCondVar;
+		bool readyForFence = true;

 		// Swapchain.
 		bool hasBegun = false;
-		uint32_t curSwapchainImage = -1;

 		GLDeleter deleter;
 		GLDeleter deleter_prev;
@ -1064,16 +1061,23 @@ private:

 	// Execution time state
 	bool run_ = true;
+
 	// Thread is managed elsewhere, and should call ThreadFrame.
-	std::mutex mutex_;
-	int threadInitFrame_ = 0;
 	GLQueueRunner queueRunner_;

-	// Thread state
-	int threadFrame_ = -1;
+	// For pushing data on the queue.
+	std::mutex pushMutex_;
+	std::condition_variable pushCondVar_;

-	bool nextFrame = false;
-	bool firstFrame = true;
+	std::queue<GLRRenderThreadTask> renderThreadQueue_;
+
+	// For readbacks and other reasons we need to sync with the render thread.
+	std::mutex syncMutex_;
+	std::condition_variable syncCondVar_;
+
+	bool firstFrame_ = true;
+	bool vrRenderStarted_ = false;
+	bool syncDone_ = false;

 	GLDeleter deleter_;
 	bool skipGLCalls_ = false;
--- a/Common/GPU/Vulkan/VulkanRenderManager.cpp
+++ b/Common/GPU/Vulkan/VulkanRenderManager.cpp
@ -1327,7 +1327,7 @@ void VulkanRenderManager::FlushSync() {
 		std::unique_lock<std::mutex> lock(syncMutex_);
 		// Wait for the flush to be hit, since we're syncing.
 		while (!frameData.syncDone) {
-			VLOG("PUSH: Waiting for frame[%d].readyForFence = 1 (sync)", curFrame);
+			VLOG("PUSH: Waiting for frame[%d].syncDone = 1 (sync)", curFrame);
 			syncCondVar_.wait(lock);
 		}
 		frameData.syncDone = false;
--- a/GPU/GLES/GPU_GLES.cpp
+++ b/GPU/GLES/GPU_GLES.cpp
@ -128,11 +128,6 @@ GPU_GLES::GPU_GLES(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
 }

 GPU_GLES::~GPU_GLES() {
-	if (draw_) {
-		GLRenderManager *render = (GLRenderManager *)draw_->GetNativeObject(Draw::NativeObject::RENDER_MANAGER);
-		render->Wipe();
-	}
-
 	// If we're here during app shutdown (exiting the Windows app in-game, for example)
 	// everything should already be cleared since DeviceLost has been run.

--- a/Qt/QtMain.h
+++ b/Qt/QtMain.h
@ -90,7 +90,6 @@ public:
 	}

 	void StopThread() override {
-		renderManager_->WaitUntilQueueIdle();
 		renderManager_->StopThread();
 	}

--- a/SDL/SDLGLGraphicsContext.h
+++ b/SDL/SDLGLGraphicsContext.h
@ -42,7 +42,6 @@ public:
 	}

 	void StopThread() override {
-		renderManager_->WaitUntilQueueIdle();
 		renderManager_->StopThread();
 	}

--- a/Windows/GPU/WindowsGLContext.cpp
+++ b/Windows/GPU/WindowsGLContext.cpp
@ -500,6 +500,5 @@ void WindowsGLContext::ThreadEnd() {
 }

 void WindowsGLContext::StopThread() {
-	renderManager_->WaitUntilQueueIdle();
 	renderManager_->StopThread();
 }
--- a/Windows/PPSSPP.vcxproj.filters
+++ b/Windows/PPSSPP.vcxproj.filters
@ -820,4 +820,4 @@
      <Filter>Other Platforms\SDL</Filter>
    </Text>
  </ItemGroup>
-</Project>
+</Project>
--- a/headless/SDLHeadlessHost.cpp
+++ b/headless/SDLHeadlessHost.cpp
@ -83,7 +83,6 @@ public:

 	void StopThread() override {
 		if (renderManager_) {
-			renderManager_->WaitUntilQueueIdle();
 			renderManager_->StopThread();
 		}
 	}
--- a/libretro/LibretroGLContext.h
+++ b/libretro/LibretroGLContext.h
@ -28,7 +28,6 @@ public:
 	bool ThreadFrame() override { return renderManager_->ThreadFrame(); }
 	void ThreadEnd() override { renderManager_->ThreadEnd(); }
 	void StopThread() override {
-		renderManager_->WaitUntilQueueIdle();
 		renderManager_->StopThread();
 	}