From b62572a78f8137e2e6c68ed40d7af2d54af3af8f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= <hrydgard@gmail.com>
Date: Mon, 3 Oct 2022 15:56:30 +0200
Subject: [PATCH] Refactor the framedata/GPU thread management.

---
 Common/GPU/OpenGL/GLQueueRunner.h         |   1 +
 Common/GPU/OpenGL/GLRenderManager.cpp     | 252 +++++++---------------
 Common/GPU/OpenGL/GLRenderManager.h       |  84 ++++----
 Common/GPU/Vulkan/VulkanRenderManager.cpp |   2 +-
 GPU/GLES/GPU_GLES.cpp                     |   5 -
 Qt/QtMain.h                               |   1 -
 SDL/SDLGLGraphicsContext.h                |   1 -
 Windows/GPU/WindowsGLContext.cpp          |   1 -
 Windows/PPSSPP.vcxproj.filters            |   2 +-
 headless/SDLHeadlessHost.cpp              |   1 -
 libretro/LibretroGLContext.h              |   1 -
 11 files changed, 130 insertions(+), 221 deletions(-)
diff --git a/Common/GPU/OpenGL/GLQueueRunner.h b/Common/GPU/OpenGL/GLQueueRunner.h
index 593e00f97b..b91648a1fa 100644
--- a/Common/GPU/OpenGL/GLQueueRunner.h
+++ b/Common/GPU/OpenGL/GLQueueRunner.h
@@ -2,6 +2,7 @@
 
 #include <cstdint>
 #include <vector>
+#include <set>
 #include <unordered_map>
 
 #include "Common/GPU/OpenGL/GLCommon.h"
diff --git a/Common/GPU/OpenGL/GLRenderManager.cpp b/Common/GPU/OpenGL/GLRenderManager.cpp
index 74a306fc01..11a337db09 100644
--- a/Common/GPU/OpenGL/GLRenderManager.cpp
+++ b/Common/GPU/OpenGL/GLRenderManager.cpp
@@ -112,6 +112,8 @@ void GLDeleter::Perform(GLRenderManager *renderManager, bool skipGLCalls) {
 }
 
 GLRenderManager::~GLRenderManager() {
+	_dbg_assert_(!run_);
+
 	for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) {
 		_assert_(frameData_[i].deleter.IsEmpty());
 		_assert_(frameData_[i].deleter_prev.IsEmpty());
@@ -123,7 +125,6 @@ GLRenderManager::~GLRenderManager() {
 
 void GLRenderManager::ThreadStart(Draw::DrawContext *draw) {
 	queueRunner_.CreateDeviceObjects();
-	threadFrame_ = threadInitFrame_;
 	renderThreadId = std::this_thread::get_id();
 
 	if (newInflightFrames_ != -1) {
@@ -170,7 +171,6 @@ void GLRenderManager::ThreadEnd() {
 	INFO_LOG(G3D, "ThreadEnd");
 
 	// Wait for any shutdown to complete in StopThread().
-	std::unique_lock<std::mutex> lock(mutex_);
 	queueRunner_.DestroyDeviceObjects();
 	VLOG("PULL: Quitting");
 
@@ -179,14 +179,8 @@ void GLRenderManager::ThreadEnd() {
 		// Since we're in shutdown, we should skip the GL calls on Android.
 		frameData_[i].deleter.Perform(this, skipGLCalls_);
 		frameData_[i].deleter_prev.Perform(this, skipGLCalls_);
-		for (int j = 0; j < (int)frameData_[i].steps.size(); j++) {
-			delete frameData_[i].steps[j];
-		}
-		frameData_[i].steps.clear();
-		frameData_[i].initSteps.clear();
 	}
 	deleter_.Perform(this, skipGLCalls_);
-
 	for (int i = 0; i < (int)steps_.size(); i++) {
 		delete steps_[i];
 	}
@@ -194,102 +188,50 @@ void GLRenderManager::ThreadEnd() {
 	initSteps_.clear();
 }
 
+// Unlike in Vulkan, this isn't a full independent function, instead it gets called every frame.
+// This also mean that we can poll the renderThreadQueue using a regular mutex, instead of
+// blocking on it using a condition variable.
 bool GLRenderManager::ThreadFrame() {
-	std::unique_lock<std::mutex> lock(mutex_);
 	if (!run_)
 		return false;
 
-	// In case of syncs or other partial completion, we keep going until we complete a frame.
-	do {
-		if (nextFrame) {
-			threadFrame_++;
-			if (threadFrame_ >= inflightFrames_)
-				threadFrame_ = 0;
-		}
-		FrameData &frameData = frameData_[threadFrame_];
-		{
-			std::unique_lock<std::mutex> lock(frameData.pull_mutex);
-			while (!frameData.readyForRun && run_) {
-				VLOG("PULL: Waiting for frame[%d].readyForRun", threadFrame_);
-				frameData.pull_condVar.wait(lock);
-			}
-			if (!frameData.readyForRun && !run_) {
-				// This means we're out of frames to render and run_ is false, so bail.
-				return false;
-			}
-			VLOG("PULL: Setting frame[%d].readyForRun = false", threadFrame_);
-			frameData.readyForRun = false;
-			frameData.deleter_prev.Perform(this, skipGLCalls_);
-			frameData.deleter_prev.Take(frameData.deleter);
-			// Previously we had a quick exit here that avoided calling Run() if run_ was suddenly false,
-			// but that created a race condition where frames could end up not finished properly on resize etc.
+	GLRRenderThreadTask task;
 
-			// Only increment next time if we're done.
-			nextFrame = frameData.type == GLRRunType::END;
-			_assert_(frameData.type == GLRRunType::END || frameData.type == GLRRunType::SYNC);
+	// In case of syncs or other partial completion, we keep going until we complete a frame.
+	while (true) {
+		// Pop a task of the queue and execute it.
+		{
+			std::unique_lock<std::mutex> lock(pushMutex_);
+			if (renderThreadQueue_.empty()) {
+				break;
+			}
+			task = renderThreadQueue_.front();
+			renderThreadQueue_.pop();
 		}
-		VLOG("PULL: Running frame %d", threadFrame_);
-		if (firstFrame) {
-			INFO_LOG(G3D, "Running first frame (%d)", threadFrame_);
-			firstFrame = false;
+
+		// We got a task! We can now have pushMutex_ unlocked, allowing the host to
+		// push more work when it feels like it, and just start working.
+		if (task.runType == GLRRunType::EXIT) {
+			// Oh, host wanted out. Let's leave, and also let's notify the host.
+			// This is unlike Vulkan too which can just block on the thread existing.
+			std::unique_lock<std::mutex> lock(syncMutex_);
+			syncCondVar_.notify_one();
+			syncDone_ = true;
+			break;
 		}
 
 		// Render the scene.
-		Run(threadFrame_);
-
-		VLOG("PULL: Finished frame %d", threadFrame_);
-	} while (!nextFrame);
+		Run(task);
+	};
 
 	return true;
 }
 
 void GLRenderManager::StopThread() {
-	// Since we don't control the thread directly, this will only pause the thread.
+	// There's not really a lot to do here anymore.
 
 	if (run_) {
 		run_ = false;
-		for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) {
-			auto &frameData = frameData_[i];
-			{
-				std::unique_lock<std::mutex> lock(frameData.push_mutex);
-				frameData.push_condVar.notify_all();
-			}
-			{
-				std::unique_lock<std::mutex> lock(frameData.pull_mutex);
-				frameData.pull_condVar.notify_all();
-			}
-		}
-
-		// Wait until we've definitely stopped the threadframe.
-		std::unique_lock<std::mutex> lock(mutex_);
-
-		INFO_LOG(G3D, "GL submission thread paused. Frame=%d", curFrame_);
-
-		// Eat whatever has been queued up for this frame if anything.
-		Wipe();
-
-		// Wait for any fences to finish and be resignaled, so we don't have sync issues.
-		// Also clean out any queued data, which might refer to things that might not be valid
-		// when we restart...
-		for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) {
-			auto &frameData = frameData_[i];
-			std::unique_lock<std::mutex> lock(frameData.push_mutex);
-			if (frameData.readyForRun || frameData.steps.size() != 0) {
-				Crash();
-			}
-			frameData.readyForRun = false;
-			frameData.readyForSubmit = false;
-			for (size_t i = 0; i < frameData.steps.size(); i++) {
-				delete frameData.steps[i];
-			}
-			frameData.steps.clear();
-			frameData.initSteps.clear();
-
-			while (!frameData.readyForFence) {
-				VLOG("PUSH: Waiting for frame[%d].readyForFence = 1 (stop)", i);
-				frameData.push_condVar.wait(lock);
-			}
-		}
 	} else {
 		INFO_LOG(G3D, "GL submission thread was already paused.");
 	}
@@ -452,58 +394,54 @@ void GLRenderManager::CopyImageToMemorySync(GLRTexture *texture, int mipLevel, i
 void GLRenderManager::BeginFrame() {
 	VLOG("BeginFrame");
 
+	// TODO: Why is this debug-only?
 #ifdef _DEBUG
 	curProgram_ = nullptr;
 #endif
 
 	int curFrame = GetCurFrame();
-	FrameData &frameData = frameData_[curFrame];
 
-	// Make sure the very last command buffer from the frame before the previous has been fully executed.
+	FrameData &frameData = frameData_[curFrame_];
 	{
-		std::unique_lock<std::mutex> lock(frameData.push_mutex);
+		std::unique_lock<std::mutex> lock(frameData.fenceMutex);
 		while (!frameData.readyForFence) {
-			VLOG("PUSH: Waiting for frame[%d].readyForFence = 1", curFrame);
-			frameData.push_condVar.wait(lock);
+			frameData.fenceCondVar.wait(lock);
 		}
 		frameData.readyForFence = false;
-		frameData.readyForSubmit = true;
 	}
 
-	VLOG("PUSH: Fencing %d", curFrame);
-
-	// glFenceSync(&frameData.fence...)
-
 	// Must be after the fence - this performs deletes.
 	VLOG("PUSH: BeginFrame %d", curFrame);
+
 	if (!run_) {
 		WARN_LOG(G3D, "BeginFrame while !run_!");
 	}
 
-	// vulkan_->BeginFrame();
-	// In GL, we have to do deletes on the submission thread.
-
 	insideFrame_ = true;
 }
 
 void GLRenderManager::Finish() {
 	curRenderStep_ = nullptr;
+
 	int curFrame = GetCurFrame();
 	FrameData &frameData = frameData_[curFrame];
 	{
-		std::unique_lock<std::mutex> lock(frameData.pull_mutex);
-		VLOG("PUSH: Frame[%d].readyForRun = true, notifying pull", curFrame);
-		frameData.steps = std::move(steps_);
-		steps_.clear();
-		frameData.initSteps = std::move(initSteps_);
-		initSteps_.clear();
-		frameData.readyForRun = true;
-		frameData.type = GLRRunType::END;
-		frameData_[curFrame_].deleter.Take(deleter_);
-	}
+		VLOG("PUSH: Frame[%d]", curFrame);
+		GLRRenderThreadTask task;
+		task.frame = curFrame;
+		task.runType = GLRRunType::END;
 
-	// Notify calls do not in fact need to be done with the mutex locked.
-	frameData.pull_condVar.notify_all();
+		frameData_[curFrame_].deleter.Take(deleter_);
+
+		std::unique_lock<std::mutex> lock(pushMutex_);
+		renderThreadQueue_.push(task);
+		renderThreadQueue_.back().initSteps = std::move(initSteps_);
+		renderThreadQueue_.back().steps = std::move(steps_);
+		initSteps_.clear();
+		steps_.clear();
+
+		pushCondVar_.notify_one();
+	}
 
 	curFrame_++;
 	if (curFrame_ >= inflightFrames_)
@@ -513,17 +451,17 @@ void GLRenderManager::Finish() {
 }
 
 // Render thread
-void GLRenderManager::Run(int frame) {
-	FrameData &frameData = frameData_[frame];
+void GLRenderManager::Run(GLRRenderThreadTask &task) {
+	FrameData &frameData = frameData_[task.frame];
 	if (!frameData.hasBegun) {
 		frameData.hasBegun = true;
+
+		frameData.deleter_prev.Perform(this, skipGLCalls_);
+		frameData.deleter_prev.Take(frameData.deleter);
 	}
 
-	auto &stepsOnThread = frameData_[frame].steps;
-	auto &initStepsOnThread = frameData_[frame].initSteps;
 	// queueRunner_.LogSteps(stepsOnThread);
-	queueRunner_.RunInitSteps(initStepsOnThread, skipGLCalls_);
-	initStepsOnThread.clear();
+	queueRunner_.RunInitSteps(task.initSteps, skipGLCalls_);
 
 	// Run this after RunInitSteps so any fresh GLRBuffers for the pushbuffers can get created.
 	if (!skipGLCalls_) {
@@ -537,13 +475,12 @@ void GLRenderManager::Run(int frame) {
 		int passes = GetVRPassesCount();
 		for (int i = 0; i < passes; i++) {
 			PreVRFrameRender(i);
-			queueRunner_.RunSteps(stepsOnThread, skipGLCalls_, i < passes - 1, true);
+			queueRunner_.RunSteps(task.steps, skipGLCalls_, i < passes - 1, true);
 			PostVRFrameRender();
 		}
 	} else {
-		queueRunner_.RunSteps(stepsOnThread, skipGLCalls_, false, false);
+		queueRunner_.RunSteps(task.steps, skipGLCalls_, false, false);
 	}
-	stepsOnThread.clear();
 
 	if (!skipGLCalls_) {
 		for (auto iter : frameData.activePushBuffers) {
@@ -551,19 +488,16 @@ void GLRenderManager::Run(int frame) {
 		}
 	}
 
-	switch (frameData.type) {
+	switch (task.runType) {
 	case GLRRunType::END:
 		frameData.hasBegun = false;
 
 		VLOG("PULL: Frame %d.readyForFence = true", frame);
-
 		{
-			std::unique_lock<std::mutex> lock(frameData.push_mutex);
-			_assert_(frameData.readyForSubmit);
+			std::lock_guard<std::mutex> lock(frameData.fenceMutex);
 			frameData.readyForFence = true;
-			frameData.readyForSubmit = false;
-			frameData.push_condVar.notify_all();
-		}
+			frameData.fenceCondVar.notify_one();
+		}{}
 
 		if (!frameData.skipSwap) {
 			if (swapIntervalChanged_) {
@@ -572,6 +506,7 @@ void GLRenderManager::Run(int frame) {
 					swapIntervalFunction_(swapInterval_);
 				}
 			}
+			// This is the swapchain framebuffer flip.
 			if (swapFunction_) {
 				swapFunction_();
 			}
@@ -581,13 +516,14 @@ void GLRenderManager::Run(int frame) {
 		break;
 
 	case GLRRunType::SYNC:
+		frameData.hasBegun = false;
+
 		// glFinish is not actually necessary here, and won't be unless we start using
 		// glBufferStorage. Then we need to use fences.
 		{
-			std::unique_lock<std::mutex> lock(frameData.push_mutex);
-			frameData.readyForFence = true;
-			frameData.readyForSubmit = true;
-			frameData.push_condVar.notify_all();
+			std::unique_lock<std::mutex> lock(syncMutex_);
+			syncDone_ = true;
+			syncCondVar_.notify_one();
 		}
 		break;
 
@@ -599,51 +535,29 @@ void GLRenderManager::Run(int frame) {
 }
 
 void GLRenderManager::FlushSync() {
-	int curFrame = curFrame_;
-	FrameData &frameData = frameData_[curFrame];
 	{
-		std::unique_lock<std::mutex> lock(frameData.pull_mutex);
 		VLOG("PUSH: Frame[%d].readyForRun = true (sync)", curFrame);
-		frameData.initSteps = std::move(initSteps_);
-		initSteps_.clear();
-		frameData.steps = std::move(steps_);
+
+		GLRRenderThreadTask task;
+		task.frame = curFrame_;
+		task.runType = GLRRunType::SYNC;
+
+		std::unique_lock<std::mutex> lock(pushMutex_);
+		renderThreadQueue_.push(task);
+		renderThreadQueue_.back().initSteps = std::move(initSteps_);
+		renderThreadQueue_.back().steps = std::move(steps_);
+		pushCondVar_.notify_one();
 		steps_.clear();
-		frameData.readyForRun = true;
-		_assert_(frameData.readyForFence == false);
-		frameData.type = GLRRunType::SYNC;
-		frameData.pull_condVar.notify_all();
 	}
+
 	{
-		std::unique_lock<std::mutex> lock(frameData.push_mutex);
+		std::unique_lock<std::mutex> lock(syncMutex_);
 		// Wait for the flush to be hit, since we're syncing.
-		while (!frameData.readyForFence) {
+		while (!syncDone_) {
 			VLOG("PUSH: Waiting for frame[%d].readyForFence = 1 (sync)", curFrame);
-			frameData.push_condVar.wait(lock);
-		}
-		frameData.readyForFence = false;
-		frameData.readyForSubmit = true;
-	}
-}
-
-void GLRenderManager::Wipe() {
-	initSteps_.clear();
-	for (auto step : steps_) {
-		delete step;
-	}
-	steps_.clear();
-}
-
-void GLRenderManager::WaitUntilQueueIdle() {
-	// Just wait for all frames to be ready.
-	for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) {
-		FrameData &frameData = frameData_[i];
-
-		std::unique_lock<std::mutex> lock(frameData.push_mutex);
-		// Ignore unsubmitted frames.
-		while (!frameData.readyForFence && frameData.readyForRun) {
-			VLOG("PUSH: Waiting for frame[%d].readyForFence = 1 (wait idle)", i);
-			frameData.push_condVar.wait(lock);
+			syncCondVar_.wait(lock);
 		}
+		syncDone_ = false;
 	}
 }
 
diff --git a/Common/GPU/OpenGL/GLRenderManager.h b/Common/GPU/OpenGL/GLRenderManager.h
index 243a66987c..4872155198 100644
--- a/Common/GPU/OpenGL/GLRenderManager.h
+++ b/Common/GPU/OpenGL/GLRenderManager.h
@@ -7,6 +7,7 @@
 #include <set>
 #include <string>
 #include <mutex>
+#include <queue>
 #include <condition_variable>
 
 #include "Common/GPU/OpenGL/GLCommon.h"
@@ -349,11 +350,29 @@ private:
 	GLBufferStrategy strategy_ = GLBufferStrategy::SUBDATA;
 };
 
+class GLRInputLayout {
+public:
+	struct Entry {
+		int location;
+		int count;
+		GLenum type;
+		GLboolean normalized;
+		int stride;
+		intptr_t offset;
+	};
+	std::vector<Entry> entries;
+	int semanticsMask_ = 0;
+};
+
 enum class GLRRunType {
 	END,
 	SYNC,
+	EXIT,
 };
 
+class GLRenderManager;
+class GLPushBuffer;
+
 class GLDeleter {
 public:
 	void Perform(GLRenderManager *renderManager, bool skipGLCalls);
@@ -373,18 +392,14 @@ public:
 	std::vector<GLPushBuffer *> pushBuffers;
 };
 
-class GLRInputLayout {
-public:
-	struct Entry {
-		int location;
-		int count;
-		GLenum type;
-		GLboolean normalized;
-		int stride;
-		intptr_t offset;
-	};
-	std::vector<Entry> entries;
-	int semanticsMask_ = 0;
+// These are enqueued from the main thread,
+// and the render thread pops them off
+struct GLRRenderThreadTask {
+	std::vector<GLRStep *> steps;
+	std::vector<GLRInitStep> initSteps;
+
+	int frame;
+	GLRRunType runType;
 };
 
 // Note: The GLRenderManager is created and destroyed on the render thread, and the latter
@@ -416,13 +431,7 @@ public:
 	void BeginFrame();
 	// Can run on a different thread!
 	void Finish();
-	void Run(int frame);
-
-	// Zaps queued up commands. Use if you know there's a risk you've queued up stuff that has already been deleted. Can happen during in-game shutdown.
-	void Wipe();
-
-	// Wait until no frames are pending.  Use during shutdown before freeing pointers.
-	void WaitUntilQueueIdle();
+	void Run(GLRRenderThreadTask &task);
 
 	// Creation commands. These were not needed in Vulkan since there we can do that on the main thread.
 	// We pass in width/height here even though it's not strictly needed until we support glTextureStorage
@@ -1027,26 +1036,14 @@ private:
 
 	// Per-frame data, round-robin so we can overlap submission with execution of the previous frame.
 	struct FrameData {
-		std::mutex push_mutex;
-		std::condition_variable push_condVar;
-
-		std::mutex pull_mutex;
-		std::condition_variable pull_condVar;
-
-		bool readyForFence = true;
-		bool readyForRun = false;
-		bool readyForSubmit = false;
-
 		bool skipSwap = false;
-		GLRRunType type = GLRRunType::END;
 
-		// GLuint fence; For future AZDO stuff?
-		std::vector<GLRStep *> steps;
-		std::vector<GLRInitStep> initSteps;
+		std::mutex fenceMutex;
+		std::condition_variable fenceCondVar;
+		bool readyForFence = true;
 
 		// Swapchain.
 		bool hasBegun = false;
-		uint32_t curSwapchainImage = -1;
 
 		GLDeleter deleter;
 		GLDeleter deleter_prev;
@@ -1064,16 +1061,23 @@ private:
 
 	// Execution time state
 	bool run_ = true;
+
 	// Thread is managed elsewhere, and should call ThreadFrame.
-	std::mutex mutex_;
-	int threadInitFrame_ = 0;
 	GLQueueRunner queueRunner_;
 
-	// Thread state
-	int threadFrame_ = -1;
+	// For pushing data on the queue.
+	std::mutex pushMutex_;
+	std::condition_variable pushCondVar_;
 
-	bool nextFrame = false;
-	bool firstFrame = true;
+	std::queue<GLRRenderThreadTask> renderThreadQueue_;
+
+	// For readbacks and other reasons we need to sync with the render thread.
+	std::mutex syncMutex_;
+	std::condition_variable syncCondVar_;
+
+	bool firstFrame_ = true;
+	bool vrRenderStarted_ = false;
+	bool syncDone_ = false;
 
 	GLDeleter deleter_;
 	bool skipGLCalls_ = false;
diff --git a/Common/GPU/Vulkan/VulkanRenderManager.cpp b/Common/GPU/Vulkan/VulkanRenderManager.cpp
index 63cf2e109a..0003333ac6 100644
--- a/Common/GPU/Vulkan/VulkanRenderManager.cpp
+++ b/Common/GPU/Vulkan/VulkanRenderManager.cpp
@@ -1327,7 +1327,7 @@ void VulkanRenderManager::FlushSync() {
 		std::unique_lock<std::mutex> lock(syncMutex_);
 		// Wait for the flush to be hit, since we're syncing.
 		while (!frameData.syncDone) {
-			VLOG("PUSH: Waiting for frame[%d].readyForFence = 1 (sync)", curFrame);
+			VLOG("PUSH: Waiting for frame[%d].syncDone = 1 (sync)", curFrame);
 			syncCondVar_.wait(lock);
 		}
 		frameData.syncDone = false;
diff --git a/GPU/GLES/GPU_GLES.cpp b/GPU/GLES/GPU_GLES.cpp
index a41dc68ee4..034e8bd660 100644
--- a/GPU/GLES/GPU_GLES.cpp
+++ b/GPU/GLES/GPU_GLES.cpp
@@ -128,11 +128,6 @@ GPU_GLES::GPU_GLES(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
 }
 
 GPU_GLES::~GPU_GLES() {
-	if (draw_) {
-		GLRenderManager *render = (GLRenderManager *)draw_->GetNativeObject(Draw::NativeObject::RENDER_MANAGER);
-		render->Wipe();
-	}
-
 	// If we're here during app shutdown (exiting the Windows app in-game, for example)
 	// everything should already be cleared since DeviceLost has been run.
 
diff --git a/Qt/QtMain.h b/Qt/QtMain.h
index afb97149f4..0177ca9292 100644
--- a/Qt/QtMain.h
+++ b/Qt/QtMain.h
@@ -90,7 +90,6 @@ public:
 	}
 
 	void StopThread() override {
-		renderManager_->WaitUntilQueueIdle();
 		renderManager_->StopThread();
 	}
 
diff --git a/SDL/SDLGLGraphicsContext.h b/SDL/SDLGLGraphicsContext.h
index 1910ce8f56..7982a3fa49 100644
--- a/SDL/SDLGLGraphicsContext.h
+++ b/SDL/SDLGLGraphicsContext.h
@@ -42,7 +42,6 @@ public:
 	}
 
 	void StopThread() override {
-		renderManager_->WaitUntilQueueIdle();
 		renderManager_->StopThread();
 	}
 
diff --git a/Windows/GPU/WindowsGLContext.cpp b/Windows/GPU/WindowsGLContext.cpp
index 05afc4d1f5..1888b351b5 100644
--- a/Windows/GPU/WindowsGLContext.cpp
+++ b/Windows/GPU/WindowsGLContext.cpp
@@ -500,6 +500,5 @@ void WindowsGLContext::ThreadEnd() {
 }
 
 void WindowsGLContext::StopThread() {
-	renderManager_->WaitUntilQueueIdle();
 	renderManager_->StopThread();
 }
diff --git a/Windows/PPSSPP.vcxproj.filters b/Windows/PPSSPP.vcxproj.filters
index b5f7795084..291197ddf3 100644
--- a/Windows/PPSSPP.vcxproj.filters
+++ b/Windows/PPSSPP.vcxproj.filters
@@ -820,4 +820,4 @@
       <Filter>Other Platforms\SDL</Filter>
     </Text>
   </ItemGroup>
-</Project>
\ No newline at end of file
+</Project>
diff --git a/headless/SDLHeadlessHost.cpp b/headless/SDLHeadlessHost.cpp
index 13f9ad33fc..9d5524b15a 100644
--- a/headless/SDLHeadlessHost.cpp
+++ b/headless/SDLHeadlessHost.cpp
@@ -83,7 +83,6 @@ public:
 
 	void StopThread() override {
 		if (renderManager_) {
-			renderManager_->WaitUntilQueueIdle();
 			renderManager_->StopThread();
 		}
 	}
diff --git a/libretro/LibretroGLContext.h b/libretro/LibretroGLContext.h
index 5c6ded1d6e..66d8d641c7 100644
--- a/libretro/LibretroGLContext.h
+++ b/libretro/LibretroGLContext.h
@@ -28,7 +28,6 @@ public:
 	bool ThreadFrame() override { return renderManager_->ThreadFrame(); }
 	void ThreadEnd() override { renderManager_->ThreadEnd(); }
 	void StopThread() override {
-		renderManager_->WaitUntilQueueIdle();
 		renderManager_->StopThread();
 	}