From b62572a78f8137e2e6c68ed40d7af2d54af3af8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Mon, 3 Oct 2022 15:56:30 +0200 Subject: [PATCH] Refactor the framedata/GPU thread management. --- Common/GPU/OpenGL/GLQueueRunner.h | 1 + Common/GPU/OpenGL/GLRenderManager.cpp | 252 +++++++--------------- Common/GPU/OpenGL/GLRenderManager.h | 84 ++++---- Common/GPU/Vulkan/VulkanRenderManager.cpp | 2 +- GPU/GLES/GPU_GLES.cpp | 5 - Qt/QtMain.h | 1 - SDL/SDLGLGraphicsContext.h | 1 - Windows/GPU/WindowsGLContext.cpp | 1 - Windows/PPSSPP.vcxproj.filters | 2 +- headless/SDLHeadlessHost.cpp | 1 - libretro/LibretroGLContext.h | 1 - 11 files changed, 130 insertions(+), 221 deletions(-) diff --git a/Common/GPU/OpenGL/GLQueueRunner.h b/Common/GPU/OpenGL/GLQueueRunner.h index 593e00f97b..b91648a1fa 100644 --- a/Common/GPU/OpenGL/GLQueueRunner.h +++ b/Common/GPU/OpenGL/GLQueueRunner.h @@ -2,6 +2,7 @@ #include #include +#include #include #include "Common/GPU/OpenGL/GLCommon.h" diff --git a/Common/GPU/OpenGL/GLRenderManager.cpp b/Common/GPU/OpenGL/GLRenderManager.cpp index 74a306fc01..11a337db09 100644 --- a/Common/GPU/OpenGL/GLRenderManager.cpp +++ b/Common/GPU/OpenGL/GLRenderManager.cpp @@ -112,6 +112,8 @@ void GLDeleter::Perform(GLRenderManager *renderManager, bool skipGLCalls) { } GLRenderManager::~GLRenderManager() { + _dbg_assert_(!run_); + for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) { _assert_(frameData_[i].deleter.IsEmpty()); _assert_(frameData_[i].deleter_prev.IsEmpty()); @@ -123,7 +125,6 @@ GLRenderManager::~GLRenderManager() { void GLRenderManager::ThreadStart(Draw::DrawContext *draw) { queueRunner_.CreateDeviceObjects(); - threadFrame_ = threadInitFrame_; renderThreadId = std::this_thread::get_id(); if (newInflightFrames_ != -1) { @@ -170,7 +171,6 @@ void GLRenderManager::ThreadEnd() { INFO_LOG(G3D, "ThreadEnd"); // Wait for any shutdown to complete in StopThread(). - std::unique_lock lock(mutex_); queueRunner_.DestroyDeviceObjects(); VLOG("PULL: Quitting"); @@ -179,14 +179,8 @@ void GLRenderManager::ThreadEnd() { // Since we're in shutdown, we should skip the GL calls on Android. frameData_[i].deleter.Perform(this, skipGLCalls_); frameData_[i].deleter_prev.Perform(this, skipGLCalls_); - for (int j = 0; j < (int)frameData_[i].steps.size(); j++) { - delete frameData_[i].steps[j]; - } - frameData_[i].steps.clear(); - frameData_[i].initSteps.clear(); } deleter_.Perform(this, skipGLCalls_); - for (int i = 0; i < (int)steps_.size(); i++) { delete steps_[i]; } @@ -194,102 +188,50 @@ void GLRenderManager::ThreadEnd() { initSteps_.clear(); } +// Unlike in Vulkan, this isn't a full independent function, instead it gets called every frame. +// This also mean that we can poll the renderThreadQueue using a regular mutex, instead of +// blocking on it using a condition variable. bool GLRenderManager::ThreadFrame() { - std::unique_lock lock(mutex_); if (!run_) return false; - // In case of syncs or other partial completion, we keep going until we complete a frame. - do { - if (nextFrame) { - threadFrame_++; - if (threadFrame_ >= inflightFrames_) - threadFrame_ = 0; - } - FrameData &frameData = frameData_[threadFrame_]; - { - std::unique_lock lock(frameData.pull_mutex); - while (!frameData.readyForRun && run_) { - VLOG("PULL: Waiting for frame[%d].readyForRun", threadFrame_); - frameData.pull_condVar.wait(lock); - } - if (!frameData.readyForRun && !run_) { - // This means we're out of frames to render and run_ is false, so bail. - return false; - } - VLOG("PULL: Setting frame[%d].readyForRun = false", threadFrame_); - frameData.readyForRun = false; - frameData.deleter_prev.Perform(this, skipGLCalls_); - frameData.deleter_prev.Take(frameData.deleter); - // Previously we had a quick exit here that avoided calling Run() if run_ was suddenly false, - // but that created a race condition where frames could end up not finished properly on resize etc. + GLRRenderThreadTask task; - // Only increment next time if we're done. - nextFrame = frameData.type == GLRRunType::END; - _assert_(frameData.type == GLRRunType::END || frameData.type == GLRRunType::SYNC); + // In case of syncs or other partial completion, we keep going until we complete a frame. + while (true) { + // Pop a task of the queue and execute it. + { + std::unique_lock lock(pushMutex_); + if (renderThreadQueue_.empty()) { + break; + } + task = renderThreadQueue_.front(); + renderThreadQueue_.pop(); } - VLOG("PULL: Running frame %d", threadFrame_); - if (firstFrame) { - INFO_LOG(G3D, "Running first frame (%d)", threadFrame_); - firstFrame = false; + + // We got a task! We can now have pushMutex_ unlocked, allowing the host to + // push more work when it feels like it, and just start working. + if (task.runType == GLRRunType::EXIT) { + // Oh, host wanted out. Let's leave, and also let's notify the host. + // This is unlike Vulkan too which can just block on the thread existing. + std::unique_lock lock(syncMutex_); + syncCondVar_.notify_one(); + syncDone_ = true; + break; } // Render the scene. - Run(threadFrame_); - - VLOG("PULL: Finished frame %d", threadFrame_); - } while (!nextFrame); + Run(task); + }; return true; } void GLRenderManager::StopThread() { - // Since we don't control the thread directly, this will only pause the thread. + // There's not really a lot to do here anymore. if (run_) { run_ = false; - for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) { - auto &frameData = frameData_[i]; - { - std::unique_lock lock(frameData.push_mutex); - frameData.push_condVar.notify_all(); - } - { - std::unique_lock lock(frameData.pull_mutex); - frameData.pull_condVar.notify_all(); - } - } - - // Wait until we've definitely stopped the threadframe. - std::unique_lock lock(mutex_); - - INFO_LOG(G3D, "GL submission thread paused. Frame=%d", curFrame_); - - // Eat whatever has been queued up for this frame if anything. - Wipe(); - - // Wait for any fences to finish and be resignaled, so we don't have sync issues. - // Also clean out any queued data, which might refer to things that might not be valid - // when we restart... - for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) { - auto &frameData = frameData_[i]; - std::unique_lock lock(frameData.push_mutex); - if (frameData.readyForRun || frameData.steps.size() != 0) { - Crash(); - } - frameData.readyForRun = false; - frameData.readyForSubmit = false; - for (size_t i = 0; i < frameData.steps.size(); i++) { - delete frameData.steps[i]; - } - frameData.steps.clear(); - frameData.initSteps.clear(); - - while (!frameData.readyForFence) { - VLOG("PUSH: Waiting for frame[%d].readyForFence = 1 (stop)", i); - frameData.push_condVar.wait(lock); - } - } } else { INFO_LOG(G3D, "GL submission thread was already paused."); } @@ -452,58 +394,54 @@ void GLRenderManager::CopyImageToMemorySync(GLRTexture *texture, int mipLevel, i void GLRenderManager::BeginFrame() { VLOG("BeginFrame"); + // TODO: Why is this debug-only? #ifdef _DEBUG curProgram_ = nullptr; #endif int curFrame = GetCurFrame(); - FrameData &frameData = frameData_[curFrame]; - // Make sure the very last command buffer from the frame before the previous has been fully executed. + FrameData &frameData = frameData_[curFrame_]; { - std::unique_lock lock(frameData.push_mutex); + std::unique_lock lock(frameData.fenceMutex); while (!frameData.readyForFence) { - VLOG("PUSH: Waiting for frame[%d].readyForFence = 1", curFrame); - frameData.push_condVar.wait(lock); + frameData.fenceCondVar.wait(lock); } frameData.readyForFence = false; - frameData.readyForSubmit = true; } - VLOG("PUSH: Fencing %d", curFrame); - - // glFenceSync(&frameData.fence...) - // Must be after the fence - this performs deletes. VLOG("PUSH: BeginFrame %d", curFrame); + if (!run_) { WARN_LOG(G3D, "BeginFrame while !run_!"); } - // vulkan_->BeginFrame(); - // In GL, we have to do deletes on the submission thread. - insideFrame_ = true; } void GLRenderManager::Finish() { curRenderStep_ = nullptr; + int curFrame = GetCurFrame(); FrameData &frameData = frameData_[curFrame]; { - std::unique_lock lock(frameData.pull_mutex); - VLOG("PUSH: Frame[%d].readyForRun = true, notifying pull", curFrame); - frameData.steps = std::move(steps_); - steps_.clear(); - frameData.initSteps = std::move(initSteps_); - initSteps_.clear(); - frameData.readyForRun = true; - frameData.type = GLRRunType::END; - frameData_[curFrame_].deleter.Take(deleter_); - } + VLOG("PUSH: Frame[%d]", curFrame); + GLRRenderThreadTask task; + task.frame = curFrame; + task.runType = GLRRunType::END; - // Notify calls do not in fact need to be done with the mutex locked. - frameData.pull_condVar.notify_all(); + frameData_[curFrame_].deleter.Take(deleter_); + + std::unique_lock lock(pushMutex_); + renderThreadQueue_.push(task); + renderThreadQueue_.back().initSteps = std::move(initSteps_); + renderThreadQueue_.back().steps = std::move(steps_); + initSteps_.clear(); + steps_.clear(); + + pushCondVar_.notify_one(); + } curFrame_++; if (curFrame_ >= inflightFrames_) @@ -513,17 +451,17 @@ void GLRenderManager::Finish() { } // Render thread -void GLRenderManager::Run(int frame) { - FrameData &frameData = frameData_[frame]; +void GLRenderManager::Run(GLRRenderThreadTask &task) { + FrameData &frameData = frameData_[task.frame]; if (!frameData.hasBegun) { frameData.hasBegun = true; + + frameData.deleter_prev.Perform(this, skipGLCalls_); + frameData.deleter_prev.Take(frameData.deleter); } - auto &stepsOnThread = frameData_[frame].steps; - auto &initStepsOnThread = frameData_[frame].initSteps; // queueRunner_.LogSteps(stepsOnThread); - queueRunner_.RunInitSteps(initStepsOnThread, skipGLCalls_); - initStepsOnThread.clear(); + queueRunner_.RunInitSteps(task.initSteps, skipGLCalls_); // Run this after RunInitSteps so any fresh GLRBuffers for the pushbuffers can get created. if (!skipGLCalls_) { @@ -537,13 +475,12 @@ void GLRenderManager::Run(int frame) { int passes = GetVRPassesCount(); for (int i = 0; i < passes; i++) { PreVRFrameRender(i); - queueRunner_.RunSteps(stepsOnThread, skipGLCalls_, i < passes - 1, true); + queueRunner_.RunSteps(task.steps, skipGLCalls_, i < passes - 1, true); PostVRFrameRender(); } } else { - queueRunner_.RunSteps(stepsOnThread, skipGLCalls_, false, false); + queueRunner_.RunSteps(task.steps, skipGLCalls_, false, false); } - stepsOnThread.clear(); if (!skipGLCalls_) { for (auto iter : frameData.activePushBuffers) { @@ -551,19 +488,16 @@ void GLRenderManager::Run(int frame) { } } - switch (frameData.type) { + switch (task.runType) { case GLRRunType::END: frameData.hasBegun = false; VLOG("PULL: Frame %d.readyForFence = true", frame); - { - std::unique_lock lock(frameData.push_mutex); - _assert_(frameData.readyForSubmit); + std::lock_guard lock(frameData.fenceMutex); frameData.readyForFence = true; - frameData.readyForSubmit = false; - frameData.push_condVar.notify_all(); - } + frameData.fenceCondVar.notify_one(); + }{} if (!frameData.skipSwap) { if (swapIntervalChanged_) { @@ -572,6 +506,7 @@ void GLRenderManager::Run(int frame) { swapIntervalFunction_(swapInterval_); } } + // This is the swapchain framebuffer flip. if (swapFunction_) { swapFunction_(); } @@ -581,13 +516,14 @@ void GLRenderManager::Run(int frame) { break; case GLRRunType::SYNC: + frameData.hasBegun = false; + // glFinish is not actually necessary here, and won't be unless we start using // glBufferStorage. Then we need to use fences. { - std::unique_lock lock(frameData.push_mutex); - frameData.readyForFence = true; - frameData.readyForSubmit = true; - frameData.push_condVar.notify_all(); + std::unique_lock lock(syncMutex_); + syncDone_ = true; + syncCondVar_.notify_one(); } break; @@ -599,51 +535,29 @@ void GLRenderManager::Run(int frame) { } void GLRenderManager::FlushSync() { - int curFrame = curFrame_; - FrameData &frameData = frameData_[curFrame]; { - std::unique_lock lock(frameData.pull_mutex); VLOG("PUSH: Frame[%d].readyForRun = true (sync)", curFrame); - frameData.initSteps = std::move(initSteps_); - initSteps_.clear(); - frameData.steps = std::move(steps_); + + GLRRenderThreadTask task; + task.frame = curFrame_; + task.runType = GLRRunType::SYNC; + + std::unique_lock lock(pushMutex_); + renderThreadQueue_.push(task); + renderThreadQueue_.back().initSteps = std::move(initSteps_); + renderThreadQueue_.back().steps = std::move(steps_); + pushCondVar_.notify_one(); steps_.clear(); - frameData.readyForRun = true; - _assert_(frameData.readyForFence == false); - frameData.type = GLRRunType::SYNC; - frameData.pull_condVar.notify_all(); } + { - std::unique_lock lock(frameData.push_mutex); + std::unique_lock lock(syncMutex_); // Wait for the flush to be hit, since we're syncing. - while (!frameData.readyForFence) { + while (!syncDone_) { VLOG("PUSH: Waiting for frame[%d].readyForFence = 1 (sync)", curFrame); - frameData.push_condVar.wait(lock); - } - frameData.readyForFence = false; - frameData.readyForSubmit = true; - } -} - -void GLRenderManager::Wipe() { - initSteps_.clear(); - for (auto step : steps_) { - delete step; - } - steps_.clear(); -} - -void GLRenderManager::WaitUntilQueueIdle() { - // Just wait for all frames to be ready. - for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) { - FrameData &frameData = frameData_[i]; - - std::unique_lock lock(frameData.push_mutex); - // Ignore unsubmitted frames. - while (!frameData.readyForFence && frameData.readyForRun) { - VLOG("PUSH: Waiting for frame[%d].readyForFence = 1 (wait idle)", i); - frameData.push_condVar.wait(lock); + syncCondVar_.wait(lock); } + syncDone_ = false; } } diff --git a/Common/GPU/OpenGL/GLRenderManager.h b/Common/GPU/OpenGL/GLRenderManager.h index 243a66987c..4872155198 100644 --- a/Common/GPU/OpenGL/GLRenderManager.h +++ b/Common/GPU/OpenGL/GLRenderManager.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include "Common/GPU/OpenGL/GLCommon.h" @@ -349,11 +350,29 @@ private: GLBufferStrategy strategy_ = GLBufferStrategy::SUBDATA; }; +class GLRInputLayout { +public: + struct Entry { + int location; + int count; + GLenum type; + GLboolean normalized; + int stride; + intptr_t offset; + }; + std::vector entries; + int semanticsMask_ = 0; +}; + enum class GLRRunType { END, SYNC, + EXIT, }; +class GLRenderManager; +class GLPushBuffer; + class GLDeleter { public: void Perform(GLRenderManager *renderManager, bool skipGLCalls); @@ -373,18 +392,14 @@ public: std::vector pushBuffers; }; -class GLRInputLayout { -public: - struct Entry { - int location; - int count; - GLenum type; - GLboolean normalized; - int stride; - intptr_t offset; - }; - std::vector entries; - int semanticsMask_ = 0; +// These are enqueued from the main thread, +// and the render thread pops them off +struct GLRRenderThreadTask { + std::vector steps; + std::vector initSteps; + + int frame; + GLRRunType runType; }; // Note: The GLRenderManager is created and destroyed on the render thread, and the latter @@ -416,13 +431,7 @@ public: void BeginFrame(); // Can run on a different thread! void Finish(); - void Run(int frame); - - // Zaps queued up commands. Use if you know there's a risk you've queued up stuff that has already been deleted. Can happen during in-game shutdown. - void Wipe(); - - // Wait until no frames are pending. Use during shutdown before freeing pointers. - void WaitUntilQueueIdle(); + void Run(GLRRenderThreadTask &task); // Creation commands. These were not needed in Vulkan since there we can do that on the main thread. // We pass in width/height here even though it's not strictly needed until we support glTextureStorage @@ -1027,26 +1036,14 @@ private: // Per-frame data, round-robin so we can overlap submission with execution of the previous frame. struct FrameData { - std::mutex push_mutex; - std::condition_variable push_condVar; - - std::mutex pull_mutex; - std::condition_variable pull_condVar; - - bool readyForFence = true; - bool readyForRun = false; - bool readyForSubmit = false; - bool skipSwap = false; - GLRRunType type = GLRRunType::END; - // GLuint fence; For future AZDO stuff? - std::vector steps; - std::vector initSteps; + std::mutex fenceMutex; + std::condition_variable fenceCondVar; + bool readyForFence = true; // Swapchain. bool hasBegun = false; - uint32_t curSwapchainImage = -1; GLDeleter deleter; GLDeleter deleter_prev; @@ -1064,16 +1061,23 @@ private: // Execution time state bool run_ = true; + // Thread is managed elsewhere, and should call ThreadFrame. - std::mutex mutex_; - int threadInitFrame_ = 0; GLQueueRunner queueRunner_; - // Thread state - int threadFrame_ = -1; + // For pushing data on the queue. + std::mutex pushMutex_; + std::condition_variable pushCondVar_; - bool nextFrame = false; - bool firstFrame = true; + std::queue renderThreadQueue_; + + // For readbacks and other reasons we need to sync with the render thread. + std::mutex syncMutex_; + std::condition_variable syncCondVar_; + + bool firstFrame_ = true; + bool vrRenderStarted_ = false; + bool syncDone_ = false; GLDeleter deleter_; bool skipGLCalls_ = false; diff --git a/Common/GPU/Vulkan/VulkanRenderManager.cpp b/Common/GPU/Vulkan/VulkanRenderManager.cpp index 63cf2e109a..0003333ac6 100644 --- a/Common/GPU/Vulkan/VulkanRenderManager.cpp +++ b/Common/GPU/Vulkan/VulkanRenderManager.cpp @@ -1327,7 +1327,7 @@ void VulkanRenderManager::FlushSync() { std::unique_lock lock(syncMutex_); // Wait for the flush to be hit, since we're syncing. while (!frameData.syncDone) { - VLOG("PUSH: Waiting for frame[%d].readyForFence = 1 (sync)", curFrame); + VLOG("PUSH: Waiting for frame[%d].syncDone = 1 (sync)", curFrame); syncCondVar_.wait(lock); } frameData.syncDone = false; diff --git a/GPU/GLES/GPU_GLES.cpp b/GPU/GLES/GPU_GLES.cpp index a41dc68ee4..034e8bd660 100644 --- a/GPU/GLES/GPU_GLES.cpp +++ b/GPU/GLES/GPU_GLES.cpp @@ -128,11 +128,6 @@ GPU_GLES::GPU_GLES(GraphicsContext *gfxCtx, Draw::DrawContext *draw) } GPU_GLES::~GPU_GLES() { - if (draw_) { - GLRenderManager *render = (GLRenderManager *)draw_->GetNativeObject(Draw::NativeObject::RENDER_MANAGER); - render->Wipe(); - } - // If we're here during app shutdown (exiting the Windows app in-game, for example) // everything should already be cleared since DeviceLost has been run. diff --git a/Qt/QtMain.h b/Qt/QtMain.h index afb97149f4..0177ca9292 100644 --- a/Qt/QtMain.h +++ b/Qt/QtMain.h @@ -90,7 +90,6 @@ public: } void StopThread() override { - renderManager_->WaitUntilQueueIdle(); renderManager_->StopThread(); } diff --git a/SDL/SDLGLGraphicsContext.h b/SDL/SDLGLGraphicsContext.h index 1910ce8f56..7982a3fa49 100644 --- a/SDL/SDLGLGraphicsContext.h +++ b/SDL/SDLGLGraphicsContext.h @@ -42,7 +42,6 @@ public: } void StopThread() override { - renderManager_->WaitUntilQueueIdle(); renderManager_->StopThread(); } diff --git a/Windows/GPU/WindowsGLContext.cpp b/Windows/GPU/WindowsGLContext.cpp index 05afc4d1f5..1888b351b5 100644 --- a/Windows/GPU/WindowsGLContext.cpp +++ b/Windows/GPU/WindowsGLContext.cpp @@ -500,6 +500,5 @@ void WindowsGLContext::ThreadEnd() { } void WindowsGLContext::StopThread() { - renderManager_->WaitUntilQueueIdle(); renderManager_->StopThread(); } diff --git a/Windows/PPSSPP.vcxproj.filters b/Windows/PPSSPP.vcxproj.filters index b5f7795084..291197ddf3 100644 --- a/Windows/PPSSPP.vcxproj.filters +++ b/Windows/PPSSPP.vcxproj.filters @@ -820,4 +820,4 @@ Other Platforms\SDL - \ No newline at end of file + diff --git a/headless/SDLHeadlessHost.cpp b/headless/SDLHeadlessHost.cpp index 13f9ad33fc..9d5524b15a 100644 --- a/headless/SDLHeadlessHost.cpp +++ b/headless/SDLHeadlessHost.cpp @@ -83,7 +83,6 @@ public: void StopThread() override { if (renderManager_) { - renderManager_->WaitUntilQueueIdle(); renderManager_->StopThread(); } } diff --git a/libretro/LibretroGLContext.h b/libretro/LibretroGLContext.h index 5c6ded1d6e..66d8d641c7 100644 --- a/libretro/LibretroGLContext.h +++ b/libretro/LibretroGLContext.h @@ -28,7 +28,6 @@ public: bool ThreadFrame() override { return renderManager_->ThreadFrame(); } void ThreadEnd() override { renderManager_->ThreadEnd(); } void StopThread() override { - renderManager_->WaitUntilQueueIdle(); renderManager_->StopThread(); }