From 8ee426ff74916b9637411b9cd66b1994f701b25b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Thu, 8 Feb 2018 00:23:48 +0100 Subject: [PATCH] Get rid of an unused codepath (gl rendering is now always threaded) --- Windows/GPU/WindowsGLContext.cpp | 3 +- Windows/MainThread.h | 1 + ext/native/thin3d/GLRenderManager.cpp | 70 ++++++++------------------- ext/native/thin3d/GLRenderManager.h | 17 ++----- 4 files changed, 28 insertions(+), 63 deletions(-) create mode 100644 Windows/MainThread.h diff --git a/Windows/GPU/WindowsGLContext.cpp b/Windows/GPU/WindowsGLContext.cpp index ac48df35f..7962d3a81 100644 --- a/Windows/GPU/WindowsGLContext.cpp +++ b/Windows/GPU/WindowsGLContext.cpp @@ -35,7 +35,8 @@ #include "Windows/GPU/WindowsGLContext.h" void WindowsGLContext::SwapBuffers() { - renderManager_->Swap(); + // We no longer call RenderManager::Swap here, it's handled by the render thread, which + // we're not on here. // Used during fullscreen switching to prevent rendering. if (pauseRequested) { diff --git a/Windows/MainThread.h b/Windows/MainThread.h new file mode 100644 index 000000000..6f70f09be --- /dev/null +++ b/Windows/MainThread.h @@ -0,0 +1 @@ +#pragma once diff --git a/ext/native/thin3d/GLRenderManager.cpp b/ext/native/thin3d/GLRenderManager.cpp index e4a4ab23a..b6e049a1f 100644 --- a/ext/native/thin3d/GLRenderManager.cpp +++ b/ext/native/thin3d/GLRenderManager.cpp @@ -45,11 +45,6 @@ GLRenderManager::GLRenderManager() { for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) { } - - if (!useThread_) { - // The main thread is also the render thread. - ThreadStart(); - } } GLRenderManager::~GLRenderManager() { @@ -58,12 +53,7 @@ GLRenderManager::~GLRenderManager() { } // Was anything deleted during shutdown? deleter_.Perform(); - // _assert_(deleter_.IsEmpty()); - - if (!useThread_) { - // The main thread is also the render thread. - ThreadEnd(); - } + _assert_(deleter_.IsEmpty()); } void GLRenderManager::ThreadStart() { @@ -143,7 +133,7 @@ bool GLRenderManager::ThreadFrame() { void GLRenderManager::StopThread() { // Since we don't control the thread directly, this will only pause the thread. - if (useThread_ && run_) { + if (run_) { run_ = false; for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) { auto &frameData = frameData_[i]; @@ -170,6 +160,7 @@ void GLRenderManager::StopThread() { // when we restart... for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) { auto &frameData = frameData_[i]; + std::unique_lock lock(frameData.push_mutex); if (frameData.readyForRun || frameData.steps.size() != 0) { Crash(); } @@ -181,7 +172,6 @@ void GLRenderManager::StopThread() { frameData.steps.clear(); frameData.initSteps.clear(); - std::unique_lock lock(frameData.push_mutex); while (!frameData.readyForFence) { VLOG("PUSH: Waiting for frame[%d].readyForFence = 1 (stop)", i); frameData.push_condVar.wait(lock); @@ -330,7 +320,7 @@ void GLRenderManager::BeginFrame() { FrameData &frameData = frameData_[curFrame]; // Make sure the very last command buffer from the frame before the previous has been fully executed. - if (useThread_) { + { std::unique_lock lock(frameData.push_mutex); while (!frameData.readyForFence) { VLOG("PUSH: Waiting for frame[%d].readyForFence = 1", curFrame); @@ -342,10 +332,7 @@ void GLRenderManager::BeginFrame() { VLOG("PUSH: Fencing %d", curFrame); - - // vkWaitForFences(device, 1, &frameData.fence, true, UINT64_MAX); - // vkResetFences(device, 1, &frameData.fence); - // glFenceSync(...) + // glFenceSync(&frameData.fence...) // Must be after the fence - this performs deletes. VLOG("PUSH: BeginFrame %d", curFrame); @@ -363,14 +350,7 @@ void GLRenderManager::Finish() { curRenderStep_ = nullptr; int curFrame = GetCurFrame(); FrameData &frameData = frameData_[curFrame]; - if (!useThread_) { - frameData.steps = std::move(steps_); - steps_.clear(); - frameData.initSteps = std::move(initSteps_); - initSteps_.clear(); - frameData.type = GLRRunType::END; - Run(curFrame); - } else { + { std::unique_lock lock(frameData.pull_mutex); VLOG("PUSH: Frame[%d].readyForRun = true", curFrame); frameData.steps = std::move(steps_); @@ -398,6 +378,7 @@ void GLRenderManager::BeginSubmitFrame(int frame) { } } +// Render thread void GLRenderManager::Submit(int frame, bool triggerFence) { FrameData &frameData = frameData_[frame]; @@ -409,7 +390,7 @@ void GLRenderManager::Submit(int frame, bool triggerFence) { // ideally we'd like to wait a frame or two. frameData.deleter.Perform(); - if (useThread_ && triggerFence) { + if (triggerFence) { VLOG("PULL: Frame %d.readyForFence = true", frame); std::unique_lock lock(frameData.push_mutex); @@ -420,6 +401,7 @@ void GLRenderManager::Submit(int frame, bool triggerFence) { } } +// Render thread void GLRenderManager::EndSubmitFrame(int frame) { FrameData &frameData = frameData_[frame]; frameData.hasBegun = false; @@ -435,6 +417,7 @@ void GLRenderManager::EndSubmitFrame(int frame) { } } +// Render thread void GLRenderManager::Run(int frame) { BeginSubmitFrame(frame); @@ -472,14 +455,7 @@ void GLRenderManager::FlushSync() { // TODO: Reset curRenderStep_? int curFrame = curFrame_; FrameData &frameData = frameData_[curFrame]; - if (!useThread_) { - frameData.initSteps = std::move(initSteps_); - initSteps_.clear(); - frameData.steps = std::move(steps_); - steps_.clear(); - frameData.type = GLRRunType::SYNC; - Run(curFrame); - } else { + { std::unique_lock lock(frameData.pull_mutex); VLOG("PUSH: Frame[%d].readyForRun = true (sync)", curFrame); frameData.initSteps = std::move(initSteps_); @@ -491,8 +467,7 @@ void GLRenderManager::FlushSync() { frameData.type = GLRRunType::SYNC; frameData.pull_condVar.notify_all(); } - - if (useThread_) { + { std::unique_lock lock(frameData.push_mutex); // Wait for the flush to be hit, since we're syncing. while (!frameData.readyForFence) { @@ -504,19 +479,20 @@ void GLRenderManager::FlushSync() { } } +// Render thread void GLRenderManager::EndSyncFrame(int frame) { FrameData &frameData = frameData_[frame]; Submit(frame, false); - // This is brutal! Should probably wait for a fence instead, not that it'll matter much since we'll - // still stall everything. - glFinish(); + // glFinish is not actually necessary here, and won't be until we start using + // glBufferStorage. Then we need to use fences. + // glFinish(); // At this point we can resume filling the command buffers for the current frame since // we know the device is idle - and thus all previously enqueued command buffers have been processed. // No need to switch to the next frame number. - if (useThread_) { + { std::unique_lock lock(frameData.push_mutex); frameData.readyForFence = true; frameData.readyForSubmit = true; @@ -533,11 +509,6 @@ void GLRenderManager::Wipe() { } void GLRenderManager::WaitUntilQueueIdle() { - if (!useThread_) { - // Must be idle, nothing to do. - return; - } - // Just wait for all frames to be ready. for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) { FrameData &frameData = frameData_[i]; @@ -563,16 +534,15 @@ GLPushBuffer::~GLPushBuffer() { void GLPushBuffer::Map() { assert(!writePtr_); // TODO: Even a good old glMapBuffer could actually work well here. - // VkResult res = vkMapMemory(device_, buffers_[buf_].deviceMemory, 0, size_, 0, (void **)(&writePtr_)); writePtr_ = buffers_[buf_].deviceMemory; assert(writePtr_); } void GLPushBuffer::Unmap() { assert(writePtr_); - // Here we should simply upload everything to the buffers. - // Might be worth trying with size_ instead of offset_, so the driver can replace the whole buffer. - // At least if it's close. + // Here we simply upload the data to the last buffer. + // Might be worth trying with size_ instead of offset_, so the driver can replace + // the whole buffer. At least if it's close. render_->BufferSubdata(buffers_[buf_].buffer, 0, offset_, buffers_[buf_].deviceMemory, false); writePtr_ = nullptr; } diff --git a/ext/native/thin3d/GLRenderManager.h b/ext/native/thin3d/GLRenderManager.h index 4ef204d55..b658a116c 100644 --- a/ext/native/thin3d/GLRenderManager.h +++ b/ext/native/thin3d/GLRenderManager.h @@ -663,12 +663,6 @@ public: swapFunction_ = swapFunction; } - void Swap() { - if (!useThread_ && swapFunction_) { - swapFunction_(); - } - } - void StopThread(); bool SawOutOfMemory() { @@ -738,8 +732,6 @@ private: GLDeleter deleter_; - bool useThread_ = true; - int curFrame_ = 0; std::function swapFunction_; @@ -748,10 +740,11 @@ private: int targetHeight_ = 0; }; - -// Similar to VulkanPushBuffer but uses really stupid tactics - collect all the data in RAM then do a big -// memcpy/buffer upload at the end. This can however be optimized with glBufferStorage on chips that support that -// for massive boosts. +// Similar to VulkanPushBuffer but is currently less efficient - it collects all the data in +// RAM then does a big memcpy/buffer upload at the end of the frame. This is at least a lot +// faster than the hundreds of buffer uploads or memory array buffers we used before. +// On modern GL we could avoid the copy using glBufferStorage but not sure it's worth the +// trouble. class GLPushBuffer { public: struct BufInfo {