From 1987169c8164bd9d6fbf9995ca016fe186c24101 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Wed, 17 May 2023 18:35:13 +0200 Subject: [PATCH] OpenGL: When possible, avoid rebinding vertex arrays between glDrawArrays Profitable optimization in DrawArrays-heavy games like GTA. --- Common/GPU/OpenGL/GLFrameData.h | 1 + Common/GPU/OpenGL/GLQueueRunner.cpp | 9 +++++---- Common/GPU/OpenGL/GLQueueRunner.h | 4 ++-- Common/GPU/OpenGL/GLRenderManager.cpp | 11 ++++++++--- 4 files changed, 16 insertions(+), 9 deletions(-) diff --git a/Common/GPU/OpenGL/GLFrameData.h b/Common/GPU/OpenGL/GLFrameData.h index 571b3d8abe..7d8d61c6c3 100644 --- a/Common/GPU/OpenGL/GLFrameData.h +++ b/Common/GPU/OpenGL/GLFrameData.h @@ -39,6 +39,7 @@ struct GLQueueProfileContext { bool enabled; double cpuStartTime; double cpuEndTime; + int drawArraysRebindsAvoided; }; diff --git a/Common/GPU/OpenGL/GLQueueRunner.cpp b/Common/GPU/OpenGL/GLQueueRunner.cpp index 98c2095865..9c0e19e7ef 100644 --- a/Common/GPU/OpenGL/GLQueueRunner.cpp +++ b/Common/GPU/OpenGL/GLQueueRunner.cpp @@ -651,7 +651,7 @@ retry_depth: currentReadHandle_ = fbo->handle; } -void GLQueueRunner::RunSteps(const std::vector &steps, bool skipGLCalls, bool keepSteps, bool useVR) { +void GLQueueRunner::RunSteps(const std::vector &steps, bool skipGLCalls, bool keepSteps, bool useVR, GLQueueProfileContext &profile) { if (skipGLCalls) { if (keepSteps) { return; @@ -713,9 +713,9 @@ void GLQueueRunner::RunSteps(const std::vector &steps, bool skipGLCal if (IsVREnabled()) { GLRStep vrStep = step; PreprocessStepVR(&vrStep); - PerformRenderPass(vrStep, renderCount == 1, renderCount == totalRenderCount); + PerformRenderPass(vrStep, renderCount == 1, renderCount == totalRenderCount, profile); } else { - PerformRenderPass(step, renderCount == 1, renderCount == totalRenderCount); + PerformRenderPass(step, renderCount == 1, renderCount == totalRenderCount, profile); } break; case GLRStepType::COPY: @@ -791,7 +791,7 @@ static void EnableDisableVertexArrays(uint32_t prevAttr, uint32_t newAttr) { } } -void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last) { +void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last, GLQueueProfileContext &profile) { CHECK_GL_ERROR_IF_DEBUG(); PerformBindFramebufferAsRenderTarget(step); @@ -1216,6 +1216,7 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last // Compatible draws. offset = diff / layout->stride; rebind = false; + profile.drawArraysRebindsAvoided++; } } if (rebind) { diff --git a/Common/GPU/OpenGL/GLQueueRunner.h b/Common/GPU/OpenGL/GLQueueRunner.h index 9cdbfeee01..1aa7398eaa 100644 --- a/Common/GPU/OpenGL/GLQueueRunner.h +++ b/Common/GPU/OpenGL/GLQueueRunner.h @@ -357,7 +357,7 @@ public: void RunInitSteps(const std::vector &steps, bool skipGLCalls); - void RunSteps(const std::vector &steps, bool skipGLCalls, bool keepSteps, bool useVR); + void RunSteps(const std::vector &steps, bool skipGLCalls, bool keepSteps, bool useVR, GLQueueProfileContext &profile); void CreateDeviceObjects(); void DestroyDeviceObjects(); @@ -382,7 +382,7 @@ private: void InitCreateFramebuffer(const GLRInitStep &step); void PerformBindFramebufferAsRenderTarget(const GLRStep &pass); - void PerformRenderPass(const GLRStep &pass, bool first, bool last); + void PerformRenderPass(const GLRStep &pass, bool first, bool last, GLQueueProfileContext &profile); void PerformCopy(const GLRStep &pass); void PerformBlit(const GLRStep &pass); void PerformReadback(const GLRStep &pass); diff --git a/Common/GPU/OpenGL/GLRenderManager.cpp b/Common/GPU/OpenGL/GLRenderManager.cpp index 7d094a1d51..7cfcce91cd 100644 --- a/Common/GPU/OpenGL/GLRenderManager.cpp +++ b/Common/GPU/OpenGL/GLRenderManager.cpp @@ -193,7 +193,11 @@ std::string GLRenderManager::GetGpuProfileString() const { const GLQueueProfileContext &profile = frameData_[curFrame].profile; float cputime_ms = 1000.0f * (profile.cpuEndTime - profile.cpuStartTime); - return StringFromFormat("CPU time to run the list: %0.2f ms", cputime_ms); + return StringFromFormat( + "CPU time to run the list: %0.2f ms\n" + "Avoided DrawArrays rebinds: %d", + cputime_ms, + profile.drawArraysRebindsAvoided); } void GLRenderManager::BindFramebufferAsRenderTarget(GLRFramebuffer *fb, GLRRenderPassAction color, GLRRenderPassAction depth, GLRRenderPassAction stencil, uint32_t clearColor, float clearDepth, uint8_t clearStencil, const char *tag) { @@ -429,17 +433,18 @@ bool GLRenderManager::Run(GLRRenderThreadTask &task) { if (frameData.profile.enabled) { frameData.profile.cpuStartTime = time_now_d(); + frameData.profile.drawArraysRebindsAvoided = 0; } if (IsVREnabled()) { int passes = GetVRPassesCount(); for (int i = 0; i < passes; i++) { PreVRFrameRender(i); - queueRunner_.RunSteps(task.steps, skipGLCalls_, i < passes - 1, true); + queueRunner_.RunSteps(task.steps, skipGLCalls_, i < passes - 1, true, frameData.profile); PostVRFrameRender(); } } else { - queueRunner_.RunSteps(task.steps, skipGLCalls_, false, false); + queueRunner_.RunSteps(task.steps, skipGLCalls_, false, false, frameData.profile); } if (frameData.profile.enabled) {