Refactor the framedata/GPU thread management.

This commit is contained in:
Henrik Rydgård 2022-10-03 15:56:30 +02:00
parent a410ea5535
commit b62572a78f
11 changed files with 130 additions and 221 deletions

View File

@ -2,6 +2,7 @@
#include <cstdint>
#include <vector>
#include <set>
#include <unordered_map>
#include "Common/GPU/OpenGL/GLCommon.h"

View File

@ -112,6 +112,8 @@ void GLDeleter::Perform(GLRenderManager *renderManager, bool skipGLCalls) {
}
GLRenderManager::~GLRenderManager() {
_dbg_assert_(!run_);
for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) {
_assert_(frameData_[i].deleter.IsEmpty());
_assert_(frameData_[i].deleter_prev.IsEmpty());
@ -123,7 +125,6 @@ GLRenderManager::~GLRenderManager() {
void GLRenderManager::ThreadStart(Draw::DrawContext *draw) {
queueRunner_.CreateDeviceObjects();
threadFrame_ = threadInitFrame_;
renderThreadId = std::this_thread::get_id();
if (newInflightFrames_ != -1) {
@ -170,7 +171,6 @@ void GLRenderManager::ThreadEnd() {
INFO_LOG(G3D, "ThreadEnd");
// Wait for any shutdown to complete in StopThread().
std::unique_lock<std::mutex> lock(mutex_);
queueRunner_.DestroyDeviceObjects();
VLOG("PULL: Quitting");
@ -179,14 +179,8 @@ void GLRenderManager::ThreadEnd() {
// Since we're in shutdown, we should skip the GL calls on Android.
frameData_[i].deleter.Perform(this, skipGLCalls_);
frameData_[i].deleter_prev.Perform(this, skipGLCalls_);
for (int j = 0; j < (int)frameData_[i].steps.size(); j++) {
delete frameData_[i].steps[j];
}
frameData_[i].steps.clear();
frameData_[i].initSteps.clear();
}
deleter_.Perform(this, skipGLCalls_);
for (int i = 0; i < (int)steps_.size(); i++) {
delete steps_[i];
}
@ -194,102 +188,50 @@ void GLRenderManager::ThreadEnd() {
initSteps_.clear();
}
// Unlike in Vulkan, this isn't a full independent function, instead it gets called every frame.
// This also mean that we can poll the renderThreadQueue using a regular mutex, instead of
// blocking on it using a condition variable.
bool GLRenderManager::ThreadFrame() {
std::unique_lock<std::mutex> lock(mutex_);
if (!run_)
return false;
// In case of syncs or other partial completion, we keep going until we complete a frame.
do {
if (nextFrame) {
threadFrame_++;
if (threadFrame_ >= inflightFrames_)
threadFrame_ = 0;
}
FrameData &frameData = frameData_[threadFrame_];
{
std::unique_lock<std::mutex> lock(frameData.pull_mutex);
while (!frameData.readyForRun && run_) {
VLOG("PULL: Waiting for frame[%d].readyForRun", threadFrame_);
frameData.pull_condVar.wait(lock);
}
if (!frameData.readyForRun && !run_) {
// This means we're out of frames to render and run_ is false, so bail.
return false;
}
VLOG("PULL: Setting frame[%d].readyForRun = false", threadFrame_);
frameData.readyForRun = false;
frameData.deleter_prev.Perform(this, skipGLCalls_);
frameData.deleter_prev.Take(frameData.deleter);
// Previously we had a quick exit here that avoided calling Run() if run_ was suddenly false,
// but that created a race condition where frames could end up not finished properly on resize etc.
GLRRenderThreadTask task;
// Only increment next time if we're done.
nextFrame = frameData.type == GLRRunType::END;
_assert_(frameData.type == GLRRunType::END || frameData.type == GLRRunType::SYNC);
// In case of syncs or other partial completion, we keep going until we complete a frame.
while (true) {
// Pop a task of the queue and execute it.
{
std::unique_lock<std::mutex> lock(pushMutex_);
if (renderThreadQueue_.empty()) {
break;
}
task = renderThreadQueue_.front();
renderThreadQueue_.pop();
}
VLOG("PULL: Running frame %d", threadFrame_);
if (firstFrame) {
INFO_LOG(G3D, "Running first frame (%d)", threadFrame_);
firstFrame = false;
// We got a task! We can now have pushMutex_ unlocked, allowing the host to
// push more work when it feels like it, and just start working.
if (task.runType == GLRRunType::EXIT) {
// Oh, host wanted out. Let's leave, and also let's notify the host.
// This is unlike Vulkan too which can just block on the thread existing.
std::unique_lock<std::mutex> lock(syncMutex_);
syncCondVar_.notify_one();
syncDone_ = true;
break;
}
// Render the scene.
Run(threadFrame_);
VLOG("PULL: Finished frame %d", threadFrame_);
} while (!nextFrame);
Run(task);
};
return true;
}
void GLRenderManager::StopThread() {
// Since we don't control the thread directly, this will only pause the thread.
// There's not really a lot to do here anymore.
if (run_) {
run_ = false;
for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) {
auto &frameData = frameData_[i];
{
std::unique_lock<std::mutex> lock(frameData.push_mutex);
frameData.push_condVar.notify_all();
}
{
std::unique_lock<std::mutex> lock(frameData.pull_mutex);
frameData.pull_condVar.notify_all();
}
}
// Wait until we've definitely stopped the threadframe.
std::unique_lock<std::mutex> lock(mutex_);
INFO_LOG(G3D, "GL submission thread paused. Frame=%d", curFrame_);
// Eat whatever has been queued up for this frame if anything.
Wipe();
// Wait for any fences to finish and be resignaled, so we don't have sync issues.
// Also clean out any queued data, which might refer to things that might not be valid
// when we restart...
for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) {
auto &frameData = frameData_[i];
std::unique_lock<std::mutex> lock(frameData.push_mutex);
if (frameData.readyForRun || frameData.steps.size() != 0) {
Crash();
}
frameData.readyForRun = false;
frameData.readyForSubmit = false;
for (size_t i = 0; i < frameData.steps.size(); i++) {
delete frameData.steps[i];
}
frameData.steps.clear();
frameData.initSteps.clear();
while (!frameData.readyForFence) {
VLOG("PUSH: Waiting for frame[%d].readyForFence = 1 (stop)", i);
frameData.push_condVar.wait(lock);
}
}
} else {
INFO_LOG(G3D, "GL submission thread was already paused.");
}
@ -452,58 +394,54 @@ void GLRenderManager::CopyImageToMemorySync(GLRTexture *texture, int mipLevel, i
void GLRenderManager::BeginFrame() {
VLOG("BeginFrame");
// TODO: Why is this debug-only?
#ifdef _DEBUG
curProgram_ = nullptr;
#endif
int curFrame = GetCurFrame();
FrameData &frameData = frameData_[curFrame];
// Make sure the very last command buffer from the frame before the previous has been fully executed.
FrameData &frameData = frameData_[curFrame_];
{
std::unique_lock<std::mutex> lock(frameData.push_mutex);
std::unique_lock<std::mutex> lock(frameData.fenceMutex);
while (!frameData.readyForFence) {
VLOG("PUSH: Waiting for frame[%d].readyForFence = 1", curFrame);
frameData.push_condVar.wait(lock);
frameData.fenceCondVar.wait(lock);
}
frameData.readyForFence = false;
frameData.readyForSubmit = true;
}
VLOG("PUSH: Fencing %d", curFrame);
// glFenceSync(&frameData.fence...)
// Must be after the fence - this performs deletes.
VLOG("PUSH: BeginFrame %d", curFrame);
if (!run_) {
WARN_LOG(G3D, "BeginFrame while !run_!");
}
// vulkan_->BeginFrame();
// In GL, we have to do deletes on the submission thread.
insideFrame_ = true;
}
void GLRenderManager::Finish() {
curRenderStep_ = nullptr;
int curFrame = GetCurFrame();
FrameData &frameData = frameData_[curFrame];
{
std::unique_lock<std::mutex> lock(frameData.pull_mutex);
VLOG("PUSH: Frame[%d].readyForRun = true, notifying pull", curFrame);
frameData.steps = std::move(steps_);
steps_.clear();
frameData.initSteps = std::move(initSteps_);
initSteps_.clear();
frameData.readyForRun = true;
frameData.type = GLRRunType::END;
frameData_[curFrame_].deleter.Take(deleter_);
}
VLOG("PUSH: Frame[%d]", curFrame);
GLRRenderThreadTask task;
task.frame = curFrame;
task.runType = GLRRunType::END;
// Notify calls do not in fact need to be done with the mutex locked.
frameData.pull_condVar.notify_all();
frameData_[curFrame_].deleter.Take(deleter_);
std::unique_lock<std::mutex> lock(pushMutex_);
renderThreadQueue_.push(task);
renderThreadQueue_.back().initSteps = std::move(initSteps_);
renderThreadQueue_.back().steps = std::move(steps_);
initSteps_.clear();
steps_.clear();
pushCondVar_.notify_one();
}
curFrame_++;
if (curFrame_ >= inflightFrames_)
@ -513,17 +451,17 @@ void GLRenderManager::Finish() {
}
// Render thread
void GLRenderManager::Run(int frame) {
FrameData &frameData = frameData_[frame];
void GLRenderManager::Run(GLRRenderThreadTask &task) {
FrameData &frameData = frameData_[task.frame];
if (!frameData.hasBegun) {
frameData.hasBegun = true;
frameData.deleter_prev.Perform(this, skipGLCalls_);
frameData.deleter_prev.Take(frameData.deleter);
}
auto &stepsOnThread = frameData_[frame].steps;
auto &initStepsOnThread = frameData_[frame].initSteps;
// queueRunner_.LogSteps(stepsOnThread);
queueRunner_.RunInitSteps(initStepsOnThread, skipGLCalls_);
initStepsOnThread.clear();
queueRunner_.RunInitSteps(task.initSteps, skipGLCalls_);
// Run this after RunInitSteps so any fresh GLRBuffers for the pushbuffers can get created.
if (!skipGLCalls_) {
@ -537,13 +475,12 @@ void GLRenderManager::Run(int frame) {
int passes = GetVRPassesCount();
for (int i = 0; i < passes; i++) {
PreVRFrameRender(i);
queueRunner_.RunSteps(stepsOnThread, skipGLCalls_, i < passes - 1, true);
queueRunner_.RunSteps(task.steps, skipGLCalls_, i < passes - 1, true);
PostVRFrameRender();
}
} else {
queueRunner_.RunSteps(stepsOnThread, skipGLCalls_, false, false);
queueRunner_.RunSteps(task.steps, skipGLCalls_, false, false);
}
stepsOnThread.clear();
if (!skipGLCalls_) {
for (auto iter : frameData.activePushBuffers) {
@ -551,19 +488,16 @@ void GLRenderManager::Run(int frame) {
}
}
switch (frameData.type) {
switch (task.runType) {
case GLRRunType::END:
frameData.hasBegun = false;
VLOG("PULL: Frame %d.readyForFence = true", frame);
{
std::unique_lock<std::mutex> lock(frameData.push_mutex);
_assert_(frameData.readyForSubmit);
std::lock_guard<std::mutex> lock(frameData.fenceMutex);
frameData.readyForFence = true;
frameData.readyForSubmit = false;
frameData.push_condVar.notify_all();
}
frameData.fenceCondVar.notify_one();
}{}
if (!frameData.skipSwap) {
if (swapIntervalChanged_) {
@ -572,6 +506,7 @@ void GLRenderManager::Run(int frame) {
swapIntervalFunction_(swapInterval_);
}
}
// This is the swapchain framebuffer flip.
if (swapFunction_) {
swapFunction_();
}
@ -581,13 +516,14 @@ void GLRenderManager::Run(int frame) {
break;
case GLRRunType::SYNC:
frameData.hasBegun = false;
// glFinish is not actually necessary here, and won't be unless we start using
// glBufferStorage. Then we need to use fences.
{
std::unique_lock<std::mutex> lock(frameData.push_mutex);
frameData.readyForFence = true;
frameData.readyForSubmit = true;
frameData.push_condVar.notify_all();
std::unique_lock<std::mutex> lock(syncMutex_);
syncDone_ = true;
syncCondVar_.notify_one();
}
break;
@ -599,51 +535,29 @@ void GLRenderManager::Run(int frame) {
}
void GLRenderManager::FlushSync() {
int curFrame = curFrame_;
FrameData &frameData = frameData_[curFrame];
{
std::unique_lock<std::mutex> lock(frameData.pull_mutex);
VLOG("PUSH: Frame[%d].readyForRun = true (sync)", curFrame);
frameData.initSteps = std::move(initSteps_);
initSteps_.clear();
frameData.steps = std::move(steps_);
GLRRenderThreadTask task;
task.frame = curFrame_;
task.runType = GLRRunType::SYNC;
std::unique_lock<std::mutex> lock(pushMutex_);
renderThreadQueue_.push(task);
renderThreadQueue_.back().initSteps = std::move(initSteps_);
renderThreadQueue_.back().steps = std::move(steps_);
pushCondVar_.notify_one();
steps_.clear();
frameData.readyForRun = true;
_assert_(frameData.readyForFence == false);
frameData.type = GLRRunType::SYNC;
frameData.pull_condVar.notify_all();
}
{
std::unique_lock<std::mutex> lock(frameData.push_mutex);
std::unique_lock<std::mutex> lock(syncMutex_);
// Wait for the flush to be hit, since we're syncing.
while (!frameData.readyForFence) {
while (!syncDone_) {
VLOG("PUSH: Waiting for frame[%d].readyForFence = 1 (sync)", curFrame);
frameData.push_condVar.wait(lock);
}
frameData.readyForFence = false;
frameData.readyForSubmit = true;
}
}
void GLRenderManager::Wipe() {
initSteps_.clear();
for (auto step : steps_) {
delete step;
}
steps_.clear();
}
void GLRenderManager::WaitUntilQueueIdle() {
// Just wait for all frames to be ready.
for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) {
FrameData &frameData = frameData_[i];
std::unique_lock<std::mutex> lock(frameData.push_mutex);
// Ignore unsubmitted frames.
while (!frameData.readyForFence && frameData.readyForRun) {
VLOG("PUSH: Waiting for frame[%d].readyForFence = 1 (wait idle)", i);
frameData.push_condVar.wait(lock);
syncCondVar_.wait(lock);
}
syncDone_ = false;
}
}

View File

@ -7,6 +7,7 @@
#include <set>
#include <string>
#include <mutex>
#include <queue>
#include <condition_variable>
#include "Common/GPU/OpenGL/GLCommon.h"
@ -349,11 +350,29 @@ private:
GLBufferStrategy strategy_ = GLBufferStrategy::SUBDATA;
};
class GLRInputLayout {
public:
struct Entry {
int location;
int count;
GLenum type;
GLboolean normalized;
int stride;
intptr_t offset;
};
std::vector<Entry> entries;
int semanticsMask_ = 0;
};
enum class GLRRunType {
END,
SYNC,
EXIT,
};
class GLRenderManager;
class GLPushBuffer;
class GLDeleter {
public:
void Perform(GLRenderManager *renderManager, bool skipGLCalls);
@ -373,18 +392,14 @@ public:
std::vector<GLPushBuffer *> pushBuffers;
};
class GLRInputLayout {
public:
struct Entry {
int location;
int count;
GLenum type;
GLboolean normalized;
int stride;
intptr_t offset;
};
std::vector<Entry> entries;
int semanticsMask_ = 0;
// These are enqueued from the main thread,
// and the render thread pops them off
struct GLRRenderThreadTask {
std::vector<GLRStep *> steps;
std::vector<GLRInitStep> initSteps;
int frame;
GLRRunType runType;
};
// Note: The GLRenderManager is created and destroyed on the render thread, and the latter
@ -416,13 +431,7 @@ public:
void BeginFrame();
// Can run on a different thread!
void Finish();
void Run(int frame);
// Zaps queued up commands. Use if you know there's a risk you've queued up stuff that has already been deleted. Can happen during in-game shutdown.
void Wipe();
// Wait until no frames are pending. Use during shutdown before freeing pointers.
void WaitUntilQueueIdle();
void Run(GLRRenderThreadTask &task);
// Creation commands. These were not needed in Vulkan since there we can do that on the main thread.
// We pass in width/height here even though it's not strictly needed until we support glTextureStorage
@ -1027,26 +1036,14 @@ private:
// Per-frame data, round-robin so we can overlap submission with execution of the previous frame.
struct FrameData {
std::mutex push_mutex;
std::condition_variable push_condVar;
std::mutex pull_mutex;
std::condition_variable pull_condVar;
bool readyForFence = true;
bool readyForRun = false;
bool readyForSubmit = false;
bool skipSwap = false;
GLRRunType type = GLRRunType::END;
// GLuint fence; For future AZDO stuff?
std::vector<GLRStep *> steps;
std::vector<GLRInitStep> initSteps;
std::mutex fenceMutex;
std::condition_variable fenceCondVar;
bool readyForFence = true;
// Swapchain.
bool hasBegun = false;
uint32_t curSwapchainImage = -1;
GLDeleter deleter;
GLDeleter deleter_prev;
@ -1064,16 +1061,23 @@ private:
// Execution time state
bool run_ = true;
// Thread is managed elsewhere, and should call ThreadFrame.
std::mutex mutex_;
int threadInitFrame_ = 0;
GLQueueRunner queueRunner_;
// Thread state
int threadFrame_ = -1;
// For pushing data on the queue.
std::mutex pushMutex_;
std::condition_variable pushCondVar_;
bool nextFrame = false;
bool firstFrame = true;
std::queue<GLRRenderThreadTask> renderThreadQueue_;
// For readbacks and other reasons we need to sync with the render thread.
std::mutex syncMutex_;
std::condition_variable syncCondVar_;
bool firstFrame_ = true;
bool vrRenderStarted_ = false;
bool syncDone_ = false;
GLDeleter deleter_;
bool skipGLCalls_ = false;

View File

@ -1327,7 +1327,7 @@ void VulkanRenderManager::FlushSync() {
std::unique_lock<std::mutex> lock(syncMutex_);
// Wait for the flush to be hit, since we're syncing.
while (!frameData.syncDone) {
VLOG("PUSH: Waiting for frame[%d].readyForFence = 1 (sync)", curFrame);
VLOG("PUSH: Waiting for frame[%d].syncDone = 1 (sync)", curFrame);
syncCondVar_.wait(lock);
}
frameData.syncDone = false;

View File

@ -128,11 +128,6 @@ GPU_GLES::GPU_GLES(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
}
GPU_GLES::~GPU_GLES() {
if (draw_) {
GLRenderManager *render = (GLRenderManager *)draw_->GetNativeObject(Draw::NativeObject::RENDER_MANAGER);
render->Wipe();
}
// If we're here during app shutdown (exiting the Windows app in-game, for example)
// everything should already be cleared since DeviceLost has been run.

View File

@ -90,7 +90,6 @@ public:
}
void StopThread() override {
renderManager_->WaitUntilQueueIdle();
renderManager_->StopThread();
}

View File

@ -42,7 +42,6 @@ public:
}
void StopThread() override {
renderManager_->WaitUntilQueueIdle();
renderManager_->StopThread();
}

View File

@ -500,6 +500,5 @@ void WindowsGLContext::ThreadEnd() {
}
void WindowsGLContext::StopThread() {
renderManager_->WaitUntilQueueIdle();
renderManager_->StopThread();
}

View File

@ -820,4 +820,4 @@
<Filter>Other Platforms\SDL</Filter>
</Text>
</ItemGroup>
</Project>
</Project>

View File

@ -83,7 +83,6 @@ public:
void StopThread() override {
if (renderManager_) {
renderManager_->WaitUntilQueueIdle();
renderManager_->StopThread();
}
}

View File

@ -28,7 +28,6 @@ public:
bool ThreadFrame() override { return renderManager_->ThreadFrame(); }
void ThreadEnd() override { renderManager_->ThreadEnd(); }
void StopThread() override {
renderManager_->WaitUntilQueueIdle();
renderManager_->StopThread();
}