Support capturing frames from the OpenGL renderer in Tracy

- Code adapted from the example in the Tracy manual, section 3.3.3
This commit is contained in:
Jesse Talavera 2024-10-07 12:52:00 -04:00
parent b75b56382c
commit a16ddc4e77
4 changed files with 165 additions and 0 deletions

View File

@ -184,6 +184,10 @@ MelonDsDs::OpenGLRenderState::~OpenGLRenderState() noexcept {
glDeleteBuffers(1, &vbo);
glDeleteProgram(_screenProgram);
glsm_ctl(GLSM_CTL_STATE_UNBIND, nullptr);
#ifdef HAVE_TRACY
_tracyCapture = std::nullopt;
#endif
}
glsm_ctl(GLSM_CTL_STATE_CONTEXT_DESTROY, nullptr);
gl_query_core_context_unset();
@ -255,6 +259,14 @@ void MelonDsDs::OpenGLRenderState::ContextReset(melonDS::NDS& nds, const CoreCon
glsm_ctl(GLSM_CTL_STATE_UNBIND, nullptr); // Always succeeds
retro::debug("Unbound GL state");
#ifdef HAVE_TRACY
if (tracy::ProfilerAvailable()) {
// If we're using Tracy...
retro::debug("Using Tracy, will capture OpenGL calls");
_tracyCapture.emplace(); // ...then get ready to capture OpenGL calls
}
#endif
retro::debug("OpenGL context reset successfully.");
}
@ -422,6 +434,12 @@ void MelonDsDs::OpenGLRenderState::Render(
glsm_ctl(GLSM_CTL_STATE_UNBIND, nullptr);
#ifdef HAVE_TRACY
if (_tracyCapture) {
_tracyCapture->CaptureFrame(config.ScaleFactor());
}
#endif
retro::video_refresh(
RETRO_HW_FRAME_BUFFER_VALID,
screenLayout.BufferWidth(),
@ -449,6 +467,10 @@ void MelonDsDs::OpenGLRenderState::ContextDestroyed() {
ubo = 0;
// TODO: Delete these objects, since the context hasn't been destroyed yet
// (just in case it's not really destroyed afterwards)
#ifdef HAVE_TRACY
_tracyCapture = std::nullopt;
#endif
}
void MelonDsDs::OpenGLRenderState::InitFrameState(melonDS::NDS& nds, const CoreConfig& config, const ScreenLayoutData& screenLayout) noexcept {

View File

@ -28,6 +28,10 @@
#include <glm/vec2.hpp>
#include <glm/vec4.hpp>
#ifdef HAVE_TRACY
#include "tracy.hpp"
#endif
namespace MelonDsDs {
using glm::vec2;
using glm::vec4;
@ -85,6 +89,10 @@ namespace MelonDsDs {
} GL_ShaderConfig {};
GLuint ubo = 0;
#ifdef HAVE_TRACY
std::optional<OpenGlTracyCapture> _tracyCapture;
#endif
};
}

View File

@ -16,6 +16,10 @@
#include "tracy.hpp"
#if defined(HAVE_OPENGL) || defined(HAVE_OPENGLES)
#include "screenlayout.hpp"
#endif
void* operator new(std::size_t count)
{
if (count == 0)
@ -34,3 +38,107 @@ void operator delete(void* ptr) noexcept
TracySecureFree(ptr);
std::free(ptr);
}
#if defined(HAVE_OPENGL) || defined(HAVE_OPENGLES)
/// Allocates the OpenGL objects used to stream downscaled frames to Tracy.
///
/// Four capture slots are created, each consisting of a texture, an FBO that
/// renders into that texture, and a PBO that receives the pixels for the CPU.
/// Every fence handle starts out null, because no read-back has been issued yet.
///
/// \throws std::runtime_error if \c tracy::ProfilerAvailable() reports false.
MelonDsDs::OpenGlTracyCapture::OpenGlTracyCapture() {
    if (!tracy::ProfilerAvailable()) {
        throw std::runtime_error("Tracy not available");
    }

    // We're going to send the OpenGL-rendered image to tracy, but for performance reasons:
    // - We want to scale it down to the DS's native size (if necessary)
    // - We want to do this asynchronously, so we don't block the CPU
    // - The rendering can run ahead of the GPU by a few frames
    ZoneScopedN(TracyFunction);
    TracyGpuZone(TracyFunction);

    // No slot owns a fence until CaptureFrame issues a read-back for it.
    // glDeleteSync silently ignores a null handle, so starting from null keeps
    // the destructor well-defined even if no frame is ever captured.
    _tracyFences.fill(nullptr);

    // Allocate the textures for the resized image
    glGenTextures(4, _tracyTextures.data());

    // Create some FBOs to let us write to the textures
    glGenFramebuffers(4, _tracyFbos.data());

    // Create some PBOs to let the CPU read from the textures
    glGenBuffers(4, _tracyPbos.data());

    for (int i = 0; i < 4; i++) {
        // Let's configure one texture at a time...
        glBindTexture(GL_TEXTURE_2D, _tracyTextures[i]);

        // We'll use nearest-neighbor interpolation to avoid blurring
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);

        // And we want our texture to be 2D, in RGBA format, big enough to hold a pair of NDS screens without mipmaps,
        // and with each component being an unsigned byte.
        glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, NDS_SCREEN_WIDTH, NDS_SCREEN_HEIGHT * 2, 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr);

        // Now we'll configure the FBO used to draw to this texture...
        glBindFramebuffer(GL_FRAMEBUFFER, _tracyFbos[i]);

        // ...we'll attach a texture to the new FBO.
        glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, _tracyTextures[i], 0);

        // And we'll create a new PBO so we can read from the texture.
        glBindBuffer(GL_PIXEL_PACK_BUFFER, _tracyPbos[i]);

        // And the PBO has to be big enough to hold two NDS screens.
        glBufferData(GL_PIXEL_PACK_BUFFER, NDS_SCREEN_AREA<GLuint> * 2 * 4, nullptr, GL_STREAM_READ);
    }

    // Don't leave the last slot's objects bound: a pixel-pack buffer that stays
    // bound redirects any later glReadPixels into it, and a bound capture FBO
    // would swallow later draws.
    glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
    glBindFramebuffer(GL_FRAMEBUFFER, 0);
    glBindTexture(GL_TEXTURE_2D, 0);

    retro::debug("Initialized OpenGL Tracy capture");
}
/// Releases every OpenGL object owned by the capture helper.
///
/// Textures, FBOs and PBOs are always generated by the constructor, so all
/// four of each are deleted. Fences are different: a slot only holds a live
/// fence while its index sits in \c _tracyQueue, so only those are deleted.
MelonDsDs::OpenGlTracyCapture::~OpenGlTracyCapture() noexcept {
    ZoneScopedN(TracyFunction);
    TracyGpuZone(TracyFunction);

    // Clean up the textures
    glDeleteTextures(4, _tracyTextures.data());

    // Clean up the FBOs
    glDeleteFramebuffers(4, _tracyFbos.data());

    // Clean up the PBOs
    glDeleteBuffers(4, _tracyPbos.data());

    // Clean up the fences that are still pending.
    // Slots outside the queue hold either a never-assigned handle or one that
    // CaptureFrame already deleted; passing those to glDeleteSync is an error.
    for (const int slot : _tracyQueue) {
        glDeleteSync(_tracyFences[slot]);
        // Null the handle so a repeated index can never delete it twice
        // (glDeleteSync ignores a null sync object).
        _tracyFences[slot] = nullptr;
    }
    _tracyQueue.clear();
}
void MelonDsDs::OpenGlTracyCapture::CaptureFrame(float scale) noexcept {
if (!tracy::ProfilerAvailable()) {
return;
}
ZoneScopedN(TracyFunction);
TracyGpuZone(TracyFunction);
while (!_tracyQueue.empty()) {
const auto fiIdx = _tracyQueue.front();
if (glClientWaitSync(_tracyFences[fiIdx], 0, 0) == GL_TIMEOUT_EXPIRED) break;
glDeleteSync(_tracyFences[fiIdx]);
glBindBuffer(GL_PIXEL_PACK_BUFFER, _tracyPbos[fiIdx]);
auto ptr = glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, NDS_SCREEN_AREA<GLuint> * 2 * 4, GL_MAP_READ_BIT);
FrameImage(ptr, NDS_SCREEN_WIDTH, NDS_SCREEN_HEIGHT * 2, _tracyQueue.size(), true);
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
_tracyQueue.erase(_tracyQueue.begin());
}
// TODO: Only downscale if playing at a scale factor other than 1
assert (m_fiQueue.empty() || m_fiQueue.front() != m_fiIdx); // check for buffer overrun
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, _tracyFbos[_tracyIndex]);
glBlitFramebuffer(0, 0, NDS_SCREEN_WIDTH * scale, NDS_SCREEN_HEIGHT * 2 * scale, 0, 0, NDS_SCREEN_WIDTH, NDS_SCREEN_HEIGHT * 2, GL_COLOR_BUFFER_BIT, GL_NEAREST);
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
glBindFramebuffer(GL_READ_FRAMEBUFFER, _tracyFbos[_tracyIndex]);
glBindBuffer(GL_PIXEL_PACK_BUFFER, _tracyPbos[_tracyIndex]);
glReadPixels(0, 0, NDS_SCREEN_WIDTH, NDS_SCREEN_HEIGHT * 2, GL_RGBA, GL_UNSIGNED_BYTE, nullptr);
glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);
_tracyFences[_tracyIndex] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
_tracyQueue.emplace_back(_tracyIndex);
_tracyIndex = (_tracyIndex + 1) % 4;
}
#endif // defined(HAVE_OPENGL) || defined(HAVE_OPENGLES)

View File

@ -125,8 +125,35 @@
#endif
#if defined(HAVE_TRACY) && (defined(HAVE_OPENGL) || defined(HAVE_OPENGLES))
#include <array>
#include <vector>
#include "PlatformOGLPrivate.h"
#include <tracy/TracyOpenGL.hpp>
namespace MelonDsDs {
/// \brief Class for capturing OpenGL frames for Tracy.
/// Suitable for both OpenGL renderers.
///
/// Owns four capture slots (texture + FBO + PBO + fence each) that are used
/// round-robin, so a frame can be read back a few frames after it was rendered.
/// An OpenGL context must be current when constructing, using and destroying
/// an instance, since all three make OpenGL calls.
class OpenGlTracyCapture {
public:
    /// Creates the capture textures, FBOs and PBOs.
    /// Throws std::runtime_error if the Tracy profiler is not available.
    OpenGlTracyCapture();

    /// Deletes the OpenGL objects created by this instance.
    ~OpenGlTracyCapture() noexcept;

    // Copying the OpenGL objects is too much of a hassle.
    OpenGlTracyCapture(const OpenGlTracyCapture&) = delete;
    OpenGlTracyCapture& operator=(const OpenGlTracyCapture&) = delete;
    OpenGlTracyCapture(OpenGlTracyCapture&&) = delete;
    OpenGlTracyCapture& operator=(OpenGlTracyCapture&&) = delete;

    /// Submits any finished captures to Tracy, then queues a capture of the current frame.
    /// \param scale Factor by which the rendered image exceeds the native NDS resolution.
    void CaptureFrame(float scale) noexcept;
private:
    // All handles are value-initialized so that no member ever holds an
    // indeterminate value; zero/null names are ignored by the glDelete* calls.

    /// Downscaled copies of the rendered frame, one per slot.
    std::array<GLuint, 4> _tracyTextures {};

    /// Framebuffers that render into the matching entry of _tracyTextures.
    std::array<GLuint, 4> _tracyFbos {};

    /// Pixel-pack buffers that receive each slot's pixels for the CPU.
    std::array<GLuint, 4> _tracyPbos {};

    /// Fence for each slot's pending read-back; null while the slot is idle.
    std::array<GLsync, 4> _tracyFences {};

    /// Slot that the next call to CaptureFrame will write to.
    int _tracyIndex = 0;

    /// Indices of slots with a read-back in flight, oldest first.
    std::vector<int> _tracyQueue;
};
}
#else
#define TracyGpuContext
#define TracyGpuContextName(x,y)