ppsspp/Common/GPU/OpenGL/GLRenderManager.h

1070 lines
34 KiB
C++

#pragma once
#include <thread>
#include <unordered_map>
#include <vector>
#include <functional>
#include <set>
#include <string>
#include <mutex>
#include <condition_variable>
#include "Common/GPU/OpenGL/GLCommon.h"
#include "Common/Data/Convert/SmallDataConvert.h"
#include "Common/Log.h"
#include "GLQueueRunner.h"
class GLRInputLayout;
class GLPushBuffer;
namespace Draw {
class DrawContext;
}
constexpr int MAX_GL_TEXTURE_SLOTS = 8;
class GLRTexture {
public:
GLRTexture(const Draw::DeviceCaps &caps, int width, int height, int depth, int numMips);
~GLRTexture();
GLuint texture = 0;
uint16_t w;
uint16_t h;
uint16_t d;
// We don't trust OpenGL defaults - setting wildly off values ensures that we'll end up overwriting these parameters.
GLenum target = 0xFFFF;
GLenum wrapS = 0xFFFF;
GLenum wrapT = 0xFFFF;
GLenum magFilter = 0xFFFF;
GLenum minFilter = 0xFFFF;
uint8_t numMips = 0;
bool canWrap = true;
float anisotropy = -100000.0f;
float minLod = -1000.0f;
float maxLod = 1000.0f;
float lodBias = 0.0f;
};
class GLRFramebuffer {
public:
GLRFramebuffer(const Draw::DeviceCaps &caps, int _width, int _height, bool z_stencil)
: color_texture(caps, _width, _height, 1, 1), z_stencil_texture(caps, _width, _height, 1, 1),
width(_width), height(_height), z_stencil_(z_stencil) {
}
~GLRFramebuffer();
GLuint handle = 0;
GLRTexture color_texture;
// Either z_stencil_texture, z_stencil_buffer, or (z_buffer and stencil_buffer) are set.
GLuint z_stencil_buffer = 0;
GLRTexture z_stencil_texture;
GLuint z_buffer = 0;
GLuint stencil_buffer = 0;
int width;
int height;
GLuint colorDepth = 0;
bool z_stencil_;
};
// We need to create some custom heap-allocated types so we can forward things that need to be created on the GL thread, before
// they've actually been created.
class GLRShader {
public:
~GLRShader() {
if (shader) {
glDeleteShader(shader);
}
}
GLuint shader = 0;
bool valid = false;
// Warning: Won't know until a future frame.
bool failed = false;
std::string desc;
std::string code;
std::string error;
};
class GLRProgram {
public:
~GLRProgram() {
if (program) {
glDeleteProgram(program);
}
}
struct Semantic {
int location;
const char *attrib;
};
struct UniformLocQuery {
GLint *dest;
const char *name;
bool required;
};
struct Initializer {
GLint *uniform;
int type;
int value;
};
GLuint program = 0;
std::vector<Semantic> semantics_;
std::vector<UniformLocQuery> queries_;
std::vector<Initializer> initialize_;
bool use_clip_distance0 = false;
struct UniformInfo {
int loc_;
};
// Must ONLY be called from GLQueueRunner!
// Also it's pretty slow...
int GetUniformLoc(const char *name) {
auto iter = uniformCache_.find(std::string(name));
int loc = -1;
if (iter != uniformCache_.end()) {
loc = iter->second.loc_;
} else {
loc = glGetUniformLocation(program, name);
UniformInfo info;
info.loc_ = loc;
uniformCache_[name] = info;
}
return loc;
}
std::unordered_map<std::string, UniformInfo> uniformCache_;
};
enum class GLBufferStrategy {
SUBDATA = 0,
MASK_FLUSH = 0x10,
MASK_INVALIDATE = 0x20,
// Map/unmap the buffer each frame.
FRAME_UNMAP = 1,
// Map/unmap and also invalidate the buffer on map.
INVALIDATE_UNMAP = MASK_INVALIDATE,
// Map/unmap and explicitly flushed changed ranges.
FLUSH_UNMAP = MASK_FLUSH,
// Map/unmap, invalidate on map, and explicit flush.
FLUSH_INVALIDATE_UNMAP = MASK_FLUSH | MASK_INVALIDATE,
};
static inline int operator &(const GLBufferStrategy &lhs, const GLBufferStrategy &rhs) {
return (int)lhs & (int)rhs;
}
class GLRBuffer {
public:
GLRBuffer(GLuint target, size_t size) : target_(target), size_((int)size) {}
~GLRBuffer() {
if (buffer_) {
glDeleteBuffers(1, &buffer_);
}
}
void *Map(GLBufferStrategy strategy);
bool Unmap();
bool Mapped() const {
return mapped_;
}
GLuint buffer_ = 0;
GLuint target_;
int size_;
private:
bool mapped_ = false;
bool hasStorage_ = false;
};
class GLRenderManager;
// Similar to VulkanPushBuffer but is currently less efficient - it collects all the data in
// RAM then does a big memcpy/buffer upload at the end of the frame. This is at least a lot
// faster than the hundreds of buffer uploads or memory array buffers we used before.
// On modern GL we could avoid the copy using glBufferStorage but not sure it's worth the
// trouble.
// We need to manage the lifetime of this together with the other resources so its destructor
// runs on the render thread.
class GLPushBuffer {
public:
friend class GLRenderManager;
struct BufInfo {
GLRBuffer *buffer = nullptr;
uint8_t *localMemory = nullptr;
uint8_t *deviceMemory = nullptr;
size_t flushOffset = 0;
};
GLPushBuffer(GLRenderManager *render, GLuint target, size_t size);
~GLPushBuffer();
void Reset() { offset_ = 0; }
private:
// Needs context in case of defragment.
void Begin() {
buf_ = 0;
offset_ = 0;
// Note: we must defrag because some buffers may be smaller than size_.
Defragment();
Map();
_dbg_assert_(writePtr_);
}
void BeginNoReset() {
Map();
}
void End() {
Unmap();
}
public:
void Map();
void Unmap();
bool IsReady() const {
return writePtr_ != nullptr;
}
// When using the returned memory, make sure to bind the returned vkbuf.
// This will later allow for handling overflow correctly.
size_t Allocate(size_t numBytes, GLRBuffer **vkbuf) {
size_t out = offset_;
if (offset_ + ((numBytes + 3) & ~3) >= size_) {
NextBuffer(numBytes);
out = offset_;
offset_ += (numBytes + 3) & ~3;
} else {
offset_ += (numBytes + 3) & ~3; // Round up to 4 bytes.
}
*vkbuf = buffers_[buf_].buffer;
return out;
}
// Returns the offset that should be used when binding this buffer to get this data.
size_t Push(const void *data, size_t size, GLRBuffer **vkbuf) {
_dbg_assert_(writePtr_);
size_t off = Allocate(size, vkbuf);
memcpy(writePtr_ + off, data, size);
return off;
}
uint32_t PushAligned(const void *data, size_t size, int align, GLRBuffer **vkbuf) {
_dbg_assert_(writePtr_);
offset_ = (offset_ + align - 1) & ~(align - 1);
size_t off = Allocate(size, vkbuf);
memcpy(writePtr_ + off, data, size);
return (uint32_t)off;
}
size_t GetOffset() const {
return offset_;
}
// "Zero-copy" variant - you can write the data directly as you compute it.
// Recommended.
void *Push(size_t size, uint32_t *bindOffset, GLRBuffer **vkbuf) {
_dbg_assert_(writePtr_);
size_t off = Allocate(size, vkbuf);
*bindOffset = (uint32_t)off;
return writePtr_ + off;
}
void *PushAligned(size_t size, uint32_t *bindOffset, GLRBuffer **vkbuf, int align) {
_dbg_assert_(writePtr_);
offset_ = (offset_ + align - 1) & ~(align - 1);
size_t off = Allocate(size, vkbuf);
*bindOffset = (uint32_t)off;
return writePtr_ + off;
}
size_t GetTotalSize() const;
void Destroy(bool onRenderThread);
void Flush();
protected:
void MapDevice(GLBufferStrategy strategy);
void UnmapDevice();
private:
bool AddBuffer();
void NextBuffer(size_t minSize);
void Defragment();
GLRenderManager *render_;
std::vector<BufInfo> buffers_;
size_t buf_ = 0;
size_t offset_ = 0;
size_t size_ = 0;
uint8_t *writePtr_ = nullptr;
GLuint target_;
GLBufferStrategy strategy_ = GLBufferStrategy::SUBDATA;
};
enum class GLRRunType {
END,
SYNC,
};
class GLDeleter {
public:
void Perform(GLRenderManager *renderManager, bool skipGLCalls);
bool IsEmpty() const {
return shaders.empty() && programs.empty() && buffers.empty() && textures.empty() && inputLayouts.empty() && framebuffers.empty() && pushBuffers.empty();
}
void Take(GLDeleter &other);
std::vector<GLRShader *> shaders;
std::vector<GLRProgram *> programs;
std::vector<GLRBuffer *> buffers;
std::vector<GLRTexture *> textures;
std::vector<GLRInputLayout *> inputLayouts;
std::vector<GLRFramebuffer *> framebuffers;
std::vector<GLPushBuffer *> pushBuffers;
};
class GLRInputLayout {
public:
struct Entry {
int location;
int count;
GLenum type;
GLboolean normalized;
int stride;
intptr_t offset;
};
std::vector<Entry> entries;
int semanticsMask_ = 0;
};
// Note: The GLRenderManager is created and destroyed on the render thread, and the latter
// happens after the emu thread has been destroyed. Therefore, it's safe to run wild deleting stuff
// directly in the destructor.
class GLRenderManager {
public:
GLRenderManager() {}
~GLRenderManager();
void SetErrorCallback(ErrorCallbackFn callback, void *userdata) {
queueRunner_.SetErrorCallback(callback, userdata);
}
void SetDeviceCaps(const Draw::DeviceCaps &caps) {
queueRunner_.SetDeviceCaps(caps);
caps_ = caps;
}
void ThreadStart(Draw::DrawContext *draw);
void ThreadEnd();
bool ThreadFrame(); // Returns false to request exiting the loop.
// Makes sure that the GPU has caught up enough that we can start writing buffers of this frame again.
void BeginFrame();
// Can run on a different thread!
void Finish();
void Run(int frame);
// Zaps queued up commands. Use if you know there's a risk you've queued up stuff that has already been deleted. Can happen during in-game shutdown.
void Wipe();
// Wait until no frames are pending. Use during shutdown before freeing pointers.
void WaitUntilQueueIdle();
// Creation commands. These were not needed in Vulkan since there we can do that on the main thread.
// We pass in width/height here even though it's not strictly needed until we support glTextureStorage
// and then we'll also need formats and stuff.
GLRTexture *CreateTexture(GLenum target, int width, int height, int depth, int numMips) {
GLRInitStep step{ GLRInitStepType::CREATE_TEXTURE };
step.create_texture.texture = new GLRTexture(caps_, width, height, depth, numMips);
step.create_texture.texture->target = target;
initSteps_.push_back(step);
return step.create_texture.texture;
}
GLRBuffer *CreateBuffer(GLuint target, size_t size, GLuint usage) {
GLRInitStep step{ GLRInitStepType::CREATE_BUFFER };
step.create_buffer.buffer = new GLRBuffer(target, size);
step.create_buffer.size = (int)size;
step.create_buffer.usage = usage;
initSteps_.push_back(step);
return step.create_buffer.buffer;
}
GLRShader *CreateShader(GLuint stage, const std::string &code, const std::string &desc) {
GLRInitStep step{ GLRInitStepType::CREATE_SHADER };
step.create_shader.shader = new GLRShader();
step.create_shader.shader->desc = desc;
step.create_shader.stage = stage;
step.create_shader.code = new char[code.size() + 1];
memcpy(step.create_shader.code, code.data(), code.size() + 1);
initSteps_.push_back(step);
return step.create_shader.shader;
}
GLRFramebuffer *CreateFramebuffer(int width, int height, bool z_stencil) {
GLRInitStep step{ GLRInitStepType::CREATE_FRAMEBUFFER };
step.create_framebuffer.framebuffer = new GLRFramebuffer(caps_, width, height, z_stencil);
initSteps_.push_back(step);
return step.create_framebuffer.framebuffer;
}
// Can't replace uniform initializers with direct calls to SetUniform() etc because there might
// not be an active render pass.
GLRProgram *CreateProgram(
std::vector<GLRShader *> shaders, std::vector<GLRProgram::Semantic> semantics, std::vector<GLRProgram::UniformLocQuery> queries,
std::vector<GLRProgram::Initializer> initializers, bool supportDualSource, bool useClipDistance0) {
GLRInitStep step{ GLRInitStepType::CREATE_PROGRAM };
_assert_(shaders.size() <= ARRAY_SIZE(step.create_program.shaders));
step.create_program.program = new GLRProgram();
step.create_program.program->semantics_ = semantics;
step.create_program.program->queries_ = queries;
step.create_program.program->initialize_ = initializers;
step.create_program.program->use_clip_distance0 = useClipDistance0;
step.create_program.support_dual_source = supportDualSource;
_assert_msg_(shaders.size() > 0, "Can't create a program with zero shaders");
for (size_t i = 0; i < shaders.size(); i++) {
step.create_program.shaders[i] = shaders[i];
}
#ifdef _DEBUG
for (auto &iter : queries) {
_dbg_assert_(iter.name);
}
for (auto &sem : semantics) {
_dbg_assert_(sem.attrib);
}
#endif
step.create_program.num_shaders = (int)shaders.size();
initSteps_.push_back(step);
return step.create_program.program;
}
GLRInputLayout *CreateInputLayout(std::vector<GLRInputLayout::Entry> &entries) {
GLRInitStep step{ GLRInitStepType::CREATE_INPUT_LAYOUT };
step.create_input_layout.inputLayout = new GLRInputLayout();
step.create_input_layout.inputLayout->entries = entries;
for (auto &iter : step.create_input_layout.inputLayout->entries) {
step.create_input_layout.inputLayout->semanticsMask_ |= 1 << iter.location;
}
initSteps_.push_back(step);
return step.create_input_layout.inputLayout;
}
GLPushBuffer *CreatePushBuffer(int frame, GLuint target, size_t size) {
GLPushBuffer *push = new GLPushBuffer(this, target, size);
RegisterPushBuffer(frame, push);
return push;
}
void DeleteShader(GLRShader *shader) {
deleter_.shaders.push_back(shader);
}
void DeleteProgram(GLRProgram *program) {
deleter_.programs.push_back(program);
}
void DeleteBuffer(GLRBuffer *buffer) {
deleter_.buffers.push_back(buffer);
}
void DeleteTexture(GLRTexture *texture) {
deleter_.textures.push_back(texture);
}
void DeleteInputLayout(GLRInputLayout *inputLayout) {
deleter_.inputLayouts.push_back(inputLayout);
}
void DeleteFramebuffer(GLRFramebuffer *framebuffer) {
deleter_.framebuffers.push_back(framebuffer);
}
void DeletePushBuffer(GLPushBuffer *pushbuffer) {
deleter_.pushBuffers.push_back(pushbuffer);
}
void BeginPushBuffer(GLPushBuffer *pushbuffer) {
pushbuffer->Begin();
}
void EndPushBuffer(GLPushBuffer *pushbuffer) {
pushbuffer->End();
}
// This starts a new step (like a "render pass" in Vulkan).
//
// After a "CopyFramebuffer" or the other functions that start "steps", you need to call this beforce
// making any new render state changes or draw calls.
//
// The following state needs to be reset by the caller after calling this (and will thus not safely carry over from
// the previous one):
// * Viewport/Scissor
// * Depth/stencil
// * Blend
// * Raster state like primitive, culling, etc.
//
// It can be useful to use GetCurrentStepId() to figure out when you need to send all this state again, if you're
// not keeping track of your calls to this function on your own.
void BindFramebufferAsRenderTarget(GLRFramebuffer *fb, GLRRenderPassAction color, GLRRenderPassAction depth, GLRRenderPassAction stencil, uint32_t clearColor, float clearDepth, uint8_t clearStencil, const char *tag);
// Binds a framebuffer as a texture, for the following draws.
void BindFramebufferAsTexture(GLRFramebuffer *fb, int binding, int aspectBit, int attachment);
bool CopyFramebufferToMemorySync(GLRFramebuffer *src, int aspectBits, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, const char *tag);
void CopyImageToMemorySync(GLRTexture *texture, int mipLevel, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, const char *tag);
void CopyFramebuffer(GLRFramebuffer *src, GLRect2D srcRect, GLRFramebuffer *dst, GLOffset2D dstPos, int aspectMask, const char *tag);
void BlitFramebuffer(GLRFramebuffer *src, GLRect2D srcRect, GLRFramebuffer *dst, GLRect2D dstRect, int aspectMask, bool filter, const char *tag);
// Takes ownership of data if deleteData = true.
void BufferSubdata(GLRBuffer *buffer, size_t offset, size_t size, uint8_t *data, bool deleteData = true) {
// TODO: Maybe should be a render command instead of an init command? When possible it's better as
// an init command, that's for sure.
GLRInitStep step{ GLRInitStepType::BUFFER_SUBDATA };
_dbg_assert_(offset >= 0);
_dbg_assert_(offset <= buffer->size_ - size);
step.buffer_subdata.buffer = buffer;
step.buffer_subdata.offset = (int)offset;
step.buffer_subdata.size = (int)size;
step.buffer_subdata.data = data;
step.buffer_subdata.deleteData = deleteData;
initSteps_.push_back(step);
}
// Takes ownership over the data pointer and delete[]-s it.
void TextureImage(GLRTexture *texture, int level, int width, int height, int depth, Draw::DataFormat format, uint8_t *data, GLRAllocType allocType = GLRAllocType::NEW, bool linearFilter = false) {
GLRInitStep step{ GLRInitStepType::TEXTURE_IMAGE };
step.texture_image.texture = texture;
step.texture_image.data = data;
step.texture_image.format = format;
step.texture_image.level = level;
step.texture_image.width = width;
step.texture_image.height = height;
step.texture_image.depth = depth;
step.texture_image.allocType = allocType;
step.texture_image.linearFilter = linearFilter;
initSteps_.push_back(step);
}
void TextureSubImage(int slot, GLRTexture *texture, int level, int x, int y, int width, int height, Draw::DataFormat format, uint8_t *data, GLRAllocType allocType = GLRAllocType::NEW) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
GLRRenderData _data{ GLRRenderCommand::TEXTURE_SUBIMAGE };
_data.texture_subimage.texture = texture;
_data.texture_subimage.data = data;
_data.texture_subimage.format = format;
_data.texture_subimage.level = level;
_data.texture_subimage.x = x;
_data.texture_subimage.y = y;
_data.texture_subimage.width = width;
_data.texture_subimage.height = height;
_data.texture_subimage.allocType = allocType;
_data.texture_subimage.slot = slot;
curRenderStep_->commands.push_back(_data);
}
void FinalizeTexture(GLRTexture *texture, int loadedLevels, bool genMips) {
GLRInitStep step{ GLRInitStepType::TEXTURE_FINALIZE };
step.texture_finalize.texture = texture;
step.texture_finalize.loadedLevels = loadedLevels;
step.texture_finalize.genMips = genMips;
initSteps_.push_back(step);
}
void BindTexture(int slot, GLRTexture *tex) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
_dbg_assert_(slot < MAX_GL_TEXTURE_SLOTS);
GLRRenderData data{ GLRRenderCommand::BINDTEXTURE };
data.texture.slot = slot;
data.texture.texture = tex;
curRenderStep_->commands.push_back(data);
}
void BindProgram(GLRProgram *program) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
GLRRenderData data{ GLRRenderCommand::BINDPROGRAM };
_dbg_assert_(program != nullptr);
data.program.program = program;
curRenderStep_->commands.push_back(data);
#ifdef _DEBUG
curProgram_ = program;
#endif
}
void BindPixelPackBuffer(GLRBuffer *buffer) { // Want to support an offset but can't in ES 2.0. We supply an offset when binding the buffers instead.
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
GLRRenderData data{ GLRRenderCommand::BIND_BUFFER };
data.bind_buffer.buffer = buffer;
data.bind_buffer.target = GL_PIXEL_PACK_BUFFER;
curRenderStep_->commands.push_back(data);
}
void BindIndexBuffer(GLRBuffer *buffer) { // Want to support an offset but can't in ES 2.0. We supply an offset when binding the buffers instead.
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
GLRRenderData data{ GLRRenderCommand::BIND_BUFFER};
data.bind_buffer.buffer = buffer;
data.bind_buffer.target = GL_ELEMENT_ARRAY_BUFFER;
curRenderStep_->commands.push_back(data);
}
void BindVertexBuffer(GLRInputLayout *inputLayout, GLRBuffer *buffer, size_t offset) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
_dbg_assert_(inputLayout);
GLRRenderData data{ GLRRenderCommand::BIND_VERTEX_BUFFER };
data.bindVertexBuffer.inputLayout = inputLayout;
data.bindVertexBuffer.offset = offset;
data.bindVertexBuffer.buffer = buffer;
curRenderStep_->commands.push_back(data);
}
void SetDepth(bool enabled, bool write, GLenum func) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
GLRRenderData data{ GLRRenderCommand::DEPTH };
data.depth.enabled = enabled;
data.depth.write = write;
data.depth.func = func;
curRenderStep_->commands.push_back(data);
}
void SetViewport(const GLRViewport &vp) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
GLRRenderData data{ GLRRenderCommand::VIEWPORT };
data.viewport.vp = vp;
curRenderStep_->commands.push_back(data);
}
void SetScissor(const GLRect2D &rc) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
GLRRenderData data{ GLRRenderCommand::SCISSOR };
data.scissor.rc = rc;
curRenderStep_->commands.push_back(data);
}
void SetUniformI(const GLint *loc, int count, const int *udata) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
#ifdef _DEBUG
_dbg_assert_(curProgram_);
#endif
GLRRenderData data{ GLRRenderCommand::UNIFORM4I };
data.uniform4.loc = loc;
data.uniform4.count = count;
memcpy(data.uniform4.v, udata, sizeof(int) * count);
curRenderStep_->commands.push_back(data);
}
void SetUniformI1(const GLint *loc, int udata) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
#ifdef _DEBUG
_dbg_assert_(curProgram_);
#endif
GLRRenderData data{ GLRRenderCommand::UNIFORM4I };
data.uniform4.loc = loc;
data.uniform4.count = 1;
memcpy(data.uniform4.v, &udata, sizeof(udata));
curRenderStep_->commands.push_back(data);
}
void SetUniformUI(const GLint *loc, int count, const uint32_t *udata) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
#ifdef _DEBUG
_dbg_assert_(curProgram_);
#endif
GLRRenderData data{ GLRRenderCommand::UNIFORM4UI };
data.uniform4.loc = loc;
data.uniform4.count = count;
memcpy(data.uniform4.v, udata, sizeof(uint32_t) * count);
curRenderStep_->commands.push_back(data);
}
void SetUniformUI1(const GLint *loc, uint32_t udata) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
#ifdef _DEBUG
_dbg_assert_(curProgram_);
#endif
GLRRenderData data{ GLRRenderCommand::UNIFORM4UI };
data.uniform4.loc = loc;
data.uniform4.count = 1;
memcpy(data.uniform4.v, &udata, sizeof(udata));
curRenderStep_->commands.push_back(data);
}
void SetUniformF(const GLint *loc, int count, const float *udata) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
#ifdef _DEBUG
_dbg_assert_(curProgram_);
#endif
GLRRenderData data{ GLRRenderCommand::UNIFORM4F };
data.uniform4.loc = loc;
data.uniform4.count = count;
memcpy(data.uniform4.v, udata, sizeof(float) * count);
curRenderStep_->commands.push_back(data);
}
void SetUniformF1(const GLint *loc, const float udata) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
#ifdef _DEBUG
_dbg_assert_(curProgram_);
#endif
GLRRenderData data{ GLRRenderCommand::UNIFORM4F };
data.uniform4.loc = loc;
data.uniform4.count = 1;
memcpy(data.uniform4.v, &udata, sizeof(float));
curRenderStep_->commands.push_back(data);
}
void SetUniformF(const char *name, int count, const float *udata) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
#ifdef _DEBUG
_dbg_assert_(curProgram_);
#endif
GLRRenderData data{ GLRRenderCommand::UNIFORM4F };
data.uniform4.name = name;
data.uniform4.count = count;
memcpy(data.uniform4.v, udata, sizeof(float) * count);
curRenderStep_->commands.push_back(data);
}
void SetUniformM4x4(const GLint *loc, const float *udata) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
#ifdef _DEBUG
_dbg_assert_(curProgram_);
#endif
GLRRenderData data{ GLRRenderCommand::UNIFORMMATRIX };
data.uniformMatrix4.loc = loc;
memcpy(data.uniformMatrix4.m, udata, sizeof(float) * 16);
curRenderStep_->commands.push_back(data);
}
#ifdef OPENXR
void SetUniformM4x4Stereo(const char *name, const GLint *loc, const float *left, const float *right) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
#ifdef _DEBUG
_dbg_assert_(curProgram_);
#endif
GLRRenderData data{ GLRRenderCommand::UNIFORMSTEREOMATRIX };
data.uniformMatrix4.name = name;
data.uniformMatrix4.loc = loc;
memcpy(&data.uniformMatrix4.m[0], left, sizeof(float) * 16);
memcpy(&data.uniformMatrix4.m[16], right, sizeof(float) * 16);
curRenderStep_->commands.push_back(data);
}
#endif
void SetUniformM4x4(const char *name, const float *udata) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
#ifdef _DEBUG
_dbg_assert_(curProgram_);
#endif
GLRRenderData data{ GLRRenderCommand::UNIFORMMATRIX };
data.uniformMatrix4.name = name;
memcpy(data.uniformMatrix4.m, udata, sizeof(float) * 16);
curRenderStep_->commands.push_back(data);
}
void SetBlendAndMask(int colorMask, bool blendEnabled, GLenum srcColor, GLenum dstColor, GLenum srcAlpha, GLenum dstAlpha, GLenum funcColor, GLenum funcAlpha) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
GLRRenderData data{ GLRRenderCommand::BLEND };
data.blend.mask = colorMask;
data.blend.enabled = blendEnabled;
data.blend.srcColor = srcColor;
data.blend.dstColor = dstColor;
data.blend.srcAlpha = srcAlpha;
data.blend.dstAlpha = dstAlpha;
data.blend.funcColor = funcColor;
data.blend.funcAlpha = funcAlpha;
curRenderStep_->commands.push_back(data);
}
void SetNoBlendAndMask(int colorMask) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
GLRRenderData data{ GLRRenderCommand::BLEND };
data.blend.mask = colorMask;
data.blend.enabled = false;
curRenderStep_->commands.push_back(data);
}
#ifndef USING_GLES2
void SetLogicOp(bool enabled, GLenum logicOp) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
GLRRenderData data{ GLRRenderCommand::LOGICOP };
data.logic.enabled = enabled;
data.logic.logicOp = logicOp;
curRenderStep_->commands.push_back(data);
}
#endif
void SetStencilFunc(bool enabled, GLenum func, uint8_t refValue, uint8_t compareMask) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
GLRRenderData data{ GLRRenderCommand::STENCILFUNC };
data.stencilFunc.enabled = enabled;
data.stencilFunc.func = func;
data.stencilFunc.ref = refValue;
data.stencilFunc.compareMask = compareMask;
curRenderStep_->commands.push_back(data);
}
void SetStencilOp(uint8_t writeMask, GLenum sFail, GLenum zFail, GLenum pass) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
GLRRenderData data{ GLRRenderCommand::STENCILOP };
data.stencilOp.writeMask = writeMask;
data.stencilOp.sFail = sFail;
data.stencilOp.zFail = zFail;
data.stencilOp.pass = pass;
curRenderStep_->commands.push_back(data);
}
void SetStencilDisabled() {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
GLRRenderData data;
data.cmd = GLRRenderCommand::STENCILFUNC;
data.stencilFunc.enabled = false;
curRenderStep_->commands.push_back(data);
}
void SetBlendFactor(const float color[4]) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
GLRRenderData data{ GLRRenderCommand::BLENDCOLOR };
CopyFloat4(data.blendColor.color, color);
curRenderStep_->commands.push_back(data);
}
void SetRaster(GLboolean cullEnable, GLenum frontFace, GLenum cullFace, GLboolean ditherEnable, GLboolean depthClamp) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
GLRRenderData data{ GLRRenderCommand::RASTER };
data.raster.cullEnable = cullEnable;
data.raster.frontFace = frontFace;
data.raster.cullFace = cullFace;
data.raster.ditherEnable = ditherEnable;
data.raster.depthClampEnable = depthClamp;
curRenderStep_->commands.push_back(data);
}
// Modifies the current texture as per GL specs, not global state.
void SetTextureSampler(int slot, GLenum wrapS, GLenum wrapT, GLenum magFilter, GLenum minFilter, float anisotropy) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
_dbg_assert_(slot < MAX_GL_TEXTURE_SLOTS);
GLRRenderData data{ GLRRenderCommand::TEXTURESAMPLER };
data.textureSampler.slot = slot;
data.textureSampler.wrapS = wrapS;
data.textureSampler.wrapT = wrapT;
data.textureSampler.magFilter = magFilter;
data.textureSampler.minFilter = minFilter;
data.textureSampler.anisotropy = anisotropy;
curRenderStep_->commands.push_back(data);
}
void SetTextureLod(int slot, float minLod, float maxLod, float lodBias) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
_dbg_assert_(slot < MAX_GL_TEXTURE_SLOTS);
GLRRenderData data{ GLRRenderCommand::TEXTURELOD};
data.textureLod.slot = slot;
data.textureLod.minLod = minLod;
data.textureLod.maxLod = maxLod;
data.textureLod.lodBias = lodBias;
curRenderStep_->commands.push_back(data);
}
// If scissorW == 0, no scissor is applied (the whole render target is cleared).
void Clear(uint32_t clearColor, float clearZ, int clearStencil, int clearMask, int colorMask, int scissorX, int scissorY, int scissorW, int scissorH) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
if (!clearMask)
return;
GLRRenderData data{ GLRRenderCommand::CLEAR };
data.clear.clearMask = clearMask;
data.clear.clearColor = clearColor;
data.clear.clearZ = clearZ;
data.clear.clearStencil = clearStencil;
data.clear.colorMask = colorMask;
data.clear.scissorX = scissorX;
data.clear.scissorY = scissorY;
data.clear.scissorW = scissorW;
data.clear.scissorH = scissorH;
curRenderStep_->commands.push_back(data);
}
void Draw(GLenum mode, int first, int count) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
GLRRenderData data{ GLRRenderCommand::DRAW };
data.draw.mode = mode;
data.draw.first = first;
data.draw.count = count;
data.draw.buffer = 0;
curRenderStep_->commands.push_back(data);
curRenderStep_->render.numDraws++;
}
void DrawIndexed(GLenum mode, int count, GLenum indexType, void *indices, int instances = 1) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
GLRRenderData data{ GLRRenderCommand::DRAW_INDEXED };
data.drawIndexed.mode = mode;
data.drawIndexed.count = count;
data.drawIndexed.indexType = indexType;
data.drawIndexed.instances = instances;
data.drawIndexed.indices = indices;
curRenderStep_->commands.push_back(data);
curRenderStep_->render.numDraws++;
}
enum { MAX_INFLIGHT_FRAMES = 3 };
void SetInflightFrames(int f) {
newInflightFrames_ = f < 1 || f > MAX_INFLIGHT_FRAMES ? MAX_INFLIGHT_FRAMES : f;
}
int GetCurFrame() const {
return curFrame_;
}
void Resize(int width, int height) {
targetWidth_ = width;
targetHeight_ = height;
queueRunner_.Resize(width, height);
}
void UnregisterPushBuffer(GLPushBuffer *buffer) {
int foundCount = 0;
for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) {
auto iter = frameData_[i].activePushBuffers.find(buffer);
if (iter != frameData_[i].activePushBuffers.end()) {
frameData_[i].activePushBuffers.erase(iter);
foundCount++;
}
}
_dbg_assert_(foundCount == 1);
}
void SetSwapFunction(std::function<void()> swapFunction) {
swapFunction_ = swapFunction;
}
void SetSwapIntervalFunction(std::function<void(int)> swapIntervalFunction) {
swapIntervalFunction_ = swapIntervalFunction;
}
void SwapInterval(int interval) {
if (interval != swapInterval_) {
swapInterval_ = interval;
swapIntervalChanged_ = true;
}
}
void StopThread();
bool SawOutOfMemory() {
return queueRunner_.SawOutOfMemory();
}
// Only supports a common subset.
std::string GetGLString(int name) const {
return queueRunner_.GetGLString(name);
}
// Used during Android-style ugly shutdown. No need to have a way to set it back because we'll be
// destroyed.
void SetSkipGLCalls() {
skipGLCalls_ = true;
}
// Gets a frame-unique ID of the current step being recorded. Can be used to figure out
// when the current step has changed, which means the caller will need to re-record its state.
int GetCurrentStepId() const {
return renderStepOffset_ + (int)steps_.size();
}
private:
void BeginSubmitFrame(int frame);
void EndSubmitFrame(int frame);
void Submit(int frame, bool triggerFence);
// Bad for performance but sometimes necessary for synchronous CPU readbacks (screenshots and whatnot).
void FlushSync();
void EndSyncFrame(int frame);
// When using legacy functionality for push buffers (glBufferData), we need to flush them
// before actually making the glDraw* calls. It's best if the render manager handles that.
void RegisterPushBuffer(int frame, GLPushBuffer *buffer) {
frameData_[frame].activePushBuffers.insert(buffer);
}
// Per-frame data, round-robin so we can overlap submission with execution of the previous frame.
struct FrameData {
std::mutex push_mutex;
std::condition_variable push_condVar;
std::mutex pull_mutex;
std::condition_variable pull_condVar;
bool readyForFence = true;
bool readyForRun = false;
bool readyForSubmit = false;
bool skipSwap = false;
GLRRunType type = GLRRunType::END;
// GLuint fence; For future AZDO stuff?
std::vector<GLRStep *> steps;
std::vector<GLRInitStep> initSteps;
// Swapchain.
bool hasBegun = false;
uint32_t curSwapchainImage = -1;
GLDeleter deleter;
GLDeleter deleter_prev;
std::set<GLPushBuffer *> activePushBuffers;
};
FrameData frameData_[MAX_INFLIGHT_FRAMES];
// Submission time state
bool insideFrame_ = false;
// This is the offset within this frame, in case of a mid-frame sync.
int renderStepOffset_ = 0;
GLRStep *curRenderStep_ = nullptr;
std::vector<GLRStep *> steps_;
std::vector<GLRInitStep> initSteps_;
// Execution time state
bool run_ = true;
// Thread is managed elsewhere, and should call ThreadFrame.
std::mutex mutex_;
int threadInitFrame_ = 0;
GLQueueRunner queueRunner_;
// Thread state
int threadFrame_ = -1;
bool nextFrame = false;
bool firstFrame = true;
GLDeleter deleter_;
bool skipGLCalls_ = false;
int curFrame_ = 0;
std::function<void()> swapFunction_;
std::function<void(int)> swapIntervalFunction_;
GLBufferStrategy bufferStrategy_ = GLBufferStrategy::SUBDATA;
int inflightFrames_ = MAX_INFLIGHT_FRAMES;
int newInflightFrames_ = -1;
int swapInterval_ = 0;
bool swapIntervalChanged_ = true;
int targetWidth_ = 0;
int targetHeight_ = 0;
#ifdef _DEBUG
GLRProgram *curProgram_ = nullptr;
#endif
Draw::DeviceCaps caps_{};
};