GLES: Add dependency tracking for render passes.

Let's try to invalidate framebuffers whenever possible. When such a case is
detected, we move the invalidate to the end of the render pass.
This commit is contained in:
Unknown W. Brackets 2020-05-16 16:56:31 -07:00
parent 32a7e7345e
commit a36239473d
9 changed files with 104 additions and 68 deletions

View File

@ -1070,6 +1070,7 @@ add_library(native STATIC
ext/native/util/text/wrap_text.h
ext/native/util/text/wrap_text.cpp
ext/native/util/const_map.h
ext/native/util/tiny_set.h
ext/native/ext/jpge/jpgd.cpp
ext/native/ext/jpge/jpgd.h
ext/native/ext/jpge/jpge.cpp

View File

@ -317,7 +317,7 @@ void FramebufferManagerGLES::UpdateDownloadTempBuffer(VirtualFramebuffer *nvfb)
_assert_msg_(G3D, nvfb->fbo, "Expecting a valid nvfb in UpdateDownloadTempBuffer");
// Discard the previous contents of this buffer where possible.
if (gl_extensions.GLES3 && glInvalidateFramebuffer != nullptr) {
if (gl_extensions.GLES3) {
draw_->BindFramebufferAsRenderTarget(nvfb->fbo, { Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE });
} else if (gl_extensions.IsGLES) {
draw_->BindFramebufferAsRenderTarget(nvfb->fbo, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR, Draw::RPAction::CLEAR });

View File

@ -489,6 +489,7 @@
<ClInclude Include="util\text\shiftjis.h" />
<ClInclude Include="util\text\utf16.h" />
<ClInclude Include="util\text\utf8.h" />
<ClInclude Include="util\tiny_set.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="base\backtrace.cpp" />

View File

@ -350,6 +350,9 @@
<ClInclude Include="ui\root.h">
<Filter>ui</Filter>
</ClInclude>
<ClInclude Include="util\tiny_set.h">
<Filter>util</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="gfx\gl_debug_log.cpp">

View File

@ -827,13 +827,15 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step) {
{
GLenum attachments[3];
int count = 0;
bool hasDepth = step.render.framebuffer ? step.render.framebuffer->z_stencil_ : false;
if (c.clear.clearMask & GL_COLOR_BUFFER_BIT)
attachments[count++] = GL_COLOR_ATTACHMENT0;
if (c.clear.clearMask & GL_DEPTH_BUFFER_BIT)
if (hasDepth && (c.clear.clearMask & GL_DEPTH_BUFFER_BIT))
attachments[count++] = GL_DEPTH_ATTACHMENT;
if (c.clear.clearMask & GL_STENCIL_BUFFER_BIT)
if (hasDepth && (c.clear.clearMask & GL_STENCIL_BUFFER_BIT))
attachments[count++] = GL_STENCIL_BUFFER_BIT;
glInvalidateFramebuffer(GL_FRAMEBUFFER, count, attachments);
if (glInvalidateFramebuffer != nullptr && count != 0)
glInvalidateFramebuffer(GL_FRAMEBUFFER, count, attachments);
CHECK_GL_ERROR_IF_DEBUG();
break;
}

View File

@ -6,6 +6,7 @@
#include "gfx/gl_common.h"
#include "thin3d/DataFormat.h"
#include "util/tiny_set.h"
struct GLRViewport {
float x, y, w, h, minZ, maxZ;
@ -295,9 +296,14 @@ struct GLRStep {
GLRStep(GLRStepType _type) : stepType(_type) {}
GLRStepType stepType;
std::vector<GLRRenderData> commands;
TinySet<const GLRFramebuffer *, 8> dependencies;
union {
struct {
GLRFramebuffer *framebuffer;
GLRRenderPassAction color;
GLRRenderPassAction depth;
GLRRenderPassAction stencil;
// Note: not accurate.
int numDraws;
} render;
struct {

View File

@ -279,6 +279,9 @@ void GLRenderManager::BindFramebufferAsRenderTarget(GLRFramebuffer *fb, GLRRende
GLRStep *step = new GLRStep{ GLRStepType::RENDER };
// This is what queues up new passes, and can end previous ones.
step->render.framebuffer = fb;
step->render.color = color;
step->render.depth = depth;
step->render.stencil = stencil;
step->render.numDraws = 0;
steps_.push_back(step);
@ -308,6 +311,12 @@ void GLRenderManager::BindFramebufferAsRenderTarget(GLRFramebuffer *fb, GLRRende
}
curRenderStep_ = step;
if (fb) {
if (color == GLRRenderPassAction::KEEP || depth == GLRRenderPassAction::KEEP || stencil == GLRRenderPassAction::KEEP) {
step->dependencies.insert(fb);
}
}
// Every step clears this state.
gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE);
}
@ -319,6 +328,7 @@ void GLRenderManager::BindFramebufferAsTexture(GLRFramebuffer *fb, int binding,
data.bind_fb_texture.framebuffer = fb;
data.bind_fb_texture.aspect = aspectBit;
curRenderStep_->commands.push_back(data);
curRenderStep_->dependencies.insert(fb);
}
void GLRenderManager::CopyFramebuffer(GLRFramebuffer *src, GLRect2D srcRect, GLRFramebuffer *dst, GLOffset2D dstPos, int aspectMask) {
@ -328,6 +338,10 @@ void GLRenderManager::CopyFramebuffer(GLRFramebuffer *src, GLRect2D srcRect, GLR
step->copy.src = src;
step->copy.dst = dst;
step->copy.aspectMask = aspectMask;
step->dependencies.insert(src);
bool fillsDst = dst && srcRect.x == 0 && srcRect.y == 0 && srcRect.w == dst->width && srcRect.h == dst->height;
if (dstPos.x != 0 || dstPos.y != 0 || !fillsDst)
step->dependencies.insert(dst);
steps_.push_back(step);
// Every step clears this state.
@ -342,6 +356,10 @@ void GLRenderManager::BlitFramebuffer(GLRFramebuffer *src, GLRect2D srcRect, GLR
step->blit.dst = dst;
step->blit.aspectMask = aspectMask;
step->blit.filter = filter;
step->dependencies.insert(src);
bool fillsDst = dst && dstRect.x == 0 && dstRect.y == 0 && dstRect.w == dst->width && dstRect.h == dst->height;
if (!fillsDst)
step->dependencies.insert(dst);
steps_.push_back(step);
// Every step clears this state.
@ -356,6 +374,7 @@ bool GLRenderManager::CopyFramebufferToMemorySync(GLRFramebuffer *src, int aspec
step->readback.srcRect = { x, y, w, h };
step->readback.aspectMask = aspectBits;
step->readback.dstFormat = destFormat;
step->dependencies.insert(src);
steps_.push_back(step);
// Every step clears this state.

View File

@ -6,6 +6,7 @@
#include "Common/Vulkan/VulkanContext.h"
#include "math/dataconv.h"
#include "thin3d/DataFormat.h"
#include "util/tiny_set.h"
class VKRFramebuffer;
struct VKRImage;
@ -17,70 +18,6 @@ enum {
QUEUE_HACK_RENDERPASS_MERGE = 8,
};
// Insert-only small-set implementation. Performs no allocation unless MaxFastSize is exceeded.
//
// The first MaxFastSize distinct values are kept in an inline array; any further
// distinct values spill into a std::vector. All lookups are linear scans, so this
// is only intended for very small sets.
//
// The overflow storage is held by value (an empty std::vector owns no heap memory),
// so the compiler-generated copy/move/destroy operations are all correct. Holding
// it through a raw owning pointer would make every copy of a spilled set free the
// same vector twice.
template <class T, int MaxFastSize>
struct TinySet {
	// Adds t to the set. Does nothing if an equal value is already present.
	inline void insert(T t) {
		// Fast linear scan.
		for (int i = 0; i < fastCount; i++) {
			if (fastLookup_[i] == t)
				return;  // We already have it.
		}
		// Fast insertion.
		if (fastCount < MaxFastSize) {
			fastLookup_[fastCount++] = t;
			return;
		}
		// Fall back to slow path.
		insertSlow(t);
	}

	// Returns true if a value equal to t has been inserted.
	bool contains(T t) const {
		for (int i = 0; i < fastCount; i++) {
			if (fastLookup_[i] == t)
				return true;
		}
		for (const auto &x : slowLookup_) {
			if (x == t)
				return true;
		}
		return false;
	}

	// Returns true if this set and otherSet share AT LEAST ONE element.
	// Note: despite the name, this is an intersection test, not a subset test.
	bool contains(const TinySet<T, MaxFastSize> &otherSet) const {
		// Awkward, kind of ruins the fun.
		for (int i = 0; i < fastCount; i++) {
			if (otherSet.contains(fastLookup_[i]))
				return true;
		}
		for (const auto &x : slowLookup_) {
			if (otherSet.contains(x))
				return true;
		}
		return false;
	}

private:
	// Adds t to the overflow storage unless it is already there.
	// Only called once the inline array is full and does not hold t.
	void insertSlow(T t) {
		for (const auto &x : slowLookup_) {
			if (x == t)
				return;
		}
		slowLookup_.push_back(t);
	}

	// Inline storage; only the first fastCount entries are meaningful.
	T fastLookup_[MaxFastSize]{};
	int fastCount = 0;
	// Overflow storage, used only after fastLookup_ is full.
	std::vector<T> slowLookup_;
};
enum class VKRRenderCommand : uint8_t {
REMOVED,
BIND_PIPELINE,

View File

@ -0,0 +1,67 @@
#pragma once
#include <vector>
// Insert-only small-set implementation. Performs no allocation unless MaxFastSize is exceeded.
//
// The first MaxFastSize distinct values are kept in an inline array; any further
// distinct values spill into a std::vector. All lookups are linear scans, so this
// is only intended for very small sets.
//
// The overflow storage is held by value (an empty std::vector owns no heap memory),
// so the compiler-generated copy/move/destroy operations are all correct. Holding
// it through a raw owning pointer would make every copy of a spilled set free the
// same vector twice.
template <class T, int MaxFastSize>
struct TinySet {
	// Adds t to the set. Does nothing if an equal value is already present.
	inline void insert(T t) {
		// Fast linear scan.
		for (int i = 0; i < fastCount; i++) {
			if (fastLookup_[i] == t)
				return;  // We already have it.
		}
		// Fast insertion.
		if (fastCount < MaxFastSize) {
			fastLookup_[fastCount++] = t;
			return;
		}
		// Fall back to slow path.
		insertSlow(t);
	}

	// Returns true if a value equal to t has been inserted.
	bool contains(T t) const {
		for (int i = 0; i < fastCount; i++) {
			if (fastLookup_[i] == t)
				return true;
		}
		for (const auto &x : slowLookup_) {
			if (x == t)
				return true;
		}
		return false;
	}

	// Returns true if this set and otherSet share AT LEAST ONE element.
	// Note: despite the name, this is an intersection test, not a subset test.
	bool contains(const TinySet<T, MaxFastSize> &otherSet) const {
		// Awkward, kind of ruins the fun.
		for (int i = 0; i < fastCount; i++) {
			if (otherSet.contains(fastLookup_[i]))
				return true;
		}
		for (const auto &x : slowLookup_) {
			if (otherSet.contains(x))
				return true;
		}
		return false;
	}

private:
	// Adds t to the overflow storage unless it is already there.
	// Only called once the inline array is full and does not hold t.
	void insertSlow(T t) {
		for (const auto &x : slowLookup_) {
			if (x == t)
				return;
		}
		slowLookup_.push_back(t);
	}

	// Inline storage; only the first fastCount entries are meaningful.
	T fastLookup_[MaxFastSize]{};
	int fastCount = 0;
	// Overflow storage, used only after fastLookup_ is full.
	std::vector<T> slowLookup_;
};