gl_stream_buffer: optimize OpenGL buffer handling
Some checks failed
mandarine-build / macos (arm64) (push) Waiting to run
mandarine-build / macos (x86_64) (push) Waiting to run
mandarine-build / macos-universal (push) Blocked by required conditions
mandarine-build / windows (msvc) (push) Waiting to run
mandarine-build / windows (msys2) (push) Waiting to run
mandarine-build / release (push) Blocked by required conditions
mandarine-build / source (push) Failing after 0s
mandarine-build / linux (appimage) (push) Failing after 0s
mandarine-build / linux (fresh) (push) Failing after 0s
mandarine-build / android (push) Failing after 0s
mandarine-format / clang-format (push) Failing after 0s

This seems to give a huge performance boost for some Mali GPU devices.
This commit is contained in:
weihuoya 2024-09-16 22:13:55 +02:00 committed by Gamer64
parent 7031479af0
commit 107837f506
4 changed files with 26 additions and 73 deletions

View File

@ -189,7 +189,7 @@ void Driver::FindBugs() {
// TODO: Check if these have been fixed in the newer driver
if (vendor == Vendor::AMD) {
bugs |= DriverBug::ShaderStageChangeFreeze | DriverBug::VertexArrayOutOfBound;
bugs |= DriverBug::ShaderStageChangeFreeze;
}
if (vendor == Vendor::AMD || (vendor == Vendor::Intel && !is_linux)) {

View File

@ -27,18 +27,13 @@ enum class Vendor {
// Known driver defects. Every enumerator value is a single bit (`1 << n`).
// NOTE(review): BrokenTextureView, BrokenClearTexture and SlowTextureBufferWithBigSize are each
// listed twice below with different shift amounts. This looks like the before/after lines of a
// renumbering shown without diff markers; confirm which value is the live one.
enum class DriverBug {
// AMD drivers sometimes freeze when one shader stage is changed but not the others.
ShaderStageChangeFreeze = 1 << 0,
// On AMD drivers there is a strange crash in indexed drawing. The crash happens when the buffer
// read position is near the end and is an out-of-bound access to the vertex buffer. This is
// probably a bug in the driver and is related to the usage of vec3<byte> attributes in the
// vertex array. Doubling the allocation size for the vertex buffer seems to avoid the crash.
VertexArrayOutOfBound = 1 << 1,
// On AMD and Intel drivers on Windows glTextureView produces incorrect results
BrokenTextureView = 1 << 2,
BrokenTextureView = 1 << 1,
// On Haswell and Broadwell Intel drivers glClearTexSubImage produces a black screen
BrokenClearTexture = 1 << 3,
BrokenClearTexture = 1 << 2,
// On some Mali GPUs, the texture buffer size is small and has reduced performance
// if the buffer is close to the maximum texture size
SlowTextureBufferWithBigSize = 1 << 4,
SlowTextureBufferWithBigSize = 1 << 3,
};
/**

View File

@ -10,35 +10,15 @@
namespace OpenGL {
// Creates the GL buffer object, binds it to `target` and allocates its storage.
// `size` is stored as `buffer_size`.
// NOTE(review): two signatures (with and without `prefer_coherent`) and two allocation paths
// appear back to back below. This looks like old and new lines of a change shown without diff
// markers; confirm which lines are live before editing.
OGLStreamBuffer::OGLStreamBuffer(Driver& driver, GLenum target, GLsizeiptr size,
bool prefer_coherent)
OGLStreamBuffer::OGLStreamBuffer(Driver& driver, GLenum target, GLsizeiptr size)
: gl_target(target), buffer_size(size) {
gl_buffer.Create();
glBindBuffer(gl_target, gl_buffer.handle);
// Allocation is doubled for GL_ARRAY_BUFFER when the driver reports VertexArrayOutOfBound.
GLsizeiptr allocate_size = size;
if (driver.HasBug(DriverBug::VertexArrayOutOfBound) && target == GL_ARRAY_BUFFER) {
allocate_size *= 2;
}
// With ARB_buffer_storage: immutable storage that is mapped once here, with the persistent bit.
if (GLAD_GL_ARB_buffer_storage) {
persistent = true;
coherent = prefer_coherent;
GLbitfield flags =
GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0);
glBufferStorage(gl_target, allocate_size, nullptr, flags);
// Only `buffer_size` bytes are mapped even though `allocate_size` bytes were allocated.
// A non-coherent mapping additionally requests explicit flushing.
mapped_ptr = static_cast<u8*>(glMapBufferRange(
gl_target, 0, buffer_size, flags | (coherent ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT)));
} else {
glBufferData(gl_target, allocate_size, nullptr, GL_STREAM_DRAW);
}
// Prefer `glBufferData` over `glBufferStorage` on mobile devices.
// This path allocates exactly `buffer_size` bytes, without the doubling applied above.
glBufferData(gl_target, buffer_size, nullptr, GL_STREAM_DRAW);
}
// Releases the GL buffer object. When `persistent` is set, the buffer is bound and unmapped
// before it is released.
OGLStreamBuffer::~OGLStreamBuffer() {
if (persistent) {
glBindBuffer(gl_target, gl_buffer.handle);
glUnmapBuffer(gl_target);
}
gl_buffer.Release();
}
@ -51,48 +31,33 @@ GLsizeiptr OGLStreamBuffer::GetSize() const {
}
// Maps a chunk of `size` bytes for writing and returns
// (pointer to the chunk, byte offset of the chunk inside the buffer, invalidate flag).
// `invalidate` is true when the request did not fit behind `buffer_pos` and the write position
// was reset to 0.
// NOTE(review): two mapping strategies (one keyed on `persistent`/`coherent`, one that maps
// exactly `size` bytes on every call) appear interleaved below. This looks like old and new lines
// of a change shown without diff markers; confirm which lines are live before editing.
std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
ASSERT_MSG(size <= buffer_size, "Requested size {} exceeds buffer size {}", size, buffer_size);
ASSERT(alignment <= buffer_size);
// Remembered so that Unmap() can assert it is not asked to commit more than was mapped.
mapped_size = size;
if (alignment > 0) {
buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment);
}
bool invalidate = false;
// NOTE(review): this call is not guarded by `alignment > 0` like the one above; how AlignUp
// treats an alignment of 0 is not visible here, so verify against its definition.
buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment);
// Not enough room behind the current position: restart at offset 0.
if (buffer_pos + size > buffer_size) {
buffer_pos = 0;
invalidate = true;
if (persistent) {
glUnmapBuffer(gl_target);
}
}
// Remap on wrap-around, or on every call when the mapping is not persistent.
if (invalidate || !persistent) {
MANDARINE_PROFILE("OpenGL", "Stream Buffer Orphaning");
GLbitfield flags = GL_MAP_WRITE_BIT | (persistent ? GL_MAP_PERSISTENT_BIT : 0) |
(coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT) |
(invalidate ? GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT);
// Maps everything from buffer_pos to the end of the buffer.
mapped_ptr = static_cast<u8*>(
glMapBufferRange(gl_target, buffer_pos, buffer_size - buffer_pos, flags));
mapped_offset = buffer_pos;
}
// mapped_ptr points at buffer offset `mapped_offset`, so rebase it to the current position.
return std::make_tuple(mapped_ptr + buffer_pos - mapped_offset, buffer_pos, invalidate);
// Explicit-flush write mapping: the whole buffer is invalidated on wrap-around, otherwise the
// mapping is requested unsynchronized.
GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT |
(invalidate ? GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT);
// Only the requested `size` bytes starting at buffer_pos are mapped.
// NOTE(review): the pointer returned by glMapBufferRange is passed on without a null check.
u8* mapped_ptr = static_cast<u8*>(glMapBufferRange(gl_target, buffer_pos, size, flags));
return std::make_tuple(mapped_ptr, buffer_pos, invalidate);
}
// Finishes the chunk started by Map(): flushes `size` bytes (when non-zero) and advances the
// write position by `size`. `size` must not exceed the size passed to Map().
// NOTE(review): two flush/unmap variants (conditional on `coherent`/`persistent`, and
// unconditional) appear interleaved below. This looks like old and new lines of a change shown
// without diff markers; confirm which lines are live before editing.
void OGLStreamBuffer::Unmap(GLsizeiptr size) {
ASSERT(size <= mapped_size);
if (!coherent && size > 0) {
// Flush offset is expressed relative to the mapped range, hence the `- mapped_offset`.
glFlushMappedBufferRange(gl_target, buffer_pos - mapped_offset, size);
if (size > 0) {
// flush is relative to the start of the currently mapped range of buffer
glFlushMappedBufferRange(gl_target, 0, size);
// A failed flush is only reported at debug log level; execution continues either way.
GLenum error = glGetError();
if (error != GL_NO_ERROR) {
LOG_DEBUG(Render_OpenGL,
"flush mapped buffer range error: {:04X}, target: {:04X}, offset: {}, size: "
"{}, total: {}",
error, gl_target, buffer_pos, size, buffer_size);
}
}
if (!persistent) {
glUnmapBuffer(gl_target);
}
glUnmapBuffer(gl_target);
// The next Map() continues right behind the bytes committed here.
buffer_pos += size;
}

View File

@ -13,8 +13,7 @@ class Driver;
class OGLStreamBuffer : private NonCopyable {
public:
explicit OGLStreamBuffer(Driver& driver, GLenum target, GLsizeiptr size,
bool prefer_coherent = false);
explicit OGLStreamBuffer(Driver& driver, GLenum target, GLsizeiptr size);
~OGLStreamBuffer();
GLuint GetHandle() const;
@ -28,7 +27,7 @@ public:
* and the invalidation flag for previous chunks.
* The actual used size must be specified on unmapping the chunk.
*/
std::tuple<u8*, GLintptr, bool> Map(GLsizeiptr size, GLintptr alignment = 0);
std::tuple<u8*, GLintptr, bool> Map(GLsizeiptr size, GLintptr alignment);
void Unmap(GLsizeiptr size);
@ -36,14 +35,8 @@ private:
OGLBuffer gl_buffer;
GLenum gl_target;
bool coherent = false;
bool persistent = false;
GLintptr buffer_pos = 0;
GLsizeiptr buffer_size = 0;
GLintptr mapped_offset = 0;
GLsizeiptr mapped_size = 0;
u8* mapped_ptr = nullptr;
};
} // namespace OpenGL