mirror of
https://github.com/mandarine3ds/mandarine.git
synced 2024-11-23 06:09:46 +00:00
gl_stream_buffer: optimize OpenGL buffer handling
Some checks failed
mandarine-build / macos (arm64) (push) Waiting to run
mandarine-build / macos (x86_64) (push) Waiting to run
mandarine-build / macos-universal (push) Blocked by required conditions
mandarine-build / windows (msvc) (push) Waiting to run
mandarine-build / windows (msys2) (push) Waiting to run
mandarine-build / release (push) Blocked by required conditions
mandarine-build / source (push) Failing after 0s
mandarine-build / linux (appimage) (push) Failing after 0s
mandarine-build / linux (fresh) (push) Failing after 0s
mandarine-build / android (push) Failing after 0s
mandarine-format / clang-format (push) Failing after 0s
Some checks failed
mandarine-build / macos (arm64) (push) Waiting to run
mandarine-build / macos (x86_64) (push) Waiting to run
mandarine-build / macos-universal (push) Blocked by required conditions
mandarine-build / windows (msvc) (push) Waiting to run
mandarine-build / windows (msys2) (push) Waiting to run
mandarine-build / release (push) Blocked by required conditions
mandarine-build / source (push) Failing after 0s
mandarine-build / linux (appimage) (push) Failing after 0s
mandarine-build / linux (fresh) (push) Failing after 0s
mandarine-build / android (push) Failing after 0s
mandarine-format / clang-format (push) Failing after 0s
This seems to give a huge performance boost for some Mali GPU devices.
This commit is contained in:
parent
7031479af0
commit
107837f506
@ -189,7 +189,7 @@ void Driver::FindBugs() {
|
||||
|
||||
// TODO: Check if these have been fixed in newer drivers
|
||||
if (vendor == Vendor::AMD) {
|
||||
bugs |= DriverBug::ShaderStageChangeFreeze | DriverBug::VertexArrayOutOfBound;
|
||||
bugs |= DriverBug::ShaderStageChangeFreeze;
|
||||
}
|
||||
|
||||
if (vendor == Vendor::AMD || (vendor == Vendor::Intel && !is_linux)) {
|
||||
|
@ -27,18 +27,13 @@ enum class Vendor {
|
||||
enum class DriverBug {
|
||||
// AMD drivers sometimes freeze when one shader stage is changed but not the others.
|
||||
ShaderStageChangeFreeze = 1 << 0,
|
||||
// On AMD drivers there is a strange crash in indexed drawing. The crash happens when the buffer
|
||||
// read position is near the end and is an out-of-bound access to the vertex buffer. This is
|
||||
// probably a bug in the driver and is related to the usage of vec3<byte> attributes in the
|
||||
// vertex array. Doubling the allocation size for the vertex buffer seems to avoid the crash.
|
||||
VertexArrayOutOfBound = 1 << 1,
|
||||
// On AMD and Intel drivers on Windows glTextureView produces incorrect results
|
||||
BrokenTextureView = 1 << 2,
|
||||
BrokenTextureView = 1 << 1,
|
||||
// On Haswell and Broadwell Intel drivers glClearTexSubImage produces a black screen
|
||||
BrokenClearTexture = 1 << 3,
|
||||
BrokenClearTexture = 1 << 2,
|
||||
// On some Mali GPUs, the texture buffer size is small and has reduced performance
|
||||
// if the buffer is close to the maximum texture size
|
||||
SlowTextureBufferWithBigSize = 1 << 4,
|
||||
SlowTextureBufferWithBigSize = 1 << 3,
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -10,35 +10,15 @@
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
OGLStreamBuffer::OGLStreamBuffer(Driver& driver, GLenum target, GLsizeiptr size,
|
||||
bool prefer_coherent)
|
||||
OGLStreamBuffer::OGLStreamBuffer(Driver& driver, GLenum target, GLsizeiptr size)
|
||||
: gl_target(target), buffer_size(size) {
|
||||
gl_buffer.Create();
|
||||
glBindBuffer(gl_target, gl_buffer.handle);
|
||||
|
||||
GLsizeiptr allocate_size = size;
|
||||
if (driver.HasBug(DriverBug::VertexArrayOutOfBound) && target == GL_ARRAY_BUFFER) {
|
||||
allocate_size *= 2;
|
||||
}
|
||||
|
||||
if (GLAD_GL_ARB_buffer_storage) {
|
||||
persistent = true;
|
||||
coherent = prefer_coherent;
|
||||
GLbitfield flags =
|
||||
GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0);
|
||||
glBufferStorage(gl_target, allocate_size, nullptr, flags);
|
||||
mapped_ptr = static_cast<u8*>(glMapBufferRange(
|
||||
gl_target, 0, buffer_size, flags | (coherent ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT)));
|
||||
} else {
|
||||
glBufferData(gl_target, allocate_size, nullptr, GL_STREAM_DRAW);
|
||||
}
|
||||
// prefer `glBufferData` over `glBufferStorage` on mobile devices
|
||||
glBufferData(gl_target, buffer_size, nullptr, GL_STREAM_DRAW);
|
||||
}
|
||||
|
||||
OGLStreamBuffer::~OGLStreamBuffer() {
|
||||
if (persistent) {
|
||||
glBindBuffer(gl_target, gl_buffer.handle);
|
||||
glUnmapBuffer(gl_target);
|
||||
}
|
||||
gl_buffer.Release();
|
||||
}
|
||||
|
||||
@ -51,48 +31,33 @@ GLsizeiptr OGLStreamBuffer::GetSize() const {
|
||||
}
|
||||
|
||||
std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
|
||||
ASSERT_MSG(size <= buffer_size, "Requested size {} exceeds buffer size {}", size, buffer_size);
|
||||
ASSERT(alignment <= buffer_size);
|
||||
mapped_size = size;
|
||||
|
||||
if (alignment > 0) {
|
||||
buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment);
|
||||
}
|
||||
|
||||
bool invalidate = false;
|
||||
|
||||
buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment);
|
||||
if (buffer_pos + size > buffer_size) {
|
||||
buffer_pos = 0;
|
||||
invalidate = true;
|
||||
|
||||
if (persistent) {
|
||||
glUnmapBuffer(gl_target);
|
||||
}
|
||||
}
|
||||
|
||||
if (invalidate || !persistent) {
|
||||
MANDARINE_PROFILE("OpenGL", "Stream Buffer Orphaning");
|
||||
GLbitfield flags = GL_MAP_WRITE_BIT | (persistent ? GL_MAP_PERSISTENT_BIT : 0) |
|
||||
(coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT) |
|
||||
(invalidate ? GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT);
|
||||
mapped_ptr = static_cast<u8*>(
|
||||
glMapBufferRange(gl_target, buffer_pos, buffer_size - buffer_pos, flags));
|
||||
mapped_offset = buffer_pos;
|
||||
}
|
||||
|
||||
return std::make_tuple(mapped_ptr + buffer_pos - mapped_offset, buffer_pos, invalidate);
|
||||
GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT |
|
||||
(invalidate ? GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT);
|
||||
u8* mapped_ptr = static_cast<u8*>(glMapBufferRange(gl_target, buffer_pos, size, flags));
|
||||
return std::make_tuple(mapped_ptr, buffer_pos, invalidate);
|
||||
}
|
||||
|
||||
void OGLStreamBuffer::Unmap(GLsizeiptr size) {
|
||||
ASSERT(size <= mapped_size);
|
||||
|
||||
if (!coherent && size > 0) {
|
||||
glFlushMappedBufferRange(gl_target, buffer_pos - mapped_offset, size);
|
||||
if (size > 0) {
|
||||
// the flush offset is relative to the start of the currently mapped buffer range
|
||||
glFlushMappedBufferRange(gl_target, 0, size);
|
||||
GLenum error = glGetError();
|
||||
if (error != GL_NO_ERROR) {
|
||||
LOG_DEBUG(Render_OpenGL,
|
||||
"flush mapped buffer range error: {:04X}, target: {:04X}, offset: {}, size: "
|
||||
"{}, total: {}",
|
||||
error, gl_target, buffer_pos, size, buffer_size);
|
||||
}
|
||||
}
|
||||
|
||||
if (!persistent) {
|
||||
glUnmapBuffer(gl_target);
|
||||
}
|
||||
|
||||
glUnmapBuffer(gl_target);
|
||||
buffer_pos += size;
|
||||
}
|
||||
|
||||
|
@ -13,8 +13,7 @@ class Driver;
|
||||
|
||||
class OGLStreamBuffer : private NonCopyable {
|
||||
public:
|
||||
explicit OGLStreamBuffer(Driver& driver, GLenum target, GLsizeiptr size,
|
||||
bool prefer_coherent = false);
|
||||
explicit OGLStreamBuffer(Driver& driver, GLenum target, GLsizeiptr size);
|
||||
~OGLStreamBuffer();
|
||||
|
||||
GLuint GetHandle() const;
|
||||
@ -28,7 +27,7 @@ public:
|
||||
* and the invalidation flag for previous chunks.
|
||||
* The actual used size must be specified on unmapping the chunk.
|
||||
*/
|
||||
std::tuple<u8*, GLintptr, bool> Map(GLsizeiptr size, GLintptr alignment = 0);
|
||||
std::tuple<u8*, GLintptr, bool> Map(GLsizeiptr size, GLintptr alignment);
|
||||
|
||||
void Unmap(GLsizeiptr size);
|
||||
|
||||
@ -36,14 +35,8 @@ private:
|
||||
OGLBuffer gl_buffer;
|
||||
GLenum gl_target;
|
||||
|
||||
bool coherent = false;
|
||||
bool persistent = false;
|
||||
|
||||
GLintptr buffer_pos = 0;
|
||||
GLsizeiptr buffer_size = 0;
|
||||
GLintptr mapped_offset = 0;
|
||||
GLsizeiptr mapped_size = 0;
|
||||
u8* mapped_ptr = nullptr;
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
||||
|
Loading…
Reference in New Issue
Block a user