mirror of
https://github.com/mandarine3ds/mandarine.git
synced 2024-11-23 14:20:02 +00:00
gl_stream_buffer: optimize OpenGL buffer handling
Some checks failed
mandarine-build / macos (arm64) (push) Waiting to run
mandarine-build / macos (x86_64) (push) Waiting to run
mandarine-build / macos-universal (push) Blocked by required conditions
mandarine-build / windows (msvc) (push) Waiting to run
mandarine-build / windows (msys2) (push) Waiting to run
mandarine-build / release (push) Blocked by required conditions
mandarine-build / source (push) Failing after 0s
mandarine-build / linux (appimage) (push) Failing after 0s
mandarine-build / linux (fresh) (push) Failing after 0s
mandarine-build / android (push) Failing after 0s
mandarine-format / clang-format (push) Failing after 0s
Some checks failed
mandarine-build / macos (arm64) (push) Waiting to run
mandarine-build / macos (x86_64) (push) Waiting to run
mandarine-build / macos-universal (push) Blocked by required conditions
mandarine-build / windows (msvc) (push) Waiting to run
mandarine-build / windows (msys2) (push) Waiting to run
mandarine-build / release (push) Blocked by required conditions
mandarine-build / source (push) Failing after 0s
mandarine-build / linux (appimage) (push) Failing after 0s
mandarine-build / linux (fresh) (push) Failing after 0s
mandarine-build / android (push) Failing after 0s
mandarine-format / clang-format (push) Failing after 0s
This seems to give a huge performance boost for some Mali GPU devices.
This commit is contained in:
parent
7031479af0
commit
107837f506
@ -189,7 +189,7 @@ void Driver::FindBugs() {
|
|||||||
|
|
||||||
// TODO: Check if these have been fixed in the newer driver
|
// TODO: Check if these have been fixed in the newer driver
|
||||||
if (vendor == Vendor::AMD) {
|
if (vendor == Vendor::AMD) {
|
||||||
bugs |= DriverBug::ShaderStageChangeFreeze | DriverBug::VertexArrayOutOfBound;
|
bugs |= DriverBug::ShaderStageChangeFreeze;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (vendor == Vendor::AMD || (vendor == Vendor::Intel && !is_linux)) {
|
if (vendor == Vendor::AMD || (vendor == Vendor::Intel && !is_linux)) {
|
||||||
|
@ -27,18 +27,13 @@ enum class Vendor {
|
|||||||
enum class DriverBug {
|
enum class DriverBug {
|
||||||
// AMD drivers sometimes freeze when one shader stage is changed but not the others.
|
// AMD drivers sometimes freeze when one shader stage is changed but not the others.
|
||||||
ShaderStageChangeFreeze = 1 << 0,
|
ShaderStageChangeFreeze = 1 << 0,
|
||||||
// On AMD drivers there is a strange crash in indexed drawing. The crash happens when the buffer
|
|
||||||
// read position is near the end and is an out-of-bound access to the vertex buffer. This is
|
|
||||||
// probably a bug in the driver and is related to the usage of vec3<byte> attributes in the
|
|
||||||
// vertex array. Doubling the allocation size for the vertex buffer seems to avoid the crash.
|
|
||||||
VertexArrayOutOfBound = 1 << 1,
|
|
||||||
// On AMD and Intel drivers on Windows glTextureView produces incorrect results
|
// On AMD and Intel drivers on Windows glTextureView produces incorrect results
|
||||||
BrokenTextureView = 1 << 2,
|
BrokenTextureView = 1 << 1,
|
||||||
// On Haswell and Broadwell Intel drivers glClearTexSubImage produces a black screen
|
// On Haswell and Broadwell Intel drivers glClearTexSubImage produces a black screen
|
||||||
BrokenClearTexture = 1 << 3,
|
BrokenClearTexture = 1 << 2,
|
||||||
// On some Mali GPUs, the texture buffer size is small and has reduced performance
|
// On some Mali GPUs, the texture buffer size is small and has reduced performance
|
||||||
// if the buffer is close to the maximum texture size
|
// if the buffer is close to the maximum texture size
|
||||||
SlowTextureBufferWithBigSize = 1 << 4,
|
SlowTextureBufferWithBigSize = 1 << 3,
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -10,35 +10,15 @@
|
|||||||
|
|
||||||
namespace OpenGL {
|
namespace OpenGL {
|
||||||
|
|
||||||
OGLStreamBuffer::OGLStreamBuffer(Driver& driver, GLenum target, GLsizeiptr size,
|
OGLStreamBuffer::OGLStreamBuffer(Driver& driver, GLenum target, GLsizeiptr size)
|
||||||
bool prefer_coherent)
|
|
||||||
: gl_target(target), buffer_size(size) {
|
: gl_target(target), buffer_size(size) {
|
||||||
gl_buffer.Create();
|
gl_buffer.Create();
|
||||||
glBindBuffer(gl_target, gl_buffer.handle);
|
glBindBuffer(gl_target, gl_buffer.handle);
|
||||||
|
// prefer `glBufferData` over `glBufferStorage` on mobile devices
|
||||||
GLsizeiptr allocate_size = size;
|
glBufferData(gl_target, buffer_size, nullptr, GL_STREAM_DRAW);
|
||||||
if (driver.HasBug(DriverBug::VertexArrayOutOfBound) && target == GL_ARRAY_BUFFER) {
|
|
||||||
allocate_size *= 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (GLAD_GL_ARB_buffer_storage) {
|
|
||||||
persistent = true;
|
|
||||||
coherent = prefer_coherent;
|
|
||||||
GLbitfield flags =
|
|
||||||
GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0);
|
|
||||||
glBufferStorage(gl_target, allocate_size, nullptr, flags);
|
|
||||||
mapped_ptr = static_cast<u8*>(glMapBufferRange(
|
|
||||||
gl_target, 0, buffer_size, flags | (coherent ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT)));
|
|
||||||
} else {
|
|
||||||
glBufferData(gl_target, allocate_size, nullptr, GL_STREAM_DRAW);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
OGLStreamBuffer::~OGLStreamBuffer() {
|
OGLStreamBuffer::~OGLStreamBuffer() {
|
||||||
if (persistent) {
|
|
||||||
glBindBuffer(gl_target, gl_buffer.handle);
|
|
||||||
glUnmapBuffer(gl_target);
|
|
||||||
}
|
|
||||||
gl_buffer.Release();
|
gl_buffer.Release();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -51,48 +31,33 @@ GLsizeiptr OGLStreamBuffer::GetSize() const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
|
std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
|
||||||
ASSERT_MSG(size <= buffer_size, "Requested size {} exceeds buffer size {}", size, buffer_size);
|
|
||||||
ASSERT(alignment <= buffer_size);
|
|
||||||
mapped_size = size;
|
|
||||||
|
|
||||||
if (alignment > 0) {
|
|
||||||
buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool invalidate = false;
|
bool invalidate = false;
|
||||||
|
|
||||||
|
buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment);
|
||||||
if (buffer_pos + size > buffer_size) {
|
if (buffer_pos + size > buffer_size) {
|
||||||
buffer_pos = 0;
|
buffer_pos = 0;
|
||||||
invalidate = true;
|
invalidate = true;
|
||||||
|
|
||||||
if (persistent) {
|
|
||||||
glUnmapBuffer(gl_target);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (invalidate || !persistent) {
|
GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT |
|
||||||
MANDARINE_PROFILE("OpenGL", "Stream Buffer Orphaning");
|
(invalidate ? GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT);
|
||||||
GLbitfield flags = GL_MAP_WRITE_BIT | (persistent ? GL_MAP_PERSISTENT_BIT : 0) |
|
u8* mapped_ptr = static_cast<u8*>(glMapBufferRange(gl_target, buffer_pos, size, flags));
|
||||||
(coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT) |
|
return std::make_tuple(mapped_ptr, buffer_pos, invalidate);
|
||||||
(invalidate ? GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT);
|
|
||||||
mapped_ptr = static_cast<u8*>(
|
|
||||||
glMapBufferRange(gl_target, buffer_pos, buffer_size - buffer_pos, flags));
|
|
||||||
mapped_offset = buffer_pos;
|
|
||||||
}
|
|
||||||
|
|
||||||
return std::make_tuple(mapped_ptr + buffer_pos - mapped_offset, buffer_pos, invalidate);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void OGLStreamBuffer::Unmap(GLsizeiptr size) {
|
void OGLStreamBuffer::Unmap(GLsizeiptr size) {
|
||||||
ASSERT(size <= mapped_size);
|
if (size > 0) {
|
||||||
|
// flush is relative to the start of the currently mapped range of the buffer
|
||||||
if (!coherent && size > 0) {
|
glFlushMappedBufferRange(gl_target, 0, size);
|
||||||
glFlushMappedBufferRange(gl_target, buffer_pos - mapped_offset, size);
|
GLenum error = glGetError();
|
||||||
|
if (error != GL_NO_ERROR) {
|
||||||
|
LOG_DEBUG(Render_OpenGL,
|
||||||
|
"flush mapped buffer range error: {:04X}, target: {:04X}, offset: {}, size: "
|
||||||
|
"{}, total: {}",
|
||||||
|
error, gl_target, buffer_pos, size, buffer_size);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
glUnmapBuffer(gl_target);
|
||||||
if (!persistent) {
|
|
||||||
glUnmapBuffer(gl_target);
|
|
||||||
}
|
|
||||||
|
|
||||||
buffer_pos += size;
|
buffer_pos += size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -13,8 +13,7 @@ class Driver;
|
|||||||
|
|
||||||
class OGLStreamBuffer : private NonCopyable {
|
class OGLStreamBuffer : private NonCopyable {
|
||||||
public:
|
public:
|
||||||
explicit OGLStreamBuffer(Driver& driver, GLenum target, GLsizeiptr size,
|
explicit OGLStreamBuffer(Driver& driver, GLenum target, GLsizeiptr size);
|
||||||
bool prefer_coherent = false);
|
|
||||||
~OGLStreamBuffer();
|
~OGLStreamBuffer();
|
||||||
|
|
||||||
GLuint GetHandle() const;
|
GLuint GetHandle() const;
|
||||||
@ -28,7 +27,7 @@ public:
|
|||||||
* and the invalidation flag for previous chunks.
|
* and the invalidation flag for previous chunks.
|
||||||
* The actual used size must be specified on unmapping the chunk.
|
* The actual used size must be specified on unmapping the chunk.
|
||||||
*/
|
*/
|
||||||
std::tuple<u8*, GLintptr, bool> Map(GLsizeiptr size, GLintptr alignment = 0);
|
std::tuple<u8*, GLintptr, bool> Map(GLsizeiptr size, GLintptr alignment);
|
||||||
|
|
||||||
void Unmap(GLsizeiptr size);
|
void Unmap(GLsizeiptr size);
|
||||||
|
|
||||||
@ -36,14 +35,8 @@ private:
|
|||||||
OGLBuffer gl_buffer;
|
OGLBuffer gl_buffer;
|
||||||
GLenum gl_target;
|
GLenum gl_target;
|
||||||
|
|
||||||
bool coherent = false;
|
|
||||||
bool persistent = false;
|
|
||||||
|
|
||||||
GLintptr buffer_pos = 0;
|
GLintptr buffer_pos = 0;
|
||||||
GLsizeiptr buffer_size = 0;
|
GLsizeiptr buffer_size = 0;
|
||||||
GLintptr mapped_offset = 0;
|
|
||||||
GLsizeiptr mapped_size = 0;
|
|
||||||
u8* mapped_ptr = nullptr;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace OpenGL
|
} // namespace OpenGL
|
||||||
|
Loading…
Reference in New Issue
Block a user