Use glCopyImageSubData() when it's available.

There are four different extensions providing this functionality.  NV is
supported on desktop and mobile, EXT/OES on mobile, and ARB on desktop.

Mostly these are only supported by desktop cards and NVIDIA mobile.

This is a good performance improvement on NVIDIA when blit is called a lot,
since it doesn't need to rebind anything or change state.  An example is the
desert city in Tales of Phantasia.
This commit is contained in:
Unknown W. Brackets 2015-12-06 10:39:21 -08:00
parent a2e09eada7
commit fbc4b4e7a1
8 changed files with 59 additions and 13 deletions

View File

@ -1330,7 +1330,6 @@ void FramebufferManager::ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool s
}
}
// TODO: If dimensions are the same, we can use glCopyImageSubData.
void FramebufferManager::BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp) {
if (!dst->fbo || !src->fbo || !useBufferedRendering_) {
// This can happen if they recently switched from non-buffered.
@ -1338,9 +1337,6 @@ void FramebufferManager::BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int
return;
}
fbo_bind_as_render_target(dst->fbo);
glstate.scissorTest.force(false);
bool useBlit = gstate_c.Supports(GPU_SUPPORTS_ARB_FRAMEBUFFER_BLIT | GPU_SUPPORTS_NV_FRAMEBUFFER_BLIT);
bool useNV = useBlit && !gstate_c.Supports(GPU_SUPPORTS_ARB_FRAMEBUFFER_BLIT);
@ -1366,6 +1362,39 @@ void FramebufferManager::BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int
int dstY1 = dstY * dstYFactor;
int dstY2 = (dstY + h) * dstYFactor;
if (gstate_c.Supports(GPU_SUPPORTS_ANY_COPY_IMAGE)) {
// Only if it's the same size.
if (dstX2 - dstX1 == srcX2 - srcX1 && dstY2 - dstY1 == srcY2 - srcY1) {
#if defined(USING_GLES2)
#ifndef IOS
glCopyImageSubDataOES(
fbo_get_color_texture(src->fbo), GL_TEXTURE_2D, 0, srcX1, srcY1, 0,
fbo_get_color_texture(dst->fbo), GL_TEXTURE_2D, 0, dstX1, dstY1, 0,
dstX2 - dstX1, dstY2 - dstY1, 1);
return;
#endif
#else
if (gl_extensions.ARB_copy_image) {
glCopyImageSubData(
fbo_get_color_texture(src->fbo), GL_TEXTURE_2D, 0, srcX1, srcY1, 0,
fbo_get_color_texture(dst->fbo), GL_TEXTURE_2D, 0, dstX1, dstY1, 0,
dstX2 - dstX1, dstY2 - dstY1, 1);
return;
} else if (gl_extensions.NV_copy_image) {
// Older, pre GL 4.x NVIDIA cards.
glCopyImageSubDataNV(
fbo_get_color_texture(src->fbo), GL_TEXTURE_2D, 0, srcX1, srcY1, 0,
fbo_get_color_texture(dst->fbo), GL_TEXTURE_2D, 0, dstX1, dstY1, 0,
dstX2 - dstX1, dstY2 - dstY1, 1);
return;
}
#endif
}
}
fbo_bind_as_render_target(dst->fbo);
glstate.scissorTest.force(false);
if (useBlit) {
fbo_bind_for_read(src->fbo);
if (!useNV) {

View File

@ -537,6 +537,9 @@ void GLES_GPU::CheckGPUFeatures() {
if (gl_extensions.EXT_blend_minmax || gl_extensions.GLES3)
features |= GPU_SUPPORTS_BLEND_MINMAX;
if (gl_extensions.OES_copy_image || gl_extensions.NV_copy_image || gl_extensions.EXT_copy_image || gl_extensions.ARB_copy_image)
features |= GPU_SUPPORTS_ANY_COPY_IMAGE;
if (!gl_extensions.IsGLES)
features |= GPU_SUPPORTS_LOGIC_OP;

View File

@ -454,6 +454,7 @@ enum {
GPU_SUPPORTS_BLEND_MINMAX = FLAG_BIT(4),
GPU_SUPPORTS_LOGIC_OP = FLAG_BIT(5),
GPU_USE_DEPTH_RANGE_HACK = FLAG_BIT(6),
GPU_SUPPORTS_ANY_COPY_IMAGE = FLAG_BIT(19),
GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH = FLAG_BIT(20),
GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT = FLAG_BIT(22),
GPU_ROUND_DEPTH_TO_16BIT = FLAG_BIT(23), // Can be disabled either per game or if we use a real 16-bit depth buffer

View File

@ -51,11 +51,6 @@ extern PFNGLMAPBUFFERPROC glMapBuffer;
typedef void (EGLAPIENTRYP PFNGLDRAWTEXTURENVPROC) (GLuint texture, GLuint sampler, GLfloat x0, GLfloat y0, GLfloat x1, GLfloat y1, GLfloat z, GLfloat s0, GLfloat t0, GLfloat s1, GLfloat t1);
extern PFNGLDRAWTEXTURENVPROC glDrawTextureNV;
typedef void (EGLAPIENTRYP PFNGLCOPYIMAGESUBDATANVPROC) (GLuint srcName, GLenum
srcTarget, GLint srcLevel, GLint srcX, GLint srcY, GLint srcZ, GLuint dstName,
GLenum dstTarget, GLint dstLevel, GLint dstX, GLint dstY, GLint dstZ, GLsizei
width, GLsizei height, GLsizei depth);
extern PFNGLCOPYIMAGESUBDATANVPROC glCopyImageSubDataNV;
#ifndef ARM64
typedef void (EGLAPIENTRYP PFNGLBLITFRAMEBUFFERNVPROC) (
GLint srcX0, GLint srcY0, GLint srcX1, GLuint srcY1,

View File

@ -135,6 +135,9 @@ GLboolean gl3stubInit() {
FIND_PROC(glGetProgramResourceLocationIndexEXT);
FIND_PROC(glGetFragDataIndexEXT);
/* OES_copy_image, etc. */
FIND_PROC(glCopyImageSubDataOES);
#undef FIND_PROC
#endif // IOS
@ -363,6 +366,9 @@ GL_APICALL void (* GL_APIENTRY glBindFragDataLocationEXT) (GLuint prog
GL_APICALL GLint (* GL_APIENTRY glGetProgramResourceLocationIndexEXT) (GLuint program, GLenum programInterface, const GLchar *name);
GL_APICALL GLint (* GL_APIENTRY glGetFragDataIndexEXT) (GLuint program, const GLchar *name);
/* OES_copy_image, etc. */
GL_APICALL void (* GL_APIENTRY glCopyImageSubDataOES) (GLuint srcName, GLenum srcTarget, GLint srcLevel, GLint srcX, GLint srcY, GLint srcZ, GLuint dstName, GLenum dstTarget, GLint dstLevel, GLint dstX, GLint dstY, GLint dstZ, GLsizei width, GLsizei height, GLsizei depth);
#endif // IOS
#endif // GLES2

View File

@ -500,6 +500,8 @@ extern GL_APICALL void (* GL_APIENTRY glBindFragDataLocationEXT) (GLui
extern GL_APICALL GLint (* GL_APIENTRY glGetProgramResourceLocationIndexEXT) (GLuint program, GLenum programInterface, const GLchar *name);
extern GL_APICALL GLint (* GL_APIENTRY glGetFragDataIndexEXT) (GLuint program, const GLchar *name);
/* OES_copy_image, etc. */
extern GL_APICALL void (* GL_APIENTRY glCopyImageSubDataOES) (GLuint srcName, GLenum srcTarget, GLint srcLevel, GLint srcX, GLint srcY, GLint srcZ, GLuint dstName, GLenum dstTarget, GLint dstLevel, GLint dstX, GLint dstY, GLint dstZ, GLsizei width, GLsizei height, GLsizei depth);
#endif // IOS

View File

@ -14,7 +14,6 @@
PFNEGLGETSYSTEMTIMEFREQUENCYNVPROC eglGetSystemTimeFrequencyNV;
PFNEGLGETSYSTEMTIMENVPROC eglGetSystemTimeNV;
PFNGLDRAWTEXTURENVPROC glDrawTextureNV;
PFNGLCOPYIMAGESUBDATANVPROC glCopyImageSubDataNV;
PFNGLBLITFRAMEBUFFERNVPROC glBlitFramebufferNV;
PFNGLMAPBUFFERPROC glMapBuffer;
@ -246,6 +245,10 @@ void CheckGLExtensions() {
gl_extensions.EXT_bgra = strstr(extString, "GL_EXT_bgra") != 0;
gl_extensions.EXT_gpu_shader4 = strstr(extString, "GL_EXT_gpu_shader4") != 0;
gl_extensions.NV_framebuffer_blit = strstr(extString, "GL_NV_framebuffer_blit") != 0;
gl_extensions.NV_copy_image = strstr(extString, "GL_NV_copy_image") != 0;
gl_extensions.OES_copy_image = strstr(extString, "GL_OES_copy_image") != 0;
gl_extensions.EXT_copy_image = strstr(extString, "GL_EXT_copy_image") != 0;
gl_extensions.ARB_copy_image = strstr(extString, "GL_ARB_copy_image") != 0;
if (gl_extensions.IsGLES) {
gl_extensions.OES_texture_npot = strstr(extString, "OES_texture_npot") != 0;
@ -258,7 +261,6 @@ void CheckGLExtensions() {
gl_extensions.EXT_shader_framebuffer_fetch = strstr(extString, "GL_EXT_shader_framebuffer_fetch") != 0;
gl_extensions.NV_shader_framebuffer_fetch = strstr(extString, "GL_NV_shader_framebuffer_fetch") != 0;
gl_extensions.ARM_shader_framebuffer_fetch = strstr(extString, "GL_ARM_shader_framebuffer_fetch") != 0;
gl_extensions.NV_copy_image = strstr(extString, "GL_NV_copy_image") != 0;
#if defined(ANDROID) || defined(BLACKBERRY)
// On Android, incredibly, this is not consistently non-zero! It does seem to have the same value though.
@ -269,8 +271,13 @@ void CheckGLExtensions() {
DLOG("Addresses returned for invalid extensions: %p %p", invalidAddress, invalidAddress2);
#endif
if (gl_extensions.NV_copy_image) {
glCopyImageSubDataNV = (PFNGLCOPYIMAGESUBDATANVPROC)eglGetProcAddress("glCopyImageSubDataNV");
// These are all the same. Let's alias.
if (!gl_extensions.OES_copy_image) {
if (gl_extensions.NV_copy_image) {
glCopyImageSubDataOES = (decltype(glCopyImageSubDataOES))eglGetProcAddress("glCopyImageSubDataNV");
} else if (gl_extensions.EXT_copy_image) {
glCopyImageSubDataOES = (decltype(glCopyImageSubDataOES))eglGetProcAddress("glCopyImageSubDataEXT");
}
}
if (gl_extensions.NV_framebuffer_blit) {

View File

@ -44,6 +44,7 @@ struct GLExtensions {
bool OES_texture_npot; // If this is set, can wrap non-pow-2 textures. Set on desktop.
bool OES_mapbuffer;
bool OES_vertex_array_object;
bool OES_copy_image;
// ARB
bool ARB_framebuffer_object;
@ -52,6 +53,7 @@ struct GLExtensions {
bool EXT_blend_func_extended; // dual source blending (GLES, new 2015)
bool ARB_shader_image_load_store;
bool ARB_conservative_depth;
bool ARB_copy_image;
// EXT
bool EXT_swap_control_tear;
@ -62,6 +64,7 @@ struct GLExtensions {
bool EXT_gpu_shader4;
bool EXT_blend_minmax;
bool EXT_framebuffer_object;
bool EXT_copy_image;
bool PBO_EXT;
// NV