From c847b0220680195f0945ef77c4bb95aae0daff4c Mon Sep 17 00:00:00 2001 From: sonninnos <45124675+sonninnos@users.noreply.github.com> Date: Sat, 26 Oct 2024 16:45:45 +0300 Subject: [PATCH] Integer overscale GPU screenshot crash fixes (#17118) --- gfx/common/d3d9_common.c | 41 ++++++++++++++++++++++++++++++-------- gfx/common/d3d9_common.h | 2 ++ gfx/common/d3d_common.c | 4 ++-- gfx/common/vulkan_common.c | 2 +- gfx/drivers/d3d11.c | 17 ++++++++++------ gfx/drivers/d3d9cg.c | 21 +++++++++++++++---- gfx/drivers/d3d9hlsl.c | 20 ++++++++++++++++--- gfx/drivers/gl1.c | 18 +++++++++++------ gfx/drivers/gl2.c | 11 ++++++---- gfx/drivers/gl3.c | 22 ++++++++++---------- gfx/drivers/vulkan.c | 29 +++++++++++++++++---------- tasks/task_screenshot.c | 8 +++++++- 12 files changed, 139 insertions(+), 56 deletions(-) diff --git a/gfx/common/d3d9_common.c b/gfx/common/d3d9_common.c index d2203b2b14..dac4c61f2e 100644 --- a/gfx/common/d3d9_common.c +++ b/gfx/common/d3d9_common.c @@ -191,10 +191,10 @@ bool d3d9_initialize_symbols(enum gfx_ctx_api api) return false; #endif #if defined(DEBUG) || defined(_DEBUG) - if (!(g_d3d9_dll = dylib_load("d3d9d.dll"))) + if (!(g_d3d9_dll = dylib_load("d3d9d.dll"))) #endif - if (!(g_d3d9_dll = dylib_load("d3d9.dll"))) - return false; + if (!(g_d3d9_dll = dylib_load("d3d9.dll"))) + return false; D3D9Create = (D3D9Create_t)dylib_proc(g_d3d9_dll, "Direct3DCreate9"); #ifdef HAVE_D3DX D3D9CompileShaderFromFile = (D3D9CompileShaderFromFile_t)dylib_proc(g_d3d9x_dll, "D3DXCompileShaderFromFile"); @@ -872,18 +872,39 @@ void d3d9_set_viewport(void *data, bool force_full, bool allow_rotate) { + d3d9_video_t *d3d = (d3d9_video_t*)data; + float translate_x = d3d->translate_x; + float translate_y = d3d->translate_y; int x = 0; int y = 0; - d3d9_video_t *d3d = (d3d9_video_t*)data; d3d9_calculate_rect(d3d, &width, &height, &x, &y, force_full, allow_rotate); /* D3D doesn't support negative X/Y viewports ... */ if (x < 0) + { + if (!force_full) + d3d->translate_x = x * 2; x = 0; + } + else if (!force_full) + d3d->translate_x = 0; + if (y < 0) + { + if (!force_full) + d3d->translate_y = y * 2; y = 0; + } + else if (!force_full) + d3d->translate_y = 0; + + if (!force_full) + { + if (translate_x != d3d->translate_x || translate_y != d3d->translate_y) + d3d->needs_restore = true; + } d3d->final_viewport.X = x; d3d->final_viewport.Y = y; @@ -1073,7 +1094,8 @@ void d3d9_set_menu_texture_frame(void *data, || (d3d->menu->tex_w != width) || (d3d->menu->tex_h != height)) { - IDirect3DTexture9_Release((LPDIRECT3DTEXTURE9)d3d->menu->tex); + if (d3d->menu->tex) + IDirect3DTexture9_Release((LPDIRECT3DTEXTURE9)d3d->menu->tex); d3d->menu->tex = d3d9_texture_new(d3d->dev, width, height, 1, @@ -1096,6 +1118,7 @@ void d3d9_set_menu_texture_frame(void *data, 0, &d3dlr, NULL, D3DLOCK_NOSYSLOCK); { unsigned h, w; + if (rgb32) { uint8_t *dst = (uint8_t*)d3dlr.pBits; @@ -1281,16 +1304,18 @@ bool d3d9_read_viewport(void *data, uint8_t *buffer, bool is_idle) { unsigned x, y; + unsigned vp_width = (d3d->final_viewport.Width > width) ? width : d3d->final_viewport.Width; + unsigned vp_height = (d3d->final_viewport.Height > height) ? height : d3d->final_viewport.Height; unsigned pitchpix = rect.Pitch / 4; const uint32_t *pixels = (const uint32_t*)rect.pBits; pixels += d3d->final_viewport.X; - pixels += (d3d->final_viewport.Height - 1) * pitchpix; + pixels += (vp_height - 1) * pitchpix; pixels -= d3d->final_viewport.Y * pitchpix; - for (y = 0; y < d3d->final_viewport.Height; y++, pixels -= pitchpix) + for (y = 0; y < vp_height; y++, pixels -= pitchpix) { - for (x = 0; x < d3d->final_viewport.Width; x++) + for (x = 0; x < vp_width; x++) { *buffer++ = (pixels[x] >> 0) & 0xff; *buffer++ = (pixels[x] >> 8) & 0xff; diff --git a/gfx/common/d3d9_common.h b/gfx/common/d3d9_common.h index 8f49c7637e..e31e3f6f4d 100644 --- a/gfx/common/d3d9_common.h +++ b/gfx/common/d3d9_common.h @@ -78,6 +78,8 @@ typedef struct d3d9_video #endif LPDIRECT3DDEVICE9 dev; D3DVIEWPORT9 final_viewport; + float translate_x; + float translate_y; char *shader_path; diff --git a/gfx/common/d3d_common.c b/gfx/common/d3d_common.c index fa7a9526ca..4e81a4fe71 100644 --- a/gfx/common/d3d_common.c +++ b/gfx/common/d3d_common.c @@ -66,8 +66,8 @@ void d3d_matrix_ortho_off_center_lh(void *_pout, pout->m[0][0] = 2.0f / (r - l); pout->m[1][1] = 2.0f / (t - b); pout->m[2][2] = 1.0f / (zf -zn); - pout->m[3][0] = -1.0f -2.0f *l / (r - l); - pout->m[3][1] = 1.0f + 2.0f * t / (b - t); + pout->m[3][0] = -1.0f - 2.0f * l / (r - l); + pout->m[3][1] = 1.0f + 2.0f * t / (b - t); pout->m[3][2] = zn / (zn -zf); } diff --git a/gfx/common/vulkan_common.c b/gfx/common/vulkan_common.c index d9d8448a52..4c41c8a514 100644 --- a/gfx/common/vulkan_common.c +++ b/gfx/common/vulkan_common.c @@ -1264,7 +1264,7 @@ static void vulkan_acquire_clear_fences(gfx_ctx_vulkan_data_t *vk) if (vk->context.swapchain_wait_semaphores[i]) { - struct vulkan_context *ctx = &vk->context; + struct vulkan_context *ctx = &vk->context; VkSemaphore sem = vk->context.swapchain_wait_semaphores[i]; assert(ctx->num_recycled_acquire_semaphores < VULKAN_MAX_SWAPCHAIN_IMAGES); ctx->swapchain_recycled_semaphores[ctx->num_recycled_acquire_semaphores++] = sem; diff --git a/gfx/drivers/d3d11.c b/gfx/drivers/d3d11.c index 5209152dc5..7c335ebbef 100644 --- a/gfx/drivers/d3d11.c +++ b/gfx/drivers/d3d11.c @@ -855,7 +855,7 @@ static void d3d11_font_render_message( d3d11_font_render_line(d3d11, font, glyph_q, msg, msg_len, scale, color, pos_x, pos_y - (float)lines * line_height, - x, + x, width, height, text_align); if (!delim) @@ -3694,12 +3694,17 @@ static bool d3d11_gfx_read_viewport(void* data, uint8_t* buffer, bool is_idle) /* Assuming format is DXGI_FORMAT_R8G8B8A8_UNORM */ if (StagingDesc.Format == DXGI_FORMAT_R8G8B8A8_UNORM) { - BackBufferData += Map.RowPitch * d3d11->vp.y; - for (y = 0; y < d3d11->vp.height; y++, BackBufferData += Map.RowPitch) - { - bufferRow = buffer + 3 * (d3d11->vp.height - y - 1) * d3d11->vp.width; + unsigned vp_y = (d3d11->vp.y > 0) ? d3d11->vp.y : 0; + unsigned vp_width = (d3d11->vp.width > d3d11->vp.full_width) ? d3d11->vp.full_width : d3d11->vp.width; + unsigned vp_height = (d3d11->vp.height > d3d11->vp.full_height) ? d3d11->vp.full_height : d3d11->vp.height; - for (x = 0; x < d3d11->vp.width; x++) + BackBufferData += Map.RowPitch * vp_y; + + for (y = 0; y < vp_height; y++, BackBufferData += Map.RowPitch) + { + bufferRow = buffer + 3 * (vp_height - y - 1) * vp_width; + + for (x = 0; x < vp_width; x++) { bufferRow[3 * x + 2] = BackBufferData[4 * (x + d3d11->vp.x) + 0]; bufferRow[3 * x + 1] = BackBufferData[4 * (x + d3d11->vp.x) + 1]; diff --git a/gfx/drivers/d3d9cg.c b/gfx/drivers/d3d9cg.c index 7dce7c0e3e..360d6579f1 100644 --- a/gfx/drivers/d3d9cg.c +++ b/gfx/drivers/d3d9cg.c @@ -1755,7 +1755,21 @@ static bool d3d9_cg_initialize(d3d9_video_t *d3d, const video_info_t *info) d3d_matrix_identity(&d3d->mvp_transposed); d3d_matrix_ortho_off_center_lh(&d3d->mvp_transposed, 0, 1, 0, 1, 0, 1); - d3d_matrix_transpose(&d3d->mvp, &d3d->mvp_transposed); + d3d->mvp = d3d->mvp_transposed; + + if (d3d->translate_x) + { + struct d3d_matrix *pout = (struct d3d_matrix*)&d3d->mvp; + float vp_x = -(d3d->translate_x/(float)d3d->final_viewport.Width); + pout->m[3][0] = -1.0f + vp_x - 2.0f * 1 / (0 - 1); + } + + if (d3d->translate_y) + { + struct d3d_matrix *pout = (struct d3d_matrix*)&d3d->mvp; + float vp_y = -(d3d->translate_y/(float)d3d->final_viewport.Height); + pout->m[3][1] = 1.0f + vp_y + 2.0f * 1 / (0 - 1); + } IDirect3DDevice9_SetRenderState(d3d->dev, D3DRS_CULLMODE, D3DCULL_NONE); IDirect3DDevice9_SetRenderState(d3d->dev, D3DRS_SCISSORTESTENABLE, TRUE); @@ -2103,7 +2117,7 @@ static bool d3d9_cg_frame(void *data, const void *frame, if (d3d->overlays_enabled && overlay_behind_menu) { IDirect3DDevice9_SetVertexShaderConstantF(d3d->dev, - 0, (const float*)&d3d->mvp, 4); + 0, (const float*)&d3d->mvp_transposed, 4); for (i = 0; i < d3d->overlays_size; i++) d3d9_overlay_render(d3d, width, height, &d3d->overlays[i], true); } @@ -2121,7 +2135,6 @@ static bool d3d9_cg_frame(void *data, const void *frame, IDirect3DDevice9_SetStreamSource(d3d->dev, 0, (LPDIRECT3DVERTEXBUFFER9)d3d->menu_display.buffer, 0, sizeof(Vertex)); - IDirect3DDevice9_SetViewport(d3d->dev, (D3DVIEWPORT9*)&screen_vp); menu_driver_frame(menu_is_alive, video_info); } @@ -2142,7 +2155,7 @@ static bool d3d9_cg_frame(void *data, const void *frame, if (d3d->overlays_enabled && !overlay_behind_menu) { IDirect3DDevice9_SetVertexShaderConstantF(d3d->dev, - 0, (const float*)&d3d->mvp, 4); + 0, (const float*)&d3d->mvp_transposed, 4); for (i = 0; i < d3d->overlays_size; i++) d3d9_overlay_render(d3d, width, height, &d3d->overlays[i], true); } diff --git a/gfx/drivers/d3d9hlsl.c b/gfx/drivers/d3d9hlsl.c index 7824584c8a..d9c7f7ae23 100644 --- a/gfx/drivers/d3d9hlsl.c +++ b/gfx/drivers/d3d9hlsl.c @@ -1343,7 +1343,21 @@ static bool d3d9_hlsl_initialize( d3d_matrix_identity(&d3d->mvp_transposed); d3d_matrix_ortho_off_center_lh(&d3d->mvp_transposed, 0, 1, 0, 1, 0, 1); - d3d_matrix_transpose(&d3d->mvp, &d3d->mvp_transposed); + d3d->mvp = d3d->mvp_transposed; + + if (d3d->translate_x) + { + struct d3d_matrix *pout = (struct d3d_matrix*)&d3d->mvp; + float vp_x = -(d3d->translate_x/(float)d3d->final_viewport.Width); + pout->m[3][0] = -1.0f + vp_x - 2.0f * 1 / (0 - 1); + } + + if (d3d->translate_y) + { + struct d3d_matrix *pout = (struct d3d_matrix*)&d3d->mvp; + float vp_y = -(d3d->translate_y/(float)d3d->final_viewport.Height); + pout->m[3][1] = 1.0f + vp_y + 2.0f * 1 / (0 - 1); + } IDirect3DDevice9_SetRenderState(d3d->dev, D3DRS_CULLMODE, D3DCULL_NONE); IDirect3DDevice9_SetRenderState(d3d->dev, D3DRS_SCISSORTESTENABLE, TRUE); @@ -1675,7 +1689,7 @@ static bool d3d9_hlsl_frame(void *data, const void *frame, 0, 1, 0); IDirect3DDevice9_SetVertexShaderConstantF(d3d->dev, 0, - (const float*)&d3d->mvp_transposed, 4); + (const float*)&d3d->mvp, 4); hlsl_d3d9_renderchain_render( d3d, frame, frame_width, frame_height, pitch, d3d->dev_rotation); @@ -1713,7 +1727,7 @@ static bool d3d9_hlsl_frame(void *data, const void *frame, if (d3d->menu && d3d->menu->enabled) { IDirect3DDevice9_SetVertexShaderConstantF(d3d->dev, 0, - (const float*)&d3d->mvp_transposed, 4); + (const float*)&d3d->mvp, 4); d3d9_overlay_render(d3d, width, height, d3d->menu, false); d3d->menu_display.offset = 0; diff --git a/gfx/drivers/gl1.c b/gfx/drivers/gl1.c index 5c802798ed..c770a70c00 100644 --- a/gfx/drivers/gl1.c +++ b/gfx/drivers/gl1.c @@ -1270,10 +1270,10 @@ static void gl1_draw_tex(gl1_t *gl1, int pot_width, int pot_height, int width, i GLenum type = GL_UNSIGNED_BYTE; #endif float vertices[] = { - -1.0f, -1.0f, 0.0f, - -1.0f, 1.0f, 0.0f, - 1.0f, -1.0f, 0.0f, - 1.0f, 1.0f, 0.0f, + -1.0f, -1.0f, 0.0f, + -1.0f, 1.0f, 0.0f, + 1.0f, -1.0f, 0.0f, + 1.0f, 1.0f, 0.0f, }; float colors[] = { @@ -1408,6 +1408,7 @@ static void gl1_draw_tex(gl1_t *gl1, int pot_width, int pot_height, int width, i static void gl1_readback(gl1_t *gl1, unsigned alignment, unsigned fmt, unsigned type, + unsigned video_width, unsigned video_height, void *src) { #ifndef VITA @@ -1415,8 +1416,12 @@ static void gl1_readback(gl1_t *gl1, glPixelStorei(GL_PACK_ROW_LENGTH, 0); glReadBuffer(GL_BACK); #endif - glReadPixels(gl1->vp.x, gl1->vp.y, - gl1->vp.width, gl1->vp.height, + + glReadPixels( + (gl1->vp.x > 0) ? gl1->vp.x : 0, + (gl1->vp.y > 0) ? gl1->vp.y : 0, + (gl1->vp.width > video_width) ? video_width : gl1->vp.width, + (gl1->vp.height > video_height) ? video_height : gl1->vp.height, (GLenum)fmt, (GLenum)type, (GLvoid*)src); } @@ -1656,6 +1661,7 @@ static bool gl1_frame(void *data, const void *frame, #else GL_UNSIGNED_BYTE, #endif + video_width, video_height, gl1->readback_buffer_screenshot); diff --git a/gfx/drivers/gl2.c b/gfx/drivers/gl2.c index ae4e58472b..003a1f715a 100644 --- a/gfx/drivers/gl2.c +++ b/gfx/drivers/gl2.c @@ -906,7 +906,7 @@ static void gl2_raster_font_render_line(gl2_t *gl, if (font->block) video_coord_array_append(&font->block->carr, - &coords, coords.vertices); + &coords, coords.vertices); else gl2_raster_font_draw_vertices(gl, font, &coords); } @@ -1580,7 +1580,7 @@ static unsigned gl2_wrap_type_to_enum(enum gfx_wrap_type type) case RARCH_WRAP_MIRRORED_REPEAT: return GL_MIRRORED_REPEAT; default: - break; + break; } return 0; @@ -2391,8 +2391,11 @@ static void gl2_renderchain_readback( glReadBuffer(GL_BACK); #endif - glReadPixels(gl->vp.x, gl->vp.y, - gl->vp.width, gl->vp.height, + glReadPixels( + (gl->vp.x > 0) ? gl->vp.x : 0, + (gl->vp.y > 0) ? gl->vp.y : 0, + (gl->vp.width > gl->video_width) ? gl->video_width : gl->vp.width, + (gl->vp.height > gl->video_height) ? gl->video_height : gl->vp.height, (GLenum)fmt, (GLenum)type, (GLvoid*)src); } diff --git a/gfx/drivers/gl3.c b/gfx/drivers/gl3.c index 12f01be641..18684ef1b7 100644 --- a/gfx/drivers/gl3.c +++ b/gfx/drivers/gl3.c @@ -2183,7 +2183,7 @@ static void video_texture_load_gl3( break; case TEXTURE_FILTER_MIPMAP_LINEAR: - default: + default: mag_filter = GL_LINEAR; min_filter = GL_LINEAR_MIPMAP_LINEAR; break; @@ -2465,11 +2465,10 @@ static void gl3_set_rotation(void *data, unsigned rotation) static void gl3_viewport_info(void *data, struct video_viewport *vp) { unsigned top_y, top_dist; - gl3_t *gl = (gl3_t*)data; + gl3_t *gl = (gl3_t*)data; unsigned width = gl->video_width; unsigned height = gl->video_height; - *vp = gl->vp; vp->full_width = width; vp->full_height = height; @@ -2490,6 +2489,7 @@ static bool gl3_read_viewport(void *data, uint8_t *buffer, bool is_idle) if (gl->flags & GL3_FLAG_USE_SHARED_CONTEXT) gl->ctx_driver->bind_hw_render(gl->ctx_data, false); + num_pixels = gl->vp.width * gl->vp.height; if (gl->flags & GL3_FLAG_PBO_READBACK_ENABLE) @@ -2724,10 +2724,10 @@ static bool gl3_frame(void *data, const void *frame, texture.padded_width = gl->hw_render_max_width; texture.padded_height = gl->hw_render_max_height; - if (texture.width == 0) - texture.width = 1; - if (texture.height == 0) - texture.height = 1; + if (texture.width == 0) + texture.width = 1; + if (texture.height == 0) + texture.height = 1; } else { @@ -2843,8 +2843,11 @@ static bool gl3_frame(void *data, const void *frame, #ifndef HAVE_OPENGLES glReadBuffer(GL_BACK); #endif - glReadPixels(gl->vp.x, gl->vp.y, - gl->vp.width, gl->vp.height, + glReadPixels( + (gl->vp.x > 0) ? gl->vp.x : 0, + (gl->vp.y > 0) ? gl->vp.y : 0, + (gl->vp.width > gl->video_width) ? gl->video_width : gl->vp.width, + (gl->vp.height > gl->video_height) ? gl->video_height : gl->vp.height, GL_RGBA, GL_UNSIGNED_BYTE, gl->readback_buffer_screenshot); } @@ -2857,7 +2860,6 @@ static bool gl3_frame(void *data, const void *frame, gl3_pbo_async_readback(gl); } - if (gl->ctx_driver->swap_buffers) gl->ctx_driver->swap_buffers(gl->ctx_data); diff --git a/gfx/drivers/vulkan.c b/gfx/drivers/vulkan.c index 9641356a03..bed46d6624 100644 --- a/gfx/drivers/vulkan.c +++ b/gfx/drivers/vulkan.c @@ -3798,6 +3798,9 @@ static void vulkan_set_projection(vk_t *vk, matrix_4x4_multiply(tmp, rot, vk->mvp_no_rot); } matrix_4x4_multiply(vk->mvp, trn, tmp); + + /* Required for translate_x+y / negative offsets to also work in RGUI */ + matrix_4x4_multiply(vk->mvp_no_rot, trn, tmp); } static void vulkan_set_rotation(void *data, unsigned rotation) @@ -3863,14 +3866,15 @@ static void vulkan_set_viewport(void *data, unsigned viewport_width, if (vk->vp.x < 0) { vk->translate_x = (float)vk->vp.x * 2; - vk->vp.x = 0.0; + vk->vp.x = 0.0; } else vk->translate_x = 0.0; + if (vk->vp.y < 0) { vk->translate_y = (float)vk->vp.y * 2; - vk->vp.y = 0.0; + vk->vp.y = 0.0; } else vk->translate_y = 0.0; @@ -3920,8 +3924,8 @@ static void vulkan_readback(vk_t *vk, struct vk_image *readback_image) region.imageOffset.x = vp.x; region.imageOffset.y = vp.y; region.imageOffset.z = 0; - region.imageExtent.width = vp.width; - region.imageExtent.height = vp.height; + region.imageExtent.width = vp.width + vk->translate_x; + region.imageExtent.height = vp.height + vk->translate_y; region.imageExtent.depth = 1; staging = &vk->readback.staging[vk->context->current_frame_index]; @@ -5633,17 +5637,20 @@ static bool vulkan_read_viewport(void *data, uint8_t *buffer, bool is_idle) { int y; + unsigned vp_width = (vk->vp.width > vk->video_width) ? vk->video_width : vk->vp.width; + unsigned vp_height = (vk->vp.height > vk->video_height) ? vk->video_height : vk->vp.height; const uint8_t *src = (const uint8_t*)staging->mapped; - buffer += 3 * (vk->vp.height - 1) * vk->vp.width; + + buffer += 3 * (vp_height - 1) * vp_width; switch (format) { case VK_FORMAT_B8G8R8A8_UNORM: - for (y = 0; y < (int) vk->vp.height; y++, - src += staging->stride, buffer -= 3 * vk->vp.width) + for (y = 0; y < (int) vp_height; y++, + src += staging->stride, buffer -= 3 * vp_width) { int x; - for (x = 0; x < (int) vk->vp.width; x++) + for (x = 0; x < (int) vp_width; x++) { buffer[3 * x + 0] = src[4 * x + 0]; buffer[3 * x + 1] = src[4 * x + 1]; @@ -5654,11 +5661,11 @@ static bool vulkan_read_viewport(void *data, uint8_t *buffer, bool is_idle) case VK_FORMAT_R8G8B8A8_UNORM: case VK_FORMAT_A8B8G8R8_UNORM_PACK32: - for (y = 0; y < (int) vk->vp.height; y++, - src += staging->stride, buffer -= 3 * vk->vp.width) + for (y = 0; y < (int) vp_height; y++, + src += staging->stride, buffer -= 3 * vp_width) { int x; - for (x = 0; x < (int) vk->vp.width; x++) + for (x = 0; x < (int) vp_width; x++) { buffer[3 * x + 2] = src[4 * x + 0]; buffer[3 * x + 1] = src[4 * x + 1]; diff --git a/tasks/task_screenshot.c b/tasks/task_screenshot.c index 8297d5c3d3..43890fbc6d 100644 --- a/tasks/task_screenshot.c +++ b/tasks/task_screenshot.c @@ -476,6 +476,12 @@ static bool take_screenshot_viewport( video_st->data, buffer, runloop_flags & RUNLOOP_FLAG_IDLE))) goto error; + /* Limit image to screen size */ + if (vp.width > video_st->width) + vp.width = video_st->width; + if (vp.height > video_st->height) + vp.height = video_st->height; + /* Data read from viewport is in bottom-up order, suitable for BMP. */ if (!screenshot_dump(screenshot_dir, name_base, @@ -611,7 +617,7 @@ bool take_screenshot( return false; ret = take_screenshot_choice( - video_st, + video_st, screenshot_dir, name_base, savestate,