From a34c773b13e399a9d5da867efc0959792637939b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sun, 10 May 2020 23:09:49 +0200 Subject: [PATCH 1/5] Vulkan: Don't merge render passes where the second one begins with a clear. God of War optimization survives this check, thankfully. Force Unleashed doesn't, but meh, it's not as bad there anyway. --- ext/native/thin3d/VulkanQueueRunner.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ext/native/thin3d/VulkanQueueRunner.cpp b/ext/native/thin3d/VulkanQueueRunner.cpp index 77c1c603f5..df2d386255 100644 --- a/ext/native/thin3d/VulkanQueueRunner.cpp +++ b/ext/native/thin3d/VulkanQueueRunner.cpp @@ -776,7 +776,10 @@ void VulkanQueueRunner::ApplyRenderPassMerge(std::vector &steps) { if (steps[j]->dependencies.contains(touchedFramebuffers)) { goto done_fb; } - if (steps[j]->render.framebuffer == fb) { + if (steps[j]->render.framebuffer == fb && + steps[j]->render.color != VKRRenderPassAction::CLEAR && + steps[j]->render.depth != VKRRenderPassAction::CLEAR && + steps[j]->render.stencil != VKRRenderPassAction::CLEAR) { // ok. Now, if it's a render, slurp up all the commands // and kill the step. // Also slurp up any pretransitions. From b0a163ba2e0717e77a9f4a439ce1c5ad702940aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sun, 10 May 2020 23:07:44 +0200 Subject: [PATCH 2/5] ColorConv: Fix a few conversions that missed the lower bits of each component. --- Common/ColorConv.cpp | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/Common/ColorConv.cpp b/Common/ColorConv.cpp index bf4276f1e3..6206847d2e 100644 --- a/Common/ColorConv.cpp +++ b/Common/ColorConv.cpp @@ -308,7 +308,7 @@ void ConvertRGBA565ToRGBA8888(u32 *dst32, const u16 *src, u32 numPixels) { b = _mm_or_si128(_mm_slli_epi16(b, 3), _mm_srli_epi16(b, 2)); b = _mm_and_si128(b, mask8); - // Always set to 00FF 00FF. + // Always set alpha to 00FF 00FF. 
__m128i a = _mm_slli_epi16(mask8, 8); // Now combine them, RRGG RRGG and BBAA BBAA, and then interleave. @@ -472,36 +472,36 @@ void ConvertRGBA4444ToBGRA8888(u32 *dst32, const u16 *src, u32 numPixels) { u8 *dst = (u8 *)dst32; for (u32 x = 0; x < numPixels; x++) { u16 c = src[x]; - u32 r = c & 0x000f; - u32 g = (c >> 4) & 0x000f; - u32 b = (c >> 8) & 0x000f; - u32 a = (c >> 12) & 0x000f; + u32 r = Convert4To8(c & 0x000f); + u32 g = Convert4To8((c >> 4) & 0x000f); + u32 b = Convert4To8((c >> 8) & 0x000f); + u32 a = Convert4To8((c >> 12) & 0x000f); - dst[x] = (r << (16 + 4)) | (g << (8 + 4)) | (b << 4) | (a << (24 + 4)); + dst[x] = (a << 24) | (r << 16) | (g << 8) | b; } } void ConvertRGBA5551ToBGRA8888(u32 *dst, const u16 *src, u32 numPixels) { for (u32 x = 0; x < numPixels; x++) { u16 c = src[x]; - u32 r = c & 0x001f; - u32 g = (c >> 5) & 0x001f; - u32 b = (c >> 10) & 0x001f; + u32 r = Convert5To8(c & 0x001f); + u32 g = Convert5To8((c >> 5) & 0x001f); + u32 b = Convert5To8((c >> 10) & 0x001f); // We force an arithmetic shift to get the sign bits/ u32 a = ((s32)(s16)c) & 0xff000000; - dst[x] = (r << (16 + 3)) | (g << (8 + 3)) | (b << 3) | a; + dst[x] = a | (r << 16) | (g << 8) | b; } } void ConvertRGB565ToBGRA8888(u32 *dst, const u16 *src, u32 numPixels) { for (u32 x = 0; x < numPixels; x++) { u16 c = src[x]; - u32 r = c & 0x001f; - u32 g = (c >> 5) & 0x003f; - u32 b = (c >> 11) & 0x001f; + u32 r = Convert5To8(c & 0x001f); + u32 g = Convert6To8((c >> 5) & 0x003f); + u32 b = Convert5To8((c >> 11) & 0x001f); - dst[x] = (r << (16 + 3)) | (g << (8 + 2)) | (b << 3) | 0xFF000000; + dst[x] = 0xFF000000 | (r << 16) | (g << 8) | b; } } From 73c253e2ac96effee8edb85f10cda802b58bc09d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sun, 10 May 2020 23:09:01 +0200 Subject: [PATCH 3/5] D3D11: Fix a bind ordering issue in depal (only a problem with debug layer enabled) --- GPU/D3D11/TextureCacheD3D11.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) 
diff --git a/GPU/D3D11/TextureCacheD3D11.cpp b/GPU/D3D11/TextureCacheD3D11.cpp index cb593d7873..f9adc53017 100644 --- a/GPU/D3D11/TextureCacheD3D11.cpp +++ b/GPU/D3D11/TextureCacheD3D11.cpp @@ -399,10 +399,11 @@ void TextureCacheD3D11::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFra shaderApply.ApplyBounds(gstate_c.vertBounds, gstate_c.curTextureXOffset, gstate_c.curTextureYOffset, xoff, yoff); shaderApply.Use(depalShaderCache_->GetDepalettizeVertexShader(), depalShaderCache_->GetInputLayout()); + ID3D11ShaderResourceView *nullTexture = nullptr; + framebufferManagerD3D11_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_SKIP_COPY | BINDFBCOLOR_FORCE_SELF); draw_->BindFramebufferAsRenderTarget(depalFBO, { Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE }); context_->PSSetShaderResources(3, 1, &clutTexture); context_->PSSetSamplers(3, 1, &stockD3D11.samplerPoint2DWrap); - framebufferManagerD3D11_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_SKIP_COPY | BINDFBCOLOR_FORCE_SELF); context_->PSSetSamplers(0, 1, &stockD3D11.samplerPoint2DWrap); shaderApply.Shade(); From f708396bda666549133e29cda337adeffa5b3618 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sun, 10 May 2020 23:09:40 +0200 Subject: [PATCH 4/5] Fix a comment --- GPU/Common/FramebufferCommon.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/Common/FramebufferCommon.cpp b/GPU/Common/FramebufferCommon.cpp index 8853b2a262..0bd3f03985 100644 --- a/GPU/Common/FramebufferCommon.cpp +++ b/GPU/Common/FramebufferCommon.cpp @@ -1212,7 +1212,7 @@ void FramebufferManagerCommon::ResizeFramebufFBO(VirtualFramebuffer *vfb, int w, } } -// This is called from detected memcopies only. Not block transfers. +// This is called from detected memcopies and framebuffer initialization from VRAM. Not block transfers. // MotoGP goes this path so we need to catch those copies here. 
bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size, bool isMemset, u32 skipDrawReason) { if (size == 0) { From c1d32e8b3e885280846e7d7577c91767828a5721 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sun, 10 May 2020 23:42:32 +0200 Subject: [PATCH 5/5] D3D11: Better set current texture to null first, otherwise we can run into another issue. --- GPU/D3D11/TextureCacheD3D11.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/GPU/D3D11/TextureCacheD3D11.cpp b/GPU/D3D11/TextureCacheD3D11.cpp index f9adc53017..568343bdd8 100644 --- a/GPU/D3D11/TextureCacheD3D11.cpp +++ b/GPU/D3D11/TextureCacheD3D11.cpp @@ -400,10 +400,11 @@ void TextureCacheD3D11::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFra shaderApply.Use(depalShaderCache_->GetDepalettizeVertexShader(), depalShaderCache_->GetInputLayout()); ID3D11ShaderResourceView *nullTexture = nullptr; - framebufferManagerD3D11_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_SKIP_COPY | BINDFBCOLOR_FORCE_SELF); + context_->PSSetShaderResources(0, 1, &nullTexture); // In case the target was used in the last draw call. Happens in Sega Rally. draw_->BindFramebufferAsRenderTarget(depalFBO, { Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE }); context_->PSSetShaderResources(3, 1, &clutTexture); context_->PSSetSamplers(3, 1, &stockD3D11.samplerPoint2DWrap); + framebufferManagerD3D11_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_SKIP_COPY | BINDFBCOLOR_FORCE_SELF); context_->PSSetSamplers(0, 1, &stockD3D11.samplerPoint2DWrap); shaderApply.Shade();