From 4407445d50501ee54419f8737aa9d181daf15cec Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sun, 7 May 2017 11:28:57 +0200 Subject: [PATCH 01/25] Separate BeginFrame from BeginSurfaceRenderPass --- Common/Vulkan/VulkanContext.cpp | 9 ++++++++- Common/Vulkan/VulkanContext.h | 2 ++ ext/native/thin3d/thin3d_vulkan.cpp | 5 ++++- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/Common/Vulkan/VulkanContext.cpp b/Common/Vulkan/VulkanContext.cpp index 3065dff6de..c621c24cc0 100644 --- a/Common/Vulkan/VulkanContext.cpp +++ b/Common/Vulkan/VulkanContext.cpp @@ -215,7 +215,7 @@ void VulkanContext::QueueBeforeSurfaceRender(VkCommandBuffer cmd) { cmdQueue_.push_back(cmd); } -VkCommandBuffer VulkanContext::BeginSurfaceRenderPass(VkClearValue clear_values[2]) { +VkCommandBuffer VulkanContext::BeginFrame() { FrameData *frame = &frame_[curFrame_]; // Get the index of the next available swapchain image, and a semaphore to block command buffer execution on. @@ -238,7 +238,11 @@ VkCommandBuffer VulkanContext::BeginSurfaceRenderPass(VkClearValue clear_values[ res = vkBeginCommandBuffer(frame->cmdBuf, &begin); TransitionFromPresent(frame->cmdBuf, swapChainBuffers[current_buffer].image); + return frame->cmdBuf; +} +VkCommandBuffer VulkanContext::BeginSurfaceRenderPass(VkClearValue clear_values[2]) { + FrameData *frame = &frame_[curFrame_]; VkRenderPassBeginInfo rp_begin = { VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO }; rp_begin.renderPass = surface_render_pass_; rp_begin.framebuffer = framebuffers_[current_buffer]; @@ -259,7 +263,10 @@ VkCommandBuffer VulkanContext::BeginSurfaceRenderPass(VkClearValue clear_values[ void VulkanContext::EndSurfaceRenderPass() { FrameData *frame = &frame_[curFrame_]; vkCmdEndRenderPass(frame->cmdBuf); +} +void VulkanContext::EndFrame() { + FrameData *frame = &frame_[curFrame_]; TransitionToPresent(frame->cmdBuf, swapChainBuffers[current_buffer].image); VkResult res = vkEndCommandBuffer(frame->cmdBuf); diff --git 
a/Common/Vulkan/VulkanContext.h b/Common/Vulkan/VulkanContext.h index ea93f2a0a9..b1b5c77f17 100644 --- a/Common/Vulkan/VulkanContext.h +++ b/Common/Vulkan/VulkanContext.h @@ -263,12 +263,14 @@ public: return frame_[curFrame_ & 1].cmdBuf; } + VkCommandBuffer BeginFrame(); // The surface render pass is special because it has to acquire the backbuffer, and may thus "block". // Use the returned command buffer to enqueue commands that render to the backbuffer. // To render to other buffers first, you can submit additional commandbuffers using QueueBeforeSurfaceRender(cmd). VkCommandBuffer BeginSurfaceRenderPass(VkClearValue clear_values[2]); // May eventually need the ability to break and resume the backbuffer render pass in a few rare cases. void EndSurfaceRenderPass(); + void EndFrame(); void QueueBeforeSurfaceRender(VkCommandBuffer cmd); diff --git a/ext/native/thin3d/thin3d_vulkan.cpp b/ext/native/thin3d/thin3d_vulkan.cpp index 5ee9895fc0..08d1bac9a3 100644 --- a/ext/native/thin3d/thin3d_vulkan.cpp +++ b/ext/native/thin3d/thin3d_vulkan.cpp @@ -732,6 +732,8 @@ VKContext::~VKContext() { } void VKContext::Begin(bool clear, uint32_t colorval, float depthVal, int stencilVal) { + cmd_ = vulkan_->BeginFrame(); + VkClearValue clearVal[2] = {}; Uint8x4ToFloat4(colorval, clearVal[0].color.float32); @@ -742,7 +744,7 @@ void VKContext::Begin(bool clear, uint32_t colorval, float depthVal, int stencil clearVal[1].depthStencil.depth = depthVal; clearVal[1].depthStencil.stencil = stencilVal; - cmd_ = vulkan_->BeginSurfaceRenderPass(clearVal); + vulkan_->BeginSurfaceRenderPass(clearVal); FrameData *frame = &frame_[frameNum_ & 1]; push_ = frame->pushBuffer; @@ -765,6 +767,7 @@ void VKContext::End() { // Stop collecting data in the frame's data pushbuffer. 
push_->End(); vulkan_->EndSurfaceRenderPass(); + vulkan_->EndFrame(); frameNum_++; cmd_ = nullptr; // will be set on the next begin From 3b6fa9be8712f4c49e797d657403bce49ec2768d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Tue, 16 May 2017 13:30:10 +0200 Subject: [PATCH 02/25] Start work on separating frame from backbuffer renderpass --- GPU/Common/FramebufferCommon.cpp | 13 +++++----- GPU/Vulkan/FramebufferVulkan.cpp | 8 +++--- ext/native/thin3d/thin3d.h | 9 +++---- ext/native/thin3d/thin3d_vulkan.cpp | 39 ++++++++++++++++------------- ext/native/ui/ui_screen.cpp | 5 ++-- 5 files changed, 40 insertions(+), 34 deletions(-) diff --git a/GPU/Common/FramebufferCommon.cpp b/GPU/Common/FramebufferCommon.cpp index 66d677bd33..b7d89316be 100644 --- a/GPU/Common/FramebufferCommon.cpp +++ b/GPU/Common/FramebufferCommon.cpp @@ -205,7 +205,7 @@ void FramebufferManagerCommon::SetNumExtraFBOs(int num) { Draw::Framebuffer *fbo = draw_->CreateFramebuffer({ (int)renderWidth_, (int)renderHeight_, 1, 1, false, Draw::FBO_8888 }); extraFBOs_.push_back(fbo); - // The new FBO is still bound after creation, but let's bind it anyway. + // The new FBO is still bound after creation, but let's bind and clear it anyway. draw_->BindFramebufferAsRenderTarget(fbo); ClearBuffer(); } @@ -828,20 +828,16 @@ void FramebufferManagerCommon::CopyDisplayToOutput() { DownloadFramebufferOnSwitch(currentRenderVfb_); SetViewport2D(0, 0, pixelWidth_, pixelHeight_); - draw_->BindBackbufferAsRenderTarget(); currentRenderVfb_ = 0; if (displayFramebufPtr_ == 0) { DEBUG_LOG(FRAMEBUF, "Display disabled, displaying only black"); // No framebuffer to display! Clear to black. 
+ draw_->BindBackbufferAsRenderTarget(); ClearBuffer(); return; } - if (useBufferedRendering_) { - draw_->Clear(Draw::FB_COLOR_BIT | Draw::FB_STENCIL_BIT | Draw::FB_DEPTH_BIT, 0, 0, 0); - } - u32 offsetX = 0; u32 offsetY = 0; @@ -888,6 +884,11 @@ void FramebufferManagerCommon::CopyDisplayToOutput() { } } + draw_->BindBackbufferAsRenderTarget(); + if (useBufferedRendering_) { + draw_->Clear(Draw::FB_COLOR_BIT | Draw::FB_STENCIL_BIT | Draw::FB_DEPTH_BIT, 0, 0, 0); + } + if (!vfb) { if (Memory::IsValidAddress(displayFramebufPtr_)) { // The game is displaying something directly from RAM. In GTA, it's decoded video. diff --git a/GPU/Vulkan/FramebufferVulkan.cpp b/GPU/Vulkan/FramebufferVulkan.cpp index b8899551a0..eed60df430 100644 --- a/GPU/Vulkan/FramebufferVulkan.cpp +++ b/GPU/Vulkan/FramebufferVulkan.cpp @@ -444,10 +444,10 @@ void FramebufferManagerVulkan::DrawTexture(VulkanTexture *texture, float x, floa } Vulkan2D::Vertex vtx[4] = { - {x,y,0,texCoords[0],texCoords[1]}, - {x + w,y,0,texCoords[2],texCoords[3]}, - {x,y + h,0,texCoords[6],texCoords[7] }, - {x + w,y + h,0,texCoords[4],texCoords[5] }, + {x,y, 0,texCoords[0],texCoords[1]}, + {x + w,y, 0,texCoords[2],texCoords[3]}, + {x,y + h, 0,texCoords[6],texCoords[7] }, + {x + w,y + h, 0,texCoords[4],texCoords[5] }, }; float invDestW = 1.0f / (destW * 0.5f); diff --git a/ext/native/thin3d/thin3d.h b/ext/native/thin3d/thin3d.h index 3722dc4bcd..75175c431e 100644 --- a/ext/native/thin3d/thin3d.h +++ b/ext/native/thin3d/thin3d.h @@ -636,11 +636,10 @@ public: virtual void DrawIndexed(int vertexCount, int offset) = 0; virtual void DrawUP(const void *vdata, int vertexCount) = 0; - // Render pass management. Default implementations here. - virtual void Begin(bool clear, uint32_t colorval, float depthVal, int stencilVal) { - Clear(0xF, colorval, depthVal, stencilVal); - } - virtual void End() {} + // Frame management (for the purposes of sync and resource management, necessary with modern APIs). Default implementations here. 
+ virtual void BeginFrame() {} + virtual void EndFrame() {} + virtual void Clear(int mask, uint32_t colorval, float depthVal, int stencilVal) = 0; // Necessary to correctly flip scissor rectangles etc for OpenGL. diff --git a/ext/native/thin3d/thin3d_vulkan.cpp b/ext/native/thin3d/thin3d_vulkan.cpp index 08d1bac9a3..ca0e90e3cd 100644 --- a/ext/native/thin3d/thin3d_vulkan.cpp +++ b/ext/native/thin3d/thin3d_vulkan.cpp @@ -387,8 +387,8 @@ public: void Clear(int mask, uint32_t colorval, float depthVal, int stencilVal) override; - void Begin(bool clear, uint32_t colorval, float depthVal, int stencilVal) override; - void End() override; + void BeginFrame() override; + void EndFrame() override; std::string GetInfoString(InfoField info) const override { // TODO: Make these actually query the right information @@ -731,21 +731,9 @@ VKContext::~VKContext() { vulkan_->Delete().QueueDeletePipelineCache(pipelineCache_); } -void VKContext::Begin(bool clear, uint32_t colorval, float depthVal, int stencilVal) { +void VKContext::BeginFrame() { cmd_ = vulkan_->BeginFrame(); - VkClearValue clearVal[2] = {}; - Uint8x4ToFloat4(colorval, clearVal[0].color.float32); - - // // Debug flicker - used to see if we swap at all. no longer necessary - // if (frameNum_ & 1) - // clearVal[0].color.float32[2] = 1.0f; - - clearVal[1].depthStencil.depth = depthVal; - clearVal[1].depthStencil.stencil = stencilVal; - - vulkan_->BeginSurfaceRenderPass(clearVal); - FrameData *frame = &frame_[frameNum_ & 1]; push_ = frame->pushBuffer; @@ -761,12 +749,29 @@ void VKContext::Begin(bool clear, uint32_t colorval, float depthVal, int stencil scissor_.extent.height = pixel_yres; scissorDirty_ = true; viewportDirty_ = true; + + int colorval = 0xFF000000; + float depthVal = 0.0; + int stencilVal = 0; + + VkClearValue clearVal[2] = {}; + Uint8x4ToFloat4(colorval, clearVal[0].color.float32); + + // // Debug flicker - used to see if we swap at all. 
no longer necessary + // if (frameNum_ & 1) + // clearVal[0].color.float32[2] = 1.0f; + + clearVal[1].depthStencil.depth = depthVal; + clearVal[1].depthStencil.stencil = stencilVal; + + vulkan_->BeginSurfaceRenderPass(clearVal); } -void VKContext::End() { +void VKContext::EndFrame() { + vulkan_->EndSurfaceRenderPass(); + // Stop collecting data in the frame's data pushbuffer. push_->End(); - vulkan_->EndSurfaceRenderPass(); vulkan_->EndFrame(); frameNum_++; diff --git a/ext/native/ui/ui_screen.cpp b/ext/native/ui/ui_screen.cpp index 3b93a1472e..becd3953ca 100644 --- a/ext/native/ui/ui_screen.cpp +++ b/ext/native/ui/ui_screen.cpp @@ -65,7 +65,8 @@ void UIScreen::preRender() { if (!draw) { return; } - draw->Begin(true, 0xFF000000, 0.0f, 0); + draw->BeginFrame(); + draw->Clear(0xF, 0xFF000000, 0.0f, 0); Draw::Viewport viewport; viewport.TopLeftX = 0; @@ -83,7 +84,7 @@ void UIScreen::postRender() { if (!draw) { return; } - draw->End(); + draw->EndFrame(); } void UIScreen::render() { From 35aefe4a2a1eb512a7bc02495cb5aca911fcfe9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Tue, 16 May 2017 13:53:57 +0200 Subject: [PATCH 03/25] BindBackBufferAsRenderTarget is now replaced with BindFramebufferAsRT(nullptr) --- GPU/Common/FramebufferCommon.cpp | 16 +++++------ GPU/D3D11/FramebufferManagerD3D11.cpp | 8 +++--- GPU/Directx9/FramebufferDX9.cpp | 10 +++---- GPU/GLES/FramebufferManagerGLES.cpp | 10 +++---- GPU/Software/SoftGpu.cpp | 2 +- GPU/Vulkan/FramebufferVulkan.cpp | 2 +- ext/native/thin3d/thin3d.h | 7 +++-- ext/native/thin3d/thin3d_d3d11.cpp | 38 +++++++++++++-------------- ext/native/thin3d/thin3d_d3d9.cpp | 21 ++++++--------- ext/native/thin3d/thin3d_gl.cpp | 23 +++++++--------- ext/native/thin3d/thin3d_vulkan.cpp | 13 ++++----- 11 files changed, 72 insertions(+), 78 deletions(-) diff --git a/GPU/Common/FramebufferCommon.cpp b/GPU/Common/FramebufferCommon.cpp index b7d89316be..3f3716e248 100644 --- a/GPU/Common/FramebufferCommon.cpp +++ 
b/GPU/Common/FramebufferCommon.cpp @@ -211,7 +211,7 @@ void FramebufferManagerCommon::SetNumExtraFBOs(int num) { } currentRenderVfb_ = 0; - draw_->BindBackbufferAsRenderTarget(); + draw_->BindFramebufferAsRenderTarget(nullptr); } // Heuristics to figure out the size of FBO to create. @@ -533,7 +533,7 @@ void FramebufferManagerCommon::DestroyFramebuf(VirtualFramebuffer *v) { void FramebufferManagerCommon::NotifyRenderFramebufferCreated(VirtualFramebuffer *vfb) { if (!useBufferedRendering_) { - draw_->BindBackbufferAsRenderTarget(); + draw_->BindFramebufferAsRenderTarget(nullptr); // Let's ignore rendering to targets that have not (yet) been displayed. gstate_c.skipDrawReason |= SKIPDRAW_NON_DISPLAYED_FB; } @@ -593,7 +593,7 @@ void FramebufferManagerCommon::NotifyRenderFramebufferSwitched(VirtualFramebuffe delete vfb->fbo; vfb->fbo = nullptr; } - draw_->BindBackbufferAsRenderTarget(); + draw_->BindFramebufferAsRenderTarget(nullptr); // Let's ignore rendering to targets that have not (yet) been displayed. if (vfb->usageFlags & FB_USAGE_DISPLAYED_FRAMEBUFFER) { @@ -833,7 +833,7 @@ void FramebufferManagerCommon::CopyDisplayToOutput() { if (displayFramebufPtr_ == 0) { DEBUG_LOG(FRAMEBUF, "Display disabled, displaying only black"); // No framebuffer to display! Clear to black. 
- draw_->BindBackbufferAsRenderTarget(); + draw_->BindFramebufferAsRenderTarget(nullptr); ClearBuffer(); return; } @@ -884,7 +884,7 @@ void FramebufferManagerCommon::CopyDisplayToOutput() { } } - draw_->BindBackbufferAsRenderTarget(); + draw_->BindFramebufferAsRenderTarget(nullptr); if (useBufferedRendering_) { draw_->Clear(Draw::FB_COLOR_BIT | Draw::FB_STENCIL_BIT | Draw::FB_DEPTH_BIT, 0, 0, 0); } @@ -978,7 +978,7 @@ void FramebufferManagerCommon::CopyDisplayToOutput() { bool linearFilter = g_Config.iBufFilter == SCALE_LINEAR; DrawActiveTexture(0, 0, fbo_w, fbo_h, fbo_w, fbo_h, 0.0f, 0.0f, 1.0f, 1.0f, ROTATION_LOCKED_HORIZONTAL, linearFilter); - draw_->BindBackbufferAsRenderTarget(); + draw_->BindFramebufferAsRenderTarget(nullptr); // Use the extra FBO, with applied post-processing shader, as a texture. // fbo_bind_as_texture(extraFBOs_[0], FB_COLOR_BIT, 0); @@ -1047,7 +1047,7 @@ void FramebufferManagerCommon::CopyDisplayToOutput() { void FramebufferManagerCommon::DecimateFBOs() { if (useBufferedRendering_) { - draw_->BindBackbufferAsRenderTarget(); + draw_->BindFramebufferAsRenderTarget(nullptr); } currentRenderVfb_ = 0; @@ -1139,7 +1139,7 @@ void FramebufferManagerCommon::ResizeFramebufFBO(VirtualFramebuffer *vfb, u16 w, } textureCache_->ForgetLastTexture(); - draw_->BindBackbufferAsRenderTarget(); + draw_->BindFramebufferAsRenderTarget(nullptr); if (!useBufferedRendering_) { if (vfb->fbo) { diff --git a/GPU/D3D11/FramebufferManagerD3D11.cpp b/GPU/D3D11/FramebufferManagerD3D11.cpp index 5a19704a05..0532a7b356 100644 --- a/GPU/D3D11/FramebufferManagerD3D11.cpp +++ b/GPU/D3D11/FramebufferManagerD3D11.cpp @@ -474,7 +474,7 @@ void FramebufferManagerD3D11::RebindFramebuffer() { if (currentRenderVfb_ && currentRenderVfb_->fbo) { draw_->BindFramebufferAsRenderTarget(currentRenderVfb_->fbo); } else { - draw_->BindBackbufferAsRenderTarget(); + draw_->BindFramebufferAsRenderTarget(nullptr); } } @@ -717,7 +717,7 @@ void FramebufferManagerD3D11::SimpleBlit( void 
FramebufferManagerD3D11::BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp) { if (!dst->fbo || !src->fbo || !useBufferedRendering_) { // This can happen if they recently switched from non-buffered. - draw_->BindBackbufferAsRenderTarget(); + draw_->BindFramebufferAsRenderTarget(nullptr); return; } @@ -812,7 +812,7 @@ void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 dstStride, u32 srcStride, u32 wid void FramebufferManagerD3D11::PackFramebufferD3D11_(VirtualFramebuffer *vfb, int x, int y, int w, int h) { if (!vfb->fbo) { ERROR_LOG_REPORT_ONCE(vfbfbozero, SCEGE, "PackFramebufferD3D11_: vfb->fbo == 0"); - draw_->BindBackbufferAsRenderTarget(); + draw_->BindFramebufferAsRenderTarget(nullptr); return; } @@ -884,7 +884,7 @@ std::vector FramebufferManagerD3D11::GetFramebufferList() { } void FramebufferManagerD3D11::DestroyAllFBOs() { - draw_->BindBackbufferAsRenderTarget(); + draw_->BindFramebufferAsRenderTarget(nullptr); currentRenderVfb_ = 0; displayFramebuf_ = 0; prevDisplayFramebuf_ = 0; diff --git a/GPU/Directx9/FramebufferDX9.cpp b/GPU/Directx9/FramebufferDX9.cpp index 6a9a03cb75..185b55342e 100644 --- a/GPU/Directx9/FramebufferDX9.cpp +++ b/GPU/Directx9/FramebufferDX9.cpp @@ -326,7 +326,7 @@ static const D3DVERTEXELEMENT9 g_FramebufferVertexElements[] = { if (currentRenderVfb_ && currentRenderVfb_->fbo) { draw_->BindFramebufferAsRenderTarget(currentRenderVfb_->fbo); } else { - draw_->BindBackbufferAsRenderTarget(); + draw_->BindFramebufferAsRenderTarget(nullptr); } } @@ -564,7 +564,7 @@ static const D3DVERTEXELEMENT9 g_FramebufferVertexElements[] = { void FramebufferManagerDX9::BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp) { if (!dst->fbo || !src->fbo || !useBufferedRendering_) { // This can happen if we recently switched from non-buffered. 
- draw_->BindBackbufferAsRenderTarget(); + draw_->BindFramebufferAsRenderTarget(nullptr); return; } @@ -664,7 +664,7 @@ static const D3DVERTEXELEMENT9 g_FramebufferVertexElements[] = { void FramebufferManagerDX9::PackFramebufferDirectx9_(VirtualFramebuffer *vfb, int x, int y, int w, int h) { if (!vfb->fbo) { ERROR_LOG_REPORT_ONCE(vfbfbozero, SCEGE, "PackFramebufferDirectx9_: vfb->fbo == 0"); - draw_->BindBackbufferAsRenderTarget(); + draw_->BindFramebufferAsRenderTarget(nullptr); return; } @@ -792,7 +792,7 @@ static const D3DVERTEXELEMENT9 g_FramebufferVertexElements[] = { } void FramebufferManagerDX9::DestroyAllFBOs() { - draw_->BindBackbufferAsRenderTarget(); + draw_->BindFramebufferAsRenderTarget(nullptr); currentRenderVfb_ = 0; displayFramebuf_ = 0; prevDisplayFramebuf_ = 0; @@ -882,7 +882,7 @@ static const D3DVERTEXELEMENT9 g_FramebufferVertexElements[] = { } bool FramebufferManagerDX9::GetOutputFramebuffer(GPUDebugBuffer &buffer) { - draw_->BindBackbufferAsRenderTarget(); + draw_->BindFramebufferAsRenderTarget(nullptr); LPDIRECT3DSURFACE9 renderTarget = nullptr; HRESULT hr = device_->GetRenderTarget(0, &renderTarget); diff --git a/GPU/GLES/FramebufferManagerGLES.cpp b/GPU/GLES/FramebufferManagerGLES.cpp index 26ce3f48d2..c14b93dc24 100644 --- a/GPU/GLES/FramebufferManagerGLES.cpp +++ b/GPU/GLES/FramebufferManagerGLES.cpp @@ -452,7 +452,7 @@ void FramebufferManagerGLES::RebindFramebuffer() { if (currentRenderVfb_ && currentRenderVfb_->fbo) { draw_->BindFramebufferAsRenderTarget(currentRenderVfb_->fbo); } else { - draw_->BindBackbufferAsRenderTarget(); + draw_->BindFramebufferAsRenderTarget(nullptr); } if (g_Config.iRenderingMode == FB_NON_BUFFERED_MODE) glstate.viewport.restore(); @@ -680,7 +680,7 @@ void FramebufferManagerGLES::UpdateDownloadTempBuffer(VirtualFramebuffer *nvfb) void FramebufferManagerGLES::BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp) { if (!dst->fbo || 
!src->fbo || !useBufferedRendering_) { // This can happen if they recently switched from non-buffered. - draw_->BindBackbufferAsRenderTarget(); + draw_->BindFramebufferAsRenderTarget(nullptr); return; } @@ -1165,7 +1165,7 @@ void FramebufferManagerGLES::EndFrame() { GLenum attachments[3] = { GL_COLOR_ATTACHMENT0, GL_STENCIL_ATTACHMENT, GL_DEPTH_ATTACHMENT }; glInvalidateFramebuffer(GL_FRAMEBUFFER, 3, attachments); } - draw_->BindBackbufferAsRenderTarget(); + draw_->BindFramebufferAsRenderTarget(nullptr); } CHECK_GL_ERROR_IF_DEBUG(); } @@ -1196,7 +1196,7 @@ std::vector FramebufferManagerGLES::GetFramebufferList() { void FramebufferManagerGLES::DestroyAllFBOs() { CHECK_GL_ERROR_IF_DEBUG(); - draw_->BindBackbufferAsRenderTarget(); + draw_->BindFramebufferAsRenderTarget(nullptr); currentRenderVfb_ = 0; displayFramebuf_ = 0; prevDisplayFramebuf_ = 0; @@ -1220,7 +1220,7 @@ void FramebufferManagerGLES::DestroyAllFBOs() { } tempFBOs_.clear(); - draw_->BindBackbufferAsRenderTarget(); + draw_->BindFramebufferAsRenderTarget(nullptr); DisableState(); CHECK_GL_ERROR_IF_DEBUG(); } diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index dfef1d3386..10d7ab7370 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -236,7 +236,7 @@ void SoftGPU::CopyToCurrentFboFromDisplayRam(int srcwidth, int srcheight) { if (GetGPUBackend() == GPUBackend::VULKAN) { std::swap(v0, v1); } - draw_->BindBackbufferAsRenderTarget(); + draw_->BindFramebufferAsRenderTarget(nullptr); draw_->Clear(Draw::FB_COLOR_BIT, 0, 0, 0); Draw::SamplerState *sampler; diff --git a/GPU/Vulkan/FramebufferVulkan.cpp b/GPU/Vulkan/FramebufferVulkan.cpp index eed60df430..26b7ee598a 100644 --- a/GPU/Vulkan/FramebufferVulkan.cpp +++ b/GPU/Vulkan/FramebufferVulkan.cpp @@ -482,7 +482,7 @@ void FramebufferManagerVulkan::RebindFramebuffer() { if (currentRenderVfb_ && currentRenderVfb_->fbo) { draw_->BindFramebufferAsRenderTarget(currentRenderVfb_->fbo); } else { - 
draw_->BindBackbufferAsRenderTarget(); + draw_->BindFramebufferAsRenderTarget(nullptr); } } diff --git a/ext/native/thin3d/thin3d.h b/ext/native/thin3d/thin3d.h index 75175c431e..2635e1296c 100644 --- a/ext/native/thin3d/thin3d.h +++ b/ext/native/thin3d/thin3d.h @@ -600,12 +600,13 @@ public: virtual bool BlitFramebuffer(Framebuffer *src, int srcX1, int srcY1, int srcX2, int srcY2, Framebuffer *dst, int dstX1, int dstY1, int dstX2, int dstY2, int channelBits, FBBlitFilter filter) = 0; // These functions should be self explanatory. + // Binding a zero render target means binding the backbuffer. virtual void BindFramebufferAsRenderTarget(Framebuffer *fbo) = 0; + // color must be 0, for now. virtual void BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int attachment) = 0; virtual void BindFramebufferForRead(Framebuffer *fbo) = 0; - virtual void BindBackbufferAsRenderTarget() = 0; virtual uintptr_t GetFramebufferAPITexture(Framebuffer *fbo, int channelBits, int attachment) = 0; virtual void GetFramebufferDimensions(Framebuffer *fbo, int *w, int *h) = 0; @@ -640,8 +641,10 @@ public: virtual void BeginFrame() {} virtual void EndFrame() {} + // This should be avoided as much as possible, in favor of clearing when binding a render target, which is native + // on Vulkan. virtual void Clear(int mask, uint32_t colorval, float depthVal, int stencilVal) = 0; - + // Necessary to correctly flip scissor rectangles etc for OpenGL. 
void SetTargetSize(int w, int h) { targetWidth_ = w; diff --git a/ext/native/thin3d/thin3d_d3d11.cpp b/ext/native/thin3d/thin3d_d3d11.cpp index 2d40ad00ed..d4e0add435 100644 --- a/ext/native/thin3d/thin3d_d3d11.cpp +++ b/ext/native/thin3d/thin3d_d3d11.cpp @@ -63,7 +63,6 @@ public: void BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int attachment) override; void BindFramebufferForRead(Framebuffer *fbo) override; - void BindBackbufferAsRenderTarget() override; uintptr_t GetFramebufferAPITexture(Framebuffer *fbo, int channelBit, int attachment) override; void GetFramebufferDimensions(Framebuffer *fbo, int *w, int *h) override; @@ -1287,18 +1286,27 @@ bool D3D11DrawContext::BlitFramebuffer(Framebuffer *srcfb, int srcX1, int srcY1, return false; } -// These functions should be self explanatory. void D3D11DrawContext::BindFramebufferAsRenderTarget(Framebuffer *fbo) { // TODO: deviceContext1 can actually discard. Useful on Windows Mobile. - D3D11Framebuffer *fb = (D3D11Framebuffer *)fbo; - if (curRenderTargetView_ == fb->colorRTView && curDepthStencilView_ == fb->depthStencilRTView) { - return; + if (fbo) { + D3D11Framebuffer *fb = (D3D11Framebuffer *)fbo; + if (curRenderTargetView_ == fb->colorRTView && curDepthStencilView_ == fb->depthStencilRTView) { + return; + } + context_->OMSetRenderTargets(1, &fb->colorRTView, fb->depthStencilRTView); + curRenderTargetView_ = fb->colorRTView; + curDepthStencilView_ = fb->depthStencilRTView; + curRTWidth_ = fb->width; + curRTHeight_ = fb->height; + } else { + if (curRenderTargetView_ == bbRenderTargetView_ && curDepthStencilView_ == bbDepthStencilView_) + return; + context_->OMSetRenderTargets(1, &bbRenderTargetView_, bbDepthStencilView_); + curRenderTargetView_ = bbRenderTargetView_; + curDepthStencilView_ = bbDepthStencilView_; + curRTWidth_ = bbWidth_; + curRTHeight_ = bbHeight_; } - context_->OMSetRenderTargets(1, &fb->colorRTView, fb->depthStencilRTView); - curRenderTargetView_ = 
fb->colorRTView; - curDepthStencilView_ = fb->depthStencilRTView; - curRTWidth_ = fb->width; - curRTHeight_ = fb->height; } // color must be 0, for now. @@ -1311,16 +1319,6 @@ void D3D11DrawContext::BindFramebufferForRead(Framebuffer *fbo) { // This is meaningless in D3D11 } -void D3D11DrawContext::BindBackbufferAsRenderTarget() { - if (curRenderTargetView_ == bbRenderTargetView_ && curDepthStencilView_ == bbDepthStencilView_) - return; - context_->OMSetRenderTargets(1, &bbRenderTargetView_, bbDepthStencilView_); - curRenderTargetView_ = bbRenderTargetView_; - curDepthStencilView_ = bbDepthStencilView_; - curRTWidth_ = bbWidth_; - curRTHeight_ = bbHeight_; -} - uintptr_t D3D11DrawContext::GetFramebufferAPITexture(Framebuffer *fbo, int channelBit, int attachment) { D3D11Framebuffer *fb = (D3D11Framebuffer *)fbo; switch (channelBit) { diff --git a/ext/native/thin3d/thin3d_d3d9.cpp b/ext/native/thin3d/thin3d_d3d9.cpp index 09998a2149..06c1fa6bc0 100644 --- a/ext/native/thin3d/thin3d_d3d9.cpp +++ b/ext/native/thin3d/thin3d_d3d9.cpp @@ -492,7 +492,6 @@ public: void BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int attachment) override; void BindFramebufferForRead(Framebuffer *fbo) override {} - void BindBackbufferAsRenderTarget() override; uintptr_t GetFramebufferAPITexture(Framebuffer *fbo, int channelBits, int attachment) override; void GetFramebufferDimensions(Framebuffer *fbo, int *w, int *h) override; @@ -1044,21 +1043,17 @@ D3D9Framebuffer::~D3D9Framebuffer() { } } -void D3D9Context::BindBackbufferAsRenderTarget() { - using namespace DX9; - - device_->SetRenderTarget(0, deviceRTsurf); - device_->SetDepthStencilSurface(deviceDSsurf); - dxstate.scissorRect.restore(); - dxstate.viewport.restore(); -} - void D3D9Context::BindFramebufferAsRenderTarget(Framebuffer *fbo) { using namespace DX9; - D3D9Framebuffer *fb = (D3D9Framebuffer *)fbo; - device_->SetRenderTarget(0, fb->surf); - device_->SetDepthStencilSurface(fb->depthstencil); + if 
(fbo) { + D3D9Framebuffer *fb = (D3D9Framebuffer *)fbo; + device_->SetRenderTarget(0, fb->surf); + device_->SetDepthStencilSurface(fb->depthstencil); + } else { + device_->SetRenderTarget(0, deviceRTsurf); + device_->SetDepthStencilSurface(deviceDSsurf); + } dxstate.scissorRect.restore(); dxstate.viewport.restore(); } diff --git a/ext/native/thin3d/thin3d_gl.cpp b/ext/native/thin3d/thin3d_gl.cpp index 63d9b235c7..7388cb34e5 100644 --- a/ext/native/thin3d/thin3d_gl.cpp +++ b/ext/native/thin3d/thin3d_gl.cpp @@ -470,7 +470,6 @@ public: void BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int attachment) override; void BindFramebufferForRead(Framebuffer *fbo) override; - void BindBackbufferAsRenderTarget() override; uintptr_t GetFramebufferAPITexture(Framebuffer *fbo, int channelBits, int attachment) override; void GetFramebufferDimensions(Framebuffer *fbo, int *w, int *h) override; @@ -1535,19 +1534,17 @@ void OpenGLContext::fbo_unbind() { } void OpenGLContext::BindFramebufferAsRenderTarget(Framebuffer *fbo) { - OpenGLFramebuffer *fb = (OpenGLFramebuffer *)fbo; - // Without FBO_ARB / GLES3, this will collide with bind_for_read, but there's nothing - // in ES 2.0 that actually separate them anyway of course, so doesn't matter. - fbo_bind_fb_target(false, fb->handle); - // Always restore viewport after render target binding - // TODO: Should we set viewports this way too? - glstate.viewport.restore(); CHECK_GL_ERROR_IF_DEBUG(); -} - -void OpenGLContext::BindBackbufferAsRenderTarget() { - CHECK_GL_ERROR_IF_DEBUG(); - fbo_unbind(); + if (fbo) { + OpenGLFramebuffer *fb = (OpenGLFramebuffer *)fbo; + // Without FBO_ARB / GLES3, this will collide with bind_for_read, but there's nothing + // in ES 2.0 that actually separate them anyway of course, so doesn't matter. + fbo_bind_fb_target(false, fb->handle); + // Always restore viewport after render target binding. Works around driver bugs. 
+ glstate.viewport.restore(); + } else { + fbo_unbind(); + } CHECK_GL_ERROR_IF_DEBUG(); } diff --git a/ext/native/thin3d/thin3d_vulkan.cpp b/ext/native/thin3d/thin3d_vulkan.cpp index ca0e90e3cd..b2103485a8 100644 --- a/ext/native/thin3d/thin3d_vulkan.cpp +++ b/ext/native/thin3d/thin3d_vulkan.cpp @@ -351,7 +351,6 @@ public: void BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int attachment) override; void BindFramebufferForRead(Framebuffer *fbo) override; - void BindBackbufferAsRenderTarget() override; uintptr_t GetFramebufferAPITexture(Framebuffer *fbo, int channelBit, int attachment) override; void GetFramebufferDimensions(Framebuffer *fbo, int *w, int *h) override; @@ -1303,10 +1302,16 @@ bool VKContext::BlitFramebuffer(Framebuffer *srcfb, int srcX1, int srcY1, int sr VKFramebuffer *dst = (VKFramebuffer *)dstfb; return true; } + // These functions should be self explanatory. void VKContext::BindFramebufferAsRenderTarget(Framebuffer *fbo) { - VKFramebuffer *fb = (VKFramebuffer *)fbo; + if (fbo) { + VKFramebuffer *fb = (VKFramebuffer *)fbo; + } else { + + } } + // color must be 0, for now. void VKContext::BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int attachment) { VKFramebuffer *fb = (VKFramebuffer *)fbo; @@ -1314,10 +1319,6 @@ void VKContext::BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChanne } void VKContext::BindFramebufferForRead(Framebuffer *fbo) { /* noop */ } -void VKContext::BindBackbufferAsRenderTarget() { - -} - uintptr_t VKContext::GetFramebufferAPITexture(Framebuffer *fbo, int channelBit, int attachment) { return 0; } From 61474487197dd886526cf5eecebcdc4da8958b74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Tue, 16 May 2017 14:09:15 +0200 Subject: [PATCH 04/25] Draw overlays at the proper time in the frame. 
--- UI/NativeApp.cpp | 31 +++++++++++++++++-------------- ext/native/ui/screen.cpp | 4 ++++ ext/native/ui/screen.h | 10 ++++++++++ 3 files changed, 31 insertions(+), 14 deletions(-) diff --git a/UI/NativeApp.cpp b/UI/NativeApp.cpp index 717177ef81..6eaee626bf 100644 --- a/UI/NativeApp.cpp +++ b/UI/NativeApp.cpp @@ -588,6 +588,8 @@ static void UIThemeInit() { ui_theme.popupStyle = MakeStyle(g_Config.uPopupStyleFg, g_Config.uPopupStyleBg); } +void RenderOverlays(UIContext *dc, void *userdata); + void NativeInitGraphics(GraphicsContext *graphicsContext) { ILOG("NativeInitGraphics"); @@ -652,6 +654,7 @@ void NativeInitGraphics(GraphicsContext *graphicsContext) { screenManager->setUIContext(uiContext); screenManager->setDrawContext(g_draw); + screenManager->setPostRenderCallback(&RenderOverlays, nullptr); UIBackgroundInit(*uiContext); @@ -737,7 +740,7 @@ void TakeScreenshot() { #endif } -void DrawDownloadsOverlay(UIContext &dc) { +void RenderOverlays(UIContext *dc, void *userdata) { // Thin bar at the top of the screen like Chrome. std::vector progress = g_DownloadManager.GetCurrentProgress(); if (progress.empty()) { @@ -751,27 +754,34 @@ void DrawDownloadsOverlay(UIContext &dc) { 0xFF777777, }; - dc.Begin(); + dc->Begin(); int h = 5; for (size_t i = 0; i < progress.size(); i++) { - float barWidth = 10 + (dc.GetBounds().w - 10) * progress[i]; + float barWidth = 10 + (dc->GetBounds().w - 10) * progress[i]; Bounds bounds(0, h * i, barWidth, h); UI::Drawable solid(colors[i & 3]); - dc.FillRect(solid, bounds); + dc->FillRect(solid, bounds); + } + dc->End(); + dc->Flush(); + + if (g_TakeScreenshot) { + TakeScreenshot(); } - dc.End(); - dc.Flush(); } void NativeRender(GraphicsContext *graphicsContext) { g_GameManager.Update(); + // If uitexture gets reloaded, make sure we use the latest one. + // Not sure this happens anymore now that we tear down all graphics on app switches... 
uiContext->FrameSetup(uiTexture->GetTexture()); float xres = dp_xres; float yres = dp_yres; // Apply the UIContext bounds as a 2D transformation matrix. + // TODO: This should be moved into the draw context... Matrix4x4 ortho; switch (GetGPUBackend()) { case GPUBackend::VULKAN: @@ -798,19 +808,12 @@ void NativeRender(GraphicsContext *graphicsContext) { ui_draw2d.PushDrawMatrix(ortho); ui_draw2d_front.PushDrawMatrix(ortho); + // All actual rendering happen in here. screenManager->render(); if (screenManager->getUIContext()->Text()) { screenManager->getUIContext()->Text()->OncePerFrame(); } - // At this point, the vulkan context has been "ended" already, no more drawing can be done in this frame. - // TODO: Integrate the download overlay with the screen system - DrawDownloadsOverlay(*screenManager->getUIContext()); - - if (g_TakeScreenshot) { - TakeScreenshot(); - } - if (resized) { resized = false; diff --git a/ext/native/ui/screen.cpp b/ext/native/ui/screen.cpp index e879fef120..45e06ef3e4 100644 --- a/ext/native/ui/screen.cpp +++ b/ext/native/ui/screen.cpp @@ -125,12 +125,16 @@ void ScreenManager::render() { backback.screen->preRender(); backback.screen->render(); stack_.back().screen->render(); + if (postRenderCb_) + postRenderCb_(getUIContext(), postRenderUserdata_); backback.screen->postRender(); break; } default: stack_.back().screen->preRender(); stack_.back().screen->render(); + if (postRenderCb_) + postRenderCb_(getUIContext(), postRenderUserdata_); stack_.back().screen->postRender(); break; } diff --git a/ext/native/ui/screen.h b/ext/native/ui/screen.h index 6997578a5a..a593ef8c1a 100644 --- a/ext/native/ui/screen.h +++ b/ext/native/ui/screen.h @@ -92,6 +92,8 @@ enum { LAYER_TRANSPARENT = 2, }; +typedef void(*PostRenderCallback)(UIContext *ui, void *userdata); + class ScreenManager { public: ScreenManager(); @@ -106,6 +108,11 @@ public: void setDrawContext(Draw::DrawContext *context) { thin3DContext_ = context; } Draw::DrawContext *getDrawContext() { 
return thin3DContext_; } + void setPostRenderCallback(PostRenderCallback cb, void *userdata) { + postRenderCb_ = cb; + postRenderUserdata_ = userdata; + } + void render(); void resized(); void deviceLost(); @@ -142,6 +149,9 @@ private: UIContext *uiContext_; Draw::DrawContext *thin3DContext_; + PostRenderCallback postRenderCb_ = nullptr; + void *postRenderUserdata_ = nullptr; + const Screen *dialogFinished_; DialogResult dialogResult_; From 0f171acacbd31798a167372d576ce5b0a66c5e4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Tue, 16 May 2017 14:24:40 +0200 Subject: [PATCH 05/25] Start refactoring things to be more vulkan-friendly --- UI/EmuScreen.cpp | 56 ++++++++++++++++++++++++++----------- UI/EmuScreen.h | 2 ++ ext/native/ui/ui_screen.cpp | 2 ++ 3 files changed, 43 insertions(+), 17 deletions(-) diff --git a/UI/EmuScreen.cpp b/UI/EmuScreen.cpp index 09cdd53c91..7e810c418a 100644 --- a/UI/EmuScreen.cpp +++ b/UI/EmuScreen.cpp @@ -958,6 +958,43 @@ static void DrawFPS(DrawBuffer *draw2d, const Bounds &bounds) { draw2d->SetFontScale(1.0f, 1.0f); } +void EmuScreen::preRender() { + using namespace Draw; + DrawContext *draw = screenManager()->getDrawContext(); + draw->BeginFrame(); + // Here we do NOT bind the backbuffer or clear the screen, unless non-buffered. + // The emuscreen is different than the others - we really want to allow the game to render to framebuffers + // before we ever bind the backbuffer for rendering. On mobile GPUs, switching back and forth between render + // targets is a mortal sin so it's very important that we don't bind the backbuffer unnecessarily here. + // We only bind it in FramebufferManager::CopyDisplayToOutput (unless non-buffered)... + // We do, however, start the frame in other ways. + + bool useBufferedRendering = g_Config.iRenderingMode != FB_NON_BUFFERED_MODE; + if (!useBufferedRendering) { + // We need to clear here already so that drawing during the frame is done on a clean slate. 
+ DrawContext *draw = screenManager()->getDrawContext(); + draw->BindFramebufferAsRenderTarget(nullptr); + draw->Clear(FBChannel::FB_COLOR_BIT | FBChannel::FB_DEPTH_BIT | FBChannel::FB_STENCIL_BIT, 0xFF000000, 0.0f, 0); + + Viewport viewport; + viewport.TopLeftX = 0; + viewport.TopLeftY = 0; + viewport.Width = pixel_xres; + viewport.Height = pixel_yres; + viewport.MaxDepth = 1.0; + viewport.MinDepth = 0.0; + draw->SetViewports(1, &viewport); + draw->SetTargetSize(pixel_xres, pixel_yres); + } +} + +void EmuScreen::postRender() { + Draw::DrawContext *draw = screenManager()->getDrawContext(); + if (!draw) + return; + draw->EndFrame(); +} + void EmuScreen::render() { using namespace Draw; @@ -979,23 +1016,6 @@ void EmuScreen::render() { } } - bool useBufferedRendering = g_Config.iRenderingMode != FB_NON_BUFFERED_MODE; - - if (!useBufferedRendering) { - DrawContext *draw = screenManager()->getDrawContext(); - draw->Clear(FBChannel::FB_COLOR_BIT | FBChannel::FB_DEPTH_BIT | FBChannel::FB_STENCIL_BIT, 0xFF000000, 0.0f, 0); - - Viewport viewport; - viewport.TopLeftX = 0; - viewport.TopLeftY = 0; - viewport.Width = pixel_xres; - viewport.Height = pixel_yres; - viewport.MaxDepth = 1.0; - viewport.MinDepth = 0.0; - draw->SetViewports(1, &viewport); - draw->SetTargetSize(pixel_xres, pixel_yres); - } - PSP_BeginHostFrame(); // We just run the CPU until we get to vblank. This will quickly sync up pretty nicely. @@ -1018,6 +1038,8 @@ void EmuScreen::render() { if (invalid_) return; + + // Here the backbuffer will always be bound. 
if (!osm.IsEmpty() || g_Config.bShowDebugStats || g_Config.iShowFPSCounter || g_Config.bShowTouchControls || g_Config.bShowDeveloperMenu || g_Config.bShowAudioDebug || saveStatePreview_->GetVisibility() != UI::V_GONE || g_Config.bShowFrameProfiler) { DrawContext *thin3d = screenManager()->getDrawContext(); diff --git a/UI/EmuScreen.h b/UI/EmuScreen.h index 74a24f93ee..3f103513b7 100644 --- a/UI/EmuScreen.h +++ b/UI/EmuScreen.h @@ -37,6 +37,8 @@ public: void update() override; void render() override; + void preRender() override; + void postRender() override; void deviceLost() override; void deviceRestore() override; void dialogFinished(const Screen *dialog, DialogResult result) override; diff --git a/ext/native/ui/ui_screen.cpp b/ext/native/ui/ui_screen.cpp index becd3953ca..65633292b4 100644 --- a/ext/native/ui/ui_screen.cpp +++ b/ext/native/ui/ui_screen.cpp @@ -66,6 +66,8 @@ void UIScreen::preRender() { return; } draw->BeginFrame(); + // Bind the back buffer + draw->BindFramebufferAsRenderTarget(nullptr); draw->Clear(0xF, 0xFF000000, 0.0f, 0); Draw::Viewport viewport; From 20870194e3b86811482b113ac804c755b53fc4a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Tue, 16 May 2017 14:27:34 +0200 Subject: [PATCH 06/25] Thin3D GL: Avoid using external functions internally. 
--- ext/native/thin3d/thin3d_gl.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ext/native/thin3d/thin3d_gl.cpp b/ext/native/thin3d/thin3d_gl.cpp index 7388cb34e5..62c857846e 100644 --- a/ext/native/thin3d/thin3d_gl.cpp +++ b/ext/native/thin3d/thin3d_gl.cpp @@ -1606,8 +1606,10 @@ bool OpenGLContext::BlitFramebuffer(Framebuffer *fbsrc, int srcX1, int srcY1, in bits |= GL_DEPTH_BUFFER_BIT; if (channels & FB_STENCIL_BIT) bits |= GL_STENCIL_BUFFER_BIT; - BindFramebufferAsRenderTarget(dst); - BindFramebufferForRead(src); + // Without FBO_ARB / GLES3, this will collide with bind_for_read, but there's nothing + // in ES 2.0 that actually separates them anyway, of course, so it doesn't matter. + fbo_bind_fb_target(false, dst->handle); + fbo_bind_fb_target(true, src->handle); if (gl_extensions.GLES3 || gl_extensions.ARB_framebuffer_object) { glBlitFramebuffer(srcX1, srcY1, srcX2, srcY2, dstX1, dstY1, dstX2, dstY2, bits, linearFilter == FB_BLIT_LINEAR ? GL_LINEAR : GL_NEAREST); CHECK_GL_ERROR_IF_DEBUG(); From ce9e864e4bcc1742c670c0789e97d2dfa3f038ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Tue, 16 May 2017 14:45:58 +0200 Subject: [PATCH 07/25] Remove some unnecessary framebuffer binding --- GPU/Common/FramebufferCommon.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/GPU/Common/FramebufferCommon.cpp b/GPU/Common/FramebufferCommon.cpp index 3f3716e248..ec1fd7bb70 100644 --- a/GPU/Common/FramebufferCommon.cpp +++ b/GPU/Common/FramebufferCommon.cpp @@ -204,12 +204,7 @@ void FramebufferManagerCommon::SetNumExtraFBOs(int num) { // No depth/stencil for post processing Draw::Framebuffer *fbo = draw_->CreateFramebuffer({ (int)renderWidth_, (int)renderHeight_, 1, 1, false, Draw::FBO_8888 }); extraFBOs_.push_back(fbo); - - // The new FBO is still bound after creation, but let's bind and clear it anyway.
- draw_->BindFramebufferAsRenderTarget(fbo); - ClearBuffer(); } - currentRenderVfb_ = 0; draw_->BindFramebufferAsRenderTarget(nullptr); } @@ -533,7 +528,6 @@ void FramebufferManagerCommon::DestroyFramebuf(VirtualFramebuffer *v) { void FramebufferManagerCommon::NotifyRenderFramebufferCreated(VirtualFramebuffer *vfb) { if (!useBufferedRendering_) { - draw_->BindFramebufferAsRenderTarget(nullptr); // Let's ignore rendering to targets that have not (yet) been displayed. gstate_c.skipDrawReason |= SKIPDRAW_NON_DISPLAYED_FB; } From 6bcfe539f770fef0c173111a54094415845303c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Tue, 16 May 2017 16:00:34 +0200 Subject: [PATCH 08/25] Use vulkan-style clear-on-bind when switching render targets. Not optimally used yet. Also removes a bunch of redundant render target binds. --- GPU/Common/FramebufferCommon.cpp | 81 +++++++++++++-------------- GPU/Common/FramebufferCommon.h | 2 +- GPU/D3D11/FramebufferManagerD3D11.cpp | 24 ++++---- GPU/D3D11/StencilBufferD3D11.cpp | 5 +- GPU/D3D11/TextureCacheD3D11.cpp | 2 +- GPU/Directx9/FramebufferDX9.cpp | 18 +++--- GPU/Directx9/StencilBufferDX9.cpp | 2 +- GPU/Directx9/TextureCacheDX9.cpp | 2 +- GPU/GLES/FramebufferManagerGLES.cpp | 42 ++++++-------- GPU/GLES/StencilBufferGLES.cpp | 4 +- GPU/GLES/TextureCacheGLES.cpp | 2 +- GPU/Software/SoftGpu.cpp | 3 +- GPU/Vulkan/FramebufferVulkan.cpp | 5 +- UI/EmuScreen.cpp | 3 +- ext/native/thin3d/thin3d.h | 19 ++++++- ext/native/thin3d/thin3d_d3d11.cpp | 13 ++++- ext/native/thin3d/thin3d_d3d9.cpp | 19 ++++++- ext/native/thin3d/thin3d_gl.cpp | 36 +++++++++++- ext/native/thin3d/thin3d_vulkan.cpp | 4 +- ext/native/ui/ui_screen.cpp | 6 +- 20 files changed, 171 insertions(+), 121 deletions(-) diff --git a/GPU/Common/FramebufferCommon.cpp b/GPU/Common/FramebufferCommon.cpp index ec1fd7bb70..9a985234de 100644 --- a/GPU/Common/FramebufferCommon.cpp +++ b/GPU/Common/FramebufferCommon.cpp @@ -134,10 +134,6 @@ 
FramebufferManagerCommon::~FramebufferManagerCommon() { void FramebufferManagerCommon::Init() { const std::string gameId = g_paramSFO.GetValueString("DISC_ID"); - // And an initial clear. We don't clear per frame as the games are supposed to handle that - // by themselves. - ClearBuffer(); - BeginFrame(); } @@ -206,7 +202,7 @@ void FramebufferManagerCommon::SetNumExtraFBOs(int num) { extraFBOs_.push_back(fbo); } currentRenderVfb_ = 0; - draw_->BindFramebufferAsRenderTarget(nullptr); + draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::KEEP, Draw::RPAction::KEEP }); } // Heuristics to figure out the size of FBO to create. @@ -534,6 +530,7 @@ void FramebufferManagerCommon::NotifyRenderFramebufferCreated(VirtualFramebuffer textureCache_->NotifyFramebuffer(vfb->fb_address, vfb, NOTIFY_FB_CREATED); + // TODO: Is this necessary? ClearBuffer(); // ugly... @@ -575,7 +572,19 @@ void FramebufferManagerCommon::NotifyRenderFramebufferSwitched(VirtualFramebuffe if (useBufferedRendering_) { if (vfb->fbo) { - draw_->BindFramebufferAsRenderTarget(vfb->fbo); + if (gl_extensions.IsGLES) { + // Some tiled mobile GPUs benefit IMMENSELY from clearing an FBO before rendering + // to it. This broke stuff before, so now it only clears on the first use of an + // FBO in a frame. This means that some games won't be able to avoid the on-some-GPUs + // performance-crushing framebuffer reloads from RAM, but we'll have to live with that. + if (vfb->last_frame_render != gpuStats.numFlips) { + draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }); + } else { + draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP }); + } + } else { + draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP }); + } } else { // This should only happen very briefly when toggling useBufferedRendering_. 
ResizeFramebufFBO(vfb, vfb->width, vfb->height, true); @@ -587,7 +596,6 @@ void FramebufferManagerCommon::NotifyRenderFramebufferSwitched(VirtualFramebuffe delete vfb->fbo; vfb->fbo = nullptr; } - draw_->BindFramebufferAsRenderTarget(nullptr); // Let's ignore rendering to targets that have not (yet) been displayed. if (vfb->usageFlags & FB_USAGE_DISPLAYED_FRAMEBUFFER) { @@ -598,16 +606,6 @@ void FramebufferManagerCommon::NotifyRenderFramebufferSwitched(VirtualFramebuffe } textureCache_->NotifyFramebuffer(vfb->fb_address, vfb, NOTIFY_FB_UPDATED); - if (gl_extensions.IsGLES) { - // Some tiled mobile GPUs benefit IMMENSELY from clearing an FBO before rendering - // to it. This broke stuff before, so now it only clears on the first use of an - // FBO in a frame. This means that some games won't be able to avoid the on-some-GPUs - // performance-crushing framebuffer reloads from RAM, but we'll have to live with that. - if (vfb->last_frame_render != gpuStats.numFlips) { - ClearBuffer(); - } - } - // Copy depth pixel value from the read framebuffer to the draw framebuffer if (prevVfb && !g_Config.bDisableSlowFramebufEffects) { if (!prevVfb->fbo || !vfb->fbo || !useBufferedRendering_ || !prevVfb->depthUpdated || isClearingDepth) { @@ -704,7 +702,7 @@ void FramebufferManagerCommon::DrawPixels(VirtualFramebuffer *vfb, int dstX, int MakePixelTexture(srcPixels, srcPixelFormat, srcStride, width, height, u1, v1); if (useBufferedRendering_ && vfb && vfb->fbo) { - draw_->BindFramebufferAsRenderTarget(vfb->fbo); + draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP }); SetViewport2D(0, 0, vfb->renderWidth, vfb->renderHeight); } else { // We are drawing to the back buffer so need to flip. @@ -827,8 +825,7 @@ void FramebufferManagerCommon::CopyDisplayToOutput() { if (displayFramebufPtr_ == 0) { DEBUG_LOG(FRAMEBUF, "Display disabled, displaying only black"); // No framebuffer to display! Clear to black. 
- draw_->BindFramebufferAsRenderTarget(nullptr); - ClearBuffer(); + draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }); return; } @@ -878,11 +875,6 @@ void FramebufferManagerCommon::CopyDisplayToOutput() { } } - draw_->BindFramebufferAsRenderTarget(nullptr); - if (useBufferedRendering_) { - draw_->Clear(Draw::FB_COLOR_BIT | Draw::FB_STENCIL_BIT | Draw::FB_DEPTH_BIT, 0, 0, 0); - } - if (!vfb) { if (Memory::IsValidAddress(displayFramebufPtr_)) { // The game is displaying something directly from RAM. In GTA, it's decoded video. @@ -895,6 +887,10 @@ void FramebufferManagerCommon::CopyDisplayToOutput() { } if (!vfb) { + if (useBufferedRendering_) { + // Bind and clear the backbuffer. This should be the first time during the frame that it's bound. + draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }); + } // Just a pointer to plain memory to draw. We should create a framebuffer, then draw to it. DrawFramebufferToOutput(Memory::GetPointer(displayFramebufPtr_), displayFormat_, displayStride_, true); return; @@ -902,7 +898,10 @@ void FramebufferManagerCommon::CopyDisplayToOutput() { } else { DEBUG_LOG(FRAMEBUF, "Found no FBO to display! displayFBPtr = %08x", displayFramebufPtr_); // No framebuffer to display! Clear to black. - ClearBuffer(); + if (useBufferedRendering_) { + // Bind and clear the backbuffer. This should be the first time during the frame that it's bound. + draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }); + } return; } } @@ -924,8 +923,6 @@ void FramebufferManagerCommon::CopyDisplayToOutput() { DEBUG_LOG(FRAMEBUF, "Displaying FBO %08x", vfb->fb_address); DisableState(); - draw_->BindFramebufferAsTexture(vfb->fbo, 0, Draw::FB_COLOR_BIT, 0); - int uvRotation = useBufferedRendering_ ? 
g_Config.iInternalScreenRotation : ROTATION_LOCKED_HORIZONTAL; // Output coordinates @@ -940,6 +937,8 @@ void FramebufferManagerCommon::CopyDisplayToOutput() { float v1 = (272.0f + offsetY) / (float)vfb->bufferHeight; if (!usePostShader_) { + draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }); + draw_->BindFramebufferAsTexture(vfb->fbo, 0, Draw::FB_COLOR_BIT, 0); bool linearFilter = g_Config.iBufFilter == SCALE_LINEAR; // We are doing the DrawActiveTexture call directly to the backbuffer here. Hence, we must // flip V. @@ -961,7 +960,7 @@ void FramebufferManagerCommon::CopyDisplayToOutput() { } } else if (usePostShader_ && extraFBOs_.size() == 1 && !postShaderAtOutputResolution_) { // An additional pass, post-processing shader to the extra FBO. - draw_->BindFramebufferAsRenderTarget(extraFBOs_[0]); + draw_->BindFramebufferAsRenderTarget(extraFBOs_[0], { Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE }); int fbo_w, fbo_h; draw_->GetFramebufferDimensions(extraFBOs_[0], &fbo_w, &fbo_h); SetViewport2D(0, 0, fbo_w, fbo_h); @@ -972,7 +971,7 @@ void FramebufferManagerCommon::CopyDisplayToOutput() { bool linearFilter = g_Config.iBufFilter == SCALE_LINEAR; DrawActiveTexture(0, 0, fbo_w, fbo_h, fbo_w, fbo_h, 0.0f, 0.0f, 1.0f, 1.0f, ROTATION_LOCKED_HORIZONTAL, linearFilter); - draw_->BindFramebufferAsRenderTarget(nullptr); + draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }); // Use the extra FBO, with applied post-processing shader, as a texture. 
// fbo_bind_as_texture(extraFBOs_[0], FB_COLOR_BIT, 0); @@ -1004,11 +1003,12 @@ void FramebufferManagerCommon::CopyDisplayToOutput() { /* if (gl_extensions.GLES3 && glInvalidateFramebuffer != nullptr) { - draw_->BindFramebufferAsRenderTarget(extraFBOs_[0]); + draw_->BindFramebufferAsRenderTarget(extraFBOs_[0], { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }); GLenum attachments[3] = { GL_COLOR_ATTACHMENT0, GL_DEPTH_ATTACHMENT, GL_STENCIL_ATTACHMENT }; glInvalidateFramebuffer(GL_FRAMEBUFFER, 3, attachments); }*/ } else { + draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }); // We are doing the DrawActiveTexture call directly to the backbuffer here. Hence, we must // flip V. if (needBackBufferYSwap_) @@ -1040,9 +1040,6 @@ void FramebufferManagerCommon::CopyDisplayToOutput() { } void FramebufferManagerCommon::DecimateFBOs() { - if (useBufferedRendering_) { - draw_->BindFramebufferAsRenderTarget(nullptr); - } currentRenderVfb_ = 0; for (size_t i = 0; i < vfbs_.size(); ++i) { @@ -1133,7 +1130,6 @@ void FramebufferManagerCommon::ResizeFramebufFBO(VirtualFramebuffer *vfb, u16 w, } textureCache_->ForgetLastTexture(); - draw_->BindFramebufferAsRenderTarget(nullptr); if (!useBufferedRendering_) { if (vfb->fbo) { @@ -1151,16 +1147,14 @@ void FramebufferManagerCommon::ResizeFramebufFBO(VirtualFramebuffer *vfb, u16 w, if (old.fbo) { INFO_LOG(FRAMEBUF, "Resizing FBO for %08x : %i x %i x %i", vfb->fb_address, w, h, vfb->format); if (vfb->fbo) { - draw_->BindFramebufferAsRenderTarget(vfb->fbo); - ClearBuffer(); + draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }); if (!skipCopy && !g_Config.bDisableSlowFramebufEffects) { BlitFramebuffer(vfb, 0, 0, &old, 0, 0, std::min(vfb->bufferWidth, vfb->width), std::min(vfb->height, vfb->bufferHeight), 0); } } - delete old.fbo; - if (vfb->fbo) { - draw_->BindFramebufferAsRenderTarget(vfb->fbo); - } + delete old.fbo; + } else { + 
draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }); } if (!vfb->fbo) { @@ -1796,8 +1790,9 @@ Draw::Framebuffer *FramebufferManagerCommon::GetTempFBO(u16 w, u16 h, Draw::FBCo Draw::Framebuffer *fbo = draw_->CreateFramebuffer({ w, h, 1, 1, false, depth }); if (!fbo) return fbo; - draw_->BindFramebufferAsRenderTarget(fbo); - ClearBuffer(true); + + // TODO: Move binding out of here! + draw_->BindFramebufferAsRenderTarget(fbo, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }); const TempFBO info = { fbo, gpuStats.numFlips }; tempFBOs_[key] = info; return fbo; diff --git a/GPU/Common/FramebufferCommon.h b/GPU/Common/FramebufferCommon.h index 835e7b66cb..11f339073c 100644 --- a/GPU/Common/FramebufferCommon.h +++ b/GPU/Common/FramebufferCommon.h @@ -267,7 +267,7 @@ public: virtual void Resized(); - Draw::Framebuffer *GetTempFBO(u16 w, u16 h, Draw::FBColorDepth depth = Draw::FBO_8888); + Draw::Framebuffer *GetTempFBO(u16 w, u16 h, Draw::FBColorDepth colorDepth = Draw::FBO_8888); // Debug features virtual bool GetFramebuffer(u32 fb_address, int fb_stride, GEBufferFormat format, GPUDebugBuffer &buffer, int maxRes) = 0; diff --git a/GPU/D3D11/FramebufferManagerD3D11.cpp b/GPU/D3D11/FramebufferManagerD3D11.cpp index 0532a7b356..3d2cdf774f 100644 --- a/GPU/D3D11/FramebufferManagerD3D11.cpp +++ b/GPU/D3D11/FramebufferManagerD3D11.cpp @@ -218,8 +218,6 @@ void FramebufferManagerD3D11::DisableState() { } void FramebufferManagerD3D11::CompilePostShader() { - SetNumExtraFBOs(0); - std::string vsSource; std::string psSource; @@ -472,9 +470,10 @@ void FramebufferManagerD3D11::BindPostShader(const PostShaderUniforms &uniforms) void FramebufferManagerD3D11::RebindFramebuffer() { if (currentRenderVfb_ && currentRenderVfb_->fbo) { - draw_->BindFramebufferAsRenderTarget(currentRenderVfb_->fbo); + draw_->BindFramebufferAsRenderTarget(currentRenderVfb_->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP }); } else { - 
draw_->BindFramebufferAsRenderTarget(nullptr); + // Should this even happen? + draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::KEEP, Draw::RPAction::KEEP }); } } @@ -483,8 +482,6 @@ void FramebufferManagerD3D11::ReformatFramebufferFrom(VirtualFramebuffer *vfb, G return; } - draw_->BindFramebufferAsRenderTarget(vfb->fbo); - // Technically, we should at this point re-interpret the bytes of the old format to the new. // That might get tricky, and could cause unnecessary slowness in some games. // For now, we just clear alpha/stencil from 565, which fixes shadow issues in Kingdom Hearts. @@ -494,7 +491,10 @@ void FramebufferManagerD3D11::ReformatFramebufferFrom(VirtualFramebuffer *vfb, G // and blit with a shader to that, then replace the FBO on vfb. Stencil would still be complex // to exactly reproduce in 4444 and 8888 formats. + draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::CLEAR, Draw::RPAction::KEEP }); + if (old == GE_FORMAT_565) { + // TODO: There's no way this does anything useful :( context_->OMSetDepthStencilState(stockD3D11.depthDisabledStencilWrite, 0xFF); context_->OMSetBlendState(stockD3D11.blendStateDisabledWithColorMask[0], nullptr, 0xFFFFFFFF); context_->RSSetState(stockD3D11.rasterStateNoCull); @@ -571,7 +571,6 @@ void FramebufferManagerD3D11::BindFramebufferAsColorTexture(int stage, VirtualFr if (renderCopy) { VirtualFramebuffer copyInfo = *framebuffer; copyInfo.fbo = renderCopy; - CopyFramebufferForColorTexture(&copyInfo, framebuffer, flags); RebindFramebuffer(); draw_->BindFramebufferAsTexture(renderCopy, stage, Draw::FB_COLOR_BIT, 0); @@ -653,8 +652,7 @@ bool FramebufferManagerD3D11::CreateDownloadTempBuffer(VirtualFramebuffer *nvfb) return false; } - draw_->BindFramebufferAsRenderTarget(nvfb->fbo); - ClearBuffer(); + draw_->BindFramebufferAsRenderTarget(nvfb->fbo, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }); return true; } @@ -697,7 +695,7 @@ void FramebufferManagerD3D11::SimpleBlit( // Unbind the texture 
first to avoid the D3D11 hazard check (can't set render target to things bound as textures and vice versa, not even temporarily). draw_->BindTexture(0, nullptr); - draw_->BindFramebufferAsRenderTarget(dest); + draw_->BindFramebufferAsRenderTarget(dest, { Draw::RPAction::KEEP, Draw::RPAction::KEEP }); draw_->BindFramebufferAsTexture(src, 0, Draw::FB_COLOR_BIT, 0); Bind2DShader(); @@ -717,7 +715,9 @@ void FramebufferManagerD3D11::SimpleBlit( void FramebufferManagerD3D11::BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp) { if (!dst->fbo || !src->fbo || !useBufferedRendering_) { // This can happen if they recently switched from non-buffered. - draw_->BindFramebufferAsRenderTarget(nullptr); + if (useBufferedRendering_) { + draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::KEEP, Draw::RPAction::KEEP }); + } return; } @@ -812,7 +812,6 @@ void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 dstStride, u32 srcStride, u32 wid void FramebufferManagerD3D11::PackFramebufferD3D11_(VirtualFramebuffer *vfb, int x, int y, int w, int h) { if (!vfb->fbo) { ERROR_LOG_REPORT_ONCE(vfbfbozero, SCEGE, "PackFramebufferD3D11_: vfb->fbo == 0"); - draw_->BindFramebufferAsRenderTarget(nullptr); return; } @@ -884,7 +883,6 @@ std::vector FramebufferManagerD3D11::GetFramebufferList() { } void FramebufferManagerD3D11::DestroyAllFBOs() { - draw_->BindFramebufferAsRenderTarget(nullptr); currentRenderVfb_ = 0; displayFramebuf_ = 0; prevDisplayFramebuf_ = 0; diff --git a/GPU/D3D11/StencilBufferD3D11.cpp b/GPU/D3D11/StencilBufferD3D11.cpp index aa117bcca7..9f18eff386 100644 --- a/GPU/D3D11/StencilBufferD3D11.cpp +++ b/GPU/D3D11/StencilBufferD3D11.cpp @@ -194,16 +194,13 @@ bool FramebufferManagerD3D11::NotifyStencilUpload(u32 addr, int size, bool skipZ float v1 = 1.0f; MakePixelTexture(src, dstBuffer->format, dstBuffer->fb_stride, dstBuffer->bufferWidth, dstBuffer->bufferHeight, u1, v1); if (dstBuffer->fbo) { - 
draw_->BindFramebufferAsRenderTarget(dstBuffer->fbo); + draw_->BindFramebufferAsRenderTarget(dstBuffer->fbo, { Draw::RPAction::KEEP, Draw::RPAction::CLEAR }); } else { // something is wrong... } D3D11_VIEWPORT vp{ 0.0f, 0.0f, (float)w, (float)h, 0.0f, 1.0f }; context_->RSSetViewports(1, &vp); - // Zero stencil - draw_->Clear(Draw::FBChannel::FB_STENCIL_BIT, 0, 0, 0); - float fw = dstBuffer->width; float fh = dstBuffer->height; diff --git a/GPU/D3D11/TextureCacheD3D11.cpp b/GPU/D3D11/TextureCacheD3D11.cpp index edb91563af..8ceec37249 100644 --- a/GPU/D3D11/TextureCacheD3D11.cpp +++ b/GPU/D3D11/TextureCacheD3D11.cpp @@ -434,7 +434,7 @@ void TextureCacheD3D11::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFra context_->PSSetShaderResources(1, 1, &clutTexture); framebufferManagerD3D11_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_SKIP_COPY); context_->PSSetSamplers(0, 1, &stockD3D11.samplerPoint2DWrap); - draw_->BindFramebufferAsRenderTarget(depalFBO); + draw_->BindFramebufferAsRenderTarget(depalFBO, { Draw::RPAction::DONT_CARE }); shaderApply.Shade(); framebufferManagerD3D11_->RebindFramebuffer(); diff --git a/GPU/Directx9/FramebufferDX9.cpp b/GPU/Directx9/FramebufferDX9.cpp index 185b55342e..e639ddaa3f 100644 --- a/GPU/Directx9/FramebufferDX9.cpp +++ b/GPU/Directx9/FramebufferDX9.cpp @@ -324,9 +324,10 @@ static const D3DVERTEXELEMENT9 g_FramebufferVertexElements[] = { void FramebufferManagerDX9::RebindFramebuffer() { if (currentRenderVfb_ && currentRenderVfb_->fbo) { - draw_->BindFramebufferAsRenderTarget(currentRenderVfb_->fbo); + draw_->BindFramebufferAsRenderTarget(currentRenderVfb_->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP }); } else { - draw_->BindFramebufferAsRenderTarget(nullptr); + // Should this even happen? 
+ draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::KEEP, Draw::RPAction::KEEP }); } } @@ -345,7 +346,7 @@ static const D3DVERTEXELEMENT9 g_FramebufferVertexElements[] = { return; } - draw_->BindFramebufferAsRenderTarget(vfb->fbo); + draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::CLEAR, Draw::RPAction::KEEP }); // Technically, we should at this point re-interpret the bytes of the old format to the new. // That might get tricky, and could cause unnecessary slowness in some games. @@ -552,8 +553,7 @@ static const D3DVERTEXELEMENT9 g_FramebufferVertexElements[] = { return false; } - draw_->BindFramebufferAsRenderTarget(nvfb->fbo); - ClearBuffer(); + draw_->BindFramebufferAsRenderTarget(nvfb->fbo, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }); return true; } @@ -564,7 +564,8 @@ static const D3DVERTEXELEMENT9 g_FramebufferVertexElements[] = { void FramebufferManagerDX9::BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp) { if (!dst->fbo || !src->fbo || !useBufferedRendering_) { // This can happen if we recently switched from non-buffered. 
- draw_->BindFramebufferAsRenderTarget(nullptr); + if (useBufferedRendering_) + draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::KEEP, Draw::RPAction::KEEP }); return; } @@ -664,7 +665,6 @@ static const D3DVERTEXELEMENT9 g_FramebufferVertexElements[] = { void FramebufferManagerDX9::PackFramebufferDirectx9_(VirtualFramebuffer *vfb, int x, int y, int w, int h) { if (!vfb->fbo) { ERROR_LOG_REPORT_ONCE(vfbfbozero, SCEGE, "PackFramebufferDirectx9_: vfb->fbo == 0"); - draw_->BindFramebufferAsRenderTarget(nullptr); return; } @@ -792,7 +792,6 @@ static const D3DVERTEXELEMENT9 g_FramebufferVertexElements[] = { } void FramebufferManagerDX9::DestroyAllFBOs() { - draw_->BindFramebufferAsRenderTarget(nullptr); currentRenderVfb_ = 0; displayFramebuf_ = 0; prevDisplayFramebuf_ = 0; @@ -882,8 +881,6 @@ static const D3DVERTEXELEMENT9 g_FramebufferVertexElements[] = { } bool FramebufferManagerDX9::GetOutputFramebuffer(GPUDebugBuffer &buffer) { - draw_->BindFramebufferAsRenderTarget(nullptr); - LPDIRECT3DSURFACE9 renderTarget = nullptr; HRESULT hr = device_->GetRenderTarget(0, &renderTarget); bool success = false; @@ -899,7 +896,6 @@ static const D3DVERTEXELEMENT9 g_FramebufferVertexElements[] = { } renderTarget->Release(); } - return success; } diff --git a/GPU/Directx9/StencilBufferDX9.cpp b/GPU/Directx9/StencilBufferDX9.cpp index 4ad4cc7ecf..5195c0b428 100644 --- a/GPU/Directx9/StencilBufferDX9.cpp +++ b/GPU/Directx9/StencilBufferDX9.cpp @@ -221,7 +221,7 @@ bool FramebufferManagerDX9::NotifyStencilUpload(u32 addr, int size, bool skipZer u16 h = dstBuffer->renderHeight; if (dstBuffer->fbo) { - draw_->BindFramebufferAsRenderTarget(dstBuffer->fbo); + draw_->BindFramebufferAsRenderTarget(dstBuffer->fbo, { Draw::RPAction::KEEP, Draw::RPAction::CLEAR }); } D3DVIEWPORT9 vp{ 0, 0, w, h, 0.0f, 1.0f }; device_->SetViewport(&vp); diff --git a/GPU/Directx9/TextureCacheDX9.cpp b/GPU/Directx9/TextureCacheDX9.cpp index 2a2f532686..0fffa606ea 100644 --- 
a/GPU/Directx9/TextureCacheDX9.cpp +++ b/GPU/Directx9/TextureCacheDX9.cpp @@ -421,7 +421,7 @@ void TextureCacheDX9::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFrame LPDIRECT3DTEXTURE9 clutTexture = depalShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBuf_); Draw::Framebuffer *depalFBO = framebufferManagerDX9_->GetTempFBO(framebuffer->renderWidth, framebuffer->renderHeight, Draw::FBO_8888); - draw_->BindFramebufferAsRenderTarget(depalFBO); + draw_->BindFramebufferAsRenderTarget(depalFBO, { Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE }); shaderManager_->DirtyLastShader(); float xoff = -0.5f / framebuffer->renderWidth; diff --git a/GPU/GLES/FramebufferManagerGLES.cpp b/GPU/GLES/FramebufferManagerGLES.cpp index c14b93dc24..5fcd307b93 100644 --- a/GPU/GLES/FramebufferManagerGLES.cpp +++ b/GPU/GLES/FramebufferManagerGLES.cpp @@ -93,6 +93,11 @@ void FramebufferManagerGLES::ClearBuffer(bool keepState) { #endif glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); if (keepState) { + glstate.scissorTest.force(false); + glstate.depthWrite.force(GL_TRUE); + glstate.colorMask.force(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + glstate.stencilFunc.force(GL_ALWAYS, 0, 0); + glstate.stencilMask.force(0xFF); glstate.scissorTest.restore(); glstate.depthWrite.restore(); glstate.colorMask.restore(); @@ -450,9 +455,10 @@ void FramebufferManagerGLES::DrawActiveTexture(float x, float y, float w, float void FramebufferManagerGLES::RebindFramebuffer() { if (currentRenderVfb_ && currentRenderVfb_->fbo) { - draw_->BindFramebufferAsRenderTarget(currentRenderVfb_->fbo); + draw_->BindFramebufferAsRenderTarget(currentRenderVfb_->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP }); } else { - draw_->BindFramebufferAsRenderTarget(nullptr); + // Should this even happen? 
+ draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::KEEP, Draw::RPAction::KEEP }); } if (g_Config.iRenderingMode == FB_NON_BUFFERED_MODE) glstate.viewport.restore(); @@ -475,8 +481,6 @@ void FramebufferManagerGLES::ReformatFramebufferFrom(VirtualFramebuffer *vfb, GE return; } - draw_->BindFramebufferAsRenderTarget(vfb->fbo); - // Technically, we should at this point re-interpret the bytes of the old format to the new. // That might get tricky, and could cause unnecessary slowness in some games. // For now, we just clear alpha/stencil from 565, which fixes shadow issues in Kingdom Hearts. @@ -487,14 +491,9 @@ void FramebufferManagerGLES::ReformatFramebufferFrom(VirtualFramebuffer *vfb, GE // to exactly reproduce in 4444 and 8888 formats. if (old == GE_FORMAT_565) { - glstate.scissorTest.disable(); - glstate.depthWrite.set(GL_FALSE); - glstate.colorMask.set(false, false, false, true); - glstate.stencilFunc.set(GL_ALWAYS, 0, 0); - glstate.stencilMask.set(0xFF); - glClearColor(0.0f, 0.0f, 0.0f, 0.0f); - glClearStencil(0); - glClear(GL_COLOR_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); + draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }); + } else { + draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP }); } RebindFramebuffer(); @@ -655,10 +654,6 @@ bool FramebufferManagerGLES::CreateDownloadTempBuffer(VirtualFramebuffer *nvfb) ERROR_LOG(FRAMEBUF, "Error creating GL FBO! %i x %i", nvfb->renderWidth, nvfb->renderHeight); return false; } - - draw_->BindFramebufferAsRenderTarget(nvfb->fbo); - ClearBuffer(); - glDisable(GL_DITHER); // Weird place to do this return true; } @@ -667,11 +662,11 @@ void FramebufferManagerGLES::UpdateDownloadTempBuffer(VirtualFramebuffer *nvfb) // Discard the previous contents of this buffer where possible. 
if (gl_extensions.GLES3 && glInvalidateFramebuffer != nullptr) { - draw_->BindFramebufferAsRenderTarget(nvfb->fbo); + draw_->BindFramebufferAsRenderTarget(nvfb->fbo, { Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE }); GLenum attachments[3] = { GL_COLOR_ATTACHMENT0, GL_STENCIL_ATTACHMENT, GL_DEPTH_ATTACHMENT }; glInvalidateFramebuffer(GL_FRAMEBUFFER, 3, attachments); } else if (gl_extensions.IsGLES) { - draw_->BindFramebufferAsRenderTarget(nvfb->fbo); + draw_->BindFramebufferAsRenderTarget(nvfb->fbo, { Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE }); ClearBuffer(); } CHECK_GL_ERROR_IF_DEBUG(); @@ -680,7 +675,8 @@ void FramebufferManagerGLES::UpdateDownloadTempBuffer(VirtualFramebuffer *nvfb) void FramebufferManagerGLES::BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp) { if (!dst->fbo || !src->fbo || !useBufferedRendering_) { // This can happen if they recently switched from non-buffered. - draw_->BindFramebufferAsRenderTarget(nullptr); + if (useBufferedRendering_) + draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::KEEP, Draw::RPAction::KEEP }); return; } @@ -735,7 +731,7 @@ void FramebufferManagerGLES::BlitFramebuffer(VirtualFramebuffer *dst, int dstX, if (useBlit) { draw_->BlitFramebuffer(src->fbo, srcX1, srcY1, srcX2, srcY2, dst->fbo, dstX1, dstY1, dstX2, dstY2, Draw::FB_COLOR_BIT, Draw::FB_BLIT_NEAREST); } else { - draw_->BindFramebufferAsRenderTarget(dst->fbo); + draw_->BindFramebufferAsRenderTarget(dst->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP }); draw_->BindFramebufferAsTexture(src->fbo, 0, Draw::FB_COLOR_BIT, 0); // Make sure our 2D drawing program is ready. Compiles only if not already compiled. 
@@ -1161,11 +1157,11 @@ void FramebufferManagerGLES::EndFrame() { continue; } - draw_->BindFramebufferAsRenderTarget(temp.second.fbo); + draw_->BindFramebufferAsRenderTarget(temp.second.fbo, { Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE }); GLenum attachments[3] = { GL_COLOR_ATTACHMENT0, GL_STENCIL_ATTACHMENT, GL_DEPTH_ATTACHMENT }; glInvalidateFramebuffer(GL_FRAMEBUFFER, 3, attachments); } - draw_->BindFramebufferAsRenderTarget(nullptr); + draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::KEEP , Draw::RPAction::KEEP }); } CHECK_GL_ERROR_IF_DEBUG(); } @@ -1196,7 +1192,6 @@ std::vector FramebufferManagerGLES::GetFramebufferList() { void FramebufferManagerGLES::DestroyAllFBOs() { CHECK_GL_ERROR_IF_DEBUG(); - draw_->BindFramebufferAsRenderTarget(nullptr); currentRenderVfb_ = 0; displayFramebuf_ = 0; prevDisplayFramebuf_ = 0; @@ -1220,7 +1215,6 @@ void FramebufferManagerGLES::DestroyAllFBOs() { } tempFBOs_.clear(); - draw_->BindFramebufferAsRenderTarget(nullptr); DisableState(); CHECK_GL_ERROR_IF_DEBUG(); } diff --git a/GPU/GLES/StencilBufferGLES.cpp b/GPU/GLES/StencilBufferGLES.cpp index cdd1b6c79f..a595bb19bc 100644 --- a/GPU/GLES/StencilBufferGLES.cpp +++ b/GPU/GLES/StencilBufferGLES.cpp @@ -183,9 +183,9 @@ bool FramebufferManagerGLES::NotifyStencilUpload(u32 addr, int size, bool skipZe Draw::Framebuffer *blitFBO = nullptr; if (useBlit) { blitFBO = GetTempFBO(w, h, Draw::FBO_8888); - draw_->BindFramebufferAsRenderTarget(blitFBO); + draw_->BindFramebufferAsRenderTarget(blitFBO, { Draw::RPAction::CLEAR, Draw::RPAction::DONT_CARE }); } else if (dstBuffer->fbo) { - draw_->BindFramebufferAsRenderTarget(dstBuffer->fbo); + draw_->BindFramebufferAsRenderTarget(dstBuffer->fbo, { Draw::RPAction::KEEP, Draw::RPAction::DONT_CARE }); } glViewport(0, 0, w, h); diff --git a/GPU/GLES/TextureCacheGLES.cpp b/GPU/GLES/TextureCacheGLES.cpp index 732f6e61de..2950324c88 100644 --- a/GPU/GLES/TextureCacheGLES.cpp +++ b/GPU/GLES/TextureCacheGLES.cpp @@ -483,7 +483,7 
@@ void TextureCacheGLES::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFram const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat(); GLuint clutTexture = depalShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBuf_); Draw::Framebuffer *depalFBO = framebufferManagerGL_->GetTempFBO(framebuffer->renderWidth, framebuffer->renderHeight, Draw::FBO_8888); - draw_->BindFramebufferAsRenderTarget(depalFBO); + draw_->BindFramebufferAsRenderTarget(depalFBO, { Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE }); shaderManager_->DirtyLastShader(); TextureShaderApplier shaderApply(depal, framebuffer->bufferWidth, framebuffer->bufferHeight, framebuffer->renderWidth, framebuffer->renderHeight); diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index 10d7ab7370..91fc88fb10 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -236,8 +236,7 @@ void SoftGPU::CopyToCurrentFboFromDisplayRam(int srcwidth, int srcheight) { if (GetGPUBackend() == GPUBackend::VULKAN) { std::swap(v0, v1); } - draw_->BindFramebufferAsRenderTarget(nullptr); - draw_->Clear(Draw::FB_COLOR_BIT, 0, 0, 0); + draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::CLEAR, Draw::RPAction::DONT_CARE }); Draw::SamplerState *sampler; if (g_Config.iBufFilter == SCALE_NEAREST) { diff --git a/GPU/Vulkan/FramebufferVulkan.cpp b/GPU/Vulkan/FramebufferVulkan.cpp index 26b7ee598a..307aa03179 100644 --- a/GPU/Vulkan/FramebufferVulkan.cpp +++ b/GPU/Vulkan/FramebufferVulkan.cpp @@ -480,9 +480,10 @@ void FramebufferManagerVulkan::BindPostShader(const PostShaderUniforms &uniforms void FramebufferManagerVulkan::RebindFramebuffer() { if (currentRenderVfb_ && currentRenderVfb_->fbo) { - draw_->BindFramebufferAsRenderTarget(currentRenderVfb_->fbo); + draw_->BindFramebufferAsRenderTarget(currentRenderVfb_->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP }); } else { - draw_->BindFramebufferAsRenderTarget(nullptr); + // Should this even happen? 
+ draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::KEEP, Draw::RPAction::KEEP }); } } diff --git a/UI/EmuScreen.cpp b/UI/EmuScreen.cpp index 7e810c418a..16b992a71e 100644 --- a/UI/EmuScreen.cpp +++ b/UI/EmuScreen.cpp @@ -973,8 +973,7 @@ void EmuScreen::preRender() { if (!useBufferedRendering) { // We need to clear here already so that drawing during the frame is done on a clean slate. DrawContext *draw = screenManager()->getDrawContext(); - draw->BindFramebufferAsRenderTarget(nullptr); - draw->Clear(FBChannel::FB_COLOR_BIT | FBChannel::FB_DEPTH_BIT | FBChannel::FB_STENCIL_BIT, 0xFF000000, 0.0f, 0); + draw->BindFramebufferAsRenderTarget(nullptr, { RPAction::CLEAR, RPAction::CLEAR, 0xFF000000 }); Viewport viewport; viewport.TopLeftX = 0; diff --git a/ext/native/thin3d/thin3d.h b/ext/native/thin3d/thin3d.h index 2635e1296c..1e2c9d0ca0 100644 --- a/ext/native/thin3d/thin3d.h +++ b/ext/native/thin3d/thin3d.h @@ -562,6 +562,20 @@ struct TextureDesc { std::vector initData; }; +enum class RPAction { + DONT_CARE, + CLEAR, + KEEP, +}; + +struct RenderPassInfo { + RPAction color; + RPAction depth; + uint32_t clearColor; + float clearDepth; + uint8_t clearStencil; +}; + class DrawContext { public: virtual ~DrawContext(); @@ -601,7 +615,7 @@ public: // These functions should be self explanatory. // Binding a zero render target means binding the backbuffer. - virtual void BindFramebufferAsRenderTarget(Framebuffer *fbo) = 0; + virtual void BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPassInfo &rp) = 0; // color must be 0, for now. virtual void BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int attachment) = 0; @@ -611,6 +625,9 @@ public: virtual void GetFramebufferDimensions(Framebuffer *fbo, int *w, int *h) = 0; + // Useful in OpenGL ES to give hints about framebuffers on tiler GPUs. 
+ virtual void InvalidateFramebuffer(Framebuffer *fbo) {} + // Dynamic state virtual void SetScissorRect(int left, int top, int width, int height) = 0; virtual void SetViewports(int count, Viewport *viewports) = 0; diff --git a/ext/native/thin3d/thin3d_d3d11.cpp b/ext/native/thin3d/thin3d_d3d11.cpp index d4e0add435..85e44c9035 100644 --- a/ext/native/thin3d/thin3d_d3d11.cpp +++ b/ext/native/thin3d/thin3d_d3d11.cpp @@ -58,7 +58,7 @@ public: bool BlitFramebuffer(Framebuffer *src, int srcX1, int srcY1, int srcX2, int srcY2, Framebuffer *dst, int dstX1, int dstY1, int dstX2, int dstY2, int channelBits, FBBlitFilter filter) override; // These functions should be self explanatory. - void BindFramebufferAsRenderTarget(Framebuffer *fbo) override; + void BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPassInfo &rp) override; // color must be 0, for now. void BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int attachment) override; void BindFramebufferForRead(Framebuffer *fbo) override; @@ -1286,7 +1286,7 @@ bool D3D11DrawContext::BlitFramebuffer(Framebuffer *srcfb, int srcX1, int srcY1, return false; } -void D3D11DrawContext::BindFramebufferAsRenderTarget(Framebuffer *fbo) { +void D3D11DrawContext::BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPassInfo &rp) { // TODO: deviceContext1 can actually discard. Useful on Windows Mobile. 
if (fbo) { D3D11Framebuffer *fb = (D3D11Framebuffer *)fbo; @@ -1307,6 +1307,15 @@ void D3D11DrawContext::BindFramebufferAsRenderTarget(Framebuffer *fbo) { curRTWidth_ = bbWidth_; curRTHeight_ = bbHeight_; } + if (rp.color == RPAction::CLEAR && curRenderTargetView_) { + float cv[4]{}; + if (rp.clearColor) + Uint8x4ToFloat4(cv, rp.clearColor); + context_->ClearRenderTargetView(curRenderTargetView_, cv); + } + if (rp.depth == RPAction::CLEAR && curDepthStencilView_) { + context_->ClearDepthStencilView(curDepthStencilView_, D3D11_CLEAR_DEPTH | D3D11_CLEAR_STENCIL, rp.clearDepth, rp.clearStencil); + } } // color must be 0, for now. diff --git a/ext/native/thin3d/thin3d_d3d9.cpp b/ext/native/thin3d/thin3d_d3d9.cpp index 06c1fa6bc0..0a4e1938e4 100644 --- a/ext/native/thin3d/thin3d_d3d9.cpp +++ b/ext/native/thin3d/thin3d_d3d9.cpp @@ -487,7 +487,7 @@ public: bool BlitFramebuffer(Framebuffer *src, int srcX1, int srcY1, int srcX2, int srcY2, Framebuffer *dst, int dstX1, int dstY1, int dstX2, int dstY2, int channelBits, FBBlitFilter filter) override; // These functions should be self explanatory. - void BindFramebufferAsRenderTarget(Framebuffer *fbo) override; + void BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPassInfo &rp) override; // color must be 0, for now. 
void BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int attachment) override; void BindFramebufferForRead(Framebuffer *fbo) override {} @@ -1043,9 +1043,8 @@ D3D9Framebuffer::~D3D9Framebuffer() { } } -void D3D9Context::BindFramebufferAsRenderTarget(Framebuffer *fbo) { +void D3D9Context::BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPassInfo &rp) { using namespace DX9; - if (fbo) { D3D9Framebuffer *fb = (D3D9Framebuffer *)fbo; device_->SetRenderTarget(0, fb->surf); @@ -1054,6 +1053,20 @@ void D3D9Context::BindFramebufferAsRenderTarget(Framebuffer *fbo) { device_->SetRenderTarget(0, deviceRTsurf); device_->SetDepthStencilSurface(deviceDSsurf); } + + int clearFlags = 0; + if (rp.color == RPAction::CLEAR) { + clearFlags |= D3DCLEAR_TARGET; + } + if (rp.depth == RPAction::CLEAR) { + clearFlags |= D3DCLEAR_ZBUFFER | D3DCLEAR_STENCIL; + } + if (clearFlags) { + dxstate.scissorTest.force(false); + device_->Clear(0, nullptr, clearFlags, (D3DCOLOR)SwapRB(rp.clearColor), rp.clearDepth, rp.clearStencil); + dxstate.scissorRect.restore(); + } + dxstate.scissorRect.restore(); dxstate.viewport.restore(); } diff --git a/ext/native/thin3d/thin3d_gl.cpp b/ext/native/thin3d/thin3d_gl.cpp index 62c857846e..4c9164ab6b 100644 --- a/ext/native/thin3d/thin3d_gl.cpp +++ b/ext/native/thin3d/thin3d_gl.cpp @@ -465,7 +465,7 @@ public: bool BlitFramebuffer(Framebuffer *src, int srcX1, int srcY1, int srcX2, int srcY2, Framebuffer *dst, int dstX1, int dstY1, int dstX2, int dstY2, int channelBits, FBBlitFilter filter) override; // These functions should be self explanatory. - void BindFramebufferAsRenderTarget(Framebuffer *fbo) override; + void BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPassInfo &rp) override; // color must be 0, for now. 
void BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int attachment) override; void BindFramebufferForRead(Framebuffer *fbo) override; @@ -1468,6 +1468,7 @@ Framebuffer *OpenGLContext::CreateFramebuffer(const FramebufferDesc &desc) { FLOG("Other framebuffer error: %i", status); break; } + // Unbind state we don't need glBindRenderbuffer(GL_RENDERBUFFER, 0); glBindTexture(GL_TEXTURE_2D, 0); @@ -1533,7 +1534,7 @@ void OpenGLContext::fbo_unbind() { currentReadHandle_ = 0; } -void OpenGLContext::BindFramebufferAsRenderTarget(Framebuffer *fbo) { +void OpenGLContext::BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPassInfo &rp) { CHECK_GL_ERROR_IF_DEBUG(); if (fbo) { OpenGLFramebuffer *fb = (OpenGLFramebuffer *)fbo; @@ -1545,6 +1546,37 @@ void OpenGLContext::BindFramebufferAsRenderTarget(Framebuffer *fbo) { } else { fbo_unbind(); } + int clearFlags = 0; + if (rp.color == RPAction::CLEAR) { + float fc[4]{}; + if (rp.clearColor) { + Uint8x4ToFloat4(fc, rp.clearColor); + } + glClearColor(fc[0], fc[1], fc[2], fc[3]); + clearFlags |= GL_COLOR_BUFFER_BIT; + glstate.colorMask.force(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + } + if (rp.depth == RPAction::CLEAR) { + glClearDepth(rp.clearDepth); + glClearStencil(rp.clearStencil); + clearFlags |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; + glstate.depthWrite.force(GL_TRUE); + glstate.stencilFunc.force(GL_ALWAYS, 0, 0); + glstate.stencilMask.force(0xFF); + } + if (clearFlags) { + glstate.scissorTest.force(false); + glClear(clearFlags); + glstate.scissorTest.restore(); + } + if (rp.color == RPAction::CLEAR) { + glstate.colorMask.restore(); + } + if (rp.depth == RPAction::CLEAR) { + glstate.depthWrite.restore(); + glstate.stencilFunc.restore(); + glstate.stencilMask.restore(); + } CHECK_GL_ERROR_IF_DEBUG(); } diff --git a/ext/native/thin3d/thin3d_vulkan.cpp b/ext/native/thin3d/thin3d_vulkan.cpp index b2103485a8..6c7beea45a 100644 --- a/ext/native/thin3d/thin3d_vulkan.cpp +++ 
b/ext/native/thin3d/thin3d_vulkan.cpp @@ -346,7 +346,7 @@ public: bool BlitFramebuffer(Framebuffer *src, int srcX1, int srcY1, int srcX2, int srcY2, Framebuffer *dst, int dstX1, int dstY1, int dstX2, int dstY2, int channelBits, FBBlitFilter filter) override; // These functions should be self explanatory. - void BindFramebufferAsRenderTarget(Framebuffer *fbo) override; + void BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPassInfo &rp) override; // color must be 0, for now. void BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int attachment) override; void BindFramebufferForRead(Framebuffer *fbo) override; @@ -1304,7 +1304,7 @@ bool VKContext::BlitFramebuffer(Framebuffer *srcfb, int srcX1, int srcY1, int sr } // These functions should be self explanatory. -void VKContext::BindFramebufferAsRenderTarget(Framebuffer *fbo) { +void VKContext::BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPassInfo &rp) { if (fbo) { VKFramebuffer *fb = (VKFramebuffer *)fbo; } else { diff --git a/ext/native/ui/ui_screen.cpp b/ext/native/ui/ui_screen.cpp index 65633292b4..fd7fcf6783 100644 --- a/ext/native/ui/ui_screen.cpp +++ b/ext/native/ui/ui_screen.cpp @@ -61,14 +61,14 @@ void UIScreen::update() { } void UIScreen::preRender() { + using namespace Draw; Draw::DrawContext *draw = screenManager()->getDrawContext(); if (!draw) { return; } draw->BeginFrame(); - // Bind the back buffer - draw->BindFramebufferAsRenderTarget(nullptr); - draw->Clear(0xF, 0xFF000000, 0.0f, 0); + // Bind and clear the back buffer + draw->BindFramebufferAsRenderTarget(nullptr, { RPAction::CLEAR, RPAction::CLEAR, 0xFF000000 }); Draw::Viewport viewport; viewport.TopLeftX = 0; From a7dd6d6085043b785a3140cd5cac6c6d32c77671 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Tue, 16 May 2017 17:20:22 +0200 Subject: [PATCH 09/25] Further steps towards Vulkan framebuffer support --- Common/Vulkan/VulkanContext.h | 3 + 
GPU/Common/FramebufferCommon.cpp | 4 +- GPU/GPU.vcxproj | 3 +- GPU/GPU.vcxproj.filters | 6 +- GPU/Vulkan/DrawEngineVulkan.cpp | 1 + GPU/Vulkan/FramebufferVulkan.cpp | 79 +------- GPU/Vulkan/FramebufferVulkan.h | 10 +- ext/native/thin3d/thin3d.h | 1 + ext/native/thin3d/thin3d_vulkan.cpp | 276 ++++++++++++++++++++++++++-- pspautotests | 2 +- 10 files changed, 278 insertions(+), 107 deletions(-) diff --git a/Common/Vulkan/VulkanContext.h b/Common/Vulkan/VulkanContext.h index b1b5c77f17..c6f097ac45 100644 --- a/Common/Vulkan/VulkanContext.h +++ b/Common/Vulkan/VulkanContext.h @@ -258,6 +258,9 @@ public: VkCommandBuffer GetInitCommandBuffer(); + VkFramebuffer GetSurfaceFramebuffer() { + return framebuffers_[current_buffer]; + } // This must only be accessed between BeginSurfaceRenderPass and EndSurfaceRenderPass. VkCommandBuffer GetSurfaceCommandBuffer() { return frame_[curFrame_ & 1].cmdBuf; diff --git a/GPU/Common/FramebufferCommon.cpp b/GPU/Common/FramebufferCommon.cpp index 9a985234de..8e7b1d2225 100644 --- a/GPU/Common/FramebufferCommon.cpp +++ b/GPU/Common/FramebufferCommon.cpp @@ -825,7 +825,9 @@ void FramebufferManagerCommon::CopyDisplayToOutput() { if (displayFramebufPtr_ == 0) { DEBUG_LOG(FRAMEBUF, "Display disabled, displaying only black"); // No framebuffer to display! Clear to black. 
- draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }); + if (useBufferedRendering_) { + draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }); + } return; } diff --git a/GPU/GPU.vcxproj b/GPU/GPU.vcxproj index 3959984582..dce71c3d73 100644 --- a/GPU/GPU.vcxproj +++ b/GPU/GPU.vcxproj @@ -21,7 +21,8 @@ {457F45D2-556F-47BC-A31D-AFF0D15BEAED} GPU - + + diff --git a/GPU/GPU.vcxproj.filters b/GPU/GPU.vcxproj.filters index 9032efdb16..8d92e6f620 100644 --- a/GPU/GPU.vcxproj.filters +++ b/GPU/GPU.vcxproj.filters @@ -408,9 +408,6 @@ Vulkan - - Vulkan - Vulkan @@ -510,5 +507,8 @@ Common + + Vulkan + \ No newline at end of file diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp index 22cf8b3514..929d4f91a8 100644 --- a/GPU/Vulkan/DrawEngineVulkan.cpp +++ b/GPU/Vulkan/DrawEngineVulkan.cpp @@ -883,6 +883,7 @@ void DrawEngineVulkan::DoFlush(VkCommandBuffer cmd) { // Note: we won't get here if the clear is alpha but not color, or color but not alpha. // We let the framebuffer manager handle the clear. It can use renderpasses to optimize on tilers. + // If non-buffered though, it'll just do a plain clear. 
framebufferManager_->NotifyClear(gstate.isClearModeColorMask(), gstate.isClearModeAlphaMask(), gstate.isClearModeDepthMask(), result.color, result.depth); int scissorX1 = gstate.getScissorX1(); diff --git a/GPU/Vulkan/FramebufferVulkan.cpp b/GPU/Vulkan/FramebufferVulkan.cpp index 307aa03179..cb77192698 100644 --- a/GPU/Vulkan/FramebufferVulkan.cpp +++ b/GPU/Vulkan/FramebufferVulkan.cpp @@ -52,8 +52,6 @@ #include "GPU/Vulkan/ShaderManagerVulkan.h" #include "GPU/Vulkan/VulkanUtil.h" -const VkFormat framebufFormat = VK_FORMAT_B8G8R8A8_UNORM; - static const char tex_fs[] = R"(#version 400 #extension GL_ARB_separate_shader_objects : enable #extension GL_ARB_shading_language_420pack : enable @@ -92,7 +90,8 @@ FramebufferManagerVulkan::FramebufferManagerVulkan(Draw::DrawContext *draw, Vulk pixelBufObj_(nullptr), currentPBO_(0), curFrame_(0), - pipelineBasicTex_(VK_NULL_HANDLE), + pipelineBasicTexBackBuffer_(VK_NULL_HANDLE), + pipelineBasicTexFrameBuffer_(VK_NULL_HANDLE), pipelinePostShader_(VK_NULL_HANDLE), vulkan2D_(vulkan) { @@ -117,67 +116,6 @@ void FramebufferManagerVulkan::SetShaderManager(ShaderManagerVulkan *sm) { } void FramebufferManagerVulkan::InitDeviceObjects() { - // Create a bunch of render pass objects, for normal rendering with a depth buffer, - // with and without pre-clearing of both depth/stencil and color, so 4 combos. 
- VkAttachmentDescription attachments[2] = {}; - attachments[0].format = framebufFormat; - attachments[0].samples = VK_SAMPLE_COUNT_1_BIT; - attachments[0].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; - attachments[0].storeOp = VK_ATTACHMENT_STORE_OP_STORE; - attachments[0].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; - attachments[0].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; - attachments[0].initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - attachments[0].finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - attachments[0].flags = 0; - - attachments[1].format = vulkan_->GetDeviceInfo().preferredDepthStencilFormat; - attachments[1].samples = VK_SAMPLE_COUNT_1_BIT; - attachments[1].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; - attachments[1].storeOp = VK_ATTACHMENT_STORE_OP_STORE; - attachments[1].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; - attachments[1].stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; - attachments[1].initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; - attachments[1].finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; - attachments[1].flags = 0; - - VkAttachmentReference color_reference = {}; - color_reference.attachment = 0; - color_reference.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - - VkAttachmentReference depth_reference = {}; - depth_reference.attachment = 1; - depth_reference.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; - - VkSubpassDescription subpass = {}; - subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; - subpass.flags = 0; - subpass.inputAttachmentCount = 0; - subpass.pInputAttachments = NULL; - subpass.colorAttachmentCount = 1; - subpass.pColorAttachments = &color_reference; - subpass.pResolveAttachments = NULL; - subpass.pDepthStencilAttachment = &depth_reference; - subpass.preserveAttachmentCount = 0; - subpass.pPreserveAttachments = NULL; - - VkRenderPassCreateInfo rp = { VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO }; - rp.attachmentCount = 2; - 
rp.pAttachments = attachments; - rp.subpassCount = 1; - rp.pSubpasses = &subpass; - rp.dependencyCount = 0; - rp.pDependencies = NULL; - - // TODO: Maybe LOAD_OP_DONT_CARE makes sense in some situations. Additionally, - // there is often no need to store the depth buffer afterwards, although hard to know up front. - vkCreateRenderPass(vulkan_->GetDevice(), &rp, nullptr, &rpLoadColorLoadDepth_); - attachments[0].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; - vkCreateRenderPass(vulkan_->GetDevice(), &rp, nullptr, &rpClearColorLoadDepth_); - attachments[1].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; - vkCreateRenderPass(vulkan_->GetDevice(), &rp, nullptr, &rpClearColorClearDepth_); - attachments[0].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; - vkCreateRenderPass(vulkan_->GetDevice(), &rp, nullptr, &rpLoadColorClearDepth_); - // Initialize framedata for (int i = 0; i < 2; i++) { VkCommandPoolCreateInfo cp = { VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO }; @@ -196,7 +134,9 @@ void FramebufferManagerVulkan::InitDeviceObjects() { assert(fsBasicTex_ != VK_NULL_HANDLE); assert(vsBasicTex_ != VK_NULL_HANDLE); - pipelineBasicTex_ = vulkan2D_.GetPipeline(pipelineCache2D_, rpClearColorClearDepth_, vsBasicTex_, fsBasicTex_); + // Get a representative render pass and use when creating the pipeline. 
+ pipelineBasicTexBackBuffer_ = vulkan2D_.GetPipeline(pipelineCache2D_, vulkan_->GetSurfaceRenderPass(), vsBasicTex_, fsBasicTex_); + pipelineBasicTexFrameBuffer_ = vulkan2D_.GetPipeline(pipelineCache2D_, (VkRenderPass)draw_->GetNativeObject(Draw::NativeObject::RENDERPASS), vsBasicTex_, fsBasicTex_); VkSamplerCreateInfo samp = { VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO }; samp.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; @@ -213,15 +153,6 @@ void FramebufferManagerVulkan::InitDeviceObjects() { } void FramebufferManagerVulkan::DestroyDeviceObjects() { - if (rpLoadColorLoadDepth_ != VK_NULL_HANDLE) - vulkan_->Delete().QueueDeleteRenderPass(rpLoadColorLoadDepth_); - if (rpClearColorLoadDepth_ != VK_NULL_HANDLE) - vulkan_->Delete().QueueDeleteRenderPass(rpClearColorLoadDepth_); - if (rpClearColorClearDepth_ != VK_NULL_HANDLE) - vulkan_->Delete().QueueDeleteRenderPass(rpClearColorClearDepth_); - if (rpLoadColorClearDepth_ != VK_NULL_HANDLE) - vulkan_->Delete().QueueDeleteRenderPass(rpLoadColorClearDepth_); - for (int i = 0; i < 2; i++) { if (frameData_[i].numCommandBuffers_ > 0) { vkFreeCommandBuffers(vulkan_->GetDevice(), frameData_[i].cmdPool_, frameData_[i].numCommandBuffers_, frameData_[i].commandBuffers_); diff --git a/GPU/Vulkan/FramebufferVulkan.h b/GPU/Vulkan/FramebufferVulkan.h index 95fc9cd0aa..72ab08338f 100644 --- a/GPU/Vulkan/FramebufferVulkan.h +++ b/GPU/Vulkan/FramebufferVulkan.h @@ -188,18 +188,14 @@ private: // This gets copied to the current frame's push buffer as needed. 
PostShaderUniforms postUniforms_; - // Renderpasses, all combination of preserving or clearing fb contents - VkRenderPass rpLoadColorLoadDepth_; - VkRenderPass rpClearColorLoadDepth_; - VkRenderPass rpLoadColorClearDepth_; - VkRenderPass rpClearColorClearDepth_; - VkPipelineCache pipelineCache2D_; // Basic shaders VkShaderModule fsBasicTex_; VkShaderModule vsBasicTex_; - VkPipeline pipelineBasicTex_; + // Might need different pipelines for rendering to backbuffer vs framebuffers due to color format incompatibility + VkPipeline pipelineBasicTexBackBuffer_; + VkPipeline pipelineBasicTexFrameBuffer_; // Postprocessing VkPipeline pipelinePostShader_; diff --git a/ext/native/thin3d/thin3d.h b/ext/native/thin3d/thin3d.h index 1e2c9d0ca0..37cd4e40ba 100644 --- a/ext/native/thin3d/thin3d.h +++ b/ext/native/thin3d/thin3d.h @@ -321,6 +321,7 @@ enum class NativeObject { BACKBUFFER_COLOR_TEX, BACKBUFFER_DEPTH_TEX, FEATURE_LEVEL, + RENDERPASS, }; enum FBColorDepth { diff --git a/ext/native/thin3d/thin3d_vulkan.cpp b/ext/native/thin3d/thin3d_vulkan.cpp index 6c7beea45a..907110190c 100644 --- a/ext/native/thin3d/thin3d_vulkan.cpp +++ b/ext/native/thin3d/thin3d_vulkan.cpp @@ -411,7 +411,12 @@ public: std::vector GetFeatureList() const override; uintptr_t GetNativeObject(NativeObject obj) const override { - return 0; + switch (obj) { + case NativeObject::RENDERPASS: + return (uintptr_t)renderPasses_[0]; + default: + return 0; + } } void HandleEvent(Event ev, int width, int height, void *param1, void *param2) override; @@ -432,6 +437,13 @@ private: VkPipelineLayout pipelineLayout_; VkPipelineCache pipelineCache_; + inline int RPIndex(RPAction color, RPAction depth) { + return (int)depth * 3 + (int)color; + } + + // Renderpasses, all combination of preserving or clearing or dont-care-ing fb contents. 
+ VkRenderPass renderPasses_[9]; + VkCommandPool cmdPool_; VkDevice device_; VkQueue queue_; @@ -464,6 +476,9 @@ private: VulkanPushBuffer *push_ = nullptr; DeviceCaps caps_{}; + + VkFramebuffer curFramebuffer_ = VK_NULL_HANDLE;; + VkRenderPass curRenderPass_ = VK_NULL_HANDLE; }; static int GetBpp(VkFormat format) { @@ -714,9 +729,79 @@ VKContext::VKContext(VulkanContext *vulkan) assert(VK_SUCCESS == res); pipelineCache_ = vulkan_->CreatePipelineCache(); + + // Create a bunch of render pass objects, for normal rendering with a depth buffer, + // with and without pre-clearing of both depth/stencil and color, so 4 combos. + VkAttachmentDescription attachments[2] = {}; + attachments[0].format = VK_FORMAT_R8G8B8A8_UNORM; + attachments[0].samples = VK_SAMPLE_COUNT_1_BIT; + attachments[0].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + attachments[0].storeOp = VK_ATTACHMENT_STORE_OP_STORE; + attachments[0].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + attachments[0].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; + attachments[0].initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + attachments[0].finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + attachments[0].flags = 0; + + attachments[1].format = vulkan_->GetDeviceInfo().preferredDepthStencilFormat; + attachments[1].samples = VK_SAMPLE_COUNT_1_BIT; + attachments[1].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + attachments[1].storeOp = VK_ATTACHMENT_STORE_OP_STORE; + attachments[1].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + attachments[1].stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; + attachments[1].initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + attachments[1].finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + attachments[1].flags = 0; + + VkAttachmentReference color_reference = {}; + color_reference.attachment = 0; + color_reference.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + + VkAttachmentReference depth_reference = {}; + depth_reference.attachment = 1; + 
depth_reference.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + + VkSubpassDescription subpass = {}; + subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; + subpass.flags = 0; + subpass.inputAttachmentCount = 0; + subpass.pInputAttachments = NULL; + subpass.colorAttachmentCount = 1; + subpass.pColorAttachments = &color_reference; + subpass.pResolveAttachments = NULL; + subpass.pDepthStencilAttachment = &depth_reference; + subpass.preserveAttachmentCount = 0; + subpass.pPreserveAttachments = NULL; + + VkRenderPassCreateInfo rp = { VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO }; + rp.attachmentCount = 2; + rp.pAttachments = attachments; + rp.subpassCount = 1; + rp.pSubpasses = &subpass; + rp.dependencyCount = 0; + rp.pDependencies = NULL; + + for (int depth = 0; depth < 3; depth++) { + switch ((RPAction)depth) { + case RPAction::CLEAR: attachments[1].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; break; + case RPAction::KEEP: attachments[1].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; break; + case RPAction::DONT_CARE: attachments[1].loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; break; + } + for (int color = 0; color < 3; color++) { + switch ((RPAction)color) { + case RPAction::CLEAR: attachments[1].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; break; + case RPAction::KEEP: attachments[1].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; break; + case RPAction::DONT_CARE: attachments[1].loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; break; + } + vkCreateRenderPass(vulkan_->GetDevice(), &rp, nullptr, &renderPasses_[RPIndex((RPAction)color, (RPAction)depth)]); + } + } } VKContext::~VKContext() { + for (int i = 0; i < 9; i++) { + vulkan_->Delete().QueueDeleteRenderPass(renderPasses_[i]); + } vulkan_->Delete().QueueDeleteCommandPool(cmdPool_); // This also destroys all descriptor sets. 
for (int i = 0; i < 2; i++) { @@ -748,26 +833,14 @@ void VKContext::BeginFrame() { scissor_.extent.height = pixel_yres; scissorDirty_ = true; viewportDirty_ = true; - - int colorval = 0xFF000000; - float depthVal = 0.0; - int stencilVal = 0; - - VkClearValue clearVal[2] = {}; - Uint8x4ToFloat4(colorval, clearVal[0].color.float32); - - // // Debug flicker - used to see if we swap at all. no longer necessary - // if (frameNum_ & 1) - // clearVal[0].color.float32[2] = 1.0f; - - clearVal[1].depthStencil.depth = depthVal; - clearVal[1].depthStencil.stencil = stencilVal; - - vulkan_->BeginSurfaceRenderPass(clearVal); } void VKContext::EndFrame() { - vulkan_->EndSurfaceRenderPass(); + if (curRenderPass_) { + vulkan_->EndSurfaceRenderPass(); + curRenderPass_ = VK_NULL_HANDLE; + curFramebuffer_ = VK_NULL_HANDLE; + } // Stop collecting data in the frame's data pushbuffer. push_->End(); @@ -1277,18 +1350,124 @@ uint32_t VKContext::GetDataFormatSupport(DataFormat fmt) const { } } +// Simple independent framebuffer image. Gets its own allocation, we don't have that many framebuffers so it's fine +// to let them have individual non-pooled allocations. 
+struct VKImage { + VkImage image; + VkImageView view; + VkDeviceMemory memory; + VkImageLayout layout; +}; + +void CreateImage(VulkanContext *vulkan, VKImage &img, int width, int height, VkFormat format, VkImageLayout initialLayout, bool color) { + VkImageCreateInfo ici{ VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO }; + ici.arrayLayers = 1; + ici.mipLevels = 1; + ici.extent.width = width; + ici.extent.height = height; + ici.extent.depth = 1; + ici.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + ici.imageType = VK_IMAGE_TYPE_2D; + ici.samples = VK_SAMPLE_COUNT_1_BIT; + ici.tiling = VK_IMAGE_TILING_OPTIMAL; + ici.format = format; + ici.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT; + if (color) { + ici.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + } else { + ici.usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + } + vkCreateImage(vulkan->GetDevice(), &ici, nullptr, &img.image); + + VkMemoryRequirements memreq; + vkGetImageMemoryRequirements(vulkan->GetDevice(), img.image, &memreq); + + VkMemoryAllocateInfo alloc{ VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO }; + alloc.allocationSize = memreq.size; + vulkan->MemoryTypeFromProperties(memreq.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, &alloc.memoryTypeIndex); + VkResult res = vkAllocateMemory(vulkan->GetDevice(), &alloc, nullptr, &img.memory); + assert(res); + res = vkBindImageMemory(vulkan->GetDevice(), img.image, img.memory, 0); + assert(res); + img.layout = initialLayout; + + VkImageViewCreateInfo ivci{ VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO }; + ivci.components = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A }; + ivci.format = ici.format; + ivci.image = img.image; + ivci.viewType = VK_IMAGE_VIEW_TYPE_2D; + ivci.subresourceRange.aspectMask = color ? 
VK_IMAGE_ASPECT_COLOR_BIT : (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT); + ivci.subresourceRange.layerCount = 1; + ivci.subresourceRange.levelCount = 1; + res = vkCreateImageView(vulkan->GetDevice(), &ivci, nullptr, &img.view); + assert(res); + + VkImageMemoryBarrier barrier{}; + barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + barrier.subresourceRange.layerCount = 1; + barrier.subresourceRange.levelCount = 1; + barrier.image = img.image; + barrier.srcAccessMask = 0; + switch (initialLayout) { + case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: + barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + break; + case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: + barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + break; + case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: + barrier.dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + break; + } + barrier.newLayout = initialLayout; + barrier.subresourceRange.aspectMask = ivci.subresourceRange.aspectMask; + vkCmdPipelineBarrier(vulkan->GetInitCommandBuffer(), VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, 0, 0, 0, 1, &barrier); +} + // A VKFramebuffer is a VkFramebuffer plus all the textures it owns. 
class VKFramebuffer : public Framebuffer { public: + VKFramebuffer(VulkanContext *vk) : vulkan_(vk) {} + ~VKFramebuffer() { + vulkan_->Delete().QueueDeleteImage(color.image); + vulkan_->Delete().QueueDeleteImage(depth.image); + vulkan_->Delete().QueueDeleteImageView(color.view); + vulkan_->Delete().QueueDeleteImageView(depth.view); + vulkan_->Delete().QueueDeleteDeviceMemory(color.memory); + vulkan_->Delete().QueueDeleteDeviceMemory(depth.memory); + vulkan_->Delete().QueueDeleteFramebuffer(framebuf); + } + VkFramebuffer framebuf; + VKImage color; + VKImage depth; int width; int height; +private: + VulkanContext *vulkan_; }; Framebuffer *VKContext::CreateFramebuffer(const FramebufferDesc &desc) { - VKFramebuffer *fb = new VKFramebuffer(); + VKFramebuffer *fb = new VKFramebuffer(vulkan_); fb->width = desc.width; fb->height = desc.height; + CreateImage(vulkan_, fb->color, fb->width, fb->height, VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, true); + CreateImage(vulkan_, fb->depth, fb->width, fb->height, vulkan_->GetDeviceInfo().preferredDepthStencilFormat, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, false); + + VkFramebufferCreateInfo fbci{ VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO }; + VkImageView views[2]{}; + + fbci.renderPass = renderPasses_[0]; + fbci.attachmentCount = 2; + fbci.pAttachments = views; + views[0] = fb->color.view; + views[1] = fb->depth.view; + fbci.width = fb->width; + fbci.height = fb->height; + fbci.layers = 1; + + vkCreateFramebuffer(vulkan_->GetDevice(), &fbci, nullptr, &fb->framebuf); return fb; } @@ -1305,11 +1484,67 @@ bool VKContext::BlitFramebuffer(Framebuffer *srcfb, int srcX1, int srcY1, int sr // These functions should be self explanatory. 
void VKContext::BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPassInfo &rp) { + VkFramebuffer framebuf; + int w; + int h; if (fbo) { VKFramebuffer *fb = (VKFramebuffer *)fbo; + framebuf = fb->framebuf; + w = fb->width; + h = fb->height; } else { - + framebuf = vulkan_->GetSurfaceFramebuffer(); + w = vulkan_->GetWidth(); + h = vulkan_->GetHeight(); } + + VkCommandBuffer cmd = vulkan_->GetSurfaceCommandBuffer(); + if (framebuf == curFramebuffer_) { + // If we're asking to clear, but already bound, we'll just keep it bound but send a clear command. + // We will try to avoid this as much as possible. Also, TODO, do a single vkCmdClearAttachments to clear both. + if (rp.color == RPAction::CLEAR) { + VkClearAttachment clear{}; + clear.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + Uint8x4ToFloat4(rp.clearColor, clear.clearValue.color.float32); + clear.colorAttachment = 0; + VkClearRect rc{ {0,0,(uint32_t)w,(uint32_t)h}, 0, 1 }; + vkCmdClearAttachments(cmd, 1, &clear, 1, &rc); + } + if (rp.depth == RPAction::CLEAR) { + VkClearAttachment clear{}; + clear.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + clear.clearValue.depthStencil.depth = rp.clearDepth; + clear.clearValue.depthStencil.stencil = rp.clearStencil; + clear.colorAttachment = 0; + VkClearRect rc{ { 0,0,w,h }, 0, 1 }; + vkCmdClearAttachments(cmd, 1, &clear, 1, &rc); + } + // We're done. + return; + } + + // OK, we're switching framebuffers. + if (curRenderPass_ != VK_NULL_HANDLE) { + vkCmdEndRenderPass(cmd); + } + + VkClearValue clearVal[2] = {}; + Uint8x4ToFloat4(rp.clearColor, clearVal[0].color.float32); + clearVal[1].depthStencil.depth = rp.clearDepth; + clearVal[1].depthStencil.stencil = rp.clearStencil; + + VkRenderPassBeginInfo rp_begin = { VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO }; + rp_begin.renderPass = fbo ? 
renderPasses_[RPIndex(rp.color, rp.depth)] : vulkan_->GetSurfaceRenderPass(); + rp_begin.framebuffer = framebuf; + rp_begin.renderArea.offset.x = 0; + rp_begin.renderArea.offset.y = 0; + rp_begin.renderArea.extent.width = w; + rp_begin.renderArea.extent.height = h; + rp_begin.clearValueCount = 2; + rp_begin.pClearValues = clearVal; + vkCmdBeginRenderPass(cmd, &rp_begin, VK_SUBPASS_CONTENTS_INLINE); + curFramebuffer_ = framebuf; + curRenderPass_ = rp_begin.renderPass; } // color must be 0, for now. @@ -1317,6 +1552,7 @@ void VKContext::BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChanne VKFramebuffer *fb = (VKFramebuffer *)fbo; } + void VKContext::BindFramebufferForRead(Framebuffer *fbo) { /* noop */ } uintptr_t VKContext::GetFramebufferAPITexture(Framebuffer *fbo, int channelBit, int attachment) { diff --git a/pspautotests b/pspautotests index e18cface3d..d02ba74070 160000 --- a/pspautotests +++ b/pspautotests @@ -1 +1 @@ -Subproject commit e18cface3db64ccb96738dc128fe769b28fff65c +Subproject commit d02ba7407050f445edf9e908374ad4bf3b2f237b From 273c266b781a66ccd5ee362bf700d4fa9b5af05e Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Tue, 16 May 2017 20:49:12 +0200 Subject: [PATCH 10/25] Silence a perf warning --- Windows/GPU/WindowsVulkanContext.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Windows/GPU/WindowsVulkanContext.cpp b/Windows/GPU/WindowsVulkanContext.cpp index 1d0dad0f52..6affd88f17 100644 --- a/Windows/GPU/WindowsVulkanContext.cpp +++ b/Windows/GPU/WindowsVulkanContext.cpp @@ -124,6 +124,9 @@ static VkBool32 VKAPI_CALL Vulkan_Dbg(VkDebugReportFlagsEXT msgFlags, VkDebugRep } message << "[" << pLayerPrefix << "] " << ObjTypeToString(objType) << " Code " << msgCode << " : " << pMsg << "\n"; + if (msgCode == 2) // Useless perf warning + return false; + // This seems like a bogus result when submitting two command buffers in one go, one creating the image, the other one using it. 
if (msgCode == 6 && startsWith(pMsg, "Cannot submit cmd buffer using image")) return false; From 32728553bdf80e03f895d653db678cfa43652f3f Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Wed, 17 May 2017 02:21:03 +0200 Subject: [PATCH 11/25] Further steps towards Vulkan framebuffer support --- GPU/Common/FramebufferCommon.cpp | 1 + GPU/Vulkan/TextureCacheVulkan.cpp | 3 +- UI/EmuScreen.cpp | 10 --- ext/native/thin3d/thin3d.h | 1 + ext/native/thin3d/thin3d_vulkan.cpp | 105 +++++++++++++++++++++++----- 5 files changed, 91 insertions(+), 29 deletions(-) diff --git a/GPU/Common/FramebufferCommon.cpp b/GPU/Common/FramebufferCommon.cpp index 8e7b1d2225..2dc44a425d 100644 --- a/GPU/Common/FramebufferCommon.cpp +++ b/GPU/Common/FramebufferCommon.cpp @@ -939,6 +939,7 @@ void FramebufferManagerCommon::CopyDisplayToOutput() { float v1 = (272.0f + offsetY) / (float)vfb->bufferHeight; if (!usePostShader_) { + draw_->TransitionForSampling(vfb->fbo); // Temporary vulkan hack draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }); draw_->BindFramebufferAsTexture(vfb->fbo, 0, Draw::FB_COLOR_BIT, 0); bool linearFilter = g_Config.iBufFilter == SCALE_LINEAR; diff --git a/GPU/Vulkan/TextureCacheVulkan.cpp b/GPU/Vulkan/TextureCacheVulkan.cpp index fd59be053e..e908d3ca29 100644 --- a/GPU/Vulkan/TextureCacheVulkan.cpp +++ b/GPU/Vulkan/TextureCacheVulkan.cpp @@ -660,8 +660,9 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry, bool replaceIm LoadTextureLevel(*entry, (uint8_t *)data, stride, level, scaleFactor, dstFmt); entry->vkTex->texture_->UploadMip(0, mipWidth, mipHeight, texBuf, bufferOffset, stride / bpp); break; - } else + } else { LoadTextureLevel(*entry, (uint8_t *)data, stride, i, scaleFactor, dstFmt); + } if (replacer_.Enabled()) { replacer_.NotifyTextureDecoded(replacedInfo, data, stride, i, mipWidth, mipHeight); } diff --git a/UI/EmuScreen.cpp b/UI/EmuScreen.cpp index 16b992a71e..b65fef4099 100644 --- 
a/UI/EmuScreen.cpp +++ b/UI/EmuScreen.cpp @@ -169,16 +169,6 @@ void EmuScreen::bootGame(const std::string &filename) { break; case GPUBackend::VULKAN: coreParam.gpuCore = GPUCORE_VULKAN; - if (g_Config.iRenderingMode != FB_NON_BUFFERED_MODE && !g_Config.bSoftwareRendering) { -#ifdef _WIN32 - if (IDYES == MessageBox(MainWindow::GetHWND(), L"The Vulkan backend is not yet compatible with buffered rendering. Switch to non-buffered (WARNING: This will cause glitches with the other backends unless you switch back)", L"Vulkan Experimental Support", MB_ICONINFORMATION | MB_YESNO)) { - g_Config.iRenderingMode = FB_NON_BUFFERED_MODE; - } else { - errorMessage_ = "Non-buffered rendering required for Vulkan"; - return; - } -#endif - } break; #endif } diff --git a/ext/native/thin3d/thin3d.h b/ext/native/thin3d/thin3d.h index 37cd4e40ba..c3befbeff0 100644 --- a/ext/native/thin3d/thin3d.h +++ b/ext/native/thin3d/thin3d.h @@ -621,6 +621,7 @@ public: // color must be 0, for now. virtual void BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int attachment) = 0; virtual void BindFramebufferForRead(Framebuffer *fbo) = 0; + virtual void TransitionForSampling(Framebuffer *fbo) {} // Temporary vulkan hack virtual uintptr_t GetFramebufferAPITexture(Framebuffer *fbo, int channelBits, int attachment) = 0; diff --git a/ext/native/thin3d/thin3d_vulkan.cpp b/ext/native/thin3d/thin3d_vulkan.cpp index 907110190c..a1c303c46d 100644 --- a/ext/native/thin3d/thin3d_vulkan.cpp +++ b/ext/native/thin3d/thin3d_vulkan.cpp @@ -350,6 +350,7 @@ public: // color must be 0, for now. 
void BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int attachment) override; void BindFramebufferForRead(Framebuffer *fbo) override; + void TransitionForSampling(Framebuffer *fbo) override; uintptr_t GetFramebufferAPITexture(Framebuffer *fbo, int channelBit, int attachment) override; @@ -425,6 +426,8 @@ private: void ApplyDynamicState(); void DirtyDynamicState(); + void EndCurrentRenderpass(); + VulkanContext *vulkan_ = nullptr; VKPipeline *curPipeline_ = nullptr; @@ -444,7 +447,6 @@ private: // Renderpasses, all combination of preserving or clearing or dont-care-ing fb contents. VkRenderPass renderPasses_[9]; - VkCommandPool cmdPool_; VkDevice device_; VkQueue queue_; int queueFamilyIndex_; @@ -463,6 +465,7 @@ private: struct FrameData { VulkanPushBuffer *pushBuffer; + VkCommandPool cmdPool_; // Per-frame descriptor set cache. As it's per frame and reset every frame, we don't need to // worry about invalidating descriptors pointing to deleted textures. @@ -675,12 +678,6 @@ VKContext::VKContext(VulkanContext *vulkan) memset(boundTextures_, 0, sizeof(boundTextures_)); CreatePresets(); - VkCommandPoolCreateInfo p = { VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO }; - p.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; - p.queueFamilyIndex = vulkan->GetGraphicsQueueFamilyIndex(); - VkResult res = vkCreateCommandPool(device_, &p, nullptr, &cmdPool_); - assert(VK_SUCCESS == res); - VkDescriptorPoolSize dpTypes[2]; dpTypes[0].descriptorCount = 200; dpTypes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; @@ -692,13 +689,18 @@ VKContext::VKContext(VulkanContext *vulkan) dp.maxSets = 200; // 200 textures per frame should be enough for the UI... 
dp.pPoolSizes = dpTypes; dp.poolSizeCount = ARRAY_SIZE(dpTypes); - res = vkCreateDescriptorPool(device_, &dp, nullptr, &frame_[0].descriptorPool); - assert(VK_SUCCESS == res); - res = vkCreateDescriptorPool(device_, &dp, nullptr, &frame_[1].descriptorPool); - assert(VK_SUCCESS == res); - frame_[0].pushBuffer = new VulkanPushBuffer(vulkan_, 1024 * 1024); - frame_[1].pushBuffer = new VulkanPushBuffer(vulkan_, 1024 * 1024); + VkCommandPoolCreateInfo p = { VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO }; + p.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT; + p.queueFamilyIndex = vulkan->GetGraphicsQueueFamilyIndex(); + + for (int i = 0; i < 2; i++) { + VkResult res = vkCreateDescriptorPool(device_, &dp, nullptr, &frame_[i].descriptorPool); + assert(VK_SUCCESS == res); + res = vkCreateCommandPool(device_, &p, nullptr, &frame_[i].cmdPool_); + assert(VK_SUCCESS == res); + frame_[i].pushBuffer = new VulkanPushBuffer(vulkan_, 1024 * 1024); + } // binding 0 - uniform data // binding 1 - combined sampler/image @@ -717,7 +719,7 @@ VKContext::VKContext(VulkanContext *vulkan) VkDescriptorSetLayoutCreateInfo dsl = { VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO }; dsl.bindingCount = 2; dsl.pBindings = bindings; - res = vkCreateDescriptorSetLayout(device_, &dsl, nullptr, &descriptorSetLayout_); + VkResult res = vkCreateDescriptorSetLayout(device_, &dsl, nullptr, &descriptorSetLayout_); assert(VK_SUCCESS == res); VkPipelineLayoutCreateInfo pl = { VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO }; @@ -802,13 +804,13 @@ VKContext::~VKContext() { for (int i = 0; i < 9; i++) { vulkan_->Delete().QueueDeleteRenderPass(renderPasses_[i]); } - vulkan_->Delete().QueueDeleteCommandPool(cmdPool_); // This also destroys all descriptor sets. 
for (int i = 0; i < 2; i++) { frame_[i].descSets_.clear(); vulkan_->Delete().QueueDeleteDescriptorPool(frame_[i].descriptorPool); frame_[i].pushBuffer->Destroy(vulkan_); delete frame_[i].pushBuffer; + vulkan_->Delete().QueueDeleteCommandPool(frame_[i].cmdPool_); } vulkan_->Delete().QueueDeleteDescriptorSetLayout(descriptorSetLayout_); vulkan_->Delete().QueueDeletePipelineLayout(pipelineLayout_); @@ -1259,6 +1261,10 @@ void VKContext::DrawUP(const void *vdata, int vertexCount) { } void VKContext::Clear(int mask, uint32_t colorval, float depthVal, int stencilVal) { + ELOG("Clear: Try to avoid calling this..."); + if (curRenderPass_) { + } + if (mask & FBChannel::FB_COLOR_BIT) { VkClearColorValue col; Uint8x4ToFloat4(colorval, col.float32); @@ -1482,16 +1488,26 @@ bool VKContext::BlitFramebuffer(Framebuffer *srcfb, int srcX1, int srcY1, int sr return true; } +void VKContext::EndCurrentRenderpass() { + if (curRenderPass_ != VK_NULL_HANDLE) { + vkCmdEndRenderPass(vulkan_->GetSurfaceCommandBuffer()); + curRenderPass_ = VK_NULL_HANDLE; + curFramebuffer_ = VK_NULL_HANDLE; + } +} + // These functions should be self explanatory. void VKContext::BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPassInfo &rp) { VkFramebuffer framebuf; int w; int h; + VkImageLayout prevLayout; if (fbo) { VKFramebuffer *fb = (VKFramebuffer *)fbo; framebuf = fb->framebuf; w = fb->width; h = fb->height; + prevLayout = fb->color.layout; } else { framebuf = vulkan_->GetSurfaceFramebuffer(); w = vulkan_->GetWidth(); @@ -1524,8 +1540,29 @@ void VKContext::BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPass } // OK, we're switching framebuffers. - if (curRenderPass_ != VK_NULL_HANDLE) { - vkCmdEndRenderPass(cmd); + EndCurrentRenderpass(); + + if (fbo) { + VKFramebuffer *fb = (VKFramebuffer *)fbo; + // Now, if the image needs transitioning, let's transition. + // The backbuffer does not, that's handled by VulkanContext. 
+ VkImageMemoryBarrier barrier{}; + barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + barrier.oldLayout = fb->color.layout; + barrier.subresourceRange.layerCount = 1; + barrier.subresourceRange.levelCount = 1; + barrier.image = fb->color.image; + barrier.srcAccessMask = 0; + switch (fb->color.layout) { + case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: + barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT; + break; + } + barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + barrier.newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, 0, 0, 0, 1, &barrier); + fb->color.layout = barrier.newLayout; } VkClearValue clearVal[2] = {}; @@ -1547,6 +1584,36 @@ void VKContext::BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPass curRenderPass_ = rp_begin.renderPass; } +// This will implicitly end the current render pass. Only call right before switching to a new one, like the +// backbuffer. 
+void VKContext::TransitionForSampling(Framebuffer *fbo) { + EndCurrentRenderpass(); + + VKFramebuffer *fb = (VKFramebuffer *)fbo; + if (fb->color.layout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) + return; + + VkImageMemoryBarrier barrier{}; + barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + barrier.oldLayout = fb->color.layout; + barrier.subresourceRange.layerCount = 1; + barrier.subresourceRange.levelCount = 1; + barrier.image = fb->color.image; + barrier.srcAccessMask = 0; + switch (barrier.oldLayout) { + case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: + barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + break; + case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: + barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + break; + } + barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + // we're between passes so it's OK. + vkCmdPipelineBarrier(vulkan_->GetSurfaceCommandBuffer(), VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, 0, 0, 0, 1, &barrier); +} + // color must be 0, for now. void VKContext::BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int attachment) { VKFramebuffer *fb = (VKFramebuffer *)fbo; @@ -1556,7 +1623,9 @@ void VKContext::BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChanne void VKContext::BindFramebufferForRead(Framebuffer *fbo) { /* noop */ } uintptr_t VKContext::GetFramebufferAPITexture(Framebuffer *fbo, int channelBit, int attachment) { - return 0; + // TODO: Insert transition at the end of the previous command buffer, or the one that rendered to it last. 
+ VKFramebuffer *fb = (VKFramebuffer *)fbo; + return (uintptr_t)fb->color.image; } void VKContext::GetFramebufferDimensions(Framebuffer *fbo, int *w, int *h) { From 2b93338255158459e9dbb83131ba750149f927a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Fri, 19 May 2017 17:21:08 +0200 Subject: [PATCH 12/25] Vulkan backend: Fix various issues, can almost run in buffered now (except the final blit) --- GPU/Common/FramebufferCommon.cpp | 5 +- GPU/Vulkan/DrawEngineVulkan.cpp | 70 +++---- GPU/Vulkan/DrawEngineVulkan.h | 25 +-- GPU/Vulkan/FramebufferVulkan.cpp | 103 ++-------- GPU/Vulkan/FramebufferVulkan.h | 13 +- GPU/Vulkan/GPU_Vulkan.cpp | 12 +- GPU/Vulkan/GPU_Vulkan.h | 2 +- GPU/Vulkan/PipelineManagerVulkan.cpp | 5 +- GPU/Vulkan/PipelineManagerVulkan.h | 5 +- GPU/Vulkan/StateMappingVulkan.cpp | 15 +- ext/native/thin3d/thin3d.h | 4 +- ext/native/thin3d/thin3d_vulkan.cpp | 283 ++++++++++++++++++--------- 12 files changed, 288 insertions(+), 254 deletions(-) diff --git a/GPU/Common/FramebufferCommon.cpp b/GPU/Common/FramebufferCommon.cpp index 2dc44a425d..9cb831339b 100644 --- a/GPU/Common/FramebufferCommon.cpp +++ b/GPU/Common/FramebufferCommon.cpp @@ -819,7 +819,6 @@ void FramebufferManagerCommon::SetViewport2D(int x, int y, int w, int h) { void FramebufferManagerCommon::CopyDisplayToOutput() { DownloadFramebufferOnSwitch(currentRenderVfb_); - SetViewport2D(0, 0, pixelWidth_, pixelHeight_); currentRenderVfb_ = 0; if (displayFramebufPtr_ == 0) { @@ -894,6 +893,7 @@ void FramebufferManagerCommon::CopyDisplayToOutput() { draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }); } // Just a pointer to plain memory to draw. We should create a framebuffer, then draw to it. 
+ SetViewport2D(0, 0, pixelWidth_, pixelHeight_); DrawFramebufferToOutput(Memory::GetPointer(displayFramebufPtr_), displayFormat_, displayStride_, true); return; } @@ -939,9 +939,9 @@ void FramebufferManagerCommon::CopyDisplayToOutput() { float v1 = (272.0f + offsetY) / (float)vfb->bufferHeight; if (!usePostShader_) { - draw_->TransitionForSampling(vfb->fbo); // Temporary vulkan hack draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }); draw_->BindFramebufferAsTexture(vfb->fbo, 0, Draw::FB_COLOR_BIT, 0); + SetViewport2D(0, 0, pixelWidth_, pixelHeight_); bool linearFilter = g_Config.iBufFilter == SCALE_LINEAR; // We are doing the DrawActiveTexture call directly to the backbuffer here. Hence, we must // flip V. @@ -975,6 +975,7 @@ void FramebufferManagerCommon::CopyDisplayToOutput() { DrawActiveTexture(0, 0, fbo_w, fbo_h, fbo_w, fbo_h, 0.0f, 0.0f, 1.0f, 1.0f, ROTATION_LOCKED_HORIZONTAL, linearFilter); draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }); + SetViewport2D(0, 0, pixelWidth_, pixelHeight_); // Use the extra FBO, with applied post-processing shader, as a texture. 
// fbo_bind_as_texture(extraFBOs_[0], FB_COLOR_BIT, 0); diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp index 929d4f91a8..ff134f9f1b 100644 --- a/GPU/Vulkan/DrawEngineVulkan.cpp +++ b/GPU/Vulkan/DrawEngineVulkan.cpp @@ -64,8 +64,9 @@ enum { TRANSFORMED_VERTEX_BUFFER_SIZE = VERTEX_BUFFER_MAX * sizeof(TransformedVertex) }; -DrawEngineVulkan::DrawEngineVulkan(VulkanContext *vulkan) +DrawEngineVulkan::DrawEngineVulkan(VulkanContext *vulkan, Draw::DrawContext *draw) : vulkan_(vulkan), + draw_(draw), prevPrim_(GE_PRIM_INVALID), lastVTypeID_(-1), numDrawCalls(0), @@ -336,7 +337,7 @@ inline void DrawEngineVulkan::SetupVertexDecoderInternal(u32 vertType) { void DrawEngineVulkan::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) { if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls + vertexCount > VERTEX_BUFFER_MAX) - Flush(cmd_); + Flush(); // TODO: Is this the right thing to do? if (prim == GE_PRIM_KEEP_PREVIOUS) { @@ -375,7 +376,7 @@ void DrawEngineVulkan::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, // Rendertarget == texture? 
if (!g_Config.bDisableSlowFramebufEffects) { gstate_c.Dirty(DIRTY_TEXTURE_PARAMS); - Flush(cmd_); + Flush(); } } } @@ -662,7 +663,8 @@ void DrawEngineVulkan::DirtyAllUBOs() { //} // The inline wrapper in the header checks for numDrawCalls == 0d -void DrawEngineVulkan::DoFlush(VkCommandBuffer cmd) { +void DrawEngineVulkan::DoFlush() { + VkCommandBuffer cmd = (VkCommandBuffer)draw_->GetNativeObject(Draw::NativeObject::RENDERPASS_COMMANDBUFFER); gpuStats.numFlushes++; FrameData *frame = &frame_[curFrame_ & 1]; @@ -689,6 +691,8 @@ void DrawEngineVulkan::DoFlush(VkCommandBuffer cmd) { uint32_t ibOffset = 0; uint32_t vbOffset = 0; + VkRenderPass renderPass = (VkRenderPass)draw_->GetNativeObject(Draw::NativeObject::COMPATIBLE_RENDERPASS); + if (useHWTransform) { // We don't detect clears in this path, so here we can switch framebuffers if necessary. @@ -724,28 +728,28 @@ void DrawEngineVulkan::DoFlush(VkCommandBuffer cmd) { ConvertStateToVulkanKey(*framebufferManager_, shaderManager_, prim, pipelineKey_, dynState_); // TODO: Dirty-flag these. 
- vkCmdSetScissor(cmd_, 0, 1, &dynState_.scissor); - vkCmdSetViewport(cmd_, 0, 1, &dynState_.viewport); + vkCmdSetScissor(cmd, 0, 1, &dynState_.scissor); + vkCmdSetViewport(cmd, 0, 1, &dynState_.viewport); if (dynState_.useStencil) { - vkCmdSetStencilWriteMask(cmd_, VK_STENCIL_FRONT_AND_BACK, dynState_.stencilWriteMask); - vkCmdSetStencilCompareMask(cmd_, VK_STENCIL_FRONT_AND_BACK, dynState_.stencilCompareMask); - vkCmdSetStencilReference(cmd_, VK_STENCIL_FRONT_AND_BACK, dynState_.stencilRef); + vkCmdSetStencilWriteMask(cmd, VK_STENCIL_FRONT_AND_BACK, dynState_.stencilWriteMask); + vkCmdSetStencilCompareMask(cmd, VK_STENCIL_FRONT_AND_BACK, dynState_.stencilCompareMask); + vkCmdSetStencilReference(cmd, VK_STENCIL_FRONT_AND_BACK, dynState_.stencilRef); } if (dynState_.useBlendColor) { float bc[4]; Uint8x4ToFloat4(bc, dynState_.blendColor); - vkCmdSetBlendConstants(cmd_, bc); + vkCmdSetBlendConstants(cmd, bc); } dirtyUniforms_ |= shaderManager_->UpdateUniforms(); shaderManager_->GetShaders(prim, lastVTypeID_, &vshader, &fshader, useHWTransform); - VulkanPipeline *pipeline = pipelineManager_->GetOrCreatePipeline(pipelineLayout_, pipelineKey_, dec_, vshader, fshader, true); + VulkanPipeline *pipeline = pipelineManager_->GetOrCreatePipeline(pipelineLayout_, renderPass, pipelineKey_, dec_, vshader, fshader, true); if (!pipeline) { // Already logged, let's bail out. return; } - vkCmdBindPipeline(cmd_, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline->pipeline); // TODO: Avoid if same as last draw. + vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline->pipeline); // TODO: Avoid if same as last draw. 
UpdateUBOs(frame); @@ -754,7 +758,7 @@ void DrawEngineVulkan::DoFlush(VkCommandBuffer cmd) { const uint32_t dynamicUBOOffsets[3] = { baseUBOOffset, lightUBOOffset, boneUBOOffset, }; - vkCmdBindDescriptorSets(cmd_, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout_, 0, 1, &ds, 3, dynamicUBOOffsets); + vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout_, 0, 1, &ds, 3, dynamicUBOOffsets); int stride = dec_->GetDecVtxFmt().stride; @@ -763,13 +767,13 @@ void DrawEngineVulkan::DoFlush(VkCommandBuffer cmd) { VkBuffer ibuf; ibOffset = (uint32_t)frame->pushIndex->Push(decIndex, 2 * indexGen.VertexCount(), &ibuf); // TODO: Avoid rebinding vertex/index buffers if the vertex size stays the same by using the offset arguments - vkCmdBindVertexBuffers(cmd_, 0, 1, &vbuf, offsets); - vkCmdBindIndexBuffer(cmd_, ibuf, ibOffset, VK_INDEX_TYPE_UINT16); + vkCmdBindVertexBuffers(cmd, 0, 1, &vbuf, offsets); + vkCmdBindIndexBuffer(cmd, ibuf, ibOffset, VK_INDEX_TYPE_UINT16); int numInstances = (gstate_c.bezier || gstate_c.spline) ? numPatches : 1; - vkCmdDrawIndexed(cmd_, vertexCount, numInstances, 0, 0, 0); + vkCmdDrawIndexed(cmd, vertexCount, numInstances, 0, 0, 0); } else { - vkCmdBindVertexBuffers(cmd_, 0, 1, &vbuf, offsets); - vkCmdDraw(cmd_, vertexCount, 1, 0, 0); + vkCmdBindVertexBuffers(cmd, 0, 1, &vbuf, offsets); + vkCmdDraw(cmd, vertexCount, 1, 0, 0); } } else { // Decode to "decoded" @@ -826,32 +830,32 @@ void DrawEngineVulkan::DoFlush(VkCommandBuffer cmd) { VulkanDynamicState dynState; ConvertStateToVulkanKey(*framebufferManager_, shaderManager_, prim, pipelineKey, dynState); // TODO: Dirty-flag these. 
- vkCmdSetScissor(cmd_, 0, 1, &dynState.scissor); - vkCmdSetViewport(cmd_, 0, 1, &dynState.viewport); + vkCmdSetScissor(cmd, 0, 1, &dynState.scissor); + vkCmdSetViewport(cmd, 0, 1, &dynState.viewport); if (dynState.useStencil) { - vkCmdSetStencilWriteMask(cmd_, VK_STENCIL_FRONT_AND_BACK, dynState.stencilWriteMask); - vkCmdSetStencilCompareMask(cmd_, VK_STENCIL_FRONT_AND_BACK, dynState.stencilCompareMask); + vkCmdSetStencilWriteMask(cmd, VK_STENCIL_FRONT_AND_BACK, dynState.stencilWriteMask); + vkCmdSetStencilCompareMask(cmd, VK_STENCIL_FRONT_AND_BACK, dynState.stencilCompareMask); } if (result.setStencil) { - vkCmdSetStencilReference(cmd_, VK_STENCIL_FRONT_AND_BACK, result.stencilValue); + vkCmdSetStencilReference(cmd, VK_STENCIL_FRONT_AND_BACK, result.stencilValue); } else if (dynState.useStencil) { - vkCmdSetStencilReference(cmd_, VK_STENCIL_FRONT_AND_BACK, dynState.stencilRef); + vkCmdSetStencilReference(cmd, VK_STENCIL_FRONT_AND_BACK, dynState.stencilRef); } if (dynState.useBlendColor) { float bc[4]; Uint8x4ToFloat4(bc, dynState.blendColor); - vkCmdSetBlendConstants(cmd_, bc); + vkCmdSetBlendConstants(cmd, bc); } dirtyUniforms_ |= shaderManager_->UpdateUniforms(); shaderManager_->GetShaders(prim, lastVTypeID_, &vshader, &fshader, useHWTransform); - VulkanPipeline *pipeline = pipelineManager_->GetOrCreatePipeline(pipelineLayout_, pipelineKey, dec_, vshader, fshader, false); + VulkanPipeline *pipeline = pipelineManager_->GetOrCreatePipeline(pipelineLayout_, renderPass, pipelineKey, dec_, vshader, fshader, false); if (!pipeline) { // Already logged, let's bail out. return; } - vkCmdBindPipeline(cmd_, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline->pipeline); // TODO: Avoid if same as last draw. + vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline->pipeline); // TODO: Avoid if same as last draw. 
// Even if the first draw is through-mode, make sure we at least have one copy of these uniforms buffered UpdateUBOs(frame); @@ -860,7 +864,7 @@ void DrawEngineVulkan::DoFlush(VkCommandBuffer cmd) { const uint32_t dynamicUBOOffsets[3] = { baseUBOOffset, lightUBOOffset, boneUBOOffset, }; - vkCmdBindDescriptorSets(cmd_, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout_, 0, 1, &ds, 3, dynamicUBOOffsets); + vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout_, 0, 1, &ds, 3, dynamicUBOOffsets); if (drawIndexed) { VkBuffer vbuf, ibuf; @@ -868,16 +872,16 @@ void DrawEngineVulkan::DoFlush(VkCommandBuffer cmd) { ibOffset = (uint32_t)frame->pushIndex->Push(inds, sizeof(short) * numTrans, &ibuf); VkDeviceSize offsets[1] = { vbOffset }; // TODO: Avoid rebinding if the vertex size stays the same by using the offset arguments - vkCmdBindVertexBuffers(cmd_, 0, 1, &vbuf, offsets); - vkCmdBindIndexBuffer(cmd_, ibuf, ibOffset, VK_INDEX_TYPE_UINT16); - vkCmdDrawIndexed(cmd_, numTrans, 1, 0, 0, 0); + vkCmdBindVertexBuffers(cmd, 0, 1, &vbuf, offsets); + vkCmdBindIndexBuffer(cmd, ibuf, ibOffset, VK_INDEX_TYPE_UINT16); + vkCmdDrawIndexed(cmd, numTrans, 1, 0, 0, 0); } else { VkBuffer vbuf; vbOffset = (uint32_t)frame->pushVertex->Push(drawBuffer, numTrans * sizeof(TransformedVertex), &vbuf); VkDeviceSize offsets[1] = { vbOffset }; // TODO: Avoid rebinding if the vertex size stays the same by using the offset arguments - vkCmdBindVertexBuffers(cmd_, 0, 1, &vbuf, offsets); - vkCmdDraw(cmd_, numTrans, 1, 0, 0); + vkCmdBindVertexBuffers(cmd, 0, 1, &vbuf, offsets); + vkCmdDraw(cmd, numTrans, 1, 0, 0); } } else if (result.action == SW_CLEAR) { // Note: we won't get here if the clear is alpha but not color, or color but not alpha. 
diff --git a/GPU/Vulkan/DrawEngineVulkan.h b/GPU/Vulkan/DrawEngineVulkan.h index 1317b79a8f..6fbea4030d 100644 --- a/GPU/Vulkan/DrawEngineVulkan.h +++ b/GPU/Vulkan/DrawEngineVulkan.h @@ -70,7 +70,7 @@ struct DrawEngineVulkanStats { // Handles transform, lighting and drawing. class DrawEngineVulkan : public DrawEngineCommon { public: - DrawEngineVulkan(VulkanContext *vulkan); + DrawEngineVulkan(VulkanContext *vulkan, Draw::DrawContext *draw); virtual ~DrawEngineVulkan(); void SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead); @@ -95,23 +95,19 @@ public: void SetupVertexDecoderInternal(u32 vertType); // So that this can be inlined - void Flush(VkCommandBuffer cmd) { + void Flush() { if (!numDrawCalls) return; - DoFlush(cmd); + DoFlush(); } bool IsCodePtrVertexDecoder(const u8 *ptr) const; - void DispatchFlush() override { Flush(cmd_); } + void DispatchFlush() override { Flush(); } void DispatchSubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) override { SubmitPrim(verts, inds, prim, vertexCount, vertType, bytesRead); } - void SetCmdBuffer(VkCommandBuffer cmd) { - cmd_ = cmd; - } - VkPipelineLayout GetPipelineLayout() const { return pipelineLayout_; } @@ -140,12 +136,13 @@ private: void DecodeVerts(VulkanPushBuffer *push, uint32_t *bindOffset, VkBuffer *vkbuf); void DecodeVertsStep(u8 *dest, int &i, int &decodedVerts); - void DoFlush(VkCommandBuffer cmd); + void DoFlush(); void UpdateUBOs(FrameData *frame); VkDescriptorSet GetDescriptorSet(VkImageView imageView, VkSampler sampler, VkBuffer base, VkBuffer light, VkBuffer bone); VulkanContext *vulkan_; + Draw::DrawContext *draw_; // We use a single descriptor set layout for all PSP draws. VkDescriptorSetLayout descriptorSetLayout_; @@ -196,20 +193,14 @@ private: u16 indexUpperBound; }; - // This is always set to the current main command buffer of the VulkanContext. 
- // In the future, we may support flushing mid-frame and more fine grained command buffer usage, - // but for now, let's just submit a whole frame at a time. This is not compatible with some games - // that do mid frame read-backs. - VkCommandBuffer cmd_; - // Vertex collector state IndexGenerator indexGen; GEPrimitiveType prevPrim_; u32 lastVTypeID_; - TransformedVertex *transformed; - TransformedVertex *transformedExpanded; + TransformedVertex *transformed = nullptr; + TransformedVertex *transformedExpanded = nullptr; // Other ShaderManagerVulkan *shaderManager_ = nullptr; diff --git a/GPU/Vulkan/FramebufferVulkan.cpp b/GPU/Vulkan/FramebufferVulkan.cpp index cb77192698..e6ff816958 100644 --- a/GPU/Vulkan/FramebufferVulkan.cpp +++ b/GPU/Vulkan/FramebufferVulkan.cpp @@ -118,11 +118,6 @@ void FramebufferManagerVulkan::SetShaderManager(ShaderManagerVulkan *sm) { void FramebufferManagerVulkan::InitDeviceObjects() { // Initialize framedata for (int i = 0; i < 2; i++) { - VkCommandPoolCreateInfo cp = { VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO }; - cp.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT; - cp.queueFamilyIndex = vulkan_->GetGraphicsQueueFamilyIndex(); - VkResult res = vkCreateCommandPool(vulkan_->GetDevice(), &cp, nullptr, &frameData_[i].cmdPool_); - assert(res == VK_SUCCESS); frameData_[i].push_ = new VulkanPushBuffer(vulkan_, 64 * 1024); } @@ -136,7 +131,7 @@ void FramebufferManagerVulkan::InitDeviceObjects() { // Get a representative render pass and use when creating the pipeline. 
pipelineBasicTexBackBuffer_ = vulkan2D_.GetPipeline(pipelineCache2D_, vulkan_->GetSurfaceRenderPass(), vsBasicTex_, fsBasicTex_); - pipelineBasicTexFrameBuffer_ = vulkan2D_.GetPipeline(pipelineCache2D_, (VkRenderPass)draw_->GetNativeObject(Draw::NativeObject::RENDERPASS), vsBasicTex_, fsBasicTex_); + pipelineBasicTexFrameBuffer_ = vulkan2D_.GetPipeline(pipelineCache2D_, (VkRenderPass)draw_->GetNativeObject(Draw::NativeObject::COMPATIBLE_RENDERPASS), vsBasicTex_, fsBasicTex_); VkSamplerCreateInfo samp = { VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO }; samp.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; @@ -154,15 +149,6 @@ void FramebufferManagerVulkan::InitDeviceObjects() { void FramebufferManagerVulkan::DestroyDeviceObjects() { for (int i = 0; i < 2; i++) { - if (frameData_[i].numCommandBuffers_ > 0) { - vkFreeCommandBuffers(vulkan_->GetDevice(), frameData_[i].cmdPool_, frameData_[i].numCommandBuffers_, frameData_[i].commandBuffers_); - frameData_[i].numCommandBuffers_ = 0; - frameData_[i].totalCommandBuffers_ = 0; - } - if (frameData_[i].cmdPool_ != VK_NULL_HANDLE) { - vkDestroyCommandPool(vulkan_->GetDevice(), frameData_[i].cmdPool_, nullptr); - frameData_[i].cmdPool_ = VK_NULL_HANDLE; - } if (frameData_[i].push_) { frameData_[i].push_->Destroy(vulkan_); delete frameData_[i].push_; @@ -191,37 +177,15 @@ void FramebufferManagerVulkan::NotifyClear(bool clearColor, bool clearAlpha, boo float x, y, w, h; CenterDisplayOutputRect(&x, &y, &w, &h, 480.0f, 272.0f, (float)pixelWidth_, (float)pixelHeight_, ROTATION_LOCKED_HORIZONTAL); - VkClearValue colorValue, depthValue; - Uint8x4ToFloat4(colorValue.color.float32, color); - depthValue.depthStencil.depth = depth; - depthValue.depthStencil.stencil = (color >> 24) & 0xFF; - - VkClearRect rect; - rect.baseArrayLayer = 0; - rect.layerCount = 1; - rect.rect.offset.x = x; - rect.rect.offset.y = y; - rect.rect.extent.width = w; - rect.rect.extent.height = h; - - int count = 0; - VkClearAttachment attach[2]; - // The Clear 
detection takes care of doing a regular draw instead if separate masking - // of color and alpha is needed, so we can just treat them as the same. - if (clearColor || clearAlpha) { - attach[count].aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - attach[count].clearValue = colorValue; - attach[count].colorAttachment = 0; - count++; - } - if (clearDepth) { - attach[count].aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - attach[count].clearValue = depthValue; - attach[count].colorAttachment = 0; - count++; - } - vkCmdClearAttachments(curCmd_, count, attach, 1, &rect); + int mask = 0; + if (clearColor || clearAlpha) + mask |= Draw::FBChannel::FB_COLOR_BIT; + if (clearDepth) + mask |= Draw::FBChannel::FB_DEPTH_BIT; + if (clearAlpha) + mask |= Draw::FBChannel::FB_STENCIL_BIT; + draw_->Clear(mask, color, depth, 0); if (clearColor || clearAlpha) { SetColorUpdated(gstate_c.skipDrawReason); } @@ -344,7 +308,9 @@ void FramebufferManagerVulkan::SetViewport2D(int x, int y, int w, int h) { vp.y = (float)y; vp.width = (float)w; vp.height = (float)h; - vkCmdSetViewport(curCmd_, 0, 1, &vp); + + VkCommandBuffer cmd = (VkCommandBuffer)draw_->GetNativeObject(Draw::NativeObject::RENDERPASS_COMMANDBUFFER); + vkCmdSetViewport(cmd, 0, 1, &vp); } void FramebufferManagerVulkan::DrawActiveTexture(float x, float y, float w, float h, float destW, float destH, float u0, float v0, float u1, float v1, int uvRotation, bool linearFilter) { @@ -390,7 +356,7 @@ void FramebufferManagerVulkan::DrawTexture(VulkanTexture *texture, float x, floa VulkanPushBuffer *push = frameData_[curFrame_].push_; - VkCommandBuffer cmd = curCmd_; + VkCommandBuffer cmd = (VkCommandBuffer)draw_->GetNativeObject(Draw::NativeObject::RENDERPASS_COMMANDBUFFER); // TODO: Choose linear or nearest appropriately, see GL impl. 
vulkan2D_.BindDescriptorSet(cmd, texture->GetImageView(), linearSampler_); @@ -932,39 +898,26 @@ void FramebufferManagerVulkan::PackFramebufferSync_(VirtualFramebuffer *vfb, int } -VkCommandBuffer FramebufferManagerVulkan::AllocFrameCommandBuffer() { - FrameData &frame = frameData_[curFrame_]; - int num = frame.numCommandBuffers_; - if (!frame.commandBuffers_[num]) { - VkCommandBufferAllocateInfo cmd = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO }; - cmd.commandBufferCount = 1; - cmd.commandPool = frame.cmdPool_; - cmd.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; - vkAllocateCommandBuffers(vulkan_->GetDevice(), &cmd, &frame.commandBuffers_[num]); - frame.totalCommandBuffers_ = num + 1; - } - return frame.commandBuffers_[num]; -} - void FramebufferManagerVulkan::BeginFrameVulkan() { BeginFrame(); vulkan2D_.BeginFrame(); FrameData &frame = frameData_[curFrame_]; - vkResetCommandPool(vulkan_->GetDevice(), frame.cmdPool_, 0); - frame.numCommandBuffers_ = 0; frame.push_->Reset(); frame.push_->Begin(vulkan_); if (!useBufferedRendering_) { + // TODO: This hackery should not be necessary. Is it? Need to check. // We only use a single command buffer in this case. - curCmd_ = vulkan_->GetSurfaceCommandBuffer(); + VkCommandBuffer cmd = vulkan_->GetSurfaceCommandBuffer(); VkRect2D scissor; scissor.offset = { 0, 0 }; scissor.extent = { (uint32_t)pixelWidth_, (uint32_t)pixelHeight_ }; - vkCmdSetScissor(curCmd_, 0, 1, &scissor); + vkCmdSetScissor(cmd, 0, 1, &scissor); + } else { + // Each render pass will set up scissor again. } } @@ -1043,7 +996,7 @@ void FramebufferManagerVulkan::FlushBeforeCopy() { // all the irrelevant state checking it'll use to decide what to do. Should // do something more focused here. 
SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason); - drawEngine_->Flush(curCmd_); + drawEngine_->Flush(); } void FramebufferManagerVulkan::Resized() { @@ -1143,22 +1096,6 @@ bool FramebufferManagerVulkan::GetStencilbuffer(u32 fb_address, int fb_stride, G return false; } - void FramebufferManagerVulkan::ClearBuffer(bool keepState) { - // keepState is irrelevant. - if (!currentRenderVfb_) { - return; - } - VkClearAttachment clear[2]; - memset(clear, 0, sizeof(clear)); - clear[0].aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - clear[1].aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - VkClearRect rc; - rc.baseArrayLayer = 0; - rc.layerCount = 1; - rc.rect.offset.x = 0; - rc.rect.offset.y = 0; - rc.rect.extent.width = currentRenderVfb_->bufferWidth; - rc.rect.extent.height = currentRenderVfb_->bufferHeight; - vkCmdClearAttachments(curCmd_, 2, clear, 1, &rc); + // TODO: Ideally, this should never be called. } diff --git a/GPU/Vulkan/FramebufferVulkan.h b/GPU/Vulkan/FramebufferVulkan.h index 72ab08338f..d7ea385d49 100644 --- a/GPU/Vulkan/FramebufferVulkan.h +++ b/GPU/Vulkan/FramebufferVulkan.h @@ -138,7 +138,6 @@ private: void MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height, float &u1, float &v1) override; void DoNotifyDraw(); - VkCommandBuffer AllocFrameCommandBuffer(); void UpdatePostShaderUniforms(int bufferWidth, int bufferHeight, int renderWidth, int renderHeight); void PackFramebufferAsync_(VirtualFramebuffer *vfb); @@ -149,11 +148,7 @@ private: VulkanContext *vulkan_; - // The command buffer of the current framebuffer pass being rendered to. - // One framebuffer can be used as a texturing source at multiple times in a frame, - // but then the contents have to be copied out into a new texture every time. 
- VkCommandBuffer curCmd_ = VK_NULL_HANDLE; - VkCommandBuffer cmdInit_ = VK_NULL_HANDLE; + // Used to keep track of command buffers here but have moved all that into Thin3D. // Used by DrawPixels VulkanTexture *drawPixelsTex_; @@ -173,13 +168,9 @@ private: MAX_COMMAND_BUFFERS = 32, }; + // Commandbuffers are handled internally in thin3d, one for each framebuffer pass. struct FrameData { - VkCommandPool cmdPool_; - // Keep track of command buffers we allocated so we can reset or free them at an appropriate point. - VkCommandBuffer commandBuffers_[MAX_COMMAND_BUFFERS]; VulkanPushBuffer *push_; - int numCommandBuffers_; - int totalCommandBuffers_; }; FrameData frameData_[2]; diff --git a/GPU/Vulkan/GPU_Vulkan.cpp b/GPU/Vulkan/GPU_Vulkan.cpp index b754b1720d..54fbb57f51 100644 --- a/GPU/Vulkan/GPU_Vulkan.cpp +++ b/GPU/Vulkan/GPU_Vulkan.cpp @@ -78,7 +78,7 @@ static const VulkanCommandTableEntry commandTable[] = { GPU_Vulkan::GPU_Vulkan(GraphicsContext *gfxCtx, Draw::DrawContext *draw) : GPUCommon(gfxCtx, draw), vulkan_((VulkanContext *)gfxCtx->GetAPIContext()), - drawEngine_(vulkan_) { + drawEngine_(vulkan_, draw) { UpdateVsyncInterval(true); CheckGPUFeatures(); @@ -197,10 +197,6 @@ void GPU_Vulkan::CheckGPUFeatures() { } void GPU_Vulkan::BeginHostFrame() { - if (g_Config.iRenderingMode == FB_NON_BUFFERED_MODE) { - // Draw everything directly to the backbuffer. - drawEngine_.SetCmdBuffer(vulkan_->GetSurfaceCommandBuffer()); - } drawEngine_.BeginFrame(); if (resized_) { @@ -396,7 +392,7 @@ bool GPU_Vulkan::FramebufferReallyDirty() { void GPU_Vulkan::CopyDisplayToOutputInternal() { // Flush anything left over. - drawEngine_.Flush(curCmd_); + drawEngine_.Flush(); shaderManagerVulkan_->DirtyLastShader(); @@ -419,7 +415,7 @@ void GPU_Vulkan::FastRunLoop(DisplayList &list) { const u32 diff = op ^ gstate.cmdmem[cmd]; // Inlined CheckFlushOp here to get rid of the dumpThisFrame_ check. 
if ((cmdFlags & FLAG_FLUSHBEFORE) || (diff && (cmdFlags & FLAG_FLUSHBEFOREONCHANGE))) { - drawEngine_.Flush(curCmd_); + drawEngine_.Flush(); } gstate.cmdmem[cmd] = op; // TODO: no need to write if diff==0... if ((cmdFlags & FLAG_EXECUTE) || (diff && (cmdFlags & FLAG_EXECUTEONCHANGE))) { @@ -445,7 +441,7 @@ inline void GPU_Vulkan::CheckFlushOp(int cmd, u32 diff) { if (dumpThisFrame_) { NOTICE_LOG(G3D, "================ FLUSH ================"); } - drawEngine_.Flush(curCmd_); + drawEngine_.Flush(); } } diff --git a/GPU/Vulkan/GPU_Vulkan.h b/GPU/Vulkan/GPU_Vulkan.h index e60c5ea034..7cdb665054 100644 --- a/GPU/Vulkan/GPU_Vulkan.h +++ b/GPU/Vulkan/GPU_Vulkan.h @@ -96,7 +96,7 @@ protected: private: void Flush() { - drawEngine_.Flush(nullptr); + drawEngine_.Flush(); } void CheckFlushOp(int cmd, u32 diff); void BuildReportingInfo(); diff --git a/GPU/Vulkan/PipelineManagerVulkan.cpp b/GPU/Vulkan/PipelineManagerVulkan.cpp index 3aa802cb85..ec66be9d30 100644 --- a/GPU/Vulkan/PipelineManagerVulkan.cpp +++ b/GPU/Vulkan/PipelineManagerVulkan.cpp @@ -292,9 +292,10 @@ static VulkanPipeline *CreateVulkanPipeline(VkDevice device, VkPipelineCache pip return vulkanPipeline; } -VulkanPipeline *PipelineManagerVulkan::GetOrCreatePipeline(VkPipelineLayout layout, const VulkanPipelineRasterStateKey &rasterKey, const VertexDecoder *vtxDec, VulkanVertexShader *vs, VulkanFragmentShader *fs, bool useHwTransform) { +VulkanPipeline *PipelineManagerVulkan::GetOrCreatePipeline(VkPipelineLayout layout, VkRenderPass renderPass, const VulkanPipelineRasterStateKey &rasterKey, const VertexDecoder *vtxDec, VulkanVertexShader *vs, VulkanFragmentShader *fs, bool useHwTransform) { VulkanPipelineKey key; key.raster = rasterKey; + key.renderPass = renderPass; key.useHWTransform = useHwTransform; key.vShader = vs->GetModule(); key.fShader = fs->GetModule(); @@ -305,7 +306,7 @@ VulkanPipeline *PipelineManagerVulkan::GetOrCreatePipeline(VkPipelineLayout layo } VulkanPipeline *pipeline = CreateVulkanPipeline( 
- vulkan_->GetDevice(), pipelineCache_, layout, vulkan_->GetSurfaceRenderPass(), + vulkan_->GetDevice(), pipelineCache_, layout, renderPass, rasterKey, vtxDec, vs, fs, useHwTransform); pipelines_[key] = pipeline; return pipeline; diff --git a/GPU/Vulkan/PipelineManagerVulkan.h b/GPU/Vulkan/PipelineManagerVulkan.h index 9f6e15ef76..59df04a911 100644 --- a/GPU/Vulkan/PipelineManagerVulkan.h +++ b/GPU/Vulkan/PipelineManagerVulkan.h @@ -39,13 +39,16 @@ enum class PspAttributeLocation { struct VulkanPipelineKey { VulkanPipelineRasterStateKey raster; // prim is included here + VkRenderPass renderPass; bool useHWTransform; const VertexDecoder *vtxDec; VkShaderModule vShader; VkShaderModule fShader; + // TODO: Probably better to use a hash function instead. bool operator < (const VulkanPipelineKey &other) const { if (raster < other.raster) return true; else if (other.raster < raster) return false; + if (renderPass < other.renderPass) return true; else if (other.renderPass < renderPass) return false; if (useHWTransform < other.useHWTransform) return true; else if (other.useHWTransform < useHWTransform) return false; if (vtxDec < other.vtxDec) return true; else if (other.vtxDec < vtxDec) return false; if (vShader < other.vShader) return true; else if (other.vShader < vShader) return false; @@ -82,7 +85,7 @@ public: PipelineManagerVulkan(VulkanContext *ctx); ~PipelineManagerVulkan(); - VulkanPipeline *GetOrCreatePipeline(VkPipelineLayout layout, const VulkanPipelineRasterStateKey &rasterKey, const VertexDecoder *vtxDec, VulkanVertexShader *vs, VulkanFragmentShader *fs, bool useHwTransform); + VulkanPipeline *GetOrCreatePipeline(VkPipelineLayout layout, VkRenderPass renderPass, const VulkanPipelineRasterStateKey &rasterKey, const VertexDecoder *vtxDec, VulkanVertexShader *vs, VulkanFragmentShader *fs, bool useHwTransform); int GetNumPipelines() const { return (int)pipelines_.size(); } void Clear(); diff --git a/GPU/Vulkan/StateMappingVulkan.cpp 
b/GPU/Vulkan/StateMappingVulkan.cpp index 6e9f99c3c4..af3fe9cc5a 100644 --- a/GPU/Vulkan/StateMappingVulkan.cpp +++ b/GPU/Vulkan/StateMappingVulkan.cpp @@ -341,10 +341,17 @@ void DrawEngineVulkan::ConvertStateToVulkanKey(FramebufferManagerVulkan &fbManag } VkRect2D &scissor = dynState.scissor; - scissor.offset.x = vpAndScissor.scissorX; - scissor.offset.y = vpAndScissor.scissorY; - scissor.extent.width = vpAndScissor.scissorW; - scissor.extent.height = vpAndScissor.scissorH; + if (vpAndScissor.scissorEnable) { + scissor.offset.x = vpAndScissor.scissorX; + scissor.offset.y = vpAndScissor.scissorY; + scissor.extent.width = vpAndScissor.scissorW; + scissor.extent.height = vpAndScissor.scissorH; + } else { + scissor.offset.x = 0; + scissor.offset.y = 0; + scissor.extent.width = framebufferManager_->GetRenderWidth(); + scissor.extent.height = framebufferManager_->GetRenderHeight(); + } float depthMin = vpAndScissor.depthRangeMin; float depthMax = vpAndScissor.depthRangeMax; diff --git a/ext/native/thin3d/thin3d.h b/ext/native/thin3d/thin3d.h index c3befbeff0..92ae1f7d9f 100644 --- a/ext/native/thin3d/thin3d.h +++ b/ext/native/thin3d/thin3d.h @@ -321,7 +321,9 @@ enum class NativeObject { BACKBUFFER_COLOR_TEX, BACKBUFFER_DEPTH_TEX, FEATURE_LEVEL, - RENDERPASS, + COMPATIBLE_RENDERPASS, + RENDERPASS_COMMANDBUFFER, + BOUND_TEXTURE_IMAGEVIEW, }; enum FBColorDepth { diff --git a/ext/native/thin3d/thin3d_vulkan.cpp b/ext/native/thin3d/thin3d_vulkan.cpp index a1c303c46d..624a6345a3 100644 --- a/ext/native/thin3d/thin3d_vulkan.cpp +++ b/ext/native/thin3d/thin3d_vulkan.cpp @@ -315,6 +315,39 @@ struct DescriptorSetKey { } }; +class VKTexture : public Texture { +public: + VKTexture(VulkanContext *vulkan, const TextureDesc &desc) + : vulkan_(vulkan), format_(desc.format), mipLevels_(desc.mipLevels) { + Create(desc); + } + + ~VKTexture() { + Destroy(); + } + + VkImageView GetImageView() { return vkTex_->GetImageView(); } + +private: + void SetImageData(int x, int y, int z, int width, 
int height, int depth, int level, int stride, const uint8_t *data); + + bool Create(const TextureDesc &desc); + + void Destroy() { + if (vkTex_) { + vkTex_->Destroy(); + delete vkTex_; + } + } + + VulkanContext *vulkan_; + VulkanTexture *vkTex_; + + int mipLevels_; + + DataFormat format_; +}; + class VKContext : public DrawContext { public: VKContext(VulkanContext *vulkan); @@ -350,7 +383,6 @@ public: // color must be 0, for now. void BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int attachment) override; void BindFramebufferForRead(Framebuffer *fbo) override; - void TransitionForSampling(Framebuffer *fbo) override; uintptr_t GetFramebufferAPITexture(Framebuffer *fbo, int channelBit, int attachment) override; @@ -413,8 +445,17 @@ public: uintptr_t GetNativeObject(NativeObject obj) const override { switch (obj) { - case NativeObject::RENDERPASS: - return (uintptr_t)renderPasses_[0]; + case NativeObject::COMPATIBLE_RENDERPASS: + // Return a representative renderpass. + return (uintptr_t)(curRenderPass_ == vulkan_->GetSurfaceRenderPass() ? 
curRenderPass_ : renderPasses_[0]); + case NativeObject::RENDERPASS_COMMANDBUFFER: + return (uintptr_t)cmd_; + case NativeObject::BOUND_TEXTURE_IMAGEVIEW: + if (boundTextures_[0]) { + return (uintptr_t)boundTextures_[0]->GetImageView(); + } else { + return 0; + } default: return 0; } @@ -427,6 +468,7 @@ private: void DirtyDynamicState(); void EndCurrentRenderpass(); + VkCommandBuffer AllocCmdBuf(); VulkanContext *vulkan_ = nullptr; @@ -457,18 +499,25 @@ private: bool scissorDirty_; VkRect2D scissor_; - enum {MAX_BOUND_TEXTURES = 1}; + int curWidth_ = -1; + int curHeight_ = -1; + + enum { + MAX_BOUND_TEXTURES = 1, + MAX_FRAME_COMMAND_BUFFERS = 128, + }; VKTexture *boundTextures_[MAX_BOUND_TEXTURES]; VKSamplerState *boundSamplers_[MAX_BOUND_TEXTURES]; - VkCommandBuffer cmd_; // The current one - struct FrameData { VulkanPushBuffer *pushBuffer; VkCommandPool cmdPool_; + VkCommandBuffer cmdBufs[MAX_FRAME_COMMAND_BUFFERS]; + int numCmdBufs_; // Per-frame descriptor set cache. As it's per frame and reset every frame, we don't need to // worry about invalidating descriptors pointing to deleted textures. + // However! ARM is not a fan of doing it this way. 
std::map descSets_; VkDescriptorPool descriptorPool; }; @@ -482,6 +531,7 @@ private: VkFramebuffer curFramebuffer_ = VK_NULL_HANDLE;; VkRenderPass curRenderPass_ = VK_NULL_HANDLE; + VkCommandBuffer cmd_ = VK_NULL_HANDLE; }; static int GetBpp(VkFormat format) { @@ -605,51 +655,20 @@ enum class TextureState { PENDING_DESTRUCTION, }; -class VKTexture : public Texture { -public: - VKTexture(VulkanContext *vulkan, const TextureDesc &desc) - : vulkan_(vulkan), format_(desc.format), mipLevels_(desc.mipLevels) { - Create(desc); - } - - ~VKTexture() { - Destroy(); - } - - VkImageView GetImageView() { return vkTex_->GetImageView(); } - -private: - void SetImageData(int x, int y, int z, int width, int height, int depth, int level, int stride, const uint8_t *data); - - bool Create(const TextureDesc &desc) { - format_ = desc.format; - mipLevels_ = desc.mipLevels; - width_ = desc.width; - height_ = desc.height; - depth_ = desc.depth; - vkTex_ = new VulkanTexture(vulkan_); - if (desc.initData.size()) { - for (int i = 0; i < (int)desc.initData.size(); i++) { - this->SetImageData(0, 0, 0, width_, height_, depth_, i, 0, desc.initData[i]); - } - } - return true; - } - - void Destroy() { - if (vkTex_) { - vkTex_->Destroy(); - delete vkTex_; +bool VKTexture::Create(const TextureDesc &desc) { + format_ = desc.format; + mipLevels_ = desc.mipLevels; + width_ = desc.width; + height_ = desc.height; + depth_ = desc.depth; + vkTex_ = new VulkanTexture(vulkan_); + if (desc.initData.size()) { + for (int i = 0; i < (int)desc.initData.size(); i++) { + this->SetImageData(0, 0, 0, width_, height_, depth_, i, 0, desc.initData[i]); } } - - VulkanContext *vulkan_; - VulkanTexture *vkTex_; - - int mipLevels_; - - DataFormat format_; -}; + return true; +} VKContext::VKContext(VulkanContext *vulkan) : viewportDirty_(false), scissorDirty_(false), vulkan_(vulkan), frameNum_(0), caps_{} { @@ -817,10 +836,36 @@ VKContext::~VKContext() { vulkan_->Delete().QueueDeletePipelineCache(pipelineCache_); } +// 
Effectively wiped every frame, just allocate new ones! +VkCommandBuffer VKContext::AllocCmdBuf() { + FrameData *frame = &frame_[frameNum_ & 1]; + if (frame->cmdBufs[frame->numCmdBufs_]) { + VkCommandBuffer cmdBuf = frame->cmdBufs[frame->numCmdBufs_++]; + if (!cmdBuf) + Crash(); + return cmdBuf; + } + + if (frame->numCmdBufs_ >= this->MAX_FRAME_COMMAND_BUFFERS) + Crash(); + + VkCommandBufferAllocateInfo alloc{ VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO }; + alloc.commandBufferCount = 1; + alloc.commandPool = frame->cmdPool_; + alloc.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + VkResult result = vkAllocateCommandBuffers(vulkan_->GetDevice(), &alloc, &frame->cmdBufs[frame->numCmdBufs_]); + assert(result == VK_SUCCESS); + if (!frame->cmdBufs[frame->numCmdBufs_]) + Crash(); + return frame->cmdBufs[frame->numCmdBufs_++]; +} + void VKContext::BeginFrame() { - cmd_ = vulkan_->BeginFrame(); + vulkan_->BeginFrame(); FrameData *frame = &frame_[frameNum_ & 1]; + frame->numCmdBufs_ = 0; + vkResetCommandPool(vulkan_->GetDevice(), frame->cmdPool_, 0); push_ = frame->pushBuffer; // OK, we now know that nothing is reading from this frame's data pushbuffer, @@ -844,6 +889,13 @@ void VKContext::EndFrame() { curFramebuffer_ = VK_NULL_HANDLE; } + // Cap off and submit all the command buffers we recorded during the frame. + FrameData &frame = frame_[frameNum_ & 1]; + for (int i = 0; i < frame.numCmdBufs_; i++) { + vkEndCommandBuffer(frame.cmdBufs[i]); + vulkan_->QueueBeforeSurfaceRender(frame.cmdBufs[i]); + } + // Stop collecting data in the frame's data pushbuffer. push_->End(); vulkan_->EndFrame(); @@ -1260,22 +1312,40 @@ void VKContext::DrawUP(const void *vdata, int vertexCount) { vkCmdDraw(cmd_, vertexCount, 1, 0, 0); } +// TODO: We should avoid this function as much as possible, instead use renderpass on-load clearing. 
void VKContext::Clear(int mask, uint32_t colorval, float depthVal, int stencilVal) { - ELOG("Clear: Try to avoid calling this..."); - if (curRenderPass_) { + if (!curRenderPass_) { + ELOG("Clear: Need an active render pass"); + return; } + int numAttachments = 0; + VkClearRect rc{}; + rc.baseArrayLayer = 0; + rc.layerCount = 1; + rc.rect.extent.width = curWidth_; + rc.rect.extent.height = curHeight_; + VkClearAttachment attachments[2]; if (mask & FBChannel::FB_COLOR_BIT) { - VkClearColorValue col; - Uint8x4ToFloat4(colorval, col.float32); - /* - VkRect3D rect; - rect.extent.width = - vkCmdClearColorAttachment(cmdBuf_, 0, imageLayout_, &col, 1, nullptr); - */ + VkClearAttachment &attachment = attachments[numAttachments++]; + attachment.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + attachment.colorAttachment = 0; + Uint8x4ToFloat4(colorval, attachment.clearValue.color.float32); } if (mask & (FBChannel::FB_DEPTH_BIT | FBChannel::FB_STENCIL_BIT)) { - + VkClearAttachment &attachment = attachments[numAttachments++]; + attachment.aspectMask = 0; + if (mask & FBChannel::FB_DEPTH_BIT) { + attachment.clearValue.depthStencil.depth = depthVal; + attachment.aspectMask |= VK_IMAGE_ASPECT_DEPTH_BIT; + } + if (mask & FBChannel::FB_STENCIL_BIT) { + attachment.clearValue.depthStencil.stencil = stencilVal; + attachment.aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT; + } + } + if (numAttachments) { + vkCmdClearAttachments(cmd_, numAttachments, attachments, 1, &rc); } } @@ -1392,9 +1462,9 @@ void CreateImage(VulkanContext *vulkan, VKImage &img, int width, int height, VkF alloc.allocationSize = memreq.size; vulkan->MemoryTypeFromProperties(memreq.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, &alloc.memoryTypeIndex); VkResult res = vkAllocateMemory(vulkan->GetDevice(), &alloc, nullptr, &img.memory); - assert(res); + assert(res == VK_SUCCESS); res = vkBindImageMemory(vulkan->GetDevice(), img.image, img.memory, 0); - assert(res); + assert(res == VK_SUCCESS); img.layout = initialLayout; 
VkImageViewCreateInfo ivci{ VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO }; @@ -1406,7 +1476,7 @@ void CreateImage(VulkanContext *vulkan, VKImage &img, int width, int height, VkF ivci.subresourceRange.layerCount = 1; ivci.subresourceRange.levelCount = 1; res = vkCreateImageView(vulkan->GetDevice(), &ivci, nullptr, &img.view); - assert(res); + assert(res == VK_SUCCESS); VkImageMemoryBarrier barrier{}; barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; @@ -1432,6 +1502,9 @@ void CreateImage(VulkanContext *vulkan, VKImage &img, int width, int height, VkF } // A VKFramebuffer is a VkFramebuffer plus all the textures it owns. +// It also has a reference to the command buffer that it was last rendered to with. +// If it needs to be transitioned, and the frame number matches, use it, otherwise +// use this frame's init command buffer. class VKFramebuffer : public Framebuffer { public: VKFramebuffer(VulkanContext *vk) : vulkan_(vk) {} @@ -1449,6 +1522,10 @@ public: VKImage depth; int width; int height; + + // These belong together, see above. + VkCommandBuffer cmdBuf; + int frameCount; private: VulkanContext *vulkan_; }; @@ -1490,7 +1567,7 @@ bool VKContext::BlitFramebuffer(Framebuffer *srcfb, int srcX1, int srcY1, int sr void VKContext::EndCurrentRenderpass() { if (curRenderPass_ != VK_NULL_HANDLE) { - vkCmdEndRenderPass(vulkan_->GetSurfaceCommandBuffer()); + vkCmdEndRenderPass(cmd_); curRenderPass_ = VK_NULL_HANDLE; curFramebuffer_ = VK_NULL_HANDLE; } @@ -1499,6 +1576,7 @@ void VKContext::EndCurrentRenderpass() { // These functions should be self explanatory. 
void VKContext::BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPassInfo &rp) { VkFramebuffer framebuf; + VkCommandBuffer cmdBuf; int w; int h; VkImageLayout prevLayout; @@ -1508,13 +1586,14 @@ void VKContext::BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPass w = fb->width; h = fb->height; prevLayout = fb->color.layout; + cmdBuf = fb->cmdBuf; } else { framebuf = vulkan_->GetSurfaceFramebuffer(); w = vulkan_->GetWidth(); h = vulkan_->GetHeight(); + cmdBuf = vulkan_->GetSurfaceCommandBuffer(); } - VkCommandBuffer cmd = vulkan_->GetSurfaceCommandBuffer(); if (framebuf == curFramebuffer_) { // If we're asking to clear, but already bound, we'll just keep it bound but send a clear command. // We will try to avoid this as much as possible. Also, TODO, do a single vkCmdClearAttachments to clear both. @@ -1524,7 +1603,7 @@ void VKContext::BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPass Uint8x4ToFloat4(rp.clearColor, clear.clearValue.color.float32); clear.colorAttachment = 0; VkClearRect rc{ {0,0,(uint32_t)w,(uint32_t)h}, 0, 1 }; - vkCmdClearAttachments(cmd, 1, &clear, 1, &rc); + vkCmdClearAttachments(cmdBuf, 1, &clear, 1, &rc); } if (rp.depth == RPAction::CLEAR) { VkClearAttachment clear{}; @@ -1533,7 +1612,7 @@ void VKContext::BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPass clear.clearValue.depthStencil.stencil = rp.clearStencil; clear.colorAttachment = 0; VkClearRect rc{ { 0,0,w,h }, 0, 1 }; - vkCmdClearAttachments(cmd, 1, &clear, 1, &rc); + vkCmdClearAttachments(cmdBuf, 1, &clear, 1, &rc); } // We're done. 
return; @@ -1544,6 +1623,14 @@ void VKContext::BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPass if (fbo) { VKFramebuffer *fb = (VKFramebuffer *)fbo; + fb->cmdBuf = AllocCmdBuf(); + if (!fb->cmdBuf) + Crash(); + fb->frameCount = frameNum_; + cmd_ = fb->cmdBuf; + VkCommandBufferBeginInfo begin{ VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO }; + begin.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + vkBeginCommandBuffer(cmd_, &begin); // Now, if the image needs transitioning, let's transition. // The backbuffer does not, that's handled by VulkanContext. VkImageMemoryBarrier barrier{}; @@ -1561,37 +1648,55 @@ void VKContext::BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPass barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; barrier.newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, 0, 0, 0, 1, &barrier); + vkCmdPipelineBarrier(cmd_, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, 0, 0, 0, 1, &barrier); fb->color.layout = barrier.newLayout; + } else { + cmd_ = vulkan_->GetSurfaceCommandBuffer(); } + int numClearVals = 0; VkClearValue clearVal[2] = {}; - Uint8x4ToFloat4(rp.clearColor, clearVal[0].color.float32); - clearVal[1].depthStencil.depth = rp.clearDepth; - clearVal[1].depthStencil.stencil = rp.clearStencil; + if (rp.color == RPAction::CLEAR) { + Uint8x4ToFloat4(rp.clearColor, clearVal[numClearVals].color.float32); + numClearVals++; + } + if (rp.depth == RPAction::CLEAR) { + clearVal[numClearVals].depthStencil.depth = rp.clearDepth; + clearVal[numClearVals].depthStencil.stencil = rp.clearStencil; + numClearVals++; + } VkRenderPassBeginInfo rp_begin = { VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO }; - rp_begin.renderPass = fbo ? 
renderPasses_[RPIndex(rp.color, rp.depth)] : vulkan_->GetSurfaceRenderPass(); + if (fbo) { + rp_begin.renderPass = renderPasses_[RPIndex(rp.color, rp.depth)]; + } else { + rp_begin.renderPass = vulkan_->GetSurfaceRenderPass(); + } rp_begin.framebuffer = framebuf; rp_begin.renderArea.offset.x = 0; rp_begin.renderArea.offset.y = 0; rp_begin.renderArea.extent.width = w; rp_begin.renderArea.extent.height = h; - rp_begin.clearValueCount = 2; - rp_begin.pClearValues = clearVal; - vkCmdBeginRenderPass(cmd, &rp_begin, VK_SUBPASS_CONTENTS_INLINE); + rp_begin.clearValueCount = numClearVals; + rp_begin.pClearValues = numClearVals ? clearVal : nullptr; + vkCmdBeginRenderPass(cmd_, &rp_begin, VK_SUBPASS_CONTENTS_INLINE); curFramebuffer_ = framebuf; curRenderPass_ = rp_begin.renderPass; + curWidth_ = w; + curHeight_ = h; } -// This will implicitly end the current render pass. Only call right before switching to a new one, like the -// backbuffer. -void VKContext::TransitionForSampling(Framebuffer *fbo) { - EndCurrentRenderpass(); - +// color must be 0, for now. +void VKContext::BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int attachment) { VKFramebuffer *fb = (VKFramebuffer *)fbo; - if (fb->color.layout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) - return; + VkCommandBuffer transitionCmdBuf; + if (fb->cmdBuf && fb->frameCount == frameNum_) { + // If the framebuffer has a "live" command buffer, we can directly use it to transition it for sampling. + transitionCmdBuf = fb->cmdBuf; + } else { + // If not, we can just do it at the "start" of the frame. 
+ transitionCmdBuf = vulkan_->GetInitCommandBuffer(); + } VkImageMemoryBarrier barrier{}; barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; @@ -1602,22 +1707,18 @@ void VKContext::TransitionForSampling(Framebuffer *fbo) { barrier.srcAccessMask = 0; switch (barrier.oldLayout) { case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: - barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT|VK_ACCESS_COLOR_ATTACHMENT_READ_BIT; break; case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: - barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; break; } + barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; // we're between passes so it's OK. - vkCmdPipelineBarrier(vulkan_->GetSurfaceCommandBuffer(), VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, 0, 0, 0, 1, &barrier); -} - -// color must be 0, for now. -void VKContext::BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int attachment) { - VKFramebuffer *fb = (VKFramebuffer *)fbo; - + vkCmdPipelineBarrier(transitionCmdBuf, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, 0, 0, 0, 1, &barrier); + fb->color.layout = barrier.newLayout; } void VKContext::BindFramebufferForRead(Framebuffer *fbo) { /* noop */ } From 0c70735bc489ef9ec60a8fc86d8f66db20200d8a Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sun, 21 May 2017 23:13:53 +0200 Subject: [PATCH 13/25] Buffered rendering is starting to work, though still kinda broken. 
--- Common/Vulkan/VulkanContext.cpp | 3 +++ Common/Vulkan/VulkanImage.cpp | 8 ++++++ Common/Vulkan/VulkanImage.h | 3 +++ GPU/Common/FramebufferCommon.cpp | 6 +++++ GPU/Vulkan/DrawEngineVulkan.cpp | 15 ++++++----- GPU/Vulkan/DrawEngineVulkan.h | 2 -- GPU/Vulkan/FramebufferVulkan.cpp | 25 ++++++++++-------- GPU/Vulkan/FramebufferVulkan.h | 4 +-- GPU/Vulkan/VulkanUtil.cpp | 1 + Windows/GPU/WindowsVulkanContext.cpp | 6 ++++- ext/native/thin3d/thin3d.h | 10 +++++++ ext/native/thin3d/thin3d_vulkan.cpp | 39 ++++++++++++++++++---------- 12 files changed, 86 insertions(+), 36 deletions(-) diff --git a/Common/Vulkan/VulkanContext.cpp b/Common/Vulkan/VulkanContext.cpp index c621c24cc0..5004737c68 100644 --- a/Common/Vulkan/VulkanContext.cpp +++ b/Common/Vulkan/VulkanContext.cpp @@ -1307,6 +1307,9 @@ void TransitionImageLayout(VkCommandBuffer cmd, VkImage image, VkImageAspectFlag if (old_image_layout == VK_IMAGE_LAYOUT_PRESENT_SRC_KHR) { image_memory_barrier.srcAccessMask |= VK_ACCESS_MEMORY_READ_BIT; } + if (old_image_layout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) { + image_memory_barrier.srcAccessMask |= VK_ACCESS_SHADER_READ_BIT; + } if (old_image_layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) { image_memory_barrier.srcAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; diff --git a/Common/Vulkan/VulkanImage.cpp b/Common/Vulkan/VulkanImage.cpp index fe5f5e8f08..7fca19628d 100644 --- a/Common/Vulkan/VulkanImage.cpp +++ b/Common/Vulkan/VulkanImage.cpp @@ -362,6 +362,14 @@ void VulkanTexture::EndCreate() { VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); } +void VulkanTexture::TransitionForUpload() { + VkCommandBuffer cmd = vulkan_->GetInitCommandBuffer(); + TransitionImageLayout(cmd, image, + VK_IMAGE_ASPECT_COLOR_BIT, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); +} + void VulkanTexture::Destroy() { if (view != VK_NULL_HANDLE) { vulkan_->Delete().QueueDeleteImageView(view); diff --git a/Common/Vulkan/VulkanImage.h 
b/Common/Vulkan/VulkanImage.h index c7d6b61f68..3b67119946 100644 --- a/Common/Vulkan/VulkanImage.h +++ b/Common/Vulkan/VulkanImage.h @@ -34,6 +34,9 @@ public: bool CreateDirect(int w, int h, int numMips, VkFormat format, VkImageLayout initialLayout, VkImageUsageFlags usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, const VkComponentMapping *mapping = nullptr); void UploadMip(int mip, int mipWidth, int mipHeight, VkBuffer buffer, uint32_t offset, size_t rowLength); // rowLength is in pixels void EndCreate(); + + void TransitionForUpload(); + int GetNumMips() const { return numMips_; } void Destroy(); diff --git a/GPU/Common/FramebufferCommon.cpp b/GPU/Common/FramebufferCommon.cpp index 9cb831339b..c2007df6bb 100644 --- a/GPU/Common/FramebufferCommon.cpp +++ b/GPU/Common/FramebufferCommon.cpp @@ -704,6 +704,7 @@ void FramebufferManagerCommon::DrawPixels(VirtualFramebuffer *vfb, int dstX, int if (useBufferedRendering_ && vfb && vfb->fbo) { draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP }); SetViewport2D(0, 0, vfb->renderWidth, vfb->renderHeight); + draw_->SetScissorRect(0, 0, vfb->renderWidth, vfb->renderHeight); } else { // We are drawing to the back buffer so need to flip. if (needBackBufferYSwap_) @@ -711,6 +712,7 @@ void FramebufferManagerCommon::DrawPixels(VirtualFramebuffer *vfb, int dstX, int float x, y, w, h; CenterDisplayOutputRect(&x, &y, &w, &h, 480.0f, 272.0f, (float)pixelWidth_, (float)pixelHeight_, ROTATION_LOCKED_HORIZONTAL); SetViewport2D(x, y, w, h); + draw_->SetScissorRect(0, 0, pixelWidth_, pixelHeight_); } DisableState(); @@ -894,6 +896,7 @@ void FramebufferManagerCommon::CopyDisplayToOutput() { } // Just a pointer to plain memory to draw. We should create a framebuffer, then draw to it. 
SetViewport2D(0, 0, pixelWidth_, pixelHeight_); + draw_->SetScissorRect(0, 0, pixelWidth_, pixelHeight_); DrawFramebufferToOutput(Memory::GetPointer(displayFramebufPtr_), displayFormat_, displayStride_, true); return; } @@ -942,6 +945,7 @@ void FramebufferManagerCommon::CopyDisplayToOutput() { draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }); draw_->BindFramebufferAsTexture(vfb->fbo, 0, Draw::FB_COLOR_BIT, 0); SetViewport2D(0, 0, pixelWidth_, pixelHeight_); + draw_->SetScissorRect(0, 0, pixelWidth_, pixelHeight_); bool linearFilter = g_Config.iBufFilter == SCALE_LINEAR; // We are doing the DrawActiveTexture call directly to the backbuffer here. Hence, we must // flip V. @@ -967,6 +971,7 @@ void FramebufferManagerCommon::CopyDisplayToOutput() { int fbo_w, fbo_h; draw_->GetFramebufferDimensions(extraFBOs_[0], &fbo_w, &fbo_h); SetViewport2D(0, 0, fbo_w, fbo_h); + draw_->SetScissorRect(0, 0, pixelWidth_, pixelHeight_); shaderManager_->DirtyLastShader(); // dirty lastShader_ PostShaderUniforms uniforms{}; CalculatePostShaderUniforms(vfb->bufferWidth, vfb->bufferHeight, renderWidth_, renderHeight_, &uniforms); @@ -1013,6 +1018,7 @@ void FramebufferManagerCommon::CopyDisplayToOutput() { }*/ } else { draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }); + draw_->SetScissorRect(0, 0, pixelWidth_, pixelHeight_); // We are doing the DrawActiveTexture call directly to the backbuffer here. Hence, we must // flip V. 
if (needBackBufferYSwap_) diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp index ff134f9f1b..3d1bdce91d 100644 --- a/GPU/Vulkan/DrawEngineVulkan.cpp +++ b/GPU/Vulkan/DrawEngineVulkan.cpp @@ -68,7 +68,6 @@ DrawEngineVulkan::DrawEngineVulkan(VulkanContext *vulkan, Draw::DrawContext *dra : vulkan_(vulkan), draw_(draw), prevPrim_(GE_PRIM_INVALID), - lastVTypeID_(-1), numDrawCalls(0), vertexCountInDrawCalls(0), curFrame_(0), @@ -329,10 +328,12 @@ inline void DrawEngineVulkan::SetupVertexDecoderInternal(u32 vertType) { const u32 vertTypeID = (vertType & 0xFFFFFF) | (gstate.getUVGenMode() << 24); // If vtype has changed, setup the vertex decoder. - if (vertTypeID != lastVTypeID_) { + if (vertTypeID != lastVType_) { dec_ = GetVertexDecoder(vertTypeID); - lastVTypeID_ = vertTypeID; + lastVType_ = vertTypeID; } + if (!dec_) + Crash(); } void DrawEngineVulkan::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) { @@ -710,7 +711,7 @@ void DrawEngineVulkan::DoFlush() { } prim = indexGen.Prim(); - bool hasColor = (lastVTypeID_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE; + bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE; if (gstate.isModeThrough()) { gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && (hasColor || gstate.getMaterialAmbientA() == 255); } else { @@ -743,7 +744,7 @@ void DrawEngineVulkan::DoFlush() { dirtyUniforms_ |= shaderManager_->UpdateUniforms(); - shaderManager_->GetShaders(prim, lastVTypeID_, &vshader, &fshader, useHWTransform); + shaderManager_->GetShaders(prim, lastVType_, &vshader, &fshader, useHWTransform); VulkanPipeline *pipeline = pipelineManager_->GetOrCreatePipeline(pipelineLayout_, renderPass, pipelineKey_, dec_, vshader, fshader, true); if (!pipeline) { // Already logged, let's bail out. 
@@ -778,7 +779,7 @@ void DrawEngineVulkan::DoFlush() { } else { // Decode to "decoded" DecodeVerts(nullptr, nullptr, nullptr); - bool hasColor = (lastVTypeID_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE; + bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE; if (gstate.isModeThrough()) { gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && (hasColor || gstate.getMaterialAmbientA() == 255); } else { @@ -849,7 +850,7 @@ void DrawEngineVulkan::DoFlush() { dirtyUniforms_ |= shaderManager_->UpdateUniforms(); - shaderManager_->GetShaders(prim, lastVTypeID_, &vshader, &fshader, useHWTransform); + shaderManager_->GetShaders(prim, lastVType_, &vshader, &fshader, useHWTransform); VulkanPipeline *pipeline = pipelineManager_->GetOrCreatePipeline(pipelineLayout_, renderPass, pipelineKey, dec_, vshader, fshader, false); if (!pipeline) { // Already logged, let's bail out. diff --git a/GPU/Vulkan/DrawEngineVulkan.h b/GPU/Vulkan/DrawEngineVulkan.h index 6fbea4030d..fe109d3521 100644 --- a/GPU/Vulkan/DrawEngineVulkan.h +++ b/GPU/Vulkan/DrawEngineVulkan.h @@ -197,8 +197,6 @@ private: IndexGenerator indexGen; GEPrimitiveType prevPrim_; - u32 lastVTypeID_; - TransformedVertex *transformed = nullptr; TransformedVertex *transformedExpanded = nullptr; diff --git a/GPU/Vulkan/FramebufferVulkan.cpp b/GPU/Vulkan/FramebufferVulkan.cpp index e6ff816958..0a1e55cb04 100644 --- a/GPU/Vulkan/FramebufferVulkan.cpp +++ b/GPU/Vulkan/FramebufferVulkan.cpp @@ -129,9 +129,9 @@ void FramebufferManagerVulkan::InitDeviceObjects() { assert(fsBasicTex_ != VK_NULL_HANDLE); assert(vsBasicTex_ != VK_NULL_HANDLE); - // Get a representative render pass and use when creating the pipeline. 
- pipelineBasicTexBackBuffer_ = vulkan2D_.GetPipeline(pipelineCache2D_, vulkan_->GetSurfaceRenderPass(), vsBasicTex_, fsBasicTex_); - pipelineBasicTexFrameBuffer_ = vulkan2D_.GetPipeline(pipelineCache2D_, (VkRenderPass)draw_->GetNativeObject(Draw::NativeObject::COMPATIBLE_RENDERPASS), vsBasicTex_, fsBasicTex_); + // Prime the 2D pipeline cache. + vulkan2D_.GetPipeline(pipelineCache2D_, vulkan_->GetSurfaceRenderPass(), vsBasicTex_, fsBasicTex_); + vulkan2D_.GetPipeline(pipelineCache2D_, (VkRenderPass)draw_->GetNativeObject(Draw::NativeObject::COMPATIBLE_RENDERPASS), vsBasicTex_, fsBasicTex_); VkSamplerCreateInfo samp = { VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO }; samp.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; @@ -240,6 +240,8 @@ void FramebufferManagerVulkan::MakePixelTexture(const u8 *srcPixels, GEBufferFor drawPixelsTex_->CreateDirect(width, height, 1, VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT); // Initialize backbuffer texture for DrawPixels drawPixelsTexFormat_ = srcPixelFormat; + } else { + drawPixelsTex_->TransitionForUpload(); } // TODO: We can just change the texture format and flip some bits around instead of this. @@ -314,11 +316,6 @@ void FramebufferManagerVulkan::SetViewport2D(int x, int y, int w, int h) { } void FramebufferManagerVulkan::DrawActiveTexture(float x, float y, float w, float h, float destW, float destH, float u0, float v0, float u1, float v1, int uvRotation, bool linearFilter) { - // TODO -} - -// x, y, w, h are relative coordinates against destW/destH, which is not very intuitive. 
-void FramebufferManagerVulkan::DrawTexture(VulkanTexture *texture, float x, float y, float w, float h, float destW, float destH, float u0, float v0, float u1, float v1, VkPipeline pipeline, int uvRotation) { float texCoords[8] = { u0,v0, u1,v0, @@ -354,13 +351,18 @@ void FramebufferManagerVulkan::DrawTexture(VulkanTexture *texture, float x, floa vtx[i].y = vtx[i].y * invDestH - 1.0f; } + draw_->FlushState(); + + // TODO: Should probably use draw_ directly and not go low level + VulkanPushBuffer *push = frameData_[curFrame_].push_; VkCommandBuffer cmd = (VkCommandBuffer)draw_->GetNativeObject(Draw::NativeObject::RENDERPASS_COMMANDBUFFER); // TODO: Choose linear or nearest appropriately, see GL impl. - vulkan2D_.BindDescriptorSet(cmd, texture->GetImageView(), linearSampler_); - vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + VkImageView view = (VkImageView)draw_->GetNativeObject(Draw::NativeObject::BOUND_TEXTURE_IMAGEVIEW); + vulkan2D_.BindDescriptorSet(cmd, view, linearFilter ? linearSampler_ : nearestSampler_); + vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, cur2DPipeline_); VkBuffer vbuffer; VkDeviceSize offset = push->Push(vtx, sizeof(vtx), &vbuffer); vkCmdBindVertexBuffers(cmd, 0, 1, &vbuffer, &offset); @@ -368,7 +370,8 @@ void FramebufferManagerVulkan::DrawTexture(VulkanTexture *texture, float x, floa } void FramebufferManagerVulkan::Bind2DShader() { - + VkRenderPass rp = (VkRenderPass)draw_->GetNativeObject(Draw::NativeObject::COMPATIBLE_RENDERPASS); + cur2DPipeline_ = vulkan2D_.GetPipeline(pipelineCache2D_, rp, vsBasicTex_, fsBasicTex_); } void FramebufferManagerVulkan::BindPostShader(const PostShaderUniforms &uniforms) { diff --git a/GPU/Vulkan/FramebufferVulkan.h b/GPU/Vulkan/FramebufferVulkan.h index d7ea385d49..489837a10f 100644 --- a/GPU/Vulkan/FramebufferVulkan.h +++ b/GPU/Vulkan/FramebufferVulkan.h @@ -69,9 +69,7 @@ public: drawEngine_ = td; } - // If texture != 0, will bind it. 
// x,y,w,h are relative to destW, destH which fill out the target completely. - void DrawTexture(VulkanTexture *texture, float x, float y, float w, float h, float destW, float destH, float u0, float v0, float u1, float v1, VkPipeline pipeline, int uvRotation); void DrawActiveTexture(float x, float y, float w, float h, float destW, float destH, float u0, float v0, float u1, float v1, int uvRotation, bool linearFilter) override; void DestroyAllFBOs(); @@ -188,6 +186,8 @@ private: VkPipeline pipelineBasicTexBackBuffer_; VkPipeline pipelineBasicTexFrameBuffer_; + VkPipeline cur2DPipeline_; + // Postprocessing VkPipeline pipelinePostShader_; diff --git a/GPU/Vulkan/VulkanUtil.cpp b/GPU/Vulkan/VulkanUtil.cpp index 6d1be59a30..676be492ab 100644 --- a/GPU/Vulkan/VulkanUtil.cpp +++ b/GPU/Vulkan/VulkanUtil.cpp @@ -217,6 +217,7 @@ VkPipeline Vulkan2D::GetPipeline(VkPipelineCache cache, VkRenderPass rp, VkShade PipelineKey key; key.vs = vs; key.fs = fs; + key.rp = rp; auto iter = pipelines_.find(key); if (iter != pipelines_.end()) { diff --git a/Windows/GPU/WindowsVulkanContext.cpp b/Windows/GPU/WindowsVulkanContext.cpp index 6affd88f17..f64173a9b6 100644 --- a/Windows/GPU/WindowsVulkanContext.cpp +++ b/Windows/GPU/WindowsVulkanContext.cpp @@ -132,6 +132,10 @@ static VkBool32 VKAPI_CALL Vulkan_Dbg(VkDebugReportFlagsEXT msgFlags, VkDebugRep return false; if (msgCode == 11) return false; + // Silence "invalid reads of buffer data" - usually just uninitialized color buffers that will immediately get cleared due to our + // lacking clearing optimizations. 
+ if (msgCode == 15 && objType == VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_MEMORY_EXT) + return false; #ifdef _WIN32 std::string msg = message.str(); @@ -144,7 +148,7 @@ static VkBool32 VKAPI_CALL Vulkan_Dbg(VkDebugReportFlagsEXT msgFlags, VkDebugRep MessageBoxA(NULL, message.str().c_str(), "Alert", MB_OK); } } else if (msgFlags & VK_DEBUG_REPORT_WARNING_BIT_EXT) { - if (options->breakOnWarning) { + if (options->breakOnWarning && IsDebuggerPresent()) { DebugBreak(); } } diff --git a/ext/native/thin3d/thin3d.h b/ext/native/thin3d/thin3d.h index 92ae1f7d9f..a51de9af08 100644 --- a/ext/native/thin3d/thin3d.h +++ b/ext/native/thin3d/thin3d.h @@ -677,6 +677,16 @@ public: virtual void HandleEvent(Event ev, int width, int height, void *param1 = nullptr, void *param2 = nullptr) = 0; + // This flushes command buffers and waits for execution at the point of the end of the last + // renderpass that wrote to the requested framebuffer. This is needed before trying to read it back + // on modern APIs like Vulkan. Ifr the framebuffer is currently being rendered to, we'll just end the render pass. + // The next draw call will automatically start up a new one. + // APIs like OpenGL won't need to implement this one. + virtual void WaitRenderCompletion(Framebuffer *fbo) {} + + // Flush state like scissors etc so the caller can do its own custom drawing. 
+ virtual void FlushState() {} + protected: void CreatePresets(); diff --git a/ext/native/thin3d/thin3d_vulkan.cpp b/ext/native/thin3d/thin3d_vulkan.cpp index 624a6345a3..412fa4df60 100644 --- a/ext/native/thin3d/thin3d_vulkan.cpp +++ b/ext/native/thin3d/thin3d_vulkan.cpp @@ -422,6 +422,11 @@ public: void BeginFrame() override; void EndFrame() override; + void FlushState() override { + ApplyDynamicState(); + } + void WaitRenderCompletion(Framebuffer *fbo) override; + std::string GetInfoString(InfoField info) const override { // TODO: Make these actually query the right information switch (info) { @@ -447,15 +452,14 @@ public: switch (obj) { case NativeObject::COMPATIBLE_RENDERPASS: // Return a representative renderpass. - return (uintptr_t)(curRenderPass_ == vulkan_->GetSurfaceRenderPass() ? curRenderPass_ : renderPasses_[0]); + if (curRenderPass_ == vulkan_->GetSurfaceRenderPass()) + return (uintptr_t)curRenderPass_; + else + return (uintptr_t)renderPasses_[0]; case NativeObject::RENDERPASS_COMMANDBUFFER: return (uintptr_t)cmd_; case NativeObject::BOUND_TEXTURE_IMAGEVIEW: - if (boundTextures_[0]) { - return (uintptr_t)boundTextures_[0]->GetImageView(); - } else { - return 0; - } + return (uintptr_t)boundImageView_[0]; default: return 0; } @@ -508,11 +512,13 @@ private: }; VKTexture *boundTextures_[MAX_BOUND_TEXTURES]; VKSamplerState *boundSamplers_[MAX_BOUND_TEXTURES]; + VkImageView boundImageView_[MAX_BOUND_TEXTURES]; struct FrameData { VulkanPushBuffer *pushBuffer; VkCommandPool cmdPool_; VkCommandBuffer cmdBufs[MAX_FRAME_COMMAND_BUFFERS]; + int startCmdBufs_; int numCmdBufs_; // Per-frame descriptor set cache. 
As it's per frame and reset every frame, we don't need to @@ -863,17 +869,18 @@ VkCommandBuffer VKContext::AllocCmdBuf() { void VKContext::BeginFrame() { vulkan_->BeginFrame(); - FrameData *frame = &frame_[frameNum_ & 1]; - frame->numCmdBufs_ = 0; - vkResetCommandPool(vulkan_->GetDevice(), frame->cmdPool_, 0); - push_ = frame->pushBuffer; + FrameData &frame = frame_[frameNum_ & 1]; + frame.startCmdBufs_ = 0; + frame.numCmdBufs_ = 0; + vkResetCommandPool(vulkan_->GetDevice(), frame.cmdPool_, 0); + push_ = frame.pushBuffer; // OK, we now know that nothing is reading from this frame's data pushbuffer, push_->Reset(); push_->Begin(vulkan_); - frame->descSets_.clear(); - VkResult result = vkResetDescriptorPool(device_, frame->descriptorPool, 0); + frame.descSets_.clear(); + VkResult result = vkResetDescriptorPool(device_, frame.descriptorPool, 0); assert(result == VK_SUCCESS); scissor_.extent.width = pixel_xres; @@ -882,6 +889,10 @@ void VKContext::BeginFrame() { viewportDirty_ = true; } +void VKContext::WaitRenderCompletion(Framebuffer *fbo) { + // TODO +} + void VKContext::EndFrame() { if (curRenderPass_) { vulkan_->EndSurfaceRenderPass(); @@ -891,7 +902,7 @@ void VKContext::EndFrame() { // Cap off and submit all the command buffers we recorded during the frame. FrameData &frame = frame_[frameNum_ & 1]; - for (int i = 0; i < frame.numCmdBufs_; i++) { + for (int i = frame.startCmdBufs_; i < frame.numCmdBufs_; i++) { vkEndCommandBuffer(frame.cmdBufs[i]); vulkan_->QueueBeforeSurfaceRender(frame.cmdBufs[i]); } @@ -1204,6 +1215,7 @@ void VKContext::UpdateBuffer(Buffer *buffer, const uint8_t *data, size_t offset, void VKContext::BindTextures(int start, int count, Texture **textures) { for (int i = start; i < start + count; i++) { boundTextures_[i] = static_cast<VKTexture *>(textures[i]); + boundImageView_[i] = boundTextures_[i]->GetImageView(); } } @@ -1719,6 +1731,7 @@ void VKContext::BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChanne // we're between passes so it's OK.
vkCmdPipelineBarrier(transitionCmdBuf, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, 0, 0, 0, 1, &barrier); fb->color.layout = barrier.newLayout; + boundImageView_[0] = fb->color.view; } void VKContext::BindFramebufferForRead(Framebuffer *fbo) { /* noop */ } From 5f388b6b64aff103602e8077b72ef2f5f40021cd Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sun, 21 May 2017 23:34:48 +0200 Subject: [PATCH 14/25] Vulkan: Use suboptimal clears instead of no clears in buffered --- GPU/Vulkan/FramebufferVulkan.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/GPU/Vulkan/FramebufferVulkan.cpp b/GPU/Vulkan/FramebufferVulkan.cpp index 0a1e55cb04..7acb3d8a09 100644 --- a/GPU/Vulkan/FramebufferVulkan.cpp +++ b/GPU/Vulkan/FramebufferVulkan.cpp @@ -173,7 +173,7 @@ void FramebufferManagerVulkan::DestroyDeviceObjects() { } void FramebufferManagerVulkan::NotifyClear(bool clearColor, bool clearAlpha, bool clearDepth, uint32_t color, float depth) { - if (!useBufferedRendering_) { + // if (!useBufferedRendering_) { float x, y, w, h; CenterDisplayOutputRect(&x, &y, &w, &h, 480.0f, 272.0f, (float)pixelWidth_, (float)pixelHeight_, ROTATION_LOCKED_HORIZONTAL); @@ -192,9 +192,9 @@ void FramebufferManagerVulkan::NotifyClear(bool clearColor, bool clearAlpha, boo if (clearDepth) { SetDepthUpdated(); } - } else { + //} else { // TODO: Clever render pass magic. 
- } + //} } void FramebufferManagerVulkan::DoNotifyDraw() { From f49384ca730dc2ab57abee102b5067e1fe11835b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Mon, 22 May 2017 10:42:40 +0200 Subject: [PATCH 15/25] Vulkan: Fix binding framebuffers as textures --- GPU/Vulkan/FramebufferVulkan.cpp | 37 +++++++++++++++++++++++++++++++ GPU/Vulkan/FramebufferVulkan.h | 1 + GPU/Vulkan/TextureCacheVulkan.cpp | 20 +++++------------ 3 files changed, 43 insertions(+), 15 deletions(-) diff --git a/GPU/Vulkan/FramebufferVulkan.cpp b/GPU/Vulkan/FramebufferVulkan.cpp index 7acb3d8a09..120ebb6f81 100644 --- a/GPU/Vulkan/FramebufferVulkan.cpp +++ b/GPU/Vulkan/FramebufferVulkan.cpp @@ -446,6 +446,43 @@ void FramebufferManagerVulkan::BlitFramebufferDepth(VirtualFramebuffer *src, Vir } } + +VkImageView FramebufferManagerVulkan::BindFramebufferAsColorTexture(int stage, VirtualFramebuffer *framebuffer, int flags) { + if (!framebuffer->fbo || !useBufferedRendering_) { + gstate_c.skipDrawReason |= SKIPDRAW_BAD_FB_TEXTURE; + return VK_NULL_HANDLE; + } + + // currentRenderVfb_ will always be set when this is called, except from the GE debugger. + // Let's just not bother with the copy in that case. + bool skipCopy = (flags & BINDFBCOLOR_MAY_COPY) == 0; + if (GPUStepping::IsStepping() || g_Config.bDisableSlowFramebufEffects) { + skipCopy = true; + } + // Currently rendering to this framebuffer. Need to make a copy. + if (!skipCopy && framebuffer == currentRenderVfb_) { + // TODO: Maybe merge with bvfbs_? Not sure if those could be packing, and they're created at a different size. 
+ Draw::Framebuffer *renderCopy = GetTempFBO(framebuffer->renderWidth, framebuffer->renderHeight, (Draw::FBColorDepth)framebuffer->colorDepth); + if (renderCopy) { + VirtualFramebuffer copyInfo = *framebuffer; + copyInfo.fbo = renderCopy; + CopyFramebufferForColorTexture(&copyInfo, framebuffer, flags); + RebindFramebuffer(); + draw_->BindFramebufferAsTexture(renderCopy, stage, Draw::FB_COLOR_BIT, 0); + } else { + draw_->BindFramebufferAsTexture(framebuffer->fbo, stage, Draw::FB_COLOR_BIT, 0); + } + return (VkImageView)draw_->GetNativeObject(Draw::NativeObject::BOUND_TEXTURE_IMAGEVIEW); + } else if (framebuffer != currentRenderVfb_) { + draw_->BindFramebufferAsTexture(framebuffer->fbo, stage, Draw::FB_COLOR_BIT, 0); + return (VkImageView)draw_->GetNativeObject(Draw::NativeObject::BOUND_TEXTURE_IMAGEVIEW); + } else { + ERROR_LOG_REPORT_ONCE(d3d11SelfTexture, G3D, "Attempting to texture to target"); + // Badness on D3D11 to bind the currently rendered-to framebuffer as a texture. + return VK_NULL_HANDLE; + } +} + VulkanTexture *FramebufferManagerVulkan::GetFramebufferColor(u32 fbRawAddress, VirtualFramebuffer *framebuffer, int flags) { if (framebuffer == NULL) { framebuffer = currentRenderVfb_; diff --git a/GPU/Vulkan/FramebufferVulkan.h b/GPU/Vulkan/FramebufferVulkan.h index 489837a10f..a991996adf 100644 --- a/GPU/Vulkan/FramebufferVulkan.h +++ b/GPU/Vulkan/FramebufferVulkan.h @@ -104,6 +104,7 @@ public: bool GetOutputFramebuffer(GPUDebugBuffer &buffer) override; virtual void RebindFramebuffer() override; + VkImageView BindFramebufferAsColorTexture(int stage, VirtualFramebuffer *framebuffer, int flags); // VulkanFBO *GetTempFBO(u16 w, u16 h, VulkanFBOColorDepth depth = VK_FBO_8888); diff --git a/GPU/Vulkan/TextureCacheVulkan.cpp b/GPU/Vulkan/TextureCacheVulkan.cpp index e908d3ca29..239263c9eb 100644 --- a/GPU/Vulkan/TextureCacheVulkan.cpp +++ b/GPU/Vulkan/TextureCacheVulkan.cpp @@ -426,30 +426,20 @@ void TextureCacheVulkan::ApplyTextureFramebuffer(TexCacheEntry *entry,
VirtualFr TexCacheEntry::Status alphaStatus = CheckAlpha(clutBuf_, getClutDestFormatVulkan(clutFormat), clutTotalColors, clutTotalColors, 1); gstate_c.SetTextureFullAlpha(alphaStatus == TexCacheEntry::STATUS_ALPHA_FULL); gstate_c.SetTextureSimpleAlpha(alphaStatus == TexCacheEntry::STATUS_ALPHA_SIMPLE); + + // imageView_ = depalFbo->getImageView(). } else { entry->status &= ~TexCacheEntry::STATUS_DEPALETTIZE; + imageView_ = framebufferManagerVulkan_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET); + gstate_c.SetTextureFullAlpha(gstate.getTextureFormat() == GE_TFMT_5650); gstate_c.SetTextureSimpleAlpha(gstate_c.textureFullAlpha); } - /* - imageView = depalFBO->GetColorImageView(); - SamplerCacheKey samplerKey; - framebufferManager_->RebindFramebuffer(); SetFramebufferSamplingParams(framebuffer->bufferWidth, framebuffer->bufferHeight, samplerKey); - sampler = GetOrCreateSampler(samplerKey); - */ - - if (entry->vkTex) { - SamplerCacheKey key; - UpdateSamplingParams(*entry, key); - key.mipEnable = false; - sampler_ = samplerCache_.GetOrCreateSampler(key); - - InvalidateLastTexture(); - } + sampler_ = samplerCache_.GetOrCreateSampler(samplerKey); } ReplacedTextureFormat FromVulkanFormat(VkFormat fmt) { From bd9f3af0bd2cb4d93c3e74c7c07244a700838cf7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Mon, 22 May 2017 10:59:00 +0200 Subject: [PATCH 16/25] Add a way to query the current renderpass (for debug checks) --- ext/native/thin3d/thin3d.h | 1 + ext/native/thin3d/thin3d_vulkan.cpp | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/ext/native/thin3d/thin3d.h b/ext/native/thin3d/thin3d.h index a51de9af08..ff34a24fff 100644 --- a/ext/native/thin3d/thin3d.h +++ b/ext/native/thin3d/thin3d.h @@ -322,6 +322,7 @@ enum class NativeObject { BACKBUFFER_DEPTH_TEX, FEATURE_LEVEL, COMPATIBLE_RENDERPASS, + CURRENT_RENDERPASS, RENDERPASS_COMMANDBUFFER, BOUND_TEXTURE_IMAGEVIEW, }; diff --git 
a/ext/native/thin3d/thin3d_vulkan.cpp b/ext/native/thin3d/thin3d_vulkan.cpp index 412fa4df60..1c35ba395b 100644 --- a/ext/native/thin3d/thin3d_vulkan.cpp +++ b/ext/native/thin3d/thin3d_vulkan.cpp @@ -456,6 +456,8 @@ public: return (uintptr_t)curRenderPass_; else return (uintptr_t)renderPasses_[0]; + case NativeObject::CURRENT_RENDERPASS: + return (uintptr_t)curRenderPass_; case NativeObject::RENDERPASS_COMMANDBUFFER: return (uintptr_t)cmd_; case NativeObject::BOUND_TEXTURE_IMAGEVIEW: @@ -1467,6 +1469,8 @@ void CreateImage(VulkanContext *vulkan, VKImage &img, int width, int height, VkF } vkCreateImage(vulkan->GetDevice(), &ici, nullptr, &img.image); + // TODO: If available, use nVidia's VK_NV_dedicated_allocation for framebuffers + VkMemoryRequirements memreq; vkGetImageMemoryRequirements(vulkan->GetDevice(), img.image, &memreq); From c173da49d36cea3f67e9cfbadf56c0589640d6e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Mon, 22 May 2017 14:48:20 +0200 Subject: [PATCH 17/25] Fix a number of bugs and stuff affecting Vulkan on Mali --- Common/Vulkan/VulkanContext.cpp | 38 ++++++------- Common/Vulkan/VulkanContext.h | 1 + Common/Vulkan/VulkanMemory.h | 1 + GPU/GLES/DrawEngineGLES.cpp | 2 + GPU/GLES/FramebufferManagerGLES.cpp | 2 +- GPU/GLES/GPU_GLES.cpp | 2 +- GPU/Software/SoftGpu.cpp | 2 +- GPU/Vulkan/DrawEngineVulkan.cpp | 5 +- GPU/Vulkan/FramebufferVulkan.cpp | 5 ++ GPU/Vulkan/GPU_Vulkan.cpp | 2 + GPU/Vulkan/PipelineManagerVulkan.cpp | 3 ++ GPU/Vulkan/ShaderManagerVulkan.cpp | 2 +- GPU/Vulkan/TextureCacheVulkan.cpp | 1 + UI/EmuScreen.cpp | 2 +- UI/GameSettingsScreen.cpp | 2 +- UI/MiscScreens.cpp | 2 +- ext/native/thin3d/thin3d_gl.cpp | 2 +- ext/native/thin3d/thin3d_vulkan.cpp | 80 ++++++++++++++++------------ 18 files changed, 91 insertions(+), 63 deletions(-) diff --git a/Common/Vulkan/VulkanContext.cpp b/Common/Vulkan/VulkanContext.cpp index 5004737c68..476b657d89 100644 --- a/Common/Vulkan/VulkanContext.cpp +++ 
b/Common/Vulkan/VulkanContext.cpp @@ -86,6 +86,7 @@ VulkanContext::VulkanContext(const char *app_name, int app_ver, uint32_t flags) instance_extension_names.push_back(VK_KHR_ANDROID_SURFACE_EXTENSION_NAME); #endif device_extension_names.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); + // device_extension_names.push_back(VK_NV_DEDICATED_ALLOCATION_EXTENSION_NAME); if (flags & VULKAN_FLAG_VALIDATE) { for (size_t i = 0; i < ARRAY_SIZE(validationLayers); i++) { @@ -217,11 +218,9 @@ void VulkanContext::QueueBeforeSurfaceRender(VkCommandBuffer cmd) { VkCommandBuffer VulkanContext::BeginFrame() { FrameData *frame = &frame_[curFrame_]; - // Get the index of the next available swapchain image, and a semaphore to block command buffer execution on. // Now, I wonder if we should do this early in the frame or late? Right now we do it early, which should be fine. VkResult res = vkAcquireNextImageKHR(device_, swap_chain_, UINT64_MAX, acquireSemaphore, VK_NULL_HANDLE, &current_buffer); - // TODO: Deal with the VK_SUBOPTIMAL_KHR and VK_ERROR_OUT_OF_DATE_KHR // return codes assert(res == VK_SUCCESS); @@ -252,16 +251,13 @@ VkCommandBuffer VulkanContext::BeginSurfaceRenderPass(VkClearValue clear_values[ rp_begin.renderArea.extent.height = height_; rp_begin.clearValueCount = 2; rp_begin.pClearValues = clear_values; - - // We don't really need to record this at this point in time, but hey, at some point we'll start this - // pass anyway so might as well do it now (although you can imagine getting away with just a stretchblt and not - // even starting a final render pass if there's nothing to overlay... hm. Uncommon though on mobile).
vkCmdBeginRenderPass(frame->cmdBuf, &rp_begin, VK_SUBPASS_CONTENTS_INLINE); return frame->cmdBuf; } void VulkanContext::EndSurfaceRenderPass() { FrameData *frame = &frame_[curFrame_]; + ILOG("VulkanContext::EndSurfaceRenderPass"); vkCmdEndRenderPass(frame->cmdBuf); } @@ -292,12 +288,12 @@ void VulkanContext::EndFrame() { VkSubmitInfo submit_info = { VK_STRUCTURE_TYPE_SUBMIT_INFO }; submit_info.waitSemaphoreCount = 1; submit_info.pWaitSemaphores = &acquireSemaphore; - VkPipelineStageFlags waitStage[1] = { VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT }; + VkPipelineStageFlags waitStage[1] = { VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT }; submit_info.pWaitDstStageMask = waitStage; submit_info.commandBufferCount = (uint32_t)cmdBufs.size(); submit_info.pCommandBuffers = cmdBufs.data(); - submit_info.signalSemaphoreCount = 0; - submit_info.pSignalSemaphores = NULL; + submit_info.signalSemaphoreCount = 1; + submit_info.pSignalSemaphores = &renderingCompleteSemaphore; res = vkQueueSubmit(gfx_queue_, 1, &submit_info, frame->fence); assert(res == VK_SUCCESS); @@ -305,8 +301,8 @@ void VulkanContext::EndFrame() { present.swapchainCount = 1; present.pSwapchains = &swap_chain_; present.pImageIndices = &current_buffer; - present.pWaitSemaphores = NULL; - present.waitSemaphoreCount = 0; + present.pWaitSemaphores = &renderingCompleteSemaphore; + present.waitSemaphoreCount = 1; present.pResults = NULL; res = vkQueuePresentKHR(gfx_queue_, &present); @@ -949,13 +945,11 @@ void VulkanContext::InitQueue() { vkGetDeviceQueue(device_, graphics_queue_family_index_, 0, &gfx_queue_); ILOG("gfx_queue_: %p", gfx_queue_); - VkSemaphoreCreateInfo acquireSemaphoreCreateInfo = { VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO }; - acquireSemaphoreCreateInfo.flags = 0; - - res = vkCreateSemaphore(device_, - &acquireSemaphoreCreateInfo, - NULL, - &acquireSemaphore); + VkSemaphoreCreateInfo semaphoreCreateInfo = { VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO }; + semaphoreCreateInfo.flags = 0; + res = vkCreateSemaphore(device_,
&semaphoreCreateInfo, NULL, &acquireSemaphore); + assert(res == VK_SUCCESS); + res = vkCreateSemaphore(device_, &semaphoreCreateInfo, NULL, &renderingCompleteSemaphore); assert(res == VK_SUCCESS); } @@ -1070,7 +1064,9 @@ bool VulkanContext::InitSwapchain(VkCommandBuffer cmd) { &swapchainImageCount, NULL); assert(res == VK_SUCCESS); - VkImage* swapchainImages = (VkImage*)malloc(swapchainImageCount * sizeof(VkImage)); + ILOG("Vulkan swapchain image count: %d", swapchainImageCount); + + VkImage* swapchainImages = new VkImage[swapchainImageCount]; assert(swapchainImages); res = vkGetSwapchainImagesKHR(device_, swap_chain_, &swapchainImageCount, swapchainImages); assert(res == VK_SUCCESS); @@ -1108,8 +1104,7 @@ bool VulkanContext::InitSwapchain(VkCommandBuffer cmd) { swapChainBuffers.push_back(sc_buffer); assert(res == VK_SUCCESS); } - free(swapchainImages); - + delete[] swapchainImages; current_buffer = 0; return true; @@ -1249,6 +1244,7 @@ void VulkanContext::DestroySwapChain() { swap_chain_ = VK_NULL_HANDLE; swapChainBuffers.clear(); vkDestroySemaphore(device_, acquireSemaphore, NULL); + vkDestroySemaphore(device_, renderingCompleteSemaphore, NULL); } void VulkanContext::DestroyFramebuffers() { diff --git a/Common/Vulkan/VulkanContext.h b/Common/Vulkan/VulkanContext.h index c6f097ac45..80eaf2ea4a 100644 --- a/Common/Vulkan/VulkanContext.h +++ b/Common/Vulkan/VulkanContext.h @@ -317,6 +317,7 @@ public: private: VkSemaphore acquireSemaphore; + VkSemaphore renderingCompleteSemaphore; #ifdef _WIN32 HINSTANCE connection; // hInstance - Windows Instance diff --git a/Common/Vulkan/VulkanMemory.h b/Common/Vulkan/VulkanMemory.h index 0396ff84cf..8bea2b5ae2 100644 --- a/Common/Vulkan/VulkanMemory.h +++ b/Common/Vulkan/VulkanMemory.h @@ -51,6 +51,7 @@ public: void Unmap() { assert(writePtr_); /* + // Should not need this since we use coherent memory. 
VkMappedMemoryRange range = { VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE }; range.offset = 0; range.size = offset_; diff --git a/GPU/GLES/DrawEngineGLES.cpp b/GPU/GLES/DrawEngineGLES.cpp index 2eda1e0866..cf8ebbe6d6 100644 --- a/GPU/GLES/DrawEngineGLES.cpp +++ b/GPU/GLES/DrawEngineGLES.cpp @@ -645,6 +645,8 @@ void DrawEngineGLES::DoFlush() { PROFILE_THIS_SCOPE("flush"); CHECK_GL_ERROR_IF_DEBUG(); + + gpuStats.numFlushes++; gpuStats.numTrackedVertexArrays = (int)vai_.size(); diff --git a/GPU/GLES/FramebufferManagerGLES.cpp b/GPU/GLES/FramebufferManagerGLES.cpp index 5fcd307b93..14d37ceb9b 100644 --- a/GPU/GLES/FramebufferManagerGLES.cpp +++ b/GPU/GLES/FramebufferManagerGLES.cpp @@ -1089,7 +1089,7 @@ void FramebufferManagerGLES::PackFramebufferSync_(VirtualFramebuffer *vfb, int x if (gl_extensions.GLES3 && glInvalidateFramebuffer != nullptr) { #ifdef USING_GLES2 // GLES3 doesn't support using GL_READ_FRAMEBUFFER here. - draw_->BindFramebufferAsRenderTarget(vfb->fbo); + draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE }); const GLenum target = GL_FRAMEBUFFER; #else const GLenum target = GL_READ_FRAMEBUFFER; diff --git a/GPU/GLES/GPU_GLES.cpp b/GPU/GLES/GPU_GLES.cpp index 301ae7ff7c..0f34724d1f 100644 --- a/GPU/GLES/GPU_GLES.cpp +++ b/GPU/GLES/GPU_GLES.cpp @@ -660,7 +660,7 @@ void GPU_GLES::Execute_Prim(u32 op, u32 diff) { return; } - // This also makes skipping drawing very effective. + // This also makes skipping drawing very effective. This function can change the framebuffer.
framebufferManagerGL_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason); if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) { drawEngine_.SetupVertexDecoder(gstate.vertType); diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index 91fc88fb10..081d20f9fb 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -205,7 +205,7 @@ void SoftGPU::CopyToCurrentFboFromDisplayRam(int srcwidth, int srcheight) { u1 = 1.0f; } if (!hasImage) { - draw_->Clear(Draw::FB_COLOR_BIT, 0, 0, 0); + draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::CLEAR, Draw::RPAction::DONT_CARE }); return; } diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp index 3d1bdce91d..5e0543b39e 100644 --- a/GPU/Vulkan/DrawEngineVulkan.cpp +++ b/GPU/Vulkan/DrawEngineVulkan.cpp @@ -666,6 +666,10 @@ void DrawEngineVulkan::DirtyAllUBOs() { // The inline wrapper in the header checks for numDrawCalls == 0d void DrawEngineVulkan::DoFlush() { VkCommandBuffer cmd = (VkCommandBuffer)draw_->GetNativeObject(Draw::NativeObject::RENDERPASS_COMMANDBUFFER); + VkRenderPass rp = (VkRenderPass)draw_->GetNativeObject(Draw::NativeObject::CURRENT_RENDERPASS); + if (!rp) + Crash(); + gpuStats.numFlushes++; FrameData *frame = &frame_[curFrame_ & 1]; @@ -847,7 +851,6 @@ void DrawEngineVulkan::DoFlush() { Uint8x4ToFloat4(bc, dynState.blendColor); vkCmdSetBlendConstants(cmd, bc); } - dirtyUniforms_ |= shaderManager_->UpdateUniforms(); shaderManager_->GetShaders(prim, lastVType_, &vshader, &fshader, useHWTransform); diff --git a/GPU/Vulkan/FramebufferVulkan.cpp b/GPU/Vulkan/FramebufferVulkan.cpp index 120ebb6f81..c050fd05eb 100644 --- a/GPU/Vulkan/FramebufferVulkan.cpp +++ b/GPU/Vulkan/FramebufferVulkan.cpp @@ -461,6 +461,9 @@ VkImageView FramebufferManagerVulkan::BindFramebufferAsColorTexture(int stage, V } // Currently rendering to this framebuffer. Need to make a copy. 
if (!skipCopy && framebuffer == currentRenderVfb_) { + // ignore this case for now, doesn't work + ILOG("Texturing from current render Vfb!"); + return VK_NULL_HANDLE; // TODO: Maybe merge with bvfbs_? Not sure if those could be packing, and they're created at a different size. Draw::Framebuffer *renderCopy = GetTempFBO(framebuffer->renderWidth, framebuffer->renderHeight, (Draw::FBColorDepth)framebuffer->colorDepth); if (renderCopy) { @@ -1036,6 +1039,8 @@ void FramebufferManagerVulkan::FlushBeforeCopy() { // all the irrelevant state checking it'll use to decide what to do. Should // do something more focused here. SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason); + if (!draw_->GetNativeObject(Draw::NativeObject::CURRENT_RENDERPASS)) + Crash(); drawEngine_->Flush(); } diff --git a/GPU/Vulkan/GPU_Vulkan.cpp b/GPU/Vulkan/GPU_Vulkan.cpp index 54fbb57f51..7fdbcbc694 100644 --- a/GPU/Vulkan/GPU_Vulkan.cpp +++ b/GPU/Vulkan/GPU_Vulkan.cpp @@ -489,6 +489,8 @@ void GPU_Vulkan::Execute_Prim(u32 op, u32 diff) { // This also makes skipping drawing very effective. 
framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason); + if (!draw_->GetNativeObject(Draw::NativeObject::CURRENT_RENDERPASS)) + Crash(); if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) { drawEngine_.SetupVertexDecoder(gstate.vertType); diff --git a/GPU/Vulkan/PipelineManagerVulkan.cpp b/GPU/Vulkan/PipelineManagerVulkan.cpp index ec66be9d30..25478bcdd4 100644 --- a/GPU/Vulkan/PipelineManagerVulkan.cpp +++ b/GPU/Vulkan/PipelineManagerVulkan.cpp @@ -294,6 +294,9 @@ static VulkanPipeline *CreateVulkanPipeline(VkDevice device, VkPipelineCache pip VulkanPipeline *PipelineManagerVulkan::GetOrCreatePipeline(VkPipelineLayout layout, VkRenderPass renderPass, const VulkanPipelineRasterStateKey &rasterKey, const VertexDecoder *vtxDec, VulkanVertexShader *vs, VulkanFragmentShader *fs, bool useHwTransform) { VulkanPipelineKey key; + if (!renderPass) + Crash(); + key.raster = rasterKey; key.renderPass = renderPass; key.useHWTransform = useHwTransform; diff --git a/GPU/Vulkan/ShaderManagerVulkan.cpp b/GPU/Vulkan/ShaderManagerVulkan.cpp index ec2d62baf3..ad83041dd3 100644 --- a/GPU/Vulkan/ShaderManagerVulkan.cpp +++ b/GPU/Vulkan/ShaderManagerVulkan.cpp @@ -16,7 +16,7 @@ // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. #ifdef _WIN32 -#define SHADERLOG +//#define SHADERLOG #endif #include diff --git a/GPU/Vulkan/TextureCacheVulkan.cpp b/GPU/Vulkan/TextureCacheVulkan.cpp index 239263c9eb..dc833ba1d1 100644 --- a/GPU/Vulkan/TextureCacheVulkan.cpp +++ b/GPU/Vulkan/TextureCacheVulkan.cpp @@ -612,6 +612,7 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry, bool replaceIm entry->vkTex = nullptr; } } else { + entry->vkTex->texture_->TransitionForUpload(); // TODO: If reusing an existing texture object, we must transition it into the correct layout. 
} lastBoundTexture = entry->vkTex; diff --git a/UI/EmuScreen.cpp b/UI/EmuScreen.cpp index b65fef4099..3fadcc164b 100644 --- a/UI/EmuScreen.cpp +++ b/UI/EmuScreen.cpp @@ -960,7 +960,7 @@ void EmuScreen::preRender() { // We do, however, start the frame in other ways. bool useBufferedRendering = g_Config.iRenderingMode != FB_NON_BUFFERED_MODE; - if (!useBufferedRendering) { + if (!useBufferedRendering && !g_Config.bSoftwareRendering) { // We need to clear here already so that drawing during the frame is done on a clean slate. DrawContext *draw = screenManager()->getDrawContext(); draw->BindFramebufferAsRenderTarget(nullptr, { RPAction::CLEAR, RPAction::CLEAR, 0xFF000000 }); diff --git a/UI/GameSettingsScreen.cpp b/UI/GameSettingsScreen.cpp index 4171c7bdf5..15902fe0b9 100644 --- a/UI/GameSettingsScreen.cpp +++ b/UI/GameSettingsScreen.cpp @@ -187,7 +187,7 @@ void GameSettingsScreen::CreateViews() { renderingBackendChoice->HideChoice(2); // D3D11 } #endif -#if !defined(_WIN32) +#if !defined(_WIN32) && !PPSSPP_PLATFORM(ANDROID) // TODO: Add dynamic runtime check for Vulkan support on Android renderingBackendChoice->HideChoice(3); #endif diff --git a/UI/MiscScreens.cpp b/UI/MiscScreens.cpp index 6d1a0d9148..2a3a927a4f 100644 --- a/UI/MiscScreens.cpp +++ b/UI/MiscScreens.cpp @@ -510,7 +510,7 @@ void LogoScreen::render() { dc.DrawTextShadow(boot_filename.c_str(), bounds.centerX(), bounds.centerY() + 180, textColor, ALIGN_CENTER); } -#if defined(_WIN32) && !PPSSPP_PLATFORM(UWP) +#if (defined(_WIN32) && !PPSSPP_PLATFORM(UWP)) || PPSSPP_PLATFORM(ANDROID) // Draw the graphics API, except on UWP where it's always D3D11 dc.DrawText(screenManager()->getDrawContext()->GetInfoString(InfoField::APINAME).c_str(), bounds.centerX(), bounds.y2() - 100, textColor, ALIGN_CENTER); #endif diff --git a/ext/native/thin3d/thin3d_gl.cpp b/ext/native/thin3d/thin3d_gl.cpp index 4c9164ab6b..c7abf824ff 100644 --- a/ext/native/thin3d/thin3d_gl.cpp +++ b/ext/native/thin3d/thin3d_gl.cpp @@ -1557,7 
+1557,7 @@ void OpenGLContext::BindFramebufferAsRenderTarget(Framebuffer *fbo, const Render glstate.colorMask.force(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); } if (rp.depth == RPAction::CLEAR) { - glClearDepth(rp.clearDepth); + glClearDepthf(rp.clearDepth); glClearStencil(rp.clearStencil); clearFlags |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; glstate.depthWrite.force(GL_TRUE); diff --git a/ext/native/thin3d/thin3d_vulkan.cpp b/ext/native/thin3d/thin3d_vulkan.cpp index 1c35ba395b..983725ea60 100644 --- a/ext/native/thin3d/thin3d_vulkan.cpp +++ b/ext/native/thin3d/thin3d_vulkan.cpp @@ -521,7 +521,7 @@ private: VkCommandPool cmdPool_; VkCommandBuffer cmdBufs[MAX_FRAME_COMMAND_BUFFERS]; int startCmdBufs_; - int numCmdBufs_; + int numCmdBufs; // Per-frame descriptor set cache. As it's per frame and reset every frame, we don't need to // worry about invalidating descriptors pointing to deleted textures. @@ -847,25 +847,27 @@ VKContext::~VKContext() { // Effectively wiped every frame, just allocate new ones! 
VkCommandBuffer VKContext::AllocCmdBuf() { FrameData *frame = &frame_[frameNum_ & 1]; - if (frame->cmdBufs[frame->numCmdBufs_]) { - VkCommandBuffer cmdBuf = frame->cmdBufs[frame->numCmdBufs_++]; + + if (frame->numCmdBufs >= this->MAX_FRAME_COMMAND_BUFFERS) + Crash(); + + if (frame->cmdBufs[frame->numCmdBufs]) { + VkCommandBuffer cmdBuf = frame->cmdBufs[frame->numCmdBufs++]; if (!cmdBuf) Crash(); return cmdBuf; } - if (frame->numCmdBufs_ >= this->MAX_FRAME_COMMAND_BUFFERS) - Crash(); - VkCommandBufferAllocateInfo alloc{ VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO }; alloc.commandBufferCount = 1; alloc.commandPool = frame->cmdPool_; alloc.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; - VkResult result = vkAllocateCommandBuffers(vulkan_->GetDevice(), &alloc, &frame->cmdBufs[frame->numCmdBufs_]); + VkResult result = vkAllocateCommandBuffers(vulkan_->GetDevice(), &alloc, &frame->cmdBufs[frame->numCmdBufs]); assert(result == VK_SUCCESS); - if (!frame->cmdBufs[frame->numCmdBufs_]) + VkCommandBuffer cmdBuf = frame->cmdBufs[frame->numCmdBufs++]; + if (!cmdBuf) Crash(); - return frame->cmdBufs[frame->numCmdBufs_++]; + return cmdBuf; } void VKContext::BeginFrame() { @@ -873,7 +875,7 @@ void VKContext::BeginFrame() { FrameData &frame = frame_[frameNum_ & 1]; frame.startCmdBufs_ = 0; - frame.numCmdBufs_ = 0; + frame.numCmdBufs = 0; vkResetCommandPool(vulkan_->GetDevice(), frame.cmdPool_, 0); push_ = frame.pushBuffer; @@ -897,24 +899,29 @@ void VKContext::WaitRenderCompletion(Framebuffer *fbo) { void VKContext::EndFrame() { if (curRenderPass_) { + ELOG("EndFrame: Ending render pass"); vulkan_->EndSurfaceRenderPass(); curRenderPass_ = VK_NULL_HANDLE; curFramebuffer_ = VK_NULL_HANDLE; + cmd_ = nullptr; } + if (cmd_) + Crash(); + // Cap off and submit all the command buffers we recorded during the frame. 
FrameData &frame = frame_[frameNum_ & 1]; - for (int i = frame.startCmdBufs_; i < frame.numCmdBufs_; i++) { + for (int i = frame.startCmdBufs_; i < frame.numCmdBufs; i++) { vkEndCommandBuffer(frame.cmdBufs[i]); vulkan_->QueueBeforeSurfaceRender(frame.cmdBufs[i]); } + frame.startCmdBufs_ = frame.numCmdBufs; // Stop collecting data in the frame's data pushbuffer. push_->End(); vulkan_->EndFrame(); frameNum_++; - cmd_ = nullptr; // will be set on the next begin push_ = nullptr; DirtyDynamicState(); @@ -1583,9 +1590,11 @@ bool VKContext::BlitFramebuffer(Framebuffer *srcfb, int srcX1, int srcY1, int sr void VKContext::EndCurrentRenderpass() { if (curRenderPass_ != VK_NULL_HANDLE) { + // ELOG("EndCurrentRenderPass: Ending render pass %d for cmd buffer %x", (int)(uintptr_t)curRenderPass_, (int)(uintptr_t)cmd_); vkCmdEndRenderPass(cmd_); curRenderPass_ = VK_NULL_HANDLE; curFramebuffer_ = VK_NULL_HANDLE; + cmd_ = VK_NULL_HANDLE; } } @@ -1611,6 +1620,10 @@ void VKContext::BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPass } if (framebuf == curFramebuffer_) { + if (framebuf == 0) + Crash(); + if (!curRenderPass_) + Crash(); // If we're asking to clear, but already bound, we'll just keep it bound but send a clear command. // We will try to avoid this as much as possible. Also, TODO, do a single vkCmdClearAttachments to clear both. if (rp.color == RPAction::CLEAR) { @@ -1627,7 +1640,7 @@ void VKContext::BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPass clear.clearValue.depthStencil.depth = rp.clearDepth; clear.clearValue.depthStencil.stencil = rp.clearStencil; clear.colorAttachment = 0; - VkClearRect rc{ { 0,0,w,h }, 0, 1 }; + VkClearRect rc{ { 0,0,(uint32_t)w,(uint32_t)h }, 0, 1 }; vkCmdClearAttachments(cmdBuf, 1, &clear, 1, &rc); } // We're done. @@ -1636,7 +1649,9 @@ void VKContext::BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPass // OK, we're switching framebuffers. 
EndCurrentRenderpass(); - + VkRenderPass renderPass; + int numClearVals = 0; + VkClearValue clearVal[2] = {}; if (fbo) { VKFramebuffer *fb = (VKFramebuffer *)fbo; fb->cmdBuf = AllocCmdBuf(); @@ -1646,7 +1661,8 @@ void VKContext::BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPass cmd_ = fb->cmdBuf; VkCommandBufferBeginInfo begin{ VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO }; begin.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - vkBeginCommandBuffer(cmd_, &begin); + VkResult res = vkBeginCommandBuffer(cmd_, &begin); + assert(res == VK_SUCCESS); // Now, if the image needs transitioning, let's transition. // The backbuffer does not, that's handled by VulkanContext. VkImageMemoryBarrier barrier{}; @@ -1666,28 +1682,26 @@ void VKContext::BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPass barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; vkCmdPipelineBarrier(cmd_, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, 0, 0, 0, 1, &barrier); fb->color.layout = barrier.newLayout; + renderPass = renderPasses_[RPIndex(rp.color, rp.depth)]; + // ILOG("Switching framebuffer to FBO (fc=%d, cmd=%x, rp=%x)", frameNum_, (int)(uintptr_t)cmd_, (int)(uintptr_t)renderPass); + if (rp.color == RPAction::CLEAR) { + Uint8x4ToFloat4(rp.clearColor, clearVal[0].color.float32); + numClearVals = 1; + } + if (rp.depth == RPAction::CLEAR) { + clearVal[1].depthStencil.depth = rp.clearDepth; + clearVal[1].depthStencil.stencil = rp.clearStencil; + numClearVals = 2; + } } else { cmd_ = vulkan_->GetSurfaceCommandBuffer(); - } - - int numClearVals = 0; - VkClearValue clearVal[2] = {}; - if (rp.color == RPAction::CLEAR) { - Uint8x4ToFloat4(rp.clearColor, clearVal[numClearVals].color.float32); - numClearVals++; - } - if (rp.depth == RPAction::CLEAR) { - clearVal[numClearVals].depthStencil.depth = rp.clearDepth; - clearVal[numClearVals].depthStencil.stencil = rp.clearStencil; - numClearVals++; + renderPass = 
vulkan_->GetSurfaceRenderPass(); + // ILOG("Switching framebuffer to backbuffer (cmd=%x)", (int)(uintptr_t)cmd_); + numClearVals = 2; } VkRenderPassBeginInfo rp_begin = { VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO }; - if (fbo) { - rp_begin.renderPass = renderPasses_[RPIndex(rp.color, rp.depth)]; - } else { - rp_begin.renderPass = vulkan_->GetSurfaceRenderPass(); - } + rp_begin.renderPass = renderPass; rp_begin.framebuffer = framebuf; rp_begin.renderArea.offset.x = 0; rp_begin.renderArea.offset.y = 0; @@ -1697,7 +1711,7 @@ void VKContext::BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPass rp_begin.pClearValues = numClearVals ? clearVal : nullptr; vkCmdBeginRenderPass(cmd_, &rp_begin, VK_SUBPASS_CONTENTS_INLINE); curFramebuffer_ = framebuf; - curRenderPass_ = rp_begin.renderPass; + curRenderPass_ = renderPass; curWidth_ = w; curHeight_ = h; } From b1c256a296b041280a5e585fe93069bfc85e2ab2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Mon, 22 May 2017 14:54:09 +0200 Subject: [PATCH 18/25] Increase the number of sampler/image descriptors further. 
--- GPU/Vulkan/DrawEngineVulkan.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp index 5e0543b39e..2d18e4b32f 100644 --- a/GPU/Vulkan/DrawEngineVulkan.cpp +++ b/GPU/Vulkan/DrawEngineVulkan.cpp @@ -149,7 +149,7 @@ void DrawEngineVulkan::InitDeviceObjects() { VkDescriptorPoolSize dpTypes[2]; dpTypes[0].descriptorCount = 2048; dpTypes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; - dpTypes[1].descriptorCount = 2048; + dpTypes[1].descriptorCount = 4096; dpTypes[1].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; VkDescriptorPoolCreateInfo dp = { VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO }; From 38b50501c2a85b5315b038dd32a703ed6f87b399 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Mon, 22 May 2017 15:29:14 +0200 Subject: [PATCH 19/25] Clearing fix, drawpixels fix/hack --- Common/Vulkan/VulkanContext.cpp | 2 +- GPU/Vulkan/FramebufferVulkan.cpp | 11 ++++++----- GPU/Vulkan/FramebufferVulkan.h | 8 ++++---- ext/native/thin3d/thin3d_vulkan.cpp | 14 +++++++------- 4 files changed, 18 insertions(+), 17 deletions(-) diff --git a/Common/Vulkan/VulkanContext.cpp b/Common/Vulkan/VulkanContext.cpp index 476b657d89..7ef8c7ac88 100644 --- a/Common/Vulkan/VulkanContext.cpp +++ b/Common/Vulkan/VulkanContext.cpp @@ -257,7 +257,7 @@ VkCommandBuffer VulkanContext::BeginSurfaceRenderPass(VkClearValue clear_values[ void VulkanContext::EndSurfaceRenderPass() { FrameData *frame = &frame_[curFrame_]; - ILOG("VulkanContext::EndSurfaceRenderPass"); + // ILOG("VulkanContext::EndSurfaceRenderPass"); vkCmdEndRenderPass(frame->cmdBuf); } diff --git a/GPU/Vulkan/FramebufferVulkan.cpp b/GPU/Vulkan/FramebufferVulkan.cpp index c050fd05eb..7c19ad7c7e 100644 --- a/GPU/Vulkan/FramebufferVulkan.cpp +++ b/GPU/Vulkan/FramebufferVulkan.cpp @@ -90,8 +90,6 @@ FramebufferManagerVulkan::FramebufferManagerVulkan(Draw::DrawContext *draw, Vulk pixelBufObj_(nullptr), currentPBO_(0), curFrame_(0), 
- pipelineBasicTexBackBuffer_(VK_NULL_HANDLE), - pipelineBasicTexFrameBuffer_(VK_NULL_HANDLE), pipelinePostShader_(VK_NULL_HANDLE), vulkan2D_(vulkan) { @@ -300,6 +298,8 @@ void FramebufferManagerVulkan::MakePixelTexture(const u8 *srcPixels, GEBufferFor size_t offset = frameData_[curFrame_].push_->Push(data, width * height * 4, &buffer); drawPixelsTex_->UploadMip(0, width, height, buffer, (uint32_t)offset, width); drawPixelsTex_->EndCreate(); + + overrideImageView_ = drawPixelsTex_->GetImageView(); } void FramebufferManagerVulkan::SetViewport2D(int x, int y, int w, int h) { @@ -359,8 +359,8 @@ void FramebufferManagerVulkan::DrawActiveTexture(float x, float y, float w, floa VkCommandBuffer cmd = (VkCommandBuffer)draw_->GetNativeObject(Draw::NativeObject::RENDERPASS_COMMANDBUFFER); - // TODO: Choose linear or nearest appropriately, see GL impl. - VkImageView view = (VkImageView)draw_->GetNativeObject(Draw::NativeObject::BOUND_TEXTURE_IMAGEVIEW); + VkImageView view = overrideImageView_ ? overrideImageView_ : (VkImageView)draw_->GetNativeObject(Draw::NativeObject::BOUND_TEXTURE_IMAGEVIEW); + overrideImageView_ = VK_NULL_HANDLE; vulkan2D_.BindDescriptorSet(cmd, view, linearFilter ? linearSampler_ : nearestSampler_); vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, cur2DPipeline_); VkBuffer vbuffer; @@ -462,8 +462,9 @@ VkImageView FramebufferManagerVulkan::BindFramebufferAsColorTexture(int stage, V // Currently rendering to this framebuffer. Need to make a copy. if (!skipCopy && framebuffer == currentRenderVfb_) { // ignore this case for now, doesn't work - ILOG("Texturing from current render Vfb!"); + // ILOG("Texturing from current render Vfb!"); return VK_NULL_HANDLE; + // TODO: Maybe merge with bvfbs_? Not sure if those could be packing, and they're created at a different size. 
Draw::Framebuffer *renderCopy = GetTempFBO(framebuffer->renderWidth, framebuffer->renderHeight, (Draw::FBColorDepth)framebuffer->colorDepth); if (renderCopy) { diff --git a/GPU/Vulkan/FramebufferVulkan.h b/GPU/Vulkan/FramebufferVulkan.h index a991996adf..140430c7b7 100644 --- a/GPU/Vulkan/FramebufferVulkan.h +++ b/GPU/Vulkan/FramebufferVulkan.h @@ -183,11 +183,8 @@ private: // Basic shaders VkShaderModule fsBasicTex_; VkShaderModule vsBasicTex_; - // Might need different pipelines for rendering to backbuffer vs framebuffers due to color format incompatibility - VkPipeline pipelineBasicTexBackBuffer_; - VkPipeline pipelineBasicTexFrameBuffer_; - VkPipeline cur2DPipeline_; + VkPipeline cur2DPipeline_ = VK_NULL_HANDLE; // Postprocessing VkPipeline pipelinePostShader_; @@ -195,6 +192,9 @@ private: VkSampler linearSampler_; VkSampler nearestSampler_; + // hack! + VkImageView overrideImageView_ = VK_NULL_HANDLE; + // Simple 2D drawing engine. Vulkan2D vulkan2D_; }; diff --git a/ext/native/thin3d/thin3d_vulkan.cpp b/ext/native/thin3d/thin3d_vulkan.cpp index 983725ea60..cf8db31a78 100644 --- a/ext/native/thin3d/thin3d_vulkan.cpp +++ b/ext/native/thin3d/thin3d_vulkan.cpp @@ -510,7 +510,7 @@ private: enum { MAX_BOUND_TEXTURES = 1, - MAX_FRAME_COMMAND_BUFFERS = 128, + MAX_FRAME_COMMAND_BUFFERS = 256, }; VKTexture *boundTextures_[MAX_BOUND_TEXTURES]; VKSamplerState *boundSamplers_[MAX_BOUND_TEXTURES]; @@ -818,9 +818,9 @@ VKContext::VKContext(VulkanContext *vulkan) } for (int color = 0; color < 3; color++) { switch ((RPAction)color) { - case RPAction::CLEAR: attachments[1].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; break; - case RPAction::KEEP: attachments[1].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; break; - case RPAction::DONT_CARE: attachments[1].loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; break; + case RPAction::CLEAR: attachments[0].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; break; + case RPAction::KEEP: attachments[0].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; break; + case RPAction::DONT_CARE: 
attachments[0].loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; break; } vkCreateRenderPass(vulkan_->GetDevice(), &rp, nullptr, &renderPasses_[RPIndex((RPAction)color, (RPAction)depth)]); } @@ -899,7 +899,7 @@ void VKContext::WaitRenderCompletion(Framebuffer *fbo) { void VKContext::EndFrame() { if (curRenderPass_) { - ELOG("EndFrame: Ending render pass"); + // ELOG("EndFrame: Ending render pass"); vulkan_->EndSurfaceRenderPass(); curRenderPass_ = VK_NULL_HANDLE; curFramebuffer_ = VK_NULL_HANDLE; @@ -1651,7 +1651,8 @@ void VKContext::BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPass EndCurrentRenderpass(); VkRenderPass renderPass; int numClearVals = 0; - VkClearValue clearVal[2] = {}; + VkClearValue clearVal[2]; + memset(clearVal, 0, sizeof(clearVal)); if (fbo) { VKFramebuffer *fb = (VKFramebuffer *)fbo; fb->cmdBuf = AllocCmdBuf(); @@ -1696,7 +1697,6 @@ void VKContext::BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPass } else { cmd_ = vulkan_->GetSurfaceCommandBuffer(); renderPass = vulkan_->GetSurfaceRenderPass(); - // ILOG("Switching framebuffer to backbuffer (cmd=%x)", (int)(uintptr_t)cmd_); numClearVals = 2; } From a95b11c58fb3c6067afdc7087a2aff8dca30ad15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Mon, 22 May 2017 16:16:09 +0200 Subject: [PATCH 20/25] Don't forget to initialize logicop.. --- GPU/Vulkan/DrawEngineVulkan.cpp | 1 - GPU/Vulkan/StateMappingVulkan.cpp | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp index 2d18e4b32f..e8438fb2b7 100644 --- a/GPU/Vulkan/DrawEngineVulkan.cpp +++ b/GPU/Vulkan/DrawEngineVulkan.cpp @@ -168,7 +168,6 @@ void DrawEngineVulkan::InitDeviceObjects() { if (res == VK_SUCCESS) { break; } - // Let's try to reduce the counts. 
assert(res == VK_ERROR_OUT_OF_HOST_MEMORY || res == VK_ERROR_OUT_OF_DEVICE_MEMORY); dpTypes[0].descriptorCount /= 2; diff --git a/GPU/Vulkan/StateMappingVulkan.cpp b/GPU/Vulkan/StateMappingVulkan.cpp index af3fe9cc5a..e37c80ac06 100644 --- a/GPU/Vulkan/StateMappingVulkan.cpp +++ b/GPU/Vulkan/StateMappingVulkan.cpp @@ -152,6 +152,9 @@ void DrawEngineVulkan::ConvertStateToVulkanKey(FramebufferManagerVulkan &fbManag key.colorWriteMask = (colorMask ? (VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT) : 0) | (alphaMask ? VK_COLOR_COMPONENT_A_BIT : 0); } else { + key.logicOpEnable = false; + key.logicOp = VK_LOGIC_OP_CLEAR; + // Set blend - unless we need to do it in the shader. GenericBlendState blendState; ConvertBlendState(blendState, gstate_c.allowShaderBlend); From f762285b9c4896b0ee63d13488659c6bc0ffa782 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Tue, 23 May 2017 10:26:49 +0200 Subject: [PATCH 21/25] Vulkan: Some barrier optimization --- ext/native/thin3d/thin3d_vulkan.cpp | 48 ++++++++++++++++++----------- 1 file changed, 30 insertions(+), 18 deletions(-) diff --git a/ext/native/thin3d/thin3d_vulkan.cpp b/ext/native/thin3d/thin3d_vulkan.cpp index cf8db31a78..cb9459bc2b 100644 --- a/ext/native/thin3d/thin3d_vulkan.cpp +++ b/ext/native/thin3d/thin3d_vulkan.cpp @@ -1666,23 +1666,29 @@ void VKContext::BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPass assert(res == VK_SUCCESS); // Now, if the image needs transitioning, let's transition. // The backbuffer does not, that's handled by VulkanContext. 
- VkImageMemoryBarrier barrier{}; - barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - barrier.oldLayout = fb->color.layout; - barrier.subresourceRange.layerCount = 1; - barrier.subresourceRange.levelCount = 1; - barrier.image = fb->color.image; - barrier.srcAccessMask = 0; - switch (fb->color.layout) { - case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: - barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT; - break; + if (fb->color.layout != VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) { + VkImageMemoryBarrier barrier{}; + barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + barrier.oldLayout = fb->color.layout; + barrier.subresourceRange.layerCount = 1; + barrier.subresourceRange.levelCount = 1; + barrier.image = fb->color.image; + barrier.srcAccessMask = 0; + switch (fb->color.layout) { + case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: + barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT; + break; + case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: + barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + break; + } + barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + barrier.newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + // TODO: Optimize these flags. 
+ vkCmdPipelineBarrier(cmd_, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, 0, 0, 0, 0, 0, 1, &barrier); + fb->color.layout = barrier.newLayout; } - barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - barrier.newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - vkCmdPipelineBarrier(cmd_, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, 0, 0, 0, 1, &barrier); - fb->color.layout = barrier.newLayout; renderPass = renderPasses_[RPIndex(rp.color, rp.depth)]; // ILOG("Switching framebuffer to FBO (fc=%d, cmd=%x, rp=%x)", frameNum_, (int)(uintptr_t)cmd_, (int)(uintptr_t)renderPass); if (rp.color == RPAction::CLEAR) { @@ -1719,6 +1725,11 @@ void VKContext::BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPass // color must be 0, for now. void VKContext::BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int attachment) { VKFramebuffer *fb = (VKFramebuffer *)fbo; + boundImageView_[0] = fb->color.view; + // If we already have the right layout, nothing else to do. + if (fb->color.layout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) + return; + VkCommandBuffer transitionCmdBuf; if (fb->cmdBuf && fb->frameCount == frameNum_) { // If the framebuffer has a "live" command buffer, we can directly use it to transition it for sampling. @@ -1746,10 +1757,11 @@ void VKContext::BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChanne barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + // we're between passes so it's OK. - vkCmdPipelineBarrier(transitionCmdBuf, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, 0, 0, 0, 1, &barrier); + // ARM Best Practices guide recommends these stage bits. 
+ vkCmdPipelineBarrier(transitionCmdBuf, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, 0, 0, 0, 1, &barrier); fb->color.layout = barrier.newLayout; - boundImageView_[0] = fb->color.view; } void VKContext::BindFramebufferForRead(Framebuffer *fbo) { /* noop */ } From e8890e3c4a42319ac5adb18a6317f11bbf9132ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Tue, 23 May 2017 11:12:10 +0200 Subject: [PATCH 22/25] Address a bunch of review comments. --- GPU/Common/FramebufferCommon.cpp | 5 ++++- GPU/Common/FramebufferCommon.h | 1 + GPU/D3D11/FramebufferManagerD3D11.cpp | 5 ++--- GPU/D3D11/TextureCacheD3D11.cpp | 2 +- GPU/GLES/FramebufferManagerGLES.cpp | 6 +----- GPU/GLES/StencilBufferGLES.cpp | 4 ++-- GPU/Vulkan/FramebufferVulkan.cpp | 7 +++++-- 7 files changed, 16 insertions(+), 14 deletions(-) diff --git a/GPU/Common/FramebufferCommon.cpp b/GPU/Common/FramebufferCommon.cpp index c2007df6bb..61fa27eeb0 100644 --- a/GPU/Common/FramebufferCommon.cpp +++ b/GPU/Common/FramebufferCommon.cpp @@ -1162,7 +1162,10 @@ void FramebufferManagerCommon::ResizeFramebufFBO(VirtualFramebuffer *vfb, u16 w, BlitFramebuffer(vfb, 0, 0, &old, 0, 0, std::min(vfb->bufferWidth, vfb->width), std::min(vfb->height, vfb->bufferHeight), 0); } } - delete old.fbo; + delete old.fbo; + if (needGLESRebinds_) { + draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP }); + } } else { draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }); } diff --git a/GPU/Common/FramebufferCommon.h b/GPU/Common/FramebufferCommon.h index 11f339073c..0530e6480a 100644 --- a/GPU/Common/FramebufferCommon.h +++ b/GPU/Common/FramebufferCommon.h @@ -375,6 +375,7 @@ protected: // Used by post-processing shaders std::vector extraFBOs_; + bool needGLESRebinds_ = false; struct TempFBO { Draw::Framebuffer *fbo; diff --git a/GPU/D3D11/FramebufferManagerD3D11.cpp 
b/GPU/D3D11/FramebufferManagerD3D11.cpp index 3d2cdf774f..b5423dc9dc 100644 --- a/GPU/D3D11/FramebufferManagerD3D11.cpp +++ b/GPU/D3D11/FramebufferManagerD3D11.cpp @@ -490,10 +490,9 @@ void FramebufferManagerD3D11::ReformatFramebufferFrom(VirtualFramebuffer *vfb, G // The best way to do this may ultimately be to create a new FBO (combine with any resize?) // and blit with a shader to that, then replace the FBO on vfb. Stencil would still be complex // to exactly reproduce in 4444 and 8888 formats. - - draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::CLEAR, Draw::RPAction::KEEP }); - if (old == GE_FORMAT_565) { + draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::CLEAR, Draw::RPAction::KEEP }); + // TODO: There's no way this does anything useful :( context_->OMSetDepthStencilState(stockD3D11.depthDisabledStencilWrite, 0xFF); context_->OMSetBlendState(stockD3D11.blendStateDisabledWithColorMask[0], nullptr, 0xFFFFFFFF); diff --git a/GPU/D3D11/TextureCacheD3D11.cpp b/GPU/D3D11/TextureCacheD3D11.cpp index 8ceec37249..344fc10a1e 100644 --- a/GPU/D3D11/TextureCacheD3D11.cpp +++ b/GPU/D3D11/TextureCacheD3D11.cpp @@ -434,7 +434,7 @@ void TextureCacheD3D11::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFra context_->PSSetShaderResources(1, 1, &clutTexture); framebufferManagerD3D11_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_SKIP_COPY); context_->PSSetSamplers(0, 1, &stockD3D11.samplerPoint2DWrap); - draw_->BindFramebufferAsRenderTarget(depalFBO, { Draw::RPAction::DONT_CARE }); + draw_->BindFramebufferAsRenderTarget(depalFBO, { Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE }); shaderApply.Shade(); framebufferManagerD3D11_->RebindFramebuffer(); diff --git a/GPU/GLES/FramebufferManagerGLES.cpp b/GPU/GLES/FramebufferManagerGLES.cpp index 14d37ceb9b..4af5f74d07 100644 --- a/GPU/GLES/FramebufferManagerGLES.cpp +++ b/GPU/GLES/FramebufferManagerGLES.cpp @@ -93,11 +93,6 @@ void FramebufferManagerGLES::ClearBuffer(bool 
keepState) { #endif glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); if (keepState) { - glstate.scissorTest.force(false); - glstate.depthWrite.force(GL_TRUE); - glstate.colorMask.force(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); - glstate.stencilFunc.force(GL_ALWAYS, 0, 0); - glstate.stencilMask.force(0xFF); glstate.scissorTest.restore(); glstate.depthWrite.restore(); glstate.colorMask.restore(); @@ -239,6 +234,7 @@ FramebufferManagerGLES::FramebufferManagerGLES(Draw::DrawContext *draw) : currentPBO_(0) { needBackBufferYSwap_ = true; + needGLESRebinds_ = true; } void FramebufferManagerGLES::Init() { diff --git a/GPU/GLES/StencilBufferGLES.cpp b/GPU/GLES/StencilBufferGLES.cpp index a595bb19bc..ba5d34a3fa 100644 --- a/GPU/GLES/StencilBufferGLES.cpp +++ b/GPU/GLES/StencilBufferGLES.cpp @@ -183,9 +183,9 @@ bool FramebufferManagerGLES::NotifyStencilUpload(u32 addr, int size, bool skipZe Draw::Framebuffer *blitFBO = nullptr; if (useBlit) { blitFBO = GetTempFBO(w, h, Draw::FBO_8888); - draw_->BindFramebufferAsRenderTarget(blitFBO, { Draw::RPAction::CLEAR, Draw::RPAction::DONT_CARE }); + draw_->BindFramebufferAsRenderTarget(blitFBO, { Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE }); } else if (dstBuffer->fbo) { - draw_->BindFramebufferAsRenderTarget(dstBuffer->fbo, { Draw::RPAction::KEEP, Draw::RPAction::DONT_CARE }); + draw_->BindFramebufferAsRenderTarget(dstBuffer->fbo, { Draw::RPAction::KEEP, Draw::RPAction::CLEAR }); } glViewport(0, 0, w, h); diff --git a/GPU/Vulkan/FramebufferVulkan.cpp b/GPU/Vulkan/FramebufferVulkan.cpp index 7c19ad7c7e..2fa7c96208 100644 --- a/GPU/Vulkan/FramebufferVulkan.cpp +++ b/GPU/Vulkan/FramebufferVulkan.cpp @@ -176,6 +176,8 @@ void FramebufferManagerVulkan::NotifyClear(bool clearColor, bool clearAlpha, boo CenterDisplayOutputRect(&x, &y, &w, &h, 480.0f, 272.0f, (float)pixelWidth_, (float)pixelHeight_, ROTATION_LOCKED_HORIZONTAL); int mask = 0; + // The Clear detection takes care of doing a regular draw instead if 
separate masking + // of color and alpha is needed, so we can just treat them as the same. if (clearColor || clearAlpha) mask |= Draw::FBChannel::FB_COLOR_BIT; if (clearDepth) @@ -481,8 +483,8 @@ VkImageView FramebufferManagerVulkan::BindFramebufferAsColorTexture(int stage, V draw_->BindFramebufferAsTexture(framebuffer->fbo, stage, Draw::FB_COLOR_BIT, 0); return (VkImageView)draw_->GetNativeObject(Draw::NativeObject::BOUND_TEXTURE_IMAGEVIEW); } else { - ERROR_LOG_REPORT_ONCE(d3d11SelfTexture, G3D, "Attempting to texture to target"); - // Badness on D3D11 to bind the currently rendered-to framebuffer as a texture. + ERROR_LOG_REPORT_ONCE(vulkanSelfTexture, G3D, "Attempting to texture from target"); + // To do this safely in Vulkan, we need to use input attachments. return VK_NULL_HANDLE; } } @@ -1144,4 +1146,5 @@ bool FramebufferManagerVulkan::GetStencilbuffer(u32 fb_address, int fb_stride, G void FramebufferManagerVulkan::ClearBuffer(bool keepState) { // TODO: Ideally, this should never be called. + // assert(false); } From 137b79c7083dbd75593dd87dc36432fa603e7a48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Tue, 23 May 2017 21:56:48 +0200 Subject: [PATCH 23/25] Fix more review comments. --- GPU/GLES/GPU_GLES.cpp | 2 +- GPU/Vulkan/FramebufferVulkan.cpp | 8 ++-- GPU/Vulkan/TextureCacheVulkan.cpp | 1 - UI/GameInfoCache.cpp | 1 + ext/native/thin3d/thin3d.h | 1 - ext/native/thin3d/thin3d_vulkan.cpp | 61 ++++++++++++++++------------- 6 files changed, 39 insertions(+), 35 deletions(-) diff --git a/GPU/GLES/GPU_GLES.cpp b/GPU/GLES/GPU_GLES.cpp index 0f34724d1f..b56c3789c6 100644 --- a/GPU/GLES/GPU_GLES.cpp +++ b/GPU/GLES/GPU_GLES.cpp @@ -660,7 +660,7 @@ void GPU_GLES::Execute_Prim(u32 op, u32 diff) { return; } - // This also makes skipping drawing very effective. This function can change the framebffer. + // This also makes skipping drawing very effective. This function can change the framebuffer. 
framebufferManagerGL_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason); if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) { drawEngine_.SetupVertexDecoder(gstate.vertType); diff --git a/GPU/Vulkan/FramebufferVulkan.cpp b/GPU/Vulkan/FramebufferVulkan.cpp index 2fa7c96208..60de8f83c4 100644 --- a/GPU/Vulkan/FramebufferVulkan.cpp +++ b/GPU/Vulkan/FramebufferVulkan.cpp @@ -340,10 +340,10 @@ void FramebufferManagerVulkan::DrawActiveTexture(float x, float y, float w, floa } Vulkan2D::Vertex vtx[4] = { - {x,y, 0,texCoords[0],texCoords[1]}, - {x + w,y, 0,texCoords[2],texCoords[3]}, - {x,y + h, 0,texCoords[6],texCoords[7] }, - {x + w,y + h, 0,texCoords[4],texCoords[5] }, + {x, y, 0, texCoords[0], texCoords[1]}, + {x + w, y, 0, texCoords[2], texCoords[3]}, + {x, y + h, 0, texCoords[6], texCoords[7]}, + {x + w, y + h, 0, texCoords[4], texCoords[5]}, }; float invDestW = 1.0f / (destW * 0.5f); diff --git a/GPU/Vulkan/TextureCacheVulkan.cpp b/GPU/Vulkan/TextureCacheVulkan.cpp index dc833ba1d1..9234cf0b78 100644 --- a/GPU/Vulkan/TextureCacheVulkan.cpp +++ b/GPU/Vulkan/TextureCacheVulkan.cpp @@ -613,7 +613,6 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry, bool replaceIm } } else { entry->vkTex->texture_->TransitionForUpload(); - // TODO: If reusing an existing texture object, we must transition it into the correct layout. } lastBoundTexture = entry->vkTex; diff --git a/UI/GameInfoCache.cpp b/UI/GameInfoCache.cpp index 7041dab87f..af3672a3a8 100644 --- a/UI/GameInfoCache.cpp +++ b/UI/GameInfoCache.cpp @@ -720,6 +720,7 @@ std::shared_ptr GameInfoCache::GetInfo(Draw::DrawContext *draw, const if (info->IsWorking()) { // Uh oh, it's currently in process. It could mark pending = false with the wrong wantFlags. // Let's wait it out, then queue. + // NOTE: This is bad because we're likely on the UI thread.... 
WaitUntilDone(info); } diff --git a/ext/native/thin3d/thin3d.h b/ext/native/thin3d/thin3d.h index ff34a24fff..ce2c286999 100644 --- a/ext/native/thin3d/thin3d.h +++ b/ext/native/thin3d/thin3d.h @@ -624,7 +624,6 @@ public: // color must be 0, for now. virtual void BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int attachment) = 0; virtual void BindFramebufferForRead(Framebuffer *fbo) = 0; - virtual void TransitionForSampling(Framebuffer *fbo) {} // Temporary vulkan hack virtual uintptr_t GetFramebufferAPITexture(Framebuffer *fbo, int channelBits, int attachment) = 0; diff --git a/ext/native/thin3d/thin3d_vulkan.cpp b/ext/native/thin3d/thin3d_vulkan.cpp index cb9459bc2b..9d6bf98a9f 100644 --- a/ext/native/thin3d/thin3d_vulkan.cpp +++ b/ext/native/thin3d/thin3d_vulkan.cpp @@ -537,7 +537,7 @@ private: DeviceCaps caps_{}; - VkFramebuffer curFramebuffer_ = VK_NULL_HANDLE;; + VkFramebuffer curFramebuffer_ = VK_NULL_HANDLE; VkRenderPass curRenderPass_ = VK_NULL_HANDLE; VkCommandBuffer cmd_ = VK_NULL_HANDLE; }; @@ -760,7 +760,7 @@ VKContext::VKContext(VulkanContext *vulkan) pipelineCache_ = vulkan_->CreatePipelineCache(); // Create a bunch of render pass objects, for normal rendering with a depth buffer, - // with and without pre-clearing of both depth/stencil and color, so 4 combos. + // with clearing, without clearing, and dont-care for both depth/stencil and color, so 3*3=9 combos. VkAttachmentDescription attachments[2] = {}; attachments[0].format = VK_FORMAT_R8G8B8A8_UNORM; attachments[0].samples = VK_SAMPLE_COUNT_1_BIT; @@ -848,7 +848,7 @@ VKContext::~VKContext() { VkCommandBuffer VKContext::AllocCmdBuf() { FrameData *frame = &frame_[frameNum_ & 1]; - if (frame->numCmdBufs >= this->MAX_FRAME_COMMAND_BUFFERS) + if (frame->numCmdBufs >= MAX_FRAME_COMMAND_BUFFERS) Crash(); if (frame->cmdBufs[frame->numCmdBufs]) { @@ -1448,7 +1448,7 @@ uint32_t VKContext::GetDataFormatSupport(DataFormat fmt) const { } // Simple independent framebuffer image. 
Gets its own allocation, we don't have that many framebuffers so it's fine -// to let them have individual non-pooled allocations. +// to let them have individual non-pooled allocations. Until it's not fine. We'll see. struct VKImage { VkImage image; VkImageView view; @@ -1524,7 +1524,7 @@ void CreateImage(VulkanContext *vulkan, VKImage &img, int width, int height, VkF vkCmdPipelineBarrier(vulkan->GetInitCommandBuffer(), VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, 0, 0, 0, 1, &barrier); } -// A VKFramebuffer is a VkFramebuffer plus all the textures it owns. +// A VKFramebuffer is a VkFramebuffer (note caps difference) plus all the textures it owns. // It also has a reference to the command buffer that it was last rendered to with. // If it needs to be transitioned, and the frame number matches, use it, otherwise // use this frame's init command buffer. @@ -1540,15 +1540,16 @@ public: vulkan_->Delete().QueueDeleteDeviceMemory(depth.memory); vulkan_->Delete().QueueDeleteFramebuffer(framebuf); } - VkFramebuffer framebuf; - VKImage color; - VKImage depth; - int width; - int height; + VkFramebuffer framebuf = VK_NULL_HANDLE; + VKImage color{}; + VKImage depth{}; + int width = 0; + int height = 0; // These belong together, see above. 
- VkCommandBuffer cmdBuf; - int frameCount; + VkCommandBuffer cmdBuf = VK_NULL_HANDLE; + int frameCount = 0; + private: VulkanContext *vulkan_; }; @@ -1580,17 +1581,20 @@ Framebuffer *VKContext::CreateFramebuffer(const FramebufferDesc &desc) { void VKContext::CopyFramebufferImage(Framebuffer *srcfb, int level, int x, int y, int z, Framebuffer *dstfb, int dstLevel, int dstX, int dstY, int dstZ, int width, int height, int depth, int channelBits) { VKFramebuffer *src = (VKFramebuffer *)srcfb; VKFramebuffer *dst = (VKFramebuffer *)dstfb; + // TODO } bool VKContext::BlitFramebuffer(Framebuffer *srcfb, int srcX1, int srcY1, int srcX2, int srcY2, Framebuffer *dstfb, int dstX1, int dstY1, int dstX2, int dstY2, int channelBits, FBBlitFilter filter) { VKFramebuffer *src = (VKFramebuffer *)srcfb; VKFramebuffer *dst = (VKFramebuffer *)dstfb; + + // TODO + return true; } void VKContext::EndCurrentRenderpass() { if (curRenderPass_ != VK_NULL_HANDLE) { - // ELOG("EndCurrentRenderPass: Ending render pass %d for cmd buffer %x", (int)(uintptr_t)curRenderPass_, (int)(uintptr_t)cmd_); vkCmdEndRenderPass(cmd_); curRenderPass_ = VK_NULL_HANDLE; curFramebuffer_ = VK_NULL_HANDLE; @@ -1598,7 +1602,6 @@ void VKContext::EndCurrentRenderpass() { } } -// These functions should be self explanatory. void VKContext::BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPassInfo &rp) { VkFramebuffer framebuf; VkCommandBuffer cmdBuf; @@ -1625,23 +1628,25 @@ void VKContext::BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPass if (!curRenderPass_) Crash(); // If we're asking to clear, but already bound, we'll just keep it bound but send a clear command. - // We will try to avoid this as much as possible. Also, TODO, do a single vkCmdClearAttachments to clear both. + // We will try to avoid this as much as possible. 
+ VkClearAttachment clear[2]{}; + int count = 0; if (rp.color == RPAction::CLEAR) { - VkClearAttachment clear{}; - clear.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - Uint8x4ToFloat4(rp.clearColor, clear.clearValue.color.float32); - clear.colorAttachment = 0; - VkClearRect rc{ {0,0,(uint32_t)w,(uint32_t)h}, 0, 1 }; - vkCmdClearAttachments(cmdBuf, 1, &clear, 1, &rc); + clear[count].aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + Uint8x4ToFloat4(rp.clearColor, clear[count].clearValue.color.float32); + clear[count].colorAttachment = 0; + count++; } if (rp.depth == RPAction::CLEAR) { - VkClearAttachment clear{}; - clear.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - clear.clearValue.depthStencil.depth = rp.clearDepth; - clear.clearValue.depthStencil.stencil = rp.clearStencil; - clear.colorAttachment = 0; + clear[count].aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + clear[count].clearValue.depthStencil.depth = rp.clearDepth; + clear[count].clearValue.depthStencil.stencil = rp.clearStencil; + clear[count].colorAttachment = 0; + count++; + } + if (count > 0) { VkClearRect rc{ { 0,0,(uint32_t)w,(uint32_t)h }, 0, 1 }; - vkCmdClearAttachments(cmdBuf, 1, &clear, 1, &rc); + vkCmdClearAttachments(cmdBuf, count, clear, 1, &rc); } // We're done. return; @@ -1685,7 +1690,7 @@ void VKContext::BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPass barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; barrier.newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - // TODO: Optimize these flags. + // TODO: Double-check these flags. Should be fine. 
vkCmdPipelineBarrier(cmd_, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, 0, 0, 0, 0, 0, 1, &barrier); fb->color.layout = barrier.newLayout; } From 4c8dc248345dcfde2c47e415860d611b244dcc5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Tue, 23 May 2017 23:10:35 +0200 Subject: [PATCH 24/25] Vulkan: Implement depth buffer copies. Not sure all the barriers are right.. --- GPU/Common/FramebufferCommon.cpp | 28 ++--- GPU/Vulkan/FramebufferVulkan.cpp | 41 +++---- ext/native/thin3d/thin3d_vulkan.cpp | 179 +++++++++++++++++++++++++--- 3 files changed, 197 insertions(+), 51 deletions(-) diff --git a/GPU/Common/FramebufferCommon.cpp b/GPU/Common/FramebufferCommon.cpp index 61fa27eeb0..70b3e7ab09 100644 --- a/GPU/Common/FramebufferCommon.cpp +++ b/GPU/Common/FramebufferCommon.cpp @@ -570,6 +570,20 @@ void FramebufferManagerCommon::NotifyRenderFramebufferSwitched(VirtualFramebuffe } textureCache_->ForgetLastTexture(); + // Copy depth pixel value from the read framebuffer to the draw framebuffer + if (prevVfb && !g_Config.bDisableSlowFramebufEffects) { + if (!prevVfb->fbo || !vfb->fbo || !useBufferedRendering_ || !prevVfb->depthUpdated || isClearingDepth) { + // If depth wasn't updated, then we're at least "two degrees" away from the data. + // This is an optimization: it probably doesn't need to be copied in this case. + } else { + BlitFramebufferDepth(prevVfb, vfb); + } + } + if (vfb->drawnFormat != vfb->format) { + // TODO: Might ultimately combine this with the resize step in DoSetRenderFrameBuffer(). 
+ ReformatFramebufferFrom(vfb, vfb->drawnFormat); + } + if (useBufferedRendering_) { if (vfb->fbo) { if (gl_extensions.IsGLES) { @@ -606,20 +620,6 @@ void FramebufferManagerCommon::NotifyRenderFramebufferSwitched(VirtualFramebuffe } textureCache_->NotifyFramebuffer(vfb->fb_address, vfb, NOTIFY_FB_UPDATED); - // Copy depth pixel value from the read framebuffer to the draw framebuffer - if (prevVfb && !g_Config.bDisableSlowFramebufEffects) { - if (!prevVfb->fbo || !vfb->fbo || !useBufferedRendering_ || !prevVfb->depthUpdated || isClearingDepth) { - // If depth wasn't updated, then we're at least "two degrees" away from the data. - // This is an optimization: it probably doesn't need to be copied in this case. - } else { - BlitFramebufferDepth(prevVfb, vfb); - } - } - if (vfb->drawnFormat != vfb->format) { - // TODO: Might ultimately combine this with the resize step in DoSetRenderFrameBuffer(). - ReformatFramebufferFrom(vfb, vfb->drawnFormat); - } - // ugly... if (gstate_c.curRTWidth != vfb->width || gstate_c.curRTHeight != vfb->height) { gstate_c.Dirty(DIRTY_PROJTHROUGHMATRIX); diff --git a/GPU/Vulkan/FramebufferVulkan.cpp b/GPU/Vulkan/FramebufferVulkan.cpp index 60de8f83c4..43d08ef759 100644 --- a/GPU/Vulkan/FramebufferVulkan.cpp +++ b/GPU/Vulkan/FramebufferVulkan.cpp @@ -403,14 +403,12 @@ int FramebufferManagerVulkan::GetLineWidth() { } } +// This also binds vfb as the current render target. void FramebufferManagerVulkan::ReformatFramebufferFrom(VirtualFramebuffer *vfb, GEBufferFormat old) { if (!useBufferedRendering_ || !vfb->fbo) { return; } - /* - BindFramebufferAsRenderTargetvfb->fbo); - // Technically, we should at this point re-interpret the bytes of the old format to the new. // That might get tricky, and could cause unnecessary slowness in some games. // For now, we just clear alpha/stencil from 565, which fixes shadow issues in Kingdom Hearts. 
@@ -421,34 +419,33 @@ void FramebufferManagerVulkan::ReformatFramebufferFrom(VirtualFramebuffer *vfb, // to exactly reproduce in 4444 and 8888 formats. if (old == GE_FORMAT_565) { - // TODO: Clear to black, set stencil to 0, don't touch depth (or maybe zap depth). + draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }); + } else { + draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP }); } - - RebindFramebuffer(); - */ } +// Except for a missing rebind and silly scissor enables, identical copy of the same function in GPU_GLES - tricky parts are in thin3d. void FramebufferManagerVulkan::BlitFramebufferDepth(VirtualFramebuffer *src, VirtualFramebuffer *dst) { - if (src->z_address == dst->z_address && - src->z_stride != 0 && dst->z_stride != 0 && - src->renderWidth == dst->renderWidth && - src->renderHeight == dst->renderHeight) { + if (g_Config.bDisableSlowFramebufEffects) { + return; + } - // TODO: Let's only do this if not clearing depth. + bool matchingDepthBuffer = src->z_address == dst->z_address && src->z_stride != 0 && dst->z_stride != 0; + bool matchingSize = src->width == dst->width && src->height == dst->height; + bool matchingRenderSize = src->renderWidth == dst->renderWidth && src->renderHeight == dst->renderHeight; - VkImageCopy region = {}; - region.srcSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; - region.dstSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; - region.extent = { dst->renderWidth, dst->renderHeight, 1 }; - region.extent.depth = 1; - // vkCmdCopyImage(curCmd_, src->fbo->GetDepthStencil()->GetImage(), VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, - // dst->fbo->GetDepthStencil()->GetImage(), VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, 1, &region); - - // If we set dst->depthUpdated here, our optimization above would be pointless.
+ if (gstate_c.Supports(GPU_SUPPORTS_ANY_COPY_IMAGE) && matchingDepthBuffer && matchingRenderSize && matchingSize) { + draw_->CopyFramebufferImage(src->fbo, 0, 0, 0, 0, dst->fbo, 0, 0, 0, 0, src->renderWidth, src->renderHeight, 1, Draw::FB_DEPTH_BIT); + } else if (matchingDepthBuffer && matchingSize) { + int w = std::min(src->renderWidth, dst->renderWidth); + int h = std::min(src->renderHeight, dst->renderHeight); + if (gstate_c.Supports(GPU_SUPPORTS_ARB_FRAMEBUFFER_BLIT | GPU_SUPPORTS_NV_FRAMEBUFFER_BLIT)) { + draw_->BlitFramebuffer(src->fbo, 0, 0, w, h, dst->fbo, 0, 0, w, h, Draw::FB_DEPTH_BIT, Draw::FB_BLIT_NEAREST); + } } } - VkImageView FramebufferManagerVulkan::BindFramebufferAsColorTexture(int stage, VirtualFramebuffer *framebuffer, int flags) { if (!framebuffer->fbo || !useBufferedRendering_) { gstate_c.skipDrawReason |= SKIPDRAW_BAD_FB_TEXTURE; diff --git a/ext/native/thin3d/thin3d_vulkan.cpp b/ext/native/thin3d/thin3d_vulkan.cpp index 9d6bf98a9f..4be3c6bfd5 100644 --- a/ext/native/thin3d/thin3d_vulkan.cpp +++ b/ext/native/thin3d/thin3d_vulkan.cpp @@ -348,6 +348,14 @@ private: DataFormat format_; }; +// Simple independent framebuffer image. Gets its own allocation, we don't have that many framebuffers so it's fine +// to let them have individual non-pooled allocations. Until it's not fine. We'll see. 
+struct VKImage { + VkImage image; + VkImageView view; + VkDeviceMemory memory; + VkImageLayout layout; +}; class VKContext : public DrawContext { public: VKContext(VulkanContext *vulkan); @@ -476,6 +484,9 @@ private: void EndCurrentRenderpass(); VkCommandBuffer AllocCmdBuf(); + static void SetupTransitionToTransferSrc(VKImage &img, VkImageMemoryBarrier &barrier, VkImageAspectFlags aspect); + static void SetupTransitionToTransferDst(VKImage &img, VkImageMemoryBarrier &barrier, VkImageAspectFlags aspect); + VulkanContext *vulkan_ = nullptr; VKPipeline *curPipeline_ = nullptr; @@ -1447,15 +1458,6 @@ uint32_t VKContext::GetDataFormatSupport(DataFormat fmt) const { } } -// Simple independent framebuffer image. Gets its own allocation, we don't have that many framebuffers so it's fine -// to let them have individual non-pooled allocations. Until it's not fine. We'll see. -struct VKImage { - VkImage image; - VkImageView view; - VkDeviceMemory memory; - VkImageLayout layout; -}; - void CreateImage(VulkanContext *vulkan, VKImage &img, int width, int height, VkFormat format, VkImageLayout initialLayout, bool color) { VkImageCreateInfo ici{ VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO }; ici.arrayLayers = 1; @@ -1488,7 +1490,6 @@ void CreateImage(VulkanContext *vulkan, VKImage &img, int width, int height, VkF assert(res == VK_SUCCESS); res = vkBindImageMemory(vulkan->GetDevice(), img.image, img.memory, 0); assert(res == VK_SUCCESS); - img.layout = initialLayout; VkImageViewCreateInfo ivci{ VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO }; ivci.components = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A }; @@ -1521,7 +1522,8 @@ void CreateImage(VulkanContext *vulkan, VKImage &img, int width, int height, VkF } barrier.newLayout = initialLayout; barrier.subresourceRange.aspectMask = ivci.subresourceRange.aspectMask; - vkCmdPipelineBarrier(vulkan->GetInitCommandBuffer(), VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 
0, 0, 0, 0, 1, &barrier); + vkCmdPipelineBarrier(vulkan->GetInitCommandBuffer(), VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, nullptr, 1, &barrier); + img.layout = VK_IMAGE_LAYOUT_UNDEFINED; } // A VKFramebuffer is a VkFramebuffer (note caps difference) plus all the textures it owns. @@ -1579,9 +1581,129 @@ Framebuffer *VKContext::CreateFramebuffer(const FramebufferDesc &desc) { } void VKContext::CopyFramebufferImage(Framebuffer *srcfb, int level, int x, int y, int z, Framebuffer *dstfb, int dstLevel, int dstX, int dstY, int dstZ, int width, int height, int depth, int channelBits) { + // Can't copy during render passes. + EndCurrentRenderpass(); + VKFramebuffer *src = (VKFramebuffer *)srcfb; VKFramebuffer *dst = (VKFramebuffer *)dstfb; - // TODO + + VkImageCopy copy{}; + copy.srcOffset.x = x; + copy.srcOffset.y = y; + copy.srcOffset.z = z; + copy.srcSubresource.mipLevel = level; + copy.srcSubresource.layerCount = 1; + copy.dstOffset.x = dstX; + copy.dstOffset.y = dstY; + copy.dstOffset.z = dstZ; + copy.dstSubresource.mipLevel = dstLevel; + copy.dstSubresource.layerCount = 1; + copy.extent.width = width; + copy.extent.height = height; + copy.extent.depth = depth; + + // We're gonna tack copies onto the src's command buffer, if it's from this frame. + // If from a previous frame, just do it in frame init. + VkCommandBuffer cmd = src->cmdBuf; + if (src->frameCount != frameNum_) { + cmd = vulkan_->GetInitCommandBuffer(); + } + + VkImageMemoryBarrier srcBarriers[2]{}; + VkImageMemoryBarrier dstBarriers[2]{}; + int srcCount = 0; + int dstCount = 0; + + // First source barriers. 
+ if (channelBits & FB_COLOR_BIT) { + if (src->color.layout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) { + SetupTransitionToTransferSrc(src->color, srcBarriers[srcCount++], VK_IMAGE_ASPECT_COLOR_BIT); + } + if (dst->color.layout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { + SetupTransitionToTransferDst(dst->color, dstBarriers[dstCount++], VK_IMAGE_ASPECT_COLOR_BIT); + } + } + + // We can't copy only depth or only stencil unfortunately. + if (channelBits & (FB_DEPTH_BIT | FB_STENCIL_BIT)) { + if (src->depth.layout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) { + SetupTransitionToTransferSrc(src->depth, srcBarriers[srcCount++], VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT); + } + if (dst->depth.layout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { + SetupTransitionToTransferDst(dst->depth, dstBarriers[dstCount++], VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT); + } + } + + // TODO: Fix the pipe bits to be bit less conservative. + if (srcCount) { + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 0, 0, nullptr, 0, nullptr, srcCount, srcBarriers); + } + if (dstCount) { + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 0, 0, nullptr, 0, nullptr, dstCount, dstBarriers); + } + + if (channelBits & FB_COLOR_BIT) { + copy.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + copy.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + vkCmdCopyImage(cmd, src->color.image, src->color.layout, dst->color.image, dst->color.layout, 1, &copy); + } + if (channelBits & (FB_DEPTH_BIT | FB_STENCIL_BIT)) { + copy.srcSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + copy.dstSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + vkCmdCopyImage(cmd, src->depth.image, src->depth.layout, dst->depth.image, dst->depth.layout, 1, &copy); + } +} + +void VKContext::SetupTransitionToTransferSrc(VKImage &img, VkImageMemoryBarrier
&barrier, VkImageAspectFlags aspect) { + barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + barrier.oldLayout = img.layout; + barrier.subresourceRange.layerCount = 1; + barrier.subresourceRange.levelCount = 1; + barrier.image = img.image; + barrier.srcAccessMask = 0; + switch (img.layout) { + case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: + barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + break; + case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: + barrier.srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + break; + case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: + barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + break; + default: + Crash(); + } + barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; + barrier.subresourceRange.aspectMask = aspect; + img.layout = barrier.newLayout; +} + +void VKContext::SetupTransitionToTransferDst(VKImage &img, VkImageMemoryBarrier &barrier, VkImageAspectFlags aspect) { + barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + barrier.oldLayout = img.layout; + barrier.subresourceRange.layerCount = 1; + barrier.subresourceRange.levelCount = 1; + barrier.image = img.image; + barrier.srcAccessMask = 0; + switch (img.layout) { + case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: + barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + break; + case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: + barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + break; + case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: + barrier.srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + break; + default: + Crash(); + } + barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + barrier.subresourceRange.aspectMask = aspect; + img.layout = barrier.newLayout; } bool VKContext::BlitFramebuffer(Framebuffer *srcfb, int srcX1, int srcY1, int srcX2, int srcY2, Framebuffer *dstfb, int dstX1, 
int dstY1, int dstX2, int dstY2, int channelBits, FBBlitFilter filter) { @@ -1687,13 +1809,40 @@ void VKContext::BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPass barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; break; } - barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT; barrier.newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; // TODO: Double-check these flags. Should be fine. - vkCmdPipelineBarrier(cmd_, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, 0, 0, 0, 0, 0, 1, &barrier); + vkCmdPipelineBarrier(cmd_, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, 0, 0, nullptr, 0, nullptr, 1, &barrier); fb->color.layout = barrier.newLayout; } + if (fb->depth.layout != VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL) { + VkImageMemoryBarrier barrier{}; + barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + barrier.oldLayout = fb->depth.layout; + barrier.subresourceRange.layerCount = 1; + barrier.subresourceRange.levelCount = 1; + barrier.image = fb->depth.image; + barrier.srcAccessMask = 0; + switch (fb->depth.layout) { + case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: + barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT; + break; + case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: + barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + break; + case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: + barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + break; + } + barrier.dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT| VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT; + barrier.newLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + // TODO: Double-check these flags. Should be fine. 
+ vkCmdPipelineBarrier(cmd_, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT, 0, 0, nullptr, 0, nullptr, 1, &barrier); + fb->depth.layout = barrier.newLayout; + } + renderPass = renderPasses_[RPIndex(rp.color, rp.depth)]; // ILOG("Switching framebuffer to FBO (fc=%d, cmd=%x, rp=%x)", frameNum_, (int)(uintptr_t)cmd_, (int)(uintptr_t)renderPass); if (rp.color == RPAction::CLEAR) { @@ -1765,7 +1914,7 @@ void VKContext::BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChanne // we're between passes so it's OK. // ARM Best Practices guide recommends these stage bits. - vkCmdPipelineBarrier(transitionCmdBuf, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, 0, 0, 0, 1, &barrier); + vkCmdPipelineBarrier(transitionCmdBuf, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &barrier); fb->color.layout = barrier.newLayout; } From 44423f3ba25af28d21645c1c7a92e171b2c7015c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Wed, 24 May 2017 00:45:15 +0200 Subject: [PATCH 25/25] Vulkan: Implement BlitFramebuffer --- GPU/D3D11/TextureCacheD3D11.cpp | 2 +- GPU/Vulkan/DrawEngineVulkan.cpp | 22 +------ GPU/Vulkan/DrawEngineVulkan.h | 2 +- GPU/Vulkan/FramebufferVulkan.cpp | 40 +++--------- GPU/Vulkan/StateMappingVulkan.cpp | 18 ++++++ GPU/Vulkan/TextureCacheVulkan.cpp | 1 + ext/native/thin3d/thin3d_vulkan.cpp | 96 ++++++++++++++++++++++++++--- 7 files changed, 119 insertions(+), 62 deletions(-) diff --git a/GPU/D3D11/TextureCacheD3D11.cpp b/GPU/D3D11/TextureCacheD3D11.cpp index 344fc10a1e..95d5cf401e 100644 --- a/GPU/D3D11/TextureCacheD3D11.cpp +++ b/GPU/D3D11/TextureCacheD3D11.cpp @@ -453,7 +453,7 @@ void TextureCacheD3D11::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFra gstate_c.SetTextureFullAlpha(gstate.getTextureFormat() == GE_TFMT_5650); gstate_c.SetTextureSimpleAlpha(gstate_c.textureFullAlpha); - 
framebufferManagerD3D11_->RebindFramebuffer(); + framebufferManagerD3D11_->RebindFramebuffer(); // Probably not necessary. } SamplerCacheKey samplerKey; SetFramebufferSamplingParams(framebuffer->bufferWidth, framebuffer->bufferHeight, samplerKey); diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp index e8438fb2b7..4368f72e8c 100644 --- a/GPU/Vulkan/DrawEngineVulkan.cpp +++ b/GPU/Vulkan/DrawEngineVulkan.cpp @@ -644,33 +644,15 @@ void DrawEngineVulkan::DirtyAllUBOs() { gstate_c.Dirty(DIRTY_TEXTURE_IMAGE); } -//void DrawEngineVulkan::ApplyDrawStateLate() { - /* - // At this point, we know if the vertices are full alpha or not. - // TODO: Set the nearest/linear here (since we correctly know if alpha/color tests are needed)? - if (!gstate.isModeClear()) { - // TODO: Test texture? - - if (fboTexNeedBind_) { - // Note that this is positions, not UVs, that we need the copy from. - framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY); - // If we are rendering at a higher resolution, linear is probably best for the dest color. 
- fboTexBound_ = true; - fboTexNeedBind_ = false; - } - } - */ -//} - // The inline wrapper in the header checks for numDrawCalls == 0d void DrawEngineVulkan::DoFlush() { + gpuStats.numFlushes++; + VkCommandBuffer cmd = (VkCommandBuffer)draw_->GetNativeObject(Draw::NativeObject::RENDERPASS_COMMANDBUFFER); VkRenderPass rp = (VkRenderPass)draw_->GetNativeObject(Draw::NativeObject::CURRENT_RENDERPASS); if (!rp) Crash(); - gpuStats.numFlushes++; - FrameData *frame = &frame_[curFrame_ & 1]; bool textureNeedsApply = false; diff --git a/GPU/Vulkan/DrawEngineVulkan.h b/GPU/Vulkan/DrawEngineVulkan.h index fe109d3521..a2d809d47a 100644 --- a/GPU/Vulkan/DrawEngineVulkan.h +++ b/GPU/Vulkan/DrawEngineVulkan.h @@ -127,7 +127,7 @@ public: private: struct FrameData; - + void ApplyDrawStateLate(); void ConvertStateToVulkanKey(FramebufferManagerVulkan &fbManager, ShaderManagerVulkan *shaderManager, int prim, VulkanPipelineRasterStateKey &key, VulkanDynamicState &dynState); void InitDeviceObjects(); diff --git a/GPU/Vulkan/FramebufferVulkan.cpp b/GPU/Vulkan/FramebufferVulkan.cpp index 43d08ef759..8ed37f4b61 100644 --- a/GPU/Vulkan/FramebufferVulkan.cpp +++ b/GPU/Vulkan/FramebufferVulkan.cpp @@ -662,15 +662,13 @@ void FramebufferManagerVulkan::UpdateDownloadTempBuffer(VirtualFramebuffer *nvfb void FramebufferManagerVulkan::BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp) { if (!dst->fbo || !src->fbo || !useBufferedRendering_) { // This can happen if they recently switched from non-buffered. + if (useBufferedRendering_) + draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::KEEP, Draw::RPAction::KEEP }); return; } - // NOTE: There may be cases (like within a renderpass) where we want to - // not use a blit. - bool useBlit = true; - - float srcXFactor = useBlit ? (float)src->renderWidth / (float)src->bufferWidth : 1.0f; - float srcYFactor = useBlit ? 
(float)src->renderHeight / (float)src->bufferHeight : 1.0f; + float srcXFactor = (float)src->renderWidth / (float)src->bufferWidth; + float srcYFactor = (float)src->renderHeight / (float)src->bufferHeight; const int srcBpp = src->format == GE_FORMAT_8888 ? 4 : 2; if (srcBpp != bpp && bpp != 0) { srcXFactor = (srcXFactor * bpp) / srcBpp; @@ -680,8 +678,8 @@ void FramebufferManagerVulkan::BlitFramebuffer(VirtualFramebuffer *dst, int dstX int srcY1 = srcY * srcYFactor; int srcY2 = (srcY + h) * srcYFactor; - float dstXFactor = useBlit ? (float)dst->renderWidth / (float)dst->bufferWidth : 1.0f; - float dstYFactor = useBlit ? (float)dst->renderHeight / (float)dst->bufferHeight : 1.0f; + float dstXFactor = (float)dst->renderWidth / (float)dst->bufferWidth; + float dstYFactor = (float)dst->renderHeight / (float)dst->bufferHeight; const int dstBpp = dst->format == GE_FORMAT_8888 ? 4 : 2; if (dstBpp != bpp && bpp != 0) { dstXFactor = (dstXFactor * bpp) / dstBpp; @@ -697,6 +695,7 @@ void FramebufferManagerVulkan::BlitFramebuffer(VirtualFramebuffer *dst, int dstX return; } + // BlitFramebuffer can clip, but CopyFramebufferImage is more restricted. // In case the src goes outside, we just skip the optimization in that case. 
const bool sameSize = dstX2 - dstX1 == srcX2 - srcX1 && dstY2 - dstY1 == srcY2 - srcY1; const bool sameDepth = dst->colorDepth == src->colorDepth; @@ -705,30 +704,9 @@ void FramebufferManagerVulkan::BlitFramebuffer(VirtualFramebuffer *dst, int dstX const bool xOverlap = src == dst && srcX2 > dstX1 && srcX1 < dstX2; const bool yOverlap = src == dst && srcY2 > dstY1 && srcY1 < dstY2; if (sameSize && sameDepth && srcInsideBounds && dstInsideBounds && !(xOverlap && yOverlap)) { - VkImageCopy region = {}; - region.extent = { (uint32_t)(dstX2 - dstX1), (uint32_t)(dstY2 - dstY1), 1 }; - /* - glCopyImageSubDataOES( - fbo_get_color_texture(src->fbo), GL_TEXTURE_2D, 0, srcX1, srcY1, 0, - fbo_get_color_texture(dst->fbo), GL_TEXTURE_2D, 0, dstX1, dstY1, 0, - dstX2 - dstX1, dstY2 - dstY1, 1); - */ - return; - } - - // BindFramebufferAsRenderTargetdst->fbo); - - if (useBlit) { - // fbo_bind_for_read(src->fbo); - //glBlitFramebuffer(srcX1, srcY1, srcX2, srcY2, dstX1, dstY1, dstX2, dstY2, GL_COLOR_BUFFER_BIT, GL_NEAREST); + draw_->CopyFramebufferImage(src->fbo, 0, srcX1, srcY1, 0, dst->fbo, 0, dstX1, dstY1, 0, dstX2 - dstX1, dstY2 - dstY1, 1, Draw::FB_COLOR_BIT); } else { - // fbo_bind_color_as_texture(src->fbo, 0); - - // The first four coordinates are relative to the 6th and 7th arguments of DrawActiveTexture. - // Should maybe revamp that interface. 
- float srcW = src->bufferWidth; - float srcH = src->bufferHeight; - // DrawActiveTexture(0, dstX1, dstY1, w * dstXFactor, h, dst->bufferWidth, dst->bufferHeight, srcX1 / srcW, srcY1 / srcH, srcX2 / srcW, srcY2 / srcH, draw2dprogram_, ROTATION_LOCKED_HORIZONTAL); + draw_->BlitFramebuffer(src->fbo, srcX1, srcY1, srcX2, srcY2, dst->fbo, dstX1, dstY1, dstX2, dstY2, Draw::FB_COLOR_BIT, Draw::FB_BLIT_NEAREST); } } diff --git a/GPU/Vulkan/StateMappingVulkan.cpp b/GPU/Vulkan/StateMappingVulkan.cpp index e37c80ac06..74ab0cf03e 100644 --- a/GPU/Vulkan/StateMappingVulkan.cpp +++ b/GPU/Vulkan/StateMappingVulkan.cpp @@ -368,3 +368,21 @@ void DrawEngineVulkan::ConvertStateToVulkanKey(FramebufferManagerVulkan &fbManag key.topology = primToVulkan[prim]; } + + +void DrawEngineVulkan::ApplyDrawStateLate() { + // At this point, we know if the vertices are full alpha or not. + // TODO: Set the nearest/linear here (since we correctly know if alpha/color tests are needed)? + if (!gstate.isModeClear()) { + // TODO: Test texture? + /* + if (fboTexNeedBind_) { + // Note that this is positions, not UVs, that we need the copy from. + framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY); + // If we are rendering at a higher resolution, linear is probably best for the dest color. 
+ fboTexBound_ = true; + fboTexNeedBind_ = false; + } + */ + } +} \ No newline at end of file diff --git a/GPU/Vulkan/TextureCacheVulkan.cpp b/GPU/Vulkan/TextureCacheVulkan.cpp index 9234cf0b78..1c79a9474d 100644 --- a/GPU/Vulkan/TextureCacheVulkan.cpp +++ b/GPU/Vulkan/TextureCacheVulkan.cpp @@ -440,6 +440,7 @@ void TextureCacheVulkan::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFr SamplerCacheKey samplerKey; SetFramebufferSamplingParams(framebuffer->bufferWidth, framebuffer->bufferHeight, samplerKey); sampler_ = samplerCache_.GetOrCreateSampler(samplerKey); + InvalidateLastTexture(entry); } ReplacedTextureFormat FromVulkanFormat(VkFormat fmt) { diff --git a/ext/native/thin3d/thin3d_vulkan.cpp b/ext/native/thin3d/thin3d_vulkan.cpp index 4be3c6bfd5..4f3d2bc666 100644 --- a/ext/native/thin3d/thin3d_vulkan.cpp +++ b/ext/native/thin3d/thin3d_vulkan.cpp @@ -1606,6 +1606,8 @@ void VKContext::CopyFramebufferImage(Framebuffer *srcfb, int level, int x, int y // If from a previous frame, just do it in frame init. VkCommandBuffer cmd = src->cmdBuf; if (src->frameCount != frameNum_) { + // TODO: What about the case where dst->frameCount == frameNum_ here? + // That will cause bad ordering. We'll have to allocate a new command buffer and assign it to dest. cmd = vulkan_->GetInitCommandBuffer(); } @@ -1654,6 +1656,82 @@ void VKContext::CopyFramebufferImage(Framebuffer *srcfb, int level, int x, int y } } +bool VKContext::BlitFramebuffer(Framebuffer *srcfb, int srcX1, int srcY1, int srcX2, int srcY2, Framebuffer *dstfb, int dstX1, int dstY1, int dstX2, int dstY2, int channelBits, FBBlitFilter filter) { + VKFramebuffer *src = (VKFramebuffer *)srcfb; + VKFramebuffer *dst = (VKFramebuffer *)dstfb; + + // We're gonna tack blits onto the src's command buffer, if it's from this frame. + // If from a previous frame, just do it in frame init. 
+ VkCommandBuffer cmd = src->cmdBuf; + if (src->frameCount != frameNum_) { + // TODO: What about the case where dst->frameCount == frameNum_ here? + // That will cause bad ordering. We'll have to allocate a new command buffer and assign it to dest. + cmd = vulkan_->GetInitCommandBuffer(); + } + VkImageMemoryBarrier srcBarriers[2]{}; + VkImageMemoryBarrier dstBarriers[2]{}; + int srcCount = 0; + int dstCount = 0; + + VkImageBlit blit{}; + blit.srcOffsets[0].x = srcX1; + blit.srcOffsets[0].y = srcY1; + blit.srcOffsets[0].z = 0; + blit.srcOffsets[1].x = srcX2; + blit.srcOffsets[1].y = srcY2; + blit.srcOffsets[1].z = 1; + blit.srcSubresource.mipLevel = 0; + blit.srcSubresource.layerCount = 1; + blit.dstOffsets[0].x = dstX1; + blit.dstOffsets[0].y = dstY1; + blit.dstOffsets[0].z = 0; + blit.dstOffsets[1].x = dstX2; + blit.dstOffsets[1].y = dstY2; + blit.dstOffsets[1].z = 1; + blit.dstSubresource.mipLevel = 0; + blit.dstSubresource.layerCount = 1; + + // First source barriers. + if (channelBits & FB_COLOR_BIT) { + if (src->color.layout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) { + SetupTransitionToTransferSrc(src->color, srcBarriers[srcCount++], VK_IMAGE_ASPECT_COLOR_BIT); + } + if (dst->color.layout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { + SetupTransitionToTransferDst(dst->color, dstBarriers[dstCount++], VK_IMAGE_ASPECT_COLOR_BIT); + } + } + + // We can't copy only depth or only stencil unfortunately. + if (channelBits & (FB_DEPTH_BIT | FB_STENCIL_BIT)) { + if (src->depth.layout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) { + SetupTransitionToTransferSrc(src->depth, srcBarriers[srcCount++], VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT); + } + if (dst->depth.layout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { + SetupTransitionToTransferDst(dst->depth, dstBarriers[dstCount++], VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT); + } + } + + // TODO: Fix the pipe bits to be bit less conservative. 
+ if (srcCount) { + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 0, 0, nullptr, 0, nullptr, srcCount, srcBarriers); + } + if (dstCount) { + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 0, 0, nullptr, 0, nullptr, dstCount, dstBarriers); + } + + if (channelBits & FB_COLOR_BIT) { + blit.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + blit.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + vkCmdBlitImage(cmd, src->color.image, src->color.layout, dst->color.image, dst->color.layout, 1, &blit, filter == FB_BLIT_LINEAR ? VK_FILTER_LINEAR : VK_FILTER_NEAREST); + } + if (channelBits & (FB_DEPTH_BIT | FB_STENCIL_BIT)) { + blit.srcSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + blit.dstSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + vkCmdBlitImage(cmd, src->depth.image, src->depth.layout, dst->depth.image, dst->depth.layout, 1, &blit, filter == FB_BLIT_LINEAR ? 
VK_FILTER_LINEAR : VK_FILTER_NEAREST); + } + return true; +} + void VKContext::SetupTransitionToTransferSrc(VKImage &img, VkImageMemoryBarrier &barrier, VkImageAspectFlags aspect) { barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; barrier.oldLayout = img.layout; @@ -1671,6 +1749,9 @@ void VKContext::SetupTransitionToTransferSrc(VKImage &img, VkImageMemoryBarrier case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; break; + case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: + barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT; + break; default: Crash(); } @@ -1697,6 +1778,9 @@ void VKContext::SetupTransitionToTransferDst(VKImage &img, VkImageMemoryBarrier case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: barrier.srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; break; + case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: + barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT; + break; default: Crash(); } @@ -1706,15 +1790,6 @@ void VKContext::SetupTransitionToTransferDst(VKImage &img, VkImageMemoryBarrier img.layout = barrier.newLayout; } -bool VKContext::BlitFramebuffer(Framebuffer *srcfb, int srcX1, int srcY1, int srcX2, int srcY2, Framebuffer *dstfb, int dstX1, int dstY1, int dstX2, int dstY2, int channelBits, FBBlitFilter filter) { - VKFramebuffer *src = (VKFramebuffer *)srcfb; - VKFramebuffer *dst = (VKFramebuffer *)dstfb; - - // TODO - - return true; -} - void VKContext::EndCurrentRenderpass() { if (curRenderPass_ != VK_NULL_HANDLE) { vkCmdEndRenderPass(cmd_); @@ -1805,6 +1880,9 @@ void VKContext::BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPass case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT; break; + case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: + barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + break; case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; break;