diff --git a/GPU/D3D11/TextureCacheD3D11.cpp b/GPU/D3D11/TextureCacheD3D11.cpp index 344fc10a1e..95d5cf401e 100644 --- a/GPU/D3D11/TextureCacheD3D11.cpp +++ b/GPU/D3D11/TextureCacheD3D11.cpp @@ -453,7 +453,7 @@ void TextureCacheD3D11::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFra gstate_c.SetTextureFullAlpha(gstate.getTextureFormat() == GE_TFMT_5650); gstate_c.SetTextureSimpleAlpha(gstate_c.textureFullAlpha); - framebufferManagerD3D11_->RebindFramebuffer(); + framebufferManagerD3D11_->RebindFramebuffer(); // Probably not necessary. } SamplerCacheKey samplerKey; SetFramebufferSamplingParams(framebuffer->bufferWidth, framebuffer->bufferHeight, samplerKey); diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp index e8438fb2b7..4368f72e8c 100644 --- a/GPU/Vulkan/DrawEngineVulkan.cpp +++ b/GPU/Vulkan/DrawEngineVulkan.cpp @@ -644,33 +644,15 @@ void DrawEngineVulkan::DirtyAllUBOs() { gstate_c.Dirty(DIRTY_TEXTURE_IMAGE); } -//void DrawEngineVulkan::ApplyDrawStateLate() { - /* - // At this point, we know if the vertices are full alpha or not. - // TODO: Set the nearest/linear here (since we correctly know if alpha/color tests are needed)? - if (!gstate.isModeClear()) { - // TODO: Test texture? - - if (fboTexNeedBind_) { - // Note that this is positions, not UVs, that we need the copy from. - framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY); - // If we are rendering at a higher resolution, linear is probably best for the dest color. 
- fboTexBound_ = true; - fboTexNeedBind_ = false; - } - } - */ -//} - // The inline wrapper in the header checks for numDrawCalls == 0d void DrawEngineVulkan::DoFlush() { + gpuStats.numFlushes++; + VkCommandBuffer cmd = (VkCommandBuffer)draw_->GetNativeObject(Draw::NativeObject::RENDERPASS_COMMANDBUFFER); VkRenderPass rp = (VkRenderPass)draw_->GetNativeObject(Draw::NativeObject::CURRENT_RENDERPASS); if (!rp) Crash(); - gpuStats.numFlushes++; - FrameData *frame = &frame_[curFrame_ & 1]; bool textureNeedsApply = false; diff --git a/GPU/Vulkan/DrawEngineVulkan.h b/GPU/Vulkan/DrawEngineVulkan.h index fe109d3521..a2d809d47a 100644 --- a/GPU/Vulkan/DrawEngineVulkan.h +++ b/GPU/Vulkan/DrawEngineVulkan.h @@ -127,7 +127,7 @@ public: private: struct FrameData; - + void ApplyDrawStateLate(); void ConvertStateToVulkanKey(FramebufferManagerVulkan &fbManager, ShaderManagerVulkan *shaderManager, int prim, VulkanPipelineRasterStateKey &key, VulkanDynamicState &dynState); void InitDeviceObjects(); diff --git a/GPU/Vulkan/FramebufferVulkan.cpp b/GPU/Vulkan/FramebufferVulkan.cpp index 43d08ef759..8ed37f4b61 100644 --- a/GPU/Vulkan/FramebufferVulkan.cpp +++ b/GPU/Vulkan/FramebufferVulkan.cpp @@ -662,15 +662,13 @@ void FramebufferManagerVulkan::UpdateDownloadTempBuffer(VirtualFramebuffer *nvfb void FramebufferManagerVulkan::BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp) { if (!dst->fbo || !src->fbo || !useBufferedRendering_) { // This can happen if they recently switched from non-buffered. + if (useBufferedRendering_) + draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::KEEP, Draw::RPAction::KEEP }); return; } - // NOTE: There may be cases (like within a renderpass) where we want to - // not use a blit. - bool useBlit = true; - - float srcXFactor = useBlit ? (float)src->renderWidth / (float)src->bufferWidth : 1.0f; - float srcYFactor = useBlit ? 
(float)src->renderHeight / (float)src->bufferHeight : 1.0f; + float srcXFactor = (float)src->renderWidth / (float)src->bufferWidth; + float srcYFactor = (float)src->renderHeight / (float)src->bufferHeight; const int srcBpp = src->format == GE_FORMAT_8888 ? 4 : 2; if (srcBpp != bpp && bpp != 0) { srcXFactor = (srcXFactor * bpp) / srcBpp; @@ -680,8 +678,8 @@ void FramebufferManagerVulkan::BlitFramebuffer(VirtualFramebuffer *dst, int dstX int srcY1 = srcY * srcYFactor; int srcY2 = (srcY + h) * srcYFactor; - float dstXFactor = useBlit ? (float)dst->renderWidth / (float)dst->bufferWidth : 1.0f; - float dstYFactor = useBlit ? (float)dst->renderHeight / (float)dst->bufferHeight : 1.0f; + float dstXFactor = (float)dst->renderWidth / (float)dst->bufferWidth; + float dstYFactor = (float)dst->renderHeight / (float)dst->bufferHeight; const int dstBpp = dst->format == GE_FORMAT_8888 ? 4 : 2; if (dstBpp != bpp && bpp != 0) { dstXFactor = (dstXFactor * bpp) / dstBpp; @@ -697,6 +695,7 @@ void FramebufferManagerVulkan::BlitFramebuffer(VirtualFramebuffer *dst, int dstX return; } + // BlitFramebuffer can clip, but CopyFramebufferImage is more restricted. // In case the src goes outside, we just skip the optimization in that case. 
const bool sameSize = dstX2 - dstX1 == srcX2 - srcX1 && dstY2 - dstY1 == srcY2 - srcY1; const bool sameDepth = dst->colorDepth == src->colorDepth; @@ -705,30 +704,9 @@ void FramebufferManagerVulkan::BlitFramebuffer(VirtualFramebuffer *dst, int dstX const bool xOverlap = src == dst && srcX2 > dstX1 && srcX1 < dstX2; const bool yOverlap = src == dst && srcY2 > dstY1 && srcY1 < dstY2; if (sameSize && sameDepth && srcInsideBounds && dstInsideBounds && !(xOverlap && yOverlap)) { - VkImageCopy region = {}; - region.extent = { (uint32_t)(dstX2 - dstX1), (uint32_t)(dstY2 - dstY1), 1 }; - /* - glCopyImageSubDataOES( - fbo_get_color_texture(src->fbo), GL_TEXTURE_2D, 0, srcX1, srcY1, 0, - fbo_get_color_texture(dst->fbo), GL_TEXTURE_2D, 0, dstX1, dstY1, 0, - dstX2 - dstX1, dstY2 - dstY1, 1); - */ - return; - } - - // BindFramebufferAsRenderTargetdst->fbo); - - if (useBlit) { - // fbo_bind_for_read(src->fbo); - //glBlitFramebuffer(srcX1, srcY1, srcX2, srcY2, dstX1, dstY1, dstX2, dstY2, GL_COLOR_BUFFER_BIT, GL_NEAREST); + draw_->CopyFramebufferImage(src->fbo, 0, srcX1, srcY1, 0, dst->fbo, 0, dstX1, dstY1, 0, dstX2 - dstX1, dstY2 - dstY1, 1, Draw::FB_COLOR_BIT); } else { - // fbo_bind_color_as_texture(src->fbo, 0); - - // The first four coordinates are relative to the 6th and 7th arguments of DrawActiveTexture. - // Should maybe revamp that interface. 
- float srcW = src->bufferWidth; - float srcH = src->bufferHeight; - // DrawActiveTexture(0, dstX1, dstY1, w * dstXFactor, h, dst->bufferWidth, dst->bufferHeight, srcX1 / srcW, srcY1 / srcH, srcX2 / srcW, srcY2 / srcH, draw2dprogram_, ROTATION_LOCKED_HORIZONTAL); + draw_->BlitFramebuffer(src->fbo, srcX1, srcY1, srcX2, srcY2, dst->fbo, dstX1, dstY1, dstX2, dstY2, Draw::FB_COLOR_BIT, Draw::FB_BLIT_NEAREST); } } diff --git a/GPU/Vulkan/StateMappingVulkan.cpp b/GPU/Vulkan/StateMappingVulkan.cpp index e37c80ac06..74ab0cf03e 100644 --- a/GPU/Vulkan/StateMappingVulkan.cpp +++ b/GPU/Vulkan/StateMappingVulkan.cpp @@ -368,3 +368,21 @@ void DrawEngineVulkan::ConvertStateToVulkanKey(FramebufferManagerVulkan &fbManag key.topology = primToVulkan[prim]; } + + +void DrawEngineVulkan::ApplyDrawStateLate() { + // At this point, we know if the vertices are full alpha or not. + // TODO: Set the nearest/linear here (since we correctly know if alpha/color tests are needed)? + if (!gstate.isModeClear()) { + // TODO: Test texture? + /* + if (fboTexNeedBind_) { + // Note that this is positions, not UVs, that we need the copy from. + framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY); + // If we are rendering at a higher resolution, linear is probably best for the dest color. 
+ fboTexBound_ = true; + fboTexNeedBind_ = false; + } + */ + } +} \ No newline at end of file diff --git a/GPU/Vulkan/TextureCacheVulkan.cpp b/GPU/Vulkan/TextureCacheVulkan.cpp index 9234cf0b78..1c79a9474d 100644 --- a/GPU/Vulkan/TextureCacheVulkan.cpp +++ b/GPU/Vulkan/TextureCacheVulkan.cpp @@ -440,6 +440,7 @@ void TextureCacheVulkan::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFr SamplerCacheKey samplerKey; SetFramebufferSamplingParams(framebuffer->bufferWidth, framebuffer->bufferHeight, samplerKey); sampler_ = samplerCache_.GetOrCreateSampler(samplerKey); + InvalidateLastTexture(entry); } ReplacedTextureFormat FromVulkanFormat(VkFormat fmt) { diff --git a/ext/native/thin3d/thin3d_vulkan.cpp b/ext/native/thin3d/thin3d_vulkan.cpp index 4be3c6bfd5..4f3d2bc666 100644 --- a/ext/native/thin3d/thin3d_vulkan.cpp +++ b/ext/native/thin3d/thin3d_vulkan.cpp @@ -1606,6 +1606,8 @@ void VKContext::CopyFramebufferImage(Framebuffer *srcfb, int level, int x, int y // If from a previous frame, just do it in frame init. VkCommandBuffer cmd = src->cmdBuf; if (src->frameCount != frameNum_) { + // TODO: What about the case where dst->frameCount == frameNum_ here? + // That will cause bad ordering. We'll have to allocate a new command buffer and assign it to dest. cmd = vulkan_->GetInitCommandBuffer(); } @@ -1654,6 +1656,82 @@ void VKContext::CopyFramebufferImage(Framebuffer *srcfb, int level, int x, int y } } +bool VKContext::BlitFramebuffer(Framebuffer *srcfb, int srcX1, int srcY1, int srcX2, int srcY2, Framebuffer *dstfb, int dstX1, int dstY1, int dstX2, int dstY2, int channelBits, FBBlitFilter filter) { + VKFramebuffer *src = (VKFramebuffer *)srcfb; + VKFramebuffer *dst = (VKFramebuffer *)dstfb; + + // We're gonna tack blits onto the src's command buffer, if it's from this frame. + // If from a previous frame, just do it in frame init. 
+ VkCommandBuffer cmd = src->cmdBuf; + if (src->frameCount != frameNum_) { + // TODO: What about the case where dst->frameCount == frameNum_ here? + // That will cause bad ordering. We'll have to allocate a new command buffer and assign it to dest. + cmd = vulkan_->GetInitCommandBuffer(); + } + VkImageMemoryBarrier srcBarriers[2]{}; + VkImageMemoryBarrier dstBarriers[2]{}; + int srcCount = 0; + int dstCount = 0; + + VkImageBlit blit{}; + blit.srcOffsets[0].x = srcX1; + blit.srcOffsets[0].y = srcY1; + blit.srcOffsets[0].z = 0; + blit.srcOffsets[1].x = srcX2; + blit.srcOffsets[1].y = srcY2; + blit.srcOffsets[1].z = 1; + blit.srcSubresource.mipLevel = 0; + blit.srcSubresource.layerCount = 1; + blit.dstOffsets[0].x = dstX1; + blit.dstOffsets[0].y = dstY1; + blit.dstOffsets[0].z = 0; + blit.dstOffsets[1].x = dstX2; + blit.dstOffsets[1].y = dstY2; + blit.dstOffsets[1].z = 1; + blit.dstSubresource.mipLevel = 0; + blit.dstSubresource.layerCount = 1; + + // First source barriers. + if (channelBits & FB_COLOR_BIT) { + if (src->color.layout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) { + SetupTransitionToTransferSrc(src->color, srcBarriers[srcCount++], VK_IMAGE_ASPECT_COLOR_BIT); + } + if (dst->color.layout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { + SetupTransitionToTransferDst(dst->color, dstBarriers[dstCount++], VK_IMAGE_ASPECT_COLOR_BIT); + } + } + + // We can't copy only depth or only stencil unfortunately. + if (channelBits & (FB_DEPTH_BIT | FB_STENCIL_BIT)) { + if (src->depth.layout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) { + SetupTransitionToTransferSrc(src->depth, srcBarriers[srcCount++], VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT); + } + if (dst->depth.layout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { + SetupTransitionToTransferDst(dst->depth, dstBarriers[dstCount++], VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT); + } + } + + // Wait on all earlier commands and make the transfer stage wait on the layout transitions. TODO: Narrow the source stage to be a bit less conservative. 
+ if (srcCount) { + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, srcCount, srcBarriers); + } + if (dstCount) { + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, dstCount, dstBarriers); + } + + if (channelBits & FB_COLOR_BIT) { + blit.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + blit.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + vkCmdBlitImage(cmd, src->color.image, src->color.layout, dst->color.image, dst->color.layout, 1, &blit, filter == FB_BLIT_LINEAR ? VK_FILTER_LINEAR : VK_FILTER_NEAREST); + } + if (channelBits & (FB_DEPTH_BIT | FB_STENCIL_BIT)) { + blit.srcSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + blit.dstSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + vkCmdBlitImage(cmd, src->depth.image, src->depth.layout, dst->depth.image, dst->depth.layout, 1, &blit, 
VK_FILTER_NEAREST); // Vulkan only permits NEAREST for depth/stencil blits, so 'filter' is deliberately ignored here. + } + return true; +} + void VKContext::SetupTransitionToTransferSrc(VKImage &img, VkImageMemoryBarrier &barrier, VkImageAspectFlags aspect) { barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; barrier.oldLayout = img.layout; @@ -1671,6 +1749,9 @@ void VKContext::SetupTransitionToTransferSrc(VKImage &img, VkImageMemoryBarrier case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; break; + case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: + barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT; + break; default: Crash(); } @@ -1697,6 +1778,9 @@ void VKContext::SetupTransitionToTransferDst(VKImage &img, VkImageMemoryBarrier case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: barrier.srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; break; + case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: + barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT; + break; default: Crash(); } @@ -1706,15 +1790,6 @@ void VKContext::SetupTransitionToTransferDst(VKImage &img, VkImageMemoryBarrier img.layout = barrier.newLayout; } -bool VKContext::BlitFramebuffer(Framebuffer *srcfb, int srcX1, int srcY1, int srcX2, int srcY2, Framebuffer *dstfb, int dstX1, int dstY1, int dstX2, int dstY2, int channelBits, FBBlitFilter filter) { - VKFramebuffer *src = (VKFramebuffer *)srcfb; - VKFramebuffer *dst = (VKFramebuffer *)dstfb; - - // TODO - - return true; -} - void VKContext::EndCurrentRenderpass() { if (curRenderPass_ != VK_NULL_HANDLE) { vkCmdEndRenderPass(cmd_); @@ -1805,6 +1880,9 @@ void VKContext::BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPass case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT; break; + case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: + barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + break; case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; break;