diff --git a/Core/System.h b/Core/System.h index 7749d893bc..4ff1767d87 100644 --- a/Core/System.h +++ b/Core/System.h @@ -68,6 +68,8 @@ void PSP_BeginHostFrame(); void PSP_EndHostFrame(); void PSP_RunLoopUntil(u64 globalticks); void PSP_RunLoopFor(int cycles); +void PSP_BeginFrame(); +void PSP_EndFrame(); void Audio_Init(); diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp index aa09c7511d..56298b7a04 100644 --- a/GPU/Vulkan/DrawEngineVulkan.cpp +++ b/GPU/Vulkan/DrawEngineVulkan.cpp @@ -171,10 +171,13 @@ DrawEngineVulkan::DrawEngineVulkan(VulkanContext *vulkan) void DrawEngineVulkan::BeginFrame() { FrameData *frame = &frame_[curFrame_ & 1]; vkResetDescriptorPool(vulkan_->GetDevice(), frame->descPool, 0); + frame->pushData->Begin(vulkan_->GetDevice()); frame->pushData->Reset(); } void DrawEngineVulkan::EndFrame() { + FrameData *frame = &frame_[curFrame_ & 1]; + frame->pushData->End(vulkan_->GetDevice()); curFrame_++; } @@ -471,16 +474,20 @@ void DrawEngineVulkan::DoFlush(VkCommandBuffer cmd) { if (result.action == SW_DRAW_PRIMITIVES) { if (result.setStencil) { - // dxstate.stencilFunc.set(D3DCMP_ALWAYS, result.stencilValue, 255); + // hey, dynamic state! 
+ vkCmdSetStencilReference(cmd_, VK_STENCIL_FRONT_AND_BACK, result.stencilValue); } - VkBuffer buf[1] = {}; - VkDeviceSize offsets[1] = { 0 }; + ibOffset = (uint32_t)frame->pushData->Push(decIndex, 2 * indexGen.VertexCount()); + vbOffset = (uint32_t)frame->pushData->Push(decoded, numTrans * dec_->GetDecVtxFmt().stride); + + VkBuffer buf[1] = { frame->pushData->GetVkBuffer() }; + VkDeviceSize offsets[1] = { vbOffset }; if (drawIndexed) { // TODO: Have a buffer per frame, use a walking buffer pointer // TODO: Avoid rebinding if the vertex size stays the same by using the offset arguments vkCmdBindVertexBuffers(cmd_, 0, 1, buf, offsets); - vkCmdBindIndexBuffer(cmd_, buf[0], 0, VK_INDEX_TYPE_UINT16); + vkCmdBindIndexBuffer(cmd_, buf[0], ibOffset, VK_INDEX_TYPE_UINT16); vkCmdDrawIndexed(cmd_, numTrans, 1, 0, 0, 0); // pD3Ddevice->DrawIndexedPrimitiveUP(glprim[prim], 0, maxIndex, D3DPrimCount(glprim[prim], numTrans), inds, D3DFMT_INDEX16, drawBuffer, sizeof(TransformedVertex)); } else { @@ -497,13 +504,13 @@ void DrawEngineVulkan::DoFlush(VkCommandBuffer cmd) { if (gstate.isClearModeAlphaMask()) mask |= 2; if (gstate.isClearModeDepthMask()) mask |= 4; - VkClearValue value; - value.color.float32[0] = (result.color & 0xFF) * (1.0f / 255.0f); - value.color.float32[1] = ((result.color >> 8) & 0xFF) * (1.0f / 255.0f); - value.color.float32[2] = ((result.color >> 16) & 0xFF) * (1.0f / 255.0f); - value.color.float32[3] = ((result.color >> 24) & 0xFF) * (1.0f / 255.0f); - value.depthStencil.depth = result.depth; - value.depthStencil.stencil = (result.color >> 24) & 0xFF; + VkClearValue colorValue, depthValue; + colorValue.color.float32[0] = (result.color & 0xFF) * (1.0f / 255.0f); + colorValue.color.float32[1] = ((result.color >> 8) & 0xFF) * (1.0f / 255.0f); + colorValue.color.float32[2] = ((result.color >> 16) & 0xFF) * (1.0f / 255.0f); + colorValue.color.float32[3] = ((result.color >> 24) & 0xFF) * (1.0f / 255.0f); + depthValue.depthStencil.depth = result.depth; + 
depthValue.depthStencil.stencil = (result.color >> 24) & 0xFF; VkClearRect rect; rect.baseArrayLayer = 0; @@ -517,13 +524,13 @@ void DrawEngineVulkan::DoFlush(VkCommandBuffer cmd) { VkClearAttachment attach[2]; if (mask & 3) { attach[count].aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - attach[count].clearValue = value; + attach[count].clearValue = colorValue; attach[count].colorAttachment = 0; count++; } if (mask & 4) { attach[count].aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - attach[count].clearValue = value; + attach[count].clearValue = depthValue; attach[count].colorAttachment = 0; } vkCmdClearAttachments(cmd_, count, attach, 1, &rect); diff --git a/GPU/Vulkan/FramebufferVulkan.cpp b/GPU/Vulkan/FramebufferVulkan.cpp index b26ab43b12..378e4921fe 100644 --- a/GPU/Vulkan/FramebufferVulkan.cpp +++ b/GPU/Vulkan/FramebufferVulkan.cpp @@ -69,6 +69,23 @@ void FramebufferManagerVulkan::FlushBeforeCopy() { drawEngine_->Flush(nullptr); } +void FramebufferManagerVulkan::ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) { + +} + +void FramebufferManagerVulkan::DownloadFramebufferForClut(u32 fb_address, u32 loadBytes) { + +} + +bool FramebufferManagerVulkan::CreateDownloadTempBuffer(VirtualFramebuffer *nvfb) { + return false; +} + +void FramebufferManagerVulkan::UpdateDownloadTempBuffer(VirtualFramebuffer *nvfb) { + +} + + std::vector FramebufferManagerVulkan::GetFramebufferList() { return std::vector(); } diff --git a/GPU/Vulkan/FramebufferVulkan.h b/GPU/Vulkan/FramebufferVulkan.h index a30a097e70..075e2c435c 100644 --- a/GPU/Vulkan/FramebufferVulkan.h +++ b/GPU/Vulkan/FramebufferVulkan.h @@ -52,9 +52,9 @@ public: return false; } - virtual void ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) override { - throw std::logic_error("The method or operation is not implemented."); - } + void ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) 
override; + void DownloadFramebufferForClut(u32 fb_address, u32 loadBytes) override; + virtual void MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) override { } @@ -92,16 +92,6 @@ public: virtual void NotifyRenderFramebufferUpdated(VirtualFramebuffer *vfb, bool vfbFormatChanged) override { } - void DownloadFramebufferForClut(u32 fb_address, u32 loadBytes) override { - - } - - bool CreateDownloadTempBuffer(VirtualFramebuffer *nvfb) override { - return false; - } - void UpdateDownloadTempBuffer(VirtualFramebuffer *nvfb) override { - - } void DestroyAllFBOs(); void Resized(); void DeviceLost(); @@ -111,6 +101,10 @@ public: std::vector GetFramebufferList(); +protected: + bool CreateDownloadTempBuffer(VirtualFramebuffer *nvfb) override; + void UpdateDownloadTempBuffer(VirtualFramebuffer *nvfb) override; + private: VulkanContext *vulkan_; VkCommandBuffer cmd_; diff --git a/GPU/Vulkan/GPU_Vulkan.cpp b/GPU/Vulkan/GPU_Vulkan.cpp index 25bee1aba9..d9d4d116dd 100644 --- a/GPU/Vulkan/GPU_Vulkan.cpp +++ b/GPU/Vulkan/GPU_Vulkan.cpp @@ -578,6 +578,7 @@ void GPU_Vulkan::BeginFrameInternal() { shaderManager_->DirtyUniform(DIRTY_ALL); framebufferManager_.BeginFrame(); + drawEngine_.BeginFrame(); if (g_Config.iRenderingMode == FB_NON_BUFFERED_MODE) { // Draw everything directly to the backbuffer. @@ -627,12 +628,14 @@ void GPU_Vulkan::CopyDisplayToOutput() { void GPU_Vulkan::CopyDisplayToOutputInternal() { // Flush anything left over. 
drawEngine_.Flush(curCmd_); + drawEngine_.EndFrame(); shaderManager_->DirtyLastShader(); framebufferManager_.CopyDisplayToOutput(); framebufferManager_.EndFrame(); + gstate_c.textureChanged = TEXCHANGE_UPDATED; } diff --git a/GPU/Vulkan/ShaderManagerVulkan.cpp b/GPU/Vulkan/ShaderManagerVulkan.cpp index 88fc61fcfb..24fc019713 100644 --- a/GPU/Vulkan/ShaderManagerVulkan.cpp +++ b/GPU/Vulkan/ShaderManagerVulkan.cpp @@ -144,8 +144,7 @@ std::string VulkanVertexShader::GetShaderString(DebugShaderStringType type) cons } } -// Depth in ogl is between -1;1 we need between 0;1 and optionally reverse it -static void ConvertProjMatrixToVulkan(Matrix4x4 &in, bool invertedX, bool invertedY, bool invertedZ) { +static void ConvertProjMatrixToVulkan(Matrix4x4 &in, bool invertedX, bool invertedY) { // Half pixel offset hack float xoff = 0.5f / gstate_c.curRTRenderWidth; xoff = gstate_c.vpXOffset + (invertedX ? xoff : -xoff); @@ -157,7 +156,9 @@ static void ConvertProjMatrixToVulkan(Matrix4x4 &in, bool invertedX, bool invert if (invertedY) yoff = -yoff; - in.translateAndScale(Vec3(xoff, yoff, 0.5f), Vec3(gstate_c.vpWidthScale, gstate_c.vpHeightScale, invertedZ ? 
-0.5 : 0.5f)); + const Vec3 trans(xoff, yoff, gstate_c.vpZOffset + 0.5f); + const Vec3 scale(gstate_c.vpWidthScale, gstate_c.vpHeightScale, gstate_c.vpDepthScale * 0.5f); + in.translateAndScale(trans, scale); } static void ConvertProjMatrixToVulkanThrough(Matrix4x4 &in) { @@ -228,9 +229,8 @@ void ShaderManagerVulkan::VSUpdateUniforms(int dirtyUniforms) { flippedMatrix[8] = -flippedMatrix[8]; flippedMatrix[12] = -flippedMatrix[12]; } - - const bool invertedZ = gstate_c.vpDepthScale < 0; - ConvertProjMatrixToVulkan(flippedMatrix, invertedX, invertedY, invertedZ); + + ConvertProjMatrixToVulkan(flippedMatrix, invertedX, invertedY); CopyMatrix4x4(ub_base.proj, flippedMatrix.getReadPtr()); } @@ -357,18 +357,22 @@ void ShaderManagerVulkan::VSUpdateUniforms(int dirtyUniforms) { if (dirtyUniforms & DIRTY_DEPTHRANGE) { float viewZScale = gstate.getViewportZScale(); float viewZCenter = gstate.getViewportZCenter(); - - // Given the way we do the rounding, the integer part of the offset is probably mostly irrelevant as we cancel - // it afterwards anyway. - // It seems that we should adjust for D3D projection matrix. We got squashed up to only 0-1, so we divide - // the scale factor by 2, and add an offset. But, this doesn't work! I get near-perfect results not doing it. - // viewZScale *= 2.0f; - - // Need to take the possibly inverted proj matrix into account. - if (gstate_c.vpDepthScale < 0.0) - viewZScale *= -1.0f; - viewZCenter -= 32767.5f; float viewZInvScale; + + // We had to scale and translate Z to account for our clamped Z range. + // Therefore, we also need to reverse this to round properly. + // + // Example: scale = 65535.0, center = 0.0 + // Resulting range = -65535 to 65535, clamped to [0, 65535] + // gstate_c.vpDepthScale = 2.0f + // gstate_c.vpZOffset = -1.0f + // + // The projection already accounts for those, so we need to reverse them. + // + // Additionally, D3D9 uses a range from [0, 1]. We double and move the center. 
+ viewZScale *= (1.0f / gstate_c.vpDepthScale) * 2.0f; + viewZCenter -= 65535.0f * gstate_c.vpZOffset + 32768.5f; + if (viewZScale != 0.0) { viewZInvScale = 1.0f / viewZScale; } else { diff --git a/GPU/Vulkan/StateMappingVulkan.cpp b/GPU/Vulkan/StateMappingVulkan.cpp index ce7c1122e1..5d5960a4ae 100644 --- a/GPU/Vulkan/StateMappingVulkan.cpp +++ b/GPU/Vulkan/StateMappingVulkan.cpp @@ -279,17 +279,13 @@ void ConvertStateToVulkanKey(FramebufferManagerVulkan &fbManager, int prim, Vulk float depthMin = vpAndScissor.depthRangeMin; float depthMax = vpAndScissor.depthRangeMax; - if (!gstate.isModeThrough()) { - // Direct3D can't handle negative depth ranges, so we fix it in the projection matrix. - if (gstate_c.vpDepthScale != depthMax - depthMin) { - gstate_c.vpDepthScale = depthMax - depthMin; - vpAndScissor.dirtyProj = true; - } - if (depthMin > depthMax) { - std::swap(depthMin, depthMax); - } - if (depthMin < 0.0f) depthMin = 0.0f; - if (depthMax > 1.0f) depthMax = 1.0f; + if (depthMin < 0.0f) depthMin = 0.0f; + if (depthMax > 1.0f) depthMax = 1.0f; + if (vpAndScissor.dirtyProj) { + // shaderManager_->DirtyUniform(DIRTY_PROJMATRIX); + } + if (vpAndScissor.dirtyDepth) { + // shaderManager_->DirtyUniform(DIRTY_DEPTHRANGE); } } diff --git a/GPU/Vulkan/TextureCacheVulkan.cpp b/GPU/Vulkan/TextureCacheVulkan.cpp index 606213b80e..6c10005fba 100644 --- a/GPU/Vulkan/TextureCacheVulkan.cpp +++ b/GPU/Vulkan/TextureCacheVulkan.cpp @@ -89,6 +89,10 @@ TextureCacheVulkan::~TextureCacheVulkan() { Clear(true); } +void TextureCacheVulkan::DownloadFramebufferForClut(u32 clutAddr, u32 bytes) { + +} + static u32 EstimateTexMemoryUsage(const TextureCacheVulkan::TexCacheEntry *entry) { const u16 dim = entry->dim; const u8 dimW = ((dim >> 0) & 0xf); diff --git a/GPU/Vulkan/TextureCacheVulkan.h b/GPU/Vulkan/TextureCacheVulkan.h index 49edc5222d..cb75f1d4ba 100644 --- a/GPU/Vulkan/TextureCacheVulkan.h +++ b/GPU/Vulkan/TextureCacheVulkan.h @@ -72,13 +72,8 @@ public: void 
ApplyTexture(VkImageView &imageView, VkSampler &sampler); - bool DecodeTexture(u8 *dest, const GPUgstate &state) { - return false; - } - - void DownloadFramebufferForClut(u32 clutAddr, u32 bytes) override { - - } +protected: + void DownloadFramebufferForClut(u32 clutAddr, u32 bytes); private: void Decimate(); // Run this once per frame to get rid of old textures. diff --git a/UI/EmuScreen.cpp b/UI/EmuScreen.cpp index b78068a025..7b00528dc9 100644 --- a/UI/EmuScreen.cpp +++ b/UI/EmuScreen.cpp @@ -908,6 +908,7 @@ void EmuScreen::render() { while (coreState == CORE_RUNNING) { PSP_RunLoopFor(blockTicks); } + // Hopefully coreState is now CORE_NEXTFRAME if (coreState == CORE_NEXTFRAME) { // set back to running for the next frame diff --git a/UI/NativeApp.cpp b/UI/NativeApp.cpp index c2a80b3e32..a0cea8fc27 100644 --- a/UI/NativeApp.cpp +++ b/UI/NativeApp.cpp @@ -703,6 +703,7 @@ void NativeRender(GraphicsContext *graphicsContext) { ortho.setOrthoD3D(0.0f, xres, 0, yres, -1.0f, 1.0f); break; case GPUBackend::DIRECT3D9: + case GPUBackend::DIRECT3D11: ortho.setOrthoD3D(0.0f, xres, yres, 0.0f, -1.0f, 1.0f); Matrix4x4 translation; translation.setTranslation(Vec3(-0.5f, -0.5f, 0.0f)); diff --git a/ext/native/thin3d/VulkanContext.cpp b/ext/native/thin3d/VulkanContext.cpp index c0c5b504a9..4bfa3d1797 100644 --- a/ext/native/thin3d/VulkanContext.cpp +++ b/ext/native/thin3d/VulkanContext.cpp @@ -194,10 +194,8 @@ VkCommandBuffer VulkanContext::BeginSurfaceRenderPass(VkClearValue clear_values[ // Get the index of the next available swapchain image, and a semaphore to block command buffer execution on. // Now, I wonder if we should do this early in the frame or late? Right now we do it early, which should be fine. 
VkResult res = fpAcquireNextImageKHR(device_, swap_chain_, - UINT64_MAX, - acquireSemaphore, - NULL, - &current_buffer); + UINT64_MAX, acquireSemaphore, NULL, &current_buffer); + // TODO: Deal with the VK_SUBOPTIMAL_KHR and VK_ERROR_OUT_OF_DATE_KHR // return codes assert(res == VK_SUCCESS); @@ -917,28 +915,30 @@ void VulkanContext::InitSwapchain(VkCommandBuffer cmd) { VkExtent2D swapChainExtent; // width and height are either both -1, or both not -1. - if (surfCapabilities.currentExtent.width == -1) - { + if (surfCapabilities.currentExtent.width == -1) { // If the surface size is undefined, the size is set to // the size of the images requested. swapChainExtent.width = width; swapChainExtent.height = height; - } else - { + } else { // If the surface size is defined, the swap chain size must match swapChainExtent = surfCapabilities.currentExtent; } // If mailbox mode is available, use it, as is the lowest-latency non- - // tearing mode. If not, try IMMEDIATE which will usually be available, - // and is fastest (though it tears). If not, fall back to FIFO which is - // always available. + // tearing mode. If not, try FIFO_RELAXED, and if not that, try IMMEDIATE + // which will usually be available, and is fastest (though it tears). + // If not, fall back to FIFO which is always available. 
VkPresentModeKHR swapchainPresentMode = VK_PRESENT_MODE_FIFO_KHR; for (size_t i = 0; i < presentModeCount; i++) { if ((flags_ & VULKAN_FLAG_PRESENT_MAILBOX) && presentModes[i] == VK_PRESENT_MODE_MAILBOX_KHR) { swapchainPresentMode = VK_PRESENT_MODE_MAILBOX_KHR; break; } + if ((flags_ & VULKAN_FLAG_PRESENT_FIFO_RELAXED) && presentModes[i] == VK_PRESENT_MODE_FIFO_RELAXED_KHR) { + swapchainPresentMode = VK_PRESENT_MODE_FIFO_RELAXED_KHR; + break; + } if ((flags_ & VULKAN_FLAG_PRESENT_IMMEDIATE) && presentModes[i] == VK_PRESENT_MODE_IMMEDIATE_KHR) { swapchainPresentMode = VK_PRESENT_MODE_IMMEDIATE_KHR; break; diff --git a/ext/native/thin3d/VulkanContext.h b/ext/native/thin3d/VulkanContext.h index df3fc7d4a7..e6f9993c65 100644 --- a/ext/native/thin3d/VulkanContext.h +++ b/ext/native/thin3d/VulkanContext.h @@ -57,6 +57,7 @@ enum { VULKAN_FLAG_VALIDATE = 1, VULKAN_FLAG_PRESENT_MAILBOX = 2, VULKAN_FLAG_PRESENT_IMMEDIATE = 4, + VULKAN_FLAG_PRESENT_FIFO_RELAXED = 8, }; // A layer can expose extensions, keep track of those extensions here.