diff --git a/CMakeLists.txt b/CMakeLists.txt index bc81d9be2b..cc31004ebc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -623,6 +623,8 @@ add_library(Common STATIC Common/GPU/Vulkan/VulkanRenderManager.h Common/GPU/Vulkan/VulkanQueueRunner.cpp Common/GPU/Vulkan/VulkanQueueRunner.h + Common/GPU/Vulkan/VulkanFrameData.cpp + Common/GPU/Vulkan/VulkanFrameData.h Common/Input/GestureDetector.cpp Common/Input/GestureDetector.h Common/Input/KeyCodes.h diff --git a/Common/Common.vcxproj b/Common/Common.vcxproj index 2895cdc837..d232c01534 100644 --- a/Common/Common.vcxproj +++ b/Common/Common.vcxproj @@ -441,6 +441,7 @@ + @@ -861,6 +862,7 @@ + diff --git a/Common/Common.vcxproj.filters b/Common/Common.vcxproj.filters index ff9fd9eaa6..991b03a71f 100644 --- a/Common/Common.vcxproj.filters +++ b/Common/Common.vcxproj.filters @@ -419,6 +419,9 @@ GPU\Vulkan + + GPU\Vulkan + @@ -791,6 +794,9 @@ GPU\Vulkan + + GPU\Vulkan + diff --git a/Common/GPU/D3D11/thin3d_d3d11.cpp b/Common/GPU/D3D11/thin3d_d3d11.cpp index c17bb39919..a808386ce9 100644 --- a/Common/GPU/D3D11/thin3d_d3d11.cpp +++ b/Common/GPU/D3D11/thin3d_d3d11.cpp @@ -270,6 +270,7 @@ D3D11DrawContext::D3D11DrawContext(ID3D11Device *device, ID3D11DeviceContext *de caps_.anisoSupported = true; caps_.textureNPOTFullySupported = true; caps_.fragmentShaderDepthWriteSupported = true; + caps_.blendMinMaxSupported = true; D3D11_FEATURE_DATA_D3D11_OPTIONS options{}; HRESULT result = device_->CheckFeatureSupport(D3D11_FEATURE_D3D11_OPTIONS, &options, sizeof(options)); diff --git a/Common/GPU/D3D9/thin3d_d3d9.cpp b/Common/GPU/D3D9/thin3d_d3d9.cpp index a84d42f64e..bf33fd34ca 100644 --- a/Common/GPU/D3D9/thin3d_d3d9.cpp +++ b/Common/GPU/D3D9/thin3d_d3d9.cpp @@ -646,6 +646,63 @@ void D3D9Context::InvalidateCachedState() { curPipeline_ = nullptr; } +// TODO: Move this detection elsewhere when it's needed elsewhere, not before. It's ugly. 
+// Source: https://envytools.readthedocs.io/en/latest/hw/pciid.html#gf100
+enum NVIDIAGeneration {
+	NV_PRE_KEPLER,
+	NV_KEPLER,
+	NV_MAXWELL,
+	NV_PASCAL,
+	NV_VOLTA,
+	NV_TURING, // or later
+};
+
+// Maps an NVIDIA PCI device ID to a coarse GPU generation using known ID ranges.
+// IDs at or above 0x1e02 count as NV_TURING; unmatched IDs below that are NV_PRE_KEPLER.
+static NVIDIAGeneration NVIDIAGetDeviceGeneration(int deviceID) {
+	if (deviceID >= 0x1180 && deviceID <= 0x11bf)
+		return NV_KEPLER; // GK104
+	if (deviceID >= 0x11c0 && deviceID <= 0x11fa)
+		return NV_KEPLER; // GK106
+	if (deviceID >= 0x0fc0 && deviceID <= 0x0fff)
+		return NV_KEPLER; // GK107
+	if (deviceID >= 0x1003 && deviceID <= 0x1028)
+		return NV_KEPLER; // GK110(B)
+	if (deviceID >= 0x1280 && deviceID <= 0x12ba)
+		return NV_KEPLER; // GK208
+	if (deviceID >= 0x1381 && deviceID <= 0x13b0)
+		return NV_MAXWELL; // GM107
+	if (deviceID >= 0x1340 && deviceID <= 0x134d)
+		return NV_MAXWELL; // GM108
+	if (deviceID >= 0x13c0 && deviceID <= 0x13d9)
+		return NV_MAXWELL; // GM204
+	if (deviceID >= 0x1401 && deviceID <= 0x1427)
+		return NV_MAXWELL; // GM206
+	if (deviceID >= 0x15f7 && deviceID <= 0x15f9)
+		return NV_PASCAL; // GP100
+	if (deviceID >= 0x1b00 && deviceID <= 0x1b38)
+		return NV_PASCAL; // GP102
+	if (deviceID >= 0x1b80 && deviceID <= 0x1be1)
+		return NV_PASCAL; // GP104
+	if (deviceID >= 0x1c02 && deviceID <= 0x1c62)
+		return NV_PASCAL; // GP106
+	if (deviceID >= 0x1c81 && deviceID <= 0x1c92)
+		return NV_PASCAL; // GP107
+	if (deviceID >= 0x1d01 && deviceID <= 0x1d12)
+		return NV_PASCAL; // GP108
+	if (deviceID >= 0x1d81 && deviceID <= 0x1dba)
+		return NV_VOLTA; // GV100
+	if (deviceID >= 0x1e02 && deviceID <= 0x1e3c)
+		return NV_TURING; // TU102
+	if (deviceID >= 0x1e82 && deviceID <= 0x1ed0)
+		return NV_TURING; // TU104
+	if (deviceID >= 0x1f02 && deviceID <= 0x1f51)
+		return NV_TURING; // TU106
+	if (deviceID >= 0x1e02)
+		return NV_TURING; // More TU models or later, probably.
+	return NV_PRE_KEPLER;
+}
+
 #define FB_DIV 1
 
 #define FOURCC_INTZ ((D3DFORMAT)(MAKEFOURCC('I', 'N', 'T', 'Z')))
@@ -665,14 +722,24 @@ D3D9Context::D3D9Context(IDirect3D9 *d3d, IDirect3D9Ex *d3dEx, int adapterId, ID
 		caps_.vendor = GPUVendor::VENDOR_UNKNOWN;
 	}
 
-	if (!FAILED(device->GetDeviceCaps(&d3dCaps_))) {
+	D3DCAPS9 caps;
+	ZeroMemory(&caps, sizeof(caps));
+	HRESULT result = 0;
+	if (deviceEx_) {
+		result = deviceEx_->GetDeviceCaps(&caps);
+	} else {
+		result = device_->GetDeviceCaps(&caps);
+	}
+
+	if (SUCCEEDED(result)) {
-		sprintf(shadeLangVersion_, "PS: %04x VS: %04x", d3dCaps_.PixelShaderVersion & 0xFFFF, d3dCaps_.VertexShaderVersion & 0xFFFF);
+		sprintf(shadeLangVersion_, "PS: %04x VS: %04x", caps.PixelShaderVersion & 0xFFFF, caps.VertexShaderVersion & 0xFFFF);
 	} else {
+		WARN_LOG(G3D, "Direct3D9: Failed to get the device caps!");
 		strcpy(shadeLangVersion_, "N/A");
 	}
 
+	caps_.deviceID = identifier_.DeviceId;
 	caps_.multiViewport = false;
-	caps_.anisoSupported = true;
 	caps_.depthRangeMinusOneToOne = false;
 	caps_.preferredDepthBufferFormat = DataFormat::D24_S8;
 	caps_.dualSourceBlend = false;
@@ -684,8 +751,30 @@ D3D9Context::D3D9Context(IDirect3D9 *d3d, IDirect3D9Ex *d3dEx, int adapterId, ID
 	caps_.framebufferDepthCopySupported = false;
 	caps_.framebufferSeparateDepthCopySupported = false;
 	caps_.texture3DSupported = true;
-	caps_.textureNPOTFullySupported = true;
 	caps_.fragmentShaderDepthWriteSupported = true;
+	caps_.blendMinMaxSupported = true;
+
+	if ((caps.RasterCaps & D3DPRASTERCAPS_ANISOTROPY) != 0 && caps.MaxAnisotropy > 1) {
+		caps_.anisoSupported = true;
+	}
+	if ((caps.TextureCaps & (D3DPTEXTURECAPS_NONPOW2CONDITIONAL | D3DPTEXTURECAPS_POW2)) == 0) {
+		caps_.textureNPOTFullySupported = true;
+	}
+
+	// VS range culling (killing triangles in the vertex shader using NaN) causes problems on Intel.
+	// Also causes problems on old NVIDIA.
+	switch (caps_.vendor) {
+	case Draw::GPUVendor::VENDOR_INTEL:
+		bugs_.Infest(Bugs::BROKEN_NAN_IN_CONDITIONAL);
+		break;
+	case Draw::GPUVendor::VENDOR_NVIDIA:
+		// Older NVIDIAs don't seem to like NaNs in their DX9 vertex shaders.
+ // No idea if KEPLER is the right cutoff, but let's go with it. + if (NVIDIAGetDeviceGeneration(caps_.deviceID) < NV_KEPLER) { + bugs_.Infest(Bugs::BROKEN_NAN_IN_CONDITIONAL); + } + break; + } if (d3d) { D3DDISPLAYMODE displayMode; diff --git a/Common/GPU/OpenGL/GLQueueRunner.cpp b/Common/GPU/OpenGL/GLQueueRunner.cpp index ee850b2ad3..6157b2e1d1 100644 --- a/Common/GPU/OpenGL/GLQueueRunner.cpp +++ b/Common/GPU/OpenGL/GLQueueRunner.cpp @@ -814,7 +814,7 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last int logicOp = -1; bool logicEnabled = false; #endif - bool clipDistance0Enabled = false; + bool clipDistanceEnabled[8]{}; GLuint blendEqColor = (GLuint)-1; GLuint blendEqAlpha = (GLuint)-1; @@ -1123,14 +1123,18 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last { if (curProgram != c.program.program) { glUseProgram(c.program.program->program); - if (c.program.program->use_clip_distance0 != clipDistance0Enabled) { - if (c.program.program->use_clip_distance0) - glEnable(GL_CLIP_DISTANCE0); - else - glDisable(GL_CLIP_DISTANCE0); - clipDistance0Enabled = c.program.program->use_clip_distance0; - } curProgram = c.program.program; + + for (size_t i = 0; i < ARRAY_SIZE(clipDistanceEnabled); ++i) { + if (c.program.program->use_clip_distance[i] == clipDistanceEnabled[i]) + continue; + + if (c.program.program->use_clip_distance[i]) + glEnable(GL_CLIP_DISTANCE0 + (GLenum)i); + else + glDisable(GL_CLIP_DISTANCE0 + (GLenum)i); + clipDistanceEnabled[i] = c.program.program->use_clip_distance[i]; + } } CHECK_GL_ERROR_IF_DEBUG(); break; @@ -1371,8 +1375,10 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last glDisable(GL_COLOR_LOGIC_OP); } #endif - if (clipDistance0Enabled) - glDisable(GL_CLIP_DISTANCE0); + for (size_t i = 0; i < ARRAY_SIZE(clipDistanceEnabled); ++i) { + if (clipDistanceEnabled[i]) + glDisable(GL_CLIP_DISTANCE0 + (GLenum)i); + } if ((colorMask & 15) != 15) 
glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); CHECK_GL_ERROR_IF_DEBUG(); diff --git a/Common/GPU/OpenGL/GLRenderManager.cpp b/Common/GPU/OpenGL/GLRenderManager.cpp index ae5cb9dd17..01a94fb8b4 100644 --- a/Common/GPU/OpenGL/GLRenderManager.cpp +++ b/Common/GPU/OpenGL/GLRenderManager.cpp @@ -579,7 +579,6 @@ void GLRenderManager::EndSubmitFrame(int frame) { void GLRenderManager::Run(int frame) { BeginSubmitFrame(frame); - FrameData &frameData = frameData_[frame]; auto &stepsOnThread = frameData_[frame].steps; diff --git a/Common/GPU/OpenGL/GLRenderManager.h b/Common/GPU/OpenGL/GLRenderManager.h index 719603863e..089c588aeb 100644 --- a/Common/GPU/OpenGL/GLRenderManager.h +++ b/Common/GPU/OpenGL/GLRenderManager.h @@ -91,6 +91,13 @@ public: std::string error; }; +struct GLRProgramFlags { + bool supportDualSource : 1; + bool useClipDistance0 : 1; + bool useClipDistance1 : 1; + bool useClipDistance2 : 1; +}; + class GLRProgram { public: ~GLRProgram() { @@ -119,7 +126,7 @@ public: std::vector semantics_; std::vector queries_; std::vector initialize_; - bool use_clip_distance0 = false; + bool use_clip_distance[8]{}; struct UniformInfo { int loc_; @@ -427,15 +434,17 @@ public: // not be an active render pass. 
GLRProgram *CreateProgram( std::vector shaders, std::vector semantics, std::vector queries, - std::vector initializers, bool supportDualSource, bool useClipDistance0) { + std::vector initializers, const GLRProgramFlags &flags) { GLRInitStep step{ GLRInitStepType::CREATE_PROGRAM }; _assert_(shaders.size() <= ARRAY_SIZE(step.create_program.shaders)); step.create_program.program = new GLRProgram(); step.create_program.program->semantics_ = semantics; step.create_program.program->queries_ = queries; step.create_program.program->initialize_ = initializers; - step.create_program.program->use_clip_distance0 = useClipDistance0; - step.create_program.support_dual_source = supportDualSource; + step.create_program.program->use_clip_distance[0] = flags.useClipDistance0; + step.create_program.program->use_clip_distance[1] = flags.useClipDistance1; + step.create_program.program->use_clip_distance[2] = flags.useClipDistance2; + step.create_program.support_dual_source = flags.supportDualSource; _assert_msg_(shaders.size() > 0, "Can't create a program with zero shaders"); for (size_t i = 0; i < shaders.size(); i++) { step.create_program.shaders[i] = shaders[i]; @@ -1003,6 +1012,7 @@ private: bool readyForFence = true; bool readyForRun = false; bool readyForSubmit = false; + bool skipSwap = false; GLRRunType type = GLRRunType::END; diff --git a/Common/GPU/OpenGL/thin3d_gl.cpp b/Common/GPU/OpenGL/thin3d_gl.cpp index 047cc8b8fb..02decfe05c 100644 --- a/Common/GPU/OpenGL/thin3d_gl.cpp +++ b/Common/GPU/OpenGL/thin3d_gl.cpp @@ -552,6 +552,8 @@ OpenGLContext::OpenGLContext() { caps_.framebufferDepthBlitSupported = caps_.framebufferBlitSupported; caps_.framebufferStencilBlitSupported = caps_.framebufferBlitSupported; caps_.depthClampSupported = gl_extensions.ARB_depth_clamp; + caps_.blendMinMaxSupported = gl_extensions.EXT_blend_minmax; + if (gl_extensions.IsGLES) { caps_.clipDistanceSupported = gl_extensions.EXT_clip_cull_distance || gl_extensions.APPLE_clip_distance; 
caps_.cullDistanceSupported = gl_extensions.EXT_clip_cull_distance; @@ -711,8 +713,10 @@ OpenGLContext::OpenGLContext() { } } - if (gl_extensions.IsGLES) { + // NOTE: We only support framebuffer fetch on ES3 due to past issues.. + if (gl_extensions.IsGLES && gl_extensions.GLES3) { caps_.framebufferFetchSupported = (gl_extensions.EXT_shader_framebuffer_fetch || gl_extensions.ARM_shader_framebuffer_fetch); + if (gl_extensions.EXT_shader_framebuffer_fetch) { shaderLanguageDesc_.framebufferFetchExtension = "#extension GL_EXT_shader_framebuffer_fetch : require"; shaderLanguageDesc_.lastFragData = gl_extensions.GLES3 ? "fragColor0" : "gl_LastFragData[0]"; @@ -1234,7 +1238,8 @@ bool OpenGLPipeline::LinkShaders() { } } - program_ = render_->CreateProgram(linkShaders, semantics, queries, initialize, false, false); + GLRProgramFlags flags{}; + program_ = render_->CreateProgram(linkShaders, semantics, queries, initialize, flags); return true; } diff --git a/Common/GPU/Vulkan/VulkanBarrier.cpp b/Common/GPU/Vulkan/VulkanBarrier.cpp index 125d51ea6d..e4f2d09089 100644 --- a/Common/GPU/Vulkan/VulkanBarrier.cpp +++ b/Common/GPU/Vulkan/VulkanBarrier.cpp @@ -4,7 +4,7 @@ void VulkanBarrier::Flush(VkCommandBuffer cmd) { if (!imageBarriers_.empty()) { - vkCmdPipelineBarrier(cmd, srcStageMask_, dstStageMask_, 0, 0, nullptr, 0, nullptr, (uint32_t)imageBarriers_.size(), imageBarriers_.data()); + vkCmdPipelineBarrier(cmd, srcStageMask_, dstStageMask_, dependencyFlags_, 0, nullptr, 0, nullptr, (uint32_t)imageBarriers_.size(), imageBarriers_.data()); } imageBarriers_.clear(); srcStageMask_ = 0; diff --git a/Common/GPU/Vulkan/VulkanBarrier.h b/Common/GPU/Vulkan/VulkanBarrier.h index eb949dd2f0..0d5754b3f4 100644 --- a/Common/GPU/Vulkan/VulkanBarrier.h +++ b/Common/GPU/Vulkan/VulkanBarrier.h @@ -21,6 +21,7 @@ public: ) { srcStageMask_ |= srcStageMask; dstStageMask_ |= dstStageMask; + dependencyFlags_ |= VK_DEPENDENCY_BY_REGION_BIT; VkImageMemoryBarrier imageBarrier; imageBarrier.sType = 
VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; @@ -112,4 +113,5 @@ private: VkPipelineStageFlags srcStageMask_ = 0; VkPipelineStageFlags dstStageMask_ = 0; std::vector imageBarriers_; + VkDependencyFlags dependencyFlags_ = 0; }; diff --git a/Common/GPU/Vulkan/VulkanContext.cpp b/Common/GPU/Vulkan/VulkanContext.cpp index 55edc0f31f..0d29b518df 100644 --- a/Common/GPU/Vulkan/VulkanContext.cpp +++ b/Common/GPU/Vulkan/VulkanContext.cpp @@ -667,7 +667,10 @@ VkResult VulkanContext::CreateDevice() { extensionsLookup_.KHR_create_renderpass2 = true; extensionsLookup_.KHR_depth_stencil_resolve = EnableDeviceExtension(VK_KHR_DEPTH_STENCIL_RESOLVE_EXTENSION_NAME); } + extensionsLookup_.EXT_shader_stencil_export = EnableDeviceExtension(VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME); + extensionsLookup_.EXT_fragment_shader_interlock = EnableDeviceExtension(VK_EXT_FRAGMENT_SHADER_INTERLOCK_EXTENSION_NAME); + extensionsLookup_.ARM_rasterization_order_attachment_access = EnableDeviceExtension(VK_ARM_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_EXTENSION_NAME); VkDeviceCreateInfo device_info{ VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO }; device_info.queueCreateInfoCount = 1; diff --git a/Common/GPU/Vulkan/VulkanDebug.cpp b/Common/GPU/Vulkan/VulkanDebug.cpp index d4052e970a..b52e4396cb 100644 --- a/Common/GPU/Vulkan/VulkanDebug.cpp +++ b/Common/GPU/Vulkan/VulkanDebug.cpp @@ -86,7 +86,6 @@ VKAPI_ATTR VkBool32 VKAPI_CALL VulkanDebugUtilsCallback( } else { WARN_LOG(G3D, "VKDEBUG: %s", msg.c_str()); } - // false indicates that layer should not bail-out of an // API call that had validation failures. This may mean that the // app dies inside the driver due to invalid parameter(s). @@ -94,3 +93,4 @@ VKAPI_ATTR VkBool32 VKAPI_CALL VulkanDebugUtilsCallback( // keep that behavior here. 
return false; } +
diff --git a/Common/GPU/Vulkan/VulkanFrameData.cpp b/Common/GPU/Vulkan/VulkanFrameData.cpp
new file mode 100644
index 0000000000..2c9c2487fc
--- /dev/null
+++ b/Common/GPU/Vulkan/VulkanFrameData.cpp
@@ -0,0 +1,213 @@
+#include "VulkanFrameData.h"
+#include "Common/Log.h"
+
+// Creates the per-frame command pools, the init/main/present command buffers, both fences and the timestamp query pool.
+void FrameData::Init(VulkanContext *vulkan, int index) {
+	this->index = index;
+	VkDevice device = vulkan->GetDevice();
+
+	VkCommandPoolCreateInfo cmd_pool_info = { VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO };
+	cmd_pool_info.queueFamilyIndex = vulkan->GetGraphicsQueueFamilyIndex();
+	cmd_pool_info.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT;
+	VkResult res = vkCreateCommandPool(device, &cmd_pool_info, nullptr, &cmdPoolInit);
+	_dbg_assert_(res == VK_SUCCESS);
+	res = vkCreateCommandPool(device, &cmd_pool_info, nullptr, &cmdPoolMain);
+	_dbg_assert_(res == VK_SUCCESS);
+
+	VkCommandBufferAllocateInfo cmd_alloc = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO };
+	cmd_alloc.commandPool = cmdPoolInit;
+	cmd_alloc.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
+	cmd_alloc.commandBufferCount = 1;
+	res = vkAllocateCommandBuffers(device, &cmd_alloc, &initCmd);
+	_dbg_assert_(res == VK_SUCCESS);
+	cmd_alloc.commandPool = cmdPoolMain;
+	res = vkAllocateCommandBuffers(device, &cmd_alloc, &mainCmd);
+	_dbg_assert_(res == VK_SUCCESS);
+	res = vkAllocateCommandBuffers(device, &cmd_alloc, &presentCmd);
+	_dbg_assert_(res == VK_SUCCESS);
+
+	// Creating the frame fence with true so they can be instantly waited on the first frame
+	fence = vulkan->CreateFence(true);
+
+	// This fence is used for synchronizing readbacks. Does not need preinitialization.
+	readbackFence = vulkan->CreateFence(false);
+
+	VkQueryPoolCreateInfo query_ci{ VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO };
+	query_ci.queryCount = MAX_TIMESTAMP_QUERIES;
+	query_ci.queryType = VK_QUERY_TYPE_TIMESTAMP;
+	res = vkCreateQueryPool(device, &query_ci, nullptr, &profile.queryPool);
+	_dbg_assert_(res == VK_SUCCESS);
+}
+
+void FrameData::Destroy(VulkanContext *vulkan) {
+	VkDevice device = vulkan->GetDevice();
+	// TODO: I don't think free-ing command buffers is necessary before destroying a pool.
+	vkFreeCommandBuffers(device, cmdPoolInit, 1, &initCmd);
+	vkFreeCommandBuffers(device, cmdPoolMain, 1, &mainCmd);
+	vkFreeCommandBuffers(device, cmdPoolMain, 1, &presentCmd);
+	vkDestroyCommandPool(device, cmdPoolInit, nullptr);
+	vkDestroyCommandPool(device, cmdPoolMain, nullptr);
+	vkDestroyFence(device, fence, nullptr);
+	vkDestroyFence(device, readbackFence, nullptr);
+	vkDestroyQueryPool(device, profile.queryPool, nullptr);
+}
+
+void FrameData::AcquireNextImage(VulkanContext *vulkan, FrameDataShared &shared) {
+	_dbg_assert_(!hasAcquired);
+
+	// Get the index of the next available swapchain image, and a semaphore to block command buffer execution on.
+	VkResult res = vkAcquireNextImageKHR(vulkan->GetDevice(), vulkan->GetSwapchain(), UINT64_MAX, shared.acquireSemaphore, (VkFence)VK_NULL_HANDLE, &curSwapchainImage);
+	switch (res) {
+	case VK_SUCCESS:
+		hasAcquired = true;
+		break;
+	case VK_SUBOPTIMAL_KHR:
+		hasAcquired = true;
+		// Hopefully the resize will happen shortly. Ignore - one frame might look bad or something.
+		WARN_LOG(G3D, "VK_SUBOPTIMAL_KHR returned - ignoring");
+		break;
+	case VK_ERROR_OUT_OF_DATE_KHR:
+		// We do not set hasAcquired here!
+		WARN_LOG(G3D, "VK_ERROR_OUT_OF_DATE_KHR returned from AcquireNextImage - processing the frame, but not presenting");
+		skipSwap = true;
+		break;
+	default:
+		// Weird, shouldn't get any other values. Maybe lost device?
+		_assert_msg_(false, "vkAcquireNextImageKHR failed! result=%s", VulkanResultToString(res));
+		break;
+	}
+}
+
+VkResult FrameData::QueuePresent(VulkanContext *vulkan, FrameDataShared &shared) {
+	_dbg_assert_(hasAcquired);
+	hasAcquired = false;
+	_dbg_assert_(!skipSwap);
+
+	VkSwapchainKHR swapchain = vulkan->GetSwapchain();
+	VkPresentInfoKHR present = { VK_STRUCTURE_TYPE_PRESENT_INFO_KHR };
+	present.swapchainCount = 1;
+	present.pSwapchains = &swapchain;
+	present.pImageIndices = &curSwapchainImage;
+	present.pWaitSemaphores = &shared.renderingCompleteSemaphore;
+	present.waitSemaphoreCount = 1;
+
+	return vkQueuePresentKHR(vulkan->GetGraphicsQueue(), &present);
+}
+
+VkCommandBuffer FrameData::GetInitCmd(VulkanContext *vulkan) {
+	if (!hasInitCommands) {
+		VkCommandBufferBeginInfo begin = {
+			VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+			nullptr,
+			VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT
+		};
+		vkResetCommandPool(vulkan->GetDevice(), cmdPoolInit, 0);
+		VkResult res = vkBeginCommandBuffer(initCmd, &begin);
+		if (res != VK_SUCCESS) {
+			return VK_NULL_HANDLE;
+		}
+		hasInitCommands = true;
+	}
+	return initCmd;
+}
+
+// Ends whichever of the init/main/present command buffers have recorded commands and submits them in a single vkQueueSubmit.
+void FrameData::SubmitPending(VulkanContext *vulkan, FrameSubmitType type, FrameDataShared &sharedData) {
+	// Up to three command buffers (init, main, present) can be pending at once.
+	VkCommandBuffer cmdBufs[3];
+	int numCmdBufs = 0;
+
+	VkFence fenceToTrigger = VK_NULL_HANDLE;
+
+	if (hasInitCommands) {
+		if (profilingEnabled_) {
+			// Pre-allocated query ID 1 - end of init cmdbuf.
+			vkCmdWriteTimestamp(initCmd, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, profile.queryPool, 1);
+		}
+
+		VkResult res = vkEndCommandBuffer(initCmd);
+		cmdBufs[numCmdBufs++] = initCmd;
+
+		_assert_msg_(res == VK_SUCCESS, "vkEndCommandBuffer failed (init)! result=%s", VulkanResultToString(res));
+		hasInitCommands = false;
+	}
+
+	if ((hasMainCommands || hasPresentCommands) && type == FrameSubmitType::Sync) {
+		fenceToTrigger = readbackFence;
+	}
+
+	if (hasMainCommands) {
+		VkResult res = vkEndCommandBuffer(mainCmd);
+		_assert_msg_(res == VK_SUCCESS, "vkEndCommandBuffer failed (main)! result=%s", VulkanResultToString(res));
+
+		cmdBufs[numCmdBufs++] = mainCmd;
+		hasMainCommands = false;
+	}
+
+	if (hasPresentCommands) {
+		VkResult res = vkEndCommandBuffer(presentCmd);
+		_assert_msg_(res == VK_SUCCESS, "vkEndCommandBuffer failed (present)! result=%s", VulkanResultToString(res));
+
+		cmdBufs[numCmdBufs++] = presentCmd;
+		hasPresentCommands = false;
+
+		if (type == FrameSubmitType::Present) {
+			fenceToTrigger = fence;
+		}
+	}
+
+	if (!numCmdBufs && fenceToTrigger == VK_NULL_HANDLE) {
+		// Nothing to do.
+		return;
+	}
+
+	VkSubmitInfo submit_info{ VK_STRUCTURE_TYPE_SUBMIT_INFO };
+	VkPipelineStageFlags waitStage[1]{ VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT };
+	if (type == FrameSubmitType::Present && !skipSwap) {
+		_dbg_assert_(hasAcquired);
+		submit_info.waitSemaphoreCount = 1;
+		submit_info.pWaitSemaphores = &sharedData.acquireSemaphore;
+		submit_info.pWaitDstStageMask = waitStage;
+	}
+	submit_info.commandBufferCount = (uint32_t)numCmdBufs;
+	submit_info.pCommandBuffers = cmdBufs;
+	if (type == FrameSubmitType::Present && !skipSwap) {
+		submit_info.signalSemaphoreCount = 1;
+		submit_info.pSignalSemaphores = &sharedData.renderingCompleteSemaphore;
+	}
+	VkResult res = vkQueueSubmit(vulkan->GetGraphicsQueue(), 1, &submit_info, fenceToTrigger);
+	if (res == VK_ERROR_DEVICE_LOST) {
+		_assert_msg_(false, "Lost the Vulkan device in vkQueueSubmit! If this happens again, switch Graphics Backend away from Vulkan");
+	} else {
+		_assert_msg_(res == VK_SUCCESS, "vkQueueSubmit failed (main)! result=%s", VulkanResultToString(res));
+	}
+
+	if (type == FrameSubmitType::Sync) {
+		// Hard stall of the GPU, not ideal, but necessary so the CPU has the contents of the readback.
+		vkWaitForFences(vulkan->GetDevice(), 1, &readbackFence, true, UINT64_MAX);
+		vkResetFences(vulkan->GetDevice(), 1, &readbackFence);
+	}
+
+	// When !triggerFence, we notify after syncing with Vulkan.
+	if (type == FrameSubmitType::Present || type == FrameSubmitType::Sync) {
+		VERBOSE_LOG(G3D, "PULL: Frame %d.readyForFence = true", index);
+		std::unique_lock lock(push_mutex);
+		readyForFence = true; // misnomer in sync mode!
+		push_condVar.notify_all();
+	}
+}
+
+void FrameDataShared::Init(VulkanContext *vulkan) {
+	VkSemaphoreCreateInfo semaphoreCreateInfo = { VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO };
+	semaphoreCreateInfo.flags = 0;
+	VkResult res = vkCreateSemaphore(vulkan->GetDevice(), &semaphoreCreateInfo, nullptr, &acquireSemaphore);
+	_dbg_assert_(res == VK_SUCCESS);
+	res = vkCreateSemaphore(vulkan->GetDevice(), &semaphoreCreateInfo, nullptr, &renderingCompleteSemaphore);
+	_dbg_assert_(res == VK_SUCCESS);
+}
+
+void FrameDataShared::Destroy(VulkanContext *vulkan) {
+	VkDevice device = vulkan->GetDevice();
+	vkDestroySemaphore(device, acquireSemaphore, nullptr);
+	vkDestroySemaphore(device, renderingCompleteSemaphore, nullptr);
+}
diff --git a/Common/GPU/Vulkan/VulkanFrameData.h b/Common/GPU/Vulkan/VulkanFrameData.h
new file mode 100644
index 0000000000..53de1e8f44
--- /dev/null
+++ b/Common/GPU/Vulkan/VulkanFrameData.h
@@ -0,0 +1,101 @@
+#pragma once
+
+#include
+
+#include
+#include
+
+#include "Common/GPU/Vulkan/VulkanContext.h"
+
+struct VKRStep;
+
+enum {
+	MAX_TIMESTAMP_QUERIES = 128,
+};
+
+enum class VKRRunType {
+	END,
+	SYNC,
+};
+
+struct QueueProfileContext {
+	VkQueryPool queryPool = VK_NULL_HANDLE;
+	std::vector timestampDescriptions;
+	std::string profileSummary;
+	double cpuStartTime = 0.0;
+	double cpuEndTime = 0.0;
+};
+
+struct FrameDataShared {
+	// Permanent objects
+	VkSemaphore acquireSemaphore = VK_NULL_HANDLE;
+	VkSemaphore renderingCompleteSemaphore = VK_NULL_HANDLE;
+
+	void Init(VulkanContext *vulkan);
+	void Destroy(VulkanContext *vulkan);
+};
+
+enum class FrameSubmitType {
+	Pending,
+	Sync,
+	Present,
+};
+
+// Per-frame data, round-robin so we can overlap submission with execution of the previous frame.
+struct FrameData {
+	std::mutex push_mutex;
+	std::condition_variable push_condVar;
+
+	std::mutex pull_mutex;
+	std::condition_variable pull_condVar;
+
+	bool readyForFence = true;
+	bool readyForRun = false;  // protected by pull_mutex
+	bool skipSwap = false;
+
+	VkFence fence = VK_NULL_HANDLE;
+	VkFence readbackFence = VK_NULL_HANDLE;  // Strictly speaking we might only need one global of these.
+
+	// These are on different threads so need separate pools.
+	VkCommandPool cmdPoolInit = VK_NULL_HANDLE;  // Written to from main thread
+	VkCommandPool cmdPoolMain = VK_NULL_HANDLE;  // Written to from render thread, which also submits
+
+	VkCommandBuffer initCmd = VK_NULL_HANDLE;
+	VkCommandBuffer mainCmd = VK_NULL_HANDLE;
+	VkCommandBuffer presentCmd = VK_NULL_HANDLE;
+
+	bool hasInitCommands = false;
+	bool hasMainCommands = false;
+	bool hasPresentCommands = false;
+
+	bool hasAcquired = false;
+
+	std::vector steps;
+
+	// Swapchain.
+	uint32_t curSwapchainImage = (uint32_t)-1;
+
+	// Profiling.
+	QueueProfileContext profile;
+	bool profilingEnabled_ = false;
+
+	void Init(VulkanContext *vulkan, int index);
+	void Destroy(VulkanContext *vulkan);
+
+	void AcquireNextImage(VulkanContext *vulkan, FrameDataShared &shared);
+	VkResult QueuePresent(VulkanContext *vulkan, FrameDataShared &shared);
+	VkCommandBuffer GetInitCmd(VulkanContext *vulkan);
+
+	// Submits whichever of the init/main/present command buffers have recorded commands.
+	void SubmitPending(VulkanContext *vulkan, FrameSubmitType type, FrameDataShared &shared);
+
+	VKRRunType RunType() const {
+		return runType_;
+	}
+
+	VKRRunType runType_ = VKRRunType::END;
+
+private:
+	// Metadata for logging etc
+	int index = 0;
+};
diff --git a/Common/GPU/Vulkan/VulkanLoader.h b/Common/GPU/Vulkan/VulkanLoader.h
index 1f3d7d7704..b0c14570da 100644
--- a/Common/GPU/Vulkan/VulkanLoader.h
+++ b/Common/GPU/Vulkan/VulkanLoader.h
@@ -241,6 +241,8 @@ struct VulkanExtensions {
 	bool KHR_depth_stencil_resolve;
 	bool EXT_shader_stencil_export;
 	bool EXT_swapchain_colorspace;
+	bool ARM_rasterization_order_attachment_access;
+	bool EXT_fragment_shader_interlock;
 	// bool EXT_depth_range_unrestricted;  // Allows depth outside [0.0, 1.0] in 32-bit float depth buffers.
 };
 
diff --git a/Common/GPU/Vulkan/VulkanQueueRunner.cpp b/Common/GPU/Vulkan/VulkanQueueRunner.cpp
index 10a88e623a..8127114148 100644
--- a/Common/GPU/Vulkan/VulkanQueueRunner.cpp
+++ b/Common/GPU/Vulkan/VulkanQueueRunner.cpp
@@ -34,8 +34,11 @@ RenderPassType MergeRPTypes(RenderPassType a, RenderPassType b) {
 	if (a == b) {
 		// Trivial merging case.
 		return a;
+	} else if (a == RP_TYPE_COLOR_DEPTH && b == RP_TYPE_COLOR_DEPTH_INPUT) {
+		return RP_TYPE_COLOR_DEPTH_INPUT;
+	} else if (a == RP_TYPE_COLOR_DEPTH_INPUT && b == RP_TYPE_COLOR_DEPTH) {
+		return RP_TYPE_COLOR_DEPTH_INPUT;
 	}
-	// More cases to be added later.
return a; }
@@ -138,6 +141,171 @@ void VulkanQueueRunner::DestroyDeviceObjects() {
 	renderPasses_.Clear();
 }
 
+bool VulkanQueueRunner::CreateSwapchain(VkCommandBuffer cmdInit) {
+	VkResult res = vkGetSwapchainImagesKHR(vulkan_->GetDevice(), vulkan_->GetSwapchain(), &swapchainImageCount_, nullptr);
+	_dbg_assert_(res == VK_SUCCESS);
+
+	VkImage *swapchainImages = new VkImage[swapchainImageCount_];
+	res = vkGetSwapchainImagesKHR(vulkan_->GetDevice(), vulkan_->GetSwapchain(), &swapchainImageCount_, swapchainImages);
+	if (res != VK_SUCCESS) {
+		ERROR_LOG(G3D, "vkGetSwapchainImagesKHR failed");
+		delete[] swapchainImages;
+		return false;
+	}
+
+	for (uint32_t i = 0; i < swapchainImageCount_; i++) {
+		SwapchainImageData sc_buffer{};
+		sc_buffer.image = swapchainImages[i];
+
+		VkImageViewCreateInfo color_image_view = { VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO };
+		color_image_view.format = vulkan_->GetSwapchainFormat();
+		color_image_view.components.r = VK_COMPONENT_SWIZZLE_IDENTITY;
+		color_image_view.components.g = VK_COMPONENT_SWIZZLE_IDENTITY;
+		color_image_view.components.b = VK_COMPONENT_SWIZZLE_IDENTITY;
+		color_image_view.components.a = VK_COMPONENT_SWIZZLE_IDENTITY;
+		color_image_view.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+		color_image_view.subresourceRange.baseMipLevel = 0;
+		color_image_view.subresourceRange.levelCount = 1;
+		color_image_view.subresourceRange.baseArrayLayer = 0;
+		color_image_view.subresourceRange.layerCount = 1;
+		color_image_view.viewType = VK_IMAGE_VIEW_TYPE_2D;
+		color_image_view.flags = 0;
+		color_image_view.image = sc_buffer.image;
+
+		// We leave the images as UNDEFINED, there's no need to pre-transition them as
+		// the backbuffer renderpass starts out with them being auto-transitioned from UNDEFINED anyway.
+		// Also, turns out it's illegal to transition un-acquired images, thanks Hans-Kristian. See #11417.
+
+		res = vkCreateImageView(vulkan_->GetDevice(), &color_image_view, nullptr, &sc_buffer.view);
+		swapchainImages_.push_back(sc_buffer);
+		_dbg_assert_(res == VK_SUCCESS);
+	}
+	delete[] swapchainImages;
+
+	// Must be before InitBackbufferRenderPass.
+	if (!InitDepthStencilBuffer(cmdInit)) {
+		return false;
+	}
+	return InitBackbufferFramebuffers(vulkan_->GetBackbufferWidth(), vulkan_->GetBackbufferHeight());
+}
+
+
+bool VulkanQueueRunner::InitBackbufferFramebuffers(int width, int height) {
+	VkResult res;
+	// We share the same depth buffer but have multiple color buffers, see the loop below.
+	VkImageView attachments[2] = { VK_NULL_HANDLE, depth_.view };
+
+	VkFramebufferCreateInfo fb_info = { VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO };
+	fb_info.renderPass = GetCompatibleRenderPass()->Get(vulkan_, RP_TYPE_BACKBUFFER);
+	fb_info.attachmentCount = 2;
+	fb_info.pAttachments = attachments;
+	fb_info.width = width;
+	fb_info.height = height;
+	fb_info.layers = 1;
+
+	framebuffers_.resize(swapchainImageCount_);
+
+	for (uint32_t i = 0; i < swapchainImageCount_; i++) {
+		attachments[0] = swapchainImages_[i].view;
+		res = vkCreateFramebuffer(vulkan_->GetDevice(), &fb_info, nullptr, &framebuffers_[i]);
+		_dbg_assert_(res == VK_SUCCESS);
+		if (res != VK_SUCCESS) {
+			framebuffers_.clear();
+			return false;
+		}
+	}
+
+	return true;
+}
+
+bool VulkanQueueRunner::InitDepthStencilBuffer(VkCommandBuffer cmd) {
+	const VkFormat depth_format = vulkan_->GetDeviceInfo().preferredDepthStencilFormat;
+	int aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
+	VkImageCreateInfo image_info = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
+	image_info.imageType = VK_IMAGE_TYPE_2D;
+	image_info.format = depth_format;
+	image_info.extent.width = vulkan_->GetBackbufferWidth();
+	image_info.extent.height = vulkan_->GetBackbufferHeight();
+	image_info.extent.depth = 1;
+	image_info.mipLevels = 1;
+	image_info.arrayLayers = 1;
+	image_info.samples = VK_SAMPLE_COUNT_1_BIT;
+	image_info.queueFamilyIndexCount = 0;
+	image_info.pQueueFamilyIndices = nullptr;
+	image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
+	image_info.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
+	image_info.flags = 0;
+
+	depth_.format = depth_format;
+
+	VmaAllocationCreateInfo allocCreateInfo{};
+	VmaAllocationInfo allocInfo{};
+
+	allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
+
+	VkResult res = vmaCreateImage(vulkan_->Allocator(), &image_info, &allocCreateInfo, &depth_.image, &depth_.alloc, &allocInfo);
+	_dbg_assert_(res == VK_SUCCESS);
+	if (res != VK_SUCCESS)
+		return false;
+
+	vulkan_->SetDebugName(depth_.image, VK_OBJECT_TYPE_IMAGE, "BackbufferDepth");
+
+	TransitionImageLayout2(cmd, depth_.image, 0, 1,
+		aspectMask,
+		VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
+		VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
+		VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
+		0, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT);
+
+	VkImageViewCreateInfo depth_view_info = { VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO };
+	depth_view_info.image = depth_.image;
+	depth_view_info.format = depth_format;
+	depth_view_info.components.r = VK_COMPONENT_SWIZZLE_IDENTITY;
+	depth_view_info.components.g = VK_COMPONENT_SWIZZLE_IDENTITY;
+	depth_view_info.components.b = VK_COMPONENT_SWIZZLE_IDENTITY;
+	depth_view_info.components.a = VK_COMPONENT_SWIZZLE_IDENTITY;
+	depth_view_info.subresourceRange.aspectMask = aspectMask;
+	depth_view_info.subresourceRange.baseMipLevel = 0;
+	depth_view_info.subresourceRange.levelCount = 1;
+	depth_view_info.subresourceRange.baseArrayLayer = 0;
+	depth_view_info.subresourceRange.layerCount = 1;
+	depth_view_info.viewType = VK_IMAGE_VIEW_TYPE_2D;
+	depth_view_info.flags = 0;
+
+	VkDevice device = vulkan_->GetDevice();
+
+	res = vkCreateImageView(device, &depth_view_info, nullptr, &depth_.view);
+	_dbg_assert_(res == VK_SUCCESS);
+	if (res != VK_SUCCESS)
+		return false;
+
+	return true;
+}
+
+
+void VulkanQueueRunner::DestroyBackBuffers() {
+	for (auto &image : swapchainImages_) {
+		vulkan_->Delete().QueueDeleteImageView(image.view);
+	}
+	swapchainImages_.clear();
+
+	if (depth_.view) {
+		vulkan_->Delete().QueueDeleteImageView(depth_.view);
+	}
+	if (depth_.image) {
+		_dbg_assert_(depth_.alloc);
+		vulkan_->Delete().QueueDeleteImageAllocation(depth_.image, depth_.alloc);
+	}
+	depth_ = {};
+	for (uint32_t i = 0; i < framebuffers_.size(); i++) {
+		_dbg_assert_(framebuffers_[i] != VK_NULL_HANDLE);
+		vulkan_->Delete().QueueDeleteFramebuffer(framebuffers_[i]);
+	}
+	framebuffers_.clear();
+
+	INFO_LOG(G3D, "Backbuffers destroyed");
+}
+
 static VkAttachmentLoadOp ConvertLoadAction(VKRRenderPassLoadAction action) {
 	switch (action) {
 	case VKRRenderPassLoadAction::CLEAR: return VK_ATTACHMENT_LOAD_OP_CLEAR;
@@ -155,7 +323,12 @@ static VkAttachmentStoreOp ConvertStoreAction(VKRRenderPassStoreAction action) {
 	return VK_ATTACHMENT_STORE_OP_DONT_CARE;  // avoid compiler warning
 }
 
+// Self-dependency: https://github.com/gpuweb/gpuweb/issues/442#issuecomment-547604827
+// Also see https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-pipeline-barriers-subpass-self-dependencies
+
 VkRenderPass CreateRP(VulkanContext *vulkan, const RPKey &key, RenderPassType rpType) {
+	bool selfDependency = rpType == RP_TYPE_COLOR_DEPTH_INPUT;
+
 	VkAttachmentDescription attachments[2] = {};
 	attachments[0].format = rpType == RP_TYPE_BACKBUFFER ? vulkan->GetSwapchainFormat() : VK_FORMAT_R8G8B8A8_UNORM;
 	attachments[0].samples = VK_SAMPLE_COUNT_1_BIT;
@@ -179,7 +352,7 @@ VkRenderPass CreateRP(VulkanContext *vulkan, const RPKey &key, RenderPassType rp
 
 	VkAttachmentReference color_reference{};
 	color_reference.attachment = 0;
-	color_reference.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+	color_reference.layout = selfDependency ? 
VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; VkAttachmentReference depth_reference{}; depth_reference.attachment = 1; @@ -188,8 +361,13 @@ VkRenderPass CreateRP(VulkanContext *vulkan, const RPKey &key, RenderPassType rp VkSubpassDescription subpass{}; subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; subpass.flags = 0; - subpass.inputAttachmentCount = 0; - subpass.pInputAttachments = nullptr; + if (selfDependency) { + subpass.inputAttachmentCount = 1; + subpass.pInputAttachments = &color_reference; + } else { + subpass.inputAttachmentCount = 0; + subpass.pInputAttachments = nullptr; + } subpass.colorAttachmentCount = 1; subpass.pColorAttachments = &color_reference; subpass.pResolveAttachments = nullptr; @@ -198,22 +376,40 @@ VkRenderPass CreateRP(VulkanContext *vulkan, const RPKey &key, RenderPassType rp subpass.pPreserveAttachments = nullptr; // Not sure if this is really necessary. - VkSubpassDependency dep{}; - dep.srcSubpass = VK_SUBPASS_EXTERNAL; - dep.dstSubpass = 0; - dep.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - dep.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - dep.srcAccessMask = 0; - dep.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + VkSubpassDependency deps[2]{}; + size_t numDeps = 0; VkRenderPassCreateInfo rp{ VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO }; rp.attachmentCount = 2; rp.pAttachments = attachments; rp.subpassCount = 1; rp.pSubpasses = &subpass; + if (rpType == RP_TYPE_BACKBUFFER) { + deps[numDeps].srcSubpass = VK_SUBPASS_EXTERNAL; + deps[numDeps].dstSubpass = 0; + deps[numDeps].srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + deps[numDeps].dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + deps[numDeps].srcAccessMask = 0; + deps[numDeps].dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + numDeps++; rp.dependencyCount = 1; - rp.pDependencies = &dep; + } + 
+ if (selfDependency) { + deps[numDeps].dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT; + deps[numDeps].srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + deps[numDeps].dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT; + deps[numDeps].srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + deps[numDeps].dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + deps[numDeps].srcSubpass = 0; + deps[numDeps].dstSubpass = 0; + numDeps++; + } + + if (numDeps > 0) { + rp.dependencyCount = (u32)numDeps; + rp.pDependencies = deps; } VkRenderPass pass; @@ -246,6 +442,30 @@ VKRRenderPass *VulkanQueueRunner::GetRenderPass(const RPKey &key) { return pass; } +// Must match the subpass self-dependency declared above. +void VulkanQueueRunner::SelfDependencyBarrier(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrier *recordBarrier) { + if (aspect & VK_IMAGE_ASPECT_COLOR_BIT) { + VkAccessFlags srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + VkAccessFlags dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT; + VkPipelineStageFlags srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + VkPipelineStageFlags dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + recordBarrier->TransitionImage( + img.image, + 0, + 1, + aspect, + VK_IMAGE_LAYOUT_GENERAL, + VK_IMAGE_LAYOUT_GENERAL, + srcAccessMask, + dstAccessMask, + srcStageMask, + dstStageMask + ); + } else { + _assert_msg_(false, "Depth self-dependencies not yet supported"); + } +} + void VulkanQueueRunner::PreprocessSteps(std::vector &steps) { // Optimizes renderpasses, then sequences them. // Planned optimizations: @@ -321,23 +541,47 @@ void VulkanQueueRunner::PreprocessSteps(std::vector &steps) { } } -void VulkanQueueRunner::RunSteps(VkCommandBuffer cmd, std::vector &steps, QueueProfileContext *profile) { +void VulkanQueueRunner::RunSteps(FrameData &frameData, FrameDataShared &frameDataShared) { + QueueProfileContext *profile = frameData.profilingEnabled_ ? 
&frameData.profile : nullptr; + if (profile) profile->cpuStartTime = time_now_d(); bool emitLabels = vulkan_->Extensions().EXT_debug_utils; - for (size_t i = 0; i < steps.size(); i++) { - const VKRStep &step = *steps[i]; + VkCommandBuffer cmd = frameData.hasPresentCommands ? frameData.presentCmd : frameData.mainCmd; + + for (size_t i = 0; i < frameData.steps.size(); i++) { + const VKRStep &step = *frameData.steps[i]; if (emitLabels) { VkDebugUtilsLabelEXT labelInfo{ VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT }; labelInfo.pLabelName = step.tag; - vkCmdBeginDebugUtilsLabelEXT(cmd, &labelInfo); + vkCmdBeginDebugUtilsLabelEXT(frameData.mainCmd, &labelInfo); } switch (step.stepType) { case VKRStepType::RENDER: + if (!step.render.framebuffer) { + frameData.SubmitPending(vulkan_, FrameSubmitType::Pending, frameDataShared); + + // When stepping in the GE debugger, we can end up here multiple times in a "frame". + // So only acquire once. + if (!frameData.hasAcquired) { + frameData.AcquireNextImage(vulkan_, frameDataShared); + SetBackbuffer(framebuffers_[frameData.curSwapchainImage], swapchainImages_[frameData.curSwapchainImage].image); + } + + _dbg_assert_(!frameData.hasPresentCommands); + // A RENDER step rendering to the backbuffer is normally the last step that happens in a frame, + // unless taking a screenshot, in which case there might be a READBACK_IMAGE after it. + // This is why we have to switch cmd to presentCmd, in this case. 
+ VkCommandBufferBeginInfo begin{ VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO }; + begin.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + vkBeginCommandBuffer(frameData.presentCmd, &begin); + frameData.hasPresentCommands = true; + cmd = frameData.presentCmd; + } PerformRenderPass(step, cmd); break; case VKRStepType::COPY: @@ -368,10 +612,12 @@ void VulkanQueueRunner::RunSteps(VkCommandBuffer cmd, std::vector &st // Deleting all in one go should be easier on the instruction cache than deleting // them as we go - and easier to debug because we can look backwards in the frame. - for (size_t i = 0; i < steps.size(); i++) { - delete steps[i]; + for (auto step : frameData.steps) { + delete step; } + frameData.steps.clear(); + if (profile) profile->cpuEndTime = time_now_d(); } @@ -628,6 +874,7 @@ std::string VulkanQueueRunner::StepToString(const VKRStep &step) const { switch (step.render.renderPassType) { case RP_TYPE_BACKBUFFER: renderCmd = "BACKBUF"; break; case RP_TYPE_COLOR_DEPTH: renderCmd = "RENDER"; break; + case RP_TYPE_COLOR_DEPTH_INPUT: renderCmd = "RENDER_INPUT"; break; default: renderCmd = "N/A"; } snprintf(buffer, sizeof(buffer), "%s %s (draws: %d, %dx%d/%dx%d, fb: %p, )", renderCmd, step.tag, step.render.numDraws, actual_w, actual_h, w, h, step.render.framebuffer); @@ -817,6 +1064,9 @@ void VulkanQueueRunner::LogRenderPass(const VKRStep &pass, bool verbose) { case VKRRenderCommand::REMOVED: INFO_LOG(G3D, " (Removed)"); break; + case VKRRenderCommand::SELF_DEPENDENCY_BARRIER: + INFO_LOG(G3D, " SelfBarrier()"); + break; case VKRRenderCommand::BIND_GRAPHICS_PIPELINE: INFO_LOG(G3D, " BindGraphicsPipeline(%x)", (int)(intptr_t)cmd.graphics_pipeline.pipeline); break; @@ -1070,7 +1320,6 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c } } - // Don't execute empty renderpasses that keep the contents. 
if (step.commands.empty() && step.render.colorLoad == VKRRenderPassLoadAction::KEEP && step.render.depthLoad == VKRRenderPassLoadAction::KEEP && step.render.stencilLoad == VKRRenderPassLoadAction::KEEP) { // Flush the pending barrier @@ -1120,6 +1369,7 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c // This reads the layout of the color and depth images, and chooses a render pass using them that // will transition to the desired final layout. + // // NOTE: Flushes recordBarrier_. VKRRenderPass *renderPass = PerformBindFramebufferAsRenderTarget(step, cmd); @@ -1235,6 +1485,15 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c break; } + case VKRRenderCommand::SELF_DEPENDENCY_BARRIER: + { + _assert_(step.render.pipelineFlags & PipelineFlags::USES_INPUT_ATTACHMENT); + VulkanBarrier barrier; + SelfDependencyBarrier(step.render.framebuffer->color, VK_IMAGE_ASPECT_COLOR_BIT, &barrier); + barrier.Flush(cmd); + break; + } + case VKRRenderCommand::PUSH_CONSTANTS: vkCmdPushConstants(cmd, pipelineLayout, c.push.stages, c.push.offset, c.push.size, c.push.data); break; diff --git a/Common/GPU/Vulkan/VulkanQueueRunner.h b/Common/GPU/Vulkan/VulkanQueueRunner.h index adb9e7d96e..2c76262a6a 100644 --- a/Common/GPU/Vulkan/VulkanQueueRunner.h +++ b/Common/GPU/Vulkan/VulkanQueueRunner.h @@ -8,6 +8,7 @@ #include "Common/Data/Collections/Hashmaps.h" #include "Common/GPU/Vulkan/VulkanContext.h" #include "Common/GPU/Vulkan/VulkanBarrier.h" +#include "Common/GPU/Vulkan/VulkanFrameData.h" #include "Common/Data/Convert/SmallDataConvert.h" #include "Common/Data/Collections/TinySet.h" #include "Common/GPU/DataFormat.h" @@ -16,11 +17,11 @@ class VKRFramebuffer; struct VKRGraphicsPipeline; struct VKRComputePipeline; struct VKRImage; +struct FrameData; enum { QUEUE_HACK_MGS2_ACID = 1, QUEUE_HACK_SONIC = 2, - // Killzone PR = 4. 
QUEUE_HACK_RENDERPASS_MERGE = 8, }; @@ -36,20 +37,24 @@ enum class VKRRenderCommand : uint8_t { DRAW, DRAW_INDEXED, PUSH_CONSTANTS, + SELF_DEPENDENCY_BARRIER, NUM_RENDER_COMMANDS, }; -enum PipelineFlags { - PIPELINE_FLAG_NONE = 0, - PIPELINE_FLAG_USES_LINES = (1 << 2), - PIPELINE_FLAG_USES_BLEND_CONSTANT = (1 << 3), - PIPELINE_FLAG_USES_DEPTH_STENCIL = (1 << 4), // Reads or writes the depth buffer. +enum class PipelineFlags { + NONE = 0, + USES_LINES = (1 << 2), + USES_BLEND_CONSTANT = (1 << 3), + USES_DEPTH_STENCIL = (1 << 4), // Reads or writes the depth buffer. + USES_INPUT_ATTACHMENT = (1 << 5), }; +ENUM_CLASS_BITOPS(PipelineFlags); // Pipelines need to be created for the right type of render pass. enum RenderPassType { RP_TYPE_BACKBUFFER, RP_TYPE_COLOR_DEPTH, + RP_TYPE_COLOR_DEPTH_INPUT, // Later will add pure-color render passes. RP_TYPE_COUNT, }; @@ -146,14 +151,6 @@ struct TransitionRequest { VkImageLayout targetLayout; }; -struct QueueProfileContext { - VkQueryPool queryPool; - std::vector timestampDescriptions; - std::string profileSummary; - double cpuStartTime; - double cpuEndTime; -}; - class VKRRenderPass; struct VKRStep { @@ -168,7 +165,6 @@ struct VKRStep { union { struct { VKRFramebuffer *framebuffer; - // TODO: Look these up through renderPass? VKRRenderPassLoadAction colorLoad; VKRRenderPassLoadAction depthLoad; VKRRenderPassLoadAction stencilLoad; @@ -183,7 +179,7 @@ struct VKRStep { int numReads; VkImageLayout finalColorLayout; VkImageLayout finalDepthStencilLayout; - u32 pipelineFlags; + PipelineFlags pipelineFlags; // contains the self dependency flag, in the form of USES_INPUT_ATTACHMENT VkRect2D renderArea; // Render pass type. Deduced after finishing recording the pass, from the used pipelines. // NOTE: Storing the render pass here doesn't do much good, we change the compatible parameters (load/store ops) during step optimization. 
@@ -255,7 +251,7 @@ public: } void PreprocessSteps(std::vector &steps); - void RunSteps(VkCommandBuffer cmd, std::vector &steps, QueueProfileContext *profile); + void RunSteps(FrameData &frameData, FrameDataShared &frameDataShared); void LogSteps(const std::vector &steps, bool verbose); std::string StepToString(const VKRStep &step) const; @@ -263,6 +259,14 @@ public: void CreateDeviceObjects(); void DestroyDeviceObjects(); + // Swapchain + void DestroyBackBuffers(); + bool CreateSwapchain(VkCommandBuffer cmdInit); + + bool HasBackbuffers() const { + return !framebuffers_.empty(); + } + // Get a render pass that's compatible with all our framebuffers. // Note that it's precached, cannot look up in the map as this might be on another thread. VKRRenderPass *GetCompatibleRenderPass() const { @@ -302,6 +306,9 @@ public: } private: + bool InitBackbufferFramebuffers(int width, int height); + bool InitDepthStencilBuffer(VkCommandBuffer cmd); // Used for non-buffered rendering. + VKRRenderPass *PerformBindFramebufferAsRenderTarget(const VKRStep &pass, VkCommandBuffer cmd); void PerformRenderPass(const VKRStep &pass, VkCommandBuffer cmd); void PerformCopy(const VKRStep &pass, VkCommandBuffer cmd); @@ -324,6 +331,8 @@ private: static void SetupTransitionToTransferSrc(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrier *recordBarrier); static void SetupTransitionToTransferDst(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrier *recordBarrier); + static void SelfDependencyBarrier(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrier *recordBarrier); + VulkanContext *vulkan_; VkFramebuffer backbuffer_ = VK_NULL_HANDLE; @@ -354,4 +363,20 @@ private: // Stored here to help reuse the allocation. 
VulkanBarrier recordBarrier_; + + // Swap chain management + struct SwapchainImageData { + VkImage image; + VkImageView view; + }; + std::vector framebuffers_; + std::vector swapchainImages_; + uint32_t swapchainImageCount_ = 0; + struct DepthBufferInfo { + VkFormat format = VK_FORMAT_UNDEFINED; + VkImage image = VK_NULL_HANDLE; + VmaAllocation alloc = VK_NULL_HANDLE; + VkImageView view = VK_NULL_HANDLE; + }; + DepthBufferInfo depth_; }; diff --git a/Common/GPU/Vulkan/VulkanRenderManager.cpp b/Common/GPU/Vulkan/VulkanRenderManager.cpp index 56f40655f7..3a0dde0312 100644 --- a/Common/GPU/Vulkan/VulkanRenderManager.cpp +++ b/Common/GPU/Vulkan/VulkanRenderManager.cpp @@ -223,7 +223,7 @@ void CreateImage(VulkanContext *vulkan, VkCommandBuffer cmd, VKRImage &img, int // Strictly speaking we don't yet need VK_IMAGE_USAGE_SAMPLED_BIT for depth buffers since we do not yet sample depth buffers. ici.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT; if (color) { - ici.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + ici.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT; } else { ici.usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; } @@ -288,53 +288,15 @@ void CreateImage(VulkanContext *vulkan, VkCommandBuffer cmd, VKRImage &img, int } VulkanRenderManager::VulkanRenderManager(VulkanContext *vulkan) : vulkan_(vulkan), queueRunner_(vulkan) { - VkSemaphoreCreateInfo semaphoreCreateInfo = { VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO }; - semaphoreCreateInfo.flags = 0; - VkResult res = vkCreateSemaphore(vulkan_->GetDevice(), &semaphoreCreateInfo, nullptr, &acquireSemaphore_); - _dbg_assert_(res == VK_SUCCESS); - res = vkCreateSemaphore(vulkan_->GetDevice(), &semaphoreCreateInfo, nullptr, &renderingCompleteSemaphore_); - _dbg_assert_(res == VK_SUCCESS); - inflightFramesAtStart_ = vulkan_->GetInflightFrames(); + + frameDataShared_.Init(vulkan); + for (int i = 0; i < inflightFramesAtStart_; i++) { 
- VkCommandPoolCreateInfo cmd_pool_info = { VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO }; - cmd_pool_info.queueFamilyIndex = vulkan_->GetGraphicsQueueFamilyIndex(); - cmd_pool_info.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT; - VkResult res = vkCreateCommandPool(vulkan_->GetDevice(), &cmd_pool_info, nullptr, &frameData_[i].cmdPoolInit); - _dbg_assert_(res == VK_SUCCESS); - res = vkCreateCommandPool(vulkan_->GetDevice(), &cmd_pool_info, nullptr, &frameData_[i].cmdPoolMain); - _dbg_assert_(res == VK_SUCCESS); - - VkCommandBufferAllocateInfo cmd_alloc = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO }; - cmd_alloc.commandPool = frameData_[i].cmdPoolInit; - cmd_alloc.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; - cmd_alloc.commandBufferCount = 1; - - res = vkAllocateCommandBuffers(vulkan_->GetDevice(), &cmd_alloc, &frameData_[i].initCmd); - _dbg_assert_(res == VK_SUCCESS); - cmd_alloc.commandPool = frameData_[i].cmdPoolMain; - res = vkAllocateCommandBuffers(vulkan_->GetDevice(), &cmd_alloc, &frameData_[i].mainCmd); - _dbg_assert_(res == VK_SUCCESS); - - // Creating the frame fence with true so they can be instantly waited on the first frame - frameData_[i].fence = vulkan_->CreateFence(true); - - // This fence one is used for synchronizing readbacks. Does not need preinitialization. - frameData_[i].readbackFence = vulkan_->CreateFence(false); - - VkQueryPoolCreateInfo query_ci{ VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO }; - query_ci.queryCount = MAX_TIMESTAMP_QUERIES; - query_ci.queryType = VK_QUERY_TYPE_TIMESTAMP; - res = vkCreateQueryPool(vulkan_->GetDevice(), &query_ci, nullptr, &frameData_[i].profile.queryPool); + frameData_[i].Init(vulkan, i); } queueRunner_.CreateDeviceObjects(); - - // AMD hack for issue #10097 (older drivers only.) 
- const auto &props = vulkan_->GetPhysicalDeviceProperties().properties; - if (props.vendorID == VULKAN_VENDOR_AMD && props.apiVersion < VK_API_VERSION_1_1) { - useThread_ = false; - } } bool VulkanRenderManager::CreateBackbuffers() { @@ -342,52 +304,14 @@ bool VulkanRenderManager::CreateBackbuffers() { ERROR_LOG(G3D, "No swapchain - can't create backbuffers"); return false; } - VkResult res = vkGetSwapchainImagesKHR(vulkan_->GetDevice(), vulkan_->GetSwapchain(), &swapchainImageCount_, nullptr); - _dbg_assert_(res == VK_SUCCESS); - VkImage *swapchainImages = new VkImage[swapchainImageCount_]; - res = vkGetSwapchainImagesKHR(vulkan_->GetDevice(), vulkan_->GetSwapchain(), &swapchainImageCount_, swapchainImages); - if (res != VK_SUCCESS) { - ERROR_LOG(G3D, "vkGetSwapchainImagesKHR failed"); - delete[] swapchainImages; - return false; - } VkCommandBuffer cmdInit = GetInitCmd(); - for (uint32_t i = 0; i < swapchainImageCount_; i++) { - SwapchainImageData sc_buffer{}; - sc_buffer.image = swapchainImages[i]; - - VkImageViewCreateInfo color_image_view = { VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO }; - color_image_view.format = vulkan_->GetSwapchainFormat(); - color_image_view.components.r = VK_COMPONENT_SWIZZLE_IDENTITY; - color_image_view.components.g = VK_COMPONENT_SWIZZLE_IDENTITY; - color_image_view.components.b = VK_COMPONENT_SWIZZLE_IDENTITY; - color_image_view.components.a = VK_COMPONENT_SWIZZLE_IDENTITY; - color_image_view.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - color_image_view.subresourceRange.baseMipLevel = 0; - color_image_view.subresourceRange.levelCount = 1; - color_image_view.subresourceRange.baseArrayLayer = 0; - color_image_view.subresourceRange.layerCount = 1; - color_image_view.viewType = VK_IMAGE_VIEW_TYPE_2D; - color_image_view.flags = 0; - color_image_view.image = sc_buffer.image; - - // We leave the images as UNDEFINED, there's no need to pre-transition them as - // the backbuffer renderpass starts out with them being 
auto-transitioned from UNDEFINED anyway. - // Also, turns out it's illegal to transition un-acquired images, thanks Hans-Kristian. See #11417. - - res = vkCreateImageView(vulkan_->GetDevice(), &color_image_view, nullptr, &sc_buffer.view); - swapchainImages_.push_back(sc_buffer); - _dbg_assert_(res == VK_SUCCESS); + if (!queueRunner_.CreateSwapchain(cmdInit)) { + return false; } - delete[] swapchainImages; - // Must be before InitBackbufferRenderPass. - if (InitDepthStencilBuffer(cmdInit)) { - InitBackbufferFramebuffers(vulkan_->GetBackbufferWidth(), vulkan_->GetBackbufferHeight()); - } curWidthRaw_ = -1; curHeightRaw_ = -1; @@ -404,7 +328,7 @@ bool VulkanRenderManager::CreateBackbuffers() { outOfDateFrames_ = 0; // Start the thread. - if (useThread_ && HasBackbuffers()) { + if (HasBackbuffers()) { run_ = true; // Won't necessarily be 0. threadInitFrame_ = vulkan_->GetCurFrame(); @@ -417,57 +341,58 @@ bool VulkanRenderManager::CreateBackbuffers() { } void VulkanRenderManager::StopThread() { - if (useThread_ && run_) { - run_ = false; - // Stop the thread. - for (int i = 0; i < vulkan_->GetInflightFrames(); i++) { - auto &frameData = frameData_[i]; - { - std::unique_lock lock(frameData.push_mutex); - frameData.push_condVar.notify_all(); - } - { - std::unique_lock lock(frameData.pull_mutex); - frameData.pull_condVar.notify_all(); - } - // Zero the queries so we don't try to pull them later. - frameData.profile.timestampDescriptions.clear(); - } - thread_.join(); - INFO_LOG(G3D, "Vulkan submission thread joined. Frame=%d", vulkan_->GetCurFrame()); - compileCond_.notify_all(); - compileThread_.join(); - INFO_LOG(G3D, "Vulkan compiler thread joined."); - - // Eat whatever has been queued up for this frame if anything. - Wipe(); - - // Wait for any fences to finish and be resignaled, so we don't have sync issues. - // Also clean out any queued data, which might refer to things that might not be valid - // when we restart... 
- for (int i = 0; i < vulkan_->GetInflightFrames(); i++) { - auto &frameData = frameData_[i]; - _assert_(!frameData.readyForRun); - _assert_(frameData.steps.empty()); - if (frameData.hasInitCommands) { - // Clear 'em out. This can happen on restart sometimes. - vkEndCommandBuffer(frameData.initCmd); - frameData.hasInitCommands = false; - } - frameData.readyForRun = false; - for (size_t i = 0; i < frameData.steps.size(); i++) { - delete frameData.steps[i]; - } - frameData.steps.clear(); - - std::unique_lock lock(frameData.push_mutex); - while (!frameData.readyForFence) { - VLOG("PUSH: Waiting for frame[%d].readyForFence = 1 (stop)", i); - frameData.push_condVar.wait(lock); - } - } - } else { + if (!run_) { INFO_LOG(G3D, "Vulkan submission thread was already stopped."); + return; + } + + run_ = false; + // Stop the thread. + for (int i = 0; i < vulkan_->GetInflightFrames(); i++) { + auto &frameData = frameData_[i]; + { + std::unique_lock lock(frameData.push_mutex); + frameData.push_condVar.notify_all(); + } + { + std::unique_lock lock(frameData.pull_mutex); + frameData.pull_condVar.notify_all(); + } + // Zero the queries so we don't try to pull them later. + frameData.profile.timestampDescriptions.clear(); + } + thread_.join(); + INFO_LOG(G3D, "Vulkan submission thread joined. Frame=%d", vulkan_->GetCurFrame()); + compileCond_.notify_all(); + compileThread_.join(); + INFO_LOG(G3D, "Vulkan compiler thread joined."); + + // Eat whatever has been queued up for this frame if anything. + Wipe(); + + // Wait for any fences to finish and be resignaled, so we don't have sync issues. + // Also clean out any queued data, which might refer to things that might not be valid + // when we restart... + for (int i = 0; i < vulkan_->GetInflightFrames(); i++) { + auto &frameData = frameData_[i]; + _assert_(!frameData.readyForRun); + _assert_(frameData.steps.empty()); + if (frameData.hasInitCommands) { + // Clear 'em out. This can happen on restart sometimes. 
+ vkEndCommandBuffer(frameData.initCmd); + frameData.hasInitCommands = false; + } + frameData.readyForRun = false; + for (size_t i = 0; i < frameData.steps.size(); i++) { + delete frameData.steps[i]; + } + frameData.steps.clear(); + + std::unique_lock lock(frameData.push_mutex); + while (!frameData.readyForFence) { + VLOG("PUSH: Waiting for frame[%d].readyForFence = 1 (stop)", i); + frameData.push_condVar.wait(lock); + } } } @@ -475,26 +400,7 @@ void VulkanRenderManager::DestroyBackbuffers() { StopThread(); vulkan_->WaitUntilQueueIdle(); - for (auto &image : swapchainImages_) { - vulkan_->Delete().QueueDeleteImageView(image.view); - } - swapchainImages_.clear(); - - if (depth_.view) { - vulkan_->Delete().QueueDeleteImageView(depth_.view); - } - if (depth_.image) { - _dbg_assert_(depth_.alloc); - vulkan_->Delete().QueueDeleteImageAllocation(depth_.image, depth_.alloc); - } - depth_ = {}; - for (uint32_t i = 0; i < framebuffers_.size(); i++) { - _dbg_assert_(framebuffers_[i] != VK_NULL_HANDLE); - vulkan_->Delete().QueueDeleteFramebuffer(framebuffers_[i]); - } - framebuffers_.clear(); - - INFO_LOG(G3D, "Backbuffers destroyed"); + queueRunner_.DestroyBackBuffers(); } VulkanRenderManager::~VulkanRenderManager() { @@ -504,16 +410,9 @@ VulkanRenderManager::~VulkanRenderManager() { DrainCompileQueue(); VkDevice device = vulkan_->GetDevice(); - vkDestroySemaphore(device, acquireSemaphore_, nullptr); - vkDestroySemaphore(device, renderingCompleteSemaphore_, nullptr); + frameDataShared_.Destroy(vulkan_); for (int i = 0; i < inflightFramesAtStart_; i++) { - vkFreeCommandBuffers(device, frameData_[i].cmdPoolInit, 1, &frameData_[i].initCmd); - vkFreeCommandBuffers(device, frameData_[i].cmdPoolMain, 1, &frameData_[i].mainCmd); - vkDestroyCommandPool(device, frameData_[i].cmdPoolInit, nullptr); - vkDestroyCommandPool(device, frameData_[i].cmdPoolMain, nullptr); - vkDestroyFence(device, frameData_[i].fence, nullptr); - vkDestroyFence(device, frameData_[i].readbackFence, nullptr); - 
vkDestroyQueryPool(device, frameData_[i].profile.queryPool, nullptr); + frameData_[i].Destroy(vulkan_); } queueRunner_.DestroyDeviceObjects(); } @@ -534,7 +433,9 @@ void VulkanRenderManager::CompileThreadFunc() { break; } - INFO_LOG(G3D, "Compilation thread has %d pipelines to create", (int)toCompile.size()); + if (!toCompile.empty()) { + INFO_LOG(G3D, "Compilation thread has %d pipelines to create", (int)toCompile.size()); + } // TODO: Here we can sort the pending pipelines by vertex and fragment shaders, // and split up further. @@ -574,6 +475,7 @@ void VulkanRenderManager::ThreadFunc() { threadFrame = 0; } FrameData &frameData = frameData_[threadFrame]; + std::unique_lock lock(frameData.pull_mutex); while (!frameData.readyForRun && run_) { VLOG("PULL: Waiting for frame[%d].readyForRun", threadFrame); @@ -589,8 +491,7 @@ void VulkanRenderManager::ThreadFunc() { // but that created a race condition where frames could end up not finished properly on resize etc. // Only increment next time if we're done. - nextFrame = frameData.type == VKRRunType::END; - _dbg_assert_(frameData.type == VKRRunType::END || frameData.type == VKRRunType::SYNC); + nextFrame = frameData.RunType() == VKRRunType::END; } VLOG("PULL: Running frame %d", threadFrame); if (firstFrame) { @@ -615,7 +516,7 @@ void VulkanRenderManager::BeginFrame(bool enableProfiling, bool enableLogProfile FrameData &frameData = frameData_[curFrame]; // Make sure the very last command buffer from the frame before the previous has been fully executed. - if (useThread_) { + { std::unique_lock lock(frameData.push_mutex); while (!frameData.readyForFence) { VLOG("PUSH: Waiting for frame[%d].readyForFence = 1", curFrame); @@ -633,7 +534,6 @@ void VulkanRenderManager::BeginFrame(bool enableProfiling, bool enableLogProfile // Can't set this until after the fence. 
frameData.profilingEnabled_ = enableProfiling; - frameData.readbackFenceUsed = false; uint64_t queryResults[MAX_TIMESTAMP_QUERIES]; @@ -698,21 +598,7 @@ void VulkanRenderManager::BeginFrame(bool enableProfiling, bool enableLogProfile VkCommandBuffer VulkanRenderManager::GetInitCmd() { int curFrame = vulkan_->GetCurFrame(); - FrameData &frameData = frameData_[curFrame]; - if (!frameData.hasInitCommands) { - VkCommandBufferBeginInfo begin = { - VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, - nullptr, - VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT - }; - vkResetCommandPool(vulkan_->GetDevice(), frameData.cmdPoolInit, 0); - VkResult res = vkBeginCommandBuffer(frameData.initCmd, &begin); - if (res != VK_SUCCESS) { - return VK_NULL_HANDLE; - } - frameData.hasInitCommands = true; - } - return frameData_[curFrame].initCmd; + return frameData_[curFrame].GetInitCmd(vulkan_); } VKRGraphicsPipeline *VulkanRenderManager::CreateGraphicsPipeline(VKRGraphicsPipelineDesc *desc, uint32_t variantBitmask, const char *tag) { @@ -771,16 +657,20 @@ void VulkanRenderManager::EndCurRenderStep() { curRenderStep_->render.colorStore, curRenderStep_->render.depthStore, curRenderStep_->render.stencilStore, }; RenderPassType rpType = RP_TYPE_COLOR_DEPTH; + // Save the accumulated pipeline flags so we can use that to configure the render pass. + // We'll often be able to avoid loading/saving the depth/stencil buffer. curRenderStep_->render.pipelineFlags = curPipelineFlags_; if (!curRenderStep_->render.framebuffer) { rpType = RP_TYPE_BACKBUFFER; + } else if (curPipelineFlags_ & PipelineFlags::USES_INPUT_ATTACHMENT) { + // Not allowed on backbuffers. + rpType = RP_TYPE_COLOR_DEPTH_INPUT; } + // TODO: Also add render pass types for depth/stencil-less. VKRRenderPass *renderPass = queueRunner_.GetRenderPass(key); curRenderStep_->render.renderPassType = rpType; - // Save the accumulated pipeline flags so we can use that to configure the render pass. 
- // We'll often be able to avoid loading/saving the depth/stencil buffer. compileMutex_.lock(); bool needsCompile = false; for (VKRGraphicsPipeline *pipeline : pipelinesToCheck_) { @@ -806,7 +696,12 @@ void VulkanRenderManager::EndCurRenderStep() { // We no longer have a current render step. curRenderStep_ = nullptr; - curPipelineFlags_ = 0; + curPipelineFlags_ = (PipelineFlags)0; +} + +void VulkanRenderManager::BindCurrentFramebufferAsInputAttachment0(VkImageAspectFlags aspectBits) { + _dbg_assert_(curRenderStep_); + curRenderStep_->commands.push_back(VkRenderData{ VKRRenderCommand::SELF_DEPENDENCY_BARRIER }); } void VulkanRenderManager::BindFramebufferAsRenderTarget(VKRFramebuffer *fb, VKRRenderPassLoadAction color, VKRRenderPassLoadAction depth, VKRRenderPassLoadAction stencil, uint32_t clearColor, float clearDepth, uint8_t clearStencil, const char *tag) { @@ -1028,98 +923,6 @@ void VulkanRenderManager::CopyImageToMemorySync(VkImage image, int mipLevel, int queueRunner_.CopyReadbackBuffer(w, h, destFormat, destFormat, pixelStride, pixels); } -bool VulkanRenderManager::InitBackbufferFramebuffers(int width, int height) { - VkResult res; - // We share the same depth buffer but have multiple color buffers, see the loop below. 
- VkImageView attachments[2] = { VK_NULL_HANDLE, depth_.view }; - - VkFramebufferCreateInfo fb_info = { VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO }; - fb_info.renderPass = queueRunner_.GetCompatibleRenderPass()->Get(vulkan_, RP_TYPE_BACKBUFFER); - fb_info.attachmentCount = 2; - fb_info.pAttachments = attachments; - fb_info.width = width; - fb_info.height = height; - fb_info.layers = 1; - - framebuffers_.resize(swapchainImageCount_); - - for (uint32_t i = 0; i < swapchainImageCount_; i++) { - attachments[0] = swapchainImages_[i].view; - res = vkCreateFramebuffer(vulkan_->GetDevice(), &fb_info, nullptr, &framebuffers_[i]); - _dbg_assert_(res == VK_SUCCESS); - if (res != VK_SUCCESS) { - framebuffers_.clear(); - return false; - } - } - - return true; -} - -bool VulkanRenderManager::InitDepthStencilBuffer(VkCommandBuffer cmd) { - const VkFormat depth_format = vulkan_->GetDeviceInfo().preferredDepthStencilFormat; - int aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - VkImageCreateInfo image_info = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO }; - image_info.imageType = VK_IMAGE_TYPE_2D; - image_info.format = depth_format; - image_info.extent.width = vulkan_->GetBackbufferWidth(); - image_info.extent.height = vulkan_->GetBackbufferHeight(); - image_info.extent.depth = 1; - image_info.mipLevels = 1; - image_info.arrayLayers = 1; - image_info.samples = VK_SAMPLE_COUNT_1_BIT; - image_info.queueFamilyIndexCount = 0; - image_info.pQueueFamilyIndices = nullptr; - image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - image_info.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; - image_info.flags = 0; - - depth_.format = depth_format; - - VmaAllocationCreateInfo allocCreateInfo{}; - VmaAllocationInfo allocInfo{}; - - allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; - - VkResult res = vmaCreateImage(vulkan_->Allocator(), &image_info, &allocCreateInfo, &depth_.image, &depth_.alloc, &allocInfo); - _dbg_assert_(res == VK_SUCCESS); - if (res != VK_SUCCESS) - 
return false; - - vulkan_->SetDebugName(depth_.image, VK_OBJECT_TYPE_IMAGE, "BackbufferDepth"); - - TransitionImageLayout2(cmd, depth_.image, 0, 1, - aspectMask, - VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, - VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, - VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, - 0, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT); - - VkImageViewCreateInfo depth_view_info = { VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO }; - depth_view_info.image = depth_.image; - depth_view_info.format = depth_format; - depth_view_info.components.r = VK_COMPONENT_SWIZZLE_IDENTITY; - depth_view_info.components.g = VK_COMPONENT_SWIZZLE_IDENTITY; - depth_view_info.components.b = VK_COMPONENT_SWIZZLE_IDENTITY; - depth_view_info.components.a = VK_COMPONENT_SWIZZLE_IDENTITY; - depth_view_info.subresourceRange.aspectMask = aspectMask; - depth_view_info.subresourceRange.baseMipLevel = 0; - depth_view_info.subresourceRange.levelCount = 1; - depth_view_info.subresourceRange.baseArrayLayer = 0; - depth_view_info.subresourceRange.layerCount = 1; - depth_view_info.viewType = VK_IMAGE_VIEW_TYPE_2D; - depth_view_info.flags = 0; - - VkDevice device = vulkan_->GetDevice(); - - res = vkCreateImageView(device, &depth_view_info, NULL, &depth_.view); - _dbg_assert_(res == VK_SUCCESS); - if (res != VK_SUCCESS) - return false; - - return true; -} - static void RemoveDrawCommands(std::vector *cmds) { // Here we remove any DRAW type commands when we hit a CLEAR. for (auto &c : *cmds) { @@ -1359,6 +1162,9 @@ VkImageView VulkanRenderManager::BindFramebufferAsTexture(VKRFramebuffer *fb, in } } +// Called on main thread. +// Sends the collected commands to the render thread. Submit-latency should be +// measured from here, probably. 
void VulkanRenderManager::Finish() { EndCurRenderStep(); @@ -1371,18 +1177,14 @@ void VulkanRenderManager::Finish() { int curFrame = vulkan_->GetCurFrame(); FrameData &frameData = frameData_[curFrame]; - if (!useThread_) { - frameData.steps = std::move(steps_); - steps_.clear(); - frameData.type = VKRRunType::END; - Run(curFrame); - } else { + + { std::unique_lock lock(frameData.pull_mutex); VLOG("PUSH: Frame[%d].readyForRun = true", curFrame); frameData.steps = std::move(steps_); steps_.clear(); frameData.readyForRun = true; - frameData.type = VKRRunType::END; + frameData.runType_ = VKRRunType::END; frameData.pull_condVar.notify_all(); } vulkan_->EndFrame(); @@ -1397,118 +1199,39 @@ void VulkanRenderManager::Wipe() { steps_.clear(); } +// Called on the render thread. +// // Can be called multiple times with no bad side effects. This is so that we can either begin a frame the normal way, // or stop it in the middle for a synchronous readback, then start over again mostly normally but without repeating // the backbuffer image acquisition. void VulkanRenderManager::BeginSubmitFrame(int frame) { FrameData &frameData = frameData_[frame]; - if (!frameData.hasBegun) { - // Get the index of the next available swapchain image, and a semaphore to block command buffer execution on. - VkResult res = vkAcquireNextImageKHR(vulkan_->GetDevice(), vulkan_->GetSwapchain(), UINT64_MAX, acquireSemaphore_, (VkFence)VK_NULL_HANDLE, &frameData.curSwapchainImage); - if (res == VK_SUBOPTIMAL_KHR) { - // Hopefully the resize will happen shortly. Ignore - one frame might look bad or something. - WARN_LOG(G3D, "VK_SUBOPTIMAL_KHR returned - ignoring"); - } else if (res == VK_ERROR_OUT_OF_DATE_KHR) { - WARN_LOG(G3D, "VK_ERROR_OUT_OF_DATE_KHR returned - processing the frame, but not presenting"); - frameData.skipSwap = true; - } else { - _assert_msg_(res == VK_SUCCESS, "vkAcquireNextImageKHR failed! 
result=%s", VulkanResultToString(res)); - } + // Should only have at most the init command buffer pending here (that one came from the other thread). + _dbg_assert_(!frameData.hasPresentCommands); + frameData.SubmitPending(vulkan_, FrameSubmitType::Pending, frameDataShared_); + + if (!frameData.hasMainCommands) { + // Effectively resets both main and present command buffers, since they both live in this pool. + // We always record main commands first, so we don't need to reset the present command buffer separately. vkResetCommandPool(vulkan_->GetDevice(), frameData.cmdPoolMain, 0); + VkCommandBufferBeginInfo begin{ VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO }; begin.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - res = vkBeginCommandBuffer(frameData.mainCmd, &begin); - + VkResult res = vkBeginCommandBuffer(frameData.mainCmd, &begin); + frameData.hasMainCommands = true; _assert_msg_(res == VK_SUCCESS, "vkBeginCommandBuffer failed! result=%s", VulkanResultToString(res)); - - queueRunner_.SetBackbuffer(framebuffers_[frameData.curSwapchainImage], swapchainImages_[frameData.curSwapchainImage].image); - - frameData.hasBegun = true; } } -void VulkanRenderManager::Submit(int frame, bool triggerFrameFence) { - FrameData &frameData = frameData_[frame]; - if (frameData.hasInitCommands) { - if (frameData.profilingEnabled_ && triggerFrameFence) { - // Pre-allocated query ID 1. - vkCmdWriteTimestamp(frameData.initCmd, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, frameData.profile.queryPool, 1); - } - VkResult res = vkEndCommandBuffer(frameData.initCmd); - _assert_msg_(res == VK_SUCCESS, "vkEndCommandBuffer failed (init)! result=%s", VulkanResultToString(res)); - } - - VkResult res = vkEndCommandBuffer(frameData.mainCmd); - _assert_msg_(res == VK_SUCCESS, "vkEndCommandBuffer failed (main)! 
result=%s", VulkanResultToString(res)); - - VkCommandBuffer cmdBufs[2]; - int numCmdBufs = 0; - if (frameData.hasInitCommands) { - cmdBufs[numCmdBufs++] = frameData.initCmd; - if (splitSubmit_) { - // Send the init commands off separately. Used this once to confirm that the cause of a device loss was in the init cmdbuf. - VkSubmitInfo submit_info{ VK_STRUCTURE_TYPE_SUBMIT_INFO }; - submit_info.commandBufferCount = (uint32_t)numCmdBufs; - submit_info.pCommandBuffers = cmdBufs; - res = vkQueueSubmit(vulkan_->GetGraphicsQueue(), 1, &submit_info, VK_NULL_HANDLE); - if (res == VK_ERROR_DEVICE_LOST) { - _assert_msg_(false, "Lost the Vulkan device in split submit! If this happens again, switch Graphics Backend away from Vulkan"); - } else { - _assert_msg_(res == VK_SUCCESS, "vkQueueSubmit failed (init)! result=%s", VulkanResultToString(res)); - } - numCmdBufs = 0; - } - } - cmdBufs[numCmdBufs++] = frameData.mainCmd; - - VkSubmitInfo submit_info{ VK_STRUCTURE_TYPE_SUBMIT_INFO }; - VkPipelineStageFlags waitStage[1]{ VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT }; - if (triggerFrameFence && !frameData.skipSwap) { - submit_info.waitSemaphoreCount = 1; - submit_info.pWaitSemaphores = &acquireSemaphore_; - submit_info.pWaitDstStageMask = waitStage; - } - submit_info.commandBufferCount = (uint32_t)numCmdBufs; - submit_info.pCommandBuffers = cmdBufs; - if (triggerFrameFence && !frameData.skipSwap) { - submit_info.signalSemaphoreCount = 1; - submit_info.pSignalSemaphores = &renderingCompleteSemaphore_; - } - res = vkQueueSubmit(vulkan_->GetGraphicsQueue(), 1, &submit_info, triggerFrameFence ? frameData.fence : frameData.readbackFence); - if (res == VK_ERROR_DEVICE_LOST) { - _assert_msg_(false, "Lost the Vulkan device in vkQueueSubmit! If this happens again, switch Graphics Backend away from Vulkan"); - } else { - _assert_msg_(res == VK_SUCCESS, "vkQueueSubmit failed (main, split=%d)! 
result=%s", (int)splitSubmit_, VulkanResultToString(res)); - } - - // When !triggerFence, we notify after syncing with Vulkan. - if (useThread_ && triggerFrameFence) { - VLOG("PULL: Frame %d.readyForFence = true", frame); - std::unique_lock lock(frameData.push_mutex); - frameData.readyForFence = true; - frameData.push_condVar.notify_all(); - } - - frameData.hasInitCommands = false; -} - +// Called on the render thread. void VulkanRenderManager::EndSubmitFrame(int frame) { FrameData &frameData = frameData_[frame]; - frameData.hasBegun = false; - Submit(frame, true); + frameData.SubmitPending(vulkan_, FrameSubmitType::Present, frameDataShared_); if (!frameData.skipSwap) { - VkSwapchainKHR swapchain = vulkan_->GetSwapchain(); - VkPresentInfoKHR present = { VK_STRUCTURE_TYPE_PRESENT_INFO_KHR }; - present.swapchainCount = 1; - present.pSwapchains = &swapchain; - present.pImageIndices = &frameData.curSwapchainImage; - present.pWaitSemaphores = &renderingCompleteSemaphore_; - present.waitSemaphoreCount = 1; - - VkResult res = vkQueuePresentKHR(vulkan_->GetGraphicsQueue(), &present); + VkResult res = frameData.QueuePresent(vulkan_, frameDataShared_); if (res == VK_ERROR_OUT_OF_DATE_KHR) { // We clearly didn't get this in vkAcquireNextImageKHR because of the skipSwap check above. // Do the increment. @@ -1528,18 +1251,29 @@ void VulkanRenderManager::EndSubmitFrame(int frame) { } } +void VulkanRenderManager::EndSyncFrame(int frame) { + FrameData &frameData = frameData_[frame]; + + // The submit will trigger the readbackFence, and also do the wait for it. + frameData.SubmitPending(vulkan_, FrameSubmitType::Sync, frameDataShared_); + + // At this point we can resume filling the command buffers for the current frame since + // we know the device is idle - and thus all previously enqueued command buffers have been processed. + // No need to switch to the next frame number, would just be confusing. 
+ std::unique_lock lock(frameData.push_mutex); + frameData.readyForFence = true; + frameData.push_condVar.notify_all(); +} + void VulkanRenderManager::Run(int frame) { BeginSubmitFrame(frame); FrameData &frameData = frameData_[frame]; - auto &stepsOnThread = frameData_[frame].steps; - VkCommandBuffer cmd = frameData.mainCmd; - queueRunner_.PreprocessSteps(stepsOnThread); + queueRunner_.PreprocessSteps(frameData_[frame].steps); //queueRunner_.LogSteps(stepsOnThread, false); - queueRunner_.RunSteps(cmd, stepsOnThread, frameData.profilingEnabled_ ? &frameData.profile : nullptr); - stepsOnThread.clear(); + queueRunner_.RunSteps(frameData, frameDataShared_); - switch (frameData.type) { + switch (frameData.runType_) { case VKRRunType::END: EndSubmitFrame(frame); break; @@ -1555,59 +1289,24 @@ void VulkanRenderManager::Run(int frame) { VLOG("PULL: Finished running frame %d", frame); } -void VulkanRenderManager::EndSyncFrame(int frame) { - FrameData &frameData = frameData_[frame]; - - frameData.readbackFenceUsed = true; - - // The submit will trigger the readbackFence. - Submit(frame, false); - - // Hard stall of the GPU, not ideal, but necessary so the CPU has the contents of the readback. - vkWaitForFences(vulkan_->GetDevice(), 1, &frameData.readbackFence, true, UINT64_MAX); - vkResetFences(vulkan_->GetDevice(), 1, &frameData.readbackFence); - - // At this point we can resume filling the command buffers for the current frame since - // we know the device is idle - and thus all previously enqueued command buffers have been processed. - // No need to switch to the next frame number. 
- VkCommandBufferBeginInfo begin{ - VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, - nullptr, - VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT - }; - vkResetCommandPool(vulkan_->GetDevice(), frameData.cmdPoolMain, 0); - VkResult res = vkBeginCommandBuffer(frameData.mainCmd, &begin); - _assert_(res == VK_SUCCESS); - - if (useThread_) { - std::unique_lock lock(frameData.push_mutex); - frameData.readyForFence = true; - frameData.push_condVar.notify_all(); - } -} - void VulkanRenderManager::FlushSync() { renderStepOffset_ += (int)steps_.size(); int curFrame = vulkan_->GetCurFrame(); FrameData &frameData = frameData_[curFrame]; - if (!useThread_) { - frameData.steps = std::move(steps_); - steps_.clear(); - frameData.type = VKRRunType::SYNC; - Run(curFrame); - } else { + + { std::unique_lock lock(frameData.pull_mutex); VLOG("PUSH: Frame[%d].readyForRun = true (sync)", curFrame); frameData.steps = std::move(steps_); steps_.clear(); frameData.readyForRun = true; _dbg_assert_(!frameData.readyForFence); - frameData.type = VKRRunType::SYNC; + frameData.runType_ = VKRRunType::SYNC; frameData.pull_condVar.notify_all(); } - if (useThread_) { + { std::unique_lock lock(frameData.push_mutex); // Wait for the flush to be hit, since we're syncing. while (!frameData.readyForFence) { diff --git a/Common/GPU/Vulkan/VulkanRenderManager.h b/Common/GPU/Vulkan/VulkanRenderManager.h index 4cc7aafdfc..43cbbb7b86 100644 --- a/Common/GPU/Vulkan/VulkanRenderManager.h +++ b/Common/GPU/Vulkan/VulkanRenderManager.h @@ -65,15 +65,6 @@ private: std::string tag_; }; -enum class VKRRunType { - END, - SYNC, -}; - -enum { - MAX_TIMESTAMP_QUERIES = 128, -}; - struct BoundingRect { int x1; int y1; @@ -236,6 +227,8 @@ public: // as the other backends, even though there's no actual binding happening here. 
VkImageView BindFramebufferAsTexture(VKRFramebuffer *fb, int binding, VkImageAspectFlags aspectBits, int attachment); + void BindCurrentFramebufferAsInputAttachment0(VkImageAspectFlags aspectBits); + bool CopyFramebufferToMemorySync(VKRFramebuffer *src, VkImageAspectFlags aspectBits, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, const char *tag); void CopyImageToMemorySync(VkImage image, int mipLevel, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, const char *tag); @@ -440,11 +433,7 @@ public: void DestroyBackbuffers(); bool HasBackbuffers() { - return !framebuffers_.empty(); - } - - void SetSplitSubmit(bool split) { - splitSubmit_ = split; + return queueRunner_.HasBackbuffers(); } void SetInflightFrames(int f) { @@ -470,13 +459,10 @@ public: } private: - bool InitBackbufferFramebuffers(int width, int height); - bool InitDepthStencilBuffer(VkCommandBuffer cmd); // Used for non-buffered rendering. void EndCurRenderStep(); void BeginSubmitFrame(int frame); void EndSubmitFrame(int frame); - void Submit(int frame, bool triggerFence); // Bad for performance but sometimes necessary for synchronous CPU readbacks (screenshots and whatnot). void FlushSync(); @@ -484,43 +470,7 @@ private: void StopThread(); - // Permanent objects - VkSemaphore acquireSemaphore_; - VkSemaphore renderingCompleteSemaphore_; - - // Per-frame data, round-robin so we can overlap submission with execution of the previous frame. - struct FrameData { - std::mutex push_mutex; - std::condition_variable push_condVar; - - std::mutex pull_mutex; - std::condition_variable pull_condVar; - - bool readyForFence = true; - bool readyForRun = false; - bool skipSwap = false; - VKRRunType type = VKRRunType::END; - - VkFence fence; - VkFence readbackFence; // Strictly speaking we might only need one of these. - bool readbackFenceUsed = false; - - // These are on different threads so need separate pools. 
- VkCommandPool cmdPoolInit; - VkCommandPool cmdPoolMain; - VkCommandBuffer initCmd; - VkCommandBuffer mainCmd; - bool hasInitCommands = false; - std::vector steps; - - // Swapchain. - bool hasBegun = false; - uint32_t curSwapchainImage = -1; - - // Profiling. - QueueProfileContext profile; - bool profilingEnabled_; - }; + FrameDataShared frameDataShared_; FrameData frameData_[VulkanContext::MAX_INFLIGHT_FRAMES]; int newInflightFrames_ = -1; @@ -544,11 +494,10 @@ private: VKRStep *curRenderStep_ = nullptr; bool curStepHasViewport_ = false; bool curStepHasScissor_ = false; - u32 curPipelineFlags_ = 0; + PipelineFlags curPipelineFlags_{}; BoundingRect curRenderArea_; std::vector steps_; - bool splitSubmit_ = false; // Execution time state bool run_ = true; @@ -568,23 +517,4 @@ private: // pipelines to check and possibly create at the end of the current render pass. std::vector pipelinesToCheck_; - - // Swap chain management - struct SwapchainImageData { - VkImage image; - VkImageView view; - }; - std::vector framebuffers_; - std::vector swapchainImages_; - uint32_t swapchainImageCount_ = 0; - struct DepthBufferInfo { - VkFormat format = VK_FORMAT_UNDEFINED; - VkImage image = VK_NULL_HANDLE; - VmaAllocation alloc = VK_NULL_HANDLE; - VkImageView view = VK_NULL_HANDLE; - }; - DepthBufferInfo depth_; - - // This works great - except see issue #10097. WTF? - bool useThread_ = true; }; diff --git a/Common/GPU/Vulkan/thin3d_vulkan.cpp b/Common/GPU/Vulkan/thin3d_vulkan.cpp index 8420c01450..34bbdd5efb 100644 --- a/Common/GPU/Vulkan/thin3d_vulkan.cpp +++ b/Common/GPU/Vulkan/thin3d_vulkan.cpp @@ -361,7 +361,7 @@ class VKFramebuffer; class VKContext : public DrawContext { public: - VKContext(VulkanContext *vulkan, bool splitSubmit); + VKContext(VulkanContext *vulkan); virtual ~VKContext(); const DeviceCaps &GetDeviceCaps() const override { @@ -401,9 +401,10 @@ public: // These functions should be self explanatory. 
void BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPassInfo &rp, const char *tag) override; Framebuffer *GetCurrentRenderTarget() override { - return curFramebuffer_; + return (Framebuffer *)curFramebuffer_.ptr; } void BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int attachment) override; + void BindCurrentFramebufferForColorInput() override; void GetFramebufferDimensions(Framebuffer *fbo, int *w, int *h) override; @@ -473,27 +474,7 @@ public: std::vector GetFeatureList() const override; std::vector GetExtensionList() const override; - uint64_t GetNativeObject(NativeObject obj, void *srcObject) override { - switch (obj) { - case NativeObject::CONTEXT: - return (uint64_t)vulkan_; - case NativeObject::INIT_COMMANDBUFFER: - return (uint64_t)renderManager_.GetInitCmd(); - case NativeObject::BOUND_TEXTURE0_IMAGEVIEW: - return (uint64_t)boundImageView_[0]; - case NativeObject::BOUND_TEXTURE1_IMAGEVIEW: - return (uint64_t)boundImageView_[1]; - case NativeObject::RENDER_MANAGER: - return (uint64_t)(uintptr_t)&renderManager_; - case NativeObject::NULL_IMAGEVIEW: - return (uint64_t)GetNullTexture()->GetImageView(); - case NativeObject::TEXTURE_VIEW: - return (uint64_t)(((VKTexture *)srcObject)->GetImageView()); - default: - Crash(); - return 0; - } - } + uint64_t GetNativeObject(NativeObject obj, void *srcObject) override; void HandleEvent(Event ev, int width, int height, void *param1, void *param2) override; @@ -522,7 +503,7 @@ private: VkDescriptorSetLayout descriptorSetLayout_ = VK_NULL_HANDLE; VkPipelineLayout pipelineLayout_ = VK_NULL_HANDLE; VkPipelineCache pipelineCache_ = VK_NULL_HANDLE; - AutoRef curFramebuffer_; + AutoRef curFramebuffer_; VkDevice device_; VkQueue queue_; @@ -781,7 +762,7 @@ bool VKTexture::Create(VkCommandBuffer cmd, VulkanPushBuffer *push, const Textur return true; } -VKContext::VKContext(VulkanContext *vulkan, bool splitSubmit) +VKContext::VKContext(VulkanContext *vulkan) : vulkan_(vulkan), 
renderManager_(vulkan) { shaderLanguageDesc_.Init(GLSL_VULKAN); @@ -807,9 +788,11 @@ VKContext::VKContext(VulkanContext *vulkan, bool splitSubmit) caps_.fragmentShaderInt32Supported = true; caps_.textureNPOTFullySupported = true; caps_.fragmentShaderDepthWriteSupported = true; + caps_.blendMinMaxSupported = true; caps_.logicOpSupported = vulkan->GetDeviceFeatures().enabled.logicOp != 0; auto deviceProps = vulkan->GetPhysicalDeviceProperties(vulkan_->GetCurrentPhysicalDeviceIndex()).properties; + switch (deviceProps.vendorID) { case VULKAN_VENDOR_AMD: caps_.vendor = GPUVendor::VENDOR_AMD; break; case VULKAN_VENDOR_ARM: caps_.vendor = GPUVendor::VENDOR_ARM; break; @@ -831,6 +814,11 @@ VKContext::VKContext(VulkanContext *vulkan, bool splitSubmit) // Color write mask not masking write in certain scenarios with a depth test, see #10421. // Known still present on driver 0x80180000 and Adreno 5xx (possibly more.) bugs_.Infest(Bugs::COLORWRITEMASK_BROKEN_WITH_DEPTHTEST); + + // Trying to follow all the rules in https://registry.khronos.org/vulkan/specs/1.3/html/vkspec.html#synchronization-pipeline-barriers-subpass-self-dependencies + // and https://registry.khronos.org/vulkan/specs/1.3/html/vkspec.html#renderpass-feedbackloop, but still it doesn't + // quite work - artifacts on triangle boundaries on Adreno. + bugs_.Infest(Bugs::SUBPASS_FEEDBACK_BROKEN); } else if (caps_.vendor == GPUVendor::VENDOR_AMD) { // See issue #10074, and also #10065 (AMD) and #10109 for the choice of the driver version to check for. if (deviceProps.driverVersion < 0x00407000) { @@ -840,19 +828,27 @@ VKContext::VKContext(VulkanContext *vulkan, bool splitSubmit) // Workaround for Intel driver bug. TODO: Re-enable after some driver version bugs_.Infest(Bugs::DUAL_SOURCE_BLENDING_BROKEN); } else if (caps_.vendor == GPUVendor::VENDOR_ARM) { + int majorVersion = VK_API_VERSION_MAJOR(deviceProps.driverVersion); + // These GPUs (up to some certain hardware version?) 
have a bug where draws where gl_Position.w == .z // corrupt the depth buffer. This is easily worked around by simply scaling Z down a tiny bit when this case // is detected. See: https://github.com/hrydgard/ppsspp/issues/11937 bugs_.Infest(Bugs::EQUAL_WZ_CORRUPTS_DEPTH); - // At least one driver at the upper end of the range is known to be likely to suffer from the bug causing issue #13833 (Midnight Club map broken). - bugs_.Infest(Bugs::MALI_STENCIL_DISCARD_BUG); - // This started in driver 31 or 32. - if (VK_API_VERSION_MAJOR(deviceProps.driverVersion) >= 32) { + // Nearly identical to the the Adreno bug, see #13833 (Midnight Club map broken) and other issues. + // Reported fixed in major version 40 - let's add a check once confirmed. + bugs_.Infest(Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL); + + // This started in driver 31 or 32, fixed in 40 - let's add a check once confirmed. + if (majorVersion >= 32) { bugs_.Infest(Bugs::MALI_CONSTANT_LOAD_BUG); // See issue #15661 } } + // Limited, through input attachments and self-dependencies. + // We turn it off here already if buggy. 
+ caps_.framebufferFetchSupported = !bugs_.Has(Bugs::SUBPASS_FEEDBACK_BROKEN); + caps_.deviceID = deviceProps.deviceID; device_ = vulkan->GetDevice(); @@ -920,8 +916,6 @@ VKContext::VKContext(VulkanContext *vulkan, bool splitSubmit) VkPipelineCacheCreateInfo pc{ VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO }; res = vkCreatePipelineCache(vulkan_->GetDevice(), &pc, nullptr, &pipelineCache_); _assert_(VK_SUCCESS == res); - - renderManager_.SetSplitSubmit(splitSubmit); } VKContext::~VKContext() { @@ -1058,12 +1052,12 @@ Pipeline *VKContext::CreateGraphicsPipeline(const PipelineDesc &desc, const char VKDepthStencilState *depth = (VKDepthStencilState *)desc.depthStencil; VKRasterState *raster = (VKRasterState *)desc.raster; - u32 pipelineFlags = 0; + PipelineFlags pipelineFlags = (PipelineFlags)0; if (depth->info.depthTestEnable || depth->info.stencilTestEnable) { - pipelineFlags |= PIPELINE_FLAG_USES_DEPTH_STENCIL; + pipelineFlags |= PipelineFlags::USES_DEPTH_STENCIL; } - VKPipeline *pipeline = new VKPipeline(vulkan_, desc.uniformDesc ? desc.uniformDesc->uniformBufferSize : 16 * sizeof(float), (PipelineFlags)pipelineFlags, tag); + VKPipeline *pipeline = new VKPipeline(vulkan_, desc.uniformDesc ? 
desc.uniformDesc->uniformBufferSize : 16 * sizeof(float), pipelineFlags, tag); VKRGraphicsPipelineDesc &gDesc = pipeline->vkrDesc; @@ -1401,8 +1395,8 @@ void VKContext::Clear(int clearMask, uint32_t colorval, float depthVal, int sten renderManager_.Clear(colorval, depthVal, stencilVal, mask); } -DrawContext *T3DCreateVulkanContext(VulkanContext *vulkan, bool split) { - return new VKContext(vulkan, split); +DrawContext *T3DCreateVulkanContext(VulkanContext *vulkan) { + return new VKContext(vulkan); } void AddFeature(std::vector &features, const char *name, VkBool32 available, VkBool32 enabled) { @@ -1584,6 +1578,10 @@ void VKContext::BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChanne boundImageView_[binding] = renderManager_.BindFramebufferAsTexture(fb->GetFB(), binding, aspect, attachment); } +void VKContext::BindCurrentFramebufferForColorInput() { + renderManager_.BindCurrentFramebufferAsInputAttachment0(VK_IMAGE_ASPECT_COLOR_BIT); +} + void VKContext::GetFramebufferDimensions(Framebuffer *fbo, int *w, int *h) { VKFramebuffer *fb = (VKFramebuffer *)fbo; if (fb) { @@ -1624,4 +1622,28 @@ void VKContext::InvalidateFramebuffer(FBInvalidationStage stage, uint32_t channe } } +uint64_t VKContext::GetNativeObject(NativeObject obj, void *srcObject) { + switch (obj) { + case NativeObject::CONTEXT: + return (uint64_t)vulkan_; + case NativeObject::INIT_COMMANDBUFFER: + return (uint64_t)renderManager_.GetInitCmd(); + case NativeObject::BOUND_TEXTURE0_IMAGEVIEW: + return (uint64_t)boundImageView_[0]; + case NativeObject::BOUND_TEXTURE1_IMAGEVIEW: + return (uint64_t)boundImageView_[1]; + case NativeObject::RENDER_MANAGER: + return (uint64_t)(uintptr_t)&renderManager_; + case NativeObject::NULL_IMAGEVIEW: + return (uint64_t)GetNullTexture()->GetImageView(); + case NativeObject::TEXTURE_VIEW: + return (uint64_t)(((VKTexture *)srcObject)->GetImageView()); + case NativeObject::BOUND_FRAMEBUFFER_COLOR_IMAGEVIEW: + return 
(uint64_t)curFramebuffer_->GetFB()->color.imageView; + default: + Crash(); + return 0; + } +} + } // namespace Draw diff --git a/Common/GPU/thin3d.cpp b/Common/GPU/thin3d.cpp index 1cc3c11fa0..acaa3dac0b 100644 --- a/Common/GPU/thin3d.cpp +++ b/Common/GPU/thin3d.cpp @@ -678,9 +678,9 @@ const char *Bugs::GetBugName(uint32_t bug) { case COLORWRITEMASK_BROKEN_WITH_DEPTHTEST: return "COLORWRITEMASK_BROKEN_WITH_DEPTHTEST"; case BROKEN_FLAT_IN_SHADER: return "BROKEN_FLAT_IN_SHADER"; case EQUAL_WZ_CORRUPTS_DEPTH: return "EQUAL_WZ_CORRUPTS_DEPTH"; - case MALI_STENCIL_DISCARD_BUG: return "MALI_STENCIL_DISCARD_BUG"; case RASPBERRY_SHADER_COMP_HANG: return "RASPBERRY_SHADER_COMP_HANG"; case MALI_CONSTANT_LOAD_BUG: return "MALI_CONSTANT_LOAD_BUG"; + case SUBPASS_FEEDBACK_BROKEN: return "SUBPASS_FEEDBACK_BROKEN"; default: return "(N/A)"; } } diff --git a/Common/GPU/thin3d.h b/Common/GPU/thin3d.h index 843e6e8813..0fba6ee6f2 100644 --- a/Common/GPU/thin3d.h +++ b/Common/GPU/thin3d.h @@ -242,6 +242,7 @@ enum class NativeObject { INIT_COMMANDBUFFER, BOUND_TEXTURE0_IMAGEVIEW, BOUND_TEXTURE1_IMAGEVIEW, + BOUND_FRAMEBUFFER_COLOR_IMAGEVIEW, RENDER_MANAGER, TEXTURE_VIEW, NULL_IMAGEVIEW, @@ -328,9 +329,9 @@ public: COLORWRITEMASK_BROKEN_WITH_DEPTHTEST = 5, BROKEN_FLAT_IN_SHADER = 6, EQUAL_WZ_CORRUPTS_DEPTH = 7, - MALI_STENCIL_DISCARD_BUG = 8, - RASPBERRY_SHADER_COMP_HANG = 9, - MALI_CONSTANT_LOAD_BUG = 10, + RASPBERRY_SHADER_COMP_HANG = 8, + MALI_CONSTANT_LOAD_BUG = 9, + SUBPASS_FEEDBACK_BROKEN = 10, MAX_BUG, }; @@ -546,6 +547,7 @@ struct DeviceCaps { bool textureNPOTFullySupported; bool fragmentShaderDepthWriteSupported; bool textureDepthSupported; + bool blendMinMaxSupported; std::string deviceName; // The device name to use when creating the thin3d context, to get the same one. }; @@ -651,6 +653,9 @@ public: // binding must be < MAX_TEXTURE_SLOTS (0, 1 are okay if it's 2). 
virtual void BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int attachment) = 0; + // Framebuffer fetch / input attachment support, needs to be explicit in Vulkan. + virtual void BindCurrentFramebufferForColorInput() {} + // deprecated, only used by D3D9 virtual uintptr_t GetFramebufferAPITexture(Framebuffer *fbo, int channelBits, int attachment) { return 0; diff --git a/Common/GPU/thin3d_create.h b/Common/GPU/thin3d_create.h index 702e7f45c2..fccb3b076c 100644 --- a/Common/GPU/thin3d_create.h +++ b/Common/GPU/thin3d_create.h @@ -31,6 +31,6 @@ DrawContext *T3DCreateDX9Context(IDirect3D9 *d3d, IDirect3D9Ex *d3dEx, int adapt DrawContext *T3DCreateD3D11Context(ID3D11Device *device, ID3D11DeviceContext *context, ID3D11Device1 *device1, ID3D11DeviceContext1 *context1, D3D_FEATURE_LEVEL featureLevel, HWND hWnd, std::vector adapterNames); #endif -DrawContext *T3DCreateVulkanContext(VulkanContext *context, bool splitSubmit); +DrawContext *T3DCreateVulkanContext(VulkanContext *context); } // namespace Draw diff --git a/Common/UI/Context.cpp b/Common/UI/Context.cpp index e9fe8ed0c5..21c687bd2b 100644 --- a/Common/UI/Context.cpp +++ b/Common/UI/Context.cpp @@ -170,7 +170,7 @@ void UIContext::ActivateTopScissor() { int h = std::max(0.0f, ceilf(scale_y * bounds.h)); if (x < 0 || y < 0 || x + w > pixel_xres || y + h > pixel_yres) { // This won't actually report outside a game, but we can try. - ERROR_LOG_REPORT(G3D, "UI scissor out of bounds: %d,%d-%d,%d / %d,%d", x, y, w, h, pixel_xres, pixel_yres); + ERROR_LOG_REPORT(G3D, "UI scissor out of bounds in %sScreen: %d,%d-%d,%d / %d,%d", screenTag_ ? 
screenTag_ : "N/A", x, y, w, h, pixel_xres, pixel_yres); x = std::max(0, x); y = std::max(0, y); w = std::min(w, pixel_xres - x); diff --git a/Common/UI/Context.h b/Common/UI/Context.h index ad02b4027c..ac26ab0202 100644 --- a/Common/UI/Context.h +++ b/Common/UI/Context.h @@ -74,7 +74,6 @@ public: const UI::Theme *theme; // Utility methods - TextDrawer *Text() const { return textDrawer_; } void SetFontStyle(const UI::FontStyle &style); @@ -103,6 +102,10 @@ public: void setUIAtlas(const std::string &name); + void SetScreenTag(const char *tag) { + screenTag_ = tag; + } + private: Draw::DrawContext *draw_ = nullptr; Bounds bounds_; @@ -126,4 +129,6 @@ private: std::string lastUIAtlas_; std::string UIAtlas_ = "ui_atlas.zim"; + + const char *screenTag_ = nullptr; }; diff --git a/Common/UI/Screen.h b/Common/UI/Screen.h index de24c74985..42f0830627 100644 --- a/Common/UI/Screen.h +++ b/Common/UI/Screen.h @@ -71,7 +71,7 @@ public: // what screen it is. virtual void *dialogData() { return 0; } - virtual std::string tag() const { return std::string(""); } + virtual const char *tag() const = 0; virtual bool isTransparent() const { return false; } virtual bool isTopLevel() const { return false; } diff --git a/Common/UI/UIScreen.cpp b/Common/UI/UIScreen.cpp index 8026de37bc..ecebae3a81 100644 --- a/Common/UI/UIScreen.cpp +++ b/Common/UI/UIScreen.cpp @@ -117,6 +117,9 @@ void UIScreen::render() { if (root_) { UIContext *uiContext = screenManager()->getUIContext(); + + uiContext->SetScreenTag(tag()); + UI::LayoutViewHierarchy(*uiContext, root_, ignoreInsets_); uiContext->PushTransform({translation_, scale_, alpha_}); diff --git a/Common/UI/UIScreen.h b/Common/UI/UIScreen.h index 43ec130d9d..4b113d3001 100644 --- a/Common/UI/UIScreen.h +++ b/Common/UI/UIScreen.h @@ -136,7 +136,7 @@ public: void SetHiddenChoices(std::set hidden) { hidden_ = hidden; } - virtual std::string tag() const override { return std::string("listpopup"); } + const char *tag() const override { return 
"listpopup"; } UI::Event OnChoice; @@ -187,6 +187,8 @@ public: disabled_ = *value_ < 0; } + const char *tag() const override { return "SliderPopup"; } + Event OnChange; private: @@ -214,6 +216,8 @@ public: : PopupScreen(title, "OK", "Cancel"), units_(units), value_(value), originalValue_(*value), minValue_(minValue), maxValue_(maxValue), step_(step), changing_(false), liveUpdate_(liveUpdate) {} void CreatePopupContents(UI::ViewGroup *parent) override; + const char *tag() const override { return "SliderFloatPopup"; } + Event OnChange; private: @@ -241,6 +245,8 @@ public: : PopupScreen(title, "OK", "Cancel"), value_(value), placeholder_(placeholder), maxLen_(maxLen) {} virtual void CreatePopupContents(ViewGroup *parent) override; + const char *tag() const override { return "TextEditPopup"; } + Event OnChange; private: diff --git a/Common/VR/VRRenderer.cpp b/Common/VR/VRRenderer.cpp index 79e6114a61..248b2b1487 100644 --- a/Common/VR/VRRenderer.cpp +++ b/Common/VR/VRRenderer.cpp @@ -9,6 +9,7 @@ #include #include +XrFovf fov; XrView* projections; XrPosef invViewTransform[2]; XrFrameState frameState = {}; @@ -293,7 +294,12 @@ bool VR_InitFrame( engine_t* engine ) { projections)); // + fov = {}; for (int eye = 0; eye < ovrMaxNumEyes; eye++) { + fov.angleLeft += projections[eye].fov.angleLeft / 2.0f; + fov.angleRight += projections[eye].fov.angleRight / 2.0f; + fov.angleUp += projections[eye].fov.angleUp / 2.0f; + fov.angleDown += projections[eye].fov.angleDown / 2.0f; invViewTransform[eye] = projections[eye].pose; } @@ -353,10 +359,7 @@ void VR_FinishFrame( engine_t* engine ) { for (int eye = 0; eye < ovrMaxNumEyes; eye++) { int imageLayer = engine->appState.Renderer.Multiview ? 
eye : 0; ovrFramebuffer* frameBuffer = &engine->appState.Renderer.FrameBuffer[0]; - XrFovf fov = projections[eye].fov; - if (vrMode == VR_MODE_MONO_6DOF) { - fov = projections[0].fov; - } else if (!engine->appState.Renderer.Multiview) { + if ((vrMode != VR_MODE_MONO_6DOF) && !engine->appState.Renderer.Multiview) { frameBuffer = &engine->appState.Renderer.FrameBuffer[eye]; } @@ -463,7 +466,6 @@ void VR_BindFramebuffer(engine_t *engine) { ovrMatrix4f VR_GetMatrix( VRMatrix matrix ) { ovrMatrix4f output; if ((matrix == VR_PROJECTION_MATRIX_LEFT_EYE) || (matrix == VR_PROJECTION_MATRIX_RIGHT_EYE)) { - XrFovf fov = matrix == VR_PROJECTION_MATRIX_LEFT_EYE ? projections[0].fov : projections[1].fov; float near = (float)vrConfig[VR_CONFIG_FOV_SCALE] / 200.0f; output = ovrMatrix4f_CreateProjectionFov(fov.angleLeft, fov.angleRight, fov.angleUp, fov.angleDown, near, 0.0f ); } else if ((matrix == VR_VIEW_MATRIX_LEFT_EYE) || (matrix == VR_VIEW_MATRIX_RIGHT_EYE)) { diff --git a/Core/Config.cpp b/Core/Config.cpp index 27f97e89c5..3798362ef8 100644 --- a/Core/Config.cpp +++ b/Core/Config.cpp @@ -938,7 +938,6 @@ static ConfigSetting graphicsSettings[] = { ReportedConfigSetting("FragmentTestCache", &g_Config.bFragmentTestCache, true, true, true), ConfigSetting("GfxDebugOutput", &g_Config.bGfxDebugOutput, false, false, false), - ConfigSetting("GfxDebugSplitSubmit", &g_Config.bGfxDebugSplitSubmit, false, false, false), ConfigSetting("LogFrameDrops", &g_Config.bLogFrameDrops, false, true, false), ConfigSetting("InflightFrames", &g_Config.iInflightFrames, 3, true, false), diff --git a/Core/Config.h b/Core/Config.h index 6ae172bfba..36ec3a760d 100644 --- a/Core/Config.h +++ b/Core/Config.h @@ -176,6 +176,7 @@ public: bool bSustainedPerformanceMode; // Android: Slows clocks down to avoid overheating/speed fluctuations. bool bIgnoreScreenInsets; // Android: Center screen disregarding insets if this is enabled. 
bool bVSync; + int iFrameSkip; int iFrameSkipType; int iFastForwardMode; // See FastForwardMode in ConfigValues.h. @@ -242,7 +243,6 @@ public: bool bShaderChainRequires60FPS; std::string sTextureShaderName; bool bGfxDebugOutput; - bool bGfxDebugSplitSubmit; int iInflightFrames; bool bRenderDuplicateFrames; diff --git a/Core/Dialog/PSPGamedataInstallDialog.cpp b/Core/Dialog/PSPGamedataInstallDialog.cpp index 9fe30db3c8..3bbe9442f1 100644 --- a/Core/Dialog/PSPGamedataInstallDialog.cpp +++ b/Core/Dialog/PSPGamedataInstallDialog.cpp @@ -36,8 +36,9 @@ const static u32 GAMEDATA_BYTES_PER_READ = 32768; // If this is too high, some games (e.g. Senjou no Valkyria 3) will lag. const static u32 GAMEDATA_READS_PER_UPDATE = 20; -const u32 ERROR_UTILITY_GAMEDATA_MEMSTRICK_WRITE_PROTECTED = 0x80111903; const u32 ERROR_UTILITY_GAMEDATA_MEMSTRICK_REMOVED = 0x80111901; +const u32 ERROR_UTILITY_GAMEDATA_MEMSTRICK_WRITE_PROTECTED = 0x80111903; +const u32 ERROR_UTILITY_GAMEDATA_INVALID_MODE = 0x80111908; static const std::string SFO_FILENAME = "PARAM.SFO"; @@ -88,9 +89,14 @@ int PSPGamedataInstallDialog::Init(u32 paramAddr) { } int size = Memory::Read_U32(paramAddr); + if (size != 1424 && size != 1432) { + ERROR_LOG_REPORT(SCEUTILITY, "sceGamedataInstallInitStart: invalid param size %d", size); + return SCE_ERROR_UTILITY_INVALID_PARAM_SIZE; + } + memset(&request, 0, sizeof(request)); // Only copy the right size to support different request format - Memory::Memcpy(&request, paramAddr, size); + Memory::Memcpy(&request, paramAddr, size, "sceGamedataInstallInitStart"); ChangeStatusInit(GAMEDATA_INIT_DELAY_US); return 0; @@ -100,6 +106,17 @@ int PSPGamedataInstallDialog::Update(int animSpeed) { if (GetStatus() != SCE_UTILITY_STATUS_RUNNING) return SCE_ERROR_UTILITY_INVALID_STATUS; + if (param->mode >= 2) { + param->common.result = ERROR_UTILITY_GAMEDATA_INVALID_MODE; + param.NotifyWrite("DialogResult"); + ChangeStatus(SCE_UTILITY_STATUS_FINISHED, 0); + WARN_LOG_REPORT(SCEUTILITY, 
"sceUtilityGamedataInstallUpdate: invalid mode %d", param->mode); + return 0; + } + + // TODO: param->mode == 1 should show a prompt to confirm, then a progress bar. + // Any other mode (i.e. 0 or negative) should proceed and show no UI. + // TODO: This should return error codes in some cases, like write failure. // request.common.result must be updated for errors as well. @@ -222,6 +239,9 @@ void PSPGamedataInstallDialog::WriteSfoFile() { } int PSPGamedataInstallDialog::Abort() { + param->common.result = 1; + param.NotifyWrite("DialogResult"); + // TODO: Delete the files or anything? return PSPDialog::Shutdown(); } diff --git a/Core/Dialog/PSPGamedataInstallDialog.h b/Core/Dialog/PSPGamedataInstallDialog.h index e669a31873..a74fdb5254 100644 --- a/Core/Dialog/PSPGamedataInstallDialog.h +++ b/Core/Dialog/PSPGamedataInstallDialog.h @@ -22,7 +22,7 @@ struct SceUtilityGamedataInstallParam { pspUtilityDialogCommon common; - u32_le unknown1; + s32_le mode; char gameName[13]; char ignore1[3]; char dataName[20]; diff --git a/Core/HLE/sceKernel.cpp b/Core/HLE/sceKernel.cpp index cb858c0e90..b9d1028b48 100644 --- a/Core/HLE/sceKernel.cpp +++ b/Core/HLE/sceKernel.cpp @@ -820,7 +820,7 @@ const HLEFunction ThreadManForUser[] = {0X87D4DD36, &WrapI_IU, "sceKernelCancelReceiveMbx", 'i', "ix" }, {0XA8E8C846, &WrapI_IU, "sceKernelReferMbxStatus", 'i', "ip" }, - {0X7C0DC2A0, &WrapI_CIUUU, "sceKernelCreateMsgPipe", 'i', "sixxx" }, + {0X7C0DC2A0, &WrapI_CIUUU, "sceKernelCreateMsgPipe", 'i', "sixxp" }, {0XF0B7DA1C, &WrapI_I, "sceKernelDeleteMsgPipe", 'i', "i" }, {0X876DBFAD, &WrapI_IUUUUU, "sceKernelSendMsgPipe", 'i', "ixxxxx" }, {0X7C41F2C2, &WrapI_IUUUUU, "sceKernelSendMsgPipeCB", 'i', "ixxxxx" }, @@ -831,7 +831,7 @@ const HLEFunction ThreadManForUser[] = {0X349B864D, &WrapI_IUU, "sceKernelCancelMsgPipe", 'i', "ixx" }, {0X33BE4024, &WrapI_IU, "sceKernelReferMsgPipeStatus", 'i', "ip" }, - {0X56C039B5, &WrapI_CIUUU, "sceKernelCreateVpl", 'i', "sixxx" }, + {0X56C039B5, &WrapI_CIUUU, 
"sceKernelCreateVpl", 'i', "sixxp" }, {0X89B3D48C, &WrapI_I, "sceKernelDeleteVpl", 'i', "i" }, {0XBED27435, &WrapI_IUUU, "sceKernelAllocateVpl", 'i', "ixxx", HLE_NOT_IN_INTERRUPT | HLE_NOT_DISPATCH_SUSPENDED }, {0XEC0A693F, &WrapI_IUUU, "sceKernelAllocateVplCB", 'i', "ixxx", HLE_NOT_IN_INTERRUPT | HLE_NOT_DISPATCH_SUSPENDED }, @@ -840,7 +840,7 @@ const HLEFunction ThreadManForUser[] = {0X1D371B8A, &WrapI_IU, "sceKernelCancelVpl", 'i', "ix" }, {0X39810265, &WrapI_IU, "sceKernelReferVplStatus", 'i', "ip" }, - {0XC07BB470, &WrapI_CUUUUU, "sceKernelCreateFpl", 'i', "sxxxxx" }, + {0XC07BB470, &WrapI_CUUUUU, "sceKernelCreateFpl", 'i', "sixxxp" }, {0XED1410E0, &WrapI_I, "sceKernelDeleteFpl", 'i', "i" }, {0XD979E9BF, &WrapI_IUU, "sceKernelAllocateFpl", 'i', "ixx", HLE_NOT_IN_INTERRUPT | HLE_NOT_DISPATCH_SUSPENDED }, {0XE7282CB6, &WrapI_IUU, "sceKernelAllocateFplCB", 'i', "ixx", HLE_NOT_IN_INTERRUPT | HLE_NOT_DISPATCH_SUSPENDED }, @@ -864,7 +864,7 @@ const HLEFunction ThreadManForUser[] = {0XD8B299AE, &WrapU_IUUU, "sceKernelSetVTimerHandler", 'x', "ixxx" }, {0X53B00E9A, &WrapU_IU64UU, "sceKernelSetVTimerHandlerWide", 'x', "iXxx" }, - {0X8DAFF657, &WrapI_CUUUUU, "sceKernelCreateTlspl", 'i', "sxxxxx" }, + {0X8DAFF657, &WrapI_CUUUUU, "sceKernelCreateTlspl", 'i', "sixxxp" }, {0X32BF938E, &WrapI_I, "sceKernelDeleteTlspl", 'i', "i" }, {0X721067F3, &WrapI_IU, "sceKernelReferTlsplStatus", 'i', "xp" }, // Not completely certain about args. 
@@ -908,7 +908,7 @@ const HLEFunction ThreadManForKernel[] = {0x1fb15a32, &WrapU_IU, "sceKernelSetEventFlag", 'x', "ix", HLE_KERNEL_SYSCALL }, {0x812346e4, &WrapU_IU, "sceKernelClearEventFlag", 'x', "ix", HLE_KERNEL_SYSCALL }, {0x402fcf22, &WrapI_IUUUU, "sceKernelWaitEventFlag", 'i', "ixxpp", HLE_NOT_IN_INTERRUPT | HLE_KERNEL_SYSCALL}, - {0xc07bb470, &WrapI_CUUUUU, "sceKernelCreateFpl", 'i', "sxxxxx" ,HLE_KERNEL_SYSCALL }, + {0xc07bb470, &WrapI_CUUUUU, "sceKernelCreateFpl", 'i', "sixxxp" ,HLE_KERNEL_SYSCALL }, {0xed1410e0, &WrapI_I, "sceKernelDeleteFpl", 'i', "i" ,HLE_KERNEL_SYSCALL }, {0x623ae665, &WrapI_IU, "sceKernelTryAllocateFpl", 'i', "ix" ,HLE_KERNEL_SYSCALL }, {0x616403ba, &WrapI_I, "sceKernelTerminateThread", 'i', "i" ,HLE_KERNEL_SYSCALL }, @@ -932,7 +932,7 @@ const HLEFunction ThreadManForKernel[] = {0x0D81716A, &WrapI_IU, "sceKernelPollMbx", 'i', "ix", HLE_KERNEL_SYSCALL }, {0x87D4DD36, &WrapI_IU, "sceKernelCancelReceiveMbx", 'i', "ix", HLE_KERNEL_SYSCALL }, {0xA8E8C846, &WrapI_IU, "sceKernelReferMbxStatus", 'i', "ip", HLE_KERNEL_SYSCALL }, - {0x56C039B5, &WrapI_CIUUU, "sceKernelCreateVpl", 'i', "sixxx", HLE_KERNEL_SYSCALL }, + {0x56C039B5, &WrapI_CIUUU, "sceKernelCreateVpl", 'i', "sixxp", HLE_KERNEL_SYSCALL }, {0x89B3D48C, &WrapI_I, "sceKernelDeleteVpl", 'i', "i", HLE_KERNEL_SYSCALL }, {0xBED27435, &WrapI_IUUU, "sceKernelAllocateVpl", 'i', "ixxx", HLE_KERNEL_SYSCALL | HLE_NOT_IN_INTERRUPT | HLE_NOT_DISPATCH_SUSPENDED }, {0xEC0A693F, &WrapI_IUUU, "sceKernelAllocateVplCB", 'i', "ixxx", HLE_KERNEL_SYSCALL | HLE_NOT_IN_INTERRUPT | HLE_NOT_DISPATCH_SUSPENDED }, diff --git a/Core/HLE/sceKernelMemory.cpp b/Core/HLE/sceKernelMemory.cpp index ac853d5bb3..935cbc8482 100644 --- a/Core/HLE/sceKernelMemory.cpp +++ b/Core/HLE/sceKernelMemory.cpp @@ -46,6 +46,7 @@ const int TLSPL_NUM_INDEXES = 16; // STATE BEGIN BlockAllocator userMemory(256); BlockAllocator kernelMemory(256); +BlockAllocator volatileMemory(256); static int vplWaitTimer = -1; static int fplWaitTimer = 
-1; @@ -432,6 +433,7 @@ void __KernelMemoryInit() MemBlockInfoInit(); kernelMemory.Init(PSP_GetKernelMemoryBase(), PSP_GetKernelMemoryEnd() - PSP_GetKernelMemoryBase(), false); userMemory.Init(PSP_GetUserMemoryBase(), PSP_GetUserMemoryEnd() - PSP_GetUserMemoryBase(), false); + volatileMemory.Init(PSP_GetVolatileMemoryStart(), PSP_GetVolatileMemoryEnd() - PSP_GetVolatileMemoryStart(), false); ParallelMemset(&g_threadManager, Memory::GetPointerWrite(PSP_GetKernelMemoryBase()), 0, PSP_GetUserMemoryEnd() - PSP_GetKernelMemoryBase()); NotifyMemInfo(MemBlockFlags::WRITE, PSP_GetKernelMemoryBase(), PSP_GetUserMemoryEnd() - PSP_GetKernelMemoryBase(), "MemInit"); INFO_LOG(SCEKERNEL, "Kernel and user memory pools initialized"); @@ -457,12 +459,14 @@ void __KernelMemoryInit() void __KernelMemoryDoState(PointerWrap &p) { - auto s = p.Section("sceKernelMemory", 1, 2); + auto s = p.Section("sceKernelMemory", 1, 3); if (!s) return; kernelMemory.DoState(p); userMemory.DoState(p); + if (s >= 3) + volatileMemory.DoState(p); Do(p, vplWaitTimer); CoreTiming::RestoreRegisterEvent(vplWaitTimer, "VplTimeout", __KernelVplTimeout); @@ -481,6 +485,11 @@ void __KernelMemoryDoState(PointerWrap &p) void __KernelMemoryShutdown() { +#ifdef _DEBUG + INFO_LOG(SCEKERNEL, "Shutting down volatile memory pool: "); + volatileMemory.ListBlocks(); +#endif + volatileMemory.Shutdown(); #ifdef _DEBUG INFO_LOG(SCEKERNEL,"Shutting down user memory pool: "); userMemory.ListBlocks(); @@ -495,6 +504,56 @@ void __KernelMemoryShutdown() MemBlockInfoShutdown(); } +BlockAllocator *BlockAllocatorFromID(int id) { + switch (id) { + case 1: + case 3: + case 4: + if (hleIsKernelMode()) + return &kernelMemory; + return nullptr; + + case 2: + case 6: + return &userMemory; + + case 8: + case 10: + if (hleIsKernelMode()) + return &userMemory; + return nullptr; + + case 5: + return &volatileMemory; + + default: + break; + } + + return nullptr; +} + +int BlockAllocatorToID(const BlockAllocator *alloc) { + if (alloc == 
&kernelMemory) + return 1; + if (alloc == &userMemory) + return 2; + if (alloc == &volatileMemory) + return 5; + return 0; +} + +BlockAllocator *BlockAllocatorFromAddr(u32 addr) { + addr &= 0x3FFFFFFF; + if (Memory::IsKernelAndNotVolatileAddress(addr)) + return &kernelMemory; + if (Memory::IsKernelAddress(addr)) + return &volatileMemory; + if (Memory::IsRAMAddress(addr)) + return &userMemory; + return nullptr; +} + enum SceKernelFplAttr { PSP_FPL_ATTR_FIFO = 0x0000, @@ -580,29 +639,18 @@ static void __KernelSortFplThreads(FPL *fpl) std::stable_sort(fpl->waitingThreads.begin(), fpl->waitingThreads.end(), __FplThreadSortPriority); } -int sceKernelCreateFpl(const char *name, u32 mpid, u32 attr, u32 blockSize, u32 numBlocks, u32 optPtr) -{ +int sceKernelCreateFpl(const char *name, u32 mpid, u32 attr, u32 blockSize, u32 numBlocks, u32 optPtr) { if (!name) - { - WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateFpl(): invalid name", SCE_KERNEL_ERROR_NO_MEMORY); - return SCE_KERNEL_ERROR_NO_MEMORY; - } + return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_NO_MEMORY, "invalid name"); if (mpid < 1 || mpid > 9 || mpid == 7) - { - WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateFpl(): invalid partition %d", SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, mpid); - return SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT; - } - // We only support user right now. 
- if (mpid != 2 && mpid != 6) - { - WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateFpl(): invalid partition %d", SCE_KERNEL_ERROR_ILLEGAL_PERM, mpid); - return SCE_KERNEL_ERROR_ILLEGAL_PERM; - } + return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, "invalid partition %d", mpid); + + BlockAllocator *allocator = BlockAllocatorFromID(mpid); + if (allocator == nullptr) + return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_PERM, "invalid partition %d", mpid); if (((attr & ~PSP_FPL_ATTR_KNOWN) & ~0xFF) != 0) - { - WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateFpl(): invalid attr parameter: %08x", SCE_KERNEL_ERROR_ILLEGAL_ATTR, attr); - return SCE_KERNEL_ERROR_ILLEGAL_ATTR; - } + return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ATTR, "invalid attr parameter: %08x", attr); + // There's probably a simpler way to get this same basic formula... // This is based on results from a PSP. bool illegalMemSize = blockSize == 0 || numBlocks == 0; @@ -611,25 +659,16 @@ int sceKernelCreateFpl(const char *name, u32 mpid, u32 attr, u32 blockSize, u32 if (!illegalMemSize && (u64) numBlocks >= 0x100000000ULL / (((u64) blockSize + 3ULL) & ~3ULL)) illegalMemSize = true; if (illegalMemSize) - { - WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateFpl(): invalid blockSize/count", SCE_KERNEL_ERROR_ILLEGAL_MEMSIZE); - return SCE_KERNEL_ERROR_ILLEGAL_MEMSIZE; - } + return hleReportWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_MEMSIZE, "invalid blockSize/count"); int alignment = 4; - if (optPtr != 0) - { - u32 size = Memory::Read_U32(optPtr); - if (size > 8) - WARN_LOG_REPORT(SCEKERNEL, "sceKernelCreateFpl(): unsupported extra options, size = %d", size); + if (Memory::IsValidRange(optPtr, 4)) { + u32 size = Memory::ReadUnchecked_U32(optPtr); if (size >= 4) alignment = Memory::Read_U32(optPtr + 4); // Must be a power of 2 to be valid. 
if ((alignment & (alignment - 1)) != 0) - { - WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateFpl(): invalid alignment %d", SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, alignment); - return SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT; - } + return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, "invalid alignment %d", alignment); } if (alignment < 4) @@ -638,9 +677,8 @@ int sceKernelCreateFpl(const char *name, u32 mpid, u32 attr, u32 blockSize, u32 int alignedSize = ((int)blockSize + alignment - 1) & ~(alignment - 1); u32 totalSize = alignedSize * numBlocks; bool atEnd = (attr & PSP_FPL_ATTR_HIGHMEM) != 0; - u32 address = userMemory.Alloc(totalSize, atEnd, "FPL"); - if (address == (u32)-1) - { + u32 address = allocator->Alloc(totalSize, atEnd, "FPL"); + if (address == (u32)-1) { DEBUG_LOG(SCEKERNEL, "sceKernelCreateFpl(\"%s\", partition=%i, attr=%08x, bsize=%i, nb=%i) FAILED - out of ram", name, mpid, attr, blockSize, numBlocks); return SCE_KERNEL_ERROR_NO_MEMORY; @@ -682,7 +720,10 @@ int sceKernelDeleteFpl(SceUID uid) if (wokeThreads) hleReSchedule("fpl deleted"); - userMemory.Free(fpl->address); + BlockAllocator *alloc = BlockAllocatorFromAddr(fpl->address); + _assert_msg_(alloc != nullptr, "Should always have a valid allocator/address"); + if (alloc) + alloc->Free(fpl->address); return kernelObjects.Destroy(uid); } else @@ -955,18 +996,23 @@ public: alloc->Free(address); } bool IsValid() {return address != (u32)-1;} - BlockAllocator *alloc; void DoState(PointerWrap &p) override { - auto s = p.Section("PMB", 1); + auto s = p.Section("PMB", 1, 2); if (!s) return; Do(p, address); DoArray(p, name, sizeof(name)); + if (s >= 2) { + int allocType = BlockAllocatorToID(alloc); + Do(p, allocType); + alloc = BlockAllocatorFromID(allocType); + } } + BlockAllocator *alloc; u32 address; char name[32]; }; @@ -986,44 +1032,28 @@ static u32 sceKernelTotalFreeMemSize() return retVal; } -int sceKernelAllocPartitionMemory(int partition, const char *name, int type, u32 size, u32 addr) -{ - 
if (name == NULL) - { - WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelAllocPartitionMemory(): invalid name", SCE_KERNEL_ERROR_ERROR); - return SCE_KERNEL_ERROR_ERROR; - } - if (size == 0) - { - WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelAllocPartitionMemory(): invalid size %x", SCE_KERNEL_ERROR_MEMBLOCK_ALLOC_FAILED, size); - return SCE_KERNEL_ERROR_MEMBLOCK_ALLOC_FAILED; +int sceKernelAllocPartitionMemory(int partition, const char *name, int type, u32 size, u32 addr) { + if (type < PSP_SMEM_Low || type > PSP_SMEM_HighAligned) + return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_MEMBLOCKTYPE, "invalid type %x", type); + // Alignment is only allowed for powers of 2. + if (type == PSP_SMEM_LowAligned || type == PSP_SMEM_HighAligned) { + if ((addr & (addr - 1)) != 0 || addr == 0) + return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ALIGNMENT_SIZE, "invalid alignment %x", addr); } if (partition < 1 || partition > 9 || partition == 7) - { - WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelAllocPartitionMemory(): invalid partition %x", SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, partition); - return SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT; - } - // We only support user right now. - if (partition != 2 && partition != 5 && partition != 6) - { - WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelAllocPartitionMemory(): invalid partition %x", SCE_KERNEL_ERROR_ILLEGAL_PARTITION, partition); - return SCE_KERNEL_ERROR_ILLEGAL_PARTITION; - } - if (type < PSP_SMEM_Low || type > PSP_SMEM_HighAligned) - { - WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelAllocPartitionMemory(): invalid type %x", SCE_KERNEL_ERROR_ILLEGAL_MEMBLOCKTYPE, type); - return SCE_KERNEL_ERROR_ILLEGAL_MEMBLOCKTYPE; - } - // Alignment is only allowed for powers of 2. 
- if ((type == PSP_SMEM_LowAligned || type == PSP_SMEM_HighAligned) && ((addr & (addr - 1)) != 0 || addr == 0)) - { - WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelAllocPartitionMemory(): invalid alignment %x", SCE_KERNEL_ERROR_ILLEGAL_ALIGNMENT_SIZE, addr); - return SCE_KERNEL_ERROR_ILLEGAL_ALIGNMENT_SIZE; - } + return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, "invalid partition %x", partition); - PartitionMemoryBlock *block = new PartitionMemoryBlock(&userMemory, name, size, (MemblockType)type, addr); - if (!block->IsValid()) - { + BlockAllocator *allocator = BlockAllocatorFromID(partition); + if (allocator == nullptr) + return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_PARTITION, "invalid partition %x", partition); + + if (name == nullptr) + return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ERROR, "invalid name"); + if (size == 0) + return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_MEMBLOCK_ALLOC_FAILED, "invalid size %x", size); + + PartitionMemoryBlock *block = new PartitionMemoryBlock(allocator, name, size, (MemblockType)type, addr); + if (!block->IsValid()) { delete block; ERROR_LOG(SCEKERNEL, "sceKernelAllocPartitionMemory(partition = %i, %s, type= %i, size= %i, addr= %08x): allocation failed", partition, name, type, size, addr); return SCE_KERNEL_ERROR_MEMBLOCK_ALLOC_FAILED; @@ -1451,40 +1481,23 @@ static void __KernelSortVplThreads(VPL *vpl) std::stable_sort(vpl->waitingThreads.begin(), vpl->waitingThreads.end(), __VplThreadSortPriority); } -SceUID sceKernelCreateVpl(const char *name, int partition, u32 attr, u32 vplSize, u32 optPtr) -{ +SceUID sceKernelCreateVpl(const char *name, int partition, u32 attr, u32 vplSize, u32 optPtr) { if (!name) - { - WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateVpl(): invalid name", SCE_KERNEL_ERROR_ERROR); - return SCE_KERNEL_ERROR_ERROR; - } + return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ERROR, "invalid name"); if (partition < 1 || partition > 9 || partition == 7) - { - WARN_LOG_REPORT(SCEKERNEL, 
"%08x=sceKernelCreateVpl(): invalid partition %d", SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, partition); - return SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT; - } - // We only support user right now. - if (partition != 2 && partition != 6) - { - WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateVpl(): invalid partition %d", SCE_KERNEL_ERROR_ILLEGAL_PERM, partition); - return SCE_KERNEL_ERROR_ILLEGAL_PERM; - } + return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, "invalid partition %d", partition); + + BlockAllocator *allocator = BlockAllocatorFromID(partition); + if (allocator == nullptr) + return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_PERM, "invalid partition %d", partition); + if (((attr & ~PSP_VPL_ATTR_KNOWN) & ~0xFF) != 0) - { - WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateVpl(): invalid attr parameter: %08x", SCE_KERNEL_ERROR_ILLEGAL_ATTR, attr); - return SCE_KERNEL_ERROR_ILLEGAL_ATTR; - } + return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ATTR, "invalid attr parameter: %08x", attr); if (vplSize == 0) - { - WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateVpl(): invalid size", SCE_KERNEL_ERROR_ILLEGAL_MEMSIZE); - return SCE_KERNEL_ERROR_ILLEGAL_MEMSIZE; - } + return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_MEMSIZE, "invalid size"); // Block Allocator seems to A-OK this, let's stop it here. if (vplSize >= 0x80000000) - { - WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateVpl(): way too big size", SCE_KERNEL_ERROR_NO_MEMORY); - return SCE_KERNEL_ERROR_NO_MEMORY; - } + return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_NO_MEMORY, "way too big size"); // Can't have that little space in a Vpl, sorry. if (vplSize <= 0x30) @@ -1493,12 +1506,9 @@ SceUID sceKernelCreateVpl(const char *name, int partition, u32 attr, u32 vplSize // We ignore the upalign to 256 and do it ourselves by 8. 
u32 allocSize = vplSize; - u32 memBlockPtr = userMemory.Alloc(allocSize, (attr & PSP_VPL_ATTR_HIGHMEM) != 0, "VPL"); + u32 memBlockPtr = allocator->Alloc(allocSize, (attr & PSP_VPL_ATTR_HIGHMEM) != 0, "VPL"); if (memBlockPtr == (u32)-1) - { - ERROR_LOG(SCEKERNEL, "sceKernelCreateVpl(): Failed to allocate %i bytes of pool data", vplSize); - return SCE_KERNEL_ERROR_NO_MEMORY; - } + return hleLogError(SCEKERNEL, SCE_KERNEL_ERROR_NO_MEMORY, "failed to allocate %i bytes of pool data", vplSize); VPL *vpl = new VPL; SceUID id = kernelObjects.Create(vpl); @@ -1542,7 +1552,10 @@ int sceKernelDeleteVpl(SceUID uid) if (wokeThreads) hleReSchedule("vpl deleted"); - userMemory.Free(vpl->address); + BlockAllocator *alloc = BlockAllocatorFromAddr(vpl->address); + _assert_msg_(alloc != nullptr, "Should always have a valid allocator/address"); + if (alloc) + alloc->Free(vpl->address); kernelObjects.Destroy(uid); return 0; } @@ -2044,29 +2057,17 @@ void __KernelTlsplThreadEnd(SceUID threadID) tlsplThreadEndChecks.erase(locked.first, locked.second); } -SceUID sceKernelCreateTlspl(const char *name, u32 partition, u32 attr, u32 blockSize, u32 count, u32 optionsPtr) -{ +SceUID sceKernelCreateTlspl(const char *name, u32 partition, u32 attr, u32 blockSize, u32 count, u32 optionsPtr) { if (!name) - { - WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateTlspl(): invalid name", SCE_KERNEL_ERROR_NO_MEMORY); - return SCE_KERNEL_ERROR_NO_MEMORY; - } + return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_NO_MEMORY, "invalid name"); if ((attr & ~PSP_TLSPL_ATTR_KNOWN) >= 0x100) - { - WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateTlspl(): invalid attr parameter: %08x", SCE_KERNEL_ERROR_ILLEGAL_ATTR, attr); - return SCE_KERNEL_ERROR_ILLEGAL_ATTR; - } + return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ATTR, "invalid attr parameter: %08x", attr); if (partition < 1 || partition > 9 || partition == 7) - { - WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateTlspl(): invalid partition %d", 
SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, partition); - return SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT; - } - // We only support user right now. - if (partition != 2 && partition != 6) - { - WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateTlspl(): invalid partition %d", SCE_KERNEL_ERROR_ILLEGAL_PERM, partition); - return SCE_KERNEL_ERROR_ILLEGAL_PERM; - } + return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, "invalid partition %d", partition); + + BlockAllocator *allocator = BlockAllocatorFromID(partition); + if (allocator == nullptr) + return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_PERM, "invalid partition %x", partition); // There's probably a simpler way to get this same basic formula... // This is based on results from a PSP. @@ -2076,41 +2077,29 @@ SceUID sceKernelCreateTlspl(const char *name, u32 partition, u32 attr, u32 block if (!illegalMemSize && (u64) count >= 0x100000000ULL / (((u64) blockSize + 3ULL) & ~3ULL)) illegalMemSize = true; if (illegalMemSize) - { - WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateTlspl(): invalid blockSize/count", SCE_KERNEL_ERROR_ILLEGAL_MEMSIZE); - return SCE_KERNEL_ERROR_ILLEGAL_MEMSIZE; - } + return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_MEMSIZE, "invalid blockSize/count"); int index = -1; - for (int i = 0; i < TLSPL_NUM_INDEXES; ++i) - if (tlsplUsedIndexes[i] == false) - { + for (int i = 0; i < TLSPL_NUM_INDEXES; ++i) { + if (tlsplUsedIndexes[i] == false) { index = i; break; } + } if (index == -1) - { - WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateTlspl(): ran out of indexes for TLS pools", PSP_ERROR_TOO_MANY_TLSPL); - return PSP_ERROR_TOO_MANY_TLSPL; - } + return hleLogWarning(SCEKERNEL, PSP_ERROR_TOO_MANY_TLSPL, "ran out of indexes for TLS pools"); // Unless otherwise specified, we align to 4 bytes (a mips word.) 
u32 alignment = 4; - if (optionsPtr != 0) - { - u32 size = Memory::Read_U32(optionsPtr); - if (size > 8) - WARN_LOG_REPORT(SCEKERNEL, "sceKernelCreateTlspl(%s) unsupported options parameter, size = %d", name, size); + if (Memory::IsValidRange(optionsPtr, 4)) { + u32 size = Memory::ReadUnchecked_U32(optionsPtr); if (size >= 8) alignment = Memory::Read_U32(optionsPtr + 4); // Note that 0 intentionally is allowed. if ((alignment & (alignment - 1)) != 0) - { - ERROR_LOG_REPORT(SCEKERNEL, "sceKernelCreateTlspl(%s): alignment is not a power of 2: %d", name, alignment); - return SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT; - } + return hleLogError(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, "alignment is not a power of 2: %d", alignment); // This goes for 0, 1, and 2. Can't have less than 4 byte alignment. if (alignment < 4) alignment = 4; @@ -2120,16 +2109,13 @@ SceUID sceKernelCreateTlspl(const char *name, u32 partition, u32 attr, u32 block u32 alignedSize = (blockSize + alignment - 1) & ~(alignment - 1); u32 totalSize = alignedSize * count; - u32 blockPtr = userMemory.Alloc(totalSize, (attr & PSP_TLSPL_ATTR_HIGHMEM) != 0, name); + u32 blockPtr = allocator->Alloc(totalSize, (attr & PSP_TLSPL_ATTR_HIGHMEM) != 0, name); #ifdef _DEBUG - userMemory.ListBlocks(); + allocator->ListBlocks(); #endif - if (blockPtr == (u32) -1) - { - ERROR_LOG(SCEKERNEL, "%08x=sceKernelCreateTlspl(%s, %d, %08x, %d, %d, %08x): failed to allocate memory", SCE_KERNEL_ERROR_NO_MEMORY, name, partition, attr, blockSize, count, optionsPtr); - return SCE_KERNEL_ERROR_NO_MEMORY; - } + if (blockPtr == (u32)-1) + return hleLogError(SCEKERNEL, SCE_KERNEL_ERROR_NO_MEMORY, "failed to allocate memory"); TLSPL *tls = new TLSPL(); SceUID id = kernelObjects.Create(tls); @@ -2148,9 +2134,7 @@ SceUID sceKernelCreateTlspl(const char *name, u32 partition, u32 attr, u32 block tls->alignment = alignment; tls->usage.resize(count, 0); - WARN_LOG(SCEKERNEL, "%08x=sceKernelCreateTlspl(%s, %d, %08x, %d, %d, %08x)", id, name, 
partition, attr, blockSize, count, optionsPtr); - - return id; + return hleLogSuccessInfoI(SCEKERNEL, id); } int sceKernelDeleteTlspl(SceUID uid) @@ -2178,7 +2162,10 @@ int sceKernelDeleteTlspl(SceUID uid) HLEKernel::ResumeFromWait(threadID, WAITTYPE_TLSPL, uid, 0); hleReSchedule("deleted tlspl"); - userMemory.Free(tls->address); + BlockAllocator *allocator = BlockAllocatorFromAddr(tls->address); + _assert_msg_(allocator != nullptr, "Should always have a valid allocator/address"); + if (allocator) + allocator->Free(tls->address); tlsplUsedIndexes[tls->ntls.index] = false; kernelObjects.Destroy(uid); } diff --git a/Core/HLE/sceKernelMemory.h b/Core/HLE/sceKernelMemory.h index 29d7356d00..b217974170 100644 --- a/Core/HLE/sceKernelMemory.h +++ b/Core/HLE/sceKernelMemory.h @@ -40,6 +40,10 @@ KernelObject *__KernelMemoryVPLObject(); KernelObject *__KernelMemoryPMBObject(); KernelObject *__KernelTlsplObject(); +BlockAllocator *BlockAllocatorFromID(int id); +int BlockAllocatorToID(const BlockAllocator *alloc); +BlockAllocator *BlockAllocatorFromAddr(u32 addr); + SceUID sceKernelCreateVpl(const char *name, int partition, u32 attr, u32 vplSize, u32 optPtr); int sceKernelDeleteVpl(SceUID uid); int sceKernelAllocateVpl(SceUID uid, u32 size, u32 addrPtr, u32 timeoutPtr); diff --git a/Core/HLE/sceKernelMsgPipe.cpp b/Core/HLE/sceKernelMsgPipe.cpp index b16e8060e2..0ff34c6290 100644 --- a/Core/HLE/sceKernelMsgPipe.cpp +++ b/Core/HLE/sceKernelMsgPipe.cpp @@ -140,10 +140,13 @@ struct MsgPipe : public KernelObject int GetIDType() const override { return SCE_KERNEL_TMID_Mpipe; } MsgPipe() : buffer(0) {} - ~MsgPipe() - { - if (buffer != 0) - userMemory.Free(buffer); + ~MsgPipe() { + if (buffer != 0) { + BlockAllocator *alloc = BlockAllocatorFromAddr(buffer); + _assert_msg_(alloc != nullptr, "Should always have a valid allocator/address"); + if (alloc) + alloc->Free(buffer); + } } u32 GetUsedSize() @@ -667,41 +670,26 @@ void __KernelMsgPipeDoState(PointerWrap &p) 
CoreTiming::RestoreRegisterEvent(waitTimer, "MsgPipeTimeout", __KernelMsgPipeTimeout); } -int sceKernelCreateMsgPipe(const char *name, int partition, u32 attr, u32 size, u32 optionsPtr) -{ +int sceKernelCreateMsgPipe(const char *name, int partition, u32 attr, u32 size, u32 optionsPtr) { if (!name) - { - WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateMsgPipe(): invalid name", SCE_KERNEL_ERROR_NO_MEMORY); - return SCE_KERNEL_ERROR_NO_MEMORY; - } + return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_NO_MEMORY, "invalid name"); if (partition < 1 || partition > 9 || partition == 7) - { - WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateMsgPipe(): invalid partition %d", SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, partition); - return SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT; - } - // We only support user right now. - if (partition != 2 && partition != 6) - { - WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateMsgPipe(): invalid partition %d", SCE_KERNEL_ERROR_ILLEGAL_PERM, partition); - return SCE_KERNEL_ERROR_ILLEGAL_PERM; - } + return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, "invalid partition %d", partition); + + BlockAllocator *allocator = BlockAllocatorFromID(partition); + if (allocator == nullptr) + return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_PERM, "invalid partition %d", partition); + if ((attr & ~SCE_KERNEL_MPA_KNOWN) >= 0x100) - { - WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateEventFlag(%s): invalid attr parameter: %08x", SCE_KERNEL_ERROR_ILLEGAL_ATTR, name, attr); - return SCE_KERNEL_ERROR_ILLEGAL_ATTR; - } + return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ATTR, "invalid attr parameter: %08x", attr); u32 memBlockPtr = 0; - if (size != 0) - { + if (size != 0) { // We ignore the upalign to 256. 
u32 allocSize = size; - memBlockPtr = userMemory.Alloc(allocSize, (attr & SCE_KERNEL_MPA_HIGHMEM) != 0, "MsgPipe"); + memBlockPtr = allocator->Alloc(allocSize, (attr & SCE_KERNEL_MPA_HIGHMEM) != 0, "MsgPipe"); if (memBlockPtr == (u32)-1) - { - ERROR_LOG(SCEKERNEL, "%08x=sceKernelCreateEventFlag(%s): Failed to allocate %i bytes for buffer", SCE_KERNEL_ERROR_NO_MEMORY, name, size); - return SCE_KERNEL_ERROR_NO_MEMORY; - } + return hleLogError(SCEKERNEL, SCE_KERNEL_ERROR_NO_MEMORY, "failed to allocate %i bytes for buffer", size); } MsgPipe *m = new MsgPipe(); diff --git a/Core/HLE/sceUtility.cpp b/Core/HLE/sceUtility.cpp index e6a5124600..f1d7f73d0b 100644 --- a/Core/HLE/sceUtility.cpp +++ b/Core/HLE/sceUtility.cpp @@ -744,11 +744,14 @@ static int sceUtilityGamedataInstallInitStart(u32 paramsAddr) { } ActivateDialog(UtilityDialogType::GAMEDATAINSTALL); - return hleLogSuccessInfoX(SCEUTILITY, gamedataInstallDialog->Init(paramsAddr)); + int result = gamedataInstallDialog->Init(paramsAddr); + if (result < 0) + DeactivateDialog(); + return hleLogSuccessInfoX(SCEUTILITY, result); } static int sceUtilityGamedataInstallShutdownStart() { - if (currentDialogType != UtilityDialogType::GAMEDATAINSTALL) { + if (!currentDialogActive || currentDialogType != UtilityDialogType::GAMEDATAINSTALL) { return hleLogWarning(SCEUTILITY, SCE_ERROR_UTILITY_WRONG_TYPE, "wrong dialog type"); } @@ -757,7 +760,7 @@ static int sceUtilityGamedataInstallShutdownStart() { } static int sceUtilityGamedataInstallUpdate(int animSpeed) { - if (currentDialogType != UtilityDialogType::GAMEDATAINSTALL) { + if (!currentDialogActive || currentDialogType != UtilityDialogType::GAMEDATAINSTALL) { return hleLogWarning(SCEUTILITY, SCE_ERROR_UTILITY_WRONG_TYPE, "wrong dialog type"); } @@ -765,8 +768,9 @@ static int sceUtilityGamedataInstallUpdate(int animSpeed) { } static int sceUtilityGamedataInstallGetStatus() { - if (currentDialogType != UtilityDialogType::GAMEDATAINSTALL) { + if (!currentDialogActive || 
currentDialogType != UtilityDialogType::GAMEDATAINSTALL) { // This is called incorrectly all the time by some games. So let's not bother warning. + hleEatCycles(200); return hleLogDebug(SCEUTILITY, SCE_ERROR_UTILITY_WRONG_TYPE, "wrong dialog type"); } @@ -776,7 +780,7 @@ static int sceUtilityGamedataInstallGetStatus() { } static int sceUtilityGamedataInstallAbort() { - if (currentDialogType != UtilityDialogType::GAMEDATAINSTALL) { + if (!currentDialogActive || currentDialogType != UtilityDialogType::GAMEDATAINSTALL) { return hleLogWarning(SCEUTILITY, SCE_ERROR_UTILITY_WRONG_TYPE, "wrong dialog type"); } diff --git a/Core/System.cpp b/Core/System.cpp index 29b1765fb0..9c7519edd6 100644 --- a/Core/System.cpp +++ b/Core/System.cpp @@ -413,11 +413,11 @@ bool PSP_InitStart(const CoreParameter &coreParam, std::string *error_string) { } #if defined(_WIN32) && PPSSPP_ARCH(AMD64) - INFO_LOG(BOOT, "PPSSPP %s Windows 64 bit", PPSSPP_GIT_VERSION); + NOTICE_LOG(BOOT, "PPSSPP %s Windows 64 bit", PPSSPP_GIT_VERSION); #elif defined(_WIN32) && !PPSSPP_ARCH(AMD64) - INFO_LOG(BOOT, "PPSSPP %s Windows 32 bit", PPSSPP_GIT_VERSION); + NOTICE_LOG(BOOT, "PPSSPP %s Windows 32 bit", PPSSPP_GIT_VERSION); #else - INFO_LOG(BOOT, "PPSSPP %s", PPSSPP_GIT_VERSION); + NOTICE_LOG(BOOT, "PPSSPP %s", PPSSPP_GIT_VERSION); #endif Core_NotifyLifecycle(CoreLifecycle::STARTING); diff --git a/GPU/Common/DrawEngineCommon.cpp b/GPU/Common/DrawEngineCommon.cpp index d361458111..5f4256158c 100644 --- a/GPU/Common/DrawEngineCommon.cpp +++ b/GPU/Common/DrawEngineCommon.cpp @@ -19,6 +19,7 @@ #include "Common/Data/Convert/ColorConv.h" #include "Common/Profiler/Profiler.h" +#include "Common/LogReporting.h" #include "Core/Config.h" #include "GPU/Common/DrawEngineCommon.h" #include "GPU/Common/SplineCommon.h" @@ -188,6 +189,57 @@ u32 DrawEngineCommon::NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, return DrawEngineCommon::NormalizeVertices(outPtr, bufPtr, inPtr, dec, lowerBound, upperBound, vertType); } 
+void DrawEngineCommon::DispatchSubmitImm(GEPrimitiveType prim, TransformedVertex *buffer, int vertexCount, int cullMode, bool continuation) { + // Instead of plumbing through properly (we'd need to inject these pretransformed vertices in the middle + // of SoftwareTransform(), which would take a lot of refactoring), we'll cheat and just turn these into + // through vertices. + // Since the only known use is Thrillville and it only uses it to clear, we just use color and pos. + struct ImmVertex { + float uv[2]; + uint32_t color; + float xyz[3]; + }; + std::vector<ImmVertex> temp; + temp.resize(vertexCount); + uint32_t color1Used = 0; + for (int i = 0; i < vertexCount; i++) { + // Since we're sending through, scale back up to w/h. + temp[i].uv[0] = buffer[i].u * gstate.getTextureWidth(0); + temp[i].uv[1] = buffer[i].v * gstate.getTextureHeight(0); + temp[i].color = buffer[i].color0_32; + temp[i].xyz[0] = buffer[i].pos[0]; + temp[i].xyz[1] = buffer[i].pos[1]; + temp[i].xyz[2] = buffer[i].pos[2]; + color1Used |= buffer[i].color1_32; + } + int vtype = GE_VTYPE_TC_FLOAT | GE_VTYPE_POS_FLOAT | GE_VTYPE_COL_8888 | GE_VTYPE_THROUGH; + // TODO: Handle fog and secondary color somehow? + + if (gstate.isFogEnabled() && !gstate.isModeThrough()) { + WARN_LOG_REPORT_ONCE(geimmfog, G3D, "Imm vertex used fog"); + } + if (color1Used != 0 && gstate.isUsingSecondaryColor() && !gstate.isModeThrough()) { + WARN_LOG_REPORT_ONCE(geimmcolor1, G3D, "Imm vertex used secondary color"); + } + + bool prevThrough = gstate.isModeThrough(); + // Code checks this reg directly, not just the vtype ID. 
+ if (!prevThrough) { + gstate.vertType |= GE_VTYPE_THROUGH; + gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_UVSCALEOFFSET | DIRTY_CULLRANGE); + } + + int bytesRead; + uint32_t vertTypeID = GetVertTypeID(vtype, 0); + SubmitPrim(&temp[0], nullptr, prim, vertexCount, vertTypeID, cullMode, &bytesRead); + DispatchFlush(); + + if (!prevThrough) { + gstate.vertType &= ~GE_VTYPE_THROUGH; + gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_UVSCALEOFFSET | DIRTY_CULLRANGE); + } +} + // This code has plenty of potential for optimization. // // It does the simplest and safest test possible: If all points of a bbox is outside a single of @@ -484,12 +536,12 @@ u32 DrawEngineCommon::NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, return GE_VTYPE_TC_FLOAT | GE_VTYPE_COL_8888 | GE_VTYPE_NRM_FLOAT | GE_VTYPE_POS_FLOAT | (vertType & (GE_VTYPE_IDX_MASK | GE_VTYPE_THROUGH)); } -void DrawEngineCommon::ApplyFramebufferRead(bool *fboTexNeedsBind) { +void DrawEngineCommon::ApplyFramebufferRead(FBOTexState *fboTexState) { if (gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH)) { - *fboTexNeedsBind = false; + *fboTexState = FBO_TEX_READ_FRAMEBUFFER; } else { gpuStats.numCopiesForShaderBlend++; - *fboTexNeedsBind = true; + *fboTexState = FBO_TEX_COPY_BIND_TEX; } gstate_c.Dirty(DIRTY_SHADERBLEND); diff --git a/GPU/Common/DrawEngineCommon.h b/GPU/Common/DrawEngineCommon.h index a8997454d9..40c397649b 100644 --- a/GPU/Common/DrawEngineCommon.h +++ b/GPU/Common/DrawEngineCommon.h @@ -46,6 +46,12 @@ enum { TEX_SLOT_SPLINE_WEIGHTS_V = 6, }; +enum FBOTexState { + FBO_TEX_NONE, + FBO_TEX_COPY_BIND_TEX, + FBO_TEX_READ_FRAMEBUFFER, +}; + inline uint32_t GetVertTypeID(uint32_t vertType, int uvGenMode) { // As the decoder depends on the UVGenMode when we use UV prescale, we simply mash it // into the top of the verttype where there 
are unused bits. @@ -84,10 +90,7 @@ public: SubmitPrim(verts, inds, prim, vertexCount, vertTypeID, cullMode, bytesRead); } - virtual void DispatchSubmitImm(const void *verts, const void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead) { - SubmitPrim(verts, inds, prim, vertexCount, vertTypeID, cullMode, bytesRead); - DispatchFlush(); - } + virtual void DispatchSubmitImm(GEPrimitiveType prim, TransformedVertex *buffer, int vertexCount, int cullMode, bool continuation); bool TestBoundingBox(const void* control_points, int vertexCount, u32 vertType, int *bytesRead); @@ -130,7 +133,7 @@ protected: // Vertex decoding void DecodeVertsStep(u8 *dest, int &i, int &decodedVerts); - void ApplyFramebufferRead(bool *fboTexNeedsBind); + void ApplyFramebufferRead(FBOTexState *fboTexState); inline int IndexSize(u32 vtype) const { const u32 indexType = (vtype & GE_VTYPE_IDX_MASK); diff --git a/GPU/Common/FragmentShaderGenerator.cpp b/GPU/Common/FragmentShaderGenerator.cpp index d37b4a772d..e2088731af 100644 --- a/GPU/Common/FragmentShaderGenerator.cpp +++ b/GPU/Common/FragmentShaderGenerator.cpp @@ -134,10 +134,12 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu GELogicOp replaceLogicOpType = isModeClear ? 
GE_LOGIC_COPY : (GELogicOp)id.Bits(FS_BIT_REPLACE_LOGIC_OP, 4); bool replaceLogicOp = replaceLogicOpType != GE_LOGIC_COPY && compat.bitwiseOps; - bool readFramebuffer = replaceBlend == REPLACE_BLEND_READ_FRAMEBUFFER || colorWriteMask || replaceLogicOp; - bool readFramebufferTex = readFramebuffer && !gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH); + bool needFramebufferRead = replaceBlend == REPLACE_BLEND_READ_FRAMEBUFFER || colorWriteMask || replaceLogicOp; - bool needFragCoord = readFramebuffer || gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT); + bool fetchFramebuffer = needFramebufferRead && gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH); + bool readFramebufferTex = needFramebufferRead && !gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH); + + bool needFragCoord = readFramebufferTex || gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT); bool writeDepth = gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT); if (shaderDepalMode != ShaderDepalMode::OFF && !doTexture) { @@ -157,6 +159,11 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu if (readFramebufferTex) { WRITE(p, "layout (binding = 1) uniform sampler2D fbotex;\n"); + } else if (fetchFramebuffer) { + WRITE(p, "layout (input_attachment_index = 0, binding = 9) uniform subpassInput inputColor;\n"); + if (fragmentShaderFlags) { + *fragmentShaderFlags |= FragmentShaderFlags::INPUT_ATTACHMENT; + } } if (shaderDepalMode != ShaderDepalMode::OFF) { @@ -416,7 +423,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu if (!strcmp(compat.fragColor0, "fragColor0")) { const char *qualifierColor0 = "out"; - if (readFramebuffer && compat.lastFragData && !strcmp(compat.lastFragData, compat.fragColor0)) { + if (fetchFramebuffer && compat.lastFragData && !strcmp(compat.lastFragData, compat.fragColor0)) { qualifierColor0 = "inout"; } // Output the output color definitions. 
@@ -492,20 +499,26 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu } // Two things read from the old framebuffer - shader replacement blending and bit-level masking. - if (readFramebuffer) { + if (readFramebufferTex) { if (compat.shaderLanguage == HLSL_D3D11) { WRITE(p, " vec4 destColor = fbotex.Load(int3((int)gl_FragCoord.x, (int)gl_FragCoord.y, 0));\n"); } else if (compat.shaderLanguage == HLSL_D3D9) { WRITE(p, " vec4 destColor = tex2D(fbotex, gl_FragCoord.xy * u_fbotexSize.xy);\n", compat.texture); - } else if (gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH)) { - // If we have EXT_shader_framebuffer_fetch / ARM_shader_framebuffer_fetch, we skip the blit. - // We can just read the prev value more directly. - WRITE(p, " lowp vec4 destColor = %s;\n", compat.lastFragData); } else if (!compat.texelFetch) { WRITE(p, " lowp vec4 destColor = %s(fbotex, gl_FragCoord.xy * u_fbotexSize.xy);\n", compat.texture); } else { WRITE(p, " lowp vec4 destColor = %s(fbotex, ivec2(gl_FragCoord.x, gl_FragCoord.y), 0);\n", compat.texelFetch); } + } else if (fetchFramebuffer) { + // If we have EXT_shader_framebuffer_fetch / ARM_shader_framebuffer_fetch, we skip the blit. + // We can just read the prev value more directly. + if (compat.shaderLanguage == GLSL_3xx) { + WRITE(p, " lowp vec4 destColor = %s;\n", compat.lastFragData); + } else if (compat.shaderLanguage == GLSL_VULKAN) { + WRITE(p, " lowp vec4 destColor = subpassLoad(inputColor);\n", compat.lastFragData); + } else { + _assert_msg_(false, "Need fetch destColor, but not a compatible language"); + } } if (isModeClear) { diff --git a/GPU/Common/FragmentShaderGenerator.h b/GPU/Common/FragmentShaderGenerator.h index 85c651cf8b..8f358fa7ff 100644 --- a/GPU/Common/FragmentShaderGenerator.h +++ b/GPU/Common/FragmentShaderGenerator.h @@ -42,7 +42,7 @@ struct FShaderID; // Can technically be deduced from the fragment shader ID, but this is safer. 
enum class FragmentShaderFlags : u32 { - FS_FLAG_INPUT_ATTACHMENT = 1, + INPUT_ATTACHMENT = 1, }; ENUM_CLASS_BITOPS(FragmentShaderFlags); diff --git a/GPU/Common/GPUStateUtils.cpp b/GPU/Common/GPUStateUtils.cpp index fb90850cb9..2b8bb84dc1 100644 --- a/GPU/Common/GPUStateUtils.cpp +++ b/GPU/Common/GPUStateUtils.cpp @@ -231,6 +231,7 @@ StencilValueType ReplaceAlphaWithStencilType() { case GE_FORMAT_8888: case GE_FORMAT_INVALID: case GE_FORMAT_DEPTH16: + case GE_FORMAT_CLUT8: switch (gstate.getStencilOpZPass()) { case GE_STENCILOP_REPLACE: // TODO: Could detect zero here and force ZERO - less uniform updates? @@ -859,66 +860,63 @@ static inline bool blendColorSimilar(uint32_t a, uint32_t b, int margin = 25) { static bool SimulateLogicOpIfNeeded(BlendFactor &srcBlend, BlendFactor &dstBlend, BlendEq &blendEq) { // Note: our shader solution applies logic ops BEFORE blending, not correctly after. // This is however fine for the most common ones, like CLEAR/NOOP/SET, etc. - if (!gstate_c.Supports(GPU_SUPPORTS_LOGIC_OP)) { - if (gstate.isLogicOpEnabled()) { - switch (gstate.getLogicOp()) { - case GE_LOGIC_CLEAR: - srcBlend = BlendFactor::ZERO; - dstBlend = BlendFactor::ZERO; - blendEq = BlendEq::ADD; - return true; - case GE_LOGIC_AND: - case GE_LOGIC_AND_REVERSE: - WARN_LOG_REPORT_ONCE(d3dLogicOpAnd, G3D, "Unsupported AND logic op: %x", gstate.getLogicOp()); - break; - case GE_LOGIC_COPY: - // This is the same as off. - break; - case GE_LOGIC_COPY_INVERTED: - // Handled in the shader. - break; - case GE_LOGIC_AND_INVERTED: - case GE_LOGIC_NOR: - case GE_LOGIC_NAND: - case GE_LOGIC_EQUIV: - // Handled in the shader. 
- WARN_LOG_REPORT_ONCE(d3dLogicOpAndInverted, G3D, "Attempted invert for logic op: %x", gstate.getLogicOp()); - break; - case GE_LOGIC_INVERTED: - srcBlend = BlendFactor::ONE; - dstBlend = BlendFactor::ONE; - blendEq = BlendEq::SUBTRACT; - WARN_LOG_REPORT_ONCE(d3dLogicOpInverted, G3D, "Attempted inverse for logic op: %x", gstate.getLogicOp()); - return true; - case GE_LOGIC_NOOP: - srcBlend = BlendFactor::ZERO; - dstBlend = BlendFactor::ONE; - blendEq = BlendEq::ADD; - return true; - case GE_LOGIC_XOR: - WARN_LOG_REPORT_ONCE(d3dLogicOpOrXor, G3D, "Unsupported XOR logic op: %x", gstate.getLogicOp()); - break; - case GE_LOGIC_OR: - case GE_LOGIC_OR_INVERTED: - // Inverted in shader. - srcBlend = BlendFactor::ONE; - dstBlend = BlendFactor::ONE; - blendEq = BlendEq::ADD; - WARN_LOG_REPORT_ONCE(d3dLogicOpOr, G3D, "Attempted or for logic op: %x", gstate.getLogicOp()); - return true; - case GE_LOGIC_OR_REVERSE: - WARN_LOG_REPORT_ONCE(d3dLogicOpOrReverse, G3D, "Unsupported OR REVERSE logic op: %x", gstate.getLogicOp()); - break; - case GE_LOGIC_SET: - srcBlend = BlendFactor::ONE; - dstBlend = BlendFactor::ONE; - blendEq = BlendEq::ADD; - WARN_LOG_REPORT_ONCE(d3dLogicOpSet, G3D, "Attempted set for logic op: %x", gstate.getLogicOp()); - return true; - } + if (!gstate_c.Supports(GPU_SUPPORTS_LOGIC_OP) && gstate.isLogicOpEnabled()) { + switch (gstate.getLogicOp()) { + case GE_LOGIC_CLEAR: + srcBlend = BlendFactor::ZERO; + dstBlend = BlendFactor::ZERO; + blendEq = BlendEq::ADD; + return true; + case GE_LOGIC_AND: + case GE_LOGIC_AND_REVERSE: + WARN_LOG_REPORT_ONCE(d3dLogicOpAnd, G3D, "Unsupported AND logic op: %x", gstate.getLogicOp()); + break; + case GE_LOGIC_COPY: + // This is the same as off. + break; + case GE_LOGIC_COPY_INVERTED: + // Handled in the shader. + break; + case GE_LOGIC_AND_INVERTED: + case GE_LOGIC_NOR: + case GE_LOGIC_NAND: + case GE_LOGIC_EQUIV: + // Handled in the shader. 
+ WARN_LOG_REPORT_ONCE(d3dLogicOpAndInverted, G3D, "Attempted invert for logic op: %x", gstate.getLogicOp()); + break; + case GE_LOGIC_INVERTED: + srcBlend = BlendFactor::ONE; + dstBlend = BlendFactor::ONE; + blendEq = BlendEq::SUBTRACT; + WARN_LOG_REPORT_ONCE(d3dLogicOpInverted, G3D, "Attempted inverse for logic op: %x", gstate.getLogicOp()); + return true; + case GE_LOGIC_NOOP: + srcBlend = BlendFactor::ZERO; + dstBlend = BlendFactor::ONE; + blendEq = BlendEq::ADD; + return true; + case GE_LOGIC_XOR: + WARN_LOG_REPORT_ONCE(d3dLogicOpOrXor, G3D, "Unsupported XOR logic op: %x", gstate.getLogicOp()); + break; + case GE_LOGIC_OR: + case GE_LOGIC_OR_INVERTED: + // Inverted in shader. + srcBlend = BlendFactor::ONE; + dstBlend = BlendFactor::ONE; + blendEq = BlendEq::ADD; + WARN_LOG_REPORT_ONCE(d3dLogicOpOr, G3D, "Attempted or for logic op: %x", gstate.getLogicOp()); + return true; + case GE_LOGIC_OR_REVERSE: + WARN_LOG_REPORT_ONCE(d3dLogicOpOrReverse, G3D, "Unsupported OR REVERSE logic op: %x", gstate.getLogicOp()); + break; + case GE_LOGIC_SET: + srcBlend = BlendFactor::ONE; + dstBlend = BlendFactor::ONE; + blendEq = BlendEq::ADD; + WARN_LOG_REPORT_ONCE(d3dLogicOpSet, G3D, "Attempted set for logic op: %x", gstate.getLogicOp()); + return true; } } - return false; } @@ -1080,6 +1078,12 @@ static void ConvertBlendState(GenericBlendState &blendState, bool forceReplaceBl case REPLACE_BLEND_NO: // We may still want to do something about stencil -> alpha. ApplyStencilReplaceAndLogicOpIgnoreBlend(replaceAlphaWithStencil, blendState); + + if (forceReplaceBlend) { + // If this is true, the logic and mask replacements will be applied, at least. In that case, + // we should not apply any logic op simulation. 
+ blendState.simulateLogicOpType = LOGICOPTYPE_NORMAL; + } return; case REPLACE_BLEND_BLUE_TO_ALPHA: diff --git a/GPU/Common/ShaderId.cpp b/GPU/Common/ShaderId.cpp index 7ca42ccba6..6345816604 100644 --- a/GPU/Common/ShaderId.cpp +++ b/GPU/Common/ShaderId.cpp @@ -349,13 +349,10 @@ void ComputeFragmentShaderID(FShaderID *id_out, const ComputedPipelineState &pip id.SetBit(FS_BIT_COLOR_WRITEMASK, colorWriteMask); - if (g_Config.bVendorBugChecksEnabled) { - if (bugs.Has(Draw::Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL)) { - id.SetBit(FS_BIT_NO_DEPTH_CANNOT_DISCARD_STENCIL, !IsStencilTestOutputDisabled() && !gstate.isDepthWriteEnabled()); - } else if (bugs.Has(Draw::Bugs::MALI_STENCIL_DISCARD_BUG) && PSP_CoreParameter().compat.flags().MaliDepthStencilBugWorkaround) { - // Very similar driver bug to the Adreno one, with the same workaround (though might look into if there are cheaper ones!) - // Keeping the conditions separate since it can probably be made tighter. - id.SetBit(FS_BIT_NO_DEPTH_CANNOT_DISCARD_STENCIL, !IsStencilTestOutputDisabled() && (!gstate.isDepthTestEnabled() || !gstate.isDepthWriteEnabled())); + if (g_Config.bVendorBugChecksEnabled && bugs.Has(Draw::Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL)) { + bool stencilWithoutDepth = !IsStencilTestOutputDisabled() && (!gstate.isDepthTestEnabled() || !gstate.isDepthWriteEnabled()); + if (stencilWithoutDepth) { + id.SetBit(FS_BIT_NO_DEPTH_CANNOT_DISCARD_STENCIL, stencilWithoutDepth); } } } diff --git a/GPU/Common/StencilCommon.cpp b/GPU/Common/StencilCommon.cpp index 81749ef678..561c6a2857 100644 --- a/GPU/Common/StencilCommon.cpp +++ b/GPU/Common/StencilCommon.cpp @@ -171,6 +171,7 @@ bool FramebufferManagerCommon::PerformStencilUpload(u32 addr, int size, StencilU break; case GE_FORMAT_INVALID: case GE_FORMAT_DEPTH16: + case GE_FORMAT_CLUT8: // Inconceivable. 
_assert_(false); break; diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index 66d70f7c02..127d491010 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -37,6 +37,7 @@ #include "GPU/Common/ShaderId.h" #include "GPU/Common/GPUStateUtils.h" #include "GPU/Debugger/Debugger.h" +#include "GPU/Debugger/Record.h" #include "GPU/GPUCommon.h" #include "GPU/GPUInterface.h" #include "GPU/GPUState.h" @@ -292,11 +293,18 @@ SamplerCacheKey TextureCacheCommon::GetSamplingParams(int maxLevel, const TexCac SamplerCacheKey TextureCacheCommon::GetFramebufferSamplingParams(u16 bufferWidth, u16 bufferHeight) { SamplerCacheKey key = GetSamplingParams(0, nullptr); + // In case auto max quality was on, restore min filt. Another fix for water in Outrun. + if (g_Config.iTexFiltering == TEX_FILTER_AUTO_MAX_QUALITY) { + int minFilt = gstate.texfilter & 0x7; + key.minFilt = minFilt & 1; + } + // Kill any mipmapping settings. key.mipEnable = false; key.mipFilt = false; key.aniso = 0.0; key.maxLevel = 0.0f; + key.lodBias = 0.0f; // Often the framebuffer will not match the texture size. We'll wrap/clamp in the shader in that case. int w = gstate.getTextureWidth(0); @@ -1260,14 +1268,17 @@ void TextureCacheCommon::LoadClut(u32 clutAddr, u32 loadBytes) { // It's possible for a game to load CLUT outside valid memory without crashing, should result in zeroes. 
u32 bytes = Memory::ValidSize(clutAddr, loadBytes); - if (clutRenderAddress_ != 0xFFFFFFFF && PSP_CoreParameter().compat.flags().AllowDownloadCLUT) { + bool performDownload = PSP_CoreParameter().compat.flags().AllowDownloadCLUT; + if (GPURecord::IsActive()) + performDownload = true; + if (clutRenderAddress_ != 0xFFFFFFFF && performDownload) { framebufferManager_->DownloadFramebufferForClut(clutRenderAddress_, clutRenderOffset_ + bytes); Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes); if (bytes < loadBytes) { memset((u8 *)clutBufRaw_ + bytes, 0x00, loadBytes - bytes); } } else { - // Here we could check for clutRenderAddres_ != 0xFFFFFFFF and zero the CLUT or something, + // Here we could check for clutRenderAddress_ != 0xFFFFFFFF and zero the CLUT or something, // but choosing not to for now. Though the results of loading the CLUT from RAM here is // almost certainly going to be bogus. #ifdef _M_SSE @@ -1986,6 +1997,9 @@ static bool CanDepalettize(GETextureFormat texFormat, GEBufferFormat bufferForma return true; } break; + case GE_FORMAT_CLUT8: + // Shouldn't happen here. + return false; } WARN_LOG(G3D, "Invalid CLUT/framebuffer combination: %s vs %s", GeTextureFormatToString(texFormat), GeBufferFormatToString(bufferFormat)); return false; diff --git a/GPU/Common/VertexDecoderArm.cpp b/GPU/Common/VertexDecoderArm.cpp index e3d5b11f6b..2b93b563f8 100644 --- a/GPU/Common/VertexDecoderArm.cpp +++ b/GPU/Common/VertexDecoderArm.cpp @@ -872,22 +872,14 @@ void VertexDecoderJitCache::Jit_NormalFloat() { STMIA(scratchReg, false, 3, tempReg1, tempReg2, tempReg3); } -// Through expands into floats, always. Might want to look at changing this. 
void VertexDecoderJitCache::Jit_PosS8Through() { - DEBUG_LOG_REPORT_ONCE(vertexS8Through, G3D, "Using S8 positions in throughmode"); _dbg_assert_msg_(fpScratchReg + 1 == fpScratchReg2, "VertexDecoder fpScratchRegs must be in order."); _dbg_assert_msg_(fpScratchReg2 + 1 == fpScratchReg3, "VertexDecoder fpScratchRegs must be in order."); - // TODO: SIMD - LDRSB(tempReg1, srcReg, dec_->posoff); - LDRSB(tempReg2, srcReg, dec_->posoff + 1); - LDRB(tempReg3, srcReg, dec_->posoff + 2); - static const ARMReg tr[3] = { tempReg1, tempReg2, tempReg3 }; - static const ARMReg fr[3] = { fpScratchReg, fpScratchReg2, fpScratchReg3 }; + // 8-bit positions in throughmode always decode to 0, depth included. + VEOR(neonScratchReg, neonScratchReg, neonScratchReg); + VEOR(neonScratchReg2, neonScratchReg, neonScratchReg); ADD(scratchReg, dstReg, dec_->decFmt.posoff); - VMOV(neonScratchReg, tempReg1, tempReg2); - VMOV(neonScratchReg2, tempReg3, tempReg3); - VCVT(F_32 | I_SIGNED, neonScratchRegQ, neonScratchRegQ); VST1(F_32, neonScratchReg, scratchReg, 2, ALIGN_NONE); } diff --git a/GPU/Common/VertexDecoderArm64.cpp b/GPU/Common/VertexDecoderArm64.cpp index 5ff1d605f3..0ad04dbe5c 100644 --- a/GPU/Common/VertexDecoderArm64.cpp +++ b/GPU/Common/VertexDecoderArm64.cpp @@ -668,15 +668,11 @@ void VertexDecoderJitCache::Jit_PosFloat() { } void VertexDecoderJitCache::Jit_PosS8Through() { - LDRSB(INDEX_UNSIGNED, tempReg1, srcReg, dec_->posoff); - LDRSB(INDEX_UNSIGNED, tempReg2, srcReg, dec_->posoff + 1); - LDRB(INDEX_UNSIGNED, tempReg3, srcReg, dec_->posoff + 2); - fp.SCVTF(fpScratchReg, tempReg1); - fp.SCVTF(fpScratchReg2, tempReg2); - fp.SCVTF(fpScratchReg3, tempReg3); + // 8-bit positions in throughmode always decode to 0, depth included. 
+ fp.EOR(fpScratchReg, fpScratchReg, fpScratchReg); STR(INDEX_UNSIGNED, fpScratchReg, dstReg, dec_->decFmt.posoff); - STR(INDEX_UNSIGNED, fpScratchReg2, dstReg, dec_->decFmt.posoff + 4); - STR(INDEX_UNSIGNED, fpScratchReg3, dstReg, dec_->decFmt.posoff + 8); + STR(INDEX_UNSIGNED, fpScratchReg, dstReg, dec_->decFmt.posoff + 4); + STR(INDEX_UNSIGNED, fpScratchReg, dstReg, dec_->decFmt.posoff + 8); } void VertexDecoderJitCache::Jit_PosS16Through() { diff --git a/GPU/Common/VertexDecoderCommon.cpp b/GPU/Common/VertexDecoderCommon.cpp index a9b4a1039a..62eb20ef84 100644 --- a/GPU/Common/VertexDecoderCommon.cpp +++ b/GPU/Common/VertexDecoderCommon.cpp @@ -773,14 +773,20 @@ void VertexDecoder::Step_PosFloatSkin() const Vec3ByMatrix43(pos, fn, skinMatrix); } -void VertexDecoder::Step_PosS8Through() const -{ +void VertexDecoder::Step_PosInvalid() const { + // Invalid positions are just culled. Simulate by forcing invalid values. float *v = (float *)(decoded_ + decFmt.posoff); - const s8 *sv = (const s8 *)(ptr_ + posoff); - const u8 *uv = (const u8 *)(ptr_ + posoff); - v[0] = sv[0]; - v[1] = sv[1]; - v[2] = uv[2]; + v[0] = std::numeric_limits<float>::infinity(); + v[1] = std::numeric_limits<float>::infinity(); + v[2] = std::numeric_limits<float>::infinity(); +} + +void VertexDecoder::Step_PosS8Through() const { + // 8-bit positions in throughmode always decode to 0, depth included. 
+ float *v = (float *)(decoded_ + decFmt.posoff); + v[0] = 0; + v[1] = 0; + v[2] = 0; } void VertexDecoder::Step_PosS16Through() const @@ -1023,35 +1029,35 @@ static const StepFunction nrmstep_morphskin[4] = { }; static const StepFunction posstep[4] = { - &VertexDecoder::Step_PosS8, + &VertexDecoder::Step_PosInvalid, &VertexDecoder::Step_PosS8, &VertexDecoder::Step_PosS16, &VertexDecoder::Step_PosFloat, }; static const StepFunction posstep_skin[4] = { - &VertexDecoder::Step_PosS8Skin, + &VertexDecoder::Step_PosInvalid, &VertexDecoder::Step_PosS8Skin, &VertexDecoder::Step_PosS16Skin, &VertexDecoder::Step_PosFloatSkin, }; static const StepFunction posstep_morph[4] = { - &VertexDecoder::Step_PosS8Morph, + &VertexDecoder::Step_PosInvalid, &VertexDecoder::Step_PosS8Morph, &VertexDecoder::Step_PosS16Morph, &VertexDecoder::Step_PosFloatMorph, }; static const StepFunction posstep_morph_skin[4] = { - &VertexDecoder::Step_PosS8MorphSkin, + &VertexDecoder::Step_PosInvalid, &VertexDecoder::Step_PosS8MorphSkin, &VertexDecoder::Step_PosS16MorphSkin, &VertexDecoder::Step_PosFloatMorphSkin, }; static const StepFunction posstep_through[4] = { - &VertexDecoder::Step_PosS8Through, + &VertexDecoder::Step_PosInvalid, &VertexDecoder::Step_PosS8Through, &VertexDecoder::Step_PosS16Through, &VertexDecoder::Step_PosFloatThrough, @@ -1224,9 +1230,8 @@ void VertexDecoder::SetVertexType(u32 fmt, const VertexDecoderOptions &options, bool reportNoPos = false; if (!pos) { reportNoPos = true; - pos = 1; } - if (pos) { // there's always a position + if (pos >= 0) { // there's always a position size = align(size, posalign[pos]); posoff = size; size += possize[pos]; diff --git a/GPU/Common/VertexDecoderCommon.h b/GPU/Common/VertexDecoderCommon.h index 983f76f9b2..6a06093902 100644 --- a/GPU/Common/VertexDecoderCommon.h +++ b/GPU/Common/VertexDecoderCommon.h @@ -433,6 +433,7 @@ public: void Step_PosS16MorphSkin() const; void Step_PosFloatMorphSkin() const; + void Step_PosInvalid() const; void 
Step_PosS8Through() const; void Step_PosS16Through() const; void Step_PosFloatThrough() const; diff --git a/GPU/Common/VertexDecoderX86.cpp b/GPU/Common/VertexDecoderX86.cpp index d684078989..13aabe2df3 100644 --- a/GPU/Common/VertexDecoderX86.cpp +++ b/GPU/Common/VertexDecoderX86.cpp @@ -1345,14 +1345,9 @@ void VertexDecoderJitCache::Jit_NormalFloatSkin() { // Through expands into floats, always. Might want to look at changing this. void VertexDecoderJitCache::Jit_PosS8Through() { - DEBUG_LOG_REPORT_ONCE(vertexS8Through, G3D, "Using S8 positions in throughmode"); // SIMD doesn't really matter since this isn't useful on hardware. + XORPS(fpScratchReg, R(fpScratchReg)); for (int i = 0; i < 3; i++) { - if (i == 2) - MOVZX(32, 8, tempReg1, MDisp(srcReg, dec_->posoff + i)); - else - MOVSX(32, 8, tempReg1, MDisp(srcReg, dec_->posoff + i)); - CVTSI2SS(fpScratchReg, R(tempReg1)); MOVSS(MDisp(dstReg, dec_->decFmt.posoff + i * 4), fpScratchReg); } } diff --git a/GPU/Common/VertexShaderGenerator.cpp b/GPU/Common/VertexShaderGenerator.cpp index ccda9af86b..16cce87d0f 100644 --- a/GPU/Common/VertexShaderGenerator.cpp +++ b/GPU/Common/VertexShaderGenerator.cpp @@ -142,10 +142,11 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag if (gl_extensions.EXT_gpu_shader4) { gl_exts.push_back("#extension GL_EXT_gpu_shader4 : enable"); } - if (gl_extensions.EXT_clip_cull_distance && id.Bit(VS_BIT_VERTEX_RANGE_CULLING)) { + bool useClamp = gstate_c.Supports(GPU_SUPPORTS_DEPTH_CLAMP) && !id.Bit(VS_BIT_IS_THROUGH); + if (gl_extensions.EXT_clip_cull_distance && (id.Bit(VS_BIT_VERTEX_RANGE_CULLING) || useClamp)) { gl_exts.push_back("#extension GL_EXT_clip_cull_distance : enable"); } - if (gl_extensions.APPLE_clip_distance && id.Bit(VS_BIT_VERTEX_RANGE_CULLING)) { + if (gl_extensions.APPLE_clip_distance && (id.Bit(VS_BIT_VERTEX_RANGE_CULLING) || useClamp)) { gl_exts.push_back("#extension GL_APPLE_clip_distance : enable"); } if (gl_extensions.ARB_cull_distance && 
id.Bit(VS_BIT_VERTEX_RANGE_CULLING)) { @@ -227,6 +228,10 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag bool texCoordInVec3 = false; bool vertexRangeCulling = id.Bit(VS_BIT_VERTEX_RANGE_CULLING) && !isModeThrough; + bool clipClampedDepth = !isModeThrough && gstate_c.Supports(GPU_SUPPORTS_DEPTH_CLAMP) && gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE); + const char *vertexRangeClipSuffix = "[0]"; + if (vertexRangeCulling && clipClampedDepth) + vertexRangeClipSuffix = "[2]"; if (compat.shaderLanguage == GLSL_VULKAN) { WRITE(p, "\n"); @@ -419,8 +424,15 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag WRITE(p, " vec4 gl_Position : POSITION;\n"); } else { WRITE(p, " vec4 gl_Position : SV_Position;\n"); - if (vertexRangeCulling && gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE)) { - WRITE(p, " float gl_ClipDistance : SV_ClipDistance0;\n"); + bool clipRange = vertexRangeCulling && gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE); + if (clipClampedDepth && clipRange) { + WRITE(p, " float3 gl_ClipDistance : SV_ClipDistance;\n"); + vertexRangeClipSuffix = ".z"; + } else if (clipClampedDepth) { + WRITE(p, " float2 gl_ClipDistance : SV_ClipDistance;\n"); + } else if (clipRange) { + WRITE(p, " float gl_ClipDistance : SV_ClipDistance;\n"); + vertexRangeClipSuffix = ""; } if (vertexRangeCulling && gstate_c.Supports(GPU_SUPPORTS_CULL_DISTANCE)) { WRITE(p, " float2 gl_CullDistance : SV_CullDistance0;\n"); @@ -1177,8 +1189,37 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag WRITE(p, " %sv_fogdepth = (viewPos.z + u_fogcoef.x) * u_fogcoef.y;\n", compat.vsOutPrefix); } - if (vertexRangeCulling && !IsVRBuild()) { + if (clipClampedDepth || (vertexRangeCulling && !IsVRBuild())) { WRITE(p, " vec3 projPos = outPos.xyz / outPos.w;\n"); + } + + if (clipClampedDepth) { + const char *clip0 = compat.shaderLanguage == HLSL_D3D11 ? 
".x" : "[0]"; + const char *clip1 = compat.shaderLanguage == HLSL_D3D11 ? ".y" : "[1]"; + WRITE(p, " mediump float integerZ = projPos.z * u_depthRange.x + u_depthRange.y;\n"); + + // This should clip against minz, but only when it's above zero. + if (ShaderLanguageIsOpenGL(compat.shaderLanguage)) { + // On OpenGL/GLES, these values account for the -1 -> 1 range. + WRITE(p, " if (u_depthRange.y - u_depthRange.x >= 1.0) {\n"); + } else { + // Everywhere else, it's 0 -> 1, simpler. + WRITE(p, " if (u_depthRange.y >= 1.0) {\n"); + } + WRITE(p, " %sgl_ClipDistance%s = integerZ;\n", compat.vsOutPrefix, clip0); + WRITE(p, " } else {\n"); + WRITE(p, " %sgl_ClipDistance%s = 0.0;\n", compat.vsOutPrefix, clip0); + WRITE(p, " }\n"); + + // This is similar, but for maxz when it's below 65535.0. -1/0 don't matter here. + WRITE(p, " if (u_depthRange.x + u_depthRange.y <= 65534.0) {\n"); + WRITE(p, " %sgl_ClipDistance%s = 65535.0 - integerZ;\n", compat.vsOutPrefix, clip1); + WRITE(p, " } else {\n"); + WRITE(p, " %sgl_ClipDistance%s = 0.0;\n", compat.vsOutPrefix, clip1); + WRITE(p, " }\n"); + } + + if (vertexRangeCulling && !IsVRBuild()) { WRITE(p, " float projZ = (projPos.z - u_depthRange.z) * u_depthRange.w;\n"); // Vertex range culling doesn't happen when Z clips, note sign of w is important. WRITE(p, " if (u_cullRangeMin.w <= 0.0 || projZ * outPos.w > -outPos.w) {\n"); @@ -1194,12 +1235,11 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag WRITE(p, " }\n"); WRITE(p, " }\n"); - const char *clip0 = compat.shaderLanguage == HLSL_D3D11 ? "" : "[0]"; const char *cull0 = compat.shaderLanguage == HLSL_D3D11 ? ".x" : "[0]"; const char *cull1 = compat.shaderLanguage == HLSL_D3D11 ? ".y" : "[1]"; if (gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE)) { // TODO: Not rectangles... 
- WRITE(p, " %sgl_ClipDistance%s = projZ * outPos.w + outPos.w;\n", compat.vsOutPrefix, clip0); + WRITE(p, " %sgl_ClipDistance%s = projZ * outPos.w + outPos.w;\n", compat.vsOutPrefix, vertexRangeClipSuffix); } if (gstate_c.Supports(GPU_SUPPORTS_CULL_DISTANCE)) { // Cull any triangle fully outside in the same direction when depth clamp enabled. diff --git a/GPU/D3D11/GPU_D3D11.cpp b/GPU/D3D11/GPU_D3D11.cpp index 3cfe0c4b37..bfbd12f190 100644 --- a/GPU/D3D11/GPU_D3D11.cpp +++ b/GPU/D3D11/GPU_D3D11.cpp @@ -82,7 +82,7 @@ GPU_D3D11::GPU_D3D11(GraphicsContext *gfxCtx, Draw::DrawContext *draw) // No need to flush before the tex scale/offset commands if we are baking // the tex scale/offset into the vertices anyway. UpdateCmdInfo(); - CheckGPUFeatures(); + gstate_c.featureFlags = CheckGPUFeatures(); BuildReportingInfo(); @@ -100,40 +100,16 @@ GPU_D3D11::~GPU_D3D11() { stockD3D11.Destroy(); } -void GPU_D3D11::CheckGPUFeatures() { - u32 features = 0; - - features |= GPU_SUPPORTS_BLEND_MINMAX; +u32 GPU_D3D11::CheckGPUFeatures() const { + u32 features = GPUCommon::CheckGPUFeatures(); // Accurate depth is required because the Direct3D API does not support inverse Z. // So we cannot incorrectly use the viewport transform as the depth range on Direct3D. // TODO: Breaks text in PaRappa for some reason? 
features |= GPU_SUPPORTS_ACCURATE_DEPTH; -#ifndef _M_ARM - // TODO: Do proper feature detection - features |= GPU_SUPPORTS_ANISOTROPY; -#endif - - features |= GPU_SUPPORTS_DEPTH_TEXTURE; - features |= GPU_SUPPORTS_TEXTURE_NPOT; - if (draw_->GetDeviceCaps().dualSourceBlend) - features |= GPU_SUPPORTS_DUALSOURCE_BLEND; if (draw_->GetDeviceCaps().depthClampSupported) features |= GPU_SUPPORTS_DEPTH_CLAMP; - if (draw_->GetDeviceCaps().clipDistanceSupported) - features |= GPU_SUPPORTS_CLIP_DISTANCE; - if (draw_->GetDeviceCaps().cullDistanceSupported) - features |= GPU_SUPPORTS_CULL_DISTANCE; - if (!draw_->GetBugs().Has(Draw::Bugs::BROKEN_NAN_IN_CONDITIONAL)) { - // Ignore the compat setting if clip and cull are both enabled. - // When supported, we can do the depth side of range culling more correctly. - const bool supported = draw_->GetDeviceCaps().clipDistanceSupported && draw_->GetDeviceCaps().cullDistanceSupported; - const bool disabled = PSP_CoreParameter().compat.flags().DisableRangeCulling; - if (supported || !disabled) { - features |= GPU_SUPPORTS_VS_RANGE_CULLING; - } - } features |= GPU_SUPPORTS_TEXTURE_FLOAT; features |= GPU_SUPPORTS_INSTANCE_RENDERING; @@ -146,10 +122,6 @@ void GPU_D3D11::CheckGPUFeatures() { features |= GPU_SUPPORTS_16BIT_FORMATS; } - if (draw_->GetDeviceCaps().logicOpSupported) { - features |= GPU_SUPPORTS_LOGIC_OP; - } - if (!g_Config.bHighQualityDepth && (features & GPU_SUPPORTS_ACCURATE_DEPTH) != 0) { features |= GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT; } else if (PSP_CoreParameter().compat.flags().PixelDepthRounding) { @@ -164,11 +136,7 @@ void GPU_D3D11::CheckGPUFeatures() { features |= GPU_USE_DEPTH_RANGE_HACK; } - if (PSP_CoreParameter().compat.flags().ClearToRAM) { - features |= GPU_USE_CLEAR_RAM_HACK; - } - - gstate_c.featureFlags = features; + return features; } // Needs to be called on GPU thread, not reporting thread. 
@@ -206,7 +174,7 @@ void GPU_D3D11::BeginHostFrame() { GPUCommon::BeginHostFrame(); UpdateCmdInfo(); if (resized_) { - CheckGPUFeatures(); + gstate_c.featureFlags = CheckGPUFeatures(); framebufferManager_->Resized(); drawEngine_.Resized(); textureCache_->NotifyConfigChanged(); diff --git a/GPU/D3D11/GPU_D3D11.h b/GPU/D3D11/GPU_D3D11.h index d1e5287782..e45c1ab79d 100644 --- a/GPU/D3D11/GPU_D3D11.h +++ b/GPU/D3D11/GPU_D3D11.h @@ -36,7 +36,7 @@ public: GPU_D3D11(GraphicsContext *gfxCtx, Draw::DrawContext *draw); ~GPU_D3D11(); - void CheckGPUFeatures() override; + u32 CheckGPUFeatures() const override; void PreExecuteOp(u32 op, u32 diff) override; void ExecuteOp(u32 op, u32 diff) override; diff --git a/GPU/D3D11/StateMappingD3D11.cpp b/GPU/D3D11/StateMappingD3D11.cpp index 0cee52a155..f4ce888f7b 100644 --- a/GPU/D3D11/StateMappingD3D11.cpp +++ b/GPU/D3D11/StateMappingD3D11.cpp @@ -153,15 +153,16 @@ void DrawEngineD3D11::ApplyDrawState(int prim) { // We ignore the logicState on D3D since there's no support, the emulation of it is blend-and-shader only. if (pipelineState_.FramebufferRead()) { - bool fboTexNeedsBind = false; - ApplyFramebufferRead(&fboTexNeedsBind); + FBOTexState fboTexBindState = FBO_TEX_NONE; + ApplyFramebufferRead(&fboTexBindState); // The shader takes over the responsibility for blending, so recompute. ApplyStencilReplaceAndLogicOpIgnoreBlend(blendState.replaceAlphaWithStencil, blendState); - if (fboTexNeedsBind) { + if (fboTexBindState == FBO_TEX_COPY_BIND_TEX) { framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY); // No sampler required, we do a plain Load in the pixel shader. fboTexBound_ = true; + fboTexBindState = FBO_TEX_NONE; framebufferManager_->RebindFramebuffer("RebindFramebuffer - ApplyDrawState"); // Must dirty blend state here so we re-copy next time. Example: Lunar's spell effects. 
diff --git a/GPU/Debugger/Debugger.cpp b/GPU/Debugger/Debugger.cpp index fb88fdca2f..b505521631 100644 --- a/GPU/Debugger/Debugger.cpp +++ b/GPU/Debugger/Debugger.cpp @@ -18,6 +18,7 @@ #include #include "Common/Log.h" #include "Common/StringUtils.h" +#include "Common/TimeUtil.h" #include "GPU/GPU.h" #include "GPU/Debugger/Breakpoints.h" #include "GPU/Debugger/Debugger.h" @@ -35,6 +36,8 @@ static int primsLastFrame = 0; static int primsThisFrame = 0; static int thisFlipNum = 0; +static double lastStepTime = -1.0; + static std::vector> restrictPrimRanges; static std::string restrictPrimRule; @@ -56,6 +59,7 @@ void SetActive(bool flag) { breakNext = BreakNext::NONE; breakAtCount = -1; GPUStepping::ResumeFromStepping(); + lastStepTime = -1.0; } } @@ -79,6 +83,7 @@ void SetBreakNext(BreakNext next) { GPUBreakpoints::AddCmdBreakpoint(GE_CMD_SPLINE, true); } GPUStepping::ResumeFromStepping(); + lastStepTime = next == BreakNext::NONE ? -1.0 : time_now_d(); } void SetBreakCount(int c, bool relative) { @@ -130,7 +135,12 @@ bool NotifyCommand(u32 pc) { GPUBreakpoints::ClearTempBreakpoints(); auto info = gpuDebug->DissassembleOp(pc); - NOTICE_LOG(G3D, "Waiting at %08x, %s", pc, info.desc.c_str()); + if (lastStepTime >= 0.0) { + NOTICE_LOG(G3D, "Waiting at %08x, %s (%fms)", pc, info.desc.c_str(), (time_now_d() - lastStepTime) * 1000.0); + lastStepTime = -1.0; + } else { + NOTICE_LOG(G3D, "Waiting at %08x, %s", pc, info.desc.c_str()); + } GPUStepping::EnterStepping(); } @@ -141,7 +151,12 @@ void NotifyDraw() { if (!active) return; if (breakNext == BreakNext::DRAW && !GPUStepping::IsStepping()) { - NOTICE_LOG(G3D, "Waiting at a draw"); + if (lastStepTime >= 0.0) { + NOTICE_LOG(G3D, "Waiting at a draw (%fms)", (time_now_d() - lastStepTime) * 1000.0); + lastStepTime = -1.0; + } else { + NOTICE_LOG(G3D, "Waiting at a draw"); + } GPUStepping::EnterStepping(); } } diff --git a/GPU/Debugger/Playback.cpp b/GPU/Debugger/Playback.cpp index aa34ce3ccf..c47eba66b3 100644 --- 
a/GPU/Debugger/Playback.cpp +++ b/GPU/Debugger/Playback.cpp @@ -298,6 +298,7 @@ private: void Registers(u32 ptr, u32 sz); void Vertices(u32 ptr, u32 sz); void Indices(u32 ptr, u32 sz); + void ClutAddr(u32 ptr, u32 sz); void Clut(u32 ptr, u32 sz); void TransferSrc(u32 ptr, u32 sz); void Memset(u32 ptr, u32 sz); @@ -308,6 +309,8 @@ private: void Display(u32 ptr, u32 sz); u32 execMemcpyDest = 0; + u32 execClutAddr = 0; + u32 execClutFlags = 0; u32 execListBuf = 0; u32 execListPos = 0; u32 execListID = 0; @@ -472,15 +475,40 @@ void DumpExecute::Indices(u32 ptr, u32 sz) { execListQueue.push_back((GE_CMD_IADDR << 24) | (psp & 0x00FFFFFF)); } -void DumpExecute::Clut(u32 ptr, u32 sz) { - u32 psp = mapping_.Map(ptr, sz, std::bind(&DumpExecute::SyncStall, this)); - if (psp == 0) { - ERROR_LOG(SYSTEM, "Unable to allocate for clut"); - return; - } +void DumpExecute::ClutAddr(u32 ptr, u32 sz) { + struct ClutAddrData { + u32 addr; + u32 flags; + }; + const ClutAddrData *data = (const ClutAddrData *)(pushbuf_.data() + ptr); + execClutAddr = data->addr; + execClutFlags = data->flags; +} - execListQueue.push_back((GE_CMD_CLUTADDRUPPER << 24) | ((psp >> 8) & 0x00FF0000)); - execListQueue.push_back((GE_CMD_CLUTADDR << 24) | (psp & 0x00FFFFFF)); +void DumpExecute::Clut(u32 ptr, u32 sz) { + // This is always run when we have the actual address set. + if (execClutAddr != 0) { + const bool isTarget = (execClutFlags & 1) != 0; + const bool unchangedVRAM = (execClutFlags & 2) != 0; + + // TODO: Could use drawnVRAM flag, but it can be wrong. + // Could potentially always skip if !isTarget, but playing it safe for offset texture behavior. + if (Memory::IsValidRange(execClutAddr, sz) && !unchangedVRAM && (!isTarget || !g_Config.bSoftwareRendering)) { + // Intentionally don't trigger an upload here. 
+ Memory::MemcpyUnchecked(execClutAddr, pushbuf_.data() + ptr, sz); + } + + execClutAddr = 0; + } else { + u32 psp = mapping_.Map(ptr, sz, std::bind(&DumpExecute::SyncStall, this)); + if (psp == 0) { + ERROR_LOG(SYSTEM, "Unable to allocate for clut"); + return; + } + + execListQueue.push_back((GE_CMD_CLUTADDRUPPER << 24) | ((psp >> 8) & 0x00FF0000)); + execListQueue.push_back((GE_CMD_CLUTADDR << 24) | (psp & 0x00FFFFFF)); + } } void DumpExecute::TransferSrc(u32 ptr, u32 sz) { @@ -619,6 +647,10 @@ bool DumpExecute::Run() { Indices(cmd.ptr, cmd.sz); break; + case CommandType::CLUTADDR: + ClutAddr(cmd.ptr, cmd.sz); + break; + case CommandType::CLUT: Clut(cmd.ptr, cmd.sz); break; diff --git a/GPU/Debugger/Record.cpp b/GPU/Debugger/Record.cpp index cc63dccf72..248ba1e48c 100644 --- a/GPU/Debugger/Record.cpp +++ b/GPU/Debugger/Record.cpp @@ -37,6 +37,7 @@ #include "Core/MemMap.h" #include "Core/System.h" #include "Core/ThreadPools.h" +#include "GPU/Common/GPUDebugInterface.h" #include "GPU/GPUInterface.h" #include "GPU/GPUState.h" #include "GPU/ge_constants.h" @@ -152,8 +153,19 @@ static void BeginRecording() { u32 sz = 512 * 4; pushbuf.resize(pushbuf.size() + sz); gstate.Save((u32_le *)(pushbuf.data() + ptr)); - commands.push_back({CommandType::INIT, sz, ptr}); + + // Also save the initial CLUT. 
+ GPUDebugBuffer clut; + if (gpuDebug->GetCurrentClut(clut)) { + sz = clut.GetStride() * clut.PixelSize(); + _assert_msg_(sz == 1024, "CLUT should be 1024 bytes"); + ptr = (u32)pushbuf.size(); + pushbuf.resize(pushbuf.size() + sz); + memcpy(pushbuf.data() + ptr, clut.GetData(), sz); + commands.push_back({ CommandType::CLUT, sz, ptr }); + } + DirtyAllVRAM(DirtyVRAMFlag::DIRTY); } @@ -308,6 +320,34 @@ static Command EmitCommandWithRAM(CommandType t, const void *p, u32 sz, u32 alig return cmd; } +static u32 GetTargetFlags(u32 addr, u32 sizeInRAM) { + const bool isTarget = lastRenderTargets.find(addr) != lastRenderTargets.end(); + + bool isDirtyVRAM = false; + bool isDrawnVRAM = false; + uint32_t start = (addr >> DIRTY_VRAM_SHIFT) & DIRTY_VRAM_MASK; + for (uint32_t i = 0; i < (sizeInRAM + DIRTY_VRAM_ROUND) >> DIRTY_VRAM_SHIFT; ++i) { + DirtyVRAMFlag flag = dirtyVRAM[start + i]; + isDirtyVRAM = isDirtyVRAM || flag != DirtyVRAMFlag::CLEAN; + isDrawnVRAM = isDrawnVRAM || flag == DirtyVRAMFlag::DRAWN; + + // Mark the VRAM clean now that it's been copied to VRAM. + if (flag == DirtyVRAMFlag::DIRTY) + dirtyVRAM[start + i] = DirtyVRAMFlag::CLEAN; + } + + // The isTarget flag is mostly used for replay of dumps on a PSP. + u32 flags = isTarget ? 1 : 0; + // The unchangedVRAM flag tells us we can skip recopying. + if (!isDirtyVRAM) + flags |= 2; + // And the drawn flag tells us this data was potentially drawn to. + if (isDrawnVRAM) + flags |= 4; + + return flags; +} + static void EmitTextureData(int level, u32 texaddr) { GETextureFormat format = gstate.getTextureFormat(); int w = gstate.getTextureWidth(level); @@ -315,7 +355,6 @@ static void EmitTextureData(int level, u32 texaddr) { int bufw = GetTextureBufw(level, texaddr, format); int extraw = w > bufw ? 
w - bufw : 0; u32 sizeInRAM = (textureBitsPerPixel[format] * (bufw * h + extraw)) / 8; - const bool isTarget = lastRenderTargets.find(texaddr) != lastRenderTargets.end(); CommandType type = CommandType((int)CommandType::TEXTURE0 + level); const u8 *p = Memory::GetPointerUnchecked(texaddr); @@ -330,27 +369,7 @@ static void EmitTextureData(int level, u32 texaddr) { u32 pad; }; - bool isDirtyVRAM = false; - bool isDrawnVRAM = false; - uint32_t start = (texaddr >> DIRTY_VRAM_SHIFT) & DIRTY_VRAM_MASK; - for (uint32_t i = 0; i < (sizeInRAM + DIRTY_VRAM_ROUND) >> DIRTY_VRAM_SHIFT; ++i) { - DirtyVRAMFlag flag = dirtyVRAM[start + i]; - isDirtyVRAM = isDirtyVRAM || flag != DirtyVRAMFlag::CLEAN; - isDrawnVRAM = isDrawnVRAM || flag == DirtyVRAMFlag::DRAWN; - - // Mark the VRAM clean now that it's been copied to VRAM. - if (flag == DirtyVRAMFlag::DIRTY) - dirtyVRAM[start + i] = DirtyVRAMFlag::CLEAN; - } - - // The isTarget flag is mostly used for replay of dumps on a PSP. - u32 flags = isTarget ? 1 : 0; - // The unchangedVRAM flag tells us we can skip recopying. - if (!isDirtyVRAM) - flags |= 2; - // And the drawn flag tells us this data was potentially drawn to. - if (isDrawnVRAM) - flags |= 4; + u32 flags = GetTargetFlags(texaddr, sizeInRAM); FramebufData framebuf{ texaddr, bufw, flags }; framebufData.resize(sizeof(framebuf) + bytes); memcpy(&framebufData[0], &framebuf, sizeof(framebuf)); @@ -456,12 +475,33 @@ static void EmitTransfer(u32 op) { static void EmitClut(u32 op) { u32 addr = gstate.getClutAddress(); + + // Hardware rendering may be using a framebuffer as CLUT. + // To get at this, we first run the command (normally we're called right before it has run.) + if (Memory::IsVRAMAddress(addr)) + gpuDebug->SetCmdValue(op); + // Actually should only be 0x3F, but we allow enhanced CLUTs. See #15727. u32 blocks = (op & 0x7F) == 0x40 ? 
0x40 : (op & 0x3F); u32 bytes = blocks * 32; bytes = Memory::ValidSize(addr, bytes); if (bytes != 0) { + // Send the original address so VRAM can be reasoned about. + if (Memory::IsVRAMAddress(addr)) { + struct ClutAddrData { + u32 addr; + u32 flags; + }; + u32 flags = GetTargetFlags(addr, bytes); + ClutAddrData data{ addr, flags }; + + FlushRegisters(); + Command cmd{CommandType::CLUTADDR, sizeof(data), (u32)pushbuf.size()}; + pushbuf.resize(pushbuf.size() + sizeof(data)); + memcpy(pushbuf.data() + cmd.ptr, &data, sizeof(data)); + commands.push_back(cmd); + } EmitCommandWithRAM(CommandType::CLUT, Memory::GetPointerUnchecked(addr), bytes, 16); } diff --git a/GPU/Debugger/RecordFormat.h b/GPU/Debugger/RecordFormat.h index 8079136d0c..dc7abe3ec1 100644 --- a/GPU/Debugger/RecordFormat.h +++ b/GPU/Debugger/RecordFormat.h @@ -49,6 +49,7 @@ enum class CommandType : u8 { MEMCPYDEST = 7, MEMCPYDATA = 8, DISPLAY = 9, + CLUTADDR = 10, TEXTURE0 = 0x10, TEXTURE1 = 0x11, diff --git a/GPU/Directx9/DrawEngineDX9.h b/GPU/Directx9/DrawEngineDX9.h index a0ee23e60d..9ef5b37c65 100644 --- a/GPU/Directx9/DrawEngineDX9.h +++ b/GPU/Directx9/DrawEngineDX9.h @@ -170,6 +170,8 @@ private: // Hardware tessellation TessellationDataTransferDX9 *tessDataTransferDX9; + FBOTexState fboTexBindState_ = FBO_TEX_NONE; + int lastRenderStepId_ = -1; bool fboTexNeedsBind_ = false; diff --git a/GPU/Directx9/GPU_DX9.cpp b/GPU/Directx9/GPU_DX9.cpp index 3e5d0bac57..075d89e460 100644 --- a/GPU/Directx9/GPU_DX9.cpp +++ b/GPU/Directx9/GPU_DX9.cpp @@ -81,7 +81,7 @@ GPU_DX9::GPU_DX9(GraphicsContext *gfxCtx, Draw::DrawContext *draw) // No need to flush before the tex scale/offset commands if we are baking // the tex scale/offset into the vertices anyway. 
UpdateCmdInfo(); - CheckGPUFeatures(); + gstate_c.featureFlags = CheckGPUFeatures(); BuildReportingInfo(); @@ -98,68 +98,9 @@ GPU_DX9::GPU_DX9(GraphicsContext *gfxCtx, Draw::DrawContext *draw) } } -// TODO: Move this detection elsewhere when it's needed elsewhere, not before. It's ugly. -// Source: https://envytools.readthedocs.io/en/latest/hw/pciid.html#gf100 -enum NVIDIAGeneration { - NV_PRE_KEPLER, - NV_KEPLER, - NV_MAXWELL, - NV_PASCAL, - NV_VOLTA, - NV_TURING, // or later -}; - -static NVIDIAGeneration NVIDIAGetDeviceGeneration(int deviceID) { - if (deviceID >= 0x1180 && deviceID <= 0x11bf) - return NV_KEPLER; // GK104 - if (deviceID >= 0x11c0 && deviceID <= 0x11fa) - return NV_KEPLER; // GK106 - if (deviceID >= 0x0fc0 && deviceID <= 0x0fff) - return NV_KEPLER; // GK107 - if (deviceID >= 0x1003 && deviceID <= 0x1028) - return NV_KEPLER; // GK110(B) - if (deviceID >= 0x1280 && deviceID <= 0x12ba) - return NV_KEPLER; // GK208 - if (deviceID >= 0x1381 && deviceID <= 0x13b0) - return NV_MAXWELL; // GM107 - if (deviceID >= 0x1340 && deviceID <= 0x134d) - return NV_MAXWELL; // GM108 - if (deviceID >= 0x13c0 && deviceID <= 0x13d9) - return NV_MAXWELL; // GM204 - if (deviceID >= 0x1401 && deviceID <= 0x1427) - return NV_MAXWELL; // GM206 - if (deviceID >= 0x15f7 && deviceID <= 0x15f9) - return NV_PASCAL; // GP100 - if (deviceID >= 0x15f7 && deviceID <= 0x15f9) - return NV_PASCAL; // GP100 - if (deviceID >= 0x1b00 && deviceID <= 0x1b38) - return NV_PASCAL; // GP102 - if (deviceID >= 0x1b80 && deviceID <= 0x1be1) - return NV_PASCAL; // GP104 - if (deviceID >= 0x1c02 && deviceID <= 0x1c62) - return NV_PASCAL; // GP106 - if (deviceID >= 0x1c81 && deviceID <= 0x1c92) - return NV_PASCAL; // GP107 - if (deviceID >= 0x1d01 && deviceID <= 0x1d12) - return NV_PASCAL; // GP108 - if (deviceID >= 0x1d81 && deviceID <= 0x1dba) - return NV_VOLTA; // GV100 - if (deviceID >= 0x1e02 && deviceID <= 0x1e3c) - return NV_TURING; // TU102 - if (deviceID >= 0x1e82 && deviceID <= 0x1ed0) - 
return NV_TURING; // TU104 - if (deviceID >= 0x1f02 && deviceID <= 0x1f51) - return NV_TURING; // TU104 - if (deviceID >= 0x1e02) - return NV_TURING; // More TU models or later, probably. - return NV_PRE_KEPLER; -} - -void GPU_DX9::CheckGPUFeatures() { - u32 features = 0; +u32 GPU_DX9::CheckGPUFeatures() const { + u32 features = GPUCommon::CheckGPUFeatures(); features |= GPU_SUPPORTS_16BIT_FORMATS; - features |= GPU_SUPPORTS_BLEND_MINMAX; - features |= GPU_SUPPORTS_DEPTH_TEXTURE; features |= GPU_SUPPORTS_TEXTURE_LOD_CONTROL; // Accurate depth is required because the Direct3D API does not support inverse Z. @@ -168,41 +109,6 @@ void GPU_DX9::CheckGPUFeatures() { features |= GPU_SUPPORTS_ACCURATE_DEPTH; auto vendor = draw_->GetDeviceCaps().vendor; - if (!PSP_CoreParameter().compat.flags().DisableRangeCulling) { - // VS range culling (killing triangles in the vertex shader using NaN) causes problems on Intel. - // Also causes problems on old NVIDIA. - switch (vendor) { - case Draw::GPUVendor::VENDOR_INTEL: - break; - case Draw::GPUVendor::VENDOR_NVIDIA: - // Older NVIDIAs don't seem to like NaNs in their DX9 vertex shaders. - // No idea if KEPLER is the right cutoff, but let's go with it. 
- if (NVIDIAGetDeviceGeneration(draw_->GetDeviceCaps().deviceID) >= NV_KEPLER) { - features |= GPU_SUPPORTS_VS_RANGE_CULLING; - } - break; - default: - features |= GPU_SUPPORTS_VS_RANGE_CULLING; - break; - } - } - - D3DCAPS9 caps; - ZeroMemory(&caps, sizeof(caps)); - HRESULT result = 0; - if (deviceEx_) { - result = deviceEx_->GetDeviceCaps(&caps); - } else { - result = device_->GetDeviceCaps(&caps); - } - if (FAILED(result)) { - WARN_LOG_REPORT(G3D, "Direct3D9: Failed to get the device caps!"); - } else { - if ((caps.RasterCaps & D3DPRASTERCAPS_ANISOTROPY) != 0 && caps.MaxAnisotropy > 1) - features |= GPU_SUPPORTS_ANISOTROPY; - if ((caps.TextureCaps & (D3DPTEXTURECAPS_NONPOW2CONDITIONAL | D3DPTEXTURECAPS_POW2)) == 0) - features |= GPU_SUPPORTS_TEXTURE_NPOT; - } if (!g_Config.bHighQualityDepth) { features |= GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT; @@ -213,11 +119,7 @@ void GPU_DX9::CheckGPUFeatures() { features |= GPU_ROUND_DEPTH_TO_16BIT; } - if (PSP_CoreParameter().compat.flags().ClearToRAM) { - features |= GPU_USE_CLEAR_RAM_HACK; - } - - gstate_c.featureFlags = features; + return features; } GPU_DX9::~GPU_DX9() { @@ -261,7 +163,7 @@ void GPU_DX9::BeginHostFrame() { GPUCommon::BeginHostFrame(); UpdateCmdInfo(); if (resized_) { - CheckGPUFeatures(); + gstate_c.featureFlags = CheckGPUFeatures(); framebufferManager_->Resized(); drawEngine_.Resized(); shaderManagerDX9_->DirtyShader(); diff --git a/GPU/Directx9/GPU_DX9.h b/GPU/Directx9/GPU_DX9.h index 0e313da397..7c6a88d180 100644 --- a/GPU/Directx9/GPU_DX9.h +++ b/GPU/Directx9/GPU_DX9.h @@ -35,7 +35,7 @@ public: GPU_DX9(GraphicsContext *gfxCtx, Draw::DrawContext *draw); ~GPU_DX9(); - void CheckGPUFeatures() override; + u32 CheckGPUFeatures() const override; void PreExecuteOp(u32 op, u32 diff) override; void ExecuteOp(u32 op, u32 diff) override; diff --git a/GPU/Directx9/StateMappingDX9.cpp b/GPU/Directx9/StateMappingDX9.cpp index 2ebadb3974..0dfa352f00 100644 --- a/GPU/Directx9/StateMappingDX9.cpp +++ 
b/GPU/Directx9/StateMappingDX9.cpp @@ -99,14 +99,14 @@ void DrawEngineDX9::ApplyDrawState(int prim) { if (!gstate.isModeClear()) { textureCache_->ApplyTexture(); - if (fboTexNeedsBind_) { + if (fboTexBindState_ == FBO_TEX_COPY_BIND_TEX) { // Note that this is positions, not UVs, that we need the copy from. framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY); // If we are rendering at a higher resolution, linear is probably best for the dest color. device_->SetSamplerState(1, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR); device_->SetSamplerState(1, D3DSAMP_MINFILTER, D3DTEXF_LINEAR); fboTexBound_ = true; - fboTexNeedsBind_ = false; + fboTexBindState_ = FBO_TEX_NONE; } // TODO: Test texture? @@ -133,20 +133,23 @@ void DrawEngineDX9::ApplyDrawState(int prim) { // We ignore the logicState on D3D since there's no support, the emulation of it is blend-and-shader only. if (pipelineState_.FramebufferRead()) { - bool fboTexNeedsBind = false; - ApplyFramebufferRead(&fboTexNeedsBind); + ApplyFramebufferRead(&fboTexBindState_); // The shader takes over the responsibility for blending, so recompute. ApplyStencilReplaceAndLogicOpIgnoreBlend(blendState.replaceAlphaWithStencil, blendState); - if (fboTexNeedsBind) { + if (fboTexBindState_ == FBO_TEX_COPY_BIND_TEX) { // Note that this is positions, not UVs, that we need the copy from. framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY); // If we are rendering at a higher resolution, linear is probably best for the dest color. device_->SetSamplerState(1, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR); device_->SetSamplerState(1, D3DSAMP_MINFILTER, D3DTEXF_LINEAR); fboTexBound_ = true; + fboTexBindState_ = FBO_TEX_NONE; dirtyRequiresRecheck_ |= DIRTY_BLEND_STATE; gstate_c.Dirty(DIRTY_BLEND_STATE); + } else if (fboTexBindState_ == FBO_TEX_READ_FRAMEBUFFER) { + // Not supported. 
+ fboTexBindState_ = FBO_TEX_NONE; } dirtyRequiresRecheck_ |= DIRTY_FRAGMENTSHADER_STATE; diff --git a/GPU/GLES/DepthBufferGLES.cpp b/GPU/GLES/DepthBufferGLES.cpp index e5bcc20bf9..342ebdad60 100644 --- a/GPU/GLES/DepthBufferGLES.cpp +++ b/GPU/GLES/DepthBufferGLES.cpp @@ -118,7 +118,8 @@ void FramebufferManagerGLES::PackDepthbuffer(VirtualFramebuffer *vfb, int x, int queries.push_back({ &u_depthDownloadTo8, "u_depthTo8" }); std::vector inits; inits.push_back({ &u_depthDownloadTex, 0, TEX_SLOT_PSP_TEXTURE }); - depthDownloadProgram_ = render->CreateProgram(shaders, semantics, queries, inits, false, false); + GLRProgramFlags flags{}; + depthDownloadProgram_ = render->CreateProgram(shaders, semantics, queries, inits, flags); for (auto iter : shaders) { render->DeleteShader(iter); } diff --git a/GPU/GLES/GPU_GLES.cpp b/GPU/GLES/GPU_GLES.cpp index 75fd0080b5..abae5134f9 100644 --- a/GPU/GLES/GPU_GLES.cpp +++ b/GPU/GLES/GPU_GLES.cpp @@ -54,7 +54,7 @@ GPU_GLES::GPU_GLES(GraphicsContext *gfxCtx, Draw::DrawContext *draw) : GPUCommon(gfxCtx, draw), drawEngine_(draw), fragmentTestCache_(draw) { UpdateVsyncInterval(true); - CheckGPUFeatures(); + gstate_c.featureFlags = CheckGPUFeatures(); shaderManagerGL_ = new ShaderManagerGLES(draw); framebufferManagerGL_ = new FramebufferManagerGLES(draw); @@ -148,42 +148,17 @@ GPU_GLES::~GPU_GLES() { // Take the raw GL extension and versioning data and turn into feature flags. // TODO: This should use DrawContext::GetDeviceCaps() more and more, and eventually // this can be shared between all the backends. 
-void GPU_GLES::CheckGPUFeatures() { - u32 features = 0; +u32 GPU_GLES::CheckGPUFeatures() const { + u32 features = GPUCommon::CheckGPUFeatures(); features |= GPU_SUPPORTS_16BIT_FORMATS; - if (draw_->GetDeviceCaps().dualSourceBlend) { - if (!g_Config.bVendorBugChecksEnabled || !draw_->GetBugs().Has(Draw::Bugs::DUAL_SOURCE_BLENDING_BROKEN)) { - features |= GPU_SUPPORTS_DUALSOURCE_BLEND; - } - } - - if (gl_extensions.EXT_shader_framebuffer_fetch || gl_extensions.ARM_shader_framebuffer_fetch) { - // This has caused problems in the past. Let's only enable on GLES3. - if (gl_extensions.GLES3) { - features |= GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH; - } - } - if ((gl_extensions.gpuVendor == GPU_VENDOR_NVIDIA) || (gl_extensions.gpuVendor == GPU_VENDOR_AMD)) features |= GPU_PREFER_REVERSE_COLOR_ORDER; - if (draw_->GetDeviceCaps().textureNPOTFullySupported) - features |= GPU_SUPPORTS_TEXTURE_NPOT; - - if (gl_extensions.EXT_blend_minmax) - features |= GPU_SUPPORTS_BLEND_MINMAX; - - if (draw_->GetDeviceCaps().logicOpSupported) - features |= GPU_SUPPORTS_LOGIC_OP; - if (gl_extensions.GLES3 || !gl_extensions.IsGLES) features |= GPU_SUPPORTS_TEXTURE_LOD_CONTROL; - if (draw_->GetDeviceCaps().anisoSupported) - features |= GPU_SUPPORTS_ANISOTROPY; - bool canUseInstanceID = gl_extensions.EXT_draw_instanced || gl_extensions.ARB_draw_instanced; bool canDefInstanceID = gl_extensions.IsGLES || gl_extensions.EXT_gpu_shader4 || gl_extensions.VersionGEThan(3, 1); bool instanceRendering = gl_extensions.GLES3 || (canUseInstanceID && canDefInstanceID); @@ -202,21 +177,6 @@ void GPU_GLES::CheckGPUFeatures() { // Our implementation of depth texturing needs simple Z range, so can't // use the extension hacks (yet). 
} - if (draw_->GetDeviceCaps().textureDepthSupported) - features |= GPU_SUPPORTS_DEPTH_TEXTURE; - if (draw_->GetDeviceCaps().clipDistanceSupported) - features |= GPU_SUPPORTS_CLIP_DISTANCE; - if (draw_->GetDeviceCaps().cullDistanceSupported) - features |= GPU_SUPPORTS_CULL_DISTANCE; - if (!draw_->GetBugs().Has(Draw::Bugs::BROKEN_NAN_IN_CONDITIONAL)) { - // Ignore the compat setting if clip and cull are both enabled. - // When supported, we can do the depth side of range culling more correctly. - const bool supported = draw_->GetDeviceCaps().clipDistanceSupported && draw_->GetDeviceCaps().cullDistanceSupported; - const bool disabled = PSP_CoreParameter().compat.flags().DisableRangeCulling; - if (supported || !disabled) { - features |= GPU_SUPPORTS_VS_RANGE_CULLING; - } - } // If we already have a 16-bit depth buffer, we don't need to round. bool prefer24 = draw_->GetDeviceCaps().preferredDepthBufferFormat == Draw::DataFormat::D24_S8; @@ -245,11 +205,7 @@ void GPU_GLES::CheckGPUFeatures() { features |= GPU_USE_DEPTH_RANGE_HACK; } - if (PSP_CoreParameter().compat.flags().ClearToRAM) { - features |= GPU_USE_CLEAR_RAM_HACK; - } - - gstate_c.featureFlags = features; + return features; } bool GPU_GLES::IsReady() { @@ -321,7 +277,7 @@ void GPU_GLES::BeginHostFrame() { GPUCommon::BeginHostFrame(); UpdateCmdInfo(); if (resized_) { - CheckGPUFeatures(); + gstate_c.featureFlags = CheckGPUFeatures(); framebufferManager_->Resized(); drawEngine_.Resized(); shaderManagerGL_->DirtyShader(); diff --git a/GPU/GLES/GPU_GLES.h b/GPU/GLES/GPU_GLES.h index cbe6bc00b0..98bb1d9362 100644 --- a/GPU/GLES/GPU_GLES.h +++ b/GPU/GLES/GPU_GLES.h @@ -38,7 +38,7 @@ public: ~GPU_GLES(); // This gets called on startup and when we get back from settings. 
- void CheckGPUFeatures() override; + u32 CheckGPUFeatures() const override; bool IsReady() override; void CancelReady() override; diff --git a/GPU/GLES/ShaderManagerGLES.cpp b/GPU/GLES/ShaderManagerGLES.cpp index 18dbe3680f..92dfa9488f 100644 --- a/GPU/GLES/ShaderManagerGLES.cpp +++ b/GPU/GLES/ShaderManagerGLES.cpp @@ -192,9 +192,18 @@ LinkedShader::LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs, initialize.push_back({ &u_tess_weights_u, 0, TEX_SLOT_SPLINE_WEIGHTS_U }); initialize.push_back({ &u_tess_weights_v, 0, TEX_SLOT_SPLINE_WEIGHTS_V }); - bool useDualSource = (gstate_c.featureFlags & GPU_SUPPORTS_DUALSOURCE_BLEND) != 0; - bool useClip0 = VSID.Bit(VS_BIT_VERTEX_RANGE_CULLING) && gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE); - program = render->CreateProgram(shaders, semantics, queries, initialize, useDualSource, useClip0); + GLRProgramFlags flags{}; + flags.supportDualSource = (gstate_c.featureFlags & GPU_SUPPORTS_DUALSOURCE_BLEND) != 0; + if (!VSID.Bit(VS_BIT_IS_THROUGH) && gstate_c.Supports(GPU_SUPPORTS_DEPTH_CLAMP)) { + flags.useClipDistance0 = true; + flags.useClipDistance1 = true; + if (VSID.Bit(VS_BIT_VERTEX_RANGE_CULLING) && gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE)) + flags.useClipDistance2 = true; + } else if (VSID.Bit(VS_BIT_VERTEX_RANGE_CULLING) && gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE)) { + flags.useClipDistance0 = true; + } + + program = render->CreateProgram(shaders, semantics, queries, initialize, flags); // The rest, use the "dirty" mechanism. 
dirtyUniforms = DIRTY_ALL_UNIFORMS; diff --git a/GPU/GLES/StateMappingGLES.cpp b/GPU/GLES/StateMappingGLES.cpp index dbfc115a91..cdcc9e5069 100644 --- a/GPU/GLES/StateMappingGLES.cpp +++ b/GPU/GLES/StateMappingGLES.cpp @@ -149,13 +149,14 @@ void DrawEngineGLES::ApplyDrawState(int prim) { GenericLogicState &logicState = pipelineState_.logicState; if (pipelineState_.FramebufferRead()) { - bool fboTexNeedsBind = false; - ApplyFramebufferRead(&fboTexNeedsBind); + FBOTexState fboTexBindState = FBO_TEX_NONE; + ApplyFramebufferRead(&fboTexBindState); // The shader takes over the responsibility for blending, so recompute. ApplyStencilReplaceAndLogicOpIgnoreBlend(blendState.replaceAlphaWithStencil, blendState); // We copy the framebuffer here, as doing so will wipe any blend state if we do it later. - if (fboTexNeedsBind) { + // fboTexNeedsBind_ won't be set if we can read directly from the target. + if (fboTexBindState == FBO_TEX_COPY_BIND_TEX) { // Note that this is positions, not UVs, that we need the copy from. framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY); // If we are rendering at a higher resolution, linear is probably best for the dest color. @@ -166,6 +167,9 @@ void DrawEngineGLES::ApplyDrawState(int prim) { // Must dirty blend state here so we re-copy next time. Example: Lunar's spell effects. dirtyRequiresRecheck_ |= DIRTY_BLEND_STATE; gstate_c.Dirty(DIRTY_BLEND_STATE); + } else if (fboTexBindState == FBO_TEX_READ_FRAMEBUFFER) { + // No action needed here. + fboTexBindState = FBO_TEX_NONE; } dirtyRequiresRecheck_ |= DIRTY_FRAGMENTSHADER_STATE; gstate_c.Dirty(DIRTY_FRAGMENTSHADER_STATE); diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index 107398b7c8..3d64bdb1ab 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -89,7 +89,7 @@ const CommonCommandTableEntry commonCommandTable[] = { // These affect the fragment shader so need flushing. 
{ GE_CMD_CLEARMODE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE }, { GE_CMD_TEXTUREMAPENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE }, - { GE_CMD_FOGENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE}, + { GE_CMD_FOGENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE }, { GE_CMD_TEXMODE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS | DIRTY_FRAGMENTSHADER_STATE }, { GE_CMD_TEXSHADELS, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE }, // Raster state for Direct3D 9, uncommon. @@ -2414,10 +2414,10 @@ void GPUCommon::Execute_ImmVertexAlphaPrim(u32 op, u32 diff) { immPrim_ = (GEPrimitiveType)prim; // Flags seem to only be respected from the first prim. immFlags_ = op & 0x00FFF800; + immFirstSent_ = false; } else if (prim == GE_PRIM_KEEP_PREVIOUS && immPrim_ != GE_PRIM_INVALID) { static constexpr int flushPrimCount[] = { 1, 2, 0, 3, 0, 0, 2, 0 }; - // Instead of finding a proper point to flush, we just emit a full rectangle every time one - // is finished. + // Instead of finding a proper point to flush, we just emit prims when we can. if (immCount_ == flushPrimCount[immPrim_ & 7]) FlushImm(); } else { @@ -2439,31 +2439,6 @@ void GPUCommon::FlushImm() { } UpdateUVScaleOffset(); - // Instead of plumbing through properly (we'd need to inject these pretransformed vertices in the middle - // of SoftwareTransform(), which would take a lot of refactoring), we'll cheat and just turn these into - // through vertices. - // Since the only known use is Thrillville and it only uses it to clear, we just use color and pos. 
- struct ImmVertex { - float uv[2]; - uint32_t color; - float xyz[3]; - }; - ImmVertex temp[MAX_IMMBUFFER_SIZE]; - uint32_t color1Used = 0; - for (int i = 0; i < immCount_; i++) { - // Since we're sending through, scale back up to w/h. - temp[i].uv[0] = immBuffer_[i].u * gstate.getTextureWidth(0); - temp[i].uv[1] = immBuffer_[i].v * gstate.getTextureHeight(0); - temp[i].color = immBuffer_[i].color0_32; - temp[i].xyz[0] = immBuffer_[i].pos[0]; - temp[i].xyz[1] = immBuffer_[i].pos[1]; - temp[i].xyz[2] = immBuffer_[i].pos[2]; - color1Used |= immBuffer_[i].color1_32; - } - int vtype = GE_VTYPE_TC_FLOAT | GE_VTYPE_POS_FLOAT | GE_VTYPE_COL_8888 | GE_VTYPE_THROUGH; - - // TODO: Handle fog and secondary color somehow? - bool antialias = (immFlags_ & GE_IMM_ANTIALIAS) != 0; bool prevAntialias = gstate.isAntiAliasEnabled(); bool shading = (immFlags_ & GE_IMM_SHADING) != 0; @@ -2473,40 +2448,42 @@ void GPUCommon::FlushImm() { int cullMode = (immFlags_ & GE_IMM_CULLFACE) != 0 ? 1 : 0; bool texturing = (immFlags_ & GE_IMM_TEXTURE) != 0; bool prevTexturing = gstate.isTextureMapEnabled(); + bool fog = (immFlags_ & GE_IMM_FOG) != 0; + bool prevFog = gstate.isFogEnabled(); bool dither = (immFlags_ & GE_IMM_DITHER) != 0; bool prevDither = gstate.isDitherEnabled(); if ((immFlags_ & GE_IMM_CLIPMASK) != 0) { WARN_LOG_REPORT_ONCE(geimmclipvalue, G3D, "Imm vertex used clip value, flags=%06x", immFlags_); - } else if ((immFlags_ & GE_IMM_FOG) != 0) { - WARN_LOG_REPORT_ONCE(geimmfog, G3D, "Imm vertex used fog, flags=%06x", immFlags_); - } else if (color1Used != 0 && gstate.isUsingSecondaryColor()) { - WARN_LOG_REPORT_ONCE(geimmcolor1, G3D, "Imm vertex used secondary color, flags=%06x", immFlags_); } - if (texturing != prevTexturing || cullEnable != prevCullEnable || dither != prevDither || prevShading != shading) { + bool changed = texturing != prevTexturing || cullEnable != prevCullEnable || dither != prevDither; + changed = changed || prevShading != shading || prevFog != fog; + if 
(changed) { DispatchFlush(); gstate.antiAliasEnable = (GE_CMD_ANTIALIASENABLE << 24) | (int)antialias; gstate.shademodel = (GE_CMD_SHADEMODE << 24) | (int)shading; gstate.cullfaceEnable = (GE_CMD_CULLFACEENABLE << 24) | (int)cullEnable; gstate.textureMapEnable = (GE_CMD_TEXTUREMAPENABLE << 24) | (int)texturing; + gstate.fogEnable = (GE_CMD_FOGENABLE << 24) | (int)fog; gstate.ditherEnable = (GE_CMD_DITHERENABLE << 24) | (int)dither; - gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE); + gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_UVSCALEOFFSET | DIRTY_CULLRANGE); } - int bytesRead; - uint32_t vertTypeID = GetVertTypeID(vtype, 0); - drawEngineCommon_->DispatchSubmitImm(temp, nullptr, immPrim_, immCount_, vertTypeID, cullMode, &bytesRead); - // TODO: In the future, make a special path for these. - // drawEngineCommon_->DispatchSubmitImm(immBuffer_, immCount_); + drawEngineCommon_->DispatchSubmitImm(immPrim_, immBuffer_, immCount_, cullMode, immFirstSent_); immCount_ = 0; + immFirstSent_ = true; - gstate.antiAliasEnable = (GE_CMD_ANTIALIASENABLE << 24) | (int)prevAntialias; - gstate.shademodel = (GE_CMD_SHADEMODE << 24) | (int)prevShading; - gstate.cullfaceEnable = (GE_CMD_CULLFACEENABLE << 24) | (int)prevCullEnable; - gstate.textureMapEnable = (GE_CMD_TEXTUREMAPENABLE << 24) | (int)prevTexturing; - gstate.ditherEnable = (GE_CMD_DITHERENABLE << 24) | (int)prevDither; - gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE); + if (changed) { + DispatchFlush(); + gstate.antiAliasEnable = (GE_CMD_ANTIALIASENABLE << 24) | (int)prevAntialias; + gstate.shademodel = (GE_CMD_SHADEMODE << 24) | (int)prevShading; + gstate.cullfaceEnable = (GE_CMD_CULLFACEENABLE << 24) | (int)prevCullEnable; + gstate.textureMapEnable = (GE_CMD_TEXTUREMAPENABLE << 24) | (int)prevTexturing; + gstate.fogEnable = (GE_CMD_FOGENABLE << 24) | 
(int)prevFog; + gstate.ditherEnable = (GE_CMD_DITHERENABLE << 24) | (int)prevDither; + gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_UVSCALEOFFSET | DIRTY_CULLRANGE); + } } void GPUCommon::ExecuteOp(u32 op, u32 diff) { @@ -3176,3 +3153,56 @@ size_t GPUCommon::FormatGPUStatsCommon(char *buffer, size_t size) { vertexAverageCycles ); } + +u32 GPUCommon::CheckGPUFeatures() const { + u32 features = 0; + if (draw_->GetDeviceCaps().logicOpSupported) { + features |= GPU_SUPPORTS_LOGIC_OP; + } + if (draw_->GetDeviceCaps().anisoSupported) { + features |= GPU_SUPPORTS_ANISOTROPY; + } + if (draw_->GetDeviceCaps().textureNPOTFullySupported) { + features |= GPU_SUPPORTS_TEXTURE_NPOT; + } + if (draw_->GetDeviceCaps().dualSourceBlend) { + if (!g_Config.bVendorBugChecksEnabled || !draw_->GetBugs().Has(Draw::Bugs::DUAL_SOURCE_BLENDING_BROKEN)) { + features |= GPU_SUPPORTS_DUALSOURCE_BLEND; + } + } + if (draw_->GetDeviceCaps().blendMinMaxSupported) { + features |= GPU_SUPPORTS_BLEND_MINMAX; + } + + if (draw_->GetDeviceCaps().clipDistanceSupported) { + features |= GPU_SUPPORTS_CLIP_DISTANCE; + } + + if (draw_->GetDeviceCaps().cullDistanceSupported) { + features |= GPU_SUPPORTS_CULL_DISTANCE; + } + + if (draw_->GetDeviceCaps().textureDepthSupported) { + features |= GPU_SUPPORTS_DEPTH_TEXTURE; + } + + if (!draw_->GetBugs().Has(Draw::Bugs::BROKEN_NAN_IN_CONDITIONAL)) { + // Ignore the compat setting if clip and cull are both enabled. + // When supported, we can do the depth side of range culling more correctly. 
+ const bool supported = draw_->GetDeviceCaps().clipDistanceSupported && draw_->GetDeviceCaps().cullDistanceSupported; + const bool disabled = PSP_CoreParameter().compat.flags().DisableRangeCulling; + if (supported || !disabled) { + features |= GPU_SUPPORTS_VS_RANGE_CULLING; + } + } + + if (draw_->GetDeviceCaps().framebufferFetchSupported) { + features |= GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH; + } + + if (PSP_CoreParameter().compat.flags().ClearToRAM) { + features |= GPU_USE_CLEAR_RAM_HACK; + } + + return features; +} diff --git a/GPU/GPUCommon.h b/GPU/GPUCommon.h index 3a5674127f..1127e38d91 100644 --- a/GPU/GPUCommon.h +++ b/GPU/GPUCommon.h @@ -76,7 +76,7 @@ public: Draw::DrawContext *GetDrawContext() override { return draw_; } - virtual void CheckGPUFeatures() = 0; + virtual u32 CheckGPUFeatures() const; void UpdateCmdInfo(); @@ -103,7 +103,7 @@ public: void ExecuteOp(u32 op, u32 diff) override; void PreExecuteOp(u32 op, u32 diff) override; - bool InterpretList(DisplayList &list) override; + bool InterpretList(DisplayList &list); void ProcessDLQueue(); u32 UpdateStall(int listid, u32 newstall) override; u32 EnqueueList(u32 listpc, u32 stall, int subIntrBase, PSPPointer args, bool head) override; @@ -355,6 +355,7 @@ protected: int immCount_ = 0; GEPrimitiveType immPrim_ = GE_PRIM_INVALID; uint32_t immFlags_ = 0; + bool immFirstSent_ = false; std::string reportingPrimaryInfo_; std::string reportingFullInfo_; diff --git a/GPU/GPUInterface.h b/GPU/GPUInterface.h index 832897f598..2ce7c48620 100644 --- a/GPU/GPUInterface.h +++ b/GPU/GPUInterface.h @@ -205,7 +205,6 @@ public: virtual void PreExecuteOp(u32 op, u32 diff) = 0; virtual void ExecuteOp(u32 op, u32 diff) = 0; - virtual bool InterpretList(DisplayList& list) = 0; // Framebuffer management virtual void SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) = 0; diff --git a/GPU/GPUState.h b/GPU/GPUState.h index cbfb58395d..191a670376 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -485,7 +485,8 
@@ enum { // Free bit: 15 GPU_SUPPORTS_DEPTH_TEXTURE = FLAG_BIT(16), GPU_SUPPORTS_ACCURATE_DEPTH = FLAG_BIT(17), - // Free bits: 18-19 + GPU_SUPPORTS_FRAGMENT_SHADER_INTERLOCK = FLAG_BIT(18), + // Free bits: 19 GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH = FLAG_BIT(20), GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT = FLAG_BIT(21), GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT = FLAG_BIT(22), diff --git a/GPU/Software/BinManager.cpp b/GPU/Software/BinManager.cpp index 3438cb222d..42eef12eda 100644 --- a/GPU/Software/BinManager.cpp +++ b/GPU/Software/BinManager.cpp @@ -167,7 +167,7 @@ void BinManager::UpdateState(bool throughMode) { if (states_.Full()) Flush("states"); stateIndex_ = (uint16_t)states_.Push(RasterizerState()); - ComputeRasterizerState(&states_[stateIndex_], throughMode); + ComputeRasterizerState(&states_[stateIndex_]); states_[stateIndex_].samplerID.cached.clut = cluts_[clutIndex_].readable; ClearDirty(SoftDirty::PIXEL_ALL | SoftDirty::SAMPLER_ALL | SoftDirty::RAST_ALL); @@ -326,7 +326,7 @@ void BinManager::AddTriangle(const VertexData &v0, const VertexData &v1, const V if (d01.x * d02.y - d01.y * d02.x < 0) return; // If all points have identical coords, we'll have 0 weights and not skip properly, so skip here. - if (d01.x == 0 && d01.y == 0 && d02.x == 0 && d02.y == 0) + if ((d01.x == 0 && d02.x == 0) || (d01.y == 0 && d02.y == 0)) return; // Was it fully outside the scissor? 
@@ -474,6 +474,9 @@ void BinManager::Drain() { } void BinManager::Flush(const char *reason) { + if (queueRange_.x1 == 0x7FFFFFFF) + return; + double st; if (coreCollectDebugStats) st = time_now_d(); diff --git a/GPU/Software/Clipper.cpp b/GPU/Software/Clipper.cpp index 166947ef14..6fcb2fe47b 100644 --- a/GPU/Software/Clipper.cpp +++ b/GPU/Software/Clipper.cpp @@ -133,6 +133,10 @@ static inline bool CheckOutsideZ(ClipCoords p, int &pos, int &neg) { void ProcessRect(const VertexData &v0, const VertexData &v1, BinManager &binner) { if (!binner.State().throughMode) { + // If any verts were outside range, throw the entire prim away. + if (v0.OutsideRange() || v1.OutsideRange()) + return; + // We may discard the entire rect based on depth values. int outsidePos = 0, outsideNeg = 0; CheckOutsideZ(v0.clippos, outsidePos, outsideNeg); @@ -176,6 +180,12 @@ void ProcessRect(const VertexData &v0, const VertexData &v1, BinManager &binner) } void ProcessPoint(const VertexData &v0, BinManager &binner) { + // If any verts were outside range, throw the entire prim away. + if (!binner.State().throughMode) { + if (v0.OutsideRange()) + return; + } + // Points need no clipping. Will be bounds checked in the rasterizer (which seems backwards?) binner.AddPoint(v0); } @@ -187,6 +197,10 @@ void ProcessLine(const VertexData &v0, const VertexData &v1, BinManager &binner) return; } + // If any verts were outside range, throw the entire prim away. + if (v0.OutsideRange() || v1.OutsideRange()) + return; + int outsidePos = 0, outsideNeg = 0; CheckOutsideZ(v0.clippos, outsidePos, outsideNeg); CheckOutsideZ(v1.clippos, outsidePos, outsideNeg); @@ -222,6 +236,10 @@ void ProcessLine(const VertexData &v0, const VertexData &v1, BinManager &binner) void ProcessTriangle(const VertexData &v0, const VertexData &v1, const VertexData &v2, const VertexData &provoking, BinManager &binner) { int mask = 0; if (!binner.State().throughMode) { + // If any verts were outside range, throw the entire prim away. 
+ if (v0.OutsideRange() || v1.OutsideRange() || v2.OutsideRange()) + return; + mask |= CalcClipMask(v0.clippos); mask |= CalcClipMask(v1.clippos); mask |= CalcClipMask(v2.clippos); diff --git a/GPU/Software/FuncId.cpp b/GPU/Software/FuncId.cpp index 501b5aaf27..0d194d0012 100644 --- a/GPU/Software/FuncId.cpp +++ b/GPU/Software/FuncId.cpp @@ -48,11 +48,11 @@ static inline PixelBlendFactor OptimizeAlphaFactor(uint32_t color) { return PixelBlendFactor::FIX; } -void ComputePixelFuncID(PixelFuncID *id, bool throughMode) { +void ComputePixelFuncID(PixelFuncID *id) { id->fullKey = 0; // TODO: Could this be minz > 0x0000 || maxz < 0xFFFF? Maybe unsafe, depending on verts... - id->applyDepthRange = !throughMode; + id->applyDepthRange = !gstate.isModeThrough(); // Dither happens even in clear mode. id->dithering = gstate.isDitherEnabled(); id->fbFormat = gstate.FrameBufFormat(); @@ -169,7 +169,7 @@ void ComputePixelFuncID(PixelFuncID *id, bool throughMode) { } id->applyLogicOp = gstate.isLogicOpEnabled() && gstate.getLogicOp() != GE_LOGIC_COPY; - id->applyFog = gstate.isFogEnabled() && !throughMode; + id->applyFog = gstate.isFogEnabled() && !gstate.isModeThrough(); id->earlyZChecks = id->DepthTestFunc() != GE_COMP_ALWAYS; if (id->stencilTest && id->earlyZChecks) { diff --git a/GPU/Software/FuncId.h b/GPU/Software/FuncId.h index 46307fc55c..d9d8e51573 100644 --- a/GPU/Software/FuncId.h +++ b/GPU/Software/FuncId.h @@ -244,7 +244,7 @@ struct hash { }; -void ComputePixelFuncID(PixelFuncID *id, bool throughMode); +void ComputePixelFuncID(PixelFuncID *id); std::string DescribePixelFuncID(const PixelFuncID &id); void ComputeSamplerID(SamplerID *id); diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 1f4039d05a..2336ff6bef 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -93,8 +93,8 @@ static inline Vec4 Interpolate(const float &c0, const float &c1, const fl return Interpolate(c0, c1, c2, w0.Cast(), w1.Cast(), w2.Cast(), 
wsum_recip); } -void ComputeRasterizerState(RasterizerState *state, bool throughMode) { - ComputePixelFuncID(&state->pixelID, throughMode); +void ComputeRasterizerState(RasterizerState *state) { + ComputePixelFuncID(&state->pixelID); state->drawPixel = Rasterizer::GetSingleFunc(state->pixelID); state->enableTextures = gstate.isTextureMapEnabled() && !state->pixelID.clearMode; @@ -132,7 +132,7 @@ void ComputeRasterizerState(RasterizerState *state, bool throughMode) { } state->shadeGouraud = gstate.getShadeMode() == GE_SHADE_GOURAUD; - state->throughMode = throughMode; + state->throughMode = gstate.isModeThrough(); state->antialiasLines = gstate.isAntiAliasEnabled(); #if defined(SOFTGPU_MEMORY_TAGGING_DETAILED) || defined(SOFTGPU_MEMORY_TAGGING_BASIC) @@ -1235,6 +1235,7 @@ void ClearRectangle(const VertexData &v0, const VertexData &v1, const BinCoords case GE_FORMAT_INVALID: case GE_FORMAT_DEPTH16: + case GE_FORMAT_CLUT8: _dbg_assert_msg_(false, "Software: invalid framebuf format."); break; } @@ -1365,7 +1366,7 @@ void DrawLine(const VertexData &v0, const VertexData &v1, const BinCoords &range maskOK = false; } - if (!CheckDepthTestPassed(pixelID.DepthTestFunc(), x, y, pixelID.cached.depthbufStride, z)) { + if (!CheckDepthTestPassed(pixelID.DepthTestFunc(), p.x, p.y, pixelID.cached.depthbufStride, z)) { maskOK = false; } } diff --git a/GPU/Software/Rasterizer.h b/GPU/Software/Rasterizer.h index 089f99a657..93e89f9596 100644 --- a/GPU/Software/Rasterizer.h +++ b/GPU/Software/Rasterizer.h @@ -65,7 +65,7 @@ struct RasterizerState { } }; -void ComputeRasterizerState(RasterizerState *state, bool throughMode); +void ComputeRasterizerState(RasterizerState *state); // Draws a triangle if its vertices are specified in counter-clockwise order void DrawTriangle(const VertexData &v0, const VertexData &v1, const VertexData &v2, const BinCoords &range, const RasterizerState &state); diff --git a/GPU/Software/RasterizerRectangle.cpp b/GPU/Software/RasterizerRectangle.cpp index 
4c2e4ad1a9..ff5b9c85d9 100644 --- a/GPU/Software/RasterizerRectangle.cpp +++ b/GPU/Software/RasterizerRectangle.cpp @@ -116,8 +116,12 @@ void DrawSprite(const VertexData &v0, const VertexData &v1, const BinCoords &ran DrawingCoords scissorTL = TransformUnit::ScreenToDrawing(range.x1, range.y1); DrawingCoords scissorBR = TransformUnit::ScreenToDrawing(range.x2, range.y2); - int z = v1.screenpos.z; - int fog = 255; + const int z = v1.screenpos.z; + constexpr int fog = 255; + + // Since it's flat, we can check depth range early. Matters for earlyZChecks. + if (pixelID.applyDepthRange && (z < pixelID.cached.minz || z > pixelID.cached.maxz)) + return; bool isWhite = v1.color0 == 0xFFFFFFFF; @@ -204,15 +208,31 @@ void DrawSprite(const VertexData &v0, const VertexData &v1, const BinCoords &ran float t = tf_start; const Vec4 c0 = Vec4::FromRGBA(v1.color0); - for (int y = pos0.y; y < pos1.y; y++) { - float s = sf_start; - // Not really that fast but faster than triangle. - for (int x = pos0.x; x < pos1.x; x++) { - Vec4 prim_color = state.nearest(s, t, xoff, yoff, ToVec4IntArg(c0), &texptr, &texbufw, 0, 0, state.samplerID); - state.drawPixel(x, y, z, 255, ToVec4IntArg(prim_color), pixelID); - s += dsf; + if (pixelID.earlyZChecks) { + for (int y = pos0.y; y < pos1.y; y++) { + float s = sf_start; + // Not really that fast but faster than triangle. + for (int x = pos0.x; x < pos1.x; x++) { + if (CheckDepthTestPassed(pixelID.DepthTestFunc(), x, y, pixelID.cached.depthbufStride, z)) { + Vec4 prim_color = state.nearest(s, t, xoff, yoff, ToVec4IntArg(c0), &texptr, &texbufw, 0, 0, state.samplerID); + state.drawPixel(x, y, z, fog, ToVec4IntArg(prim_color), pixelID); + } + + s += dsf; + } + t += dtf; + } + } else { + for (int y = pos0.y; y < pos1.y; y++) { + float s = sf_start; + // Not really that fast but faster than triangle. 
+ for (int x = pos0.x; x < pos1.x; x++) { + Vec4 prim_color = state.nearest(s, t, xoff, yoff, ToVec4IntArg(c0), &texptr, &texbufw, 0, 0, state.samplerID); + state.drawPixel(x, y, z, fog, ToVec4IntArg(prim_color), pixelID); + s += dsf; + } + t += dtf; } - t += dtf; } } } else { @@ -239,6 +259,16 @@ void DrawSprite(const VertexData &v0, const VertexData &v1, const BinCoords &ran pixel++; } } + } else if (pixelID.earlyZChecks) { + const Vec4 prim_color = Vec4::FromRGBA(v1.color0); + for (int y = pos0.y; y < pos1.y; y++) { + for (int x = pos0.x; x < pos1.x; x++) { + if (!CheckDepthTestPassed(pixelID.DepthTestFunc(), x, y, pixelID.cached.depthbufStride, z)) + continue; + + state.drawPixel(x, y, z, fog, ToVec4IntArg(prim_color), pixelID); + } + } } else { const Vec4 prim_color = Vec4::FromRGBA(v1.color0); for (int y = pos0.y; y < pos1.y; y++) { @@ -325,15 +355,18 @@ bool RectangleFastPath(const VertexData &v0, const VertexData &v1, BinManager &b } static bool AreCoordsRectangleCompatible(const RasterizerState &state, const VertexData &data0, const VertexData &data1) { - if (!(data1.color0 == data0.color0)) + if (data1.color0 != data0.color0) return false; - if (!(data1.screenpos.z == data0.screenpos.z)) { + if (data1.screenpos.z != data0.screenpos.z) { // Sometimes, we don't actually care about z. if (state.pixelID.depthWrite || state.pixelID.DepthTestFunc() != GE_COMP_ALWAYS) return false; } if (!state.throughMode) { - if (!state.throughMode && !(data1.color1 == data0.color1)) + if (data1.color1 != data0.color1) + return false; + // This means it should be culled, outside range. + if (data1.OutsideRange() || data0.OutsideRange()) return false; // Do we have to think about perspective correction or slope mip level? 
if (state.enableTextures && data1.clippos.w != data0.clippos.w) { diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index 18cdb0b314..0f2d3bcda1 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -361,7 +361,7 @@ const SoftwareCommandTableEntry softgpuCommandTable[] = { { GE_CMD_VTCT }, { GE_CMD_VTCQ }, { GE_CMD_VCV }, - { GE_CMD_VAP, FLAG_EXECUTE, SoftDirty::NONE, &GPUCommon::Execute_ImmVertexAlphaPrim }, + { GE_CMD_VAP, FLAG_EXECUTE, SoftDirty::NONE, &SoftGPU::Execute_ImmVertexAlphaPrim }, { GE_CMD_VFC }, { GE_CMD_VSCV }, @@ -639,6 +639,7 @@ void SoftGPU::CopyToCurrentFboFromDisplayRam(int srcwidth, int srcheight) { } void SoftGPU::CopyDisplayToOutput(bool reallyDirty) { + drawEngine_->transformUnit.Flush("output"); // The display always shows 480x272. CopyToCurrentFboFromDisplayRam(FB_WIDTH, FB_HEIGHT); MarkDirty(displayFramebuf_, displayStride_, 272, displayFormat_, SoftGPUVRAMDirty::CLEAR); @@ -650,7 +651,7 @@ void SoftGPU::MarkDirty(uint32_t addr, uint32_t stride, uint32_t height, GEBuffe } void SoftGPU::MarkDirty(uint32_t addr, uint32_t bytes, SoftGPUVRAMDirty value) { - // Don't bother tracking if frameskipping. + // Only bother tracking if frameskipping. if (g_Config.iFrameSkip == 0) return; if (!Memory::IsVRAMAddress(addr) || !Memory::IsVRAMAddress(addr + bytes - 1)) @@ -1005,19 +1006,24 @@ void SoftGPU::Execute_LoadClut(u32 op, u32 diff) { void SoftGPU::Execute_FramebufPtr(u32 op, u32 diff) { // We assume fb.data won't change while we're drawing. - drawEngine_->transformUnit.Flush("framebuf"); - fb.data = Memory::GetPointerWrite(gstate.getFrameBufAddress()); + if (diff) { + drawEngine_->transformUnit.Flush("framebuf"); + fb.data = Memory::GetPointerWrite(gstate.getFrameBufAddress()); + } } void SoftGPU::Execute_FramebufFormat(u32 op, u32 diff) { // We should flush, because ranges within bins may change. 
- drawEngine_->transformUnit.Flush("framebuf"); + if (diff) + drawEngine_->transformUnit.Flush("framebuf"); } void SoftGPU::Execute_ZbufPtr(u32 op, u32 diff) { // We assume depthbuf.data won't change while we're drawing. - drawEngine_->transformUnit.Flush("depthbuf"); - depthbuf.data = Memory::GetPointerWrite(gstate.getDepthBufAddress()); + if (diff) { + drawEngine_->transformUnit.Flush("depthbuf"); + depthbuf.data = Memory::GetPointerWrite(gstate.getDepthBufAddress()); + } } void SoftGPU::Execute_VertexType(u32 op, u32 diff) { @@ -1109,6 +1115,12 @@ void SoftGPU::Execute_BoneMtxData(u32 op, u32 diff) { gstate.boneMatrixData = GE_CMD_BONEMATRIXDATA << 24; } +void SoftGPU::Execute_ImmVertexAlphaPrim(u32 op, u32 diff) { + GPUCommon::Execute_ImmVertexAlphaPrim(op, diff); + // We won't flush as often as hardware renderers, so we want to flush right away. + FlushImm(); +} + void SoftGPU::Execute_Call(u32 op, u32 diff) { PROFILE_THIS_SCOPE("gpu_call"); @@ -1138,6 +1150,18 @@ void SoftGPU::FinishDeferred() { drawEngine_->transformUnit.Flush("finish"); } +int SoftGPU::ListSync(int listid, int mode) { + // Take this as a cue that we need to finish drawing. + drawEngine_->transformUnit.Flush("listsync"); + return GPUCommon::ListSync(listid, mode); +} + +u32 SoftGPU::DrawSync(int mode) { + // Take this as a cue that we need to finish drawing. 
+ drawEngine_->transformUnit.Flush("drawsync"); + return GPUCommon::DrawSync(mode); +} + void SoftGPU::GetStats(char *buffer, size_t bufsize) { drawEngine_->transformUnit.GetStats(buffer, bufsize); } diff --git a/GPU/Software/SoftGpu.h b/GPU/Software/SoftGpu.h index 11d5dd16f3..e90f7c0fb8 100644 --- a/GPU/Software/SoftGpu.h +++ b/GPU/Software/SoftGpu.h @@ -127,10 +127,12 @@ public: SoftGPU(GraphicsContext *gfxCtx, Draw::DrawContext *draw); ~SoftGPU(); - void CheckGPUFeatures() override {} + u32 CheckGPUFeatures() const override { return 0; } void InitClear() override {} void ExecuteOp(u32 op, u32 diff) override; void FinishDeferred() override; + int ListSync(int listid, int mode) override; + u32 DrawSync(int mode) override; void SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) override; void CopyDisplayToOutput(bool reallyDirty) override; @@ -185,6 +187,8 @@ public: void Execute_TgenMtxData(u32 op, u32 diff); void Execute_BoneMtxData(u32 op, u32 diff); + void Execute_ImmVertexAlphaPrim(u32 op, u32 diff); + typedef void (SoftGPU::*CmdFunc)(u32 op, u32 diff); protected: diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index 0cb91dd280..c1471f7172 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -70,14 +70,66 @@ void SoftwareDrawEngine::DispatchSubmitPrim(const void *verts, const void *inds, transformUnit.SubmitPrimitive(verts, inds, prim, vertexCount, vertTypeID, bytesRead, this); } -void SoftwareDrawEngine::DispatchSubmitImm(const void *verts, const void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead) { +void SoftwareDrawEngine::DispatchSubmitImm(GEPrimitiveType prim, TransformedVertex *buffer, int vertexCount, int cullMode, bool continuation) { + uint32_t vertTypeID = GetVertTypeID(gstate.vertType | GE_VTYPE_POS_FLOAT, gstate.getUVGenMode()); + int flipCull = cullMode != gstate.getCullMode() ? 
1 : 0; // TODO: For now, just setting all dirty. transformUnit.SetDirty(SoftDirty(-1)); gstate.cullmode ^= flipCull; - transformUnit.SubmitPrimitive(verts, inds, prim, vertexCount, vertTypeID, bytesRead, this); + + // TODO: This is a bit ugly. Should bypass when clipping... + uint32_t xScale = gstate.viewportxscale; + uint32_t xCenter = gstate.viewportxcenter; + uint32_t yScale = gstate.viewportyscale; + uint32_t yCenter = gstate.viewportycenter; + uint32_t zScale = gstate.viewportzscale; + uint32_t zCenter = gstate.viewportzcenter; + + // Force scale to 1 and center to zero. + gstate.viewportxscale = (GE_CMD_VIEWPORTXSCALE << 24) | 0x3F8000; + gstate.viewportxcenter = (GE_CMD_VIEWPORTXCENTER << 24) | 0x000000; + gstate.viewportyscale = (GE_CMD_VIEWPORTYSCALE << 24) | 0x3F8000; + gstate.viewportycenter = (GE_CMD_VIEWPORTYCENTER << 24) | 0x000000; + // Z we scale to 65535 for neg z clipping. + gstate.viewportzscale = (GE_CMD_VIEWPORTZSCALE << 24) | 0x477FFF; + gstate.viewportzcenter = (GE_CMD_VIEWPORTZCENTER << 24) | 0x000000; + + // Before we start, submit 0 prims to reset the prev prim type. + // Following submits will always be KEEP_PREVIOUS. + if (!continuation) + transformUnit.SubmitPrimitive(nullptr, nullptr, prim, 0, vertTypeID, nullptr, this); + + for (int i = 0; i < vertexCount; i++) { + VertexData vert; + vert.clippos = ClipCoords(buffer[i].pos); + vert.texturecoords.x = buffer[i].u; + vert.texturecoords.y = buffer[i].v; + if (gstate.isModeThrough()) { + vert.texturecoords.x *= gstate.getTextureWidth(0); + vert.texturecoords.y *= gstate.getTextureHeight(0); + } else { + vert.clippos.z *= 1.0f / 65535.0f; + } + vert.color0 = buffer[i].color0_32; + vert.color1 = gstate.isUsingSecondaryColor() && !gstate.isModeThrough() ? 
buffer[i].color1_32 : 0; + vert.fogdepth = buffer[i].fog; + vert.screenpos.x = (int)(buffer[i].x * 16.0f); + vert.screenpos.y = (int)(buffer[i].y * 16.0f); + vert.screenpos.z = (u16)(u32)buffer[i].z; + + transformUnit.SubmitImmVertex(vert, this); + } + + gstate.viewportxscale = xScale; + gstate.viewportxcenter = xCenter; + gstate.viewportyscale = yScale; + gstate.viewportycenter = yCenter; + gstate.viewportzscale = zScale; + gstate.viewportzcenter = zCenter; + gstate.cullmode ^= flipCull; - // TODO: Should really clear, but the vertex type is faked so things might need resetting... + // TODO: Should really clear, but a bunch of values are forced so we this is safest. transformUnit.SetDirty(SoftDirty(-1)); } @@ -273,7 +325,7 @@ void ComputeTransformState(TransformState *state, const VertexReader &vreader) { state->roundToScreen = &ClipToScreenInternal; } -VertexData TransformUnit::ReadVertex(VertexReader &vreader, const TransformState &state, bool &outside_range_flag) { +VertexData TransformUnit::ReadVertex(VertexReader &vreader, const TransformState &state) { PROFILE_THIS_SCOPE("read_vert"); VertexData vertex; @@ -362,9 +414,13 @@ VertexData TransformUnit::ReadVertex(VertexReader &vreader, const TransformState #else screenScaled = vertex.clippos.xyz() * state.screenScale / vertex.clippos.w + state.screenAdd; #endif + bool outside_range_flag = false; vertex.screenpos = state.roundToScreen(screenScaled, vertex.clippos, &outside_range_flag); - if (outside_range_flag) + if (outside_range_flag) { + // We use this, essentially, as the flag. + vertex.screenpos.x = 0x7FFFFFFF; return vertex; + } if (state.enableFog) { vertex.fogdepth = (viewpos.z + state.fogEnd) * state.fogSlope; @@ -447,20 +503,19 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G if (gstate_c.skipDrawReason & SKIPDRAW_SKIPFRAME) { return; } - // Throughmode never draws 8-bit primitives, maybe because they can't fully specify the screen? 
- if ((vertex_type & GE_VTYPE_THROUGH_MASK) != 0 && (vertex_type & GE_VTYPE_POS_MASK) == GE_VTYPE_POS_8BIT) - return; // Vertices without position are just entirely culled. + // Note: Throughmode does draw 8-bit primitives, but positions are always zero - handled in decode. if ((vertex_type & GE_VTYPE_POS_MASK) == 0) return; u16 index_lower_bound = 0; - u16 index_upper_bound = vertex_count - 1; + u16 index_upper_bound = vertex_count == 0 ? 0 : vertex_count - 1; IndexConverter ConvertIndex(vertex_type, indices); if (indices) GetIndexBounds(indices, vertex_count, vertex_type, &index_lower_bound, &index_upper_bound); - vdecoder.DecodeVerts(decoded_, vertices, index_lower_bound, index_upper_bound); + if (vertex_count != 0) + vdecoder.DecodeVerts(decoded_, vertices, index_lower_bound, index_upper_bound); VertexReader vreader(decoded_, vtxfmt, vertex_type); @@ -471,19 +526,11 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G prim_type = prev_prim_; } - int vtcs_per_prim; - switch (prim_type) { - case GE_PRIM_POINTS: vtcs_per_prim = 1; break; - case GE_PRIM_LINES: vtcs_per_prim = 2; break; - case GE_PRIM_TRIANGLES: vtcs_per_prim = 3; break; - case GE_PRIM_RECTANGLES: vtcs_per_prim = 2; break; - default: vtcs_per_prim = 0; break; - } - // TODO: Do this in two passes - first process the vertices (before indexing/stripping), // then resolve the indices. This lets us avoid transforming shared vertices twice. binner_->UpdateState(vreader.isThrough()); + hasDraws_ = true; static TransformState transformState; if (binner_->HasDirty(SoftDirty::LIGHT_ALL | SoftDirty::TRANSFORM_ALL)) { @@ -494,9 +541,17 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G bool skipCull = !gstate.isCullEnabled() || gstate.isModeClear(); const CullType cullType = skipCull ? CullType::OFF : (gstate.getCullMode() ? 
CullType::CCW : CullType::CW); - bool outside_range_flag = false; + auto readVertexAt = [&](VertexReader &vreader, const TransformState &transformState, int vtx) { + if (indices) { + vreader.Goto(ConvertIndex(vtx) - index_lower_bound); + } else { + vreader.Goto(vtx); + } - if (vreader.isThrough() && cullType == CullType::OFF && prim_type == GE_PRIM_TRIANGLES && data_index_ + vertex_count >= 6 && ((data_index_ + vertex_count) % 6) == 0) { + return ReadVertex(vreader, transformState); + }; + + if (vreader.isThrough() && cullType == CullType::OFF && prim_type == GE_PRIM_TRIANGLES && data_index_ == 0 && vertex_count >= 6 && ((vertex_count) % 6) == 0) { // Some games send rectangles as a series of regular triangles. // We look for this, but only in throughmode. VertexData buf[6]; @@ -506,20 +561,7 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G } for (int vtx = 0; vtx < vertex_count; ++vtx) { - if (indices) { - vreader.Goto(ConvertIndex(vtx) - index_lower_bound); - } else { - vreader.Goto(vtx); - } - - buf[buf_index++] = ReadVertex(vreader, transformState, outside_range_flag); - if (buf_index >= 3 && outside_range_flag) { - // Cull, just pretend it didn't happen. - buf_index -= 3; - outside_range_flag = false; - continue; - } - + buf[buf_index++] = readVertexAt(vreader, transformState, vtx); if (buf_index < 6) continue; @@ -552,73 +594,54 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G return; } + // Note: intentionally, these allow for the case of vertex_count == 0, but data_index_ > 0. + // This is used for immediate-mode primitives. 
switch (prim_type) { case GE_PRIM_POINTS: - case GE_PRIM_LINES: - case GE_PRIM_TRIANGLES: - { - for (int vtx = 0; vtx < vertex_count; ++vtx) { - if (indices) { - vreader.Goto(ConvertIndex(vtx) - index_lower_bound); - } else { - vreader.Goto(vtx); - } - - data_[data_index_++] = ReadVertex(vreader, transformState, outside_range_flag); - if (data_index_ < vtcs_per_prim) { - // Keep reading. Note: an incomplete prim will stay read for GE_PRIM_KEEP_PREVIOUS. - continue; - } - - // Okay, we've got enough verts. Reset the index for next time. - data_index_ = 0; - if (outside_range_flag) { - // Cull the prim if it was outside, and move to the next prim. - outside_range_flag = false; - continue; - } - - switch (prim_type) { - case GE_PRIM_TRIANGLES: - SendTriangle(cullType, &data_[0]); - break; - - case GE_PRIM_LINES: - Clipper::ProcessLine(data_[0], data_[1], *binner_); - break; - - case GE_PRIM_POINTS: - Clipper::ProcessPoint(data_[0], *binner_); - break; - - default: - _dbg_assert_msg_(false, "Unexpected prim type: %d", prim_type); - } - } - break; + for (int i = 0; i < data_index_; ++i) + Clipper::ProcessPoint(data_[i], *binner_); + data_index_ = 0; + for (int vtx = 0; vtx < vertex_count; ++vtx) { + data_[0] = readVertexAt(vreader, transformState, vtx); + Clipper::ProcessPoint(data_[0], *binner_); } + break; + + case GE_PRIM_LINES: + for (int i = 0; i < data_index_ - 1; i += 2) + Clipper::ProcessLine(data_[i + 0], data_[i + 1], *binner_); + data_index_ &= 1; + for (int vtx = 0; vtx < vertex_count; ++vtx) { + data_[data_index_++] = readVertexAt(vreader, transformState, vtx); + if (data_index_ == 2) { + Clipper::ProcessLine(data_[0], data_[1], *binner_); + data_index_ = 0; + } + } + break; + + case GE_PRIM_TRIANGLES: + for (int vtx = 0; vtx < vertex_count; ++vtx) { + data_[data_index_++] = readVertexAt(vreader, transformState, vtx); + if (data_index_ < 3) { + // Keep reading. Note: an incomplete prim will stay read for GE_PRIM_KEEP_PREVIOUS. 
+ continue; + } + // Okay, we've got enough verts. Reset the index for next time. + data_index_ = 0; + + SendTriangle(cullType, &data_[0]); + } + // In case vertex_count was 0. + if (data_index_ >= 3) { + SendTriangle(cullType, &data_[0]); + data_index_ = 0; + } + break; case GE_PRIM_RECTANGLES: for (int vtx = 0; vtx < vertex_count; ++vtx) { - if (indices) { - vreader.Goto(ConvertIndex(vtx) - index_lower_bound); - } else { - vreader.Goto(vtx); - } - - data_[data_index_++] = ReadVertex(vreader, transformState, outside_range_flag); - if (outside_range_flag) { - outside_range_flag = false; - // Note: this is the post increment index. If odd, we set the first vert. - if (data_index_ & 1) { - // Skip the next one and forget this one. - vtx++; - data_index_--; - } else { - // Forget both of the last 2. - data_index_ -= 2; - } - } + data_[data_index_++] = readVertexAt(vreader, transformState, vtx); if (data_index_ == 4 && vreader.isThrough() && cullType == CullType::OFF) { if (Rasterizer::DetectRectangleThroughModeSlices(binner_->State(), data_)) { @@ -646,19 +669,7 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G // If data_index_ is 1 or 2, etc., it means we're continuing a line strip. int skip_count = data_index_ == 0 ? 
1 : 0; for (int vtx = 0; vtx < vertex_count; ++vtx) { - if (indices) { - vreader.Goto(ConvertIndex(vtx) - index_lower_bound); - } else { - vreader.Goto(vtx); - } - - data_[(data_index_++) & 1] = ReadVertex(vreader, transformState, outside_range_flag); - if (outside_range_flag) { - // Drop all primitives containing the current vertex - skip_count = 2; - outside_range_flag = false; - continue; - } + data_[(data_index_++) & 1] = readVertexAt(vreader, transformState, vtx); if (skip_count) { --skip_count; @@ -667,6 +678,9 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G Clipper::ProcessLine(data_[data_index_ & 1], data_[(data_index_ & 1) ^ 1], *binner_); } } + // If this is from immediate-mode drawing, we always had one new vert (already in data_.) + if (isImmDraw_ && data_index_ >= 2) + Clipper::ProcessLine(data_[data_index_ & 1], data_[(data_index_ & 1) ^ 1], *binner_); break; } @@ -681,19 +695,15 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G if (data_index_ == 0 && vertex_count >= 4 && (vertex_count & 1) == 0 && cullType == CullType::OFF) { for (int base = 0; base < vertex_count - 2; base += 2) { for (int vtx = base == 0 ? 0 : 2; vtx < 4; ++vtx) { - if (indices) { - vreader.Goto(ConvertIndex(base + vtx) - index_lower_bound); - } else { - vreader.Goto(base + vtx); - } - data_[vtx] = ReadVertex(vreader, transformState, outside_range_flag); + data_[vtx] = readVertexAt(vreader, transformState, base + vtx); } // If a strip is effectively a rectangle, draw it as such! 
int tl = -1, br = -1; - if (!outside_range_flag && Rasterizer::DetectRectangleFromStrip(binner_->State(), data_, &tl, &br)) { + if (Rasterizer::DetectRectangleFromStrip(binner_->State(), data_, &tl, &br)) { Clipper::ProcessRect(data_[tl], data_[br], *binner_); start_vtx += 2; + skip_count = 0; if (base + 4 >= vertex_count) { start_vtx = vertex_count; break; @@ -710,32 +720,29 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G } } - outside_range_flag = false; - for (int vtx = start_vtx; vtx < vertex_count; ++vtx) { - if (indices) { - vreader.Goto(ConvertIndex(vtx) - index_lower_bound); - } else { - vreader.Goto(vtx); - } - + for (int vtx = start_vtx; vtx < vertex_count && skip_count > 0; ++vtx) { int provoking_index = (data_index_++) % 3; - data_[provoking_index] = ReadVertex(vreader, transformState, outside_range_flag); - if (outside_range_flag) { - // Drop all primitives containing the current vertex - skip_count = 2; - outside_range_flag = false; - continue; - } + data_[provoking_index] = readVertexAt(vreader, transformState, vtx); + --skip_count; + ++start_vtx; + } - if (skip_count) { - --skip_count; - continue; - } + for (int vtx = start_vtx; vtx < vertex_count; ++vtx) { + int provoking_index = (data_index_++) % 3; + data_[provoking_index] = readVertexAt(vreader, transformState, vtx); int wind = (data_index_ - 1) % 2; CullType altCullType = cullType == CullType::OFF ? cullType : CullType((int)cullType ^ wind); SendTriangle(altCullType, &data_[0], provoking_index); } + + // If this is from immediate-mode drawing, we always had one new vert (already in data_.) + if (isImmDraw_ && data_index_ >= 3) { + int provoking_index = (data_index_ - 1) % 3; + int wind = (data_index_ - 1) % 2; + CullType altCullType = cullType == CullType::OFF ? 
cullType : CullType((int)cullType ^ wind); + SendTriangle(altCullType, &data_[0], provoking_index); + } break; } @@ -747,64 +754,47 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G int start_vtx = 0; // Only read the central vertex if we're not continuing. - if (data_index_ == 0) { - if (indices) { - vreader.Goto(ConvertIndex(0) - index_lower_bound); - } else { - vreader.Goto(0); - } - data_[0] = ReadVertex(vreader, transformState, outside_range_flag); + if (data_index_ == 0 && vertex_count > 0) { + data_[0] = readVertexAt(vreader, transformState, 0); data_index_++; start_vtx = 1; - - // If the central vertex is outside range, all the points are toast. - if (outside_range_flag) - break; } if (data_index_ == 1 && vertex_count == 4 && cullType == CullType::OFF) { for (int vtx = start_vtx; vtx < vertex_count; ++vtx) { - if (indices) { - vreader.Goto(ConvertIndex(vtx) - index_lower_bound); - } else { - vreader.Goto(vtx); - } - data_[vtx] = ReadVertex(vreader, transformState, outside_range_flag); + data_[vtx] = readVertexAt(vreader, transformState, vtx); } int tl = -1, br = -1; - if (!outside_range_flag && Rasterizer::DetectRectangleFromFan(binner_->State(), data_, vertex_count, &tl, &br)) { + if (Rasterizer::DetectRectangleFromFan(binner_->State(), data_, vertex_count, &tl, &br)) { Clipper::ProcessRect(data_[tl], data_[br], *binner_); break; } } - outside_range_flag = false; - for (int vtx = start_vtx; vtx < vertex_count; ++vtx) { - if (indices) { - vreader.Goto(ConvertIndex(vtx) - index_lower_bound); - } else { - vreader.Goto(vtx); - } - + for (int vtx = start_vtx; vtx < vertex_count && skip_count > 0; ++vtx) { int provoking_index = 2 - ((data_index_++) % 2); - data_[provoking_index] = ReadVertex(vreader, transformState, outside_range_flag); - if (outside_range_flag) { - // Drop all primitives containing the current vertex - skip_count = 2; - outside_range_flag = false; - continue; - } + data_[provoking_index] = readVertexAt(vreader, 
transformState, vtx); + --skip_count; + ++start_vtx; + } - if (skip_count) { - --skip_count; - continue; - } + for (int vtx = start_vtx; vtx < vertex_count; ++vtx) { + int provoking_index = 2 - ((data_index_++) % 2); + data_[provoking_index] = readVertexAt(vreader, transformState, vtx); int wind = (data_index_ - 1) % 2; CullType altCullType = cullType == CullType::OFF ? cullType : CullType((int)cullType ^ wind); SendTriangle(altCullType, &data_[0], provoking_index); } + + // If this is from immediate-mode drawing, we always had one new vert (already in data_.) + if (isImmDraw_ && data_index_ >= 3) { + int wind = (data_index_ - 1) % 2; + int provoking_index = 2 - wind; + CullType altCullType = cullType == CullType::OFF ? cullType : CullType((int)cullType ^ wind); + SendTriangle(altCullType, &data_[0], provoking_index); + } break; } @@ -814,6 +804,47 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G } } +void TransformUnit::SubmitImmVertex(const VertexData &vert, SoftwareDrawEngine *drawEngine) { + // Where we put it is different for STRIP/FAN types. + switch (prev_prim_) { + case GE_PRIM_POINTS: + case GE_PRIM_LINES: + case GE_PRIM_TRIANGLES: + case GE_PRIM_RECTANGLES: + // This is the easy one. SubmitPrimitive resets data_index_. + data_[data_index_++] = vert; + break; + + case GE_PRIM_LINE_STRIP: + // This one alternates, and data_index_ > 0 means it draws a segment. 
+ data_[(data_index_++) & 1] = vert; + break; + + case GE_PRIM_TRIANGLE_STRIP: + data_[(data_index_++) % 3] = vert; + break; + + case GE_PRIM_TRIANGLE_FAN: + if (data_index_ == 0) { + data_[data_index_++] = vert; + } else { + int provoking_index = 2 - ((data_index_++) % 2); + data_[provoking_index] = vert; + } + break; + + default: + _assert_msg_(false, "Invalid prim type: %d", (int)prev_prim_); + break; + } + + uint32_t vertTypeID = GetVertTypeID(gstate.vertType | GE_VTYPE_POS_FLOAT, gstate.getUVGenMode()); + // This now processes the step with shared logic, given the existing data_. + isImmDraw_ = true; + SubmitPrimitive(nullptr, nullptr, GE_PRIM_KEEP_PREVIOUS, 0, vertTypeID, nullptr, drawEngine); + isImmDraw_ = false; +} + void TransformUnit::SendTriangle(CullType cullType, const VertexData *verts, int provoking) { if (cullType == CullType::OFF) { Clipper::ProcessTriangle(verts[0], verts[1], verts[2], verts[provoking], *binner_); @@ -826,8 +857,12 @@ void TransformUnit::SendTriangle(CullType cullType, const VertexData *verts, int } void TransformUnit::Flush(const char *reason) { + if (!hasDraws_) + return; + binner_->Flush(reason); GPUDebug::NotifyDraw(); + hasDraws_ = false; } void TransformUnit::GetStats(char *buffer, size_t bufsize) { @@ -836,6 +871,9 @@ void TransformUnit::GetStats(char *buffer, size_t bufsize) { } void TransformUnit::FlushIfOverlap(const char *reason, bool modifying, uint32_t addr, uint32_t stride, uint32_t w, uint32_t h) { + if (!hasDraws_) + return; + if (binner_->HasPendingWrite(addr, stride, w, h)) Flush(reason); if (modifying && binner_->HasPendingRead(addr, stride, w, h)) diff --git a/GPU/Software/TransformUnit.h b/GPU/Software/TransformUnit.h index fd6e047350..465f4bd8ab 100644 --- a/GPU/Software/TransformUnit.h +++ b/GPU/Software/TransformUnit.h @@ -90,6 +90,10 @@ struct VertexData { color1 = LerpInt, 256>(Vec3::FromRGB(a.color1), Vec3::FromRGB(b.color1), t_int).ToRGB(); } + bool OutsideRange() const { + return screenpos.x == 
0x7FFFFFFF; + } + ClipCoords clippos; Vec2 texturecoords; uint32_t color0; @@ -125,6 +129,7 @@ public: static ScreenCoords DrawingToScreen(const DrawingCoords &coords, u16 z); void SubmitPrimitive(const void* vertices, const void* indices, GEPrimitiveType prim_type, int vertex_count, u32 vertex_type, int *bytesRead, SoftwareDrawEngine *drawEngine); + void SubmitImmVertex(const VertexData &vert, SoftwareDrawEngine *drawEngine); bool GetCurrentSimpleVertices(int count, std::vector &vertices, std::vector &indices); @@ -138,7 +143,7 @@ public: SoftDirty GetDirty(); private: - VertexData ReadVertex(VertexReader &vreader, const TransformState &lstate, bool &outside_range_flag); + VertexData ReadVertex(VertexReader &vreader, const TransformState &state); void SendTriangle(CullType cullType, const VertexData *verts, int provoking = 2); u8 *decoded_ = nullptr; @@ -149,6 +154,8 @@ private: // This is the index of the next vert in data (or higher, may need modulus.) int data_index_ = 0; GEPrimitiveType prev_prim_ = GE_PRIM_POINTS; + bool hasDraws_ = false; + bool isImmDraw_ = false; }; class SoftwareDrawEngine : public DrawEngineCommon { @@ -158,7 +165,7 @@ public: void DispatchFlush() override; void DispatchSubmitPrim(const void *verts, const void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int cullMode, int *bytesRead) override; - void DispatchSubmitImm(const void *verts, const void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead) override; + void DispatchSubmitImm(GEPrimitiveType prim, TransformedVertex *buffer, int vertexCount, int cullMode, bool continuation) override; VertexDecoder *FindVertexDecoder(u32 vtype); diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp index e8ebd485d1..f992df082f 100644 --- a/GPU/Vulkan/DrawEngineVulkan.cpp +++ b/GPU/Vulkan/DrawEngineVulkan.cpp @@ -71,6 +71,7 @@ enum { DRAW_BINDING_TESS_STORAGE_BUF = 6, DRAW_BINDING_TESS_STORAGE_BUF_WU = 7, 
DRAW_BINDING_TESS_STORAGE_BUF_WV = 8, + DRAW_BINDING_INPUT_ATTACHMENT = 9, }; enum { @@ -94,7 +95,10 @@ DrawEngineVulkan::DrawEngineVulkan(Draw::DrawContext *draw) void DrawEngineVulkan::InitDeviceObjects() { // All resources we need for PSP drawing. Usually only bindings 0 and 2-4 are populated. - VkDescriptorSetLayoutBinding bindings[9]{}; + + // TODO: Make things more flexible, so we at least have specialized layouts for input attachments and tess. + // Note that it becomes a support matrix.. + VkDescriptorSetLayoutBinding bindings[10]{}; bindings[0].descriptorCount = 1; bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; bindings[0].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; @@ -132,6 +136,10 @@ void DrawEngineVulkan::InitDeviceObjects() { bindings[8].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; bindings[8].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; bindings[8].binding = DRAW_BINDING_TESS_STORAGE_BUF_WV; + bindings[9].descriptorCount = 1; + bindings[9].descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT; + bindings[9].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + bindings[9].binding = DRAW_BINDING_INPUT_ATTACHMENT; VulkanContext *vulkan = (VulkanContext *)draw_->GetNativeObject(Draw::NativeObject::CONTEXT); VkDevice device = vulkan->GetDevice(); @@ -145,13 +153,15 @@ void DrawEngineVulkan::InitDeviceObjects() { static constexpr int DEFAULT_DESC_POOL_SIZE = 512; std::vector dpTypes; - dpTypes.resize(3); + dpTypes.resize(4); dpTypes[0].descriptorCount = DEFAULT_DESC_POOL_SIZE * 3; dpTypes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; dpTypes[1].descriptorCount = DEFAULT_DESC_POOL_SIZE * 3; // Don't use these for tess anymore, need max three per set. dpTypes[1].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; dpTypes[2].descriptorCount = DEFAULT_DESC_POOL_SIZE * 3; // TODO: Use a separate layout when no spline stuff is needed to reduce the need for these. 
dpTypes[2].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + dpTypes[3].descriptorCount = DEFAULT_DESC_POOL_SIZE; // TODO: Use a separate layout when no spline stuff is needed to reduce the need for these. + dpTypes[3].type = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT; VkDescriptorPoolCreateInfo dp{ VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO }; // Don't want to mess around with individually freeing these. @@ -379,6 +389,7 @@ VkDescriptorSet DrawEngineVulkan::GetOrCreateDescriptorSet(VkImageView imageView key.base_ = base; key.light_ = light; key.bone_ = bone; + key.secondaryIsInputAttachment = boundSecondaryIsInputAttachment_; FrameData &frame = GetCurFrame(); // See if we already have this descriptor set cached. @@ -417,15 +428,15 @@ VkDescriptorSet DrawEngineVulkan::GetOrCreateDescriptorSet(VkImageView imageView } if (boundSecondary_) { - tex[1].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + tex[1].imageLayout = key.secondaryIsInputAttachment ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; tex[1].imageView = boundSecondary_; tex[1].sampler = samplerSecondaryNearest_; writes[n].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; writes[n].pNext = nullptr; - writes[n].dstBinding = DRAW_BINDING_2ND_TEXTURE; + writes[n].dstBinding = key.secondaryIsInputAttachment ? DRAW_BINDING_INPUT_ATTACHMENT : DRAW_BINDING_2ND_TEXTURE; writes[n].pImageInfo = &tex[1]; writes[n].descriptorCount = 1; - writes[n].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + writes[n].descriptorType = key.secondaryIsInputAttachment ? 
VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT : VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; writes[n].dstSet = desc; n++; } @@ -788,7 +799,7 @@ void DrawEngineVulkan::DoFlush() { lastRenderStepId_ = curRenderStepId; } - renderManager->BindPipeline(pipeline->pipeline, (PipelineFlags)pipeline->flags, pipelineLayout_); + renderManager->BindPipeline(pipeline->pipeline, pipeline->pipelineFlags, pipelineLayout_); if (pipeline != lastPipeline_) { if (lastPipeline_ && !(lastPipeline_->UsesBlendConstant() && pipeline->UsesBlendConstant())) { gstate_c.Dirty(DIRTY_BLEND_STATE); @@ -916,7 +927,7 @@ void DrawEngineVulkan::DoFlush() { lastRenderStepId_ = curRenderStepId; } - renderManager->BindPipeline(pipeline->pipeline, (PipelineFlags)pipeline->flags, pipelineLayout_); + renderManager->BindPipeline(pipeline->pipeline, pipeline->pipelineFlags, pipelineLayout_); if (pipeline != lastPipeline_) { if (lastPipeline_ && !lastPipeline_->UsesBlendConstant() && pipeline->UsesBlendConstant()) { gstate_c.Dirty(DIRTY_BLEND_STATE); diff --git a/GPU/Vulkan/DrawEngineVulkan.h b/GPU/Vulkan/DrawEngineVulkan.h index 531e05c4ed..0de67940de 100644 --- a/GPU/Vulkan/DrawEngineVulkan.h +++ b/GPU/Vulkan/DrawEngineVulkan.h @@ -217,6 +217,8 @@ private: // Secondary texture for shader blending VkImageView boundSecondary_ = VK_NULL_HANDLE; + bool boundSecondaryIsInputAttachment_ = false; + // CLUT texture for shader depal VkImageView boundDepal_ = VK_NULL_HANDLE; bool boundDepalSmoothed_ = false; @@ -234,6 +236,7 @@ private: VkSampler sampler_; VkBuffer base_, light_, bone_; // All three UBO slots will be set to this. This will usually be identical // for all draws in a frame, except when the buffer has to grow. + bool secondaryIsInputAttachment; }; // We alternate between these. 
@@ -281,7 +284,7 @@ private: VulkanDynamicState dynState_{}; int tessOffset_ = 0; - bool fboTexNeedsBind_ = false; + FBOTexState fboTexBindState_ = FBO_TEX_NONE; // Hardware tessellation TessellationDataTransferVulkan *tessDataTransferVulkan; diff --git a/GPU/Vulkan/FramebufferManagerVulkan.h b/GPU/Vulkan/FramebufferManagerVulkan.h index 0f5d7c4f53..d3370fafb7 100644 --- a/GPU/Vulkan/FramebufferManagerVulkan.h +++ b/GPU/Vulkan/FramebufferManagerVulkan.h @@ -33,7 +33,7 @@ class VulkanPushBuffer; class FramebufferManagerVulkan : public FramebufferManagerCommon { public: - FramebufferManagerVulkan(Draw::DrawContext *draw); + explicit FramebufferManagerVulkan(Draw::DrawContext *draw); ~FramebufferManagerVulkan(); // If within a render pass, this will just issue a regular clear. If beginning a new render pass, diff --git a/GPU/Vulkan/GPU_Vulkan.cpp b/GPU/Vulkan/GPU_Vulkan.cpp index f96d282fc7..b38201c8c5 100644 --- a/GPU/Vulkan/GPU_Vulkan.cpp +++ b/GPU/Vulkan/GPU_Vulkan.cpp @@ -52,7 +52,7 @@ GPU_Vulkan::GPU_Vulkan(GraphicsContext *gfxCtx, Draw::DrawContext *draw) : GPUCommon(gfxCtx, draw), drawEngine_(draw) { - CheckGPUFeatures(); + gstate_c.featureFlags = CheckGPUFeatures(); VulkanContext *vulkan = (VulkanContext *)gfxCtx->GetAPIContext(); @@ -182,8 +182,8 @@ GPU_Vulkan::~GPU_Vulkan() { delete framebufferManagerVulkan_; } -void GPU_Vulkan::CheckGPUFeatures() { - uint32_t features = 0; +u32 GPU_Vulkan::CheckGPUFeatures() const { + uint32_t features = GPUCommon::CheckGPUFeatures(); VulkanContext *vulkan = (VulkanContext *)draw_->GetNativeObject(Draw::NativeObject::CONTEXT); switch (vulkan->GetPhysicalDeviceProperties().properties.vendorID) { @@ -222,44 +222,14 @@ void GPU_Vulkan::CheckGPUFeatures() { // Mandatory features on Vulkan, which may be checked in "centralized" code features |= GPU_SUPPORTS_TEXTURE_LOD_CONTROL; - features |= GPU_SUPPORTS_BLEND_MINMAX; - features |= GPU_SUPPORTS_TEXTURE_NPOT; features |= GPU_SUPPORTS_INSTANCE_RENDERING; features |= 
GPU_SUPPORTS_VERTEX_TEXTURE_FETCH; features |= GPU_SUPPORTS_TEXTURE_FLOAT; - features |= GPU_SUPPORTS_DEPTH_TEXTURE; auto &enabledFeatures = vulkan->GetDeviceFeatures().enabled; if (enabledFeatures.depthClamp) { features |= GPU_SUPPORTS_DEPTH_CLAMP; } - if (enabledFeatures.shaderClipDistance) { - features |= GPU_SUPPORTS_CLIP_DISTANCE; - } - if (enabledFeatures.shaderCullDistance) { - // Must support at least 8 if feature supported, so we're fine. - features |= GPU_SUPPORTS_CULL_DISTANCE; - } - if (!draw_->GetBugs().Has(Draw::Bugs::BROKEN_NAN_IN_CONDITIONAL)) { - // Ignore the compat setting if clip and cull are both enabled. - // When supported, we can do the depth side of range culling more correctly. - const bool supported = draw_->GetDeviceCaps().clipDistanceSupported && draw_->GetDeviceCaps().cullDistanceSupported; - const bool disabled = PSP_CoreParameter().compat.flags().DisableRangeCulling; - if (supported || !disabled) { - features |= GPU_SUPPORTS_VS_RANGE_CULLING; - } - } - if (enabledFeatures.dualSrcBlend) { - if (!g_Config.bVendorBugChecksEnabled || !draw_->GetBugs().Has(Draw::Bugs::DUAL_SOURCE_BLENDING_BROKEN)) { - features |= GPU_SUPPORTS_DUALSOURCE_BLEND; - } - } - if (draw_->GetDeviceCaps().logicOpSupported) { - features |= GPU_SUPPORTS_LOGIC_OP; - } - if (draw_->GetDeviceCaps().anisoSupported) { - features |= GPU_SUPPORTS_ANISOTROPY; - } // These are VULKAN_4444_FORMAT and friends. 
uint32_t fmt4444 = draw_->GetDataFormatSupport(Draw::DataFormat::B4G4R4A4_UNORM_PACK16); @@ -275,10 +245,6 @@ void GPU_Vulkan::CheckGPUFeatures() { INFO_LOG(G3D, "Deficient texture format support: 4444: %d 1555: %d 565: %d", fmt4444, fmt1555, fmt565); } - if (PSP_CoreParameter().compat.flags().ClearToRAM) { - features |= GPU_USE_CLEAR_RAM_HACK; - } - if (!g_Config.bHighQualityDepth && (features & GPU_SUPPORTS_ACCURATE_DEPTH) != 0) { features |= GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT; } @@ -290,7 +256,7 @@ void GPU_Vulkan::CheckGPUFeatures() { features |= GPU_ROUND_DEPTH_TO_16BIT; } - gstate_c.featureFlags = features; + return features; } void GPU_Vulkan::BeginHostFrame() { @@ -298,7 +264,7 @@ void GPU_Vulkan::BeginHostFrame() { UpdateCmdInfo(); if (resized_) { - CheckGPUFeatures(); + gstate_c.featureFlags = CheckGPUFeatures(); // In case the GPU changed. BuildReportingInfo(); framebufferManager_->Resized(); @@ -537,7 +503,7 @@ void GPU_Vulkan::DeviceRestore() { GPUCommon::DeviceRestore(); InitDeviceObjects(); - CheckGPUFeatures(); + gstate_c.featureFlags = CheckGPUFeatures(); BuildReportingInfo(); UpdateCmdInfo(); diff --git a/GPU/Vulkan/GPU_Vulkan.h b/GPU/Vulkan/GPU_Vulkan.h index 3c13d57621..3fbd329aae 100644 --- a/GPU/Vulkan/GPU_Vulkan.h +++ b/GPU/Vulkan/GPU_Vulkan.h @@ -38,7 +38,7 @@ public: ~GPU_Vulkan(); // This gets called on startup and when we get back from settings. 
- void CheckGPUFeatures() override; + u32 CheckGPUFeatures() const override; bool IsReady() override; void CancelReady() override; diff --git a/GPU/Vulkan/PipelineManagerVulkan.cpp b/GPU/Vulkan/PipelineManagerVulkan.cpp index a88fe235a5..e950dfc457 100644 --- a/GPU/Vulkan/PipelineManagerVulkan.cpp +++ b/GPU/Vulkan/PipelineManagerVulkan.cpp @@ -170,7 +170,7 @@ static std::string CutFromMain(std::string str) { } static VulkanPipeline *CreateVulkanPipeline(VulkanRenderManager *renderManager, VkPipelineCache pipelineCache, - VkPipelineLayout layout, const VulkanPipelineRasterStateKey &key, + VkPipelineLayout layout, PipelineFlags pipelineFlags, const VulkanPipelineRasterStateKey &key, const DecVtxFormat *decFmt, VulkanVertexShader *vs, VulkanFragmentShader *fs, bool useHwTransform, u32 variantBitmask) { VulkanPipeline *vulkanPipeline = new VulkanPipeline(); VKRGraphicsPipelineDesc *desc = &vulkanPipeline->desc; @@ -299,14 +299,14 @@ static VulkanPipeline *CreateVulkanPipeline(VulkanRenderManager *renderManager, VKRGraphicsPipeline *pipeline = renderManager->CreateGraphicsPipeline(desc, variantBitmask, "game"); vulkanPipeline->pipeline = pipeline; - vulkanPipeline->flags = 0; if (useBlendConstant) - vulkanPipeline->flags |= PIPELINE_FLAG_USES_BLEND_CONSTANT; + pipelineFlags |= PipelineFlags::USES_BLEND_CONSTANT; if (key.topology == VK_PRIMITIVE_TOPOLOGY_LINE_LIST || key.topology == VK_PRIMITIVE_TOPOLOGY_LINE_STRIP) - vulkanPipeline->flags |= PIPELINE_FLAG_USES_LINES; + pipelineFlags |= PipelineFlags::USES_LINES; if (dss.depthTestEnable || dss.stencilTestEnable) { - vulkanPipeline->flags |= PIPELINE_FLAG_USES_DEPTH_STENCIL; + pipelineFlags |= PipelineFlags::USES_DEPTH_STENCIL; } + vulkanPipeline->pipelineFlags = pipelineFlags; return vulkanPipeline; } @@ -329,8 +329,13 @@ VulkanPipeline *PipelineManagerVulkan::GetOrCreatePipeline(VulkanRenderManager * if (iter) return iter; + PipelineFlags pipelineFlags = (PipelineFlags)0; + if (fs->Flags() & 
FragmentShaderFlags::INPUT_ATTACHMENT) { + pipelineFlags |= PipelineFlags::USES_INPUT_ATTACHMENT; + } + VulkanPipeline *pipeline = CreateVulkanPipeline( - renderManager, pipelineCache_, layout, + renderManager, pipelineCache_, layout, pipelineFlags, rasterKey, decFmt, vs, fs, useHwTransform, variantBitmask); pipelines_.Insert(key, pipeline); diff --git a/GPU/Vulkan/PipelineManagerVulkan.h b/GPU/Vulkan/PipelineManagerVulkan.h index af32aa81bd..08907e3b3e 100644 --- a/GPU/Vulkan/PipelineManagerVulkan.h +++ b/GPU/Vulkan/PipelineManagerVulkan.h @@ -55,11 +55,12 @@ struct VulkanPipelineKey { struct VulkanPipeline { VKRGraphicsPipeline *pipeline; VKRGraphicsPipelineDesc desc; - int flags; // PipelineFlags enum above. + PipelineFlags pipelineFlags; // PipelineFlags enum above. - bool UsesBlendConstant() const { return (flags & PIPELINE_FLAG_USES_BLEND_CONSTANT) != 0; } - bool UsesLines() const { return (flags & PIPELINE_FLAG_USES_LINES) != 0; } - bool UsesDepthStencil() const { return (flags & PIPELINE_FLAG_USES_DEPTH_STENCIL) != 0; } + bool UsesBlendConstant() const { return (pipelineFlags & PipelineFlags::USES_BLEND_CONSTANT) != 0; } + bool UsesLines() const { return (pipelineFlags & PipelineFlags::USES_LINES) != 0; } + bool UsesDepthStencil() const { return (pipelineFlags & PipelineFlags::USES_DEPTH_STENCIL) != 0; } + bool UsesInputAttachment() const { return (pipelineFlags & PipelineFlags::USES_INPUT_ATTACHMENT) != 0; } u32 GetVariantsBitmask() const; }; diff --git a/GPU/Vulkan/StateMappingVulkan.cpp b/GPU/Vulkan/StateMappingVulkan.cpp index 6457b99ad9..290c7b010c 100644 --- a/GPU/Vulkan/StateMappingVulkan.cpp +++ b/GPU/Vulkan/StateMappingVulkan.cpp @@ -153,7 +153,7 @@ void DrawEngineVulkan::ConvertStateToVulkanKey(FramebufferManagerVulkan &fbManag GenericLogicState &logicState = pipelineState_.logicState; if (pipelineState_.FramebufferRead()) { - ApplyFramebufferRead(&fboTexNeedsBind_); + ApplyFramebufferRead(&fboTexBindState_); // The shader takes over the 
responsibility for blending, so recompute. // We might still end up using blend to write something to alpha. ApplyStencilReplaceAndLogicOpIgnoreBlend(blendState.replaceAlphaWithStencil, blendState); @@ -364,15 +364,23 @@ void DrawEngineVulkan::BindShaderBlendTex() { // TODO: At this point, we know if the vertices are full alpha or not. // Set the nearest/linear here (since we correctly know if alpha/color tests are needed)? if (!gstate.isModeClear()) { - if (fboTexNeedsBind_) { + if (fboTexBindState_ == FBO_TEX_COPY_BIND_TEX) { bool bindResult = framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY); _dbg_assert_(bindResult); boundSecondary_ = (VkImageView)draw_->GetNativeObject(Draw::NativeObject::BOUND_TEXTURE1_IMAGEVIEW); + boundSecondaryIsInputAttachment_ = false; fboTexBound_ = true; - fboTexNeedsBind_ = false; + fboTexBindState_ = FBO_TEX_NONE; // Must dirty blend state here so we re-copy next time. Example: Lunar's spell effects. 
dirtyRequiresRecheck_ |= DIRTY_BLEND_STATE; + } else if (fboTexBindState_ == FBO_TEX_READ_FRAMEBUFFER) { + draw_->BindCurrentFramebufferForColorInput(); + boundSecondary_ = (VkImageView)draw_->GetNativeObject(Draw::NativeObject::BOUND_FRAMEBUFFER_COLOR_IMAGEVIEW); + boundSecondaryIsInputAttachment_ = true; + fboTexBindState_ = FBO_TEX_NONE; + } else { + boundSecondary_ = VK_NULL_HANDLE; } } } diff --git a/SDL/SDLVulkanGraphicsContext.cpp b/SDL/SDLVulkanGraphicsContext.cpp index 433f35b03f..b7eb932b11 100644 --- a/SDL/SDLVulkanGraphicsContext.cpp +++ b/SDL/SDLVulkanGraphicsContext.cpp @@ -116,7 +116,7 @@ bool SDLVulkanGraphicsContext::Init(SDL_Window *&window, int x, int y, int mode, return false; } - draw_ = Draw::T3DCreateVulkanContext(vulkan_, false); + draw_ = Draw::T3DCreateVulkanContext(vulkan_); SetGPUBackend(GPUBackend::VULKAN); bool success = draw_->CreatePresets(); _assert_(success); diff --git a/UI/ComboKeyMappingScreen.cpp b/UI/ComboKeyMappingScreen.cpp index 3a4bc277d1..d4c8a814b6 100644 --- a/UI/ComboKeyMappingScreen.cpp +++ b/UI/ComboKeyMappingScreen.cpp @@ -56,6 +56,8 @@ public: parent->Add(scroll); } + const char *tag() const override { return "ButtonShape"; } + private: int *setting_; }; @@ -84,6 +86,8 @@ public: parent->Add(scroll); } + const char *tag() const override { return "ButtonIcon"; } + private: int *setting_; }; diff --git a/UI/ComboKeyMappingScreen.h b/UI/ComboKeyMappingScreen.h index 6eb271211d..c2ed6396d8 100644 --- a/UI/ComboKeyMappingScreen.h +++ b/UI/ComboKeyMappingScreen.h @@ -28,6 +28,8 @@ class ComboKeyScreen : public UIDialogScreenWithBackground { public: ComboKeyScreen(int id): id_(id) {} + const char *tag() const override { return "ComboKey"; } + void CreateViews() override; void onFinish(DialogResult result) override; diff --git a/UI/ControlMappingScreen.cpp b/UI/ControlMappingScreen.cpp index 7acdd36f05..f6255ad4e3 100644 --- a/UI/ControlMappingScreen.cpp +++ b/UI/ControlMappingScreen.cpp @@ -313,7 +313,7 @@ UI::EventReturn 
ControlMappingScreen::OnVisualizeMapping(UI::EventParams ¶ms } void ControlMappingScreen::dialogFinished(const Screen *dialog, DialogResult result) { - if (result == DR_OK && dialog->tag() == "listpopup") { + if (result == DR_OK && std::string(dialog->tag()) == "listpopup") { ListPopupScreen *popup = (ListPopupScreen *)dialog; KeyMap::AutoConfForPad(popup->GetChoiceString()); } diff --git a/UI/ControlMappingScreen.h b/UI/ControlMappingScreen.h index add89bfdfa..29f1360fbe 100644 --- a/UI/ControlMappingScreen.h +++ b/UI/ControlMappingScreen.h @@ -35,7 +35,7 @@ class SingleControlMapper; class ControlMappingScreen : public UIDialogScreenWithBackground { public: ControlMappingScreen() {} - std::string tag() const override { return "control mapping"; } + const char *tag() const override { return "ControlMapping"; } protected: void CreateViews() override; @@ -47,7 +47,7 @@ private: UI::EventReturn OnAutoConfigure(UI::EventParams ¶ms); UI::EventReturn OnVisualizeMapping(UI::EventParams ¶ms); - virtual void dialogFinished(const Screen *dialog, DialogResult result) override; + void dialogFinished(const Screen *dialog, DialogResult result) override; UI::ScrollView *rightScroll_; std::vector mappers_; @@ -61,17 +61,19 @@ public: pspBtn_ = btn; } - virtual bool key(const KeyInput &key) override; - virtual bool axis(const AxisInput &axis) override; + const char *tag() const override { return "KeyMappingNewKey"; } + + bool key(const KeyInput &key) override; + bool axis(const AxisInput &axis) override; void SetDelay(float t); protected: void CreatePopupContents(UI::ViewGroup *parent) override; - virtual bool FillVertical() const override { return false; } - virtual bool ShowButtons() const override { return true; } - virtual void OnCompleted(DialogResult result) override {} + bool FillVertical() const override { return false; } + bool ShowButtons() const override { return true; } + void OnCompleted(DialogResult result) override {} private: int pspBtn_; @@ -87,6 +89,8 @@ public: 
pspBtn_ = btn; } + const char *tag() const override { return "KeyMappingNewMouseKey"; } + bool key(const KeyInput &key) override; bool axis(const AxisInput &axis) override; @@ -114,6 +118,8 @@ public: void update() override; + const char *tag() const override { return "AnalogSetup"; } + protected: void CreateViews() override; @@ -144,6 +150,8 @@ public: bool key(const KeyInput &key) override; bool axis(const AxisInput &axis) override; + const char *tag() const override { return "TouchTest"; } + protected: struct TrackedTouch { int id; @@ -171,6 +179,8 @@ class VisualMappingScreen : public UIDialogScreenWithBackground { public: VisualMappingScreen() {} + const char *tag() const override { return "VisualMapping"; } + protected: void CreateViews() override; diff --git a/UI/CwCheatScreen.h b/UI/CwCheatScreen.h index e2e167c771..27a28a1b51 100644 --- a/UI/CwCheatScreen.h +++ b/UI/CwCheatScreen.h @@ -41,6 +41,8 @@ public: void update() override; void onFinish(DialogResult result) override; + const char *tag() const override { return "CwCheat"; } + protected: void CreateViews() override; diff --git a/UI/DevScreens.cpp b/UI/DevScreens.cpp index a804eb98f0..ce5bd3fada 100644 --- a/UI/DevScreens.cpp +++ b/UI/DevScreens.cpp @@ -85,7 +85,7 @@ static const char *logLevelList[] = { "Verb." 
}; -void DevMenu::CreatePopupContents(UI::ViewGroup *parent) { +void DevMenuScreen::CreatePopupContents(UI::ViewGroup *parent) { using namespace UI; auto dev = GetI18NCategory("Developer"); auto sy = GetI18NCategory("System"); @@ -94,25 +94,25 @@ void DevMenu::CreatePopupContents(UI::ViewGroup *parent) { LinearLayout *items = new LinearLayout(ORIENT_VERTICAL); #if !defined(MOBILE_DEVICE) - items->Add(new Choice(dev->T("Log View")))->OnClick.Handle(this, &DevMenu::OnLogView); + items->Add(new Choice(dev->T("Log View")))->OnClick.Handle(this, &DevMenuScreen::OnLogView); #endif - items->Add(new Choice(dev->T("Logging Channels")))->OnClick.Handle(this, &DevMenu::OnLogConfig); - items->Add(new Choice(sy->T("Developer Tools")))->OnClick.Handle(this, &DevMenu::OnDeveloperTools); - items->Add(new Choice(dev->T("Jit Compare")))->OnClick.Handle(this, &DevMenu::OnJitCompare); - items->Add(new Choice(dev->T("Shader Viewer")))->OnClick.Handle(this, &DevMenu::OnShaderView); + items->Add(new Choice(dev->T("Logging Channels")))->OnClick.Handle(this, &DevMenuScreen::OnLogConfig); + items->Add(new Choice(sy->T("Developer Tools")))->OnClick.Handle(this, &DevMenuScreen::OnDeveloperTools); + items->Add(new Choice(dev->T("Jit Compare")))->OnClick.Handle(this, &DevMenuScreen::OnJitCompare); + items->Add(new Choice(dev->T("Shader Viewer")))->OnClick.Handle(this, &DevMenuScreen::OnShaderView); if (g_Config.iGPUBackend == (int)GPUBackend::VULKAN) { // TODO: Make a new allocator visualizer for VMA. 
// items->Add(new CheckBox(&g_Config.bShowAllocatorDebug, dev->T("Allocator Viewer"))); items->Add(new CheckBox(&g_Config.bShowGpuProfile, dev->T("GPU Profile"))); } - items->Add(new Choice(dev->T("Toggle Freeze")))->OnClick.Handle(this, &DevMenu::OnFreezeFrame); - items->Add(new Choice(dev->T("Dump Frame GPU Commands")))->OnClick.Handle(this, &DevMenu::OnDumpFrame); - items->Add(new Choice(dev->T("Toggle Audio Debug")))->OnClick.Handle(this, &DevMenu::OnToggleAudioDebug); + items->Add(new Choice(dev->T("Toggle Freeze")))->OnClick.Handle(this, &DevMenuScreen::OnFreezeFrame); + items->Add(new Choice(dev->T("Dump Frame GPU Commands")))->OnClick.Handle(this, &DevMenuScreen::OnDumpFrame); + items->Add(new Choice(dev->T("Toggle Audio Debug")))->OnClick.Handle(this, &DevMenuScreen::OnToggleAudioDebug); #ifdef USE_PROFILER items->Add(new CheckBox(&g_Config.bShowFrameProfiler, dev->T("Frame Profiler"), "")); #endif items->Add(new CheckBox(&g_Config.bDrawFrameGraph, dev->T("Draw Frametimes Graph"))); - items->Add(new Choice(dev->T("Reset limited logging")))->OnClick.Handle(this, &DevMenu::OnResetLimitedLogging); + items->Add(new Choice(dev->T("Reset limited logging")))->OnClick.Handle(this, &DevMenuScreen::OnResetLimitedLogging); scroll->Add(items); parent->Add(scroll); @@ -123,48 +123,48 @@ void DevMenu::CreatePopupContents(UI::ViewGroup *parent) { } } -UI::EventReturn DevMenu::OnToggleAudioDebug(UI::EventParams &e) { +UI::EventReturn DevMenuScreen::OnToggleAudioDebug(UI::EventParams &e) { g_Config.bShowAudioDebug = !g_Config.bShowAudioDebug; return UI::EVENT_DONE; } -UI::EventReturn DevMenu::OnResetLimitedLogging(UI::EventParams &e) { +UI::EventReturn DevMenuScreen::OnResetLimitedLogging(UI::EventParams &e) { Reporting::ResetCounts(); return UI::EVENT_DONE; } -UI::EventReturn DevMenu::OnLogView(UI::EventParams &e) { +UI::EventReturn DevMenuScreen::OnLogView(UI::EventParams &e) { UpdateUIState(UISTATE_PAUSEMENU); screenManager()->push(new LogScreen()); return 
UI::EVENT_DONE; } -UI::EventReturn DevMenu::OnLogConfig(UI::EventParams &e) { +UI::EventReturn DevMenuScreen::OnLogConfig(UI::EventParams &e) { UpdateUIState(UISTATE_PAUSEMENU); screenManager()->push(new LogConfigScreen()); return UI::EVENT_DONE; } -UI::EventReturn DevMenu::OnDeveloperTools(UI::EventParams &e) { +UI::EventReturn DevMenuScreen::OnDeveloperTools(UI::EventParams &e) { UpdateUIState(UISTATE_PAUSEMENU); screenManager()->push(new DeveloperToolsScreen()); return UI::EVENT_DONE; } -UI::EventReturn DevMenu::OnJitCompare(UI::EventParams &e) { +UI::EventReturn DevMenuScreen::OnJitCompare(UI::EventParams &e) { UpdateUIState(UISTATE_PAUSEMENU); screenManager()->push(new JitCompareScreen()); return UI::EVENT_DONE; } -UI::EventReturn DevMenu::OnShaderView(UI::EventParams &e) { +UI::EventReturn DevMenuScreen::OnShaderView(UI::EventParams &e) { UpdateUIState(UISTATE_PAUSEMENU); if (gpu) // Avoid crashing if chosen while the game is being loaded. screenManager()->push(new ShaderListScreen()); return UI::EVENT_DONE; } -UI::EventReturn DevMenu::OnFreezeFrame(UI::EventParams &e) { +UI::EventReturn DevMenuScreen::OnFreezeFrame(UI::EventParams &e) { if (PSP_CoreParameter().frozen) { PSP_CoreParameter().frozen = false; } else { @@ -173,12 +173,12 @@ UI::EventReturn DevMenu::OnFreezeFrame(UI::EventParams &e) { return UI::EVENT_DONE; } -UI::EventReturn DevMenu::OnDumpFrame(UI::EventParams &e) { +UI::EventReturn DevMenuScreen::OnDumpFrame(UI::EventParams &e) { gpu->DumpNextFrame(); return UI::EVENT_DONE; } -void DevMenu::dialogFinished(const Screen *dialog, DialogResult result) { +void DevMenuScreen::dialogFinished(const Screen *dialog, DialogResult result) { UpdateUIState(UISTATE_INGAME); // Close when a subscreen got closed. // TODO: a bug in screenmanager causes this not to work here. 
@@ -514,7 +514,15 @@ void SystemInfoScreen::CreateViews() { const std::string apiNameKey = draw->GetInfoString(InfoField::APINAME); const char *apiName = gr->T(apiNameKey); deviceSpecs->Add(new InfoItem(si->T("3D API"), apiName)); - deviceSpecs->Add(new InfoItem(si->T("Vendor"), draw->GetInfoString(InfoField::VENDORSTRING))); + + // TODO: Not really vendor, on most APIs it's a device name (GL calls it vendor though). + std::string vendorString; + if (draw->GetDeviceCaps().deviceID != 0) { + vendorString = StringFromFormat("%s (%08x)", draw->GetInfoString(InfoField::VENDORSTRING).c_str(), draw->GetDeviceCaps().deviceID); + } else { + vendorString = draw->GetInfoString(InfoField::VENDORSTRING); + } + deviceSpecs->Add(new InfoItem(si->T("Vendor"), vendorString)); std::string vendor = draw->GetInfoString(InfoField::VENDOR); if (vendor.size()) deviceSpecs->Add(new InfoItem(si->T("Vendor (detected)"), vendor)); diff --git a/UI/DevScreens.h b/UI/DevScreens.h index 4e7065307e..0df3d73ce3 100644 --- a/UI/DevScreens.h +++ b/UI/DevScreens.h @@ -28,9 +28,11 @@ #include "UI/MiscScreens.h" #include "GPU/Common/ShaderCommon.h" -class DevMenu : public PopupScreen { +class DevMenuScreen : public PopupScreen { public: - DevMenu(std::shared_ptr i18n) : PopupScreen(i18n->T("Dev Tools")) {} + DevMenuScreen(std::shared_ptr i18n) : PopupScreen(i18n->T("Dev Tools")) {} + + const char *tag() const override { return "DevMenu"; } void CreatePopupContents(UI::ViewGroup *parent) override; void dialogFinished(const Screen *dialog, DialogResult result) override; @@ -50,7 +52,9 @@ protected: class JitDebugScreen : public UIDialogScreenWithBackground { public: JitDebugScreen() {} - virtual void CreateViews() override; + void CreateViews() override; + + const char *tag() const override { return "JitDebug"; } private: UI::EventReturn OnEnableAll(UI::EventParams &e); @@ -60,7 +64,9 @@ private: class LogConfigScreen : public UIDialogScreenWithBackground { public: LogConfigScreen() {} - virtual void 
CreateViews() override; + void CreateViews() override; + + const char *tag() const override { return "LogConfig"; } private: UI::EventReturn OnToggleAll(UI::EventParams &e); @@ -76,6 +82,8 @@ public: void CreateViews() override; void update() override; + const char *tag() const override { return "Log"; } + private: void UpdateLog(); UI::EventReturn OnSubmit(UI::EventParams &e); @@ -89,14 +97,16 @@ class LogLevelScreen : public ListPopupScreen { public: LogLevelScreen(const std::string &title); -private: - virtual void OnCompleted(DialogResult result); + const char *tag() const override { return "LogLevel"; } +private: + void OnCompleted(DialogResult result) override; }; class SystemInfoScreen : public UIDialogScreenWithBackground { public: - SystemInfoScreen() {} + const char *tag() const override { return "SystemInfo"; } + void CreateViews() override; }; @@ -106,13 +116,15 @@ public: memset(buttons_, 0, sizeof(buttons_)); } - virtual bool key(const KeyInput &key) override; + const char *tag() const override { return "AddressPrompt"; } + + bool key(const KeyInput &key) override; UI::Event OnChoice; protected: - virtual void CreatePopupContents(UI::ViewGroup *parent) override; - virtual void OnCompleted(DialogResult result) override; + void CreatePopupContents(UI::ViewGroup *parent) override; + void OnCompleted(DialogResult result) override; UI::EventReturn OnDigitButton(UI::EventParams &e); UI::EventReturn OnBackspace(UI::EventParams &e); @@ -128,8 +140,9 @@ private: class JitCompareScreen : public UIDialogScreenWithBackground { public: - JitCompareScreen() : currentBlock_(-1) {} - virtual void CreateViews() override; + void CreateViews() override; + + const char *tag() const override { return "JitCompare"; } private: void UpdateDisasm(); @@ -146,7 +159,7 @@ private: UI::EventReturn OnAddressChange(UI::EventParams &e); UI::EventReturn OnShowStats(UI::EventParams &e); - int currentBlock_; + int currentBlock_ = -1; UI::TextView *blockName_; UI::TextEdit *blockAddr_; 
@@ -158,9 +171,10 @@ private: class ShaderListScreen : public UIDialogScreenWithBackground { public: - ShaderListScreen() {} void CreateViews() override; + const char *tag() const override { return "ShaderList"; } + private: int ListShaders(DebugShaderType shaderType, UI::LinearLayout *view); @@ -175,6 +189,9 @@ public: : id_(id), type_(type) {} void CreateViews() override; + + const char *tag() const override { return "ShaderView"; } + private: std::string id_; DebugShaderType type_; @@ -188,6 +205,8 @@ public: void CreateViews() override; void update() override; + const char *tag() const override { return "FrameDumpTest"; } + private: UI::EventReturn OnLoadDump(UI::EventParams &e); diff --git a/UI/DisplayLayoutScreen.h b/UI/DisplayLayoutScreen.h index 15835258eb..06730742d4 100644 --- a/UI/DisplayLayoutScreen.h +++ b/UI/DisplayLayoutScreen.h @@ -31,7 +31,7 @@ public: virtual void dialogFinished(const Screen *dialog, DialogResult result) override; virtual void onFinish(DialogResult reason) override; virtual void resized() override; - std::string tag() const override { return "display layout screen"; } + const char *tag() const override { return "DisplayLayout"; } protected: virtual UI::EventReturn OnCenter(UI::EventParams &e); diff --git a/UI/EmuScreen.cpp b/UI/EmuScreen.cpp index 1b98b91bb8..14ee1ced6c 100644 --- a/UI/EmuScreen.cpp +++ b/UI/EmuScreen.cpp @@ -955,7 +955,7 @@ void EmuScreen::CreateViews() { UI::EventReturn EmuScreen::OnDevTools(UI::EventParams &params) { auto dev = GetI18NCategory("Developer"); - DevMenu *devMenu = new DevMenu(dev); + DevMenuScreen *devMenu = new DevMenuScreen(dev); if (params.v) devMenu->SetPopupOrigin(params.v); screenManager()->push(devMenu); diff --git a/UI/EmuScreen.h b/UI/EmuScreen.h index ff41bd81a4..1fd41ffb9d 100644 --- a/UI/EmuScreen.h +++ b/UI/EmuScreen.h @@ -40,6 +40,8 @@ public: EmuScreen(const Path &filename); ~EmuScreen(); + const char *tag() const override { return "Emu"; } + void update() override; void render()
override; void preRender() override; diff --git a/UI/GPUDriverTestScreen.h b/UI/GPUDriverTestScreen.h index f2c5796be1..38ec2c33e1 100644 --- a/UI/GPUDriverTestScreen.h +++ b/UI/GPUDriverTestScreen.h @@ -17,6 +17,8 @@ public: void CreateViews() override; void render() override; + const char *tag() const override { return "GPUDriverTest"; } + private: void DiscardTest(); void ShaderTest(); diff --git a/UI/GameScreen.cpp b/UI/GameScreen.cpp index 15d4073f98..edf519f46b 100644 --- a/UI/GameScreen.cpp +++ b/UI/GameScreen.cpp @@ -443,6 +443,7 @@ UI::EventReturn GameScreen::OnRemoveFromRecent(UI::EventParams &e) { class SetBackgroundPopupScreen : public PopupScreen { public: SetBackgroundPopupScreen(const std::string &title, const Path &gamePath); + const char *tag() const override { return "SetBackgroundPopup"; } protected: bool FillVertical() const override { return false; } diff --git a/UI/GameScreen.h b/UI/GameScreen.h index 9379c8f134..cff0c48366 100644 --- a/UI/GameScreen.h +++ b/UI/GameScreen.h @@ -38,7 +38,7 @@ public: void render() override; - std::string tag() const override { return "game"; } + const char *tag() const override { return "Game"; } protected: void CreateViews() override; diff --git a/UI/GameSettingsScreen.cpp b/UI/GameSettingsScreen.cpp index 6c702bd7f0..0c1c0d009e 100644 --- a/UI/GameSettingsScreen.cpp +++ b/UI/GameSettingsScreen.cpp @@ -469,7 +469,7 @@ void GameSettingsScreen::CreateViews() { if (GetGPUBackend() == GPUBackend::VULKAN || GetGPUBackend() == GPUBackend::OPENGL) { static const char *bufferOptions[] = { "No buffer", "Up to 1", "Up to 2" }; - PopupMultiChoice *inflightChoice = graphicsSettings->Add(new PopupMultiChoice(&g_Config.iInflightFrames, gr->T("Buffer graphics commands (faster, input lag)"), bufferOptions, 0, ARRAY_SIZE(bufferOptions), gr->GetName(), screenManager())); + PopupMultiChoice *inflightChoice = graphicsSettings->Add(new PopupMultiChoice(&g_Config.iInflightFrames, gr->T("Buffer graphics commands (faster, input 
lag)"), bufferOptions, 1, ARRAY_SIZE(bufferOptions), gr->GetName(), screenManager())); inflightChoice->OnChoice.Handle(this, &GameSettingsScreen::OnInflightFramesChoice); } diff --git a/UI/GameSettingsScreen.h b/UI/GameSettingsScreen.h index 087b05740e..82b4cd859e 100644 --- a/UI/GameSettingsScreen.h +++ b/UI/GameSettingsScreen.h @@ -32,7 +32,7 @@ public: void update() override; void onFinish(DialogResult result) override; - std::string tag() const override { return "settings"; } + const char *tag() const override { return "GameSettings"; } protected: void sendMessage(const char *message, const char *value) override; @@ -157,10 +157,11 @@ private: class DeveloperToolsScreen : public UIDialogScreenWithBackground { public: - DeveloperToolsScreen() {} void update() override; void onFinish(DialogResult result) override; + const char *tag() const override { return "DeveloperTools"; } + protected: void CreateViews() override; @@ -205,6 +206,8 @@ public: void CreatePopupContents(UI::ViewGroup *parent) override; + const char *tag() const override { return "HostnameSelect"; } + protected: void OnCompleted(DialogResult result) override; bool CanComplete(DialogResult result) override; @@ -247,4 +250,6 @@ private: class GestureMappingScreen : public UIDialogScreenWithBackground { public: void CreateViews() override; + + const char *tag() const override { return "GestureMapping"; } }; diff --git a/UI/InstallZipScreen.h b/UI/InstallZipScreen.h index 6daa13bf7f..140b82fec2 100644 --- a/UI/InstallZipScreen.h +++ b/UI/InstallZipScreen.h @@ -30,6 +30,8 @@ public: virtual void update() override; virtual bool key(const KeyInput &key) override; + const char *tag() const override { return "InstallZip"; } + protected: virtual void CreateViews() override; diff --git a/UI/MainScreen.cpp b/UI/MainScreen.cpp index 3e025d06ba..d5ed3ab0a4 100644 --- a/UI/MainScreen.cpp +++ b/UI/MainScreen.cpp @@ -1432,11 +1432,12 @@ UI::EventReturn MainScreen::OnExit(UI::EventParams &e) { } void 
MainScreen::dialogFinished(const Screen *dialog, DialogResult result) { - if (dialog->tag() == "store") { + std::string tag = dialog->tag(); + if (tag == "Store") { backFromStore_ = true; RecreateViews(); } - if (dialog->tag() == "game") { + if (tag == "Game") { if (!restoreFocusGamePath_.empty() && UI::IsFocusMovementEnabled()) { // Prevent the background from fading, since we just were displaying it. highlightedGamePath_ = restoreFocusGamePath_; diff --git a/UI/MainScreen.h b/UI/MainScreen.h index 6b15f06427..b81e193b93 100644 --- a/UI/MainScreen.h +++ b/UI/MainScreen.h @@ -102,6 +102,8 @@ public: bool isTopLevel() const override { return true; } + const char *tag() const override { return "Main"; } + // Horrible hack to show the demos & homebrew tab after having installed a game from a zip file. static bool showHomebrewTab; @@ -153,7 +155,7 @@ protected: class UmdReplaceScreen : public UIDialogScreenWithBackground { public: - UmdReplaceScreen() {} + const char *tag() const override { return "UmdReplace"; } protected: void CreateViews() override; @@ -174,6 +176,8 @@ public: void CreatePopupContents(UI::ViewGroup *parent) override; UI::Event OnRecentChanged; + const char *tag() const override { return "GridSettings"; } + private: UI::EventReturn GridPlusClick(UI::EventParams &e); UI::EventReturn GridMinusClick(UI::EventParams &e); diff --git a/UI/MemStickScreen.h b/UI/MemStickScreen.h index f315467c56..378f4f3ed0 100644 --- a/UI/MemStickScreen.h +++ b/UI/MemStickScreen.h @@ -36,7 +36,7 @@ public: MemStickScreen(bool initialSetup); ~MemStickScreen() {} - std::string tag() const override { return "game"; } + const char *tag() const override { return "MemStick"; } enum Choice { CHOICE_BROWSE_FOLDER, @@ -112,6 +112,9 @@ class ConfirmMemstickMoveScreen : public UIDialogScreenWithBackground { public: ConfirmMemstickMoveScreen(Path newMemstickFolder, bool initialSetup); ~ConfirmMemstickMoveScreen(); + + const char *tag() const override { return "ConfirmMemstickMove"; } + 
protected: void update() override; void CreateViews() override; diff --git a/UI/MiscScreens.cpp b/UI/MiscScreens.cpp index 2a0ea9bfe7..d90d92f76d 100644 --- a/UI/MiscScreens.cpp +++ b/UI/MiscScreens.cpp @@ -387,13 +387,13 @@ void HandleCommonMessages(const char *message, const char *value, ScreenManager MIPSComp::jit->ClearCache(); } currentMIPS->UpdateCore((CPUCore)g_Config.iCpuCore); - } else if (!strcmp(message, "control mapping") && isActiveScreen && activeScreen->tag() != "control mapping") { + } else if (!strcmp(message, "control mapping") && isActiveScreen && std::string(activeScreen->tag()) != "ControlMapping") { UpdateUIState(UISTATE_MENU); manager->push(new ControlMappingScreen()); - } else if (!strcmp(message, "display layout editor") && isActiveScreen && activeScreen->tag() != "display layout screen") { + } else if (!strcmp(message, "display layout editor") && isActiveScreen && std::string(activeScreen->tag()) != "DisplayLayout") { UpdateUIState(UISTATE_MENU); manager->push(new DisplayLayoutScreen()); - } else if (!strcmp(message, "settings") && isActiveScreen && activeScreen->tag() != "settings") { + } else if (!strcmp(message, "settings") && isActiveScreen && std::string(activeScreen->tag()) != "GameSettings") { UpdateUIState(UISTATE_MENU); manager->push(new GameSettingsScreen(Path())); } else if (!strcmp(message, "language screen") && isActiveScreen) { diff --git a/UI/MiscScreens.h b/UI/MiscScreens.h index d094b6d3db..164a06cfec 100644 --- a/UI/MiscScreens.h +++ b/UI/MiscScreens.h @@ -82,6 +82,8 @@ public: void TriggerFinish(DialogResult result) override; + const char *tag() const override { return "Prompt"; } + private: UI::EventReturn OnYes(UI::EventParams &e); UI::EventReturn OnNo(UI::EventParams &e); @@ -96,6 +98,8 @@ class NewLanguageScreen : public ListPopupScreen { public: NewLanguageScreen(const std::string &title); + const char *tag() const override { return "NewLanguage"; } + private: void OnCompleted(DialogResult result) override; bool 
ShowButtons() const override { return true; } @@ -110,6 +114,8 @@ public: void CreateViews() override; + const char *tag() const override { return "PostProc"; } + private: void OnCompleted(DialogResult result) override; bool ShowButtons() const override { return true; } @@ -123,6 +129,8 @@ public: void CreateViews() override; + const char *tag() const override { return "TextureShader"; } + private: void OnCompleted(DialogResult result) override; bool ShowButtons() const override { return true; } @@ -146,6 +154,8 @@ public: void sendMessage(const char *message, const char *value) override; void CreateViews() override {} + const char *tag() const override { return "Logo"; } + private: void Next(); int frames_ = 0; @@ -162,6 +172,8 @@ public: void CreateViews() override; + const char *tag() const override { return "Credits"; } + private: UI::EventReturn OnOK(UI::EventParams &e); diff --git a/UI/PauseScreen.cpp b/UI/PauseScreen.cpp index 339409ed6e..748fcdf86d 100644 --- a/UI/PauseScreen.cpp +++ b/UI/PauseScreen.cpp @@ -172,9 +172,7 @@ public: return slot_; } - std::string tag() const override { - return "screenshot"; - } + const char *tag() const override { return "ScreenshotView"; } protected: bool FillVertical() const override { return false; } @@ -460,7 +458,7 @@ UI::EventReturn GamePauseScreen::OnState(UI::EventParams &e) { void GamePauseScreen::dialogFinished(const Screen *dialog, DialogResult dr) { std::string tag = dialog->tag(); - if (tag == "screenshot" && dr == DR_OK) { + if (tag == "ScreenshotView" && dr == DR_OK) { finishNextFrame_ = true; } else { // There may have been changes to our savestates, so let's recreate. 
diff --git a/UI/PauseScreen.h b/UI/PauseScreen.h index 15b22174e2..3c66609285 100644 --- a/UI/PauseScreen.h +++ b/UI/PauseScreen.h @@ -33,6 +33,8 @@ public: virtual void dialogFinished(const Screen *dialog, DialogResult dr) override; + const char *tag() const override { return "GamePause"; } + protected: virtual void CreateViews() override; virtual void update() override; diff --git a/UI/RemoteISOScreen.h b/UI/RemoteISOScreen.h index bfcac815be..b8676e4832 100644 --- a/UI/RemoteISOScreen.h +++ b/UI/RemoteISOScreen.h @@ -29,6 +29,8 @@ class RemoteISOScreen : public UIScreenWithBackground { public: RemoteISOScreen(); + const char *tag() const override { return "RemoteISO"; } + protected: void update() override; void CreateViews() override; @@ -57,6 +59,8 @@ public: RemoteISOConnectScreen(); ~RemoteISOConnectScreen() override; + const char *tag() const override { return "RemoteISOConnect"; } + protected: void update() override; void CreateViews() override; @@ -83,6 +87,8 @@ class RemoteISOBrowseScreen : public MainScreen { public: RemoteISOBrowseScreen(const std::string &url, const std::vector &games); + const char *tag() const override { return "RemoteISOBrowse"; } + protected: void CreateViews() override; @@ -94,6 +100,8 @@ class RemoteISOSettingsScreen : public UIDialogScreenWithBackground { public: RemoteISOSettingsScreen(); + const char *tag() const override { return "RemoteISOSettings"; } + UI::EventReturn OnClickRemoteISOSubdir(UI::EventParams &e); UI::EventReturn OnClickRemoteServer(UI::EventParams &e); protected: diff --git a/UI/ReportScreen.h b/UI/ReportScreen.h index a7d132b2c8..006d90e6f5 100644 --- a/UI/ReportScreen.h +++ b/UI/ReportScreen.h @@ -37,6 +37,8 @@ class ReportScreen : public UIDialogScreenWithGameBackground { public: ReportScreen(const Path &gamePath); + const char *tag() const override { return "Report"; } + protected: void postRender() override; void update() override; @@ -75,6 +77,8 @@ class ReportFinishScreen : public 
UIDialogScreenWithGameBackground { public: ReportFinishScreen(const Path &gamePath, ReportingOverallScore score); + const char *tag() const override { return "ReportFinish"; } + protected: void update() override; void CreateViews() override; diff --git a/UI/SavedataScreen.cpp b/UI/SavedataScreen.cpp index 731e3d5d95..0172b6a19b 100644 --- a/UI/SavedataScreen.cpp +++ b/UI/SavedataScreen.cpp @@ -78,8 +78,9 @@ static std::string TrimString(const std::string &str) { class SavedataPopupScreen : public PopupScreen { public: - SavedataPopupScreen(std::string savePath, std::string title) : PopupScreen(TrimString(title)), savePath_(savePath) { - } + SavedataPopupScreen(std::string savePath, std::string title) : PopupScreen(TrimString(title)), savePath_(savePath) { } + + const char *tag() const override { return "SavedataPopup"; } void CreatePopupContents(UI::ViewGroup *parent) override { using namespace UI; diff --git a/UI/SavedataScreen.h b/UI/SavedataScreen.h index 1f9509e687..1bb2f42140 100644 --- a/UI/SavedataScreen.h +++ b/UI/SavedataScreen.h @@ -74,6 +74,8 @@ public: void dialogFinished(const Screen *dialog, DialogResult result) override; void sendMessage(const char *message, const char *value) override; + const char *tag() const override { return "Savedata"; } + protected: UI::EventReturn OnSavedataButtonClick(UI::EventParams &e); UI::EventReturn OnSortClick(UI::EventParams &e); diff --git a/UI/Store.h b/UI/Store.h index 263e5dbde1..8f02f1a615 100644 --- a/UI/Store.h +++ b/UI/Store.h @@ -67,7 +67,7 @@ public: ~StoreScreen(); void update() override; - std::string tag() const override { return "store"; } + const char *tag() const override { return "Store"; } protected: void CreateViews() override; diff --git a/UI/TiltAnalogSettingsScreen.h b/UI/TiltAnalogSettingsScreen.h index 3aeece6eb9..832a436842 100644 --- a/UI/TiltAnalogSettingsScreen.h +++ b/UI/TiltAnalogSettingsScreen.h @@ -27,6 +27,8 @@ public: void CreateViews() override; bool axis(const AxisInput &axis) 
override; + const char *tag() const override { return "TiltAnalogSettings"; } + private: UI::EventReturn OnCalibrate(UI::EventParams &e); float currentTiltX_ = 0.0f; diff --git a/UI/TouchControlLayoutScreen.h b/UI/TouchControlLayoutScreen.h index 3db9394258..9834f217f0 100644 --- a/UI/TouchControlLayoutScreen.h +++ b/UI/TouchControlLayoutScreen.h @@ -33,6 +33,8 @@ public: virtual void update() override; virtual void resized() override; + const char *tag() const override { return "TouchControlLayout"; } + protected: virtual UI::EventReturn OnReset(UI::EventParams &e); virtual UI::EventReturn OnVisibility(UI::EventParams &e); diff --git a/UI/TouchControlVisibilityScreen.h b/UI/TouchControlVisibilityScreen.h index f44e333395..52d6d00eea 100644 --- a/UI/TouchControlVisibilityScreen.h +++ b/UI/TouchControlVisibilityScreen.h @@ -36,6 +36,8 @@ public: void CreateViews() override; void onFinish(DialogResult result) override; + const char *tag() const override { return "TouchControlVisibility"; } + protected: UI::EventReturn OnToggleAll(UI::EventParams &e); @@ -47,4 +49,6 @@ private: class RightAnalogMappingScreen : public UIDialogScreenWithBackground { public: void CreateViews() override; + + const char *tag() const override { return "RightAnalogMapping"; } }; diff --git a/Windows/GPU/WindowsVulkanContext.cpp b/Windows/GPU/WindowsVulkanContext.cpp index 5618e2f1f5..818f398420 100644 --- a/Windows/GPU/WindowsVulkanContext.cpp +++ b/Windows/GPU/WindowsVulkanContext.cpp @@ -131,9 +131,7 @@ bool WindowsVulkanContext::Init(HINSTANCE hInst, HWND hWnd, std::string *error_m return false; } - bool splitSubmit = g_Config.bGfxDebugSplitSubmit; - - draw_ = Draw::T3DCreateVulkanContext(vulkan_, splitSubmit); + draw_ = Draw::T3DCreateVulkanContext(vulkan_); SetGPUBackend(GPUBackend::VULKAN, vulkan_->GetPhysicalDeviceProperties(deviceNum).properties.deviceName); bool success = draw_->CreatePresets(); _assert_msg_(success, "Failed to compile preset shaders"); diff --git 
a/Windows/W32Util/Misc.cpp b/Windows/W32Util/Misc.cpp index 657a0193d2..1849070afc 100644 --- a/Windows/W32Util/Misc.cpp +++ b/Windows/W32Util/Misc.cpp @@ -288,9 +288,49 @@ int GenericListControl::HandleNotify(LPARAM lParam) { return 0; } + if (mhdr->code == LVN_INCREMENTALSEARCH) { + NMLVFINDITEM *request = (NMLVFINDITEM *)lParam; + uint32_t supported = LVFI_WRAP | LVFI_STRING | LVFI_PARTIAL | LVFI_SUBSTRING; + if ((request->lvfi.flags & ~supported) == 0 && (request->lvfi.flags & LVFI_STRING) != 0) { + bool wrap = (request->lvfi.flags & LVFI_WRAP) != 0; + bool partial = (request->lvfi.flags & (LVFI_PARTIAL | LVFI_SUBSTRING)) != 0; + + // It seems like 0 is always sent for start, let's override. + int startRow = request->iStart; + if (startRow == 0) + startRow = GetSelectedIndex(); + int result = OnIncrementalSearch(startRow, request->lvfi.psz, wrap, partial); + if (result != -1) { + request->lvfi.flags = LVFI_PARAM; + request->lvfi.lParam = (LPARAM)result; + } + } + } + return 0; } +int GenericListControl::OnIncrementalSearch(int startRow, const wchar_t *str, bool wrap, bool partial) { + int size = GetRowCount(); + size_t searchlen = wcslen(str); + if (!wrap) + size -= startRow; + + // We start with the earliest column, preferring matches on the leftmost columns by default. + for (int c = 0; c < columnCount; ++c) { + for (int i = 0; i < size; ++i) { + int r = (startRow + i) % size; + stringBuffer[0] = 0; + GetColumnText(stringBuffer, r, c); + int difference = partial ? 
_wcsnicmp(str, stringBuffer, searchlen) : _wcsicmp(str, stringBuffer); + if (difference == 0) + return r; + } + } + + return -1; +} + void GenericListControl::Update() { if (!updateScheduled_) { SetTimer(handle, IDT_UPDATE, UPDATE_DELAY, nullptr); diff --git a/Windows/W32Util/Misc.h b/Windows/W32Util/Misc.h index 8a43c55b59..1b723cc241 100644 --- a/Windows/W32Util/Misc.h +++ b/Windows/W32Util/Misc.h @@ -68,6 +68,8 @@ protected: virtual bool OnRowPrePaint(int row, LPNMLVCUSTOMDRAW msg) { return false; } virtual bool OnColPrePaint(int row, int col, LPNMLVCUSTOMDRAW msg) { return false; } + virtual int OnIncrementalSearch(int startRow, const wchar_t *str, bool wrap, bool partial); + private: static LRESULT CALLBACK wndProc(HWND hwnd, UINT msg, WPARAM wParam, LPARAM lParam); void ProcessUpdate(); diff --git a/Windows/main.cpp b/Windows/main.cpp index 4c0752bda8..e3888dfe4c 100644 --- a/Windows/main.cpp +++ b/Windows/main.cpp @@ -34,6 +34,7 @@ #include "Common/System/Display.h" #include "Common/System/NativeApp.h" #include "Common/System/System.h" +#include "Common/File/FileUtil.h" #include "Common/File/VFS/VFS.h" #include "Common/File/VFS/AssetReader.h" #include "Common/Data/Text/I18n.h" @@ -109,9 +110,12 @@ static std::thread inputBoxThread; static bool inputBoxRunning = false; void OpenDirectory(const char *path) { + // SHParseDisplayName can't handle relative paths, so normalize first. 
+ std::string resolved = ReplaceAll(File::ResolvePath(path), "/", "\\"); + SFGAOF flags; PIDLIST_ABSOLUTE pidl = nullptr; - HRESULT hr = SHParseDisplayName(ConvertUTF8ToWString(ReplaceAll(path, "/", "\\")).c_str(), nullptr, &pidl, 0, &flags); + HRESULT hr = SHParseDisplayName(ConvertUTF8ToWString(resolved).c_str(), nullptr, &pidl, 0, &flags); if (pidl) { if (SUCCEEDED(hr)) diff --git a/android/jni/Android.mk b/android/jni/Android.mk index b47db9d5cc..f317315e0a 100644 --- a/android/jni/Android.mk +++ b/android/jni/Android.mk @@ -51,6 +51,7 @@ VULKAN_FILES := \ $(SRC)/Common/GPU/Vulkan/thin3d_vulkan.cpp \ $(SRC)/Common/GPU/Vulkan/VulkanQueueRunner.cpp \ $(SRC)/Common/GPU/Vulkan/VulkanRenderManager.cpp \ + $(SRC)/Common/GPU/Vulkan/VulkanFrameData.cpp \ $(SRC)/Common/GPU/Vulkan/VulkanLoader.cpp \ $(SRC)/Common/GPU/Vulkan/VulkanContext.cpp \ $(SRC)/Common/GPU/Vulkan/VulkanDebug.cpp \ diff --git a/android/jni/AndroidVulkanContext.cpp b/android/jni/AndroidVulkanContext.cpp index dc68a8d75a..af8a87e751 100644 --- a/android/jni/AndroidVulkanContext.cpp +++ b/android/jni/AndroidVulkanContext.cpp @@ -101,7 +101,7 @@ bool AndroidVulkanContext::InitFromRenderThread(ANativeWindow *wnd, int desiredB bool success = true; if (g_Vulkan->InitSwapchain()) { - draw_ = Draw::T3DCreateVulkanContext(g_Vulkan, g_Config.bGfxDebugSplitSubmit); + draw_ = Draw::T3DCreateVulkanContext(g_Vulkan); SetGPUBackend(GPUBackend::VULKAN); success = draw_->CreatePresets(); // Doesn't fail, we ship the compiler. _assert_msg_(success, "Failed to compile preset shaders"); diff --git a/assets/compat.ini b/assets/compat.ini index f79978e9fd..f153b93933 100644 --- a/assets/compat.ini +++ b/assets/compat.ini @@ -584,54 +584,6 @@ NPJH50304 = true ULES00703 = true ULAS42095 = true -[MaliDepthStencilBugWorkaround] -# See issue #13833 where the map is supposed to be round but is not. 
- -# Midnight Club: LA Remix -ULUS10383 = true -ULES01144 = true -ULJS00180 = true -ULJS00267 = true -ULJM05904 = true -NPJH50440 = true -# Midnight Club 3 : DUB edition -ULUS10021 = true -ULES00108 = true - -# Tales of Phantasia - Narikiri Dungeon X. See #15526 -ULJS00293 = true - -# The Simpsons Game - issue #9830 -ULUS10295 = true -ULES00975 = true -ULES00979 = true -ULES00978 = true -ULES00977 = true -ULES00976 = true - -# Surf's Up - see issue #15016 -ULES00816 = true -ULES00817 = true -ULUS10262 = true - -# Kenka Bancho - Badass Rumble - see issue #15016 -ULUS10442 = true -ULJS00166 = true -UCAS40231 = true -ULJS19030 = true -NPJH50223 = true -ULJS19082 = true - -# Nanoha Magical Girl - see issue #15016 -ULJS00384 = true -ULJS00385 = true -ULJS19078 = true - -# Suikoden Woven Web of The Centuries / Genso Suikoden: Tsumugareshi Hyakunen no Toki -ULJM05886 = true -ULJM08063 = true -NPJH50535 = true - [RequireDefaultCPUClock] # GOW : Ghost of Sparta UCUS98737 = true diff --git a/ffmpeg b/ffmpeg index 3ad7ddb9eb..cea6dd17be 160000 --- a/ffmpeg +++ b/ffmpeg @@ -1 +1 @@ -Subproject commit 3ad7ddb9eb2af898dce8c4b5e9a28b77b3f7ddd7 +Subproject commit cea6dd17be4ea045946dc991ef0bca91f9005345 diff --git a/headless/Compare.cpp b/headless/Compare.cpp index f5127e5ee0..76254d005a 100644 --- a/headless/Compare.cpp +++ b/headless/Compare.cpp @@ -305,11 +305,18 @@ bool CompareOutput(const Path &bootFilename, const std::string &output, bool ver } } -inline int ComparePixel(u32 pix1, u32 pix2) { - // For now, if they're different at all except alpha, it's an error. - if ((pix1 & 0xFFFFFF) != (pix2 & 0xFFFFFF)) - return 1; - return 0; +static inline double CompareChannel(int pix1, int pix2) { + double diff = pix1 - pix2; + return diff * diff; +} + +static inline double ComparePixel(u32 pix1, u32 pix2) { + // Ignore alpha. 
+ double r = CompareChannel(pix1 & 0xFF, pix2 & 0xFF); + double g = CompareChannel((pix1 >> 8) & 0xFF, (pix2 >> 8) & 0xFF); + double b = CompareChannel((pix1 >> 16) & 0xFF, (pix2 >> 16) & 0xFF); + + return r + g + b; } std::vector TranslateDebugBufferToCompare(const GPUDebugBuffer *buffer, u32 stride, u32 h) { @@ -338,7 +345,6 @@ std::vector TranslateDebugBufferToCompare(const GPUDebugBuffer *buffer, u32 dst += (h - safeH) * stride; } - u32 errors = 0; for (u32 y = 0; y < safeH; ++y) { switch (buffer->GetFormat()) { case GPU_DBG_FORMAT_8888: @@ -429,7 +435,7 @@ double ScreenshotComparer::Compare(const Path &screenshotFilename) { return -1.0f; } - u32 errors = 0; + double errors = 0; if (asBitmap_) { // The reference is flipped and BGRA by default for the common BMP compare case. for (u32 y = 0; y < h_; ++y) { @@ -447,7 +453,8 @@ double ScreenshotComparer::Compare(const Path &screenshotFilename) { } } - return (double) errors / (double) (w_ * h_); + // Convert to MSE, accounting for all three channels (RGB.) 
+ return errors / (double)(w_ * h_ * 3); } bool ScreenshotComparer::SaveActualBitmap(const Path &resultFilename) { diff --git a/headless/Headless.cpp b/headless/Headless.cpp index 0be0518e8b..1deebd95cd 100644 --- a/headless/Headless.cpp +++ b/headless/Headless.cpp @@ -126,15 +126,12 @@ int printUsage(const char *progname, const char *reason) fprintf(stderr, " -m, --mount umd.cso mount iso on umd1:\n"); fprintf(stderr, " -r, --root some/path mount path on host0: (elfs must be in here)\n"); fprintf(stderr, " -l, --log full log output, not just emulated printfs\n"); - fprintf(stderr, " --debugger=PORT enable websocket debugger and break at start\n"); + fprintf(stderr, " --debugger=PORT enable websocket debugger and break at start\n"); -#if defined(HEADLESSHOST_CLASS) - { - fprintf(stderr, " --graphics=BACKEND use the full gpu backend (slower)\n"); - fprintf(stderr, " options: gles, software, directx9, etc.\n"); - fprintf(stderr, " --screenshot=FILE compare against a screenshot\n"); - } -#endif + fprintf(stderr, " --graphics=BACKEND use a different gpu backend\n"); + fprintf(stderr, " options: gles, software, directx9, etc.\n"); + fprintf(stderr, " --screenshot=FILE compare against a screenshot\n"); + fprintf(stderr, " --max-mse=NUMBER maximum allowed MSE error for screenshot\n"); fprintf(stderr, " --timeout=SECONDS abort test it if takes longer than SECONDS\n"); fprintf(stderr, " -v, --verbose show the full passed/failed result\n"); @@ -142,6 +139,7 @@ int printUsage(const char *progname, const char *reason) fprintf(stderr, " --ir use ir interpreter\n"); fprintf(stderr, " -j use jit (default)\n"); fprintf(stderr, " -c, --compare compare with output in file.expected\n"); + fprintf(stderr, " --bench run multiple times and output speed\n"); fprintf(stderr, "\nSee headless.txt for details.\n"); return 1; @@ -161,13 +159,20 @@ static HeadlessHost *getHost(GPUCore gpuCore) { } } -bool RunAutoTest(HeadlessHost *headlessHost, CoreParameter &coreParameter, bool autoCompare, 
bool verbose, double timeout) -{ +struct AutoTestOptions { + double timeout; + double maxScreenshotError; + bool compare : 1; + bool verbose : 1; + bool bench : 1; +}; + +bool RunAutoTest(HeadlessHost *headlessHost, CoreParameter &coreParameter, const AutoTestOptions &opt) { // Kinda ugly, trying to guesstimate the test name from filename... currentTestName = GetTestName(coreParameter.fileToStart); std::string output; - if (autoCompare) + if (opt.compare || opt.bench) coreParameter.collectEmuLog = &output; std::string error_string; @@ -181,23 +186,19 @@ bool RunAutoTest(HeadlessHost *headlessHost, CoreParameter &coreParameter, bool TeamCityPrint("testStarted name='%s' captureStandardOutput='true'", currentTestName.c_str()); - host->BootDone(); - - if (autoCompare) - headlessHost->SetComparisonScreenshot(ExpectedScreenshotFromFilename(coreParameter.fileToStart)); + if (opt.compare) + headlessHost->SetComparisonScreenshot(ExpectedScreenshotFromFilename(coreParameter.fileToStart), opt.maxScreenshotError); while (!PSP_InitUpdate(&error_string)) sleep_ms(1); if (!PSP_IsInited()) { TeamCityPrint("testFailed name='%s' message='Startup failed'", currentTestName.c_str()); TeamCityPrint("testFinished name='%s'", currentTestName.c_str()); - GitHubActionsPrint("error", "Test timeout for %s", currentTestName.c_str()); + GitHubActionsPrint("error", "Test init failed for %s", currentTestName.c_str()); return false; } - bool passed = true; - double deadline; - deadline = time_now_d() + timeout; + host->BootDone(); Core_UpdateDebugStats(g_Config.bShowDebugStats || g_Config.bLogFrameDrops); @@ -205,6 +206,8 @@ bool RunAutoTest(HeadlessHost *headlessHost, CoreParameter &coreParameter, bool if (coreParameter.graphicsContext && coreParameter.graphicsContext->GetDrawContext()) coreParameter.graphicsContext->GetDrawContext()->BeginFrame(); + bool passed = true; + double deadline = time_now_d() + opt.timeout; coreState = coreParameter.startBreak ? 
CORE_STEPPING : CORE_RUNNING; while (coreState == CORE_RUNNING || coreState == CORE_STEPPING) { @@ -221,12 +224,15 @@ bool RunAutoTest(HeadlessHost *headlessHost, CoreParameter &coreParameter, bool } if (time_now_d() > deadline) { // Don't compare, print the output at least up to this point, and bail. - printf("%s", output.c_str()); - passed = false; + if (!opt.bench) { + printf("%s", output.c_str()); - host->SendDebugOutput("TIMEOUT\n"); - TeamCityPrint("testFailed name='%s' message='Test timeout'", currentTestName.c_str()); - GitHubActionsPrint("error", "Test timeout for %s", currentTestName.c_str()); + host->SendDebugOutput("TIMEOUT\n"); + TeamCityPrint("testFailed name='%s' message='Test timeout'", currentTestName.c_str()); + GitHubActionsPrint("error", "Test timeout for %s", currentTestName.c_str()); + } + + passed = false; Core_Stop(); } } @@ -237,10 +243,11 @@ bool RunAutoTest(HeadlessHost *headlessHost, CoreParameter &coreParameter, bool PSP_Shutdown(); - headlessHost->FlushDebugOutput(); + if (!opt.bench) + headlessHost->FlushDebugOutput(); - if (autoCompare && passed) - passed = CompareOutput(coreParameter.fileToStart, output, verbose); + if (opt.compare && passed) + passed = CompareOutput(coreParameter.fileToStart, output, opt.verbose); TeamCityPrint("testFinished name='%s'", currentTestName.c_str()); @@ -263,9 +270,9 @@ int main(int argc, const char* argv[]) _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF); #endif + AutoTestOptions testOptions{}; + testOptions.timeout = std::numeric_limits::infinity(); bool fullLog = false; - bool autoCompare = false; - bool verbose = false; const char *stateToLoad = 0; GPUCore gpuCore = GPUCORE_SOFTWARE; CPUCore cpuCore = CPUCore::JIT; @@ -275,7 +282,6 @@ int main(int argc, const char* argv[]) const char *mountIso = nullptr; const char *mountRoot = nullptr; const char *screenshotFilename = nullptr; - float timeout = std::numeric_limits::infinity(); for (int i = 1; i < argc; i++) { @@ -300,9 +306,11 @@ int 
main(int argc, const char* argv[]) else if (!strcmp(argv[i], "--ir")) cpuCore = CPUCore::IR_JIT; else if (!strcmp(argv[i], "-c") || !strcmp(argv[i], "--compare")) - autoCompare = true; + testOptions.compare = true; + else if (!strcmp(argv[i], "--bench")) + testOptions.bench = true; else if (!strcmp(argv[i], "-v") || !strcmp(argv[i], "--verbose")) - verbose = true; + testOptions.verbose = true; else if (!strncmp(argv[i], "--graphics=", strlen("--graphics=")) && strlen(argv[i]) > strlen("--graphics=")) { const char *gpuName = argv[i] + strlen("--graphics="); @@ -330,7 +338,9 @@ int main(int argc, const char* argv[]) } else if (!strncmp(argv[i], "--screenshot=", strlen("--screenshot=")) && strlen(argv[i]) > strlen("--screenshot=")) screenshotFilename = argv[i] + strlen("--screenshot="); else if (!strncmp(argv[i], "--timeout=", strlen("--timeout=")) && strlen(argv[i]) > strlen("--timeout=")) - timeout = (float)strtod(argv[i] + strlen("--timeout="), NULL); + testOptions.timeout = strtod(argv[i] + strlen("--timeout="), nullptr); + else if (!strncmp(argv[i], "--max-mse=", strlen("--max-mse=")) && strlen(argv[i]) > strlen("--max-mse=")) + testOptions.maxScreenshotError = strtod(argv[i] + strlen("--max-mse="), nullptr); else if (!strncmp(argv[i], "--debugger=", strlen("--debugger=")) && strlen(argv[i]) > strlen("--debugger=")) debuggerPort = (int)strtoul(argv[i] + strlen("--debugger="), NULL, 10); else if (!strcmp(argv[i], "--teamcity")) @@ -388,7 +398,7 @@ int main(int argc, const char* argv[]) coreParameter.mountIso = mountIso ? Path(std::string(mountIso)) : Path(); coreParameter.mountRoot = mountRoot ? 
Path(std::string(mountRoot)) : Path(); coreParameter.startBreak = false; - coreParameter.printfEmuLog = !autoCompare; + coreParameter.printfEmuLog = !testOptions.compare; coreParameter.headLess = true; coreParameter.renderScaleFactor = 1; coreParameter.renderWidth = 480; @@ -456,8 +466,9 @@ int main(int argc, const char* argv[]) if (!File::Exists(g_Config.flash0Directory)) g_Config.flash0Directory = File::GetExeDirectory() / "assets/flash0"; - if (screenshotFilename != 0) - headlessHost->SetComparisonScreenshot(Path(std::string(screenshotFilename))); + if (screenshotFilename) + headlessHost->SetComparisonScreenshot(Path(std::string(screenshotFilename)), testOptions.maxScreenshotError); + headlessHost->SetWriteFailureScreenshot(!teamCityMode && !getenv("GITHUB_ACTIONS") && !testOptions.bench); #if PPSSPP_PLATFORM(ANDROID) // For some reason the debugger installs it with this name? @@ -487,14 +498,28 @@ int main(int argc, const char* argv[]) for (size_t i = 0; i < testFilenames.size(); ++i) { coreParameter.fileToStart = Path(testFilenames[i]); - if (autoCompare) + if (testOptions.compare) printf("%s:\n", coreParameter.fileToStart.c_str()); - bool passed = RunAutoTest(headlessHost, coreParameter, autoCompare, verbose, timeout); - if (autoCompare) - { + bool passed = RunAutoTest(headlessHost, coreParameter, testOptions); + if (testOptions.bench) { + double st = time_now_d(); + double deadline = st + testOptions.timeout; + double runs = 0.0; + for (int i = 0; i < 100; ++i) { + RunAutoTest(headlessHost, coreParameter, testOptions); + runs++; + + if (time_now_d() > deadline) + break; + } + double et = time_now_d(); + std::string testName = GetTestName(coreParameter.fileToStart); - if (passed) - { + printf(" %s - %f seconds average\n", testName.c_str(), (et - st) / runs); + } + if (testOptions.compare) { + std::string testName = GetTestName(coreParameter.fileToStart); + if (passed) { passedTests.push_back(testName); printf(" %s - passed!\n", testName.c_str()); } @@ -503,8 
+528,7 @@ int main(int argc, const char* argv[]) } } - if (autoCompare) - { + if (testOptions.compare) { printf("%d tests passed, %d tests failed.\n", (int)passedTests.size(), (int)failedTests.size()); if (!failedTests.empty()) { diff --git a/headless/StubHost.cpp b/headless/StubHost.cpp index e7fbd03f9a..b3896df478 100644 --- a/headless/StubHost.cpp +++ b/headless/StubHost.cpp @@ -54,10 +54,10 @@ void HeadlessHost::SendDebugScreenshot(const u8 *pixbuf, u32 w, u32 h) { if (errors < 0) SendOrCollectDebugOutput(comparer.GetError() + "\n"); - if (errors > 0) - SendOrCollectDebugOutput(StringFromFormat("Screenshot error: %f%%\n", errors * 100.0f)); + if (errors > maxScreenshotError_) + SendOrCollectDebugOutput(StringFromFormat("Screenshot MSE: %f\n", errors)); - if (errors > 0 && !teamCityMode && !getenv("GITHUB_ACTIONS")) { + if (errors > maxScreenshotError_ && writeFailureScreenshot_) { if (comparer.SaveActualBitmap(Path("__testfailure.bmp"))) SendOrCollectDebugOutput("Actual output written to: __testfailure.bmp\n"); comparer.SaveVisualComparisonPNG(Path("__testcompare.png")); diff --git a/headless/StubHost.h b/headless/StubHost.h index c914f025b2..a77a42424d 100644 --- a/headless/StubHost.h +++ b/headless/StubHost.h @@ -68,8 +68,12 @@ public: } } - virtual void SetComparisonScreenshot(const Path &filename) { + void SetComparisonScreenshot(const Path &filename, double maxError) { comparisonScreenshot_ = filename; + maxScreenshotError_ = maxError; + } + void SetWriteFailureScreenshot(bool flag) { + writeFailureScreenshot_ = flag; } void SendDebugScreenshot(const u8 *pixbuf, u32 w, u32 h) override; @@ -83,7 +87,9 @@ protected: void SendOrCollectDebugOutput(const std::string &output); Path comparisonScreenshot_; + double maxScreenshotError_ = 0.0; std::string debugOutputBuffer_; GPUCore gpuCore_; GraphicsContext *gfx_ = nullptr; + bool writeFailureScreenshot_ = true; }; diff --git a/libretro/LibretroVulkanContext.cpp b/libretro/LibretroVulkanContext.cpp index 
571b7bba9d..ff9180cecf 100644 --- a/libretro/LibretroVulkanContext.cpp +++ b/libretro/LibretroVulkanContext.cpp @@ -137,7 +137,7 @@ void LibretroVulkanContext::CreateDrawContext() { return; } - draw_ = Draw::T3DCreateVulkanContext(vk, false); + draw_ = Draw::T3DCreateVulkanContext(vk); ((VulkanRenderManager*)draw_->GetNativeObject(Draw::NativeObject::RENDER_MANAGER))->SetInflightFrames(g_Config.iInflightFrames); SetGPUBackend(GPUBackend::VULKAN); } diff --git a/libretro/Makefile.common b/libretro/Makefile.common index ab2c4bfe1c..f08bd45633 100644 --- a/libretro/Makefile.common +++ b/libretro/Makefile.common @@ -253,6 +253,7 @@ SOURCES_CXX += \ $(COMMONDIR)/GPU/Vulkan/thin3d_vulkan.cpp \ $(COMMONDIR)/GPU/Vulkan/VulkanQueueRunner.cpp \ $(COMMONDIR)/GPU/Vulkan/VulkanRenderManager.cpp \ + $(COMMONDIR)/GPU/Vulkan/VulkanFrameData.cpp \ $(COMMONDIR)/GPU/Vulkan/VulkanLoader.cpp \ $(COMMONDIR)/GPU/Vulkan/VulkanContext.cpp \ $(COMMONDIR)/GPU/Vulkan/VulkanDebug.cpp \ diff --git a/pspautotests b/pspautotests index 682a4303ab..d7a55d5487 160000 --- a/pspautotests +++ b/pspautotests @@ -1 +1 @@ -Subproject commit 682a4303aba63a50c91ae0fa6928c9dac8ca9b92 +Subproject commit d7a55d5487b1fa1a688178206abaad4aea5be18a diff --git a/test.py b/test.py index 72c6302eb2..47f5c36034 100755 --- a/test.py +++ b/test.py @@ -147,6 +147,7 @@ tests_good = [ "gpu/commands/blend", "gpu/commands/blend565", "gpu/commands/blocktransfer", + "gpu/commands/fog", "gpu/commands/material", "gpu/displaylist/alignment", "gpu/dither/dither", @@ -157,7 +158,11 @@ tests_good = [ "gpu/ge/enqueueparam", "gpu/ge/queue", "gpu/primitives/indices", + "gpu/primitives/invalidprim", + "gpu/primitives/trianglefan", + "gpu/primitives/trianglestrip", "gpu/primitives/triangles", + "gpu/rendertarget/copy", "gpu/rendertarget/depal", "gpu/signals/pause", "gpu/signals/pause2", @@ -204,6 +209,7 @@ tests_good = [ "sysmem/freesize", "sysmem/memblock", "sysmem/sysmem", + "sysmem/volatile", "threads/alarm/alarm", 
"threads/alarm/cancel/cancel", "threads/alarm/refer/refer", @@ -269,6 +275,7 @@ tests_good = [ "threads/mutex/refer", "threads/mutex/try", "threads/mutex/unlock", + "threads/mutex/unlock2", "threads/semaphores/semaphores", "threads/semaphores/cancel", "threads/semaphores/create", @@ -394,21 +401,19 @@ tests_next = [ "gpu/ge/get", "gpu/primitives/bezier", "gpu/primitives/continue", - "gpu/primitives/invalidprim", + "gpu/primitives/immediate", "gpu/primitives/lines", "gpu/primitives/linestrip", "gpu/primitives/points", "gpu/primitives/rectangles", "gpu/primitives/spline", - "gpu/primitives/trianglefan", - "gpu/primitives/trianglestrip", "gpu/reflection/reflection", - "gpu/rendertarget/copy", "gpu/rendertarget/rendertarget", "gpu/signals/continue", "gpu/signals/jumps", "gpu/signals/simple", "gpu/simple/simple", + "gpu/textures/size", "gpu/triangle/triangle", "gpu/vertices/colors", "gpu/vertices/texcoords",