diff --git a/CMakeLists.txt b/CMakeLists.txt
index bc81d9be2b..cc31004ebc 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -623,6 +623,8 @@ add_library(Common STATIC
Common/GPU/Vulkan/VulkanRenderManager.h
Common/GPU/Vulkan/VulkanQueueRunner.cpp
Common/GPU/Vulkan/VulkanQueueRunner.h
+ Common/GPU/Vulkan/VulkanFrameData.cpp
+ Common/GPU/Vulkan/VulkanFrameData.h
Common/Input/GestureDetector.cpp
Common/Input/GestureDetector.h
Common/Input/KeyCodes.h
diff --git a/Common/Common.vcxproj b/Common/Common.vcxproj
index 2895cdc837..d232c01534 100644
--- a/Common/Common.vcxproj
+++ b/Common/Common.vcxproj
@@ -441,6 +441,7 @@
+
@@ -861,6 +862,7 @@
+
diff --git a/Common/Common.vcxproj.filters b/Common/Common.vcxproj.filters
index ff9fd9eaa6..991b03a71f 100644
--- a/Common/Common.vcxproj.filters
+++ b/Common/Common.vcxproj.filters
@@ -419,6 +419,9 @@
GPU\Vulkan
+
+ GPU\Vulkan
+
@@ -791,6 +794,9 @@
GPU\Vulkan
+
+ GPU\Vulkan
+
diff --git a/Common/GPU/D3D11/thin3d_d3d11.cpp b/Common/GPU/D3D11/thin3d_d3d11.cpp
index c17bb39919..a808386ce9 100644
--- a/Common/GPU/D3D11/thin3d_d3d11.cpp
+++ b/Common/GPU/D3D11/thin3d_d3d11.cpp
@@ -270,6 +270,7 @@ D3D11DrawContext::D3D11DrawContext(ID3D11Device *device, ID3D11DeviceContext *de
caps_.anisoSupported = true;
caps_.textureNPOTFullySupported = true;
caps_.fragmentShaderDepthWriteSupported = true;
+ caps_.blendMinMaxSupported = true;
D3D11_FEATURE_DATA_D3D11_OPTIONS options{};
HRESULT result = device_->CheckFeatureSupport(D3D11_FEATURE_D3D11_OPTIONS, &options, sizeof(options));
diff --git a/Common/GPU/D3D9/thin3d_d3d9.cpp b/Common/GPU/D3D9/thin3d_d3d9.cpp
index a84d42f64e..bf33fd34ca 100644
--- a/Common/GPU/D3D9/thin3d_d3d9.cpp
+++ b/Common/GPU/D3D9/thin3d_d3d9.cpp
@@ -646,6 +646,63 @@ void D3D9Context::InvalidateCachedState() {
curPipeline_ = nullptr;
}
+// TODO: Move this detection elsewhere when it's needed elsewhere, not before. It's ugly.
+// Source: https://envytools.readthedocs.io/en/latest/hw/pciid.html#gf100
+ enum NVIDIAGeneration { // Ordered oldest to newest so generations can be compared with <.
+ NV_PRE_KEPLER,
+ NV_KEPLER,
+ NV_MAXWELL,
+ NV_PASCAL,
+ NV_VOLTA,
+ NV_TURING, // or later
+ };
+
+ // Classifies an NVIDIA PCI device ID into a GPU generation by matching known ID ranges.
+ // NOTE(review): chips not listed here (GM200 appears to be absent) fall through to the catch-alls - confirm intent.
+ static NVIDIAGeneration NVIDIAGetDeviceGeneration(int deviceID) {
+ if (deviceID >= 0x1180 && deviceID <= 0x11bf)
+ return NV_KEPLER; // GK104
+ if (deviceID >= 0x11c0 && deviceID <= 0x11fa)
+ return NV_KEPLER; // GK106
+ if (deviceID >= 0x0fc0 && deviceID <= 0x0fff)
+ return NV_KEPLER; // GK107
+ if (deviceID >= 0x1003 && deviceID <= 0x1028)
+ return NV_KEPLER; // GK110(B)
+ if (deviceID >= 0x1280 && deviceID <= 0x12ba)
+ return NV_KEPLER; // GK208
+ if (deviceID >= 0x1381 && deviceID <= 0x13b0)
+ return NV_MAXWELL; // GM107
+ if (deviceID >= 0x1340 && deviceID <= 0x134d)
+ return NV_MAXWELL; // GM108
+ if (deviceID >= 0x13c0 && deviceID <= 0x13d9)
+ return NV_MAXWELL; // GM204
+ if (deviceID >= 0x1401 && deviceID <= 0x1427)
+ return NV_MAXWELL; // GM206
+ if (deviceID >= 0x15f7 && deviceID <= 0x15f9)
+ return NV_PASCAL; // GP100
+ if (deviceID >= 0x1b00 && deviceID <= 0x1b38)
+ return NV_PASCAL; // GP102
+ if (deviceID >= 0x1b80 && deviceID <= 0x1be1)
+ return NV_PASCAL; // GP104
+ if (deviceID >= 0x1c02 && deviceID <= 0x1c62)
+ return NV_PASCAL; // GP106
+ if (deviceID >= 0x1c81 && deviceID <= 0x1c92)
+ return NV_PASCAL; // GP107
+ if (deviceID >= 0x1d01 && deviceID <= 0x1d12)
+ return NV_PASCAL; // GP108
+ if (deviceID >= 0x1d81 && deviceID <= 0x1dba)
+ return NV_VOLTA; // GV100
+ if (deviceID >= 0x1e02 && deviceID <= 0x1e3c)
+ return NV_TURING; // TU102
+ if (deviceID >= 0x1e82 && deviceID <= 0x1ed0)
+ return NV_TURING; // TU104
+ if (deviceID >= 0x1f02 && deviceID <= 0x1f51)
+ return NV_TURING; // TU106
+ if (deviceID >= 0x1e02)
+ return NV_TURING; // More TU models or later, probably.
+ return NV_PRE_KEPLER; // Also reached by any unlisted ID below 0x1e02.
+ }
+
#define FB_DIV 1
#define FOURCC_INTZ ((D3DFORMAT)(MAKEFOURCC('I', 'N', 'T', 'Z')))
@@ -665,14 +722,24 @@ D3D9Context::D3D9Context(IDirect3D9 *d3d, IDirect3D9Ex *d3dEx, int adapterId, ID
caps_.vendor = GPUVendor::VENDOR_UNKNOWN;
}
- if (!FAILED(device->GetDeviceCaps(&d3dCaps_))) {
+ D3DCAPS9 caps;
+ ZeroMemory(&caps, sizeof(caps));
+ HRESULT result = 0;
+ if (deviceEx_) {
+ result = deviceEx_->GetDeviceCaps(&caps);
+ } else {
+ result = device_->GetDeviceCaps(&caps);
+ }
+
+ if (SUCCEEDED(result)) {
sprintf(shadeLangVersion_, "PS: %04x VS: %04x", d3dCaps_.PixelShaderVersion & 0xFFFF, d3dCaps_.VertexShaderVersion & 0xFFFF);
} else {
+ WARN_LOG(G3D, "Direct3D9: Failed to get the device caps!");
strcpy(shadeLangVersion_, "N/A");
}
+
caps_.deviceID = identifier_.DeviceId;
caps_.multiViewport = false;
- caps_.anisoSupported = true;
caps_.depthRangeMinusOneToOne = false;
caps_.preferredDepthBufferFormat = DataFormat::D24_S8;
caps_.dualSourceBlend = false;
@@ -684,8 +751,30 @@ D3D9Context::D3D9Context(IDirect3D9 *d3d, IDirect3D9Ex *d3dEx, int adapterId, ID
caps_.framebufferDepthCopySupported = false;
caps_.framebufferSeparateDepthCopySupported = false;
caps_.texture3DSupported = true;
- caps_.textureNPOTFullySupported = true;
caps_.fragmentShaderDepthWriteSupported = true;
+ caps_.blendMinMaxSupported = true;
+
+ if ((caps.RasterCaps & D3DPRASTERCAPS_ANISOTROPY) != 0 && caps.MaxAnisotropy > 1) {
+ caps_.anisoSupported = true;
+ }
+ if ((caps.TextureCaps & (D3DPTEXTURECAPS_NONPOW2CONDITIONAL | D3DPTEXTURECAPS_POW2)) == 0) {
+ caps_.textureNPOTFullySupported = true;
+ }
+
+ // VS range culling (killing triangles in the vertex shader using NaN) causes problems on Intel.
+ // Also causes problems on old NVIDIA.
+ switch (caps_.vendor) {
+ case Draw::GPUVendor::VENDOR_INTEL:
+ bugs_.Infest(Bugs::BROKEN_NAN_IN_CONDITIONAL);
+ break;
+ case Draw::GPUVendor::VENDOR_NVIDIA:
+ // Older NVIDIAs don't seem to like NaNs in their DX9 vertex shaders.
+ // No idea if KEPLER is the right cutoff, but let's go with it.
+ if (NVIDIAGetDeviceGeneration(caps_.deviceID) < NV_KEPLER) {
+ bugs_.Infest(Bugs::BROKEN_NAN_IN_CONDITIONAL);
+ }
+ break;
+ }
if (d3d) {
D3DDISPLAYMODE displayMode;
diff --git a/Common/GPU/OpenGL/GLQueueRunner.cpp b/Common/GPU/OpenGL/GLQueueRunner.cpp
index ee850b2ad3..6157b2e1d1 100644
--- a/Common/GPU/OpenGL/GLQueueRunner.cpp
+++ b/Common/GPU/OpenGL/GLQueueRunner.cpp
@@ -814,7 +814,7 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last
int logicOp = -1;
bool logicEnabled = false;
#endif
- bool clipDistance0Enabled = false;
+ bool clipDistanceEnabled[8]{};
GLuint blendEqColor = (GLuint)-1;
GLuint blendEqAlpha = (GLuint)-1;
@@ -1123,14 +1123,18 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last
{
if (curProgram != c.program.program) {
glUseProgram(c.program.program->program);
- if (c.program.program->use_clip_distance0 != clipDistance0Enabled) {
- if (c.program.program->use_clip_distance0)
- glEnable(GL_CLIP_DISTANCE0);
- else
- glDisable(GL_CLIP_DISTANCE0);
- clipDistance0Enabled = c.program.program->use_clip_distance0;
- }
curProgram = c.program.program;
+
+ for (size_t i = 0; i < ARRAY_SIZE(clipDistanceEnabled); ++i) {
+ if (c.program.program->use_clip_distance[i] == clipDistanceEnabled[i])
+ continue;
+
+ if (c.program.program->use_clip_distance[i])
+ glEnable(GL_CLIP_DISTANCE0 + (GLenum)i);
+ else
+ glDisable(GL_CLIP_DISTANCE0 + (GLenum)i);
+ clipDistanceEnabled[i] = c.program.program->use_clip_distance[i];
+ }
}
CHECK_GL_ERROR_IF_DEBUG();
break;
@@ -1371,8 +1375,10 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last
glDisable(GL_COLOR_LOGIC_OP);
}
#endif
- if (clipDistance0Enabled)
- glDisable(GL_CLIP_DISTANCE0);
+ for (size_t i = 0; i < ARRAY_SIZE(clipDistanceEnabled); ++i) {
+ if (clipDistanceEnabled[i])
+ glDisable(GL_CLIP_DISTANCE0 + (GLenum)i);
+ }
if ((colorMask & 15) != 15)
glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
CHECK_GL_ERROR_IF_DEBUG();
diff --git a/Common/GPU/OpenGL/GLRenderManager.cpp b/Common/GPU/OpenGL/GLRenderManager.cpp
index ae5cb9dd17..01a94fb8b4 100644
--- a/Common/GPU/OpenGL/GLRenderManager.cpp
+++ b/Common/GPU/OpenGL/GLRenderManager.cpp
@@ -579,7 +579,6 @@ void GLRenderManager::EndSubmitFrame(int frame) {
void GLRenderManager::Run(int frame) {
BeginSubmitFrame(frame);
-
FrameData &frameData = frameData_[frame];
auto &stepsOnThread = frameData_[frame].steps;
diff --git a/Common/GPU/OpenGL/GLRenderManager.h b/Common/GPU/OpenGL/GLRenderManager.h
index 719603863e..089c588aeb 100644
--- a/Common/GPU/OpenGL/GLRenderManager.h
+++ b/Common/GPU/OpenGL/GLRenderManager.h
@@ -91,6 +91,13 @@ public:
std::string error;
};
+ struct GLRProgramFlags { // Option bits handed to CreateProgram() when creating a GLRProgram.
+ bool supportDualSource : 1; // Stored into the CREATE_PROGRAM step's support_dual_source.
+ bool useClipDistance0 : 1; // Stored into GLRProgram::use_clip_distance[0].
+ bool useClipDistance1 : 1; // Stored into GLRProgram::use_clip_distance[1].
+ bool useClipDistance2 : 1; // Stored into GLRProgram::use_clip_distance[2].
+ };
+
class GLRProgram {
public:
~GLRProgram() {
@@ -119,7 +126,7 @@ public:
std::vector semantics_;
std::vector queries_;
std::vector initialize_;
- bool use_clip_distance0 = false;
+ bool use_clip_distance[8]{};
struct UniformInfo {
int loc_;
@@ -427,15 +434,17 @@ public:
// not be an active render pass.
GLRProgram *CreateProgram(
std::vector shaders, std::vector semantics, std::vector queries,
- std::vector initializers, bool supportDualSource, bool useClipDistance0) {
+ std::vector initializers, const GLRProgramFlags &flags) {
GLRInitStep step{ GLRInitStepType::CREATE_PROGRAM };
_assert_(shaders.size() <= ARRAY_SIZE(step.create_program.shaders));
step.create_program.program = new GLRProgram();
step.create_program.program->semantics_ = semantics;
step.create_program.program->queries_ = queries;
step.create_program.program->initialize_ = initializers;
- step.create_program.program->use_clip_distance0 = useClipDistance0;
- step.create_program.support_dual_source = supportDualSource;
+ step.create_program.program->use_clip_distance[0] = flags.useClipDistance0;
+ step.create_program.program->use_clip_distance[1] = flags.useClipDistance1;
+ step.create_program.program->use_clip_distance[2] = flags.useClipDistance2;
+ step.create_program.support_dual_source = flags.supportDualSource;
_assert_msg_(shaders.size() > 0, "Can't create a program with zero shaders");
for (size_t i = 0; i < shaders.size(); i++) {
step.create_program.shaders[i] = shaders[i];
@@ -1003,6 +1012,7 @@ private:
bool readyForFence = true;
bool readyForRun = false;
bool readyForSubmit = false;
+
bool skipSwap = false;
GLRRunType type = GLRRunType::END;
diff --git a/Common/GPU/OpenGL/thin3d_gl.cpp b/Common/GPU/OpenGL/thin3d_gl.cpp
index 047cc8b8fb..02decfe05c 100644
--- a/Common/GPU/OpenGL/thin3d_gl.cpp
+++ b/Common/GPU/OpenGL/thin3d_gl.cpp
@@ -552,6 +552,8 @@ OpenGLContext::OpenGLContext() {
caps_.framebufferDepthBlitSupported = caps_.framebufferBlitSupported;
caps_.framebufferStencilBlitSupported = caps_.framebufferBlitSupported;
caps_.depthClampSupported = gl_extensions.ARB_depth_clamp;
+ caps_.blendMinMaxSupported = gl_extensions.EXT_blend_minmax;
+
if (gl_extensions.IsGLES) {
caps_.clipDistanceSupported = gl_extensions.EXT_clip_cull_distance || gl_extensions.APPLE_clip_distance;
caps_.cullDistanceSupported = gl_extensions.EXT_clip_cull_distance;
@@ -711,8 +713,10 @@ OpenGLContext::OpenGLContext() {
}
}
- if (gl_extensions.IsGLES) {
+ // NOTE: We only support framebuffer fetch on ES3 due to past issues..
+ if (gl_extensions.IsGLES && gl_extensions.GLES3) {
caps_.framebufferFetchSupported = (gl_extensions.EXT_shader_framebuffer_fetch || gl_extensions.ARM_shader_framebuffer_fetch);
+
if (gl_extensions.EXT_shader_framebuffer_fetch) {
shaderLanguageDesc_.framebufferFetchExtension = "#extension GL_EXT_shader_framebuffer_fetch : require";
shaderLanguageDesc_.lastFragData = gl_extensions.GLES3 ? "fragColor0" : "gl_LastFragData[0]";
@@ -1234,7 +1238,8 @@ bool OpenGLPipeline::LinkShaders() {
}
}
- program_ = render_->CreateProgram(linkShaders, semantics, queries, initialize, false, false);
+ GLRProgramFlags flags{};
+ program_ = render_->CreateProgram(linkShaders, semantics, queries, initialize, flags);
return true;
}
diff --git a/Common/GPU/Vulkan/VulkanBarrier.cpp b/Common/GPU/Vulkan/VulkanBarrier.cpp
index 125d51ea6d..e4f2d09089 100644
--- a/Common/GPU/Vulkan/VulkanBarrier.cpp
+++ b/Common/GPU/Vulkan/VulkanBarrier.cpp
@@ -4,7 +4,7 @@
void VulkanBarrier::Flush(VkCommandBuffer cmd) {
if (!imageBarriers_.empty()) {
- vkCmdPipelineBarrier(cmd, srcStageMask_, dstStageMask_, 0, 0, nullptr, 0, nullptr, (uint32_t)imageBarriers_.size(), imageBarriers_.data());
+ vkCmdPipelineBarrier(cmd, srcStageMask_, dstStageMask_, dependencyFlags_, 0, nullptr, 0, nullptr, (uint32_t)imageBarriers_.size(), imageBarriers_.data());
}
imageBarriers_.clear();
srcStageMask_ = 0;
diff --git a/Common/GPU/Vulkan/VulkanBarrier.h b/Common/GPU/Vulkan/VulkanBarrier.h
index eb949dd2f0..0d5754b3f4 100644
--- a/Common/GPU/Vulkan/VulkanBarrier.h
+++ b/Common/GPU/Vulkan/VulkanBarrier.h
@@ -21,6 +21,7 @@ public:
) {
srcStageMask_ |= srcStageMask;
dstStageMask_ |= dstStageMask;
+ dependencyFlags_ |= VK_DEPENDENCY_BY_REGION_BIT;
VkImageMemoryBarrier imageBarrier;
imageBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
@@ -112,4 +113,5 @@ private:
VkPipelineStageFlags srcStageMask_ = 0;
VkPipelineStageFlags dstStageMask_ = 0;
std::vector imageBarriers_;
+ VkDependencyFlags dependencyFlags_ = 0;
};
diff --git a/Common/GPU/Vulkan/VulkanContext.cpp b/Common/GPU/Vulkan/VulkanContext.cpp
index 55edc0f31f..0d29b518df 100644
--- a/Common/GPU/Vulkan/VulkanContext.cpp
+++ b/Common/GPU/Vulkan/VulkanContext.cpp
@@ -667,7 +667,10 @@ VkResult VulkanContext::CreateDevice() {
extensionsLookup_.KHR_create_renderpass2 = true;
extensionsLookup_.KHR_depth_stencil_resolve = EnableDeviceExtension(VK_KHR_DEPTH_STENCIL_RESOLVE_EXTENSION_NAME);
}
+
extensionsLookup_.EXT_shader_stencil_export = EnableDeviceExtension(VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME);
+ extensionsLookup_.EXT_fragment_shader_interlock = EnableDeviceExtension(VK_EXT_FRAGMENT_SHADER_INTERLOCK_EXTENSION_NAME);
+ extensionsLookup_.ARM_rasterization_order_attachment_access = EnableDeviceExtension(VK_ARM_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_EXTENSION_NAME);
VkDeviceCreateInfo device_info{ VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO };
device_info.queueCreateInfoCount = 1;
diff --git a/Common/GPU/Vulkan/VulkanDebug.cpp b/Common/GPU/Vulkan/VulkanDebug.cpp
index d4052e970a..b52e4396cb 100644
--- a/Common/GPU/Vulkan/VulkanDebug.cpp
+++ b/Common/GPU/Vulkan/VulkanDebug.cpp
@@ -86,7 +86,6 @@ VKAPI_ATTR VkBool32 VKAPI_CALL VulkanDebugUtilsCallback(
} else {
WARN_LOG(G3D, "VKDEBUG: %s", msg.c_str());
}
-
// false indicates that layer should not bail-out of an
// API call that had validation failures. This may mean that the
// app dies inside the driver due to invalid parameter(s).
@@ -94,3 +93,4 @@ VKAPI_ATTR VkBool32 VKAPI_CALL VulkanDebugUtilsCallback(
// keep that behavior here.
return false;
}
+
diff --git a/Common/GPU/Vulkan/VulkanFrameData.cpp b/Common/GPU/Vulkan/VulkanFrameData.cpp
new file mode 100644
index 0000000000..2c9c2487fc
--- /dev/null
+++ b/Common/GPU/Vulkan/VulkanFrameData.cpp
@@ -0,0 +1,207 @@
+#include "VulkanFrameData.h"
+#include "Common/Log.h"
+
+ // Creates the per-frame Vulkan objects: two command pools, three primary command buffers,
+ // the frame fence, the readback fence and the timestamp query pool.
+ // index identifies this frame in log output.
+ void FrameData::Init(VulkanContext *vulkan, int index) {
+ this->index = index;
+ VkDevice device = vulkan->GetDevice();
+
+ VkCommandPoolCreateInfo cmd_pool_info = { VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO };
+ cmd_pool_info.queueFamilyIndex = vulkan->GetGraphicsQueueFamilyIndex();
+ cmd_pool_info.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT;
+ VkResult res = vkCreateCommandPool(device, &cmd_pool_info, nullptr, &cmdPoolInit);
+ _dbg_assert_(res == VK_SUCCESS);
+ res = vkCreateCommandPool(device, &cmd_pool_info, nullptr, &cmdPoolMain);
+ _dbg_assert_(res == VK_SUCCESS);
+
+ VkCommandBufferAllocateInfo cmd_alloc = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO };
+ cmd_alloc.commandPool = cmdPoolInit;
+ cmd_alloc.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
+ cmd_alloc.commandBufferCount = 1;
+ res = vkAllocateCommandBuffers(device, &cmd_alloc, &initCmd);
+ _dbg_assert_(res == VK_SUCCESS);
+ // mainCmd and presentCmd are both allocated from cmdPoolMain. Check each result separately
+ // so that a failure of the first allocation is not hidden by the second one.
+ cmd_alloc.commandPool = cmdPoolMain;
+ res = vkAllocateCommandBuffers(device, &cmd_alloc, &mainCmd);
+ _dbg_assert_(res == VK_SUCCESS);
+ res = vkAllocateCommandBuffers(device, &cmd_alloc, &presentCmd);
+ _dbg_assert_(res == VK_SUCCESS);
+
+ // Creating the frame fence with true so they can be instantly waited on the first frame
+ fence = vulkan->CreateFence(true);
+
+ // This fence is used for synchronizing readbacks. Does not need preinitialization.
+ readbackFence = vulkan->CreateFence(false);
+
+ VkQueryPoolCreateInfo query_ci{ VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO };
+ query_ci.queryCount = MAX_TIMESTAMP_QUERIES;
+ query_ci.queryType = VK_QUERY_TYPE_TIMESTAMP;
+ res = vkCreateQueryPool(device, &query_ci, nullptr, &profile.queryPool);
+ _dbg_assert_(res == VK_SUCCESS);
+ }
+ // Frees this frame's init/main command buffers, then destroys its command pools, fences and timestamp query pool.
+ void FrameData::Destroy(VulkanContext *vulkan) {
+ VkDevice device = vulkan->GetDevice();
+ // TODO: I don't think free-ing command buffers is necessary before destroying a pool.
+ vkFreeCommandBuffers(device, cmdPoolInit, 1, &initCmd);
+ vkFreeCommandBuffers(device, cmdPoolMain, 1, &mainCmd); // presentCmd is not freed explicitly; NOTE(review): presumably released with cmdPoolMain below - confirm.
+ vkDestroyCommandPool(device, cmdPoolInit, nullptr);
+ vkDestroyCommandPool(device, cmdPoolMain, nullptr);
+ vkDestroyFence(device, fence, nullptr);
+ vkDestroyFence(device, readbackFence, nullptr);
+ vkDestroyQueryPool(device, profile.queryPool, nullptr);
+ }
+ // Acquires the next swapchain image into curSwapchainImage and records the outcome in hasAcquired / skipSwap.
+ void FrameData::AcquireNextImage(VulkanContext *vulkan, FrameDataShared &shared) {
+ _dbg_assert_(!hasAcquired);
+
+ // Get the index of the next available swapchain image, and a semaphore to block command buffer execution on.
+ VkResult res = vkAcquireNextImageKHR(vulkan->GetDevice(), vulkan->GetSwapchain(), UINT64_MAX, shared.acquireSemaphore, (VkFence)VK_NULL_HANDLE, &curSwapchainImage);
+ switch (res) {
+ case VK_SUCCESS:
+ hasAcquired = true;
+ break;
+ case VK_SUBOPTIMAL_KHR:
+ hasAcquired = true; // Treated like success: the image is still marked as acquired.
+ // Hopefully the resize will happen shortly. Ignore - one frame might look bad or something.
+ WARN_LOG(G3D, "VK_SUBOPTIMAL_KHR returned - ignoring");
+ break;
+ case VK_ERROR_OUT_OF_DATE_KHR:
+ // We do not set hasAcquired here!
+ WARN_LOG(G3D, "VK_ERROR_OUT_OF_DATE_KHR returned from AcquireNextImage - processing the frame, but not presenting");
+ skipSwap = true; // SubmitPending skips the acquire/present semaphores while this is set.
+ break;
+ default:
+ // Weird, shouldn't get any other values. Maybe lost device?
+ _assert_msg_(false, "vkAcquireNextImageKHR failed! result=%s", VulkanResultToString(res));
+ break;
+ }
+ }
+ // Presents curSwapchainImage on the graphics queue, waiting on shared.renderingCompleteSemaphore; returns the vkQueuePresentKHR result.
+ VkResult FrameData::QueuePresent(VulkanContext *vulkan, FrameDataShared &shared) {
+ _dbg_assert_(hasAcquired);
+ hasAcquired = false; // Cleared before presenting, so the next AcquireNextImage passes its assert.
+ _dbg_assert_(!skipSwap);
+
+ VkSwapchainKHR swapchain = vulkan->GetSwapchain();
+ VkPresentInfoKHR present = { VK_STRUCTURE_TYPE_PRESENT_INFO_KHR };
+ present.swapchainCount = 1;
+ present.pSwapchains = &swapchain;
+ present.pImageIndices = &curSwapchainImage;
+ present.pWaitSemaphores = &shared.renderingCompleteSemaphore;
+ present.waitSemaphoreCount = 1;
+
+ return vkQueuePresentKHR(vulkan->GetGraphicsQueue(), &present);
+ }
+ // Returns initCmd, first resetting cmdPoolInit and beginning recording if no init commands are pending; VK_NULL_HANDLE if the begin fails.
+ VkCommandBuffer FrameData::GetInitCmd(VulkanContext *vulkan) {
+ if (!hasInitCommands) {
+ VkCommandBufferBeginInfo begin = {
+ VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+ nullptr,
+ VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT
+ };
+ vkResetCommandPool(vulkan->GetDevice(), cmdPoolInit, 0);
+ VkResult res = vkBeginCommandBuffer(initCmd, &begin);
+ if (res != VK_SUCCESS) {
+ return VK_NULL_HANDLE;
+ }
+ hasInitCommands = true; // Cleared by SubmitPending after it ends and queues initCmd.
+ }
+ return initCmd;
+ }
+
+ // Ends and submits whichever of the init/main/present command buffers have been recorded for this frame.
+ //
+ // type selects the synchronization around the submit:
+ // - Pending: plain submit, no fence and no semaphores.
+ // - Sync: signals readbackFence (when main or present commands exist), then waits on and resets it.
+ // - Present: signals the frame fence (when present commands exist) and, unless skipSwap is set, waits on
+ // sharedData.acquireSemaphore and signals sharedData.renderingCompleteSemaphore.
+ // For Sync and Present, readyForFence is set and push_condVar is notified after the submit.
+ // Returns early, doing none of the above, when there is nothing to submit and no fence to signal.
+ void FrameData::SubmitPending(VulkanContext *vulkan, FrameSubmitType type, FrameDataShared &sharedData) {
+ // Up to three command buffers (init, main, present) can be pending at once, so size for all of them.
+ VkCommandBuffer cmdBufs[3];
+ int numCmdBufs = 0;
+
+ VkFence fenceToTrigger = VK_NULL_HANDLE;
+
+ if (hasInitCommands) {
+ if (profilingEnabled_) {
+ // Pre-allocated query ID 1 - end of init cmdbuf.
+ vkCmdWriteTimestamp(initCmd, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, profile.queryPool, 1);
+ }
+
+ VkResult res = vkEndCommandBuffer(initCmd);
+ cmdBufs[numCmdBufs++] = initCmd;
+
+ _assert_msg_(res == VK_SUCCESS, "vkEndCommandBuffer failed (init)! result=%s", VulkanResultToString(res));
+ hasInitCommands = false;
+ }
+
+ if ((hasMainCommands || hasPresentCommands) && type == FrameSubmitType::Sync) {
+ fenceToTrigger = readbackFence;
+ }
+
+ if (hasMainCommands) {
+ VkResult res = vkEndCommandBuffer(mainCmd);
+ _assert_msg_(res == VK_SUCCESS, "vkEndCommandBuffer failed (main)! result=%s", VulkanResultToString(res));
+
+ cmdBufs[numCmdBufs++] = mainCmd;
+ hasMainCommands = false;
+ }
+
+ if (hasPresentCommands) {
+ VkResult res = vkEndCommandBuffer(presentCmd);
+ _assert_msg_(res == VK_SUCCESS, "vkEndCommandBuffer failed (present)! result=%s", VulkanResultToString(res));
+
+ cmdBufs[numCmdBufs++] = presentCmd;
+ hasPresentCommands = false;
+
+ if (type == FrameSubmitType::Present) {
+ fenceToTrigger = fence;
+ }
+ }
+
+ if (!numCmdBufs && fenceToTrigger == VK_NULL_HANDLE) {
+ // Nothing to do.
+ return;
+ }
+
+ VkSubmitInfo submit_info{ VK_STRUCTURE_TYPE_SUBMIT_INFO };
+ VkPipelineStageFlags waitStage[1]{ VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT };
+ if (type == FrameSubmitType::Present && !skipSwap) {
+ _dbg_assert_(hasAcquired);
+ submit_info.waitSemaphoreCount = 1;
+ submit_info.pWaitSemaphores = &sharedData.acquireSemaphore;
+ submit_info.pWaitDstStageMask = waitStage;
+ }
+ submit_info.commandBufferCount = (uint32_t)numCmdBufs;
+ submit_info.pCommandBuffers = cmdBufs;
+ if (type == FrameSubmitType::Present && !skipSwap) {
+ submit_info.signalSemaphoreCount = 1;
+ submit_info.pSignalSemaphores = &sharedData.renderingCompleteSemaphore;
+ }
+ VkResult res = vkQueueSubmit(vulkan->GetGraphicsQueue(), 1, &submit_info, fenceToTrigger);
+ if (res == VK_ERROR_DEVICE_LOST) {
+ _assert_msg_(false, "Lost the Vulkan device in vkQueueSubmit! If this happens again, switch Graphics Backend away from Vulkan");
+ } else {
+ _assert_msg_(res == VK_SUCCESS, "vkQueueSubmit failed (main)! result=%s", VulkanResultToString(res));
+ }
+
+ if (type == FrameSubmitType::Sync) {
+ // Hard stall of the GPU, not ideal, but necessary so the CPU has the contents of the readback.
+ // NOTE(review): this waits on readbackFence even when it was not passed to vkQueueSubmit above
+ // (Sync with only init commands pending) - confirm callers never hit that case.
+ vkWaitForFences(vulkan->GetDevice(), 1, &readbackFence, true, UINT64_MAX);
+ vkResetFences(vulkan->GetDevice(), 1, &readbackFence);
+ }
+
+ // Wake any waiters on push_condVar now that the submit (and, for Sync, the fence wait) is done.
+ if (type == FrameSubmitType::Present || type == FrameSubmitType::Sync) {
+ VERBOSE_LOG(G3D, "PULL: Frame %d.readyForFence = true", index);
+ std::unique_lock lock(push_mutex);
+ readyForFence = true; // misnomer in sync mode!
+ push_condVar.notify_all();
+ }
+ }
+ // Creates the acquire and rendering-complete semaphores.
+ void FrameDataShared::Init(VulkanContext *vulkan) {
+ VkSemaphoreCreateInfo semaphoreCreateInfo = { VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO };
+ semaphoreCreateInfo.flags = 0;
+ VkResult res = vkCreateSemaphore(vulkan->GetDevice(), &semaphoreCreateInfo, nullptr, &acquireSemaphore);
+ _dbg_assert_(res == VK_SUCCESS);
+ res = vkCreateSemaphore(vulkan->GetDevice(), &semaphoreCreateInfo, nullptr, &renderingCompleteSemaphore);
+ _dbg_assert_(res == VK_SUCCESS);
+ }
+ // Destroys the two semaphores created by FrameDataShared::Init.
+ void FrameDataShared::Destroy(VulkanContext *vulkan) {
+ VkDevice device = vulkan->GetDevice();
+ vkDestroySemaphore(device, acquireSemaphore, nullptr);
+ vkDestroySemaphore(device, renderingCompleteSemaphore, nullptr);
+ }
diff --git a/Common/GPU/Vulkan/VulkanFrameData.h b/Common/GPU/Vulkan/VulkanFrameData.h
new file mode 100644
index 0000000000..53de1e8f44
--- /dev/null
+++ b/Common/GPU/Vulkan/VulkanFrameData.h
@@ -0,0 +1,101 @@
+#pragma once
+
+#include
+
+#include
+#include
+
+#include "Common/GPU/Vulkan/VulkanContext.h"
+
+struct VKRStep;
+
+ enum {
+ MAX_TIMESTAMP_QUERIES = 128, // queryCount of each frame's timestamp query pool (see FrameData::Init).
+ };
+
+ enum class VKRRunType { // Stored per frame in FrameData::runType_. NOTE(review): the meaning of each value is set by the users of RunType() - confirm there.
+ END,
+ SYNC,
+ };
+
+ // Per-frame profiling state: the timestamp query pool plus the data gathered from it.
+ // Members carry default initializers, matching FrameDataShared, so a fresh context never holds garbage.
+ struct QueueProfileContext {
+ VkQueryPool queryPool = VK_NULL_HANDLE; // Created in FrameData::Init, destroyed in FrameData::Destroy.
+ std::vector timestampDescriptions;
+ std::string profileSummary;
+ double cpuStartTime = 0.0;
+ double cpuEndTime = 0.0;
+ };
+
+ struct FrameDataShared { // Semaphores passed to FrameData's acquire, submit and present functions.
+ // Permanent objects
+ VkSemaphore acquireSemaphore = VK_NULL_HANDLE; // Passed to vkAcquireNextImageKHR; Present submits wait on it.
+ VkSemaphore renderingCompleteSemaphore = VK_NULL_HANDLE; // Signaled by Present submits; vkQueuePresentKHR waits on it.
+
+ void Init(VulkanContext *vulkan);
+ void Destroy(VulkanContext *vulkan);
+ };
+
+ enum class FrameSubmitType { // Selects how FrameData::SubmitPending synchronizes its queue submit.
+ Pending, // Submit recorded command buffers with no fence and no semaphores.
+ Sync, // Signal readbackFence when main/present commands exist, then wait on it and reset it.
+ Present, // Signal the frame fence when present commands exist; use the acquire/rendering-complete semaphores unless skipSwap.
+ };
+
+// Per-frame data, round-robin so we can overlap submission with execution of the previous frame.
+ struct FrameData {
+ std::mutex push_mutex;
+ std::condition_variable push_condVar;
+
+ std::mutex pull_mutex;
+ std::condition_variable pull_condVar;
+
+ bool readyForFence = true; // Set under push_mutex by SubmitPending for Sync and Present submits.
+ bool readyForRun = false; // protected by pull_mutex
+ bool skipSwap = false; // Set by AcquireNextImage on VK_ERROR_OUT_OF_DATE_KHR.
+
+ // All Vulkan handles below start out null; Init() creates them and Destroy() releases them.
+ VkFence fence = VK_NULL_HANDLE;
+ VkFence readbackFence = VK_NULL_HANDLE; // Strictly speaking we might only need one global of these.
+
+ // These are on different threads so need separate pools.
+ VkCommandPool cmdPoolInit = VK_NULL_HANDLE; // Written to from main thread
+ VkCommandPool cmdPoolMain = VK_NULL_HANDLE; // Written to from render thread, which also submits
+
+ VkCommandBuffer initCmd = VK_NULL_HANDLE;
+ VkCommandBuffer mainCmd = VK_NULL_HANDLE;
+ VkCommandBuffer presentCmd = VK_NULL_HANDLE;
+
+ // Each flag is true while the matching command buffer has recorded commands that are not yet submitted.
+ bool hasInitCommands = false;
+ bool hasMainCommands = false;
+ bool hasPresentCommands = false;
+
+ bool hasAcquired = false; // True between a successful AcquireNextImage and QueuePresent.
+
+ std::vector steps;
+
+ // Swapchain.
+ uint32_t curSwapchainImage = -1;
+
+ // Profiling.
+ QueueProfileContext profile;
+ bool profilingEnabled_ = false; // Read by SubmitPending, so it must not be left uninitialized.
+
+ void Init(VulkanContext *vulkan, int index);
+ void Destroy(VulkanContext *vulkan);
+
+ void AcquireNextImage(VulkanContext *vulkan, FrameDataShared &shared);
+ VkResult QueuePresent(VulkanContext *vulkan, FrameDataShared &shared);
+ VkCommandBuffer GetInitCmd(VulkanContext *vulkan);
+
+ // This will only submit if we are actually recording init commands.
+ void SubmitPending(VulkanContext *vulkan, FrameSubmitType type, FrameDataShared &shared);
+
+ VKRRunType RunType() const {
+ return runType_;
+ }
+
+ VKRRunType runType_ = VKRRunType::END;
+
+ private:
+ // Metadata for logging etc
+ int index = -1; // Overwritten by Init().
+ };
diff --git a/Common/GPU/Vulkan/VulkanLoader.h b/Common/GPU/Vulkan/VulkanLoader.h
index 1f3d7d7704..b0c14570da 100644
--- a/Common/GPU/Vulkan/VulkanLoader.h
+++ b/Common/GPU/Vulkan/VulkanLoader.h
@@ -241,6 +241,8 @@ struct VulkanExtensions {
bool KHR_depth_stencil_resolve;
bool EXT_shader_stencil_export;
bool EXT_swapchain_colorspace;
+ bool ARM_rasterization_order_attachment_access;
+ bool EXT_fragment_shader_interlock;
// bool EXT_depth_range_unrestricted; // Allows depth outside [0.0, 1.0] in 32-bit float depth buffers.
};
diff --git a/Common/GPU/Vulkan/VulkanQueueRunner.cpp b/Common/GPU/Vulkan/VulkanQueueRunner.cpp
index 10a88e623a..8127114148 100644
--- a/Common/GPU/Vulkan/VulkanQueueRunner.cpp
+++ b/Common/GPU/Vulkan/VulkanQueueRunner.cpp
@@ -34,8 +34,11 @@ RenderPassType MergeRPTypes(RenderPassType a, RenderPassType b) {
if (a == b) {
// Trivial merging case.
return a;
+ } else if (a == RP_TYPE_COLOR_DEPTH && b == RP_TYPE_COLOR_DEPTH_INPUT) {
+ return RP_TYPE_COLOR_DEPTH_INPUT;
+ } else if (a == RP_TYPE_COLOR_DEPTH_INPUT && b == RP_TYPE_COLOR_DEPTH) {
+ return RP_TYPE_COLOR_DEPTH_INPUT;
}
- // More cases to be added later.
return a;
}
@@ -138,6 +141,171 @@ void VulkanQueueRunner::DestroyDeviceObjects() {
renderPasses_.Clear();
}
+ // Fetches the swapchain images, creates a color image view for each one, then sets up the shared
+ // depth buffer and the backbuffer framebuffers. cmdInit receives the depth buffer's layout transition.
+ // Returns false if the swapchain images cannot be queried.
+ bool VulkanQueueRunner::CreateSwapchain(VkCommandBuffer cmdInit) {
+ VkResult res = vkGetSwapchainImagesKHR(vulkan_->GetDevice(), vulkan_->GetSwapchain(), &swapchainImageCount_, nullptr);
+ _dbg_assert_(res == VK_SUCCESS);
+ if (res != VK_SUCCESS) {
+ // Without a valid count the array below can't be sized, so bail out the same way the second query does.
+ ERROR_LOG(G3D, "vkGetSwapchainImagesKHR failed");
+ return false;
+ }
+
+ VkImage *swapchainImages = new VkImage[swapchainImageCount_];
+ res = vkGetSwapchainImagesKHR(vulkan_->GetDevice(), vulkan_->GetSwapchain(), &swapchainImageCount_, swapchainImages);
+ if (res != VK_SUCCESS) {
+ ERROR_LOG(G3D, "vkGetSwapchainImagesKHR failed");
+ delete[] swapchainImages;
+ return false;
+ }
+
+ for (uint32_t i = 0; i < swapchainImageCount_; i++) {
+ SwapchainImageData sc_buffer{};
+ sc_buffer.image = swapchainImages[i];
+
+ VkImageViewCreateInfo color_image_view = { VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO };
+ color_image_view.format = vulkan_->GetSwapchainFormat();
+ color_image_view.components.r = VK_COMPONENT_SWIZZLE_IDENTITY;
+ color_image_view.components.g = VK_COMPONENT_SWIZZLE_IDENTITY;
+ color_image_view.components.b = VK_COMPONENT_SWIZZLE_IDENTITY;
+ color_image_view.components.a = VK_COMPONENT_SWIZZLE_IDENTITY;
+ color_image_view.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+ color_image_view.subresourceRange.baseMipLevel = 0;
+ color_image_view.subresourceRange.levelCount = 1;
+ color_image_view.subresourceRange.baseArrayLayer = 0;
+ color_image_view.subresourceRange.layerCount = 1;
+ color_image_view.viewType = VK_IMAGE_VIEW_TYPE_2D;
+ color_image_view.flags = 0;
+ color_image_view.image = sc_buffer.image;
+
+ // We leave the images as UNDEFINED, there's no need to pre-transition them as
+ // the backbuffer renderpass starts out with them being auto-transitioned from UNDEFINED anyway.
+ // Also, turns out it's illegal to transition un-acquired images, thanks Hans-Kristian. See #11417.
+
+ res = vkCreateImageView(vulkan_->GetDevice(), &color_image_view, nullptr, &sc_buffer.view);
+ swapchainImages_.push_back(sc_buffer);
+ _dbg_assert_(res == VK_SUCCESS);
+ }
+ delete[] swapchainImages;
+
+ // The depth buffer must exist first: the backbuffer framebuffers attach depth_.view.
+ if (InitDepthStencilBuffer(cmdInit)) {
+ InitBackbufferFramebuffers(vulkan_->GetBackbufferWidth(), vulkan_->GetBackbufferHeight());
+ }
+ return true;
+ }
+
+
+ // Creates one framebuffer per swapchain image, each pairing that image's color view with the shared
+ // depth view, using the RP_TYPE_BACKBUFFER render pass. width/height are the framebuffer dimensions.
+ // Returns false, leaving framebuffers_ empty, if any framebuffer cannot be created.
+ bool VulkanQueueRunner::InitBackbufferFramebuffers(int width, int height) {
+ VkResult res;
+ // We share the same depth buffer but have multiple color buffers, see the loop below.
+ VkImageView attachments[2] = { VK_NULL_HANDLE, depth_.view };
+
+ VkFramebufferCreateInfo fb_info = { VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO };
+ fb_info.renderPass = GetCompatibleRenderPass()->Get(vulkan_, RP_TYPE_BACKBUFFER);
+ fb_info.attachmentCount = 2;
+ fb_info.pAttachments = attachments;
+ fb_info.width = width;
+ fb_info.height = height;
+ fb_info.layers = 1;
+
+ framebuffers_.resize(swapchainImageCount_);
+
+ for (uint32_t i = 0; i < swapchainImageCount_; i++) {
+ attachments[0] = swapchainImages_[i].view;
+ res = vkCreateFramebuffer(vulkan_->GetDevice(), &fb_info, nullptr, &framebuffers_[i]);
+ _dbg_assert_(res == VK_SUCCESS);
+ if (res != VK_SUCCESS) {
+ // Hand the framebuffers created so far to the deleter; clearing the list alone would leak them.
+ for (uint32_t j = 0; j < i; j++) {
+ vulkan_->Delete().QueueDeleteFramebuffer(framebuffers_[j]);
+ }
+ framebuffers_.clear();
+ return false;
+ }
+ }
+
+ return true;
+ }
+ // Creates the backbuffer depth/stencil image and view in depth_ and records the image's initial layout transition into cmd. Returns false if either creation fails.
+ bool VulkanQueueRunner::InitDepthStencilBuffer(VkCommandBuffer cmd) {
+ const VkFormat depth_format = vulkan_->GetDeviceInfo().preferredDepthStencilFormat;
+ int aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
+ VkImageCreateInfo image_info = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
+ image_info.imageType = VK_IMAGE_TYPE_2D;
+ image_info.format = depth_format;
+ image_info.extent.width = vulkan_->GetBackbufferWidth();
+ image_info.extent.height = vulkan_->GetBackbufferHeight();
+ image_info.extent.depth = 1;
+ image_info.mipLevels = 1;
+ image_info.arrayLayers = 1;
+ image_info.samples = VK_SAMPLE_COUNT_1_BIT;
+ image_info.queueFamilyIndexCount = 0;
+ image_info.pQueueFamilyIndices = nullptr;
+ image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
+ image_info.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
+ image_info.flags = 0;
+
+ depth_.format = depth_format;
+
+ VmaAllocationCreateInfo allocCreateInfo{};
+ VmaAllocationInfo allocInfo{};
+
+ allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
+
+ VkResult res = vmaCreateImage(vulkan_->Allocator(), &image_info, &allocCreateInfo, &depth_.image, &depth_.alloc, &allocInfo);
+ _dbg_assert_(res == VK_SUCCESS);
+ if (res != VK_SUCCESS)
+ return false;
+
+ vulkan_->SetDebugName(depth_.image, VK_OBJECT_TYPE_IMAGE, "BackbufferDepth");
+
+ TransitionImageLayout2(cmd, depth_.image, 0, 1,
+ aspectMask,
+ VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
+ VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
+ VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
+ 0, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT);
+
+ VkImageViewCreateInfo depth_view_info = { VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO };
+ depth_view_info.image = depth_.image;
+ depth_view_info.format = depth_format;
+ depth_view_info.components.r = VK_COMPONENT_SWIZZLE_IDENTITY;
+ depth_view_info.components.g = VK_COMPONENT_SWIZZLE_IDENTITY;
+ depth_view_info.components.b = VK_COMPONENT_SWIZZLE_IDENTITY;
+ depth_view_info.components.a = VK_COMPONENT_SWIZZLE_IDENTITY;
+ depth_view_info.subresourceRange.aspectMask = aspectMask;
+ depth_view_info.subresourceRange.baseMipLevel = 0;
+ depth_view_info.subresourceRange.levelCount = 1;
+ depth_view_info.subresourceRange.baseArrayLayer = 0;
+ depth_view_info.subresourceRange.layerCount = 1;
+ depth_view_info.viewType = VK_IMAGE_VIEW_TYPE_2D;
+ depth_view_info.flags = 0;
+
+ VkDevice device = vulkan_->GetDevice();
+
+ res = vkCreateImageView(device, &depth_view_info, NULL, &depth_.view);
+ _dbg_assert_(res == VK_SUCCESS);
+ if (res != VK_SUCCESS)
+ return false; // depth_.image stays set here; DestroyBackBuffers queues its deletion.
+
+ return true;
+ }
+
+ // Queues deletion of the swapchain image views, the depth view/image and the backbuffer framebuffers, then empties the containers.
+ void VulkanQueueRunner::DestroyBackBuffers() {
+ for (auto &image : swapchainImages_) {
+ vulkan_->Delete().QueueDeleteImageView(image.view);
+ }
+ swapchainImages_.clear();
+
+ if (depth_.view) {
+ vulkan_->Delete().QueueDeleteImageView(depth_.view);
+ }
+ if (depth_.image) {
+ _dbg_assert_(depth_.alloc);
+ vulkan_->Delete().QueueDeleteImageAllocation(depth_.image, depth_.alloc);
+ }
+ depth_ = {}; // Reset every field so the null checks above hold on the next call.
+ for (uint32_t i = 0; i < framebuffers_.size(); i++) {
+ _dbg_assert_(framebuffers_[i] != VK_NULL_HANDLE);
+ vulkan_->Delete().QueueDeleteFramebuffer(framebuffers_[i]);
+ }
+ framebuffers_.clear();
+
+ INFO_LOG(G3D, "Backbuffers destroyed");
+ }
+
static VkAttachmentLoadOp ConvertLoadAction(VKRRenderPassLoadAction action) {
switch (action) {
case VKRRenderPassLoadAction::CLEAR: return VK_ATTACHMENT_LOAD_OP_CLEAR;
@@ -155,7 +323,12 @@ static VkAttachmentStoreOp ConvertStoreAction(VKRRenderPassStoreAction action) {
return VK_ATTACHMENT_STORE_OP_DONT_CARE; // avoid compiler warning
}
+// Self-dependency: https://github.com/gpuweb/gpuweb/issues/442#issuecomment-547604827
+// Also see https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-pipeline-barriers-subpass-self-dependencies
+
VkRenderPass CreateRP(VulkanContext *vulkan, const RPKey &key, RenderPassType rpType) {
+ bool selfDependency = rpType == RP_TYPE_COLOR_DEPTH_INPUT;
+
VkAttachmentDescription attachments[2] = {};
attachments[0].format = rpType == RP_TYPE_BACKBUFFER ? vulkan->GetSwapchainFormat() : VK_FORMAT_R8G8B8A8_UNORM;
attachments[0].samples = VK_SAMPLE_COUNT_1_BIT;
@@ -179,7 +352,7 @@ VkRenderPass CreateRP(VulkanContext *vulkan, const RPKey &key, RenderPassType rp
VkAttachmentReference color_reference{};
color_reference.attachment = 0;
- color_reference.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+ color_reference.layout = selfDependency ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
VkAttachmentReference depth_reference{};
depth_reference.attachment = 1;
@@ -188,8 +361,13 @@ VkRenderPass CreateRP(VulkanContext *vulkan, const RPKey &key, RenderPassType rp
VkSubpassDescription subpass{};
subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
subpass.flags = 0;
- subpass.inputAttachmentCount = 0;
- subpass.pInputAttachments = nullptr;
+ if (selfDependency) {
+ subpass.inputAttachmentCount = 1;
+ subpass.pInputAttachments = &color_reference;
+ } else {
+ subpass.inputAttachmentCount = 0;
+ subpass.pInputAttachments = nullptr;
+ }
subpass.colorAttachmentCount = 1;
subpass.pColorAttachments = &color_reference;
subpass.pResolveAttachments = nullptr;
@@ -198,22 +376,40 @@ VkRenderPass CreateRP(VulkanContext *vulkan, const RPKey &key, RenderPassType rp
subpass.pPreserveAttachments = nullptr;
// Not sure if this is really necessary.
- VkSubpassDependency dep{};
- dep.srcSubpass = VK_SUBPASS_EXTERNAL;
- dep.dstSubpass = 0;
- dep.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
- dep.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
- dep.srcAccessMask = 0;
- dep.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+ VkSubpassDependency deps[2]{};
+ size_t numDeps = 0;
VkRenderPassCreateInfo rp{ VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO };
rp.attachmentCount = 2;
rp.pAttachments = attachments;
rp.subpassCount = 1;
rp.pSubpasses = &subpass;
+
if (rpType == RP_TYPE_BACKBUFFER) {
+ deps[numDeps].srcSubpass = VK_SUBPASS_EXTERNAL;
+ deps[numDeps].dstSubpass = 0;
+ deps[numDeps].srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+ deps[numDeps].dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+ deps[numDeps].srcAccessMask = 0;
+ deps[numDeps].dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+ numDeps++;
rp.dependencyCount = 1;
- rp.pDependencies = &dep;
+ }
+
+ if (selfDependency) {
+ deps[numDeps].dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT;
+ deps[numDeps].srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+ deps[numDeps].dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
+ deps[numDeps].srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+ deps[numDeps].dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
+ deps[numDeps].srcSubpass = 0;
+ deps[numDeps].dstSubpass = 0;
+ numDeps++;
+ }
+
+ if (numDeps > 0) {
+ rp.dependencyCount = (u32)numDeps;
+ rp.pDependencies = deps;
}
VkRenderPass pass;
@@ -246,6 +442,30 @@ VKRRenderPass *VulkanQueueRunner::GetRenderPass(const RPKey &key) {
return pass;
}
+// Records a GENERAL -> GENERAL barrier on img so color attachment writes become visible to input attachment reads. The masks must match the subpass self-dependency declared in CreateRP above.
+void VulkanQueueRunner::SelfDependencyBarrier(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrier *recordBarrier) {
+ if (aspect & VK_IMAGE_ASPECT_COLOR_BIT) {
+ VkAccessFlags srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; // Same access/stage values as the self-dependency entry filled in by CreateRP.
+ VkAccessFlags dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
+ VkPipelineStageFlags srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+ VkPipelineStageFlags dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
+ recordBarrier->TransitionImage( // Only records into recordBarrier; the SELF_DEPENDENCY_BARRIER handler in PerformRenderPass flushes it with barrier.Flush(cmd).
+ img.image,
+ 0, // presumably the base mip level - confirm against VulkanBarrier::TransitionImage
+ 1, // presumably the mip level count - confirm against VulkanBarrier::TransitionImage
+ aspect,
+ VK_IMAGE_LAYOUT_GENERAL, // Both layout arguments are GENERAL, so the image layout is left unchanged.
+ VK_IMAGE_LAYOUT_GENERAL,
+ srcAccessMask,
+ dstAccessMask,
+ srcStageMask,
+ dstStageMask
+ );
+ } else {
+ _assert_msg_(false, "Depth self-dependencies not yet supported"); // Any aspect mask without the color bit ends up here.
+ }
+}
+
void VulkanQueueRunner::PreprocessSteps(std::vector &steps) {
// Optimizes renderpasses, then sequences them.
// Planned optimizations:
@@ -321,23 +541,47 @@ void VulkanQueueRunner::PreprocessSteps(std::vector &steps) {
}
}
-void VulkanQueueRunner::RunSteps(VkCommandBuffer cmd, std::vector &steps, QueueProfileContext *profile) {
+void VulkanQueueRunner::RunSteps(FrameData &frameData, FrameDataShared &frameDataShared) {
+ QueueProfileContext *profile = frameData.profilingEnabled_ ? &frameData.profile : nullptr;
+
if (profile)
profile->cpuStartTime = time_now_d();
bool emitLabels = vulkan_->Extensions().EXT_debug_utils;
- for (size_t i = 0; i < steps.size(); i++) {
- const VKRStep &step = *steps[i];
+ VkCommandBuffer cmd = frameData.hasPresentCommands ? frameData.presentCmd : frameData.mainCmd;
+
+ for (size_t i = 0; i < frameData.steps.size(); i++) {
+ const VKRStep &step = *frameData.steps[i];
if (emitLabels) {
VkDebugUtilsLabelEXT labelInfo{ VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT };
labelInfo.pLabelName = step.tag;
- vkCmdBeginDebugUtilsLabelEXT(cmd, &labelInfo);
+ vkCmdBeginDebugUtilsLabelEXT(frameData.mainCmd, &labelInfo);
}
switch (step.stepType) {
case VKRStepType::RENDER:
+ if (!step.render.framebuffer) {
+ frameData.SubmitPending(vulkan_, FrameSubmitType::Pending, frameDataShared);
+
+ // When stepping in the GE debugger, we can end up here multiple times in a "frame".
+ // So only acquire once.
+ if (!frameData.hasAcquired) {
+ frameData.AcquireNextImage(vulkan_, frameDataShared);
+ SetBackbuffer(framebuffers_[frameData.curSwapchainImage], swapchainImages_[frameData.curSwapchainImage].image);
+ }
+
+ _dbg_assert_(!frameData.hasPresentCommands);
+ // A RENDER step rendering to the backbuffer is normally the last step that happens in a frame,
+ // unless taking a screenshot, in which case there might be a READBACK_IMAGE after it.
+ // This is why we have to switch cmd to presentCmd, in this case.
+ VkCommandBufferBeginInfo begin{ VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };
+ begin.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
+ vkBeginCommandBuffer(frameData.presentCmd, &begin);
+ frameData.hasPresentCommands = true;
+ cmd = frameData.presentCmd;
+ }
PerformRenderPass(step, cmd);
break;
case VKRStepType::COPY:
@@ -368,10 +612,12 @@ void VulkanQueueRunner::RunSteps(VkCommandBuffer cmd, std::vector &st
// Deleting all in one go should be easier on the instruction cache than deleting
// them as we go - and easier to debug because we can look backwards in the frame.
- for (size_t i = 0; i < steps.size(); i++) {
- delete steps[i];
+ for (auto step : frameData.steps) {
+ delete step;
}
+ frameData.steps.clear();
+
if (profile)
profile->cpuEndTime = time_now_d();
}
@@ -628,6 +874,7 @@ std::string VulkanQueueRunner::StepToString(const VKRStep &step) const {
switch (step.render.renderPassType) {
case RP_TYPE_BACKBUFFER: renderCmd = "BACKBUF"; break;
case RP_TYPE_COLOR_DEPTH: renderCmd = "RENDER"; break;
+ case RP_TYPE_COLOR_DEPTH_INPUT: renderCmd = "RENDER_INPUT"; break;
default: renderCmd = "N/A";
}
snprintf(buffer, sizeof(buffer), "%s %s (draws: %d, %dx%d/%dx%d, fb: %p, )", renderCmd, step.tag, step.render.numDraws, actual_w, actual_h, w, h, step.render.framebuffer);
@@ -817,6 +1064,9 @@ void VulkanQueueRunner::LogRenderPass(const VKRStep &pass, bool verbose) {
case VKRRenderCommand::REMOVED:
INFO_LOG(G3D, " (Removed)");
break;
+ case VKRRenderCommand::SELF_DEPENDENCY_BARRIER:
+ INFO_LOG(G3D, " SelfBarrier()");
+ break;
case VKRRenderCommand::BIND_GRAPHICS_PIPELINE:
INFO_LOG(G3D, " BindGraphicsPipeline(%x)", (int)(intptr_t)cmd.graphics_pipeline.pipeline);
break;
@@ -1070,7 +1320,6 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c
}
}
-
// Don't execute empty renderpasses that keep the contents.
if (step.commands.empty() && step.render.colorLoad == VKRRenderPassLoadAction::KEEP && step.render.depthLoad == VKRRenderPassLoadAction::KEEP && step.render.stencilLoad == VKRRenderPassLoadAction::KEEP) {
// Flush the pending barrier
@@ -1120,6 +1369,7 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c
// This reads the layout of the color and depth images, and chooses a render pass using them that
// will transition to the desired final layout.
+ //
// NOTE: Flushes recordBarrier_.
VKRRenderPass *renderPass = PerformBindFramebufferAsRenderTarget(step, cmd);
@@ -1235,6 +1485,15 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c
break;
}
+ case VKRRenderCommand::SELF_DEPENDENCY_BARRIER:
+ {
+ _assert_(step.render.pipelineFlags & PipelineFlags::USES_INPUT_ATTACHMENT);
+ VulkanBarrier barrier;
+ SelfDependencyBarrier(step.render.framebuffer->color, VK_IMAGE_ASPECT_COLOR_BIT, &barrier);
+ barrier.Flush(cmd);
+ break;
+ }
+
case VKRRenderCommand::PUSH_CONSTANTS:
vkCmdPushConstants(cmd, pipelineLayout, c.push.stages, c.push.offset, c.push.size, c.push.data);
break;
diff --git a/Common/GPU/Vulkan/VulkanQueueRunner.h b/Common/GPU/Vulkan/VulkanQueueRunner.h
index adb9e7d96e..2c76262a6a 100644
--- a/Common/GPU/Vulkan/VulkanQueueRunner.h
+++ b/Common/GPU/Vulkan/VulkanQueueRunner.h
@@ -8,6 +8,7 @@
#include "Common/Data/Collections/Hashmaps.h"
#include "Common/GPU/Vulkan/VulkanContext.h"
#include "Common/GPU/Vulkan/VulkanBarrier.h"
+#include "Common/GPU/Vulkan/VulkanFrameData.h"
#include "Common/Data/Convert/SmallDataConvert.h"
#include "Common/Data/Collections/TinySet.h"
#include "Common/GPU/DataFormat.h"
@@ -16,11 +17,11 @@ class VKRFramebuffer;
struct VKRGraphicsPipeline;
struct VKRComputePipeline;
struct VKRImage;
+struct FrameData;
enum {
QUEUE_HACK_MGS2_ACID = 1,
QUEUE_HACK_SONIC = 2,
- // Killzone PR = 4.
QUEUE_HACK_RENDERPASS_MERGE = 8,
};
@@ -36,20 +37,24 @@ enum class VKRRenderCommand : uint8_t {
DRAW,
DRAW_INDEXED,
PUSH_CONSTANTS,
+ SELF_DEPENDENCY_BARRIER,
NUM_RENDER_COMMANDS,
};
-enum PipelineFlags {
- PIPELINE_FLAG_NONE = 0,
- PIPELINE_FLAG_USES_LINES = (1 << 2),
- PIPELINE_FLAG_USES_BLEND_CONSTANT = (1 << 3),
- PIPELINE_FLAG_USES_DEPTH_STENCIL = (1 << 4), // Reads or writes the depth buffer.
+enum class PipelineFlags { // Bit flags describing what pipelines need; stored per render step in VKRStep::render.pipelineFlags.
+ NONE = 0,
+ USES_LINES = (1 << 2), // Bit values are unchanged from the removed PIPELINE_FLAG_* constants.
+ USES_BLEND_CONSTANT = (1 << 3),
+ USES_DEPTH_STENCIL = (1 << 4), // Reads or writes the depth buffer.
+ USES_INPUT_ATTACHMENT = (1 << 5), // When set on a non-backbuffer step, EndCurRenderStep selects RP_TYPE_COLOR_DEPTH_INPUT.
};
+ENUM_CLASS_BITOPS(PipelineFlags); // Presumably supplies the bitwise operators (e.g. the & used on these flags) - macro is defined elsewhere, confirm.
// Pipelines need to be created for the right type of render pass.
enum RenderPassType {
RP_TYPE_BACKBUFFER,
RP_TYPE_COLOR_DEPTH,
+ RP_TYPE_COLOR_DEPTH_INPUT, // CreateRP gives this type a GENERAL-layout color attachment that is also bound as input attachment 0, plus a subpass self-dependency.
// Later will add pure-color render passes.
RP_TYPE_COUNT,
};
@@ -146,14 +151,6 @@ struct TransitionRequest {
VkImageLayout targetLayout;
};
-struct QueueProfileContext {
- VkQueryPool queryPool;
- std::vector timestampDescriptions;
- std::string profileSummary;
- double cpuStartTime;
- double cpuEndTime;
-};
-
class VKRRenderPass;
struct VKRStep {
@@ -168,7 +165,6 @@ struct VKRStep {
union {
struct {
VKRFramebuffer *framebuffer;
- // TODO: Look these up through renderPass?
VKRRenderPassLoadAction colorLoad;
VKRRenderPassLoadAction depthLoad;
VKRRenderPassLoadAction stencilLoad;
@@ -183,7 +179,7 @@ struct VKRStep {
int numReads;
VkImageLayout finalColorLayout;
VkImageLayout finalDepthStencilLayout;
- u32 pipelineFlags;
+ PipelineFlags pipelineFlags; // contains the self dependency flag, in the form of USES_INPUT_ATTACHMENT
VkRect2D renderArea;
// Render pass type. Deduced after finishing recording the pass, from the used pipelines.
// NOTE: Storing the render pass here doesn't do much good, we change the compatible parameters (load/store ops) during step optimization.
@@ -255,7 +251,7 @@ public:
}
void PreprocessSteps(std::vector &steps);
- void RunSteps(VkCommandBuffer cmd, std::vector &steps, QueueProfileContext *profile);
+ void RunSteps(FrameData &frameData, FrameDataShared &frameDataShared);
void LogSteps(const std::vector &steps, bool verbose);
std::string StepToString(const VKRStep &step) const;
@@ -263,6 +259,14 @@ public:
void CreateDeviceObjects();
void DestroyDeviceObjects();
+ // Swapchain
+ void DestroyBackBuffers();
+ bool CreateSwapchain(VkCommandBuffer cmdInit);
+
+ bool HasBackbuffers() const { // True while framebuffers_ holds at least one entry.
+ return !framebuffers_.empty(); // DestroyBackBuffers() clears framebuffers_, which makes this return false again.
+ }
+
// Get a render pass that's compatible with all our framebuffers.
// Note that it's precached, cannot look up in the map as this might be on another thread.
VKRRenderPass *GetCompatibleRenderPass() const {
@@ -302,6 +306,9 @@ public:
}
private:
+ bool InitBackbufferFramebuffers(int width, int height);
+ bool InitDepthStencilBuffer(VkCommandBuffer cmd); // Used for non-buffered rendering.
+
VKRRenderPass *PerformBindFramebufferAsRenderTarget(const VKRStep &pass, VkCommandBuffer cmd);
void PerformRenderPass(const VKRStep &pass, VkCommandBuffer cmd);
void PerformCopy(const VKRStep &pass, VkCommandBuffer cmd);
@@ -324,6 +331,8 @@ private:
static void SetupTransitionToTransferSrc(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrier *recordBarrier);
static void SetupTransitionToTransferDst(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrier *recordBarrier);
+ static void SelfDependencyBarrier(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrier *recordBarrier);
+
VulkanContext *vulkan_;
VkFramebuffer backbuffer_ = VK_NULL_HANDLE;
@@ -354,4 +363,20 @@ private:
// Stored here to help reuse the allocation.
VulkanBarrier recordBarrier_;
+
+ // Swap chain management. These members are released by DestroyBackBuffers().
+ struct SwapchainImageData {
+ VkImage image; // Passed to SetBackbuffer() in RunSteps once the swapchain image has been acquired.
+ VkImageView view; // Queued for deletion in DestroyBackBuffers().
+ };
+ std::vector framebuffers_; // Indexed by frameData.curSwapchainImage in RunSteps.
+ std::vector swapchainImages_; // Indexed by frameData.curSwapchainImage in RunSteps.
+ uint32_t swapchainImageCount_ = 0;
+ struct DepthBufferInfo {
+ VkFormat format = VK_FORMAT_UNDEFINED;
+ VkImage image = VK_NULL_HANDLE;
+ VmaAllocation alloc = VK_NULL_HANDLE; // DestroyBackBuffers() asserts this is set whenever image is set.
+ VkImageView view = VK_NULL_HANDLE;
+ };
+ DepthBufferInfo depth_; // Reset to the defaults above by DestroyBackBuffers().
};
diff --git a/Common/GPU/Vulkan/VulkanRenderManager.cpp b/Common/GPU/Vulkan/VulkanRenderManager.cpp
index 56f40655f7..3a0dde0312 100644
--- a/Common/GPU/Vulkan/VulkanRenderManager.cpp
+++ b/Common/GPU/Vulkan/VulkanRenderManager.cpp
@@ -223,7 +223,7 @@ void CreateImage(VulkanContext *vulkan, VkCommandBuffer cmd, VKRImage &img, int
// Strictly speaking we don't yet need VK_IMAGE_USAGE_SAMPLED_BIT for depth buffers since we do not yet sample depth buffers.
ici.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
if (color) {
- ici.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
+ ici.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT;
} else {
ici.usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
}
@@ -288,53 +288,15 @@ void CreateImage(VulkanContext *vulkan, VkCommandBuffer cmd, VKRImage &img, int
}
VulkanRenderManager::VulkanRenderManager(VulkanContext *vulkan) : vulkan_(vulkan), queueRunner_(vulkan) {
- VkSemaphoreCreateInfo semaphoreCreateInfo = { VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO };
- semaphoreCreateInfo.flags = 0;
- VkResult res = vkCreateSemaphore(vulkan_->GetDevice(), &semaphoreCreateInfo, nullptr, &acquireSemaphore_);
- _dbg_assert_(res == VK_SUCCESS);
- res = vkCreateSemaphore(vulkan_->GetDevice(), &semaphoreCreateInfo, nullptr, &renderingCompleteSemaphore_);
- _dbg_assert_(res == VK_SUCCESS);
-
inflightFramesAtStart_ = vulkan_->GetInflightFrames();
+
+ frameDataShared_.Init(vulkan); // Stands in for the inline semaphore creation removed above - presumably creates the equivalent shared objects; confirm in VulkanFrameData.cpp.
+
for (int i = 0; i < inflightFramesAtStart_; i++) {
- VkCommandPoolCreateInfo cmd_pool_info = { VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO };
- cmd_pool_info.queueFamilyIndex = vulkan_->GetGraphicsQueueFamilyIndex();
- cmd_pool_info.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT;
- VkResult res = vkCreateCommandPool(vulkan_->GetDevice(), &cmd_pool_info, nullptr, &frameData_[i].cmdPoolInit);
- _dbg_assert_(res == VK_SUCCESS);
- res = vkCreateCommandPool(vulkan_->GetDevice(), &cmd_pool_info, nullptr, &frameData_[i].cmdPoolMain);
- _dbg_assert_(res == VK_SUCCESS);
-
- VkCommandBufferAllocateInfo cmd_alloc = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO };
- cmd_alloc.commandPool = frameData_[i].cmdPoolInit;
- cmd_alloc.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
- cmd_alloc.commandBufferCount = 1;
-
- res = vkAllocateCommandBuffers(vulkan_->GetDevice(), &cmd_alloc, &frameData_[i].initCmd);
- _dbg_assert_(res == VK_SUCCESS);
- cmd_alloc.commandPool = frameData_[i].cmdPoolMain;
- res = vkAllocateCommandBuffers(vulkan_->GetDevice(), &cmd_alloc, &frameData_[i].mainCmd);
- _dbg_assert_(res == VK_SUCCESS);
-
- // Creating the frame fence with true so they can be instantly waited on the first frame
- frameData_[i].fence = vulkan_->CreateFence(true);
-
- // This fence one is used for synchronizing readbacks. Does not need preinitialization.
- frameData_[i].readbackFence = vulkan_->CreateFence(false);
-
- VkQueryPoolCreateInfo query_ci{ VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO };
- query_ci.queryCount = MAX_TIMESTAMP_QUERIES;
- query_ci.queryType = VK_QUERY_TYPE_TIMESTAMP;
- res = vkCreateQueryPool(vulkan_->GetDevice(), &query_ci, nullptr, &frameData_[i].profile.queryPool);
+ frameData_[i].Init(vulkan, i); // Stands in for the per-frame command pool / command buffer / fence / query pool setup removed above - presumably done inside FrameData::Init now; confirm.
}
queueRunner_.CreateDeviceObjects();
-
- // AMD hack for issue #10097 (older drivers only.)
- const auto &props = vulkan_->GetPhysicalDeviceProperties().properties;
- if (props.vendorID == VULKAN_VENDOR_AMD && props.apiVersion < VK_API_VERSION_1_1) {
- useThread_ = false;
- }
}
bool VulkanRenderManager::CreateBackbuffers() {
@@ -342,52 +304,14 @@ bool VulkanRenderManager::CreateBackbuffers() {
ERROR_LOG(G3D, "No swapchain - can't create backbuffers");
return false;
}
- VkResult res = vkGetSwapchainImagesKHR(vulkan_->GetDevice(), vulkan_->GetSwapchain(), &swapchainImageCount_, nullptr);
- _dbg_assert_(res == VK_SUCCESS);
- VkImage *swapchainImages = new VkImage[swapchainImageCount_];
- res = vkGetSwapchainImagesKHR(vulkan_->GetDevice(), vulkan_->GetSwapchain(), &swapchainImageCount_, swapchainImages);
- if (res != VK_SUCCESS) {
- ERROR_LOG(G3D, "vkGetSwapchainImagesKHR failed");
- delete[] swapchainImages;
- return false;
- }
VkCommandBuffer cmdInit = GetInitCmd();
- for (uint32_t i = 0; i < swapchainImageCount_; i++) {
- SwapchainImageData sc_buffer{};
- sc_buffer.image = swapchainImages[i];
-
- VkImageViewCreateInfo color_image_view = { VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO };
- color_image_view.format = vulkan_->GetSwapchainFormat();
- color_image_view.components.r = VK_COMPONENT_SWIZZLE_IDENTITY;
- color_image_view.components.g = VK_COMPONENT_SWIZZLE_IDENTITY;
- color_image_view.components.b = VK_COMPONENT_SWIZZLE_IDENTITY;
- color_image_view.components.a = VK_COMPONENT_SWIZZLE_IDENTITY;
- color_image_view.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
- color_image_view.subresourceRange.baseMipLevel = 0;
- color_image_view.subresourceRange.levelCount = 1;
- color_image_view.subresourceRange.baseArrayLayer = 0;
- color_image_view.subresourceRange.layerCount = 1;
- color_image_view.viewType = VK_IMAGE_VIEW_TYPE_2D;
- color_image_view.flags = 0;
- color_image_view.image = sc_buffer.image;
-
- // We leave the images as UNDEFINED, there's no need to pre-transition them as
- // the backbuffer renderpass starts out with them being auto-transitioned from UNDEFINED anyway.
- // Also, turns out it's illegal to transition un-acquired images, thanks Hans-Kristian. See #11417.
-
- res = vkCreateImageView(vulkan_->GetDevice(), &color_image_view, nullptr, &sc_buffer.view);
- swapchainImages_.push_back(sc_buffer);
- _dbg_assert_(res == VK_SUCCESS);
+ if (!queueRunner_.CreateSwapchain(cmdInit)) {
+ return false;
}
- delete[] swapchainImages;
- // Must be before InitBackbufferRenderPass.
- if (InitDepthStencilBuffer(cmdInit)) {
- InitBackbufferFramebuffers(vulkan_->GetBackbufferWidth(), vulkan_->GetBackbufferHeight());
- }
curWidthRaw_ = -1;
curHeightRaw_ = -1;
@@ -404,7 +328,7 @@ bool VulkanRenderManager::CreateBackbuffers() {
outOfDateFrames_ = 0;
// Start the thread.
- if (useThread_ && HasBackbuffers()) {
+ if (HasBackbuffers()) {
run_ = true;
// Won't necessarily be 0.
threadInitFrame_ = vulkan_->GetCurFrame();
@@ -417,57 +341,58 @@ bool VulkanRenderManager::CreateBackbuffers() {
}
void VulkanRenderManager::StopThread() {
- if (useThread_ && run_) {
- run_ = false;
- // Stop the thread.
- for (int i = 0; i < vulkan_->GetInflightFrames(); i++) {
- auto &frameData = frameData_[i];
- {
- std::unique_lock lock(frameData.push_mutex);
- frameData.push_condVar.notify_all();
- }
- {
- std::unique_lock lock(frameData.pull_mutex);
- frameData.pull_condVar.notify_all();
- }
- // Zero the queries so we don't try to pull them later.
- frameData.profile.timestampDescriptions.clear();
- }
- thread_.join();
- INFO_LOG(G3D, "Vulkan submission thread joined. Frame=%d", vulkan_->GetCurFrame());
- compileCond_.notify_all();
- compileThread_.join();
- INFO_LOG(G3D, "Vulkan compiler thread joined.");
-
- // Eat whatever has been queued up for this frame if anything.
- Wipe();
-
- // Wait for any fences to finish and be resignaled, so we don't have sync issues.
- // Also clean out any queued data, which might refer to things that might not be valid
- // when we restart...
- for (int i = 0; i < vulkan_->GetInflightFrames(); i++) {
- auto &frameData = frameData_[i];
- _assert_(!frameData.readyForRun);
- _assert_(frameData.steps.empty());
- if (frameData.hasInitCommands) {
- // Clear 'em out. This can happen on restart sometimes.
- vkEndCommandBuffer(frameData.initCmd);
- frameData.hasInitCommands = false;
- }
- frameData.readyForRun = false;
- for (size_t i = 0; i < frameData.steps.size(); i++) {
- delete frameData.steps[i];
- }
- frameData.steps.clear();
-
- std::unique_lock lock(frameData.push_mutex);
- while (!frameData.readyForFence) {
- VLOG("PUSH: Waiting for frame[%d].readyForFence = 1 (stop)", i);
- frameData.push_condVar.wait(lock);
- }
- }
- } else {
+ if (!run_) { // Guard clause: with the useThread_ check removed, run_ alone decides whether there is anything to stop.
INFO_LOG(G3D, "Vulkan submission thread was already stopped.");
+ return;
+ }
+
+ run_ = false; // Cleared before the notifications below so loops that test run_ (e.g. the wait in ThreadFunc) can exit.
+ // Stop the thread.
+ for (int i = 0; i < vulkan_->GetInflightFrames(); i++) {
+ auto &frameData = frameData_[i];
+ {
+ std::unique_lock lock(frameData.push_mutex); // Notify while holding the matching mutex.
+ frameData.push_condVar.notify_all();
+ }
+ {
+ std::unique_lock lock(frameData.pull_mutex);
+ frameData.pull_condVar.notify_all();
+ }
+ // Zero the queries so we don't try to pull them later.
+ frameData.profile.timestampDescriptions.clear();
+ }
+ thread_.join();
+ INFO_LOG(G3D, "Vulkan submission thread joined. Frame=%d", vulkan_->GetCurFrame());
+ compileCond_.notify_all(); // Wake the compile thread so the join below can complete.
+ compileThread_.join();
+ INFO_LOG(G3D, "Vulkan compiler thread joined.");
+
+ // Eat whatever has been queued up for this frame if anything.
+ Wipe();
+
+ // Wait for any fences to finish and be resignaled, so we don't have sync issues.
+ // Also clean out any queued data, which might refer to things that might not be valid
+ // when we restart...
+ for (int i = 0; i < vulkan_->GetInflightFrames(); i++) {
+ auto &frameData = frameData_[i];
+ _assert_(!frameData.readyForRun);
+ _assert_(frameData.steps.empty()); // The cleanup below still handles leftover steps in case this assert is compiled out or ignored.
+ if (frameData.hasInitCommands) {
+ // Clear 'em out. This can happen on restart sometimes.
+ vkEndCommandBuffer(frameData.initCmd);
+ frameData.hasInitCommands = false;
+ }
+ frameData.readyForRun = false;
+ for (size_t i = 0; i < frameData.steps.size(); i++) { // NOTE(review): this index shadows the outer frame index i; the VLOG below is outside this loop and sees the outer one.
+ delete frameData.steps[i];
+ }
+ frameData.steps.clear();
+
+ std::unique_lock lock(frameData.push_mutex);
+ while (!frameData.readyForFence) { // Blocks until readyForFence is set for this frame and push_condVar is signalled.
+ VLOG("PUSH: Waiting for frame[%d].readyForFence = 1 (stop)", i);
+ frameData.push_condVar.wait(lock);
+ }
}
}
@@ -475,26 +400,7 @@ void VulkanRenderManager::DestroyBackbuffers() {
StopThread();
vulkan_->WaitUntilQueueIdle();
- for (auto &image : swapchainImages_) {
- vulkan_->Delete().QueueDeleteImageView(image.view);
- }
- swapchainImages_.clear();
-
- if (depth_.view) {
- vulkan_->Delete().QueueDeleteImageView(depth_.view);
- }
- if (depth_.image) {
- _dbg_assert_(depth_.alloc);
- vulkan_->Delete().QueueDeleteImageAllocation(depth_.image, depth_.alloc);
- }
- depth_ = {};
- for (uint32_t i = 0; i < framebuffers_.size(); i++) {
- _dbg_assert_(framebuffers_[i] != VK_NULL_HANDLE);
- vulkan_->Delete().QueueDeleteFramebuffer(framebuffers_[i]);
- }
- framebuffers_.clear();
-
- INFO_LOG(G3D, "Backbuffers destroyed");
+ queueRunner_.DestroyBackBuffers();
}
VulkanRenderManager::~VulkanRenderManager() {
@@ -504,16 +410,9 @@ VulkanRenderManager::~VulkanRenderManager() {
DrainCompileQueue();
VkDevice device = vulkan_->GetDevice();
- vkDestroySemaphore(device, acquireSemaphore_, nullptr);
- vkDestroySemaphore(device, renderingCompleteSemaphore_, nullptr);
+ frameDataShared_.Destroy(vulkan_);
for (int i = 0; i < inflightFramesAtStart_; i++) {
- vkFreeCommandBuffers(device, frameData_[i].cmdPoolInit, 1, &frameData_[i].initCmd);
- vkFreeCommandBuffers(device, frameData_[i].cmdPoolMain, 1, &frameData_[i].mainCmd);
- vkDestroyCommandPool(device, frameData_[i].cmdPoolInit, nullptr);
- vkDestroyCommandPool(device, frameData_[i].cmdPoolMain, nullptr);
- vkDestroyFence(device, frameData_[i].fence, nullptr);
- vkDestroyFence(device, frameData_[i].readbackFence, nullptr);
- vkDestroyQueryPool(device, frameData_[i].profile.queryPool, nullptr);
+ frameData_[i].Destroy(vulkan_);
}
queueRunner_.DestroyDeviceObjects();
}
@@ -534,7 +433,9 @@ void VulkanRenderManager::CompileThreadFunc() {
break;
}
- INFO_LOG(G3D, "Compilation thread has %d pipelines to create", (int)toCompile.size());
+ if (!toCompile.empty()) {
+ INFO_LOG(G3D, "Compilation thread has %d pipelines to create", (int)toCompile.size());
+ }
// TODO: Here we can sort the pending pipelines by vertex and fragment shaders,
// and split up further.
@@ -574,6 +475,7 @@ void VulkanRenderManager::ThreadFunc() {
threadFrame = 0;
}
FrameData &frameData = frameData_[threadFrame];
+
std::unique_lock lock(frameData.pull_mutex);
while (!frameData.readyForRun && run_) {
VLOG("PULL: Waiting for frame[%d].readyForRun", threadFrame);
@@ -589,8 +491,7 @@ void VulkanRenderManager::ThreadFunc() {
// but that created a race condition where frames could end up not finished properly on resize etc.
// Only increment next time if we're done.
- nextFrame = frameData.type == VKRRunType::END;
- _dbg_assert_(frameData.type == VKRRunType::END || frameData.type == VKRRunType::SYNC);
+ nextFrame = frameData.RunType() == VKRRunType::END;
}
VLOG("PULL: Running frame %d", threadFrame);
if (firstFrame) {
@@ -615,7 +516,7 @@ void VulkanRenderManager::BeginFrame(bool enableProfiling, bool enableLogProfile
FrameData &frameData = frameData_[curFrame];
// Make sure the very last command buffer from the frame before the previous has been fully executed.
- if (useThread_) {
+ {
std::unique_lock lock(frameData.push_mutex);
while (!frameData.readyForFence) {
VLOG("PUSH: Waiting for frame[%d].readyForFence = 1", curFrame);
@@ -633,7 +534,6 @@ void VulkanRenderManager::BeginFrame(bool enableProfiling, bool enableLogProfile
// Can't set this until after the fence.
frameData.profilingEnabled_ = enableProfiling;
- frameData.readbackFenceUsed = false;
uint64_t queryResults[MAX_TIMESTAMP_QUERIES];
@@ -698,21 +598,7 @@ void VulkanRenderManager::BeginFrame(bool enableProfiling, bool enableLogProfile
VkCommandBuffer VulkanRenderManager::GetInitCmd() {
int curFrame = vulkan_->GetCurFrame();
- FrameData &frameData = frameData_[curFrame];
- if (!frameData.hasInitCommands) {
- VkCommandBufferBeginInfo begin = {
- VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
- nullptr,
- VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT
- };
- vkResetCommandPool(vulkan_->GetDevice(), frameData.cmdPoolInit, 0);
- VkResult res = vkBeginCommandBuffer(frameData.initCmd, &begin);
- if (res != VK_SUCCESS) {
- return VK_NULL_HANDLE;
- }
- frameData.hasInitCommands = true;
- }
- return frameData_[curFrame].initCmd;
+ return frameData_[curFrame].GetInitCmd(vulkan_); // Delegates to the current frame's FrameData; the begin-on-first-use logic removed above presumably lives there now - confirm in VulkanFrameData.cpp.
}
VKRGraphicsPipeline *VulkanRenderManager::CreateGraphicsPipeline(VKRGraphicsPipelineDesc *desc, uint32_t variantBitmask, const char *tag) {
@@ -771,16 +657,20 @@ void VulkanRenderManager::EndCurRenderStep() {
curRenderStep_->render.colorStore, curRenderStep_->render.depthStore, curRenderStep_->render.stencilStore,
};
RenderPassType rpType = RP_TYPE_COLOR_DEPTH;
+ // Save the accumulated pipeline flags so we can use that to configure the render pass.
+ // We'll often be able to avoid loading/saving the depth/stencil buffer.
curRenderStep_->render.pipelineFlags = curPipelineFlags_;
if (!curRenderStep_->render.framebuffer) {
rpType = RP_TYPE_BACKBUFFER;
+ } else if (curPipelineFlags_ & PipelineFlags::USES_INPUT_ATTACHMENT) {
+ // Not allowed on backbuffers.
+ rpType = RP_TYPE_COLOR_DEPTH_INPUT;
}
+ // TODO: Also add render pass types for depth/stencil-less.
VKRRenderPass *renderPass = queueRunner_.GetRenderPass(key);
curRenderStep_->render.renderPassType = rpType;
- // Save the accumulated pipeline flags so we can use that to configure the render pass.
- // We'll often be able to avoid loading/saving the depth/stencil buffer.
compileMutex_.lock();
bool needsCompile = false;
for (VKRGraphicsPipeline *pipeline : pipelinesToCheck_) {
@@ -806,7 +696,12 @@ void VulkanRenderManager::EndCurRenderStep() {
// We no longer have a current render step.
curRenderStep_ = nullptr;
- curPipelineFlags_ = 0;
+ curPipelineFlags_ = (PipelineFlags)0;
+}
+
+void VulkanRenderManager::BindCurrentFramebufferAsInputAttachment0(VkImageAspectFlags aspectBits) { // Appends a self-dependency barrier command to the current render step; aspectBits is not read by this body.
+ _dbg_assert_(curRenderStep_); // Requires an active render step to append to.
+ curRenderStep_->commands.push_back(VkRenderData{ VKRRenderCommand::SELF_DEPENDENCY_BARRIER }); // PerformRenderPass later executes this as a color-aspect SelfDependencyBarrier on the step's framebuffer.
}
void VulkanRenderManager::BindFramebufferAsRenderTarget(VKRFramebuffer *fb, VKRRenderPassLoadAction color, VKRRenderPassLoadAction depth, VKRRenderPassLoadAction stencil, uint32_t clearColor, float clearDepth, uint8_t clearStencil, const char *tag) {
@@ -1028,98 +923,6 @@ void VulkanRenderManager::CopyImageToMemorySync(VkImage image, int mipLevel, int
queueRunner_.CopyReadbackBuffer(w, h, destFormat, destFormat, pixelStride, pixels);
}
-bool VulkanRenderManager::InitBackbufferFramebuffers(int width, int height) {
- VkResult res;
- // We share the same depth buffer but have multiple color buffers, see the loop below.
- VkImageView attachments[2] = { VK_NULL_HANDLE, depth_.view };
-
- VkFramebufferCreateInfo fb_info = { VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO };
- fb_info.renderPass = queueRunner_.GetCompatibleRenderPass()->Get(vulkan_, RP_TYPE_BACKBUFFER);
- fb_info.attachmentCount = 2;
- fb_info.pAttachments = attachments;
- fb_info.width = width;
- fb_info.height = height;
- fb_info.layers = 1;
-
- framebuffers_.resize(swapchainImageCount_);
-
- for (uint32_t i = 0; i < swapchainImageCount_; i++) {
- attachments[0] = swapchainImages_[i].view;
- res = vkCreateFramebuffer(vulkan_->GetDevice(), &fb_info, nullptr, &framebuffers_[i]);
- _dbg_assert_(res == VK_SUCCESS);
- if (res != VK_SUCCESS) {
- framebuffers_.clear();
- return false;
- }
- }
-
- return true;
-}
-
-bool VulkanRenderManager::InitDepthStencilBuffer(VkCommandBuffer cmd) {
- const VkFormat depth_format = vulkan_->GetDeviceInfo().preferredDepthStencilFormat;
- int aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
- VkImageCreateInfo image_info = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
- image_info.imageType = VK_IMAGE_TYPE_2D;
- image_info.format = depth_format;
- image_info.extent.width = vulkan_->GetBackbufferWidth();
- image_info.extent.height = vulkan_->GetBackbufferHeight();
- image_info.extent.depth = 1;
- image_info.mipLevels = 1;
- image_info.arrayLayers = 1;
- image_info.samples = VK_SAMPLE_COUNT_1_BIT;
- image_info.queueFamilyIndexCount = 0;
- image_info.pQueueFamilyIndices = nullptr;
- image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
- image_info.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
- image_info.flags = 0;
-
- depth_.format = depth_format;
-
- VmaAllocationCreateInfo allocCreateInfo{};
- VmaAllocationInfo allocInfo{};
-
- allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
-
- VkResult res = vmaCreateImage(vulkan_->Allocator(), &image_info, &allocCreateInfo, &depth_.image, &depth_.alloc, &allocInfo);
- _dbg_assert_(res == VK_SUCCESS);
- if (res != VK_SUCCESS)
- return false;
-
- vulkan_->SetDebugName(depth_.image, VK_OBJECT_TYPE_IMAGE, "BackbufferDepth");
-
- TransitionImageLayout2(cmd, depth_.image, 0, 1,
- aspectMask,
- VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
- VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
- VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
- 0, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT);
-
- VkImageViewCreateInfo depth_view_info = { VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO };
- depth_view_info.image = depth_.image;
- depth_view_info.format = depth_format;
- depth_view_info.components.r = VK_COMPONENT_SWIZZLE_IDENTITY;
- depth_view_info.components.g = VK_COMPONENT_SWIZZLE_IDENTITY;
- depth_view_info.components.b = VK_COMPONENT_SWIZZLE_IDENTITY;
- depth_view_info.components.a = VK_COMPONENT_SWIZZLE_IDENTITY;
- depth_view_info.subresourceRange.aspectMask = aspectMask;
- depth_view_info.subresourceRange.baseMipLevel = 0;
- depth_view_info.subresourceRange.levelCount = 1;
- depth_view_info.subresourceRange.baseArrayLayer = 0;
- depth_view_info.subresourceRange.layerCount = 1;
- depth_view_info.viewType = VK_IMAGE_VIEW_TYPE_2D;
- depth_view_info.flags = 0;
-
- VkDevice device = vulkan_->GetDevice();
-
- res = vkCreateImageView(device, &depth_view_info, NULL, &depth_.view);
- _dbg_assert_(res == VK_SUCCESS);
- if (res != VK_SUCCESS)
- return false;
-
- return true;
-}
-
static void RemoveDrawCommands(std::vector *cmds) {
// Here we remove any DRAW type commands when we hit a CLEAR.
for (auto &c : *cmds) {
@@ -1359,6 +1162,9 @@ VkImageView VulkanRenderManager::BindFramebufferAsTexture(VKRFramebuffer *fb, in
}
}
+// Called on main thread.
+// Sends the collected commands to the render thread. Submit-latency should be
+// measured from here, probably.
void VulkanRenderManager::Finish() {
EndCurRenderStep();
@@ -1371,18 +1177,14 @@ void VulkanRenderManager::Finish() {
int curFrame = vulkan_->GetCurFrame();
FrameData &frameData = frameData_[curFrame];
- if (!useThread_) {
- frameData.steps = std::move(steps_);
- steps_.clear();
- frameData.type = VKRRunType::END;
- Run(curFrame);
- } else {
+
+ {
std::unique_lock lock(frameData.pull_mutex);
VLOG("PUSH: Frame[%d].readyForRun = true", curFrame);
frameData.steps = std::move(steps_);
steps_.clear();
frameData.readyForRun = true;
- frameData.type = VKRRunType::END;
+ frameData.runType_ = VKRRunType::END;
frameData.pull_condVar.notify_all();
}
vulkan_->EndFrame();
@@ -1397,118 +1199,39 @@ void VulkanRenderManager::Wipe() {
steps_.clear();
}
+// Called on the render thread.
+//
// Can be called multiple times with no bad side effects. This is so that we can either begin a frame the normal way,
// or stop it in the middle for a synchronous readback, then start over again mostly normally but without repeating
// the backbuffer image acquisition.
void VulkanRenderManager::BeginSubmitFrame(int frame) {
FrameData &frameData = frameData_[frame];
- if (!frameData.hasBegun) {
- // Get the index of the next available swapchain image, and a semaphore to block command buffer execution on.
- VkResult res = vkAcquireNextImageKHR(vulkan_->GetDevice(), vulkan_->GetSwapchain(), UINT64_MAX, acquireSemaphore_, (VkFence)VK_NULL_HANDLE, &frameData.curSwapchainImage);
- if (res == VK_SUBOPTIMAL_KHR) {
- // Hopefully the resize will happen shortly. Ignore - one frame might look bad or something.
- WARN_LOG(G3D, "VK_SUBOPTIMAL_KHR returned - ignoring");
- } else if (res == VK_ERROR_OUT_OF_DATE_KHR) {
- WARN_LOG(G3D, "VK_ERROR_OUT_OF_DATE_KHR returned - processing the frame, but not presenting");
- frameData.skipSwap = true;
- } else {
- _assert_msg_(res == VK_SUCCESS, "vkAcquireNextImageKHR failed! result=%s", VulkanResultToString(res));
- }
+ // Should only have at most the init command buffer pending here (that one came from the other thread).
+ _dbg_assert_(!frameData.hasPresentCommands);
+ frameData.SubmitPending(vulkan_, FrameSubmitType::Pending, frameDataShared_);
+
+ if (!frameData.hasMainCommands) {
+ // Effectively resets both main and present command buffers, since they both live in this pool.
+ // We always record main commands first, so we don't need to reset the present command buffer separately.
vkResetCommandPool(vulkan_->GetDevice(), frameData.cmdPoolMain, 0);
+
VkCommandBufferBeginInfo begin{ VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };
begin.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
- res = vkBeginCommandBuffer(frameData.mainCmd, &begin);
-
+ VkResult res = vkBeginCommandBuffer(frameData.mainCmd, &begin);
+ frameData.hasMainCommands = true;
_assert_msg_(res == VK_SUCCESS, "vkBeginCommandBuffer failed! result=%s", VulkanResultToString(res));
-
- queueRunner_.SetBackbuffer(framebuffers_[frameData.curSwapchainImage], swapchainImages_[frameData.curSwapchainImage].image);
-
- frameData.hasBegun = true;
}
}
-void VulkanRenderManager::Submit(int frame, bool triggerFrameFence) {
- FrameData &frameData = frameData_[frame];
- if (frameData.hasInitCommands) {
- if (frameData.profilingEnabled_ && triggerFrameFence) {
- // Pre-allocated query ID 1.
- vkCmdWriteTimestamp(frameData.initCmd, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, frameData.profile.queryPool, 1);
- }
- VkResult res = vkEndCommandBuffer(frameData.initCmd);
- _assert_msg_(res == VK_SUCCESS, "vkEndCommandBuffer failed (init)! result=%s", VulkanResultToString(res));
- }
-
- VkResult res = vkEndCommandBuffer(frameData.mainCmd);
- _assert_msg_(res == VK_SUCCESS, "vkEndCommandBuffer failed (main)! result=%s", VulkanResultToString(res));
-
- VkCommandBuffer cmdBufs[2];
- int numCmdBufs = 0;
- if (frameData.hasInitCommands) {
- cmdBufs[numCmdBufs++] = frameData.initCmd;
- if (splitSubmit_) {
- // Send the init commands off separately. Used this once to confirm that the cause of a device loss was in the init cmdbuf.
- VkSubmitInfo submit_info{ VK_STRUCTURE_TYPE_SUBMIT_INFO };
- submit_info.commandBufferCount = (uint32_t)numCmdBufs;
- submit_info.pCommandBuffers = cmdBufs;
- res = vkQueueSubmit(vulkan_->GetGraphicsQueue(), 1, &submit_info, VK_NULL_HANDLE);
- if (res == VK_ERROR_DEVICE_LOST) {
- _assert_msg_(false, "Lost the Vulkan device in split submit! If this happens again, switch Graphics Backend away from Vulkan");
- } else {
- _assert_msg_(res == VK_SUCCESS, "vkQueueSubmit failed (init)! result=%s", VulkanResultToString(res));
- }
- numCmdBufs = 0;
- }
- }
- cmdBufs[numCmdBufs++] = frameData.mainCmd;
-
- VkSubmitInfo submit_info{ VK_STRUCTURE_TYPE_SUBMIT_INFO };
- VkPipelineStageFlags waitStage[1]{ VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT };
- if (triggerFrameFence && !frameData.skipSwap) {
- submit_info.waitSemaphoreCount = 1;
- submit_info.pWaitSemaphores = &acquireSemaphore_;
- submit_info.pWaitDstStageMask = waitStage;
- }
- submit_info.commandBufferCount = (uint32_t)numCmdBufs;
- submit_info.pCommandBuffers = cmdBufs;
- if (triggerFrameFence && !frameData.skipSwap) {
- submit_info.signalSemaphoreCount = 1;
- submit_info.pSignalSemaphores = &renderingCompleteSemaphore_;
- }
- res = vkQueueSubmit(vulkan_->GetGraphicsQueue(), 1, &submit_info, triggerFrameFence ? frameData.fence : frameData.readbackFence);
- if (res == VK_ERROR_DEVICE_LOST) {
- _assert_msg_(false, "Lost the Vulkan device in vkQueueSubmit! If this happens again, switch Graphics Backend away from Vulkan");
- } else {
- _assert_msg_(res == VK_SUCCESS, "vkQueueSubmit failed (main, split=%d)! result=%s", (int)splitSubmit_, VulkanResultToString(res));
- }
-
- // When !triggerFence, we notify after syncing with Vulkan.
- if (useThread_ && triggerFrameFence) {
- VLOG("PULL: Frame %d.readyForFence = true", frame);
- std::unique_lock lock(frameData.push_mutex);
- frameData.readyForFence = true;
- frameData.push_condVar.notify_all();
- }
-
- frameData.hasInitCommands = false;
-}
-
+// Called on the render thread.
void VulkanRenderManager::EndSubmitFrame(int frame) {
FrameData &frameData = frameData_[frame];
- frameData.hasBegun = false;
- Submit(frame, true);
+ frameData.SubmitPending(vulkan_, FrameSubmitType::Present, frameDataShared_);
if (!frameData.skipSwap) {
- VkSwapchainKHR swapchain = vulkan_->GetSwapchain();
- VkPresentInfoKHR present = { VK_STRUCTURE_TYPE_PRESENT_INFO_KHR };
- present.swapchainCount = 1;
- present.pSwapchains = &swapchain;
- present.pImageIndices = &frameData.curSwapchainImage;
- present.pWaitSemaphores = &renderingCompleteSemaphore_;
- present.waitSemaphoreCount = 1;
-
- VkResult res = vkQueuePresentKHR(vulkan_->GetGraphicsQueue(), &present);
+ VkResult res = frameData.QueuePresent(vulkan_, frameDataShared_);
if (res == VK_ERROR_OUT_OF_DATE_KHR) {
// We clearly didn't get this in vkAcquireNextImageKHR because of the skipSwap check above.
// Do the increment.
@@ -1528,18 +1251,29 @@ void VulkanRenderManager::EndSubmitFrame(int frame) {
}
}
+void VulkanRenderManager::EndSyncFrame(int frame) { // Submits the frame's pending work as FrameSubmitType::Sync, then flags readyForFence and wakes push_condVar waiters.
+ FrameData &frameData = frameData_[frame];
+
+ // The submit will trigger the readbackFence, and also do the wait for it.
+ frameData.SubmitPending(vulkan_, FrameSubmitType::Sync, frameDataShared_);
+
+ // At this point we can resume filling the command buffers for the current frame since
+ // we know the device is idle - and thus all previously enqueued command buffers have been processed.
+ // No need to switch to the next frame number, would just be confusing.
+ std::unique_lock lock(frameData.push_mutex); // Held while flipping the flag and notifying; FlushSync() waits on this flag under the same mutex.
+ frameData.readyForFence = true;
+ frameData.push_condVar.notify_all();
+}
+
void VulkanRenderManager::Run(int frame) {
BeginSubmitFrame(frame);
FrameData &frameData = frameData_[frame];
- auto &stepsOnThread = frameData_[frame].steps;
- VkCommandBuffer cmd = frameData.mainCmd;
- queueRunner_.PreprocessSteps(stepsOnThread);
+ queueRunner_.PreprocessSteps(frameData_[frame].steps);
//queueRunner_.LogSteps(stepsOnThread, false);
- queueRunner_.RunSteps(cmd, stepsOnThread, frameData.profilingEnabled_ ? &frameData.profile : nullptr);
- stepsOnThread.clear();
+ queueRunner_.RunSteps(frameData, frameDataShared_);
- switch (frameData.type) {
+ switch (frameData.runType_) {
case VKRRunType::END:
EndSubmitFrame(frame);
break;
@@ -1555,59 +1289,24 @@ void VulkanRenderManager::Run(int frame) {
VLOG("PULL: Finished running frame %d", frame);
}
-void VulkanRenderManager::EndSyncFrame(int frame) {
- FrameData &frameData = frameData_[frame];
-
- frameData.readbackFenceUsed = true;
-
- // The submit will trigger the readbackFence.
- Submit(frame, false);
-
- // Hard stall of the GPU, not ideal, but necessary so the CPU has the contents of the readback.
- vkWaitForFences(vulkan_->GetDevice(), 1, &frameData.readbackFence, true, UINT64_MAX);
- vkResetFences(vulkan_->GetDevice(), 1, &frameData.readbackFence);
-
- // At this point we can resume filling the command buffers for the current frame since
- // we know the device is idle - and thus all previously enqueued command buffers have been processed.
- // No need to switch to the next frame number.
- VkCommandBufferBeginInfo begin{
- VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
- nullptr,
- VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT
- };
- vkResetCommandPool(vulkan_->GetDevice(), frameData.cmdPoolMain, 0);
- VkResult res = vkBeginCommandBuffer(frameData.mainCmd, &begin);
- _assert_(res == VK_SUCCESS);
-
- if (useThread_) {
- std::unique_lock lock(frameData.push_mutex);
- frameData.readyForFence = true;
- frameData.push_condVar.notify_all();
- }
-}
-
void VulkanRenderManager::FlushSync() {
renderStepOffset_ += (int)steps_.size();
int curFrame = vulkan_->GetCurFrame();
FrameData &frameData = frameData_[curFrame];
- if (!useThread_) {
- frameData.steps = std::move(steps_);
- steps_.clear();
- frameData.type = VKRRunType::SYNC;
- Run(curFrame);
- } else {
+
+ {
std::unique_lock lock(frameData.pull_mutex);
VLOG("PUSH: Frame[%d].readyForRun = true (sync)", curFrame);
frameData.steps = std::move(steps_);
steps_.clear();
frameData.readyForRun = true;
_dbg_assert_(!frameData.readyForFence);
- frameData.type = VKRRunType::SYNC;
+ frameData.runType_ = VKRRunType::SYNC;
frameData.pull_condVar.notify_all();
}
- if (useThread_) {
+ {
std::unique_lock lock(frameData.push_mutex);
// Wait for the flush to be hit, since we're syncing.
while (!frameData.readyForFence) {
diff --git a/Common/GPU/Vulkan/VulkanRenderManager.h b/Common/GPU/Vulkan/VulkanRenderManager.h
index 4cc7aafdfc..43cbbb7b86 100644
--- a/Common/GPU/Vulkan/VulkanRenderManager.h
+++ b/Common/GPU/Vulkan/VulkanRenderManager.h
@@ -65,15 +65,6 @@ private:
std::string tag_;
};
-enum class VKRRunType {
- END,
- SYNC,
-};
-
-enum {
- MAX_TIMESTAMP_QUERIES = 128,
-};
-
struct BoundingRect {
int x1;
int y1;
@@ -236,6 +227,8 @@ public:
// as the other backends, even though there's no actual binding happening here.
VkImageView BindFramebufferAsTexture(VKRFramebuffer *fb, int binding, VkImageAspectFlags aspectBits, int attachment);
+ void BindCurrentFramebufferAsInputAttachment0(VkImageAspectFlags aspectBits);
+
bool CopyFramebufferToMemorySync(VKRFramebuffer *src, VkImageAspectFlags aspectBits, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, const char *tag);
void CopyImageToMemorySync(VkImage image, int mipLevel, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, const char *tag);
@@ -440,11 +433,7 @@ public:
void DestroyBackbuffers();
bool HasBackbuffers() {
- return !framebuffers_.empty();
- }
-
- void SetSplitSubmit(bool split) {
- splitSubmit_ = split;
+ return queueRunner_.HasBackbuffers();
}
void SetInflightFrames(int f) {
@@ -470,13 +459,10 @@ public:
}
private:
- bool InitBackbufferFramebuffers(int width, int height);
- bool InitDepthStencilBuffer(VkCommandBuffer cmd); // Used for non-buffered rendering.
void EndCurRenderStep();
void BeginSubmitFrame(int frame);
void EndSubmitFrame(int frame);
- void Submit(int frame, bool triggerFence);
// Bad for performance but sometimes necessary for synchronous CPU readbacks (screenshots and whatnot).
void FlushSync();
@@ -484,43 +470,7 @@ private:
void StopThread();
- // Permanent objects
- VkSemaphore acquireSemaphore_;
- VkSemaphore renderingCompleteSemaphore_;
-
- // Per-frame data, round-robin so we can overlap submission with execution of the previous frame.
- struct FrameData {
- std::mutex push_mutex;
- std::condition_variable push_condVar;
-
- std::mutex pull_mutex;
- std::condition_variable pull_condVar;
-
- bool readyForFence = true;
- bool readyForRun = false;
- bool skipSwap = false;
- VKRRunType type = VKRRunType::END;
-
- VkFence fence;
- VkFence readbackFence; // Strictly speaking we might only need one of these.
- bool readbackFenceUsed = false;
-
- // These are on different threads so need separate pools.
- VkCommandPool cmdPoolInit;
- VkCommandPool cmdPoolMain;
- VkCommandBuffer initCmd;
- VkCommandBuffer mainCmd;
- bool hasInitCommands = false;
- std::vector steps;
-
- // Swapchain.
- bool hasBegun = false;
- uint32_t curSwapchainImage = -1;
-
- // Profiling.
- QueueProfileContext profile;
- bool profilingEnabled_;
- };
+ FrameDataShared frameDataShared_;
FrameData frameData_[VulkanContext::MAX_INFLIGHT_FRAMES];
int newInflightFrames_ = -1;
@@ -544,11 +494,10 @@ private:
VKRStep *curRenderStep_ = nullptr;
bool curStepHasViewport_ = false;
bool curStepHasScissor_ = false;
- u32 curPipelineFlags_ = 0;
+ PipelineFlags curPipelineFlags_{};
BoundingRect curRenderArea_;
std::vector steps_;
- bool splitSubmit_ = false;
// Execution time state
bool run_ = true;
@@ -568,23 +517,4 @@ private:
// pipelines to check and possibly create at the end of the current render pass.
std::vector pipelinesToCheck_;
-
- // Swap chain management
- struct SwapchainImageData {
- VkImage image;
- VkImageView view;
- };
- std::vector framebuffers_;
- std::vector swapchainImages_;
- uint32_t swapchainImageCount_ = 0;
- struct DepthBufferInfo {
- VkFormat format = VK_FORMAT_UNDEFINED;
- VkImage image = VK_NULL_HANDLE;
- VmaAllocation alloc = VK_NULL_HANDLE;
- VkImageView view = VK_NULL_HANDLE;
- };
- DepthBufferInfo depth_;
-
- // This works great - except see issue #10097. WTF?
- bool useThread_ = true;
};
diff --git a/Common/GPU/Vulkan/thin3d_vulkan.cpp b/Common/GPU/Vulkan/thin3d_vulkan.cpp
index 8420c01450..34bbdd5efb 100644
--- a/Common/GPU/Vulkan/thin3d_vulkan.cpp
+++ b/Common/GPU/Vulkan/thin3d_vulkan.cpp
@@ -361,7 +361,7 @@ class VKFramebuffer;
class VKContext : public DrawContext {
public:
- VKContext(VulkanContext *vulkan, bool splitSubmit);
+ VKContext(VulkanContext *vulkan);
virtual ~VKContext();
const DeviceCaps &GetDeviceCaps() const override {
@@ -401,9 +401,10 @@ public:
// These functions should be self explanatory.
void BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPassInfo &rp, const char *tag) override;
Framebuffer *GetCurrentRenderTarget() override {
- return curFramebuffer_;
+ return (Framebuffer *)curFramebuffer_.ptr;
}
void BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int attachment) override;
+ void BindCurrentFramebufferForColorInput() override;
void GetFramebufferDimensions(Framebuffer *fbo, int *w, int *h) override;
@@ -473,27 +474,7 @@ public:
std::vector GetFeatureList() const override;
std::vector GetExtensionList() const override;
- uint64_t GetNativeObject(NativeObject obj, void *srcObject) override {
- switch (obj) {
- case NativeObject::CONTEXT:
- return (uint64_t)vulkan_;
- case NativeObject::INIT_COMMANDBUFFER:
- return (uint64_t)renderManager_.GetInitCmd();
- case NativeObject::BOUND_TEXTURE0_IMAGEVIEW:
- return (uint64_t)boundImageView_[0];
- case NativeObject::BOUND_TEXTURE1_IMAGEVIEW:
- return (uint64_t)boundImageView_[1];
- case NativeObject::RENDER_MANAGER:
- return (uint64_t)(uintptr_t)&renderManager_;
- case NativeObject::NULL_IMAGEVIEW:
- return (uint64_t)GetNullTexture()->GetImageView();
- case NativeObject::TEXTURE_VIEW:
- return (uint64_t)(((VKTexture *)srcObject)->GetImageView());
- default:
- Crash();
- return 0;
- }
- }
+ uint64_t GetNativeObject(NativeObject obj, void *srcObject) override;
void HandleEvent(Event ev, int width, int height, void *param1, void *param2) override;
@@ -522,7 +503,7 @@ private:
VkDescriptorSetLayout descriptorSetLayout_ = VK_NULL_HANDLE;
VkPipelineLayout pipelineLayout_ = VK_NULL_HANDLE;
VkPipelineCache pipelineCache_ = VK_NULL_HANDLE;
- AutoRef curFramebuffer_;
+ AutoRef curFramebuffer_;
VkDevice device_;
VkQueue queue_;
@@ -781,7 +762,7 @@ bool VKTexture::Create(VkCommandBuffer cmd, VulkanPushBuffer *push, const Textur
return true;
}
-VKContext::VKContext(VulkanContext *vulkan, bool splitSubmit)
+VKContext::VKContext(VulkanContext *vulkan)
: vulkan_(vulkan), renderManager_(vulkan) {
shaderLanguageDesc_.Init(GLSL_VULKAN);
@@ -807,9 +788,11 @@ VKContext::VKContext(VulkanContext *vulkan, bool splitSubmit)
caps_.fragmentShaderInt32Supported = true;
caps_.textureNPOTFullySupported = true;
caps_.fragmentShaderDepthWriteSupported = true;
+ caps_.blendMinMaxSupported = true;
caps_.logicOpSupported = vulkan->GetDeviceFeatures().enabled.logicOp != 0;
auto deviceProps = vulkan->GetPhysicalDeviceProperties(vulkan_->GetCurrentPhysicalDeviceIndex()).properties;
+
switch (deviceProps.vendorID) {
case VULKAN_VENDOR_AMD: caps_.vendor = GPUVendor::VENDOR_AMD; break;
case VULKAN_VENDOR_ARM: caps_.vendor = GPUVendor::VENDOR_ARM; break;
@@ -831,6 +814,11 @@ VKContext::VKContext(VulkanContext *vulkan, bool splitSubmit)
// Color write mask not masking write in certain scenarios with a depth test, see #10421.
// Known still present on driver 0x80180000 and Adreno 5xx (possibly more.)
bugs_.Infest(Bugs::COLORWRITEMASK_BROKEN_WITH_DEPTHTEST);
+
+ // Trying to follow all the rules in https://registry.khronos.org/vulkan/specs/1.3/html/vkspec.html#synchronization-pipeline-barriers-subpass-self-dependencies
+ // and https://registry.khronos.org/vulkan/specs/1.3/html/vkspec.html#renderpass-feedbackloop, but still it doesn't
+ // quite work - artifacts on triangle boundaries on Adreno.
+ bugs_.Infest(Bugs::SUBPASS_FEEDBACK_BROKEN);
} else if (caps_.vendor == GPUVendor::VENDOR_AMD) {
// See issue #10074, and also #10065 (AMD) and #10109 for the choice of the driver version to check for.
if (deviceProps.driverVersion < 0x00407000) {
@@ -840,19 +828,27 @@ VKContext::VKContext(VulkanContext *vulkan, bool splitSubmit)
// Workaround for Intel driver bug. TODO: Re-enable after some driver version
bugs_.Infest(Bugs::DUAL_SOURCE_BLENDING_BROKEN);
} else if (caps_.vendor == GPUVendor::VENDOR_ARM) {
+ int majorVersion = VK_API_VERSION_MAJOR(deviceProps.driverVersion);
+
// These GPUs (up to some certain hardware version?) have a bug where draws where gl_Position.w == .z
// corrupt the depth buffer. This is easily worked around by simply scaling Z down a tiny bit when this case
// is detected. See: https://github.com/hrydgard/ppsspp/issues/11937
bugs_.Infest(Bugs::EQUAL_WZ_CORRUPTS_DEPTH);
- // At least one driver at the upper end of the range is known to be likely to suffer from the bug causing issue #13833 (Midnight Club map broken).
- bugs_.Infest(Bugs::MALI_STENCIL_DISCARD_BUG);
- // This started in driver 31 or 32.
- if (VK_API_VERSION_MAJOR(deviceProps.driverVersion) >= 32) {
+ // Nearly identical to the Adreno bug, see #13833 (Midnight Club map broken) and other issues.
+ // Reported fixed in major version 40 - let's add a check once confirmed.
+ bugs_.Infest(Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL);
+
+ // This started in driver 31 or 32, fixed in 40 - let's add a check once confirmed.
+ if (majorVersion >= 32) {
bugs_.Infest(Bugs::MALI_CONSTANT_LOAD_BUG); // See issue #15661
}
}
+ // Limited, through input attachments and self-dependencies.
+ // We turn it off here already if buggy.
+ caps_.framebufferFetchSupported = !bugs_.Has(Bugs::SUBPASS_FEEDBACK_BROKEN);
+
caps_.deviceID = deviceProps.deviceID;
device_ = vulkan->GetDevice();
@@ -920,8 +916,6 @@ VKContext::VKContext(VulkanContext *vulkan, bool splitSubmit)
VkPipelineCacheCreateInfo pc{ VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO };
res = vkCreatePipelineCache(vulkan_->GetDevice(), &pc, nullptr, &pipelineCache_);
_assert_(VK_SUCCESS == res);
-
- renderManager_.SetSplitSubmit(splitSubmit);
}
VKContext::~VKContext() {
@@ -1058,12 +1052,12 @@ Pipeline *VKContext::CreateGraphicsPipeline(const PipelineDesc &desc, const char
VKDepthStencilState *depth = (VKDepthStencilState *)desc.depthStencil;
VKRasterState *raster = (VKRasterState *)desc.raster;
- u32 pipelineFlags = 0;
+ PipelineFlags pipelineFlags = (PipelineFlags)0;
if (depth->info.depthTestEnable || depth->info.stencilTestEnable) {
- pipelineFlags |= PIPELINE_FLAG_USES_DEPTH_STENCIL;
+ pipelineFlags |= PipelineFlags::USES_DEPTH_STENCIL;
}
- VKPipeline *pipeline = new VKPipeline(vulkan_, desc.uniformDesc ? desc.uniformDesc->uniformBufferSize : 16 * sizeof(float), (PipelineFlags)pipelineFlags, tag);
+ VKPipeline *pipeline = new VKPipeline(vulkan_, desc.uniformDesc ? desc.uniformDesc->uniformBufferSize : 16 * sizeof(float), pipelineFlags, tag);
VKRGraphicsPipelineDesc &gDesc = pipeline->vkrDesc;
@@ -1401,8 +1395,8 @@ void VKContext::Clear(int clearMask, uint32_t colorval, float depthVal, int sten
renderManager_.Clear(colorval, depthVal, stencilVal, mask);
}
-DrawContext *T3DCreateVulkanContext(VulkanContext *vulkan, bool split) {
- return new VKContext(vulkan, split);
+DrawContext *T3DCreateVulkanContext(VulkanContext *vulkan) {
+ return new VKContext(vulkan);
}
void AddFeature(std::vector &features, const char *name, VkBool32 available, VkBool32 enabled) {
@@ -1584,6 +1578,10 @@ void VKContext::BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChanne
boundImageView_[binding] = renderManager_.BindFramebufferAsTexture(fb->GetFB(), binding, aspect, attachment);
}
+void VKContext::BindCurrentFramebufferForColorInput() { // DrawContext override: forwards to the render manager.
+ renderManager_.BindCurrentFramebufferAsInputAttachment0(VK_IMAGE_ASPECT_COLOR_BIT); // Always requests the color aspect.
+}
+
void VKContext::GetFramebufferDimensions(Framebuffer *fbo, int *w, int *h) {
VKFramebuffer *fb = (VKFramebuffer *)fbo;
if (fb) {
@@ -1624,4 +1622,28 @@ void VKContext::InvalidateFramebuffer(FBInvalidationStage stage, uint32_t channe
}
}
+uint64_t VKContext::GetNativeObject(NativeObject obj, void *srcObject) { // Returns the backend object selected by obj, cast to uint64_t; srcObject is only read for TEXTURE_VIEW.
+ switch (obj) {
+ case NativeObject::CONTEXT:
+ return (uint64_t)vulkan_;
+ case NativeObject::INIT_COMMANDBUFFER:
+ return (uint64_t)renderManager_.GetInitCmd();
+ case NativeObject::BOUND_TEXTURE0_IMAGEVIEW:
+ return (uint64_t)boundImageView_[0];
+ case NativeObject::BOUND_TEXTURE1_IMAGEVIEW:
+ return (uint64_t)boundImageView_[1];
+ case NativeObject::RENDER_MANAGER:
+ return (uint64_t)(uintptr_t)&renderManager_;
+ case NativeObject::NULL_IMAGEVIEW:
+ return (uint64_t)GetNullTexture()->GetImageView();
+ case NativeObject::TEXTURE_VIEW:
+ return (uint64_t)(((VKTexture *)srcObject)->GetImageView()); // srcObject is cast to VKTexture* without any check.
+ case NativeObject::BOUND_FRAMEBUFFER_COLOR_IMAGEVIEW:
+ return (uint64_t)curFramebuffer_->GetFB()->color.imageView; // NOTE(review): curFramebuffer_ is dereferenced without a null check - confirm a framebuffer is always bound when this is queried.
+ default:
+ Crash(); // Unhandled NativeObject values are treated as a programming error.
+ return 0;
+ }
+}
+
} // namespace Draw
diff --git a/Common/GPU/thin3d.cpp b/Common/GPU/thin3d.cpp
index 1cc3c11fa0..acaa3dac0b 100644
--- a/Common/GPU/thin3d.cpp
+++ b/Common/GPU/thin3d.cpp
@@ -678,9 +678,9 @@ const char *Bugs::GetBugName(uint32_t bug) {
case COLORWRITEMASK_BROKEN_WITH_DEPTHTEST: return "COLORWRITEMASK_BROKEN_WITH_DEPTHTEST";
case BROKEN_FLAT_IN_SHADER: return "BROKEN_FLAT_IN_SHADER";
case EQUAL_WZ_CORRUPTS_DEPTH: return "EQUAL_WZ_CORRUPTS_DEPTH";
- case MALI_STENCIL_DISCARD_BUG: return "MALI_STENCIL_DISCARD_BUG";
case RASPBERRY_SHADER_COMP_HANG: return "RASPBERRY_SHADER_COMP_HANG";
case MALI_CONSTANT_LOAD_BUG: return "MALI_CONSTANT_LOAD_BUG";
+ case SUBPASS_FEEDBACK_BROKEN: return "SUBPASS_FEEDBACK_BROKEN";
default: return "(N/A)";
}
}
diff --git a/Common/GPU/thin3d.h b/Common/GPU/thin3d.h
index 843e6e8813..0fba6ee6f2 100644
--- a/Common/GPU/thin3d.h
+++ b/Common/GPU/thin3d.h
@@ -242,6 +242,7 @@ enum class NativeObject {
INIT_COMMANDBUFFER,
BOUND_TEXTURE0_IMAGEVIEW,
BOUND_TEXTURE1_IMAGEVIEW,
+ BOUND_FRAMEBUFFER_COLOR_IMAGEVIEW,
RENDER_MANAGER,
TEXTURE_VIEW,
NULL_IMAGEVIEW,
@@ -328,9 +329,9 @@ public:
COLORWRITEMASK_BROKEN_WITH_DEPTHTEST = 5,
BROKEN_FLAT_IN_SHADER = 6,
EQUAL_WZ_CORRUPTS_DEPTH = 7,
- MALI_STENCIL_DISCARD_BUG = 8,
- RASPBERRY_SHADER_COMP_HANG = 9,
- MALI_CONSTANT_LOAD_BUG = 10,
+ RASPBERRY_SHADER_COMP_HANG = 8,
+ MALI_CONSTANT_LOAD_BUG = 9,
+ SUBPASS_FEEDBACK_BROKEN = 10,
MAX_BUG,
};
@@ -546,6 +547,7 @@ struct DeviceCaps {
bool textureNPOTFullySupported;
bool fragmentShaderDepthWriteSupported;
bool textureDepthSupported;
+ bool blendMinMaxSupported;
std::string deviceName; // The device name to use when creating the thin3d context, to get the same one.
};
@@ -651,6 +653,9 @@ public:
// binding must be < MAX_TEXTURE_SLOTS (0, 1 are okay if it's 2).
virtual void BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int attachment) = 0;
+ // Framebuffer fetch / input attachment support, needs to be explicit in Vulkan.
+ virtual void BindCurrentFramebufferForColorInput() {}
+
// deprecated, only used by D3D9
virtual uintptr_t GetFramebufferAPITexture(Framebuffer *fbo, int channelBits, int attachment) {
return 0;
diff --git a/Common/GPU/thin3d_create.h b/Common/GPU/thin3d_create.h
index 702e7f45c2..fccb3b076c 100644
--- a/Common/GPU/thin3d_create.h
+++ b/Common/GPU/thin3d_create.h
@@ -31,6 +31,6 @@ DrawContext *T3DCreateDX9Context(IDirect3D9 *d3d, IDirect3D9Ex *d3dEx, int adapt
DrawContext *T3DCreateD3D11Context(ID3D11Device *device, ID3D11DeviceContext *context, ID3D11Device1 *device1, ID3D11DeviceContext1 *context1, D3D_FEATURE_LEVEL featureLevel, HWND hWnd, std::vector adapterNames);
#endif
-DrawContext *T3DCreateVulkanContext(VulkanContext *context, bool splitSubmit);
+DrawContext *T3DCreateVulkanContext(VulkanContext *context);
} // namespace Draw
diff --git a/Common/UI/Context.cpp b/Common/UI/Context.cpp
index e9fe8ed0c5..21c687bd2b 100644
--- a/Common/UI/Context.cpp
+++ b/Common/UI/Context.cpp
@@ -170,7 +170,7 @@ void UIContext::ActivateTopScissor() {
int h = std::max(0.0f, ceilf(scale_y * bounds.h));
if (x < 0 || y < 0 || x + w > pixel_xres || y + h > pixel_yres) {
// This won't actually report outside a game, but we can try.
- ERROR_LOG_REPORT(G3D, "UI scissor out of bounds: %d,%d-%d,%d / %d,%d", x, y, w, h, pixel_xres, pixel_yres);
+ ERROR_LOG_REPORT(G3D, "UI scissor out of bounds in %sScreen: %d,%d-%d,%d / %d,%d", screenTag_ ? screenTag_ : "N/A", x, y, w, h, pixel_xres, pixel_yres);
x = std::max(0, x);
y = std::max(0, y);
w = std::min(w, pixel_xres - x);
diff --git a/Common/UI/Context.h b/Common/UI/Context.h
index ad02b4027c..ac26ab0202 100644
--- a/Common/UI/Context.h
+++ b/Common/UI/Context.h
@@ -74,7 +74,6 @@ public:
const UI::Theme *theme;
// Utility methods
-
TextDrawer *Text() const { return textDrawer_; }
void SetFontStyle(const UI::FontStyle &style);
@@ -103,6 +102,10 @@ public:
void setUIAtlas(const std::string &name);
+ void SetScreenTag(const char *tag) { // Records the tag that ActivateTopScissor() puts in its out-of-bounds log message.
+ screenTag_ = tag; // Stores the pointer only (no copy), so the string must outlive its use.
+ }
+
private:
Draw::DrawContext *draw_ = nullptr;
Bounds bounds_;
@@ -126,4 +129,6 @@ private:
std::string lastUIAtlas_;
std::string UIAtlas_ = "ui_atlas.zim";
+
+ const char *screenTag_ = nullptr;
};
diff --git a/Common/UI/Screen.h b/Common/UI/Screen.h
index de24c74985..42f0830627 100644
--- a/Common/UI/Screen.h
+++ b/Common/UI/Screen.h
@@ -71,7 +71,7 @@ public:
// what screen it is.
virtual void *dialogData() { return 0; }
- virtual std::string tag() const { return std::string(""); }
+ virtual const char *tag() const = 0;
virtual bool isTransparent() const { return false; }
virtual bool isTopLevel() const { return false; }
diff --git a/Common/UI/UIScreen.cpp b/Common/UI/UIScreen.cpp
index 8026de37bc..ecebae3a81 100644
--- a/Common/UI/UIScreen.cpp
+++ b/Common/UI/UIScreen.cpp
@@ -117,6 +117,9 @@ void UIScreen::render() {
if (root_) {
UIContext *uiContext = screenManager()->getUIContext();
+
+ uiContext->SetScreenTag(tag());
+
UI::LayoutViewHierarchy(*uiContext, root_, ignoreInsets_);
uiContext->PushTransform({translation_, scale_, alpha_});
diff --git a/Common/UI/UIScreen.h b/Common/UI/UIScreen.h
index 43ec130d9d..4b113d3001 100644
--- a/Common/UI/UIScreen.h
+++ b/Common/UI/UIScreen.h
@@ -136,7 +136,7 @@ public:
void SetHiddenChoices(std::set hidden) {
hidden_ = hidden;
}
- virtual std::string tag() const override { return std::string("listpopup"); }
+ const char *tag() const override { return "listpopup"; }
UI::Event OnChoice;
@@ -187,6 +187,8 @@ public:
disabled_ = *value_ < 0;
}
+ const char *tag() const override { return "SliderPopup"; }
+
Event OnChange;
private:
@@ -214,6 +216,8 @@ public:
: PopupScreen(title, "OK", "Cancel"), units_(units), value_(value), originalValue_(*value), minValue_(minValue), maxValue_(maxValue), step_(step), changing_(false), liveUpdate_(liveUpdate) {}
void CreatePopupContents(UI::ViewGroup *parent) override;
+ const char *tag() const override { return "SliderFloatPopup"; }
+
Event OnChange;
private:
@@ -241,6 +245,8 @@ public:
: PopupScreen(title, "OK", "Cancel"), value_(value), placeholder_(placeholder), maxLen_(maxLen) {}
virtual void CreatePopupContents(ViewGroup *parent) override;
+ const char *tag() const override { return "TextEditPopup"; }
+
Event OnChange;
private:
diff --git a/Common/VR/VRRenderer.cpp b/Common/VR/VRRenderer.cpp
index 79e6114a61..248b2b1487 100644
--- a/Common/VR/VRRenderer.cpp
+++ b/Common/VR/VRRenderer.cpp
@@ -9,6 +9,7 @@
#include
#include
+XrFovf fov;
XrView* projections;
XrPosef invViewTransform[2];
XrFrameState frameState = {};
@@ -293,7 +294,12 @@ bool VR_InitFrame( engine_t* engine ) {
projections));
//
+ fov = {};
for (int eye = 0; eye < ovrMaxNumEyes; eye++) {
+ fov.angleLeft += projections[eye].fov.angleLeft / 2.0f;
+ fov.angleRight += projections[eye].fov.angleRight / 2.0f;
+ fov.angleUp += projections[eye].fov.angleUp / 2.0f;
+ fov.angleDown += projections[eye].fov.angleDown / 2.0f;
invViewTransform[eye] = projections[eye].pose;
}
@@ -353,10 +359,7 @@ void VR_FinishFrame( engine_t* engine ) {
for (int eye = 0; eye < ovrMaxNumEyes; eye++) {
int imageLayer = engine->appState.Renderer.Multiview ? eye : 0;
ovrFramebuffer* frameBuffer = &engine->appState.Renderer.FrameBuffer[0];
- XrFovf fov = projections[eye].fov;
- if (vrMode == VR_MODE_MONO_6DOF) {
- fov = projections[0].fov;
- } else if (!engine->appState.Renderer.Multiview) {
+ if ((vrMode != VR_MODE_MONO_6DOF) && !engine->appState.Renderer.Multiview) {
frameBuffer = &engine->appState.Renderer.FrameBuffer[eye];
}
@@ -463,7 +466,6 @@ void VR_BindFramebuffer(engine_t *engine) {
ovrMatrix4f VR_GetMatrix( VRMatrix matrix ) {
ovrMatrix4f output;
if ((matrix == VR_PROJECTION_MATRIX_LEFT_EYE) || (matrix == VR_PROJECTION_MATRIX_RIGHT_EYE)) {
- XrFovf fov = matrix == VR_PROJECTION_MATRIX_LEFT_EYE ? projections[0].fov : projections[1].fov;
float near = (float)vrConfig[VR_CONFIG_FOV_SCALE] / 200.0f;
output = ovrMatrix4f_CreateProjectionFov(fov.angleLeft, fov.angleRight, fov.angleUp, fov.angleDown, near, 0.0f );
} else if ((matrix == VR_VIEW_MATRIX_LEFT_EYE) || (matrix == VR_VIEW_MATRIX_RIGHT_EYE)) {
diff --git a/Core/Config.cpp b/Core/Config.cpp
index 27f97e89c5..3798362ef8 100644
--- a/Core/Config.cpp
+++ b/Core/Config.cpp
@@ -938,7 +938,6 @@ static ConfigSetting graphicsSettings[] = {
ReportedConfigSetting("FragmentTestCache", &g_Config.bFragmentTestCache, true, true, true),
ConfigSetting("GfxDebugOutput", &g_Config.bGfxDebugOutput, false, false, false),
- ConfigSetting("GfxDebugSplitSubmit", &g_Config.bGfxDebugSplitSubmit, false, false, false),
ConfigSetting("LogFrameDrops", &g_Config.bLogFrameDrops, false, true, false),
ConfigSetting("InflightFrames", &g_Config.iInflightFrames, 3, true, false),
diff --git a/Core/Config.h b/Core/Config.h
index 6ae172bfba..36ec3a760d 100644
--- a/Core/Config.h
+++ b/Core/Config.h
@@ -176,6 +176,7 @@ public:
bool bSustainedPerformanceMode; // Android: Slows clocks down to avoid overheating/speed fluctuations.
bool bIgnoreScreenInsets; // Android: Center screen disregarding insets if this is enabled.
bool bVSync;
+
int iFrameSkip;
int iFrameSkipType;
int iFastForwardMode; // See FastForwardMode in ConfigValues.h.
@@ -242,7 +243,6 @@ public:
bool bShaderChainRequires60FPS;
std::string sTextureShaderName;
bool bGfxDebugOutput;
- bool bGfxDebugSplitSubmit;
int iInflightFrames;
bool bRenderDuplicateFrames;
diff --git a/Core/Dialog/PSPGamedataInstallDialog.cpp b/Core/Dialog/PSPGamedataInstallDialog.cpp
index 9fe30db3c8..3bbe9442f1 100644
--- a/Core/Dialog/PSPGamedataInstallDialog.cpp
+++ b/Core/Dialog/PSPGamedataInstallDialog.cpp
@@ -36,8 +36,9 @@ const static u32 GAMEDATA_BYTES_PER_READ = 32768;
// If this is too high, some games (e.g. Senjou no Valkyria 3) will lag.
const static u32 GAMEDATA_READS_PER_UPDATE = 20;
-const u32 ERROR_UTILITY_GAMEDATA_MEMSTRICK_WRITE_PROTECTED = 0x80111903;
const u32 ERROR_UTILITY_GAMEDATA_MEMSTRICK_REMOVED = 0x80111901;
+const u32 ERROR_UTILITY_GAMEDATA_MEMSTRICK_WRITE_PROTECTED = 0x80111903;
+const u32 ERROR_UTILITY_GAMEDATA_INVALID_MODE = 0x80111908;
static const std::string SFO_FILENAME = "PARAM.SFO";
@@ -88,9 +89,14 @@ int PSPGamedataInstallDialog::Init(u32 paramAddr) {
}
int size = Memory::Read_U32(paramAddr);
+ if (size != 1424 && size != 1432) {
+ ERROR_LOG_REPORT(SCEUTILITY, "sceGamedataInstallInitStart: invalid param size %d", size);
+ return SCE_ERROR_UTILITY_INVALID_PARAM_SIZE;
+ }
+
memset(&request, 0, sizeof(request));
// Only copy the right size to support different request format
- Memory::Memcpy(&request, paramAddr, size);
+ Memory::Memcpy(&request, paramAddr, size, "sceGamedataInstallInitStart");
ChangeStatusInit(GAMEDATA_INIT_DELAY_US);
return 0;
@@ -100,6 +106,17 @@ int PSPGamedataInstallDialog::Update(int animSpeed) {
if (GetStatus() != SCE_UTILITY_STATUS_RUNNING)
return SCE_ERROR_UTILITY_INVALID_STATUS;
+ if (param->mode >= 2) {
+ param->common.result = ERROR_UTILITY_GAMEDATA_INVALID_MODE;
+ param.NotifyWrite("DialogResult");
+ ChangeStatus(SCE_UTILITY_STATUS_FINISHED, 0);
+ WARN_LOG_REPORT(SCEUTILITY, "sceUtilityGamedataInstallUpdate: invalid mode %d", param->mode);
+ return 0;
+ }
+
+ // TODO: param->mode == 1 should show a prompt to confirm, then a progress bar.
+ // Any other mode (i.e. 0 or negative) should proceed and show no UI.
+
// TODO: This should return error codes in some cases, like write failure.
// request.common.result must be updated for errors as well.
@@ -222,6 +239,9 @@ void PSPGamedataInstallDialog::WriteSfoFile() {
}
int PSPGamedataInstallDialog::Abort() {
+ param->common.result = 1;
+ param.NotifyWrite("DialogResult");
+
// TODO: Delete the files or anything?
return PSPDialog::Shutdown();
}
diff --git a/Core/Dialog/PSPGamedataInstallDialog.h b/Core/Dialog/PSPGamedataInstallDialog.h
index e669a31873..a74fdb5254 100644
--- a/Core/Dialog/PSPGamedataInstallDialog.h
+++ b/Core/Dialog/PSPGamedataInstallDialog.h
@@ -22,7 +22,7 @@
struct SceUtilityGamedataInstallParam {
pspUtilityDialogCommon common;
- u32_le unknown1;
+ s32_le mode;
char gameName[13];
char ignore1[3];
char dataName[20];
diff --git a/Core/HLE/sceKernel.cpp b/Core/HLE/sceKernel.cpp
index cb858c0e90..b9d1028b48 100644
--- a/Core/HLE/sceKernel.cpp
+++ b/Core/HLE/sceKernel.cpp
@@ -820,7 +820,7 @@ const HLEFunction ThreadManForUser[] =
{0X87D4DD36, &WrapI_IU, "sceKernelCancelReceiveMbx", 'i', "ix" },
{0XA8E8C846, &WrapI_IU, "sceKernelReferMbxStatus", 'i', "ip" },
- {0X7C0DC2A0, &WrapI_CIUUU, "sceKernelCreateMsgPipe", 'i', "sixxx" },
+ {0X7C0DC2A0, &WrapI_CIUUU, "sceKernelCreateMsgPipe", 'i', "sixxp" },
{0XF0B7DA1C, &WrapI_I, "sceKernelDeleteMsgPipe", 'i', "i" },
{0X876DBFAD, &WrapI_IUUUUU, "sceKernelSendMsgPipe", 'i', "ixxxxx" },
{0X7C41F2C2, &WrapI_IUUUUU, "sceKernelSendMsgPipeCB", 'i', "ixxxxx" },
@@ -831,7 +831,7 @@ const HLEFunction ThreadManForUser[] =
{0X349B864D, &WrapI_IUU, "sceKernelCancelMsgPipe", 'i', "ixx" },
{0X33BE4024, &WrapI_IU, "sceKernelReferMsgPipeStatus", 'i', "ip" },
- {0X56C039B5, &WrapI_CIUUU, "sceKernelCreateVpl", 'i', "sixxx" },
+ {0X56C039B5, &WrapI_CIUUU, "sceKernelCreateVpl", 'i', "sixxp" },
{0X89B3D48C, &WrapI_I, "sceKernelDeleteVpl", 'i', "i" },
{0XBED27435, &WrapI_IUUU, "sceKernelAllocateVpl", 'i', "ixxx", HLE_NOT_IN_INTERRUPT | HLE_NOT_DISPATCH_SUSPENDED },
{0XEC0A693F, &WrapI_IUUU, "sceKernelAllocateVplCB", 'i', "ixxx", HLE_NOT_IN_INTERRUPT | HLE_NOT_DISPATCH_SUSPENDED },
@@ -840,7 +840,7 @@ const HLEFunction ThreadManForUser[] =
{0X1D371B8A, &WrapI_IU, "sceKernelCancelVpl", 'i', "ix" },
{0X39810265, &WrapI_IU, "sceKernelReferVplStatus", 'i', "ip" },
- {0XC07BB470, &WrapI_CUUUUU, "sceKernelCreateFpl", 'i', "sxxxxx" },
+ {0XC07BB470, &WrapI_CUUUUU, "sceKernelCreateFpl", 'i', "sixxxp" },
{0XED1410E0, &WrapI_I, "sceKernelDeleteFpl", 'i', "i" },
{0XD979E9BF, &WrapI_IUU, "sceKernelAllocateFpl", 'i', "ixx", HLE_NOT_IN_INTERRUPT | HLE_NOT_DISPATCH_SUSPENDED },
{0XE7282CB6, &WrapI_IUU, "sceKernelAllocateFplCB", 'i', "ixx", HLE_NOT_IN_INTERRUPT | HLE_NOT_DISPATCH_SUSPENDED },
@@ -864,7 +864,7 @@ const HLEFunction ThreadManForUser[] =
{0XD8B299AE, &WrapU_IUUU, "sceKernelSetVTimerHandler", 'x', "ixxx" },
{0X53B00E9A, &WrapU_IU64UU, "sceKernelSetVTimerHandlerWide", 'x', "iXxx" },
- {0X8DAFF657, &WrapI_CUUUUU, "sceKernelCreateTlspl", 'i', "sxxxxx" },
+ {0X8DAFF657, &WrapI_CUUUUU, "sceKernelCreateTlspl", 'i', "sixxxp" },
{0X32BF938E, &WrapI_I, "sceKernelDeleteTlspl", 'i', "i" },
{0X721067F3, &WrapI_IU, "sceKernelReferTlsplStatus", 'i', "xp" },
// Not completely certain about args.
@@ -908,7 +908,7 @@ const HLEFunction ThreadManForKernel[] =
{0x1fb15a32, &WrapU_IU, "sceKernelSetEventFlag", 'x', "ix", HLE_KERNEL_SYSCALL },
{0x812346e4, &WrapU_IU, "sceKernelClearEventFlag", 'x', "ix", HLE_KERNEL_SYSCALL },
{0x402fcf22, &WrapI_IUUUU, "sceKernelWaitEventFlag", 'i', "ixxpp", HLE_NOT_IN_INTERRUPT | HLE_KERNEL_SYSCALL},
- {0xc07bb470, &WrapI_CUUUUU, "sceKernelCreateFpl", 'i', "sxxxxx" ,HLE_KERNEL_SYSCALL },
+ {0xc07bb470, &WrapI_CUUUUU, "sceKernelCreateFpl", 'i', "sixxxp" ,HLE_KERNEL_SYSCALL },
{0xed1410e0, &WrapI_I, "sceKernelDeleteFpl", 'i', "i" ,HLE_KERNEL_SYSCALL },
{0x623ae665, &WrapI_IU, "sceKernelTryAllocateFpl", 'i', "ix" ,HLE_KERNEL_SYSCALL },
{0x616403ba, &WrapI_I, "sceKernelTerminateThread", 'i', "i" ,HLE_KERNEL_SYSCALL },
@@ -932,7 +932,7 @@ const HLEFunction ThreadManForKernel[] =
{0x0D81716A, &WrapI_IU, "sceKernelPollMbx", 'i', "ix", HLE_KERNEL_SYSCALL },
{0x87D4DD36, &WrapI_IU, "sceKernelCancelReceiveMbx", 'i', "ix", HLE_KERNEL_SYSCALL },
{0xA8E8C846, &WrapI_IU, "sceKernelReferMbxStatus", 'i', "ip", HLE_KERNEL_SYSCALL },
- {0x56C039B5, &WrapI_CIUUU, "sceKernelCreateVpl", 'i', "sixxx", HLE_KERNEL_SYSCALL },
+ {0x56C039B5, &WrapI_CIUUU, "sceKernelCreateVpl", 'i', "sixxp", HLE_KERNEL_SYSCALL },
{0x89B3D48C, &WrapI_I, "sceKernelDeleteVpl", 'i', "i", HLE_KERNEL_SYSCALL },
{0xBED27435, &WrapI_IUUU, "sceKernelAllocateVpl", 'i', "ixxx", HLE_KERNEL_SYSCALL | HLE_NOT_IN_INTERRUPT | HLE_NOT_DISPATCH_SUSPENDED },
{0xEC0A693F, &WrapI_IUUU, "sceKernelAllocateVplCB", 'i', "ixxx", HLE_KERNEL_SYSCALL | HLE_NOT_IN_INTERRUPT | HLE_NOT_DISPATCH_SUSPENDED },
diff --git a/Core/HLE/sceKernelMemory.cpp b/Core/HLE/sceKernelMemory.cpp
index ac853d5bb3..935cbc8482 100644
--- a/Core/HLE/sceKernelMemory.cpp
+++ b/Core/HLE/sceKernelMemory.cpp
@@ -46,6 +46,7 @@ const int TLSPL_NUM_INDEXES = 16;
// STATE BEGIN
BlockAllocator userMemory(256);
BlockAllocator kernelMemory(256);
+BlockAllocator volatileMemory(256);
static int vplWaitTimer = -1;
static int fplWaitTimer = -1;
@@ -432,6 +433,7 @@ void __KernelMemoryInit()
MemBlockInfoInit();
kernelMemory.Init(PSP_GetKernelMemoryBase(), PSP_GetKernelMemoryEnd() - PSP_GetKernelMemoryBase(), false);
userMemory.Init(PSP_GetUserMemoryBase(), PSP_GetUserMemoryEnd() - PSP_GetUserMemoryBase(), false);
+ volatileMemory.Init(PSP_GetVolatileMemoryStart(), PSP_GetVolatileMemoryEnd() - PSP_GetVolatileMemoryStart(), false);
ParallelMemset(&g_threadManager, Memory::GetPointerWrite(PSP_GetKernelMemoryBase()), 0, PSP_GetUserMemoryEnd() - PSP_GetKernelMemoryBase());
NotifyMemInfo(MemBlockFlags::WRITE, PSP_GetKernelMemoryBase(), PSP_GetUserMemoryEnd() - PSP_GetKernelMemoryBase(), "MemInit");
INFO_LOG(SCEKERNEL, "Kernel and user memory pools initialized");
@@ -457,12 +459,14 @@ void __KernelMemoryInit()
void __KernelMemoryDoState(PointerWrap &p)
{
- auto s = p.Section("sceKernelMemory", 1, 2);
+ auto s = p.Section("sceKernelMemory", 1, 3);
if (!s)
return;
kernelMemory.DoState(p);
userMemory.DoState(p);
+ if (s >= 3)
+ volatileMemory.DoState(p);
Do(p, vplWaitTimer);
CoreTiming::RestoreRegisterEvent(vplWaitTimer, "VplTimeout", __KernelVplTimeout);
@@ -481,6 +485,11 @@ void __KernelMemoryDoState(PointerWrap &p)
void __KernelMemoryShutdown()
{
+#ifdef _DEBUG
+ INFO_LOG(SCEKERNEL, "Shutting down volatile memory pool: ");
+ volatileMemory.ListBlocks();
+#endif
+ volatileMemory.Shutdown();
#ifdef _DEBUG
INFO_LOG(SCEKERNEL,"Shutting down user memory pool: ");
userMemory.ListBlocks();
@@ -495,6 +504,56 @@ void __KernelMemoryShutdown()
MemBlockInfoShutdown();
}
+BlockAllocator *BlockAllocatorFromID(int id) {
+ switch (id) {
+ case 1:
+ case 3:
+ case 4:
+ if (hleIsKernelMode())
+ return &kernelMemory;
+ return nullptr;
+
+ case 2:
+ case 6:
+ return &userMemory;
+
+ case 8:
+ case 10:
+ if (hleIsKernelMode())
+ return &userMemory;
+ return nullptr;
+
+ case 5:
+ return &volatileMemory;
+
+ default:
+ break;
+ }
+
+ return nullptr;
+}
+
+int BlockAllocatorToID(const BlockAllocator *alloc) {
+ if (alloc == &kernelMemory)
+ return 1;
+ if (alloc == &userMemory)
+ return 2;
+ if (alloc == &volatileMemory)
+ return 5;
+ return 0;
+}
+
+BlockAllocator *BlockAllocatorFromAddr(u32 addr) {
+ addr &= 0x3FFFFFFF;
+ if (Memory::IsKernelAndNotVolatileAddress(addr))
+ return &kernelMemory;
+ if (Memory::IsKernelAddress(addr))
+ return &volatileMemory;
+ if (Memory::IsRAMAddress(addr))
+ return &userMemory;
+ return nullptr;
+}
+
enum SceKernelFplAttr
{
PSP_FPL_ATTR_FIFO = 0x0000,
@@ -580,29 +639,18 @@ static void __KernelSortFplThreads(FPL *fpl)
std::stable_sort(fpl->waitingThreads.begin(), fpl->waitingThreads.end(), __FplThreadSortPriority);
}
-int sceKernelCreateFpl(const char *name, u32 mpid, u32 attr, u32 blockSize, u32 numBlocks, u32 optPtr)
-{
+int sceKernelCreateFpl(const char *name, u32 mpid, u32 attr, u32 blockSize, u32 numBlocks, u32 optPtr) {
if (!name)
- {
- WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateFpl(): invalid name", SCE_KERNEL_ERROR_NO_MEMORY);
- return SCE_KERNEL_ERROR_NO_MEMORY;
- }
+ return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_NO_MEMORY, "invalid name");
if (mpid < 1 || mpid > 9 || mpid == 7)
- {
- WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateFpl(): invalid partition %d", SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, mpid);
- return SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT;
- }
- // We only support user right now.
- if (mpid != 2 && mpid != 6)
- {
- WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateFpl(): invalid partition %d", SCE_KERNEL_ERROR_ILLEGAL_PERM, mpid);
- return SCE_KERNEL_ERROR_ILLEGAL_PERM;
- }
+ return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, "invalid partition %d", mpid);
+
+ BlockAllocator *allocator = BlockAllocatorFromID(mpid);
+ if (allocator == nullptr)
+ return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_PERM, "invalid partition %d", mpid);
if (((attr & ~PSP_FPL_ATTR_KNOWN) & ~0xFF) != 0)
- {
- WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateFpl(): invalid attr parameter: %08x", SCE_KERNEL_ERROR_ILLEGAL_ATTR, attr);
- return SCE_KERNEL_ERROR_ILLEGAL_ATTR;
- }
+ return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ATTR, "invalid attr parameter: %08x", attr);
+
// There's probably a simpler way to get this same basic formula...
// This is based on results from a PSP.
bool illegalMemSize = blockSize == 0 || numBlocks == 0;
@@ -611,25 +659,16 @@ int sceKernelCreateFpl(const char *name, u32 mpid, u32 attr, u32 blockSize, u32
if (!illegalMemSize && (u64) numBlocks >= 0x100000000ULL / (((u64) blockSize + 3ULL) & ~3ULL))
illegalMemSize = true;
if (illegalMemSize)
- {
- WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateFpl(): invalid blockSize/count", SCE_KERNEL_ERROR_ILLEGAL_MEMSIZE);
- return SCE_KERNEL_ERROR_ILLEGAL_MEMSIZE;
- }
+ return hleReportWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_MEMSIZE, "invalid blockSize/count");
int alignment = 4;
- if (optPtr != 0)
- {
- u32 size = Memory::Read_U32(optPtr);
- if (size > 8)
- WARN_LOG_REPORT(SCEKERNEL, "sceKernelCreateFpl(): unsupported extra options, size = %d", size);
+ if (Memory::IsValidRange(optPtr, 4)) {
+ u32 size = Memory::ReadUnchecked_U32(optPtr);
if (size >= 4)
alignment = Memory::Read_U32(optPtr + 4);
// Must be a power of 2 to be valid.
if ((alignment & (alignment - 1)) != 0)
- {
- WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateFpl(): invalid alignment %d", SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, alignment);
- return SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT;
- }
+ return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, "invalid alignment %d", alignment);
}
if (alignment < 4)
@@ -638,9 +677,8 @@ int sceKernelCreateFpl(const char *name, u32 mpid, u32 attr, u32 blockSize, u32
int alignedSize = ((int)blockSize + alignment - 1) & ~(alignment - 1);
u32 totalSize = alignedSize * numBlocks;
bool atEnd = (attr & PSP_FPL_ATTR_HIGHMEM) != 0;
- u32 address = userMemory.Alloc(totalSize, atEnd, "FPL");
- if (address == (u32)-1)
- {
+ u32 address = allocator->Alloc(totalSize, atEnd, "FPL");
+ if (address == (u32)-1) {
DEBUG_LOG(SCEKERNEL, "sceKernelCreateFpl(\"%s\", partition=%i, attr=%08x, bsize=%i, nb=%i) FAILED - out of ram",
name, mpid, attr, blockSize, numBlocks);
return SCE_KERNEL_ERROR_NO_MEMORY;
@@ -682,7 +720,10 @@ int sceKernelDeleteFpl(SceUID uid)
if (wokeThreads)
hleReSchedule("fpl deleted");
- userMemory.Free(fpl->address);
+ BlockAllocator *alloc = BlockAllocatorFromAddr(fpl->address);
+ _assert_msg_(alloc != nullptr, "Should always have a valid allocator/address");
+ if (alloc)
+ alloc->Free(fpl->address);
return kernelObjects.Destroy(uid);
}
else
@@ -955,18 +996,23 @@ public:
alloc->Free(address);
}
bool IsValid() {return address != (u32)-1;}
- BlockAllocator *alloc;
void DoState(PointerWrap &p) override
{
- auto s = p.Section("PMB", 1);
+ auto s = p.Section("PMB", 1, 2);
if (!s)
return;
Do(p, address);
DoArray(p, name, sizeof(name));
+ if (s >= 2) {
+ int allocType = BlockAllocatorToID(alloc);
+ Do(p, allocType);
+ alloc = BlockAllocatorFromID(allocType);
+ }
}
+ BlockAllocator *alloc;
u32 address;
char name[32];
};
@@ -986,44 +1032,28 @@ static u32 sceKernelTotalFreeMemSize()
return retVal;
}
-int sceKernelAllocPartitionMemory(int partition, const char *name, int type, u32 size, u32 addr)
-{
- if (name == NULL)
- {
- WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelAllocPartitionMemory(): invalid name", SCE_KERNEL_ERROR_ERROR);
- return SCE_KERNEL_ERROR_ERROR;
- }
- if (size == 0)
- {
- WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelAllocPartitionMemory(): invalid size %x", SCE_KERNEL_ERROR_MEMBLOCK_ALLOC_FAILED, size);
- return SCE_KERNEL_ERROR_MEMBLOCK_ALLOC_FAILED;
+int sceKernelAllocPartitionMemory(int partition, const char *name, int type, u32 size, u32 addr) {
+ if (type < PSP_SMEM_Low || type > PSP_SMEM_HighAligned)
+ return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_MEMBLOCKTYPE, "invalid type %x", type);
+ // Alignment is only allowed for powers of 2.
+ if (type == PSP_SMEM_LowAligned || type == PSP_SMEM_HighAligned) {
+ if ((addr & (addr - 1)) != 0 || addr == 0)
+ return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ALIGNMENT_SIZE, "invalid alignment %x", addr);
}
if (partition < 1 || partition > 9 || partition == 7)
- {
- WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelAllocPartitionMemory(): invalid partition %x", SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, partition);
- return SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT;
- }
- // We only support user right now.
- if (partition != 2 && partition != 5 && partition != 6)
- {
- WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelAllocPartitionMemory(): invalid partition %x", SCE_KERNEL_ERROR_ILLEGAL_PARTITION, partition);
- return SCE_KERNEL_ERROR_ILLEGAL_PARTITION;
- }
- if (type < PSP_SMEM_Low || type > PSP_SMEM_HighAligned)
- {
- WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelAllocPartitionMemory(): invalid type %x", SCE_KERNEL_ERROR_ILLEGAL_MEMBLOCKTYPE, type);
- return SCE_KERNEL_ERROR_ILLEGAL_MEMBLOCKTYPE;
- }
- // Alignment is only allowed for powers of 2.
- if ((type == PSP_SMEM_LowAligned || type == PSP_SMEM_HighAligned) && ((addr & (addr - 1)) != 0 || addr == 0))
- {
- WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelAllocPartitionMemory(): invalid alignment %x", SCE_KERNEL_ERROR_ILLEGAL_ALIGNMENT_SIZE, addr);
- return SCE_KERNEL_ERROR_ILLEGAL_ALIGNMENT_SIZE;
- }
+ return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, "invalid partition %x", partition);
- PartitionMemoryBlock *block = new PartitionMemoryBlock(&userMemory, name, size, (MemblockType)type, addr);
- if (!block->IsValid())
- {
+ BlockAllocator *allocator = BlockAllocatorFromID(partition);
+ if (allocator == nullptr)
+ return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_PARTITION, "invalid partition %x", partition);
+
+ if (name == nullptr)
+ return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ERROR, "invalid name");
+ if (size == 0)
+ return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_MEMBLOCK_ALLOC_FAILED, "invalid size %x", size);
+
+ PartitionMemoryBlock *block = new PartitionMemoryBlock(allocator, name, size, (MemblockType)type, addr);
+ if (!block->IsValid()) {
delete block;
ERROR_LOG(SCEKERNEL, "sceKernelAllocPartitionMemory(partition = %i, %s, type= %i, size= %i, addr= %08x): allocation failed", partition, name, type, size, addr);
return SCE_KERNEL_ERROR_MEMBLOCK_ALLOC_FAILED;
@@ -1451,40 +1481,23 @@ static void __KernelSortVplThreads(VPL *vpl)
std::stable_sort(vpl->waitingThreads.begin(), vpl->waitingThreads.end(), __VplThreadSortPriority);
}
-SceUID sceKernelCreateVpl(const char *name, int partition, u32 attr, u32 vplSize, u32 optPtr)
-{
+SceUID sceKernelCreateVpl(const char *name, int partition, u32 attr, u32 vplSize, u32 optPtr) {
if (!name)
- {
- WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateVpl(): invalid name", SCE_KERNEL_ERROR_ERROR);
- return SCE_KERNEL_ERROR_ERROR;
- }
+ return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ERROR, "invalid name");
if (partition < 1 || partition > 9 || partition == 7)
- {
- WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateVpl(): invalid partition %d", SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, partition);
- return SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT;
- }
- // We only support user right now.
- if (partition != 2 && partition != 6)
- {
- WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateVpl(): invalid partition %d", SCE_KERNEL_ERROR_ILLEGAL_PERM, partition);
- return SCE_KERNEL_ERROR_ILLEGAL_PERM;
- }
+ return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, "invalid partition %d", partition);
+
+ BlockAllocator *allocator = BlockAllocatorFromID(partition);
+ if (allocator == nullptr)
+ return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_PERM, "invalid partition %d", partition);
+
if (((attr & ~PSP_VPL_ATTR_KNOWN) & ~0xFF) != 0)
- {
- WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateVpl(): invalid attr parameter: %08x", SCE_KERNEL_ERROR_ILLEGAL_ATTR, attr);
- return SCE_KERNEL_ERROR_ILLEGAL_ATTR;
- }
+ return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ATTR, "invalid attr parameter: %08x", attr);
if (vplSize == 0)
- {
- WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateVpl(): invalid size", SCE_KERNEL_ERROR_ILLEGAL_MEMSIZE);
- return SCE_KERNEL_ERROR_ILLEGAL_MEMSIZE;
- }
+ return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_MEMSIZE, "invalid size");
// Block Allocator seems to A-OK this, let's stop it here.
if (vplSize >= 0x80000000)
- {
- WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateVpl(): way too big size", SCE_KERNEL_ERROR_NO_MEMORY);
- return SCE_KERNEL_ERROR_NO_MEMORY;
- }
+ return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_NO_MEMORY, "way too big size");
// Can't have that little space in a Vpl, sorry.
if (vplSize <= 0x30)
@@ -1493,12 +1506,9 @@ SceUID sceKernelCreateVpl(const char *name, int partition, u32 attr, u32 vplSize
// We ignore the upalign to 256 and do it ourselves by 8.
u32 allocSize = vplSize;
- u32 memBlockPtr = userMemory.Alloc(allocSize, (attr & PSP_VPL_ATTR_HIGHMEM) != 0, "VPL");
+ u32 memBlockPtr = allocator->Alloc(allocSize, (attr & PSP_VPL_ATTR_HIGHMEM) != 0, "VPL");
if (memBlockPtr == (u32)-1)
- {
- ERROR_LOG(SCEKERNEL, "sceKernelCreateVpl(): Failed to allocate %i bytes of pool data", vplSize);
- return SCE_KERNEL_ERROR_NO_MEMORY;
- }
+ return hleLogError(SCEKERNEL, SCE_KERNEL_ERROR_NO_MEMORY, "failed to allocate %i bytes of pool data", vplSize);
VPL *vpl = new VPL;
SceUID id = kernelObjects.Create(vpl);
@@ -1542,7 +1552,10 @@ int sceKernelDeleteVpl(SceUID uid)
if (wokeThreads)
hleReSchedule("vpl deleted");
- userMemory.Free(vpl->address);
+ BlockAllocator *alloc = BlockAllocatorFromAddr(vpl->address);
+ _assert_msg_(alloc != nullptr, "Should always have a valid allocator/address");
+ if (alloc)
+ alloc->Free(vpl->address);
kernelObjects.Destroy(uid);
return 0;
}
@@ -2044,29 +2057,17 @@ void __KernelTlsplThreadEnd(SceUID threadID)
tlsplThreadEndChecks.erase(locked.first, locked.second);
}
-SceUID sceKernelCreateTlspl(const char *name, u32 partition, u32 attr, u32 blockSize, u32 count, u32 optionsPtr)
-{
+SceUID sceKernelCreateTlspl(const char *name, u32 partition, u32 attr, u32 blockSize, u32 count, u32 optionsPtr) {
if (!name)
- {
- WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateTlspl(): invalid name", SCE_KERNEL_ERROR_NO_MEMORY);
- return SCE_KERNEL_ERROR_NO_MEMORY;
- }
+ return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_NO_MEMORY, "invalid name");
if ((attr & ~PSP_TLSPL_ATTR_KNOWN) >= 0x100)
- {
- WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateTlspl(): invalid attr parameter: %08x", SCE_KERNEL_ERROR_ILLEGAL_ATTR, attr);
- return SCE_KERNEL_ERROR_ILLEGAL_ATTR;
- }
+ return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ATTR, "invalid attr parameter: %08x", attr);
if (partition < 1 || partition > 9 || partition == 7)
- {
- WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateTlspl(): invalid partition %d", SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, partition);
- return SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT;
- }
- // We only support user right now.
- if (partition != 2 && partition != 6)
- {
- WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateTlspl(): invalid partition %d", SCE_KERNEL_ERROR_ILLEGAL_PERM, partition);
- return SCE_KERNEL_ERROR_ILLEGAL_PERM;
- }
+ return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, "invalid partition %d", partition);
+
+ BlockAllocator *allocator = BlockAllocatorFromID(partition);
+ if (allocator == nullptr)
+ return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_PERM, "invalid partition %x", partition);
// There's probably a simpler way to get this same basic formula...
// This is based on results from a PSP.
@@ -2076,41 +2077,29 @@ SceUID sceKernelCreateTlspl(const char *name, u32 partition, u32 attr, u32 block
if (!illegalMemSize && (u64) count >= 0x100000000ULL / (((u64) blockSize + 3ULL) & ~3ULL))
illegalMemSize = true;
if (illegalMemSize)
- {
- WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateTlspl(): invalid blockSize/count", SCE_KERNEL_ERROR_ILLEGAL_MEMSIZE);
- return SCE_KERNEL_ERROR_ILLEGAL_MEMSIZE;
- }
+ return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_MEMSIZE, "invalid blockSize/count");
int index = -1;
- for (int i = 0; i < TLSPL_NUM_INDEXES; ++i)
- if (tlsplUsedIndexes[i] == false)
- {
+ for (int i = 0; i < TLSPL_NUM_INDEXES; ++i) {
+ if (tlsplUsedIndexes[i] == false) {
index = i;
break;
}
+ }
if (index == -1)
- {
- WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateTlspl(): ran out of indexes for TLS pools", PSP_ERROR_TOO_MANY_TLSPL);
- return PSP_ERROR_TOO_MANY_TLSPL;
- }
+ return hleLogWarning(SCEKERNEL, PSP_ERROR_TOO_MANY_TLSPL, "ran out of indexes for TLS pools");
// Unless otherwise specified, we align to 4 bytes (a mips word.)
u32 alignment = 4;
- if (optionsPtr != 0)
- {
- u32 size = Memory::Read_U32(optionsPtr);
- if (size > 8)
- WARN_LOG_REPORT(SCEKERNEL, "sceKernelCreateTlspl(%s) unsupported options parameter, size = %d", name, size);
+ if (Memory::IsValidRange(optionsPtr, 4)) {
+ u32 size = Memory::ReadUnchecked_U32(optionsPtr);
if (size >= 8)
alignment = Memory::Read_U32(optionsPtr + 4);
// Note that 0 intentionally is allowed.
if ((alignment & (alignment - 1)) != 0)
- {
- ERROR_LOG_REPORT(SCEKERNEL, "sceKernelCreateTlspl(%s): alignment is not a power of 2: %d", name, alignment);
- return SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT;
- }
+ return hleLogError(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, "alignment is not a power of 2: %d", alignment);
// This goes for 0, 1, and 2. Can't have less than 4 byte alignment.
if (alignment < 4)
alignment = 4;
@@ -2120,16 +2109,13 @@ SceUID sceKernelCreateTlspl(const char *name, u32 partition, u32 attr, u32 block
u32 alignedSize = (blockSize + alignment - 1) & ~(alignment - 1);
u32 totalSize = alignedSize * count;
- u32 blockPtr = userMemory.Alloc(totalSize, (attr & PSP_TLSPL_ATTR_HIGHMEM) != 0, name);
+ u32 blockPtr = allocator->Alloc(totalSize, (attr & PSP_TLSPL_ATTR_HIGHMEM) != 0, name);
#ifdef _DEBUG
- userMemory.ListBlocks();
+ allocator->ListBlocks();
#endif
- if (blockPtr == (u32) -1)
- {
- ERROR_LOG(SCEKERNEL, "%08x=sceKernelCreateTlspl(%s, %d, %08x, %d, %d, %08x): failed to allocate memory", SCE_KERNEL_ERROR_NO_MEMORY, name, partition, attr, blockSize, count, optionsPtr);
- return SCE_KERNEL_ERROR_NO_MEMORY;
- }
+ if (blockPtr == (u32)-1)
+ return hleLogError(SCEKERNEL, SCE_KERNEL_ERROR_NO_MEMORY, "failed to allocate memory");
TLSPL *tls = new TLSPL();
SceUID id = kernelObjects.Create(tls);
@@ -2148,9 +2134,7 @@ SceUID sceKernelCreateTlspl(const char *name, u32 partition, u32 attr, u32 block
tls->alignment = alignment;
tls->usage.resize(count, 0);
- WARN_LOG(SCEKERNEL, "%08x=sceKernelCreateTlspl(%s, %d, %08x, %d, %d, %08x)", id, name, partition, attr, blockSize, count, optionsPtr);
-
- return id;
+ return hleLogSuccessInfoI(SCEKERNEL, id);
}
int sceKernelDeleteTlspl(SceUID uid)
@@ -2178,7 +2162,10 @@ int sceKernelDeleteTlspl(SceUID uid)
HLEKernel::ResumeFromWait(threadID, WAITTYPE_TLSPL, uid, 0);
hleReSchedule("deleted tlspl");
- userMemory.Free(tls->address);
+ BlockAllocator *allocator = BlockAllocatorFromAddr(tls->address);
+ _assert_msg_(allocator != nullptr, "Should always have a valid allocator/address");
+ if (allocator)
+ allocator->Free(tls->address);
tlsplUsedIndexes[tls->ntls.index] = false;
kernelObjects.Destroy(uid);
}
diff --git a/Core/HLE/sceKernelMemory.h b/Core/HLE/sceKernelMemory.h
index 29d7356d00..b217974170 100644
--- a/Core/HLE/sceKernelMemory.h
+++ b/Core/HLE/sceKernelMemory.h
@@ -40,6 +40,10 @@ KernelObject *__KernelMemoryVPLObject();
KernelObject *__KernelMemoryPMBObject();
KernelObject *__KernelTlsplObject();
+BlockAllocator *BlockAllocatorFromID(int id);
+int BlockAllocatorToID(const BlockAllocator *alloc);
+BlockAllocator *BlockAllocatorFromAddr(u32 addr);
+
SceUID sceKernelCreateVpl(const char *name, int partition, u32 attr, u32 vplSize, u32 optPtr);
int sceKernelDeleteVpl(SceUID uid);
int sceKernelAllocateVpl(SceUID uid, u32 size, u32 addrPtr, u32 timeoutPtr);
diff --git a/Core/HLE/sceKernelMsgPipe.cpp b/Core/HLE/sceKernelMsgPipe.cpp
index b16e8060e2..0ff34c6290 100644
--- a/Core/HLE/sceKernelMsgPipe.cpp
+++ b/Core/HLE/sceKernelMsgPipe.cpp
@@ -140,10 +140,13 @@ struct MsgPipe : public KernelObject
int GetIDType() const override { return SCE_KERNEL_TMID_Mpipe; }
MsgPipe() : buffer(0) {}
- ~MsgPipe()
- {
- if (buffer != 0)
- userMemory.Free(buffer);
+ ~MsgPipe() {
+ if (buffer != 0) {
+ BlockAllocator *alloc = BlockAllocatorFromAddr(buffer);
+ _assert_msg_(alloc != nullptr, "Should always have a valid allocator/address");
+ if (alloc)
+ alloc->Free(buffer);
+ }
}
u32 GetUsedSize()
@@ -667,41 +670,26 @@ void __KernelMsgPipeDoState(PointerWrap &p)
CoreTiming::RestoreRegisterEvent(waitTimer, "MsgPipeTimeout", __KernelMsgPipeTimeout);
}
-int sceKernelCreateMsgPipe(const char *name, int partition, u32 attr, u32 size, u32 optionsPtr)
-{
+int sceKernelCreateMsgPipe(const char *name, int partition, u32 attr, u32 size, u32 optionsPtr) {
if (!name)
- {
- WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateMsgPipe(): invalid name", SCE_KERNEL_ERROR_NO_MEMORY);
- return SCE_KERNEL_ERROR_NO_MEMORY;
- }
+ return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_NO_MEMORY, "invalid name");
if (partition < 1 || partition > 9 || partition == 7)
- {
- WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateMsgPipe(): invalid partition %d", SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, partition);
- return SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT;
- }
- // We only support user right now.
- if (partition != 2 && partition != 6)
- {
- WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateMsgPipe(): invalid partition %d", SCE_KERNEL_ERROR_ILLEGAL_PERM, partition);
- return SCE_KERNEL_ERROR_ILLEGAL_PERM;
- }
+ return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, "invalid partition %d", partition);
+
+ BlockAllocator *allocator = BlockAllocatorFromID(partition);
+ if (allocator == nullptr)
+ return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_PERM, "invalid partition %d", partition);
+
if ((attr & ~SCE_KERNEL_MPA_KNOWN) >= 0x100)
- {
- WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateEventFlag(%s): invalid attr parameter: %08x", SCE_KERNEL_ERROR_ILLEGAL_ATTR, name, attr);
- return SCE_KERNEL_ERROR_ILLEGAL_ATTR;
- }
+ return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ATTR, "invalid attr parameter: %08x", attr);
u32 memBlockPtr = 0;
- if (size != 0)
- {
+ if (size != 0) {
// We ignore the upalign to 256.
u32 allocSize = size;
- memBlockPtr = userMemory.Alloc(allocSize, (attr & SCE_KERNEL_MPA_HIGHMEM) != 0, "MsgPipe");
+ memBlockPtr = allocator->Alloc(allocSize, (attr & SCE_KERNEL_MPA_HIGHMEM) != 0, "MsgPipe");
if (memBlockPtr == (u32)-1)
- {
- ERROR_LOG(SCEKERNEL, "%08x=sceKernelCreateEventFlag(%s): Failed to allocate %i bytes for buffer", SCE_KERNEL_ERROR_NO_MEMORY, name, size);
- return SCE_KERNEL_ERROR_NO_MEMORY;
- }
+ return hleLogError(SCEKERNEL, SCE_KERNEL_ERROR_NO_MEMORY, "failed to allocate %i bytes for buffer", size);
}
MsgPipe *m = new MsgPipe();
diff --git a/Core/HLE/sceUtility.cpp b/Core/HLE/sceUtility.cpp
index e6a5124600..f1d7f73d0b 100644
--- a/Core/HLE/sceUtility.cpp
+++ b/Core/HLE/sceUtility.cpp
@@ -744,11 +744,14 @@ static int sceUtilityGamedataInstallInitStart(u32 paramsAddr) {
}
ActivateDialog(UtilityDialogType::GAMEDATAINSTALL);
- return hleLogSuccessInfoX(SCEUTILITY, gamedataInstallDialog->Init(paramsAddr));
+ int result = gamedataInstallDialog->Init(paramsAddr);
+ if (result < 0)
+ DeactivateDialog();
+ return hleLogSuccessInfoX(SCEUTILITY, result);
}
static int sceUtilityGamedataInstallShutdownStart() {
- if (currentDialogType != UtilityDialogType::GAMEDATAINSTALL) {
+ if (!currentDialogActive || currentDialogType != UtilityDialogType::GAMEDATAINSTALL) {
return hleLogWarning(SCEUTILITY, SCE_ERROR_UTILITY_WRONG_TYPE, "wrong dialog type");
}
@@ -757,7 +760,7 @@ static int sceUtilityGamedataInstallShutdownStart() {
}
static int sceUtilityGamedataInstallUpdate(int animSpeed) {
- if (currentDialogType != UtilityDialogType::GAMEDATAINSTALL) {
+ if (!currentDialogActive || currentDialogType != UtilityDialogType::GAMEDATAINSTALL) {
return hleLogWarning(SCEUTILITY, SCE_ERROR_UTILITY_WRONG_TYPE, "wrong dialog type");
}
@@ -765,8 +768,9 @@ static int sceUtilityGamedataInstallUpdate(int animSpeed) {
}
static int sceUtilityGamedataInstallGetStatus() {
- if (currentDialogType != UtilityDialogType::GAMEDATAINSTALL) {
+ if (!currentDialogActive || currentDialogType != UtilityDialogType::GAMEDATAINSTALL) {
// This is called incorrectly all the time by some games. So let's not bother warning.
+ hleEatCycles(200);
return hleLogDebug(SCEUTILITY, SCE_ERROR_UTILITY_WRONG_TYPE, "wrong dialog type");
}
@@ -776,7 +780,7 @@ static int sceUtilityGamedataInstallGetStatus() {
}
static int sceUtilityGamedataInstallAbort() {
- if (currentDialogType != UtilityDialogType::GAMEDATAINSTALL) {
+ if (!currentDialogActive || currentDialogType != UtilityDialogType::GAMEDATAINSTALL) {
return hleLogWarning(SCEUTILITY, SCE_ERROR_UTILITY_WRONG_TYPE, "wrong dialog type");
}
diff --git a/Core/System.cpp b/Core/System.cpp
index 29b1765fb0..9c7519edd6 100644
--- a/Core/System.cpp
+++ b/Core/System.cpp
@@ -413,11 +413,11 @@ bool PSP_InitStart(const CoreParameter &coreParam, std::string *error_string) {
}
#if defined(_WIN32) && PPSSPP_ARCH(AMD64)
- INFO_LOG(BOOT, "PPSSPP %s Windows 64 bit", PPSSPP_GIT_VERSION);
+ NOTICE_LOG(BOOT, "PPSSPP %s Windows 64 bit", PPSSPP_GIT_VERSION);
#elif defined(_WIN32) && !PPSSPP_ARCH(AMD64)
- INFO_LOG(BOOT, "PPSSPP %s Windows 32 bit", PPSSPP_GIT_VERSION);
+ NOTICE_LOG(BOOT, "PPSSPP %s Windows 32 bit", PPSSPP_GIT_VERSION);
#else
- INFO_LOG(BOOT, "PPSSPP %s", PPSSPP_GIT_VERSION);
+ NOTICE_LOG(BOOT, "PPSSPP %s", PPSSPP_GIT_VERSION);
#endif
Core_NotifyLifecycle(CoreLifecycle::STARTING);
diff --git a/GPU/Common/DrawEngineCommon.cpp b/GPU/Common/DrawEngineCommon.cpp
index d361458111..5f4256158c 100644
--- a/GPU/Common/DrawEngineCommon.cpp
+++ b/GPU/Common/DrawEngineCommon.cpp
@@ -19,6 +19,7 @@
#include "Common/Data/Convert/ColorConv.h"
#include "Common/Profiler/Profiler.h"
+#include "Common/LogReporting.h"
#include "Core/Config.h"
#include "GPU/Common/DrawEngineCommon.h"
#include "GPU/Common/SplineCommon.h"
@@ -188,6 +189,57 @@ u32 DrawEngineCommon::NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr,
return DrawEngineCommon::NormalizeVertices(outPtr, bufPtr, inPtr, dec, lowerBound, upperBound, vertType);
}
+void DrawEngineCommon::DispatchSubmitImm(GEPrimitiveType prim, TransformedVertex *buffer, int vertexCount, int cullMode, bool continuation) {
+ // Submits already-transformed immediate-mode vertices by re-encoding them as through-mode vertices.
+ // Plumbing them through properly would mean injecting pretransformed vertices in the middle of
+ // SoftwareTransform(), which would take a lot of refactoring. The only known use is Thrillville, which
+ // only uses it to clear, so we just use color and pos. Note: `continuation` is currently unused here.
+ struct ImmVertex {
+ float uv[2];
+ uint32_t color;
+ float xyz[3];
+ };
+ std::vector<ImmVertex> temp;
+ temp.resize(vertexCount);
+ uint32_t color1Used = 0;
+ for (int i = 0; i < vertexCount; i++) {
+ // Since we're sending through, scale back up to w/h.
+ temp[i].uv[0] = buffer[i].u * gstate.getTextureWidth(0);
+ temp[i].uv[1] = buffer[i].v * gstate.getTextureHeight(0);
+ temp[i].color = buffer[i].color0_32;
+ temp[i].xyz[0] = buffer[i].pos[0];
+ temp[i].xyz[1] = buffer[i].pos[1];
+ temp[i].xyz[2] = buffer[i].pos[2];
+ color1Used |= buffer[i].color1_32;
+ }
+ int vtype = GE_VTYPE_TC_FLOAT | GE_VTYPE_POS_FLOAT | GE_VTYPE_COL_8888 | GE_VTYPE_THROUGH;
+ // TODO: Handle fog and secondary color somehow?
+
+ if (gstate.isFogEnabled() && !gstate.isModeThrough()) {
+ WARN_LOG_REPORT_ONCE(geimmfog, G3D, "Imm vertex used fog");
+ }
+ if (color1Used != 0 && gstate.isUsingSecondaryColor() && !gstate.isModeThrough()) {
+ WARN_LOG_REPORT_ONCE(geimmcolor1, G3D, "Imm vertex used secondary color");
+ }
+
+ bool prevThrough = gstate.isModeThrough();
+ // Code checks this reg directly, not just the vtype ID, so temporarily force the through bit.
+ if (!prevThrough) {
+ gstate.vertType |= GE_VTYPE_THROUGH;
+ gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_UVSCALEOFFSET | DIRTY_CULLRANGE);
+ }
+
+ int bytesRead = 0;
+ uint32_t vertTypeID = GetVertTypeID(vtype, 0);
+ SubmitPrim(temp.data(), nullptr, prim, vertexCount, vertTypeID, cullMode, &bytesRead);
+ DispatchFlush();
+
+ if (!prevThrough) {
+ gstate.vertType &= ~GE_VTYPE_THROUGH;
+ gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_UVSCALEOFFSET | DIRTY_CULLRANGE);
+ }
+}
+
// This code has plenty of potential for optimization.
//
// It does the simplest and safest test possible: If all points of a bbox is outside a single of
@@ -484,12 +536,12 @@ u32 DrawEngineCommon::NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr,
return GE_VTYPE_TC_FLOAT | GE_VTYPE_COL_8888 | GE_VTYPE_NRM_FLOAT | GE_VTYPE_POS_FLOAT | (vertType & (GE_VTYPE_IDX_MASK | GE_VTYPE_THROUGH));
}
-void DrawEngineCommon::ApplyFramebufferRead(bool *fboTexNeedsBind) {
+void DrawEngineCommon::ApplyFramebufferRead(FBOTexState *fboTexState) {
if (gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH)) {
- *fboTexNeedsBind = false;
+ *fboTexState = FBO_TEX_READ_FRAMEBUFFER;
} else {
gpuStats.numCopiesForShaderBlend++;
- *fboTexNeedsBind = true;
+ *fboTexState = FBO_TEX_COPY_BIND_TEX;
}
gstate_c.Dirty(DIRTY_SHADERBLEND);
diff --git a/GPU/Common/DrawEngineCommon.h b/GPU/Common/DrawEngineCommon.h
index a8997454d9..40c397649b 100644
--- a/GPU/Common/DrawEngineCommon.h
+++ b/GPU/Common/DrawEngineCommon.h
@@ -46,6 +46,12 @@ enum {
TEX_SLOT_SPLINE_WEIGHTS_V = 6,
};
+enum FBOTexState {
+ FBO_TEX_NONE, // presumably "no framebuffer read needed" - TODO confirm against callers
+ FBO_TEX_COPY_BIND_TEX, // set by ApplyFramebufferRead() when framebuffer fetch is unsupported (counted in numCopiesForShaderBlend)
+ FBO_TEX_READ_FRAMEBUFFER, // set by ApplyFramebufferRead() when GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH is available
+};
+
inline uint32_t GetVertTypeID(uint32_t vertType, int uvGenMode) {
// As the decoder depends on the UVGenMode when we use UV prescale, we simply mash it
// into the top of the verttype where there are unused bits.
@@ -84,10 +90,7 @@ public:
SubmitPrim(verts, inds, prim, vertexCount, vertTypeID, cullMode, bytesRead);
}
- virtual void DispatchSubmitImm(const void *verts, const void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead) {
- SubmitPrim(verts, inds, prim, vertexCount, vertTypeID, cullMode, bytesRead);
- DispatchFlush();
- }
+ virtual void DispatchSubmitImm(GEPrimitiveType prim, TransformedVertex *buffer, int vertexCount, int cullMode, bool continuation);
bool TestBoundingBox(const void* control_points, int vertexCount, u32 vertType, int *bytesRead);
@@ -130,7 +133,7 @@ protected:
// Vertex decoding
void DecodeVertsStep(u8 *dest, int &i, int &decodedVerts);
- void ApplyFramebufferRead(bool *fboTexNeedsBind);
+ void ApplyFramebufferRead(FBOTexState *fboTexState);
inline int IndexSize(u32 vtype) const {
const u32 indexType = (vtype & GE_VTYPE_IDX_MASK);
diff --git a/GPU/Common/FragmentShaderGenerator.cpp b/GPU/Common/FragmentShaderGenerator.cpp
index d37b4a772d..e2088731af 100644
--- a/GPU/Common/FragmentShaderGenerator.cpp
+++ b/GPU/Common/FragmentShaderGenerator.cpp
@@ -134,10 +134,12 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
GELogicOp replaceLogicOpType = isModeClear ? GE_LOGIC_COPY : (GELogicOp)id.Bits(FS_BIT_REPLACE_LOGIC_OP, 4);
bool replaceLogicOp = replaceLogicOpType != GE_LOGIC_COPY && compat.bitwiseOps;
- bool readFramebuffer = replaceBlend == REPLACE_BLEND_READ_FRAMEBUFFER || colorWriteMask || replaceLogicOp;
- bool readFramebufferTex = readFramebuffer && !gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH);
+ bool needFramebufferRead = replaceBlend == REPLACE_BLEND_READ_FRAMEBUFFER || colorWriteMask || replaceLogicOp;
- bool needFragCoord = readFramebuffer || gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT);
+ bool fetchFramebuffer = needFramebufferRead && gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH);
+ bool readFramebufferTex = needFramebufferRead && !gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH);
+
+ bool needFragCoord = readFramebufferTex || gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT);
bool writeDepth = gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT);
if (shaderDepalMode != ShaderDepalMode::OFF && !doTexture) {
@@ -157,6 +159,11 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
if (readFramebufferTex) {
WRITE(p, "layout (binding = 1) uniform sampler2D fbotex;\n");
+ } else if (fetchFramebuffer) {
+ WRITE(p, "layout (input_attachment_index = 0, binding = 9) uniform subpassInput inputColor;\n");
+ if (fragmentShaderFlags) {
+ *fragmentShaderFlags |= FragmentShaderFlags::INPUT_ATTACHMENT;
+ }
}
if (shaderDepalMode != ShaderDepalMode::OFF) {
@@ -416,7 +423,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
if (!strcmp(compat.fragColor0, "fragColor0")) {
const char *qualifierColor0 = "out";
- if (readFramebuffer && compat.lastFragData && !strcmp(compat.lastFragData, compat.fragColor0)) {
+ if (fetchFramebuffer && compat.lastFragData && !strcmp(compat.lastFragData, compat.fragColor0)) {
qualifierColor0 = "inout";
}
// Output the output color definitions.
@@ -492,20 +499,26 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
}
// Two things read from the old framebuffer - shader replacement blending and bit-level masking.
- if (readFramebuffer) {
+ if (readFramebufferTex) {
if (compat.shaderLanguage == HLSL_D3D11) {
WRITE(p, " vec4 destColor = fbotex.Load(int3((int)gl_FragCoord.x, (int)gl_FragCoord.y, 0));\n");
} else if (compat.shaderLanguage == HLSL_D3D9) {
WRITE(p, " vec4 destColor = tex2D(fbotex, gl_FragCoord.xy * u_fbotexSize.xy);\n", compat.texture);
- } else if (gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH)) {
- // If we have EXT_shader_framebuffer_fetch / ARM_shader_framebuffer_fetch, we skip the blit.
- // We can just read the prev value more directly.
- WRITE(p, " lowp vec4 destColor = %s;\n", compat.lastFragData);
} else if (!compat.texelFetch) {
WRITE(p, " lowp vec4 destColor = %s(fbotex, gl_FragCoord.xy * u_fbotexSize.xy);\n", compat.texture);
} else {
WRITE(p, " lowp vec4 destColor = %s(fbotex, ivec2(gl_FragCoord.x, gl_FragCoord.y), 0);\n", compat.texelFetch);
}
+ } else if (fetchFramebuffer) {
+ // With EXT_shader_framebuffer_fetch / ARM_shader_framebuffer_fetch (GLSL) or a subpass input
+ // attachment (Vulkan), we skip the blit and read the previous value more directly.
+ if (compat.shaderLanguage == GLSL_3xx) {
+ WRITE(p, " lowp vec4 destColor = %s;\n", compat.lastFragData);
+ } else if (compat.shaderLanguage == GLSL_VULKAN) {
+ WRITE(p, " lowp vec4 destColor = subpassLoad(inputColor);\n");
+ } else {
+ _assert_msg_(false, "Need fetch destColor, but not a compatible language");
+ }
}
if (isModeClear) {
diff --git a/GPU/Common/FragmentShaderGenerator.h b/GPU/Common/FragmentShaderGenerator.h
index 85c651cf8b..8f358fa7ff 100644
--- a/GPU/Common/FragmentShaderGenerator.h
+++ b/GPU/Common/FragmentShaderGenerator.h
@@ -42,7 +42,7 @@ struct FShaderID;
// Can technically be deduced from the fragment shader ID, but this is safer.
enum class FragmentShaderFlags : u32 {
- FS_FLAG_INPUT_ATTACHMENT = 1,
+ INPUT_ATTACHMENT = 1,
};
ENUM_CLASS_BITOPS(FragmentShaderFlags);
diff --git a/GPU/Common/GPUStateUtils.cpp b/GPU/Common/GPUStateUtils.cpp
index fb90850cb9..2b8bb84dc1 100644
--- a/GPU/Common/GPUStateUtils.cpp
+++ b/GPU/Common/GPUStateUtils.cpp
@@ -231,6 +231,7 @@ StencilValueType ReplaceAlphaWithStencilType() {
case GE_FORMAT_8888:
case GE_FORMAT_INVALID:
case GE_FORMAT_DEPTH16:
+ case GE_FORMAT_CLUT8:
switch (gstate.getStencilOpZPass()) {
case GE_STENCILOP_REPLACE:
// TODO: Could detect zero here and force ZERO - less uniform updates?
@@ -859,66 +860,63 @@ static inline bool blendColorSimilar(uint32_t a, uint32_t b, int margin = 25) {
static bool SimulateLogicOpIfNeeded(BlendFactor &srcBlend, BlendFactor &dstBlend, BlendEq &blendEq) {
// Note: our shader solution applies logic ops BEFORE blending, not correctly after.
// This is however fine for the most common ones, like CLEAR/NOOP/SET, etc.
- if (!gstate_c.Supports(GPU_SUPPORTS_LOGIC_OP)) {
- if (gstate.isLogicOpEnabled()) {
- switch (gstate.getLogicOp()) {
- case GE_LOGIC_CLEAR:
- srcBlend = BlendFactor::ZERO;
- dstBlend = BlendFactor::ZERO;
- blendEq = BlendEq::ADD;
- return true;
- case GE_LOGIC_AND:
- case GE_LOGIC_AND_REVERSE:
- WARN_LOG_REPORT_ONCE(d3dLogicOpAnd, G3D, "Unsupported AND logic op: %x", gstate.getLogicOp());
- break;
- case GE_LOGIC_COPY:
- // This is the same as off.
- break;
- case GE_LOGIC_COPY_INVERTED:
- // Handled in the shader.
- break;
- case GE_LOGIC_AND_INVERTED:
- case GE_LOGIC_NOR:
- case GE_LOGIC_NAND:
- case GE_LOGIC_EQUIV:
- // Handled in the shader.
- WARN_LOG_REPORT_ONCE(d3dLogicOpAndInverted, G3D, "Attempted invert for logic op: %x", gstate.getLogicOp());
- break;
- case GE_LOGIC_INVERTED:
- srcBlend = BlendFactor::ONE;
- dstBlend = BlendFactor::ONE;
- blendEq = BlendEq::SUBTRACT;
- WARN_LOG_REPORT_ONCE(d3dLogicOpInverted, G3D, "Attempted inverse for logic op: %x", gstate.getLogicOp());
- return true;
- case GE_LOGIC_NOOP:
- srcBlend = BlendFactor::ZERO;
- dstBlend = BlendFactor::ONE;
- blendEq = BlendEq::ADD;
- return true;
- case GE_LOGIC_XOR:
- WARN_LOG_REPORT_ONCE(d3dLogicOpOrXor, G3D, "Unsupported XOR logic op: %x", gstate.getLogicOp());
- break;
- case GE_LOGIC_OR:
- case GE_LOGIC_OR_INVERTED:
- // Inverted in shader.
- srcBlend = BlendFactor::ONE;
- dstBlend = BlendFactor::ONE;
- blendEq = BlendEq::ADD;
- WARN_LOG_REPORT_ONCE(d3dLogicOpOr, G3D, "Attempted or for logic op: %x", gstate.getLogicOp());
- return true;
- case GE_LOGIC_OR_REVERSE:
- WARN_LOG_REPORT_ONCE(d3dLogicOpOrReverse, G3D, "Unsupported OR REVERSE logic op: %x", gstate.getLogicOp());
- break;
- case GE_LOGIC_SET:
- srcBlend = BlendFactor::ONE;
- dstBlend = BlendFactor::ONE;
- blendEq = BlendEq::ADD;
- WARN_LOG_REPORT_ONCE(d3dLogicOpSet, G3D, "Attempted set for logic op: %x", gstate.getLogicOp());
- return true;
- }
+ if (!gstate_c.Supports(GPU_SUPPORTS_LOGIC_OP) && gstate.isLogicOpEnabled()) {
+ switch (gstate.getLogicOp()) {
+ case GE_LOGIC_CLEAR:
+ srcBlend = BlendFactor::ZERO;
+ dstBlend = BlendFactor::ZERO;
+ blendEq = BlendEq::ADD;
+ return true;
+ case GE_LOGIC_AND:
+ case GE_LOGIC_AND_REVERSE:
+ WARN_LOG_REPORT_ONCE(d3dLogicOpAnd, G3D, "Unsupported AND logic op: %x", gstate.getLogicOp());
+ break;
+ case GE_LOGIC_COPY:
+ // This is the same as off.
+ break;
+ case GE_LOGIC_COPY_INVERTED:
+ // Handled in the shader.
+ break;
+ case GE_LOGIC_AND_INVERTED:
+ case GE_LOGIC_NOR:
+ case GE_LOGIC_NAND:
+ case GE_LOGIC_EQUIV:
+ // Handled in the shader.
+ WARN_LOG_REPORT_ONCE(d3dLogicOpAndInverted, G3D, "Attempted invert for logic op: %x", gstate.getLogicOp());
+ break;
+ case GE_LOGIC_INVERTED:
+ srcBlend = BlendFactor::ONE;
+ dstBlend = BlendFactor::ONE;
+ blendEq = BlendEq::SUBTRACT;
+ WARN_LOG_REPORT_ONCE(d3dLogicOpInverted, G3D, "Attempted inverse for logic op: %x", gstate.getLogicOp());
+ return true;
+ case GE_LOGIC_NOOP:
+ srcBlend = BlendFactor::ZERO;
+ dstBlend = BlendFactor::ONE;
+ blendEq = BlendEq::ADD;
+ return true;
+ case GE_LOGIC_XOR:
+ WARN_LOG_REPORT_ONCE(d3dLogicOpOrXor, G3D, "Unsupported XOR logic op: %x", gstate.getLogicOp());
+ break;
+ case GE_LOGIC_OR:
+ case GE_LOGIC_OR_INVERTED:
+ // Inverted in shader.
+ srcBlend = BlendFactor::ONE;
+ dstBlend = BlendFactor::ONE;
+ blendEq = BlendEq::ADD;
+ WARN_LOG_REPORT_ONCE(d3dLogicOpOr, G3D, "Attempted or for logic op: %x", gstate.getLogicOp());
+ return true;
+ case GE_LOGIC_OR_REVERSE:
+ WARN_LOG_REPORT_ONCE(d3dLogicOpOrReverse, G3D, "Unsupported OR REVERSE logic op: %x", gstate.getLogicOp());
+ break;
+ case GE_LOGIC_SET:
+ srcBlend = BlendFactor::ONE;
+ dstBlend = BlendFactor::ONE;
+ blendEq = BlendEq::ADD;
+ WARN_LOG_REPORT_ONCE(d3dLogicOpSet, G3D, "Attempted set for logic op: %x", gstate.getLogicOp());
+ return true;
}
}
-
return false;
}
@@ -1080,6 +1078,12 @@ static void ConvertBlendState(GenericBlendState &blendState, bool forceReplaceBl
case REPLACE_BLEND_NO:
// We may still want to do something about stencil -> alpha.
ApplyStencilReplaceAndLogicOpIgnoreBlend(replaceAlphaWithStencil, blendState);
+
+ if (forceReplaceBlend) {
+ // If this is true, the logic and mask replacements will be applied, at least. In that case,
+ // we should not apply any logic op simulation.
+ blendState.simulateLogicOpType = LOGICOPTYPE_NORMAL;
+ }
return;
case REPLACE_BLEND_BLUE_TO_ALPHA:
diff --git a/GPU/Common/ShaderId.cpp b/GPU/Common/ShaderId.cpp
index 7ca42ccba6..6345816604 100644
--- a/GPU/Common/ShaderId.cpp
+++ b/GPU/Common/ShaderId.cpp
@@ -349,13 +349,10 @@ void ComputeFragmentShaderID(FShaderID *id_out, const ComputedPipelineState &pip
id.SetBit(FS_BIT_COLOR_WRITEMASK, colorWriteMask);
- if (g_Config.bVendorBugChecksEnabled) {
- if (bugs.Has(Draw::Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL)) {
- id.SetBit(FS_BIT_NO_DEPTH_CANNOT_DISCARD_STENCIL, !IsStencilTestOutputDisabled() && !gstate.isDepthWriteEnabled());
- } else if (bugs.Has(Draw::Bugs::MALI_STENCIL_DISCARD_BUG) && PSP_CoreParameter().compat.flags().MaliDepthStencilBugWorkaround) {
- // Very similar driver bug to the Adreno one, with the same workaround (though might look into if there are cheaper ones!)
- // Keeping the conditions separate since it can probably be made tighter.
- id.SetBit(FS_BIT_NO_DEPTH_CANNOT_DISCARD_STENCIL, !IsStencilTestOutputDisabled() && (!gstate.isDepthTestEnabled() || !gstate.isDepthWriteEnabled()));
+ if (g_Config.bVendorBugChecksEnabled && bugs.Has(Draw::Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL)) {
+ bool stencilWithoutDepth = !IsStencilTestOutputDisabled() && (!gstate.isDepthTestEnabled() || !gstate.isDepthWriteEnabled());
+ if (stencilWithoutDepth) {
+ id.SetBit(FS_BIT_NO_DEPTH_CANNOT_DISCARD_STENCIL, stencilWithoutDepth);
}
}
}
diff --git a/GPU/Common/StencilCommon.cpp b/GPU/Common/StencilCommon.cpp
index 81749ef678..561c6a2857 100644
--- a/GPU/Common/StencilCommon.cpp
+++ b/GPU/Common/StencilCommon.cpp
@@ -171,6 +171,7 @@ bool FramebufferManagerCommon::PerformStencilUpload(u32 addr, int size, StencilU
break;
case GE_FORMAT_INVALID:
case GE_FORMAT_DEPTH16:
+ case GE_FORMAT_CLUT8:
// Inconceivable.
_assert_(false);
break;
diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp
index 66d70f7c02..127d491010 100644
--- a/GPU/Common/TextureCacheCommon.cpp
+++ b/GPU/Common/TextureCacheCommon.cpp
@@ -37,6 +37,7 @@
#include "GPU/Common/ShaderId.h"
#include "GPU/Common/GPUStateUtils.h"
#include "GPU/Debugger/Debugger.h"
+#include "GPU/Debugger/Record.h"
#include "GPU/GPUCommon.h"
#include "GPU/GPUInterface.h"
#include "GPU/GPUState.h"
@@ -292,11 +293,18 @@ SamplerCacheKey TextureCacheCommon::GetSamplingParams(int maxLevel, const TexCac
SamplerCacheKey TextureCacheCommon::GetFramebufferSamplingParams(u16 bufferWidth, u16 bufferHeight) {
SamplerCacheKey key = GetSamplingParams(0, nullptr);
+ // In case auto max quality was on, restore min filt. Another fix for water in Outrun.
+ if (g_Config.iTexFiltering == TEX_FILTER_AUTO_MAX_QUALITY) {
+ int minFilt = gstate.texfilter & 0x7;
+ key.minFilt = minFilt & 1;
+ }
+
// Kill any mipmapping settings.
key.mipEnable = false;
key.mipFilt = false;
key.aniso = 0.0;
key.maxLevel = 0.0f;
+ key.lodBias = 0.0f;
// Often the framebuffer will not match the texture size. We'll wrap/clamp in the shader in that case.
int w = gstate.getTextureWidth(0);
@@ -1260,14 +1268,17 @@ void TextureCacheCommon::LoadClut(u32 clutAddr, u32 loadBytes) {
// It's possible for a game to load CLUT outside valid memory without crashing, should result in zeroes.
u32 bytes = Memory::ValidSize(clutAddr, loadBytes);
- if (clutRenderAddress_ != 0xFFFFFFFF && PSP_CoreParameter().compat.flags().AllowDownloadCLUT) {
+ bool performDownload = PSP_CoreParameter().compat.flags().AllowDownloadCLUT;
+ if (GPURecord::IsActive())
+ performDownload = true;
+ if (clutRenderAddress_ != 0xFFFFFFFF && performDownload) {
framebufferManager_->DownloadFramebufferForClut(clutRenderAddress_, clutRenderOffset_ + bytes);
Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
if (bytes < loadBytes) {
memset((u8 *)clutBufRaw_ + bytes, 0x00, loadBytes - bytes);
}
} else {
- // Here we could check for clutRenderAddres_ != 0xFFFFFFFF and zero the CLUT or something,
+ // Here we could check for clutRenderAddress_ != 0xFFFFFFFF and zero the CLUT or something,
// but choosing not to for now. Though the results of loading the CLUT from RAM here is
// almost certainly going to be bogus.
#ifdef _M_SSE
@@ -1986,6 +1997,9 @@ static bool CanDepalettize(GETextureFormat texFormat, GEBufferFormat bufferForma
return true;
}
break;
+ case GE_FORMAT_CLUT8:
+ // Shouldn't happen here.
+ return false;
}
WARN_LOG(G3D, "Invalid CLUT/framebuffer combination: %s vs %s", GeTextureFormatToString(texFormat), GeBufferFormatToString(bufferFormat));
return false;
diff --git a/GPU/Common/VertexDecoderArm.cpp b/GPU/Common/VertexDecoderArm.cpp
index e3d5b11f6b..2b93b563f8 100644
--- a/GPU/Common/VertexDecoderArm.cpp
+++ b/GPU/Common/VertexDecoderArm.cpp
@@ -872,22 +872,14 @@ void VertexDecoderJitCache::Jit_NormalFloat() {
STMIA(scratchReg, false, 3, tempReg1, tempReg2, tempReg3);
}
-// Through expands into floats, always. Might want to look at changing this.
void VertexDecoderJitCache::Jit_PosS8Through() {
- DEBUG_LOG_REPORT_ONCE(vertexS8Through, G3D, "Using S8 positions in throughmode");
_dbg_assert_msg_(fpScratchReg + 1 == fpScratchReg2, "VertexDecoder fpScratchRegs must be in order.");
_dbg_assert_msg_(fpScratchReg2 + 1 == fpScratchReg3, "VertexDecoder fpScratchRegs must be in order.");
- // TODO: SIMD
- LDRSB(tempReg1, srcReg, dec_->posoff);
- LDRSB(tempReg2, srcReg, dec_->posoff + 1);
- LDRB(tempReg3, srcReg, dec_->posoff + 2);
- static const ARMReg tr[3] = { tempReg1, tempReg2, tempReg3 };
- static const ARMReg fr[3] = { fpScratchReg, fpScratchReg2, fpScratchReg3 };
+ // 8-bit positions in throughmode always decode to 0, depth included.
+ VEOR(neonScratchReg, neonScratchReg, neonScratchReg);
+ VEOR(neonScratchReg2, neonScratchReg, neonScratchReg);
ADD(scratchReg, dstReg, dec_->decFmt.posoff);
- VMOV(neonScratchReg, tempReg1, tempReg2);
- VMOV(neonScratchReg2, tempReg3, tempReg3);
- VCVT(F_32 | I_SIGNED, neonScratchRegQ, neonScratchRegQ);
VST1(F_32, neonScratchReg, scratchReg, 2, ALIGN_NONE);
}
diff --git a/GPU/Common/VertexDecoderArm64.cpp b/GPU/Common/VertexDecoderArm64.cpp
index 5ff1d605f3..0ad04dbe5c 100644
--- a/GPU/Common/VertexDecoderArm64.cpp
+++ b/GPU/Common/VertexDecoderArm64.cpp
@@ -668,15 +668,11 @@ void VertexDecoderJitCache::Jit_PosFloat() {
}
void VertexDecoderJitCache::Jit_PosS8Through() {
- LDRSB(INDEX_UNSIGNED, tempReg1, srcReg, dec_->posoff);
- LDRSB(INDEX_UNSIGNED, tempReg2, srcReg, dec_->posoff + 1);
- LDRB(INDEX_UNSIGNED, tempReg3, srcReg, dec_->posoff + 2);
- fp.SCVTF(fpScratchReg, tempReg1);
- fp.SCVTF(fpScratchReg2, tempReg2);
- fp.SCVTF(fpScratchReg3, tempReg3);
+ // 8-bit positions in throughmode always decode to 0, depth included.
+ fp.EOR(fpScratchReg, fpScratchReg, fpScratchReg);
STR(INDEX_UNSIGNED, fpScratchReg, dstReg, dec_->decFmt.posoff);
- STR(INDEX_UNSIGNED, fpScratchReg2, dstReg, dec_->decFmt.posoff + 4);
- STR(INDEX_UNSIGNED, fpScratchReg3, dstReg, dec_->decFmt.posoff + 8);
+ STR(INDEX_UNSIGNED, fpScratchReg, dstReg, dec_->decFmt.posoff + 4);
+ STR(INDEX_UNSIGNED, fpScratchReg, dstReg, dec_->decFmt.posoff + 8);
}
void VertexDecoderJitCache::Jit_PosS16Through() {
diff --git a/GPU/Common/VertexDecoderCommon.cpp b/GPU/Common/VertexDecoderCommon.cpp
index a9b4a1039a..62eb20ef84 100644
--- a/GPU/Common/VertexDecoderCommon.cpp
+++ b/GPU/Common/VertexDecoderCommon.cpp
@@ -773,14 +773,20 @@ void VertexDecoder::Step_PosFloatSkin() const
Vec3ByMatrix43(pos, fn, skinMatrix);
}
-void VertexDecoder::Step_PosS8Through() const
-{
+void VertexDecoder::Step_PosInvalid() const {
+ // Invalid positions are just culled. Simulate by forcing every component to +infinity.
float *v = (float *)(decoded_ + decFmt.posoff);
- const s8 *sv = (const s8 *)(ptr_ + posoff);
- const u8 *uv = (const u8 *)(ptr_ + posoff);
- v[0] = sv[0];
- v[1] = sv[1];
- v[2] = uv[2];
+ v[0] = std::numeric_limits<float>::infinity();
+ v[1] = std::numeric_limits<float>::infinity();
+ v[2] = std::numeric_limits<float>::infinity();
+}
+
+void VertexDecoder::Step_PosS8Through() const {
+ // In throughmode, 8-bit positions always decode to zero - x, y and depth alike.
+ float *pos = (float *)(decoded_ + decFmt.posoff);
+ pos[0] = 0.0f;
+ pos[1] = 0.0f;
+ pos[2] = 0.0f;
}
void VertexDecoder::Step_PosS16Through() const
@@ -1023,35 +1029,35 @@ static const StepFunction nrmstep_morphskin[4] = {
};
static const StepFunction posstep[4] = {
- &VertexDecoder::Step_PosS8,
+ &VertexDecoder::Step_PosInvalid,
&VertexDecoder::Step_PosS8,
&VertexDecoder::Step_PosS16,
&VertexDecoder::Step_PosFloat,
};
static const StepFunction posstep_skin[4] = {
- &VertexDecoder::Step_PosS8Skin,
+ &VertexDecoder::Step_PosInvalid,
&VertexDecoder::Step_PosS8Skin,
&VertexDecoder::Step_PosS16Skin,
&VertexDecoder::Step_PosFloatSkin,
};
static const StepFunction posstep_morph[4] = {
- &VertexDecoder::Step_PosS8Morph,
+ &VertexDecoder::Step_PosInvalid,
&VertexDecoder::Step_PosS8Morph,
&VertexDecoder::Step_PosS16Morph,
&VertexDecoder::Step_PosFloatMorph,
};
static const StepFunction posstep_morph_skin[4] = {
- &VertexDecoder::Step_PosS8MorphSkin,
+ &VertexDecoder::Step_PosInvalid,
&VertexDecoder::Step_PosS8MorphSkin,
&VertexDecoder::Step_PosS16MorphSkin,
&VertexDecoder::Step_PosFloatMorphSkin,
};
static const StepFunction posstep_through[4] = {
- &VertexDecoder::Step_PosS8Through,
+ &VertexDecoder::Step_PosInvalid,
&VertexDecoder::Step_PosS8Through,
&VertexDecoder::Step_PosS16Through,
&VertexDecoder::Step_PosFloatThrough,
@@ -1224,9 +1230,8 @@ void VertexDecoder::SetVertexType(u32 fmt, const VertexDecoderOptions &options,
bool reportNoPos = false;
if (!pos) {
reportNoPos = true;
- pos = 1;
}
- if (pos) { // there's always a position
+ if (pos >= 0) { // there's always a position
size = align(size, posalign[pos]);
posoff = size;
size += possize[pos];
diff --git a/GPU/Common/VertexDecoderCommon.h b/GPU/Common/VertexDecoderCommon.h
index 983f76f9b2..6a06093902 100644
--- a/GPU/Common/VertexDecoderCommon.h
+++ b/GPU/Common/VertexDecoderCommon.h
@@ -433,6 +433,7 @@ public:
void Step_PosS16MorphSkin() const;
void Step_PosFloatMorphSkin() const;
+ void Step_PosInvalid() const;
void Step_PosS8Through() const;
void Step_PosS16Through() const;
void Step_PosFloatThrough() const;
diff --git a/GPU/Common/VertexDecoderX86.cpp b/GPU/Common/VertexDecoderX86.cpp
index d684078989..13aabe2df3 100644
--- a/GPU/Common/VertexDecoderX86.cpp
+++ b/GPU/Common/VertexDecoderX86.cpp
@@ -1345,14 +1345,9 @@ void VertexDecoderJitCache::Jit_NormalFloatSkin() {
// Through expands into floats, always. Might want to look at changing this.
void VertexDecoderJitCache::Jit_PosS8Through() {
- DEBUG_LOG_REPORT_ONCE(vertexS8Through, G3D, "Using S8 positions in throughmode");
// SIMD doesn't really matter since this isn't useful on hardware.
+ XORPS(fpScratchReg, R(fpScratchReg));
for (int i = 0; i < 3; i++) {
- if (i == 2)
- MOVZX(32, 8, tempReg1, MDisp(srcReg, dec_->posoff + i));
- else
- MOVSX(32, 8, tempReg1, MDisp(srcReg, dec_->posoff + i));
- CVTSI2SS(fpScratchReg, R(tempReg1));
MOVSS(MDisp(dstReg, dec_->decFmt.posoff + i * 4), fpScratchReg);
}
}
diff --git a/GPU/Common/VertexShaderGenerator.cpp b/GPU/Common/VertexShaderGenerator.cpp
index ccda9af86b..16cce87d0f 100644
--- a/GPU/Common/VertexShaderGenerator.cpp
+++ b/GPU/Common/VertexShaderGenerator.cpp
@@ -142,10 +142,11 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
if (gl_extensions.EXT_gpu_shader4) {
gl_exts.push_back("#extension GL_EXT_gpu_shader4 : enable");
}
- if (gl_extensions.EXT_clip_cull_distance && id.Bit(VS_BIT_VERTEX_RANGE_CULLING)) {
+ bool useClamp = gstate_c.Supports(GPU_SUPPORTS_DEPTH_CLAMP) && !id.Bit(VS_BIT_IS_THROUGH);
+ if (gl_extensions.EXT_clip_cull_distance && (id.Bit(VS_BIT_VERTEX_RANGE_CULLING) || useClamp)) {
gl_exts.push_back("#extension GL_EXT_clip_cull_distance : enable");
}
- if (gl_extensions.APPLE_clip_distance && id.Bit(VS_BIT_VERTEX_RANGE_CULLING)) {
+ if (gl_extensions.APPLE_clip_distance && (id.Bit(VS_BIT_VERTEX_RANGE_CULLING) || useClamp)) {
gl_exts.push_back("#extension GL_APPLE_clip_distance : enable");
}
if (gl_extensions.ARB_cull_distance && id.Bit(VS_BIT_VERTEX_RANGE_CULLING)) {
@@ -227,6 +228,10 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
bool texCoordInVec3 = false;
bool vertexRangeCulling = id.Bit(VS_BIT_VERTEX_RANGE_CULLING) && !isModeThrough;
+ bool clipClampedDepth = !isModeThrough && gstate_c.Supports(GPU_SUPPORTS_DEPTH_CLAMP) && gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE);
+ const char *vertexRangeClipSuffix = "[0]";
+ if (vertexRangeCulling && clipClampedDepth)
+ vertexRangeClipSuffix = "[2]";
if (compat.shaderLanguage == GLSL_VULKAN) {
WRITE(p, "\n");
@@ -419,8 +424,15 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
WRITE(p, " vec4 gl_Position : POSITION;\n");
} else {
WRITE(p, " vec4 gl_Position : SV_Position;\n");
- if (vertexRangeCulling && gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE)) {
- WRITE(p, " float gl_ClipDistance : SV_ClipDistance0;\n");
+ bool clipRange = vertexRangeCulling && gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE);
+ if (clipClampedDepth && clipRange) {
+ WRITE(p, " float3 gl_ClipDistance : SV_ClipDistance;\n");
+ vertexRangeClipSuffix = ".z";
+ } else if (clipClampedDepth) {
+ WRITE(p, " float2 gl_ClipDistance : SV_ClipDistance;\n");
+ } else if (clipRange) {
+ WRITE(p, " float gl_ClipDistance : SV_ClipDistance;\n");
+ vertexRangeClipSuffix = "";
}
if (vertexRangeCulling && gstate_c.Supports(GPU_SUPPORTS_CULL_DISTANCE)) {
WRITE(p, " float2 gl_CullDistance : SV_CullDistance0;\n");
@@ -1177,8 +1189,37 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
WRITE(p, " %sv_fogdepth = (viewPos.z + u_fogcoef.x) * u_fogcoef.y;\n", compat.vsOutPrefix);
}
- if (vertexRangeCulling && !IsVRBuild()) {
+ if (clipClampedDepth || (vertexRangeCulling && !IsVRBuild())) {
WRITE(p, " vec3 projPos = outPos.xyz / outPos.w;\n");
+ }
+
+ if (clipClampedDepth) {
+ const char *clip0 = compat.shaderLanguage == HLSL_D3D11 ? ".x" : "[0]";
+ const char *clip1 = compat.shaderLanguage == HLSL_D3D11 ? ".y" : "[1]";
+ WRITE(p, " mediump float integerZ = projPos.z * u_depthRange.x + u_depthRange.y;\n");
+
+ // This should clip against minz, but only when it's above zero.
+ if (ShaderLanguageIsOpenGL(compat.shaderLanguage)) {
+ // On OpenGL/GLES, these values account for the -1 -> 1 range.
+ WRITE(p, " if (u_depthRange.y - u_depthRange.x >= 1.0) {\n");
+ } else {
+ // Everywhere else, it's 0 -> 1, simpler.
+ WRITE(p, " if (u_depthRange.y >= 1.0) {\n");
+ }
+ WRITE(p, " %sgl_ClipDistance%s = integerZ;\n", compat.vsOutPrefix, clip0);
+ WRITE(p, " } else {\n");
+ WRITE(p, " %sgl_ClipDistance%s = 0.0;\n", compat.vsOutPrefix, clip0);
+ WRITE(p, " }\n");
+
+ // This is similar, but for maxz when it's below 65535.0. -1/0 don't matter here.
+ WRITE(p, " if (u_depthRange.x + u_depthRange.y <= 65534.0) {\n");
+ WRITE(p, " %sgl_ClipDistance%s = 65535.0 - integerZ;\n", compat.vsOutPrefix, clip1);
+ WRITE(p, " } else {\n");
+ WRITE(p, " %sgl_ClipDistance%s = 0.0;\n", compat.vsOutPrefix, clip1);
+ WRITE(p, " }\n");
+ }
+
+ if (vertexRangeCulling && !IsVRBuild()) {
WRITE(p, " float projZ = (projPos.z - u_depthRange.z) * u_depthRange.w;\n");
// Vertex range culling doesn't happen when Z clips, note sign of w is important.
WRITE(p, " if (u_cullRangeMin.w <= 0.0 || projZ * outPos.w > -outPos.w) {\n");
@@ -1194,12 +1235,11 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
WRITE(p, " }\n");
WRITE(p, " }\n");
- const char *clip0 = compat.shaderLanguage == HLSL_D3D11 ? "" : "[0]";
const char *cull0 = compat.shaderLanguage == HLSL_D3D11 ? ".x" : "[0]";
const char *cull1 = compat.shaderLanguage == HLSL_D3D11 ? ".y" : "[1]";
if (gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE)) {
// TODO: Not rectangles...
- WRITE(p, " %sgl_ClipDistance%s = projZ * outPos.w + outPos.w;\n", compat.vsOutPrefix, clip0);
+ WRITE(p, " %sgl_ClipDistance%s = projZ * outPos.w + outPos.w;\n", compat.vsOutPrefix, vertexRangeClipSuffix);
}
if (gstate_c.Supports(GPU_SUPPORTS_CULL_DISTANCE)) {
// Cull any triangle fully outside in the same direction when depth clamp enabled.
diff --git a/GPU/D3D11/GPU_D3D11.cpp b/GPU/D3D11/GPU_D3D11.cpp
index 3cfe0c4b37..bfbd12f190 100644
--- a/GPU/D3D11/GPU_D3D11.cpp
+++ b/GPU/D3D11/GPU_D3D11.cpp
@@ -82,7 +82,7 @@ GPU_D3D11::GPU_D3D11(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
// No need to flush before the tex scale/offset commands if we are baking
// the tex scale/offset into the vertices anyway.
UpdateCmdInfo();
- CheckGPUFeatures();
+ gstate_c.featureFlags = CheckGPUFeatures();
BuildReportingInfo();
@@ -100,40 +100,16 @@ GPU_D3D11::~GPU_D3D11() {
stockD3D11.Destroy();
}
-void GPU_D3D11::CheckGPUFeatures() {
- u32 features = 0;
-
- features |= GPU_SUPPORTS_BLEND_MINMAX;
+u32 GPU_D3D11::CheckGPUFeatures() const {
+ u32 features = GPUCommon::CheckGPUFeatures();
// Accurate depth is required because the Direct3D API does not support inverse Z.
// So we cannot incorrectly use the viewport transform as the depth range on Direct3D.
// TODO: Breaks text in PaRappa for some reason?
features |= GPU_SUPPORTS_ACCURATE_DEPTH;
-#ifndef _M_ARM
- // TODO: Do proper feature detection
- features |= GPU_SUPPORTS_ANISOTROPY;
-#endif
-
- features |= GPU_SUPPORTS_DEPTH_TEXTURE;
- features |= GPU_SUPPORTS_TEXTURE_NPOT;
- if (draw_->GetDeviceCaps().dualSourceBlend)
- features |= GPU_SUPPORTS_DUALSOURCE_BLEND;
if (draw_->GetDeviceCaps().depthClampSupported)
features |= GPU_SUPPORTS_DEPTH_CLAMP;
- if (draw_->GetDeviceCaps().clipDistanceSupported)
- features |= GPU_SUPPORTS_CLIP_DISTANCE;
- if (draw_->GetDeviceCaps().cullDistanceSupported)
- features |= GPU_SUPPORTS_CULL_DISTANCE;
- if (!draw_->GetBugs().Has(Draw::Bugs::BROKEN_NAN_IN_CONDITIONAL)) {
- // Ignore the compat setting if clip and cull are both enabled.
- // When supported, we can do the depth side of range culling more correctly.
- const bool supported = draw_->GetDeviceCaps().clipDistanceSupported && draw_->GetDeviceCaps().cullDistanceSupported;
- const bool disabled = PSP_CoreParameter().compat.flags().DisableRangeCulling;
- if (supported || !disabled) {
- features |= GPU_SUPPORTS_VS_RANGE_CULLING;
- }
- }
features |= GPU_SUPPORTS_TEXTURE_FLOAT;
features |= GPU_SUPPORTS_INSTANCE_RENDERING;
@@ -146,10 +122,6 @@ void GPU_D3D11::CheckGPUFeatures() {
features |= GPU_SUPPORTS_16BIT_FORMATS;
}
- if (draw_->GetDeviceCaps().logicOpSupported) {
- features |= GPU_SUPPORTS_LOGIC_OP;
- }
-
if (!g_Config.bHighQualityDepth && (features & GPU_SUPPORTS_ACCURATE_DEPTH) != 0) {
features |= GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT;
} else if (PSP_CoreParameter().compat.flags().PixelDepthRounding) {
@@ -164,11 +136,7 @@ void GPU_D3D11::CheckGPUFeatures() {
features |= GPU_USE_DEPTH_RANGE_HACK;
}
- if (PSP_CoreParameter().compat.flags().ClearToRAM) {
- features |= GPU_USE_CLEAR_RAM_HACK;
- }
-
- gstate_c.featureFlags = features;
+ return features;
}
// Needs to be called on GPU thread, not reporting thread.
@@ -206,7 +174,7 @@ void GPU_D3D11::BeginHostFrame() {
GPUCommon::BeginHostFrame();
UpdateCmdInfo();
if (resized_) {
- CheckGPUFeatures();
+ gstate_c.featureFlags = CheckGPUFeatures();
framebufferManager_->Resized();
drawEngine_.Resized();
textureCache_->NotifyConfigChanged();
diff --git a/GPU/D3D11/GPU_D3D11.h b/GPU/D3D11/GPU_D3D11.h
index d1e5287782..e45c1ab79d 100644
--- a/GPU/D3D11/GPU_D3D11.h
+++ b/GPU/D3D11/GPU_D3D11.h
@@ -36,7 +36,7 @@ public:
GPU_D3D11(GraphicsContext *gfxCtx, Draw::DrawContext *draw);
~GPU_D3D11();
- void CheckGPUFeatures() override;
+ u32 CheckGPUFeatures() const override;
void PreExecuteOp(u32 op, u32 diff) override;
void ExecuteOp(u32 op, u32 diff) override;
diff --git a/GPU/D3D11/StateMappingD3D11.cpp b/GPU/D3D11/StateMappingD3D11.cpp
index 0cee52a155..f4ce888f7b 100644
--- a/GPU/D3D11/StateMappingD3D11.cpp
+++ b/GPU/D3D11/StateMappingD3D11.cpp
@@ -153,15 +153,16 @@ void DrawEngineD3D11::ApplyDrawState(int prim) {
// We ignore the logicState on D3D since there's no support, the emulation of it is blend-and-shader only.
if (pipelineState_.FramebufferRead()) {
- bool fboTexNeedsBind = false;
- ApplyFramebufferRead(&fboTexNeedsBind);
+ FBOTexState fboTexBindState = FBO_TEX_NONE;
+ ApplyFramebufferRead(&fboTexBindState);
// The shader takes over the responsibility for blending, so recompute.
ApplyStencilReplaceAndLogicOpIgnoreBlend(blendState.replaceAlphaWithStencil, blendState);
- if (fboTexNeedsBind) {
+ if (fboTexBindState == FBO_TEX_COPY_BIND_TEX) {
framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY);
// No sampler required, we do a plain Load in the pixel shader.
fboTexBound_ = true;
+ fboTexBindState = FBO_TEX_NONE;
framebufferManager_->RebindFramebuffer("RebindFramebuffer - ApplyDrawState");
// Must dirty blend state here so we re-copy next time. Example: Lunar's spell effects.
diff --git a/GPU/Debugger/Debugger.cpp b/GPU/Debugger/Debugger.cpp
index fb88fdca2f..b505521631 100644
--- a/GPU/Debugger/Debugger.cpp
+++ b/GPU/Debugger/Debugger.cpp
@@ -18,6 +18,7 @@
#include
#include "Common/Log.h"
#include "Common/StringUtils.h"
+#include "Common/TimeUtil.h"
#include "GPU/GPU.h"
#include "GPU/Debugger/Breakpoints.h"
#include "GPU/Debugger/Debugger.h"
@@ -35,6 +36,8 @@ static int primsLastFrame = 0;
static int primsThisFrame = 0;
static int thisFlipNum = 0;
+static double lastStepTime = -1.0;
+
static std::vector> restrictPrimRanges;
static std::string restrictPrimRule;
@@ -56,6 +59,7 @@ void SetActive(bool flag) {
breakNext = BreakNext::NONE;
breakAtCount = -1;
GPUStepping::ResumeFromStepping();
+ lastStepTime = -1.0;
}
}
@@ -79,6 +83,7 @@ void SetBreakNext(BreakNext next) {
GPUBreakpoints::AddCmdBreakpoint(GE_CMD_SPLINE, true);
}
GPUStepping::ResumeFromStepping();
+ lastStepTime = next == BreakNext::NONE ? -1.0 : time_now_d();
}
void SetBreakCount(int c, bool relative) {
@@ -130,7 +135,12 @@ bool NotifyCommand(u32 pc) {
GPUBreakpoints::ClearTempBreakpoints();
auto info = gpuDebug->DissassembleOp(pc);
- NOTICE_LOG(G3D, "Waiting at %08x, %s", pc, info.desc.c_str());
+ if (lastStepTime >= 0.0) {
+ NOTICE_LOG(G3D, "Waiting at %08x, %s (%fms)", pc, info.desc.c_str(), (time_now_d() - lastStepTime) * 1000.0);
+ lastStepTime = -1.0;
+ } else {
+ NOTICE_LOG(G3D, "Waiting at %08x, %s", pc, info.desc.c_str());
+ }
GPUStepping::EnterStepping();
}
@@ -141,7 +151,12 @@ void NotifyDraw() {
if (!active)
return;
if (breakNext == BreakNext::DRAW && !GPUStepping::IsStepping()) {
- NOTICE_LOG(G3D, "Waiting at a draw");
+ if (lastStepTime >= 0.0) {
+ NOTICE_LOG(G3D, "Waiting at a draw (%fms)", (time_now_d() - lastStepTime) * 1000.0);
+ lastStepTime = -1.0;
+ } else {
+ NOTICE_LOG(G3D, "Waiting at a draw");
+ }
GPUStepping::EnterStepping();
}
}
diff --git a/GPU/Debugger/Playback.cpp b/GPU/Debugger/Playback.cpp
index aa34ce3ccf..c47eba66b3 100644
--- a/GPU/Debugger/Playback.cpp
+++ b/GPU/Debugger/Playback.cpp
@@ -298,6 +298,7 @@ private:
void Registers(u32 ptr, u32 sz);
void Vertices(u32 ptr, u32 sz);
void Indices(u32 ptr, u32 sz);
+ void ClutAddr(u32 ptr, u32 sz);
void Clut(u32 ptr, u32 sz);
void TransferSrc(u32 ptr, u32 sz);
void Memset(u32 ptr, u32 sz);
@@ -308,6 +309,8 @@ private:
void Display(u32 ptr, u32 sz);
u32 execMemcpyDest = 0;
+ u32 execClutAddr = 0;
+ u32 execClutFlags = 0;
u32 execListBuf = 0;
u32 execListPos = 0;
u32 execListID = 0;
@@ -472,15 +475,40 @@ void DumpExecute::Indices(u32 ptr, u32 sz) {
execListQueue.push_back((GE_CMD_IADDR << 24) | (psp & 0x00FFFFFF));
}
-void DumpExecute::Clut(u32 ptr, u32 sz) {
- u32 psp = mapping_.Map(ptr, sz, std::bind(&DumpExecute::SyncStall, this));
- if (psp == 0) {
- ERROR_LOG(SYSTEM, "Unable to allocate for clut");
- return;
- }
+void DumpExecute::ClutAddr(u32 ptr, u32 sz) {
+ struct ClutAddrData {
+ u32 addr;
+ u32 flags;
+ };
+ const ClutAddrData *data = (const ClutAddrData *)(pushbuf_.data() + ptr);
+ execClutAddr = data->addr;
+ execClutFlags = data->flags;
+}
- execListQueue.push_back((GE_CMD_CLUTADDRUPPER << 24) | ((psp >> 8) & 0x00FF0000));
- execListQueue.push_back((GE_CMD_CLUTADDR << 24) | (psp & 0x00FFFFFF));
+void DumpExecute::Clut(u32 ptr, u32 sz) {
+ // This is always run when we have the actual address set.
+ if (execClutAddr != 0) {
+ const bool isTarget = (execClutFlags & 1) != 0;
+ const bool unchangedVRAM = (execClutFlags & 2) != 0;
+
+ // TODO: Could use drawnVRAM flag, but it can be wrong.
+ // Could potentially always skip if !isTarget, but playing it safe for offset texture behavior.
+ if (Memory::IsValidRange(execClutAddr, sz) && !unchangedVRAM && (!isTarget || !g_Config.bSoftwareRendering)) {
+ // Intentionally don't trigger an upload here.
+ Memory::MemcpyUnchecked(execClutAddr, pushbuf_.data() + ptr, sz);
+ }
+
+ execClutAddr = 0;
+ } else {
+ u32 psp = mapping_.Map(ptr, sz, std::bind(&DumpExecute::SyncStall, this));
+ if (psp == 0) {
+ ERROR_LOG(SYSTEM, "Unable to allocate for clut");
+ return;
+ }
+
+ execListQueue.push_back((GE_CMD_CLUTADDRUPPER << 24) | ((psp >> 8) & 0x00FF0000));
+ execListQueue.push_back((GE_CMD_CLUTADDR << 24) | (psp & 0x00FFFFFF));
+ }
}
void DumpExecute::TransferSrc(u32 ptr, u32 sz) {
@@ -619,6 +647,10 @@ bool DumpExecute::Run() {
Indices(cmd.ptr, cmd.sz);
break;
+ case CommandType::CLUTADDR:
+ ClutAddr(cmd.ptr, cmd.sz);
+ break;
+
case CommandType::CLUT:
Clut(cmd.ptr, cmd.sz);
break;
diff --git a/GPU/Debugger/Record.cpp b/GPU/Debugger/Record.cpp
index cc63dccf72..248ba1e48c 100644
--- a/GPU/Debugger/Record.cpp
+++ b/GPU/Debugger/Record.cpp
@@ -37,6 +37,7 @@
#include "Core/MemMap.h"
#include "Core/System.h"
#include "Core/ThreadPools.h"
+#include "GPU/Common/GPUDebugInterface.h"
#include "GPU/GPUInterface.h"
#include "GPU/GPUState.h"
#include "GPU/ge_constants.h"
@@ -152,8 +153,19 @@ static void BeginRecording() {
u32 sz = 512 * 4;
pushbuf.resize(pushbuf.size() + sz);
gstate.Save((u32_le *)(pushbuf.data() + ptr));
-
commands.push_back({CommandType::INIT, sz, ptr});
+
+ // Also save the initial CLUT.
+ GPUDebugBuffer clut;
+ if (gpuDebug->GetCurrentClut(clut)) {
+ sz = clut.GetStride() * clut.PixelSize();
+ _assert_msg_(sz == 1024, "CLUT should be 1024 bytes");
+ ptr = (u32)pushbuf.size();
+ pushbuf.resize(pushbuf.size() + sz);
+ memcpy(pushbuf.data() + ptr, clut.GetData(), sz);
+ commands.push_back({ CommandType::CLUT, sz, ptr });
+ }
+
DirtyAllVRAM(DirtyVRAMFlag::DIRTY);
}
@@ -308,6 +320,34 @@ static Command EmitCommandWithRAM(CommandType t, const void *p, u32 sz, u32 alig
return cmd;
}
+static u32 GetTargetFlags(u32 addr, u32 sizeInRAM) {
+ const bool isTarget = lastRenderTargets.find(addr) != lastRenderTargets.end();
+
+ bool isDirtyVRAM = false;
+ bool isDrawnVRAM = false;
+ uint32_t start = (addr >> DIRTY_VRAM_SHIFT) & DIRTY_VRAM_MASK;
+ for (uint32_t i = 0; i < (sizeInRAM + DIRTY_VRAM_ROUND) >> DIRTY_VRAM_SHIFT; ++i) {
+ DirtyVRAMFlag flag = dirtyVRAM[start + i];
+ isDirtyVRAM = isDirtyVRAM || flag != DirtyVRAMFlag::CLEAN;
+ isDrawnVRAM = isDrawnVRAM || flag == DirtyVRAMFlag::DRAWN;
+
+ // Mark the VRAM clean now that it's been copied to VRAM.
+ if (flag == DirtyVRAMFlag::DIRTY)
+ dirtyVRAM[start + i] = DirtyVRAMFlag::CLEAN;
+ }
+
+ // The isTarget flag is mostly used for replay of dumps on a PSP.
+ u32 flags = isTarget ? 1 : 0;
+ // The unchangedVRAM flag tells us we can skip recopying.
+ if (!isDirtyVRAM)
+ flags |= 2;
+ // And the drawn flag tells us this data was potentially drawn to.
+ if (isDrawnVRAM)
+ flags |= 4;
+
+ return flags;
+}
+
static void EmitTextureData(int level, u32 texaddr) {
GETextureFormat format = gstate.getTextureFormat();
int w = gstate.getTextureWidth(level);
@@ -315,7 +355,6 @@ static void EmitTextureData(int level, u32 texaddr) {
int bufw = GetTextureBufw(level, texaddr, format);
int extraw = w > bufw ? w - bufw : 0;
u32 sizeInRAM = (textureBitsPerPixel[format] * (bufw * h + extraw)) / 8;
- const bool isTarget = lastRenderTargets.find(texaddr) != lastRenderTargets.end();
CommandType type = CommandType((int)CommandType::TEXTURE0 + level);
const u8 *p = Memory::GetPointerUnchecked(texaddr);
@@ -330,27 +369,7 @@ static void EmitTextureData(int level, u32 texaddr) {
u32 pad;
};
- bool isDirtyVRAM = false;
- bool isDrawnVRAM = false;
- uint32_t start = (texaddr >> DIRTY_VRAM_SHIFT) & DIRTY_VRAM_MASK;
- for (uint32_t i = 0; i < (sizeInRAM + DIRTY_VRAM_ROUND) >> DIRTY_VRAM_SHIFT; ++i) {
- DirtyVRAMFlag flag = dirtyVRAM[start + i];
- isDirtyVRAM = isDirtyVRAM || flag != DirtyVRAMFlag::CLEAN;
- isDrawnVRAM = isDrawnVRAM || flag == DirtyVRAMFlag::DRAWN;
-
- // Mark the VRAM clean now that it's been copied to VRAM.
- if (flag == DirtyVRAMFlag::DIRTY)
- dirtyVRAM[start + i] = DirtyVRAMFlag::CLEAN;
- }
-
- // The isTarget flag is mostly used for replay of dumps on a PSP.
- u32 flags = isTarget ? 1 : 0;
- // The unchangedVRAM flag tells us we can skip recopying.
- if (!isDirtyVRAM)
- flags |= 2;
- // And the drawn flag tells us this data was potentially drawn to.
- if (isDrawnVRAM)
- flags |= 4;
+ u32 flags = GetTargetFlags(texaddr, sizeInRAM);
FramebufData framebuf{ texaddr, bufw, flags };
framebufData.resize(sizeof(framebuf) + bytes);
memcpy(&framebufData[0], &framebuf, sizeof(framebuf));
@@ -456,12 +475,33 @@ static void EmitTransfer(u32 op) {
static void EmitClut(u32 op) {
u32 addr = gstate.getClutAddress();
+
+ // Hardware rendering may be using a framebuffer as CLUT.
+ // To get at this, we first run the command (normally we're called right before it has run.)
+ if (Memory::IsVRAMAddress(addr))
+ gpuDebug->SetCmdValue(op);
+
// Actually should only be 0x3F, but we allow enhanced CLUTs. See #15727.
u32 blocks = (op & 0x7F) == 0x40 ? 0x40 : (op & 0x3F);
u32 bytes = blocks * 32;
bytes = Memory::ValidSize(addr, bytes);
if (bytes != 0) {
+ // Send the original address so VRAM can be reasoned about.
+ if (Memory::IsVRAMAddress(addr)) {
+ struct ClutAddrData {
+ u32 addr;
+ u32 flags;
+ };
+ u32 flags = GetTargetFlags(addr, bytes);
+ ClutAddrData data{ addr, flags };
+
+ FlushRegisters();
+ Command cmd{CommandType::CLUTADDR, sizeof(data), (u32)pushbuf.size()};
+ pushbuf.resize(pushbuf.size() + sizeof(data));
+ memcpy(pushbuf.data() + cmd.ptr, &data, sizeof(data));
+ commands.push_back(cmd);
+ }
EmitCommandWithRAM(CommandType::CLUT, Memory::GetPointerUnchecked(addr), bytes, 16);
}
diff --git a/GPU/Debugger/RecordFormat.h b/GPU/Debugger/RecordFormat.h
index 8079136d0c..dc7abe3ec1 100644
--- a/GPU/Debugger/RecordFormat.h
+++ b/GPU/Debugger/RecordFormat.h
@@ -49,6 +49,7 @@ enum class CommandType : u8 {
MEMCPYDEST = 7,
MEMCPYDATA = 8,
DISPLAY = 9,
+ CLUTADDR = 10,
TEXTURE0 = 0x10,
TEXTURE1 = 0x11,
diff --git a/GPU/Directx9/DrawEngineDX9.h b/GPU/Directx9/DrawEngineDX9.h
index a0ee23e60d..9ef5b37c65 100644
--- a/GPU/Directx9/DrawEngineDX9.h
+++ b/GPU/Directx9/DrawEngineDX9.h
@@ -170,6 +170,8 @@ private:
// Hardware tessellation
TessellationDataTransferDX9 *tessDataTransferDX9;
+ FBOTexState fboTexBindState_ = FBO_TEX_NONE;
+
int lastRenderStepId_ = -1;
bool fboTexNeedsBind_ = false;
diff --git a/GPU/Directx9/GPU_DX9.cpp b/GPU/Directx9/GPU_DX9.cpp
index 3e5d0bac57..075d89e460 100644
--- a/GPU/Directx9/GPU_DX9.cpp
+++ b/GPU/Directx9/GPU_DX9.cpp
@@ -81,7 +81,7 @@ GPU_DX9::GPU_DX9(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
// No need to flush before the tex scale/offset commands if we are baking
// the tex scale/offset into the vertices anyway.
UpdateCmdInfo();
- CheckGPUFeatures();
+ gstate_c.featureFlags = CheckGPUFeatures();
BuildReportingInfo();
@@ -98,68 +98,9 @@ GPU_DX9::GPU_DX9(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
}
}
-// TODO: Move this detection elsewhere when it's needed elsewhere, not before. It's ugly.
-// Source: https://envytools.readthedocs.io/en/latest/hw/pciid.html#gf100
-enum NVIDIAGeneration {
- NV_PRE_KEPLER,
- NV_KEPLER,
- NV_MAXWELL,
- NV_PASCAL,
- NV_VOLTA,
- NV_TURING, // or later
-};
-
-static NVIDIAGeneration NVIDIAGetDeviceGeneration(int deviceID) {
- if (deviceID >= 0x1180 && deviceID <= 0x11bf)
- return NV_KEPLER; // GK104
- if (deviceID >= 0x11c0 && deviceID <= 0x11fa)
- return NV_KEPLER; // GK106
- if (deviceID >= 0x0fc0 && deviceID <= 0x0fff)
- return NV_KEPLER; // GK107
- if (deviceID >= 0x1003 && deviceID <= 0x1028)
- return NV_KEPLER; // GK110(B)
- if (deviceID >= 0x1280 && deviceID <= 0x12ba)
- return NV_KEPLER; // GK208
- if (deviceID >= 0x1381 && deviceID <= 0x13b0)
- return NV_MAXWELL; // GM107
- if (deviceID >= 0x1340 && deviceID <= 0x134d)
- return NV_MAXWELL; // GM108
- if (deviceID >= 0x13c0 && deviceID <= 0x13d9)
- return NV_MAXWELL; // GM204
- if (deviceID >= 0x1401 && deviceID <= 0x1427)
- return NV_MAXWELL; // GM206
- if (deviceID >= 0x15f7 && deviceID <= 0x15f9)
- return NV_PASCAL; // GP100
- if (deviceID >= 0x15f7 && deviceID <= 0x15f9)
- return NV_PASCAL; // GP100
- if (deviceID >= 0x1b00 && deviceID <= 0x1b38)
- return NV_PASCAL; // GP102
- if (deviceID >= 0x1b80 && deviceID <= 0x1be1)
- return NV_PASCAL; // GP104
- if (deviceID >= 0x1c02 && deviceID <= 0x1c62)
- return NV_PASCAL; // GP106
- if (deviceID >= 0x1c81 && deviceID <= 0x1c92)
- return NV_PASCAL; // GP107
- if (deviceID >= 0x1d01 && deviceID <= 0x1d12)
- return NV_PASCAL; // GP108
- if (deviceID >= 0x1d81 && deviceID <= 0x1dba)
- return NV_VOLTA; // GV100
- if (deviceID >= 0x1e02 && deviceID <= 0x1e3c)
- return NV_TURING; // TU102
- if (deviceID >= 0x1e82 && deviceID <= 0x1ed0)
- return NV_TURING; // TU104
- if (deviceID >= 0x1f02 && deviceID <= 0x1f51)
- return NV_TURING; // TU104
- if (deviceID >= 0x1e02)
- return NV_TURING; // More TU models or later, probably.
- return NV_PRE_KEPLER;
-}
-
-void GPU_DX9::CheckGPUFeatures() {
- u32 features = 0;
+u32 GPU_DX9::CheckGPUFeatures() const {
+ u32 features = GPUCommon::CheckGPUFeatures();
features |= GPU_SUPPORTS_16BIT_FORMATS;
- features |= GPU_SUPPORTS_BLEND_MINMAX;
- features |= GPU_SUPPORTS_DEPTH_TEXTURE;
features |= GPU_SUPPORTS_TEXTURE_LOD_CONTROL;
// Accurate depth is required because the Direct3D API does not support inverse Z.
@@ -168,41 +109,6 @@ void GPU_DX9::CheckGPUFeatures() {
features |= GPU_SUPPORTS_ACCURATE_DEPTH;
auto vendor = draw_->GetDeviceCaps().vendor;
- if (!PSP_CoreParameter().compat.flags().DisableRangeCulling) {
- // VS range culling (killing triangles in the vertex shader using NaN) causes problems on Intel.
- // Also causes problems on old NVIDIA.
- switch (vendor) {
- case Draw::GPUVendor::VENDOR_INTEL:
- break;
- case Draw::GPUVendor::VENDOR_NVIDIA:
- // Older NVIDIAs don't seem to like NaNs in their DX9 vertex shaders.
- // No idea if KEPLER is the right cutoff, but let's go with it.
- if (NVIDIAGetDeviceGeneration(draw_->GetDeviceCaps().deviceID) >= NV_KEPLER) {
- features |= GPU_SUPPORTS_VS_RANGE_CULLING;
- }
- break;
- default:
- features |= GPU_SUPPORTS_VS_RANGE_CULLING;
- break;
- }
- }
-
- D3DCAPS9 caps;
- ZeroMemory(&caps, sizeof(caps));
- HRESULT result = 0;
- if (deviceEx_) {
- result = deviceEx_->GetDeviceCaps(&caps);
- } else {
- result = device_->GetDeviceCaps(&caps);
- }
- if (FAILED(result)) {
- WARN_LOG_REPORT(G3D, "Direct3D9: Failed to get the device caps!");
- } else {
- if ((caps.RasterCaps & D3DPRASTERCAPS_ANISOTROPY) != 0 && caps.MaxAnisotropy > 1)
- features |= GPU_SUPPORTS_ANISOTROPY;
- if ((caps.TextureCaps & (D3DPTEXTURECAPS_NONPOW2CONDITIONAL | D3DPTEXTURECAPS_POW2)) == 0)
- features |= GPU_SUPPORTS_TEXTURE_NPOT;
- }
if (!g_Config.bHighQualityDepth) {
features |= GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT;
@@ -213,11 +119,7 @@ void GPU_DX9::CheckGPUFeatures() {
features |= GPU_ROUND_DEPTH_TO_16BIT;
}
- if (PSP_CoreParameter().compat.flags().ClearToRAM) {
- features |= GPU_USE_CLEAR_RAM_HACK;
- }
-
- gstate_c.featureFlags = features;
+ return features;
}
GPU_DX9::~GPU_DX9() {
@@ -261,7 +163,7 @@ void GPU_DX9::BeginHostFrame() {
GPUCommon::BeginHostFrame();
UpdateCmdInfo();
if (resized_) {
- CheckGPUFeatures();
+ gstate_c.featureFlags = CheckGPUFeatures();
framebufferManager_->Resized();
drawEngine_.Resized();
shaderManagerDX9_->DirtyShader();
diff --git a/GPU/Directx9/GPU_DX9.h b/GPU/Directx9/GPU_DX9.h
index 0e313da397..7c6a88d180 100644
--- a/GPU/Directx9/GPU_DX9.h
+++ b/GPU/Directx9/GPU_DX9.h
@@ -35,7 +35,7 @@ public:
GPU_DX9(GraphicsContext *gfxCtx, Draw::DrawContext *draw);
~GPU_DX9();
- void CheckGPUFeatures() override;
+ u32 CheckGPUFeatures() const override;
void PreExecuteOp(u32 op, u32 diff) override;
void ExecuteOp(u32 op, u32 diff) override;
diff --git a/GPU/Directx9/StateMappingDX9.cpp b/GPU/Directx9/StateMappingDX9.cpp
index 2ebadb3974..0dfa352f00 100644
--- a/GPU/Directx9/StateMappingDX9.cpp
+++ b/GPU/Directx9/StateMappingDX9.cpp
@@ -99,14 +99,14 @@ void DrawEngineDX9::ApplyDrawState(int prim) {
if (!gstate.isModeClear()) {
textureCache_->ApplyTexture();
- if (fboTexNeedsBind_) {
+ if (fboTexBindState_ == FBO_TEX_COPY_BIND_TEX) {
// Note that this is positions, not UVs, that we need the copy from.
framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY);
// If we are rendering at a higher resolution, linear is probably best for the dest color.
device_->SetSamplerState(1, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR);
device_->SetSamplerState(1, D3DSAMP_MINFILTER, D3DTEXF_LINEAR);
fboTexBound_ = true;
- fboTexNeedsBind_ = false;
+ fboTexBindState_ = FBO_TEX_NONE;
}
// TODO: Test texture?
@@ -133,20 +133,23 @@ void DrawEngineDX9::ApplyDrawState(int prim) {
// We ignore the logicState on D3D since there's no support, the emulation of it is blend-and-shader only.
if (pipelineState_.FramebufferRead()) {
- bool fboTexNeedsBind = false;
- ApplyFramebufferRead(&fboTexNeedsBind);
+ ApplyFramebufferRead(&fboTexBindState_);
// The shader takes over the responsibility for blending, so recompute.
ApplyStencilReplaceAndLogicOpIgnoreBlend(blendState.replaceAlphaWithStencil, blendState);
- if (fboTexNeedsBind) {
+ if (fboTexBindState_ == FBO_TEX_COPY_BIND_TEX) {
// Note that this is positions, not UVs, that we need the copy from.
framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY);
// If we are rendering at a higher resolution, linear is probably best for the dest color.
device_->SetSamplerState(1, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR);
device_->SetSamplerState(1, D3DSAMP_MINFILTER, D3DTEXF_LINEAR);
fboTexBound_ = true;
+ fboTexBindState_ = FBO_TEX_NONE;
dirtyRequiresRecheck_ |= DIRTY_BLEND_STATE;
gstate_c.Dirty(DIRTY_BLEND_STATE);
+ } else if (fboTexBindState_ == FBO_TEX_READ_FRAMEBUFFER) {
+ // Not supported.
+ fboTexBindState_ = FBO_TEX_NONE;
}
dirtyRequiresRecheck_ |= DIRTY_FRAGMENTSHADER_STATE;
diff --git a/GPU/GLES/DepthBufferGLES.cpp b/GPU/GLES/DepthBufferGLES.cpp
index e5bcc20bf9..342ebdad60 100644
--- a/GPU/GLES/DepthBufferGLES.cpp
+++ b/GPU/GLES/DepthBufferGLES.cpp
@@ -118,7 +118,8 @@ void FramebufferManagerGLES::PackDepthbuffer(VirtualFramebuffer *vfb, int x, int
queries.push_back({ &u_depthDownloadTo8, "u_depthTo8" });
std::vector inits;
inits.push_back({ &u_depthDownloadTex, 0, TEX_SLOT_PSP_TEXTURE });
- depthDownloadProgram_ = render->CreateProgram(shaders, semantics, queries, inits, false, false);
+ GLRProgramFlags flags{};
+ depthDownloadProgram_ = render->CreateProgram(shaders, semantics, queries, inits, flags);
for (auto iter : shaders) {
render->DeleteShader(iter);
}
diff --git a/GPU/GLES/GPU_GLES.cpp b/GPU/GLES/GPU_GLES.cpp
index 75fd0080b5..abae5134f9 100644
--- a/GPU/GLES/GPU_GLES.cpp
+++ b/GPU/GLES/GPU_GLES.cpp
@@ -54,7 +54,7 @@
GPU_GLES::GPU_GLES(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
: GPUCommon(gfxCtx, draw), drawEngine_(draw), fragmentTestCache_(draw) {
UpdateVsyncInterval(true);
- CheckGPUFeatures();
+ gstate_c.featureFlags = CheckGPUFeatures();
shaderManagerGL_ = new ShaderManagerGLES(draw);
framebufferManagerGL_ = new FramebufferManagerGLES(draw);
@@ -148,42 +148,17 @@ GPU_GLES::~GPU_GLES() {
// Take the raw GL extension and versioning data and turn into feature flags.
// TODO: This should use DrawContext::GetDeviceCaps() more and more, and eventually
// this can be shared between all the backends.
-void GPU_GLES::CheckGPUFeatures() {
- u32 features = 0;
+u32 GPU_GLES::CheckGPUFeatures() const {
+ u32 features = GPUCommon::CheckGPUFeatures();
features |= GPU_SUPPORTS_16BIT_FORMATS;
- if (draw_->GetDeviceCaps().dualSourceBlend) {
- if (!g_Config.bVendorBugChecksEnabled || !draw_->GetBugs().Has(Draw::Bugs::DUAL_SOURCE_BLENDING_BROKEN)) {
- features |= GPU_SUPPORTS_DUALSOURCE_BLEND;
- }
- }
-
- if (gl_extensions.EXT_shader_framebuffer_fetch || gl_extensions.ARM_shader_framebuffer_fetch) {
- // This has caused problems in the past. Let's only enable on GLES3.
- if (gl_extensions.GLES3) {
- features |= GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH;
- }
- }
-
if ((gl_extensions.gpuVendor == GPU_VENDOR_NVIDIA) || (gl_extensions.gpuVendor == GPU_VENDOR_AMD))
features |= GPU_PREFER_REVERSE_COLOR_ORDER;
- if (draw_->GetDeviceCaps().textureNPOTFullySupported)
- features |= GPU_SUPPORTS_TEXTURE_NPOT;
-
- if (gl_extensions.EXT_blend_minmax)
- features |= GPU_SUPPORTS_BLEND_MINMAX;
-
- if (draw_->GetDeviceCaps().logicOpSupported)
- features |= GPU_SUPPORTS_LOGIC_OP;
-
if (gl_extensions.GLES3 || !gl_extensions.IsGLES)
features |= GPU_SUPPORTS_TEXTURE_LOD_CONTROL;
- if (draw_->GetDeviceCaps().anisoSupported)
- features |= GPU_SUPPORTS_ANISOTROPY;
-
bool canUseInstanceID = gl_extensions.EXT_draw_instanced || gl_extensions.ARB_draw_instanced;
bool canDefInstanceID = gl_extensions.IsGLES || gl_extensions.EXT_gpu_shader4 || gl_extensions.VersionGEThan(3, 1);
bool instanceRendering = gl_extensions.GLES3 || (canUseInstanceID && canDefInstanceID);
@@ -202,21 +177,6 @@ void GPU_GLES::CheckGPUFeatures() {
// Our implementation of depth texturing needs simple Z range, so can't
// use the extension hacks (yet).
}
- if (draw_->GetDeviceCaps().textureDepthSupported)
- features |= GPU_SUPPORTS_DEPTH_TEXTURE;
- if (draw_->GetDeviceCaps().clipDistanceSupported)
- features |= GPU_SUPPORTS_CLIP_DISTANCE;
- if (draw_->GetDeviceCaps().cullDistanceSupported)
- features |= GPU_SUPPORTS_CULL_DISTANCE;
- if (!draw_->GetBugs().Has(Draw::Bugs::BROKEN_NAN_IN_CONDITIONAL)) {
- // Ignore the compat setting if clip and cull are both enabled.
- // When supported, we can do the depth side of range culling more correctly.
- const bool supported = draw_->GetDeviceCaps().clipDistanceSupported && draw_->GetDeviceCaps().cullDistanceSupported;
- const bool disabled = PSP_CoreParameter().compat.flags().DisableRangeCulling;
- if (supported || !disabled) {
- features |= GPU_SUPPORTS_VS_RANGE_CULLING;
- }
- }
// If we already have a 16-bit depth buffer, we don't need to round.
bool prefer24 = draw_->GetDeviceCaps().preferredDepthBufferFormat == Draw::DataFormat::D24_S8;
@@ -245,11 +205,7 @@ void GPU_GLES::CheckGPUFeatures() {
features |= GPU_USE_DEPTH_RANGE_HACK;
}
- if (PSP_CoreParameter().compat.flags().ClearToRAM) {
- features |= GPU_USE_CLEAR_RAM_HACK;
- }
-
- gstate_c.featureFlags = features;
+ return features;
}
bool GPU_GLES::IsReady() {
@@ -321,7 +277,7 @@ void GPU_GLES::BeginHostFrame() {
GPUCommon::BeginHostFrame();
UpdateCmdInfo();
if (resized_) {
- CheckGPUFeatures();
+ gstate_c.featureFlags = CheckGPUFeatures();
framebufferManager_->Resized();
drawEngine_.Resized();
shaderManagerGL_->DirtyShader();
diff --git a/GPU/GLES/GPU_GLES.h b/GPU/GLES/GPU_GLES.h
index cbe6bc00b0..98bb1d9362 100644
--- a/GPU/GLES/GPU_GLES.h
+++ b/GPU/GLES/GPU_GLES.h
@@ -38,7 +38,7 @@ public:
~GPU_GLES();
// This gets called on startup and when we get back from settings.
- void CheckGPUFeatures() override;
+ u32 CheckGPUFeatures() const override;
bool IsReady() override;
void CancelReady() override;
diff --git a/GPU/GLES/ShaderManagerGLES.cpp b/GPU/GLES/ShaderManagerGLES.cpp
index 18dbe3680f..92dfa9488f 100644
--- a/GPU/GLES/ShaderManagerGLES.cpp
+++ b/GPU/GLES/ShaderManagerGLES.cpp
@@ -192,9 +192,18 @@ LinkedShader::LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs,
initialize.push_back({ &u_tess_weights_u, 0, TEX_SLOT_SPLINE_WEIGHTS_U });
initialize.push_back({ &u_tess_weights_v, 0, TEX_SLOT_SPLINE_WEIGHTS_V });
- bool useDualSource = (gstate_c.featureFlags & GPU_SUPPORTS_DUALSOURCE_BLEND) != 0;
- bool useClip0 = VSID.Bit(VS_BIT_VERTEX_RANGE_CULLING) && gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE);
- program = render->CreateProgram(shaders, semantics, queries, initialize, useDualSource, useClip0);
+ GLRProgramFlags flags{};
+ flags.supportDualSource = (gstate_c.featureFlags & GPU_SUPPORTS_DUALSOURCE_BLEND) != 0;
+ if (!VSID.Bit(VS_BIT_IS_THROUGH) && gstate_c.Supports(GPU_SUPPORTS_DEPTH_CLAMP)) {
+ flags.useClipDistance0 = true;
+ flags.useClipDistance1 = true;
+ if (VSID.Bit(VS_BIT_VERTEX_RANGE_CULLING) && gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE))
+ flags.useClipDistance2 = true;
+ } else if (VSID.Bit(VS_BIT_VERTEX_RANGE_CULLING) && gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE)) {
+ flags.useClipDistance0 = true;
+ }
+
+ program = render->CreateProgram(shaders, semantics, queries, initialize, flags);
// The rest, use the "dirty" mechanism.
dirtyUniforms = DIRTY_ALL_UNIFORMS;
diff --git a/GPU/GLES/StateMappingGLES.cpp b/GPU/GLES/StateMappingGLES.cpp
index dbfc115a91..cdcc9e5069 100644
--- a/GPU/GLES/StateMappingGLES.cpp
+++ b/GPU/GLES/StateMappingGLES.cpp
@@ -149,13 +149,14 @@ void DrawEngineGLES::ApplyDrawState(int prim) {
GenericLogicState &logicState = pipelineState_.logicState;
if (pipelineState_.FramebufferRead()) {
- bool fboTexNeedsBind = false;
- ApplyFramebufferRead(&fboTexNeedsBind);
+ FBOTexState fboTexBindState = FBO_TEX_NONE;
+ ApplyFramebufferRead(&fboTexBindState);
// The shader takes over the responsibility for blending, so recompute.
ApplyStencilReplaceAndLogicOpIgnoreBlend(blendState.replaceAlphaWithStencil, blendState);
// We copy the framebuffer here, as doing so will wipe any blend state if we do it later.
- if (fboTexNeedsBind) {
+ // fboTexBindState won't be FBO_TEX_COPY_BIND_TEX if we can read directly from the target.
+ if (fboTexBindState == FBO_TEX_COPY_BIND_TEX) {
// Note that this is positions, not UVs, that we need the copy from.
framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY);
// If we are rendering at a higher resolution, linear is probably best for the dest color.
@@ -166,6 +167,9 @@ void DrawEngineGLES::ApplyDrawState(int prim) {
// Must dirty blend state here so we re-copy next time. Example: Lunar's spell effects.
dirtyRequiresRecheck_ |= DIRTY_BLEND_STATE;
gstate_c.Dirty(DIRTY_BLEND_STATE);
+ } else if (fboTexBindState == FBO_TEX_READ_FRAMEBUFFER) {
+ // No action needed here.
+ fboTexBindState = FBO_TEX_NONE;
}
dirtyRequiresRecheck_ |= DIRTY_FRAGMENTSHADER_STATE;
gstate_c.Dirty(DIRTY_FRAGMENTSHADER_STATE);
diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp
index 107398b7c8..3d64bdb1ab 100644
--- a/GPU/GPUCommon.cpp
+++ b/GPU/GPUCommon.cpp
@@ -89,7 +89,7 @@ const CommonCommandTableEntry commonCommandTable[] = {
// These affect the fragment shader so need flushing.
{ GE_CMD_CLEARMODE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE },
{ GE_CMD_TEXTUREMAPENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE },
- { GE_CMD_FOGENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE},
+ { GE_CMD_FOGENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE },
{ GE_CMD_TEXMODE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS | DIRTY_FRAGMENTSHADER_STATE },
{ GE_CMD_TEXSHADELS, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE },
// Raster state for Direct3D 9, uncommon.
@@ -2414,10 +2414,10 @@ void GPUCommon::Execute_ImmVertexAlphaPrim(u32 op, u32 diff) {
immPrim_ = (GEPrimitiveType)prim;
// Flags seem to only be respected from the first prim.
immFlags_ = op & 0x00FFF800;
+ immFirstSent_ = false;
} else if (prim == GE_PRIM_KEEP_PREVIOUS && immPrim_ != GE_PRIM_INVALID) {
static constexpr int flushPrimCount[] = { 1, 2, 0, 3, 0, 0, 2, 0 };
- // Instead of finding a proper point to flush, we just emit a full rectangle every time one
- // is finished.
+ // Instead of finding a proper point to flush, we just emit prims when we can.
if (immCount_ == flushPrimCount[immPrim_ & 7])
FlushImm();
} else {
@@ -2439,31 +2439,6 @@ void GPUCommon::FlushImm() {
}
UpdateUVScaleOffset();
- // Instead of plumbing through properly (we'd need to inject these pretransformed vertices in the middle
- // of SoftwareTransform(), which would take a lot of refactoring), we'll cheat and just turn these into
- // through vertices.
- // Since the only known use is Thrillville and it only uses it to clear, we just use color and pos.
- struct ImmVertex {
- float uv[2];
- uint32_t color;
- float xyz[3];
- };
- ImmVertex temp[MAX_IMMBUFFER_SIZE];
- uint32_t color1Used = 0;
- for (int i = 0; i < immCount_; i++) {
- // Since we're sending through, scale back up to w/h.
- temp[i].uv[0] = immBuffer_[i].u * gstate.getTextureWidth(0);
- temp[i].uv[1] = immBuffer_[i].v * gstate.getTextureHeight(0);
- temp[i].color = immBuffer_[i].color0_32;
- temp[i].xyz[0] = immBuffer_[i].pos[0];
- temp[i].xyz[1] = immBuffer_[i].pos[1];
- temp[i].xyz[2] = immBuffer_[i].pos[2];
- color1Used |= immBuffer_[i].color1_32;
- }
- int vtype = GE_VTYPE_TC_FLOAT | GE_VTYPE_POS_FLOAT | GE_VTYPE_COL_8888 | GE_VTYPE_THROUGH;
-
- // TODO: Handle fog and secondary color somehow?
-
bool antialias = (immFlags_ & GE_IMM_ANTIALIAS) != 0;
bool prevAntialias = gstate.isAntiAliasEnabled();
bool shading = (immFlags_ & GE_IMM_SHADING) != 0;
@@ -2473,40 +2448,42 @@ void GPUCommon::FlushImm() {
int cullMode = (immFlags_ & GE_IMM_CULLFACE) != 0 ? 1 : 0;
bool texturing = (immFlags_ & GE_IMM_TEXTURE) != 0;
bool prevTexturing = gstate.isTextureMapEnabled();
+ bool fog = (immFlags_ & GE_IMM_FOG) != 0;
+ bool prevFog = gstate.isFogEnabled();
bool dither = (immFlags_ & GE_IMM_DITHER) != 0;
bool prevDither = gstate.isDitherEnabled();
if ((immFlags_ & GE_IMM_CLIPMASK) != 0) {
WARN_LOG_REPORT_ONCE(geimmclipvalue, G3D, "Imm vertex used clip value, flags=%06x", immFlags_);
- } else if ((immFlags_ & GE_IMM_FOG) != 0) {
- WARN_LOG_REPORT_ONCE(geimmfog, G3D, "Imm vertex used fog, flags=%06x", immFlags_);
- } else if (color1Used != 0 && gstate.isUsingSecondaryColor()) {
- WARN_LOG_REPORT_ONCE(geimmcolor1, G3D, "Imm vertex used secondary color, flags=%06x", immFlags_);
}
- if (texturing != prevTexturing || cullEnable != prevCullEnable || dither != prevDither || prevShading != shading) {
+ bool changed = texturing != prevTexturing || cullEnable != prevCullEnable || dither != prevDither;
+ changed = changed || prevShading != shading || prevFog != fog;
+ if (changed) {
DispatchFlush();
gstate.antiAliasEnable = (GE_CMD_ANTIALIASENABLE << 24) | (int)antialias;
gstate.shademodel = (GE_CMD_SHADEMODE << 24) | (int)shading;
gstate.cullfaceEnable = (GE_CMD_CULLFACEENABLE << 24) | (int)cullEnable;
gstate.textureMapEnable = (GE_CMD_TEXTUREMAPENABLE << 24) | (int)texturing;
+ gstate.fogEnable = (GE_CMD_FOGENABLE << 24) | (int)fog;
gstate.ditherEnable = (GE_CMD_DITHERENABLE << 24) | (int)dither;
- gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE);
+ gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_UVSCALEOFFSET | DIRTY_CULLRANGE);
}
- int bytesRead;
- uint32_t vertTypeID = GetVertTypeID(vtype, 0);
- drawEngineCommon_->DispatchSubmitImm(temp, nullptr, immPrim_, immCount_, vertTypeID, cullMode, &bytesRead);
- // TODO: In the future, make a special path for these.
- // drawEngineCommon_->DispatchSubmitImm(immBuffer_, immCount_);
+ drawEngineCommon_->DispatchSubmitImm(immPrim_, immBuffer_, immCount_, cullMode, immFirstSent_);
immCount_ = 0;
+ immFirstSent_ = true;
- gstate.antiAliasEnable = (GE_CMD_ANTIALIASENABLE << 24) | (int)prevAntialias;
- gstate.shademodel = (GE_CMD_SHADEMODE << 24) | (int)prevShading;
- gstate.cullfaceEnable = (GE_CMD_CULLFACEENABLE << 24) | (int)prevCullEnable;
- gstate.textureMapEnable = (GE_CMD_TEXTUREMAPENABLE << 24) | (int)prevTexturing;
- gstate.ditherEnable = (GE_CMD_DITHERENABLE << 24) | (int)prevDither;
- gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE);
+ if (changed) {
+ DispatchFlush();
+ gstate.antiAliasEnable = (GE_CMD_ANTIALIASENABLE << 24) | (int)prevAntialias;
+ gstate.shademodel = (GE_CMD_SHADEMODE << 24) | (int)prevShading;
+ gstate.cullfaceEnable = (GE_CMD_CULLFACEENABLE << 24) | (int)prevCullEnable;
+ gstate.textureMapEnable = (GE_CMD_TEXTUREMAPENABLE << 24) | (int)prevTexturing;
+ gstate.fogEnable = (GE_CMD_FOGENABLE << 24) | (int)prevFog;
+ gstate.ditherEnable = (GE_CMD_DITHERENABLE << 24) | (int)prevDither;
+ gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_UVSCALEOFFSET | DIRTY_CULLRANGE);
+ }
}
void GPUCommon::ExecuteOp(u32 op, u32 diff) {
@@ -3176,3 +3153,56 @@ size_t GPUCommon::FormatGPUStatsCommon(char *buffer, size_t size) {
vertexAverageCycles
);
}
+
+u32 GPUCommon::CheckGPUFeatures() const {
+ u32 features = 0;
+ if (draw_->GetDeviceCaps().logicOpSupported) {
+ features |= GPU_SUPPORTS_LOGIC_OP;
+ }
+ if (draw_->GetDeviceCaps().anisoSupported) {
+ features |= GPU_SUPPORTS_ANISOTROPY;
+ }
+ if (draw_->GetDeviceCaps().textureNPOTFullySupported) {
+ features |= GPU_SUPPORTS_TEXTURE_NPOT;
+ }
+ if (draw_->GetDeviceCaps().dualSourceBlend) {
+ if (!g_Config.bVendorBugChecksEnabled || !draw_->GetBugs().Has(Draw::Bugs::DUAL_SOURCE_BLENDING_BROKEN)) {
+ features |= GPU_SUPPORTS_DUALSOURCE_BLEND;
+ }
+ }
+ if (draw_->GetDeviceCaps().blendMinMaxSupported) {
+ features |= GPU_SUPPORTS_BLEND_MINMAX;
+ }
+
+ if (draw_->GetDeviceCaps().clipDistanceSupported) {
+ features |= GPU_SUPPORTS_CLIP_DISTANCE;
+ }
+
+ if (draw_->GetDeviceCaps().cullDistanceSupported) {
+ features |= GPU_SUPPORTS_CULL_DISTANCE;
+ }
+
+ if (draw_->GetDeviceCaps().textureDepthSupported) {
+ features |= GPU_SUPPORTS_DEPTH_TEXTURE;
+ }
+
+ if (!draw_->GetBugs().Has(Draw::Bugs::BROKEN_NAN_IN_CONDITIONAL)) {
+ // Ignore the compat setting if clip and cull are both supported.
+ // When supported, we can do the depth side of range culling more correctly.
+ const bool supported = draw_->GetDeviceCaps().clipDistanceSupported && draw_->GetDeviceCaps().cullDistanceSupported;
+ const bool disabled = PSP_CoreParameter().compat.flags().DisableRangeCulling;
+ if (supported || !disabled) {
+ features |= GPU_SUPPORTS_VS_RANGE_CULLING;
+ }
+ }
+
+ if (draw_->GetDeviceCaps().framebufferFetchSupported) {
+ features |= GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH;
+ }
+
+ if (PSP_CoreParameter().compat.flags().ClearToRAM) {
+ features |= GPU_USE_CLEAR_RAM_HACK;
+ }
+
+ return features;
+}
diff --git a/GPU/GPUCommon.h b/GPU/GPUCommon.h
index 3a5674127f..1127e38d91 100644
--- a/GPU/GPUCommon.h
+++ b/GPU/GPUCommon.h
@@ -76,7 +76,7 @@ public:
Draw::DrawContext *GetDrawContext() override {
return draw_;
}
- virtual void CheckGPUFeatures() = 0;
+ virtual u32 CheckGPUFeatures() const;
void UpdateCmdInfo();
@@ -103,7 +103,7 @@ public:
void ExecuteOp(u32 op, u32 diff) override;
void PreExecuteOp(u32 op, u32 diff) override;
- bool InterpretList(DisplayList &list) override;
+ bool InterpretList(DisplayList &list);
void ProcessDLQueue();
u32 UpdateStall(int listid, u32 newstall) override;
u32 EnqueueList(u32 listpc, u32 stall, int subIntrBase, PSPPointer args, bool head) override;
@@ -355,6 +355,7 @@ protected:
int immCount_ = 0;
GEPrimitiveType immPrim_ = GE_PRIM_INVALID;
uint32_t immFlags_ = 0;
+ bool immFirstSent_ = false;
std::string reportingPrimaryInfo_;
std::string reportingFullInfo_;
diff --git a/GPU/GPUInterface.h b/GPU/GPUInterface.h
index 832897f598..2ce7c48620 100644
--- a/GPU/GPUInterface.h
+++ b/GPU/GPUInterface.h
@@ -205,7 +205,6 @@ public:
virtual void PreExecuteOp(u32 op, u32 diff) = 0;
virtual void ExecuteOp(u32 op, u32 diff) = 0;
- virtual bool InterpretList(DisplayList& list) = 0;
// Framebuffer management
virtual void SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) = 0;
diff --git a/GPU/GPUState.h b/GPU/GPUState.h
index cbfb58395d..191a670376 100644
--- a/GPU/GPUState.h
+++ b/GPU/GPUState.h
@@ -485,7 +485,8 @@ enum {
// Free bit: 15
GPU_SUPPORTS_DEPTH_TEXTURE = FLAG_BIT(16),
GPU_SUPPORTS_ACCURATE_DEPTH = FLAG_BIT(17),
- // Free bits: 18-19
+ GPU_SUPPORTS_FRAGMENT_SHADER_INTERLOCK = FLAG_BIT(18),
+ // Free bits: 19
GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH = FLAG_BIT(20),
GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT = FLAG_BIT(21),
GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT = FLAG_BIT(22),
diff --git a/GPU/Software/BinManager.cpp b/GPU/Software/BinManager.cpp
index 3438cb222d..42eef12eda 100644
--- a/GPU/Software/BinManager.cpp
+++ b/GPU/Software/BinManager.cpp
@@ -167,7 +167,7 @@ void BinManager::UpdateState(bool throughMode) {
if (states_.Full())
Flush("states");
stateIndex_ = (uint16_t)states_.Push(RasterizerState());
- ComputeRasterizerState(&states_[stateIndex_], throughMode);
+ ComputeRasterizerState(&states_[stateIndex_]);
states_[stateIndex_].samplerID.cached.clut = cluts_[clutIndex_].readable;
ClearDirty(SoftDirty::PIXEL_ALL | SoftDirty::SAMPLER_ALL | SoftDirty::RAST_ALL);
@@ -326,7 +326,7 @@ void BinManager::AddTriangle(const VertexData &v0, const VertexData &v1, const V
if (d01.x * d02.y - d01.y * d02.x < 0)
return;
// If all points have identical coords, we'll have 0 weights and not skip properly, so skip here.
- if (d01.x == 0 && d01.y == 0 && d02.x == 0 && d02.y == 0)
+ if ((d01.x == 0 && d02.x == 0) || (d01.y == 0 && d02.y == 0))
return;
// Was it fully outside the scissor?
@@ -474,6 +474,9 @@ void BinManager::Drain() {
}
void BinManager::Flush(const char *reason) {
+ if (queueRange_.x1 == 0x7FFFFFFF)
+ return;
+
double st;
if (coreCollectDebugStats)
st = time_now_d();
diff --git a/GPU/Software/Clipper.cpp b/GPU/Software/Clipper.cpp
index 166947ef14..6fcb2fe47b 100644
--- a/GPU/Software/Clipper.cpp
+++ b/GPU/Software/Clipper.cpp
@@ -133,6 +133,10 @@ static inline bool CheckOutsideZ(ClipCoords p, int &pos, int &neg) {
void ProcessRect(const VertexData &v0, const VertexData &v1, BinManager &binner) {
if (!binner.State().throughMode) {
+ // If any verts were outside range, throw the entire prim away.
+ if (v0.OutsideRange() || v1.OutsideRange())
+ return;
+
// We may discard the entire rect based on depth values.
int outsidePos = 0, outsideNeg = 0;
CheckOutsideZ(v0.clippos, outsidePos, outsideNeg);
@@ -176,6 +180,12 @@ void ProcessRect(const VertexData &v0, const VertexData &v1, BinManager &binner)
}
void ProcessPoint(const VertexData &v0, BinManager &binner) {
+ // If any verts were outside range, throw the entire prim away.
+ if (!binner.State().throughMode) {
+ if (v0.OutsideRange())
+ return;
+ }
+
// Points need no clipping. Will be bounds checked in the rasterizer (which seems backwards?)
binner.AddPoint(v0);
}
@@ -187,6 +197,10 @@ void ProcessLine(const VertexData &v0, const VertexData &v1, BinManager &binner)
return;
}
+ // If any verts were outside range, throw the entire prim away.
+ if (v0.OutsideRange() || v1.OutsideRange())
+ return;
+
int outsidePos = 0, outsideNeg = 0;
CheckOutsideZ(v0.clippos, outsidePos, outsideNeg);
CheckOutsideZ(v1.clippos, outsidePos, outsideNeg);
@@ -222,6 +236,10 @@ void ProcessLine(const VertexData &v0, const VertexData &v1, BinManager &binner)
void ProcessTriangle(const VertexData &v0, const VertexData &v1, const VertexData &v2, const VertexData &provoking, BinManager &binner) {
int mask = 0;
if (!binner.State().throughMode) {
+ // If any verts were outside range, throw the entire prim away.
+ if (v0.OutsideRange() || v1.OutsideRange() || v2.OutsideRange())
+ return;
+
mask |= CalcClipMask(v0.clippos);
mask |= CalcClipMask(v1.clippos);
mask |= CalcClipMask(v2.clippos);
diff --git a/GPU/Software/FuncId.cpp b/GPU/Software/FuncId.cpp
index 501b5aaf27..0d194d0012 100644
--- a/GPU/Software/FuncId.cpp
+++ b/GPU/Software/FuncId.cpp
@@ -48,11 +48,11 @@ static inline PixelBlendFactor OptimizeAlphaFactor(uint32_t color) {
return PixelBlendFactor::FIX;
}
-void ComputePixelFuncID(PixelFuncID *id, bool throughMode) {
+void ComputePixelFuncID(PixelFuncID *id) {
id->fullKey = 0;
// TODO: Could this be minz > 0x0000 || maxz < 0xFFFF? Maybe unsafe, depending on verts...
- id->applyDepthRange = !throughMode;
+ id->applyDepthRange = !gstate.isModeThrough();
// Dither happens even in clear mode.
id->dithering = gstate.isDitherEnabled();
id->fbFormat = gstate.FrameBufFormat();
@@ -169,7 +169,7 @@ void ComputePixelFuncID(PixelFuncID *id, bool throughMode) {
}
id->applyLogicOp = gstate.isLogicOpEnabled() && gstate.getLogicOp() != GE_LOGIC_COPY;
- id->applyFog = gstate.isFogEnabled() && !throughMode;
+ id->applyFog = gstate.isFogEnabled() && !gstate.isModeThrough();
id->earlyZChecks = id->DepthTestFunc() != GE_COMP_ALWAYS;
if (id->stencilTest && id->earlyZChecks) {
diff --git a/GPU/Software/FuncId.h b/GPU/Software/FuncId.h
index 46307fc55c..d9d8e51573 100644
--- a/GPU/Software/FuncId.h
+++ b/GPU/Software/FuncId.h
@@ -244,7 +244,7 @@ struct hash {
};
-void ComputePixelFuncID(PixelFuncID *id, bool throughMode);
+void ComputePixelFuncID(PixelFuncID *id);
std::string DescribePixelFuncID(const PixelFuncID &id);
void ComputeSamplerID(SamplerID *id);
diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp
index 1f4039d05a..2336ff6bef 100644
--- a/GPU/Software/Rasterizer.cpp
+++ b/GPU/Software/Rasterizer.cpp
@@ -93,8 +93,8 @@ static inline Vec4 Interpolate(const float &c0, const float &c1, const fl
return Interpolate(c0, c1, c2, w0.Cast(), w1.Cast(), w2.Cast(), wsum_recip);
}
-void ComputeRasterizerState(RasterizerState *state, bool throughMode) {
- ComputePixelFuncID(&state->pixelID, throughMode);
+void ComputeRasterizerState(RasterizerState *state) {
+ ComputePixelFuncID(&state->pixelID);
state->drawPixel = Rasterizer::GetSingleFunc(state->pixelID);
state->enableTextures = gstate.isTextureMapEnabled() && !state->pixelID.clearMode;
@@ -132,7 +132,7 @@ void ComputeRasterizerState(RasterizerState *state, bool throughMode) {
}
state->shadeGouraud = gstate.getShadeMode() == GE_SHADE_GOURAUD;
- state->throughMode = throughMode;
+ state->throughMode = gstate.isModeThrough();
state->antialiasLines = gstate.isAntiAliasEnabled();
#if defined(SOFTGPU_MEMORY_TAGGING_DETAILED) || defined(SOFTGPU_MEMORY_TAGGING_BASIC)
@@ -1235,6 +1235,7 @@ void ClearRectangle(const VertexData &v0, const VertexData &v1, const BinCoords
case GE_FORMAT_INVALID:
case GE_FORMAT_DEPTH16:
+ case GE_FORMAT_CLUT8:
_dbg_assert_msg_(false, "Software: invalid framebuf format.");
break;
}
@@ -1365,7 +1366,7 @@ void DrawLine(const VertexData &v0, const VertexData &v1, const BinCoords &range
maskOK = false;
}
- if (!CheckDepthTestPassed(pixelID.DepthTestFunc(), x, y, pixelID.cached.depthbufStride, z)) {
+ if (!CheckDepthTestPassed(pixelID.DepthTestFunc(), p.x, p.y, pixelID.cached.depthbufStride, z)) {
maskOK = false;
}
}
diff --git a/GPU/Software/Rasterizer.h b/GPU/Software/Rasterizer.h
index 089f99a657..93e89f9596 100644
--- a/GPU/Software/Rasterizer.h
+++ b/GPU/Software/Rasterizer.h
@@ -65,7 +65,7 @@ struct RasterizerState {
}
};
-void ComputeRasterizerState(RasterizerState *state, bool throughMode);
+void ComputeRasterizerState(RasterizerState *state);
// Draws a triangle if its vertices are specified in counter-clockwise order
void DrawTriangle(const VertexData &v0, const VertexData &v1, const VertexData &v2, const BinCoords &range, const RasterizerState &state);
diff --git a/GPU/Software/RasterizerRectangle.cpp b/GPU/Software/RasterizerRectangle.cpp
index 4c2e4ad1a9..ff5b9c85d9 100644
--- a/GPU/Software/RasterizerRectangle.cpp
+++ b/GPU/Software/RasterizerRectangle.cpp
@@ -116,8 +116,12 @@ void DrawSprite(const VertexData &v0, const VertexData &v1, const BinCoords &ran
DrawingCoords scissorTL = TransformUnit::ScreenToDrawing(range.x1, range.y1);
DrawingCoords scissorBR = TransformUnit::ScreenToDrawing(range.x2, range.y2);
- int z = v1.screenpos.z;
- int fog = 255;
+ const int z = v1.screenpos.z;
+ constexpr int fog = 255;
+
+ // Since it's flat, we can check depth range early. Matters for earlyZChecks.
+ if (pixelID.applyDepthRange && (z < pixelID.cached.minz || z > pixelID.cached.maxz))
+ return;
bool isWhite = v1.color0 == 0xFFFFFFFF;
@@ -204,15 +208,31 @@ void DrawSprite(const VertexData &v0, const VertexData &v1, const BinCoords &ran
float t = tf_start;
const Vec4 c0 = Vec4::FromRGBA(v1.color0);
- for (int y = pos0.y; y < pos1.y; y++) {
- float s = sf_start;
- // Not really that fast but faster than triangle.
- for (int x = pos0.x; x < pos1.x; x++) {
- Vec4 prim_color = state.nearest(s, t, xoff, yoff, ToVec4IntArg(c0), &texptr, &texbufw, 0, 0, state.samplerID);
- state.drawPixel(x, y, z, 255, ToVec4IntArg(prim_color), pixelID);
- s += dsf;
+ if (pixelID.earlyZChecks) {
+ for (int y = pos0.y; y < pos1.y; y++) {
+ float s = sf_start;
+ // Not really that fast but faster than triangle.
+ for (int x = pos0.x; x < pos1.x; x++) {
+ if (CheckDepthTestPassed(pixelID.DepthTestFunc(), x, y, pixelID.cached.depthbufStride, z)) {
+ Vec4 prim_color = state.nearest(s, t, xoff, yoff, ToVec4IntArg(c0), &texptr, &texbufw, 0, 0, state.samplerID);
+ state.drawPixel(x, y, z, fog, ToVec4IntArg(prim_color), pixelID);
+ }
+
+ s += dsf;
+ }
+ t += dtf;
+ }
+ } else {
+ for (int y = pos0.y; y < pos1.y; y++) {
+ float s = sf_start;
+ // Not really that fast but faster than triangle.
+ for (int x = pos0.x; x < pos1.x; x++) {
+ Vec4 prim_color = state.nearest(s, t, xoff, yoff, ToVec4IntArg(c0), &texptr, &texbufw, 0, 0, state.samplerID);
+ state.drawPixel(x, y, z, fog, ToVec4IntArg(prim_color), pixelID);
+ s += dsf;
+ }
+ t += dtf;
}
- t += dtf;
}
}
} else {
@@ -239,6 +259,16 @@ void DrawSprite(const VertexData &v0, const VertexData &v1, const BinCoords &ran
pixel++;
}
}
+ } else if (pixelID.earlyZChecks) {
+ const Vec4 prim_color = Vec4::FromRGBA(v1.color0);
+ for (int y = pos0.y; y < pos1.y; y++) {
+ for (int x = pos0.x; x < pos1.x; x++) {
+ if (!CheckDepthTestPassed(pixelID.DepthTestFunc(), x, y, pixelID.cached.depthbufStride, z))
+ continue;
+
+ state.drawPixel(x, y, z, fog, ToVec4IntArg(prim_color), pixelID);
+ }
+ }
} else {
const Vec4 prim_color = Vec4::FromRGBA(v1.color0);
for (int y = pos0.y; y < pos1.y; y++) {
@@ -325,15 +355,18 @@ bool RectangleFastPath(const VertexData &v0, const VertexData &v1, BinManager &b
}
static bool AreCoordsRectangleCompatible(const RasterizerState &state, const VertexData &data0, const VertexData &data1) {
- if (!(data1.color0 == data0.color0))
+ if (data1.color0 != data0.color0)
return false;
- if (!(data1.screenpos.z == data0.screenpos.z)) {
+ if (data1.screenpos.z != data0.screenpos.z) {
// Sometimes, we don't actually care about z.
if (state.pixelID.depthWrite || state.pixelID.DepthTestFunc() != GE_COMP_ALWAYS)
return false;
}
if (!state.throughMode) {
- if (!state.throughMode && !(data1.color1 == data0.color1))
+ if (data1.color1 != data0.color1)
+ return false;
+ // This means it should be culled, outside range.
+ if (data1.OutsideRange() || data0.OutsideRange())
return false;
// Do we have to think about perspective correction or slope mip level?
if (state.enableTextures && data1.clippos.w != data0.clippos.w) {
diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp
index 18cdb0b314..0f2d3bcda1 100644
--- a/GPU/Software/SoftGpu.cpp
+++ b/GPU/Software/SoftGpu.cpp
@@ -361,7 +361,7 @@ const SoftwareCommandTableEntry softgpuCommandTable[] = {
{ GE_CMD_VTCT },
{ GE_CMD_VTCQ },
{ GE_CMD_VCV },
- { GE_CMD_VAP, FLAG_EXECUTE, SoftDirty::NONE, &GPUCommon::Execute_ImmVertexAlphaPrim },
+ { GE_CMD_VAP, FLAG_EXECUTE, SoftDirty::NONE, &SoftGPU::Execute_ImmVertexAlphaPrim },
{ GE_CMD_VFC },
{ GE_CMD_VSCV },
@@ -639,6 +639,7 @@ void SoftGPU::CopyToCurrentFboFromDisplayRam(int srcwidth, int srcheight) {
}
void SoftGPU::CopyDisplayToOutput(bool reallyDirty) {
+ drawEngine_->transformUnit.Flush("output");
// The display always shows 480x272.
CopyToCurrentFboFromDisplayRam(FB_WIDTH, FB_HEIGHT);
MarkDirty(displayFramebuf_, displayStride_, 272, displayFormat_, SoftGPUVRAMDirty::CLEAR);
@@ -650,7 +651,7 @@ void SoftGPU::MarkDirty(uint32_t addr, uint32_t stride, uint32_t height, GEBuffe
}
void SoftGPU::MarkDirty(uint32_t addr, uint32_t bytes, SoftGPUVRAMDirty value) {
- // Don't bother tracking if frameskipping.
+ // Only bother tracking if frameskipping.
if (g_Config.iFrameSkip == 0)
return;
if (!Memory::IsVRAMAddress(addr) || !Memory::IsVRAMAddress(addr + bytes - 1))
@@ -1005,19 +1006,24 @@ void SoftGPU::Execute_LoadClut(u32 op, u32 diff) {
void SoftGPU::Execute_FramebufPtr(u32 op, u32 diff) {
// We assume fb.data won't change while we're drawing.
- drawEngine_->transformUnit.Flush("framebuf");
- fb.data = Memory::GetPointerWrite(gstate.getFrameBufAddress());
+ if (diff) {
+ drawEngine_->transformUnit.Flush("framebuf");
+ fb.data = Memory::GetPointerWrite(gstate.getFrameBufAddress());
+ }
}
void SoftGPU::Execute_FramebufFormat(u32 op, u32 diff) {
// We should flush, because ranges within bins may change.
- drawEngine_->transformUnit.Flush("framebuf");
+ if (diff)
+ drawEngine_->transformUnit.Flush("framebuf");
}
void SoftGPU::Execute_ZbufPtr(u32 op, u32 diff) {
// We assume depthbuf.data won't change while we're drawing.
- drawEngine_->transformUnit.Flush("depthbuf");
- depthbuf.data = Memory::GetPointerWrite(gstate.getDepthBufAddress());
+ if (diff) {
+ drawEngine_->transformUnit.Flush("depthbuf");
+ depthbuf.data = Memory::GetPointerWrite(gstate.getDepthBufAddress());
+ }
}
void SoftGPU::Execute_VertexType(u32 op, u32 diff) {
@@ -1109,6 +1115,12 @@ void SoftGPU::Execute_BoneMtxData(u32 op, u32 diff) {
gstate.boneMatrixData = GE_CMD_BONEMATRIXDATA << 24;
}
+void SoftGPU::Execute_ImmVertexAlphaPrim(u32 op, u32 diff) {
+ GPUCommon::Execute_ImmVertexAlphaPrim(op, diff);
+ // We won't flush as often as hardware renderers, so we want to flush right away.
+ FlushImm();
+}
+
void SoftGPU::Execute_Call(u32 op, u32 diff) {
PROFILE_THIS_SCOPE("gpu_call");
@@ -1138,6 +1150,18 @@ void SoftGPU::FinishDeferred() {
drawEngine_->transformUnit.Flush("finish");
}
+int SoftGPU::ListSync(int listid, int mode) {
+ // Take this as a cue that we need to finish drawing.
+ drawEngine_->transformUnit.Flush("listsync");
+ return GPUCommon::ListSync(listid, mode);
+}
+
+u32 SoftGPU::DrawSync(int mode) {
+ // Take this as a cue that we need to finish drawing.
+ drawEngine_->transformUnit.Flush("drawsync");
+ return GPUCommon::DrawSync(mode);
+}
+
void SoftGPU::GetStats(char *buffer, size_t bufsize) {
drawEngine_->transformUnit.GetStats(buffer, bufsize);
}
diff --git a/GPU/Software/SoftGpu.h b/GPU/Software/SoftGpu.h
index 11d5dd16f3..e90f7c0fb8 100644
--- a/GPU/Software/SoftGpu.h
+++ b/GPU/Software/SoftGpu.h
@@ -127,10 +127,12 @@ public:
SoftGPU(GraphicsContext *gfxCtx, Draw::DrawContext *draw);
~SoftGPU();
- void CheckGPUFeatures() override {}
+ u32 CheckGPUFeatures() const override { return 0; }
void InitClear() override {}
void ExecuteOp(u32 op, u32 diff) override;
void FinishDeferred() override;
+ int ListSync(int listid, int mode) override;
+ u32 DrawSync(int mode) override;
void SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) override;
void CopyDisplayToOutput(bool reallyDirty) override;
@@ -185,6 +187,8 @@ public:
void Execute_TgenMtxData(u32 op, u32 diff);
void Execute_BoneMtxData(u32 op, u32 diff);
+ void Execute_ImmVertexAlphaPrim(u32 op, u32 diff);
+
typedef void (SoftGPU::*CmdFunc)(u32 op, u32 diff);
protected:
diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp
index 0cb91dd280..c1471f7172 100644
--- a/GPU/Software/TransformUnit.cpp
+++ b/GPU/Software/TransformUnit.cpp
@@ -70,14 +70,66 @@ void SoftwareDrawEngine::DispatchSubmitPrim(const void *verts, const void *inds,
transformUnit.SubmitPrimitive(verts, inds, prim, vertexCount, vertTypeID, bytesRead, this);
}
-void SoftwareDrawEngine::DispatchSubmitImm(const void *verts, const void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead) {
+void SoftwareDrawEngine::DispatchSubmitImm(GEPrimitiveType prim, TransformedVertex *buffer, int vertexCount, int cullMode, bool continuation) {
+ uint32_t vertTypeID = GetVertTypeID(gstate.vertType | GE_VTYPE_POS_FLOAT, gstate.getUVGenMode());
+
int flipCull = cullMode != gstate.getCullMode() ? 1 : 0;
// TODO: For now, just setting all dirty.
transformUnit.SetDirty(SoftDirty(-1));
gstate.cullmode ^= flipCull;
- transformUnit.SubmitPrimitive(verts, inds, prim, vertexCount, vertTypeID, bytesRead, this);
+
+ // TODO: This is a bit ugly. Should bypass when clipping...
+ uint32_t xScale = gstate.viewportxscale;
+ uint32_t xCenter = gstate.viewportxcenter;
+ uint32_t yScale = gstate.viewportyscale;
+ uint32_t yCenter = gstate.viewportycenter;
+ uint32_t zScale = gstate.viewportzscale;
+ uint32_t zCenter = gstate.viewportzcenter;
+
+ // Force scale to 1 and center to zero.
+ gstate.viewportxscale = (GE_CMD_VIEWPORTXSCALE << 24) | 0x3F8000;
+ gstate.viewportxcenter = (GE_CMD_VIEWPORTXCENTER << 24) | 0x000000;
+ gstate.viewportyscale = (GE_CMD_VIEWPORTYSCALE << 24) | 0x3F8000;
+ gstate.viewportycenter = (GE_CMD_VIEWPORTYCENTER << 24) | 0x000000;
+ // Z we scale to 65535 for neg z clipping.
+ gstate.viewportzscale = (GE_CMD_VIEWPORTZSCALE << 24) | 0x477FFF;
+ gstate.viewportzcenter = (GE_CMD_VIEWPORTZCENTER << 24) | 0x000000;
+
+ // Before we start, submit 0 prims to reset the prev prim type.
+ // Following submits will always be KEEP_PREVIOUS.
+ if (!continuation)
+ transformUnit.SubmitPrimitive(nullptr, nullptr, prim, 0, vertTypeID, nullptr, this);
+
+ for (int i = 0; i < vertexCount; i++) {
+ VertexData vert;
+ vert.clippos = ClipCoords(buffer[i].pos);
+ vert.texturecoords.x = buffer[i].u;
+ vert.texturecoords.y = buffer[i].v;
+ if (gstate.isModeThrough()) {
+ vert.texturecoords.x *= gstate.getTextureWidth(0);
+ vert.texturecoords.y *= gstate.getTextureHeight(0);
+ } else {
+ vert.clippos.z *= 1.0f / 65535.0f;
+ }
+ vert.color0 = buffer[i].color0_32;
+ vert.color1 = gstate.isUsingSecondaryColor() && !gstate.isModeThrough() ? buffer[i].color1_32 : 0;
+ vert.fogdepth = buffer[i].fog;
+ vert.screenpos.x = (int)(buffer[i].x * 16.0f);
+ vert.screenpos.y = (int)(buffer[i].y * 16.0f);
+ vert.screenpos.z = (u16)(u32)buffer[i].z;
+
+ transformUnit.SubmitImmVertex(vert, this);
+ }
+
+ gstate.viewportxscale = xScale;
+ gstate.viewportxcenter = xCenter;
+ gstate.viewportyscale = yScale;
+ gstate.viewportycenter = yCenter;
+ gstate.viewportzscale = zScale;
+ gstate.viewportzcenter = zCenter;
+
gstate.cullmode ^= flipCull;
- // TODO: Should really clear, but the vertex type is faked so things might need resetting...
+ // TODO: Should really clear, but a bunch of values are forced so this is safest.
transformUnit.SetDirty(SoftDirty(-1));
}
@@ -273,7 +325,7 @@ void ComputeTransformState(TransformState *state, const VertexReader &vreader) {
state->roundToScreen = &ClipToScreenInternal;
}
-VertexData TransformUnit::ReadVertex(VertexReader &vreader, const TransformState &state, bool &outside_range_flag) {
+VertexData TransformUnit::ReadVertex(VertexReader &vreader, const TransformState &state) {
PROFILE_THIS_SCOPE("read_vert");
VertexData vertex;
@@ -362,9 +414,13 @@ VertexData TransformUnit::ReadVertex(VertexReader &vreader, const TransformState
#else
screenScaled = vertex.clippos.xyz() * state.screenScale / vertex.clippos.w + state.screenAdd;
#endif
+ bool outside_range_flag = false;
vertex.screenpos = state.roundToScreen(screenScaled, vertex.clippos, &outside_range_flag);
- if (outside_range_flag)
+ if (outside_range_flag) {
+ // An x of 0x7FFFFFFF essentially serves as the outside-range flag.
+ vertex.screenpos.x = 0x7FFFFFFF;
return vertex;
+ }
if (state.enableFog) {
vertex.fogdepth = (viewpos.z + state.fogEnd) * state.fogSlope;
@@ -447,20 +503,19 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G
if (gstate_c.skipDrawReason & SKIPDRAW_SKIPFRAME) {
return;
}
- // Throughmode never draws 8-bit primitives, maybe because they can't fully specify the screen?
- if ((vertex_type & GE_VTYPE_THROUGH_MASK) != 0 && (vertex_type & GE_VTYPE_POS_MASK) == GE_VTYPE_POS_8BIT)
- return;
// Vertices without position are just entirely culled.
+ // Note: Throughmode does draw 8-bit primitives, but positions are always zero - handled in decode.
if ((vertex_type & GE_VTYPE_POS_MASK) == 0)
return;
u16 index_lower_bound = 0;
- u16 index_upper_bound = vertex_count - 1;
+ u16 index_upper_bound = vertex_count == 0 ? 0 : vertex_count - 1;
IndexConverter ConvertIndex(vertex_type, indices);
if (indices)
GetIndexBounds(indices, vertex_count, vertex_type, &index_lower_bound, &index_upper_bound);
- vdecoder.DecodeVerts(decoded_, vertices, index_lower_bound, index_upper_bound);
+ if (vertex_count != 0)
+ vdecoder.DecodeVerts(decoded_, vertices, index_lower_bound, index_upper_bound);
VertexReader vreader(decoded_, vtxfmt, vertex_type);
@@ -471,19 +526,11 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G
prim_type = prev_prim_;
}
- int vtcs_per_prim;
- switch (prim_type) {
- case GE_PRIM_POINTS: vtcs_per_prim = 1; break;
- case GE_PRIM_LINES: vtcs_per_prim = 2; break;
- case GE_PRIM_TRIANGLES: vtcs_per_prim = 3; break;
- case GE_PRIM_RECTANGLES: vtcs_per_prim = 2; break;
- default: vtcs_per_prim = 0; break;
- }
-
// TODO: Do this in two passes - first process the vertices (before indexing/stripping),
// then resolve the indices. This lets us avoid transforming shared vertices twice.
binner_->UpdateState(vreader.isThrough());
+ hasDraws_ = true;
static TransformState transformState;
if (binner_->HasDirty(SoftDirty::LIGHT_ALL | SoftDirty::TRANSFORM_ALL)) {
@@ -494,9 +541,17 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G
bool skipCull = !gstate.isCullEnabled() || gstate.isModeClear();
const CullType cullType = skipCull ? CullType::OFF : (gstate.getCullMode() ? CullType::CCW : CullType::CW);
- bool outside_range_flag = false;
+ auto readVertexAt = [&](VertexReader &vreader, const TransformState &transformState, int vtx) {
+ if (indices) {
+ vreader.Goto(ConvertIndex(vtx) - index_lower_bound);
+ } else {
+ vreader.Goto(vtx);
+ }
- if (vreader.isThrough() && cullType == CullType::OFF && prim_type == GE_PRIM_TRIANGLES && data_index_ + vertex_count >= 6 && ((data_index_ + vertex_count) % 6) == 0) {
+ return ReadVertex(vreader, transformState);
+ };
+
+ if (vreader.isThrough() && cullType == CullType::OFF && prim_type == GE_PRIM_TRIANGLES && data_index_ == 0 && vertex_count >= 6 && ((vertex_count) % 6) == 0) {
// Some games send rectangles as a series of regular triangles.
// We look for this, but only in throughmode.
VertexData buf[6];
@@ -506,20 +561,7 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G
}
for (int vtx = 0; vtx < vertex_count; ++vtx) {
- if (indices) {
- vreader.Goto(ConvertIndex(vtx) - index_lower_bound);
- } else {
- vreader.Goto(vtx);
- }
-
- buf[buf_index++] = ReadVertex(vreader, transformState, outside_range_flag);
- if (buf_index >= 3 && outside_range_flag) {
- // Cull, just pretend it didn't happen.
- buf_index -= 3;
- outside_range_flag = false;
- continue;
- }
-
+ buf[buf_index++] = readVertexAt(vreader, transformState, vtx);
if (buf_index < 6)
continue;
@@ -552,73 +594,54 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G
return;
}
+ // Note: intentionally, these allow for the case of vertex_count == 0, but data_index_ > 0.
+ // This is used for immediate-mode primitives.
switch (prim_type) {
case GE_PRIM_POINTS:
- case GE_PRIM_LINES:
- case GE_PRIM_TRIANGLES:
- {
- for (int vtx = 0; vtx < vertex_count; ++vtx) {
- if (indices) {
- vreader.Goto(ConvertIndex(vtx) - index_lower_bound);
- } else {
- vreader.Goto(vtx);
- }
-
- data_[data_index_++] = ReadVertex(vreader, transformState, outside_range_flag);
- if (data_index_ < vtcs_per_prim) {
- // Keep reading. Note: an incomplete prim will stay read for GE_PRIM_KEEP_PREVIOUS.
- continue;
- }
-
- // Okay, we've got enough verts. Reset the index for next time.
- data_index_ = 0;
- if (outside_range_flag) {
- // Cull the prim if it was outside, and move to the next prim.
- outside_range_flag = false;
- continue;
- }
-
- switch (prim_type) {
- case GE_PRIM_TRIANGLES:
- SendTriangle(cullType, &data_[0]);
- break;
-
- case GE_PRIM_LINES:
- Clipper::ProcessLine(data_[0], data_[1], *binner_);
- break;
-
- case GE_PRIM_POINTS:
- Clipper::ProcessPoint(data_[0], *binner_);
- break;
-
- default:
- _dbg_assert_msg_(false, "Unexpected prim type: %d", prim_type);
- }
- }
- break;
+ for (int i = 0; i < data_index_; ++i)
+ Clipper::ProcessPoint(data_[i], *binner_);
+ data_index_ = 0;
+ for (int vtx = 0; vtx < vertex_count; ++vtx) {
+ data_[0] = readVertexAt(vreader, transformState, vtx);
+ Clipper::ProcessPoint(data_[0], *binner_);
}
+ break;
+
+ case GE_PRIM_LINES:
+ for (int i = 0; i < data_index_ - 1; i += 2)
+ Clipper::ProcessLine(data_[i + 0], data_[i + 1], *binner_);
+ data_index_ &= 1;
+ for (int vtx = 0; vtx < vertex_count; ++vtx) {
+ data_[data_index_++] = readVertexAt(vreader, transformState, vtx);
+ if (data_index_ == 2) {
+ Clipper::ProcessLine(data_[0], data_[1], *binner_);
+ data_index_ = 0;
+ }
+ }
+ break;
+
+ case GE_PRIM_TRIANGLES:
+ for (int vtx = 0; vtx < vertex_count; ++vtx) {
+ data_[data_index_++] = readVertexAt(vreader, transformState, vtx);
+ if (data_index_ < 3) {
+ // Keep reading. Note: an incomplete prim will stay read for GE_PRIM_KEEP_PREVIOUS.
+ continue;
+ }
+ // Okay, we've got enough verts. Reset the index for next time.
+ data_index_ = 0;
+
+ SendTriangle(cullType, &data_[0]);
+ }
+ // In case vertex_count was 0.
+ if (data_index_ >= 3) {
+ SendTriangle(cullType, &data_[0]);
+ data_index_ = 0;
+ }
+ break;
case GE_PRIM_RECTANGLES:
for (int vtx = 0; vtx < vertex_count; ++vtx) {
- if (indices) {
- vreader.Goto(ConvertIndex(vtx) - index_lower_bound);
- } else {
- vreader.Goto(vtx);
- }
-
- data_[data_index_++] = ReadVertex(vreader, transformState, outside_range_flag);
- if (outside_range_flag) {
- outside_range_flag = false;
- // Note: this is the post increment index. If odd, we set the first vert.
- if (data_index_ & 1) {
- // Skip the next one and forget this one.
- vtx++;
- data_index_--;
- } else {
- // Forget both of the last 2.
- data_index_ -= 2;
- }
- }
+ data_[data_index_++] = readVertexAt(vreader, transformState, vtx);
if (data_index_ == 4 && vreader.isThrough() && cullType == CullType::OFF) {
if (Rasterizer::DetectRectangleThroughModeSlices(binner_->State(), data_)) {
@@ -646,19 +669,7 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G
// If data_index_ is 1 or 2, etc., it means we're continuing a line strip.
int skip_count = data_index_ == 0 ? 1 : 0;
for (int vtx = 0; vtx < vertex_count; ++vtx) {
- if (indices) {
- vreader.Goto(ConvertIndex(vtx) - index_lower_bound);
- } else {
- vreader.Goto(vtx);
- }
-
- data_[(data_index_++) & 1] = ReadVertex(vreader, transformState, outside_range_flag);
- if (outside_range_flag) {
- // Drop all primitives containing the current vertex
- skip_count = 2;
- outside_range_flag = false;
- continue;
- }
+ data_[(data_index_++) & 1] = readVertexAt(vreader, transformState, vtx);
if (skip_count) {
--skip_count;
@@ -667,6 +678,9 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G
Clipper::ProcessLine(data_[data_index_ & 1], data_[(data_index_ & 1) ^ 1], *binner_);
}
}
+ // If this is from immediate-mode drawing, we always had one new vert (already in data_.)
+ if (isImmDraw_ && data_index_ >= 2)
+ Clipper::ProcessLine(data_[data_index_ & 1], data_[(data_index_ & 1) ^ 1], *binner_);
break;
}
@@ -681,19 +695,15 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G
if (data_index_ == 0 && vertex_count >= 4 && (vertex_count & 1) == 0 && cullType == CullType::OFF) {
for (int base = 0; base < vertex_count - 2; base += 2) {
for (int vtx = base == 0 ? 0 : 2; vtx < 4; ++vtx) {
- if (indices) {
- vreader.Goto(ConvertIndex(base + vtx) - index_lower_bound);
- } else {
- vreader.Goto(base + vtx);
- }
- data_[vtx] = ReadVertex(vreader, transformState, outside_range_flag);
+ data_[vtx] = readVertexAt(vreader, transformState, base + vtx);
}
// If a strip is effectively a rectangle, draw it as such!
int tl = -1, br = -1;
- if (!outside_range_flag && Rasterizer::DetectRectangleFromStrip(binner_->State(), data_, &tl, &br)) {
+ if (Rasterizer::DetectRectangleFromStrip(binner_->State(), data_, &tl, &br)) {
Clipper::ProcessRect(data_[tl], data_[br], *binner_);
start_vtx += 2;
+ skip_count = 0;
if (base + 4 >= vertex_count) {
start_vtx = vertex_count;
break;
@@ -710,32 +720,29 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G
}
}
- outside_range_flag = false;
- for (int vtx = start_vtx; vtx < vertex_count; ++vtx) {
- if (indices) {
- vreader.Goto(ConvertIndex(vtx) - index_lower_bound);
- } else {
- vreader.Goto(vtx);
- }
-
+ for (int vtx = start_vtx; vtx < vertex_count && skip_count > 0; ++vtx) {
int provoking_index = (data_index_++) % 3;
- data_[provoking_index] = ReadVertex(vreader, transformState, outside_range_flag);
- if (outside_range_flag) {
- // Drop all primitives containing the current vertex
- skip_count = 2;
- outside_range_flag = false;
- continue;
- }
+ data_[provoking_index] = readVertexAt(vreader, transformState, vtx);
+ --skip_count;
+ ++start_vtx;
+ }
- if (skip_count) {
- --skip_count;
- continue;
- }
+ for (int vtx = start_vtx; vtx < vertex_count; ++vtx) {
+ int provoking_index = (data_index_++) % 3;
+ data_[provoking_index] = readVertexAt(vreader, transformState, vtx);
int wind = (data_index_ - 1) % 2;
CullType altCullType = cullType == CullType::OFF ? cullType : CullType((int)cullType ^ wind);
SendTriangle(altCullType, &data_[0], provoking_index);
}
+
+ // If this is from immediate-mode drawing, we always had one new vert (already in data_.)
+ if (isImmDraw_ && data_index_ >= 3) {
+ int provoking_index = (data_index_ - 1) % 3;
+ int wind = (data_index_ - 1) % 2;
+ CullType altCullType = cullType == CullType::OFF ? cullType : CullType((int)cullType ^ wind);
+ SendTriangle(altCullType, &data_[0], provoking_index);
+ }
break;
}
@@ -747,64 +754,47 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G
int start_vtx = 0;
// Only read the central vertex if we're not continuing.
- if (data_index_ == 0) {
- if (indices) {
- vreader.Goto(ConvertIndex(0) - index_lower_bound);
- } else {
- vreader.Goto(0);
- }
- data_[0] = ReadVertex(vreader, transformState, outside_range_flag);
+ if (data_index_ == 0 && vertex_count > 0) {
+ data_[0] = readVertexAt(vreader, transformState, 0);
data_index_++;
start_vtx = 1;
-
- // If the central vertex is outside range, all the points are toast.
- if (outside_range_flag)
- break;
}
if (data_index_ == 1 && vertex_count == 4 && cullType == CullType::OFF) {
for (int vtx = start_vtx; vtx < vertex_count; ++vtx) {
- if (indices) {
- vreader.Goto(ConvertIndex(vtx) - index_lower_bound);
- } else {
- vreader.Goto(vtx);
- }
- data_[vtx] = ReadVertex(vreader, transformState, outside_range_flag);
+ data_[vtx] = readVertexAt(vreader, transformState, vtx);
}
int tl = -1, br = -1;
- if (!outside_range_flag && Rasterizer::DetectRectangleFromFan(binner_->State(), data_, vertex_count, &tl, &br)) {
+ if (Rasterizer::DetectRectangleFromFan(binner_->State(), data_, vertex_count, &tl, &br)) {
Clipper::ProcessRect(data_[tl], data_[br], *binner_);
break;
}
}
- outside_range_flag = false;
- for (int vtx = start_vtx; vtx < vertex_count; ++vtx) {
- if (indices) {
- vreader.Goto(ConvertIndex(vtx) - index_lower_bound);
- } else {
- vreader.Goto(vtx);
- }
-
+ for (int vtx = start_vtx; vtx < vertex_count && skip_count > 0; ++vtx) {
int provoking_index = 2 - ((data_index_++) % 2);
- data_[provoking_index] = ReadVertex(vreader, transformState, outside_range_flag);
- if (outside_range_flag) {
- // Drop all primitives containing the current vertex
- skip_count = 2;
- outside_range_flag = false;
- continue;
- }
+ data_[provoking_index] = readVertexAt(vreader, transformState, vtx);
+ --skip_count;
+ ++start_vtx;
+ }
- if (skip_count) {
- --skip_count;
- continue;
- }
+ for (int vtx = start_vtx; vtx < vertex_count; ++vtx) {
+ int provoking_index = 2 - ((data_index_++) % 2);
+ data_[provoking_index] = readVertexAt(vreader, transformState, vtx);
int wind = (data_index_ - 1) % 2;
CullType altCullType = cullType == CullType::OFF ? cullType : CullType((int)cullType ^ wind);
SendTriangle(altCullType, &data_[0], provoking_index);
}
+
+ // If this is from immediate-mode drawing, we always had one new vert (already in data_.)
+ if (isImmDraw_ && data_index_ >= 3) {
+ int wind = (data_index_ - 1) % 2;
+ int provoking_index = 2 - wind;
+ CullType altCullType = cullType == CullType::OFF ? cullType : CullType((int)cullType ^ wind);
+ SendTriangle(altCullType, &data_[0], provoking_index);
+ }
break;
}
@@ -814,6 +804,47 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G
}
}
+void TransformUnit::SubmitImmVertex(const VertexData &vert, SoftwareDrawEngine *drawEngine) {
+ // Where we put it is different for STRIP/FAN types.
+ switch (prev_prim_) {
+ case GE_PRIM_POINTS:
+ case GE_PRIM_LINES:
+ case GE_PRIM_TRIANGLES:
+ case GE_PRIM_RECTANGLES:
+ // This is the easy one. SubmitPrimitive resets data_index_.
+ data_[data_index_++] = vert;
+ break;
+
+ case GE_PRIM_LINE_STRIP:
+ // This one alternates, and data_index_ > 0 means it draws a segment.
+ data_[(data_index_++) & 1] = vert;
+ break;
+
+ case GE_PRIM_TRIANGLE_STRIP:
+ data_[(data_index_++) % 3] = vert;
+ break;
+
+ case GE_PRIM_TRIANGLE_FAN:
+ if (data_index_ == 0) {
+ data_[data_index_++] = vert;
+ } else {
+ int provoking_index = 2 - ((data_index_++) % 2);
+ data_[provoking_index] = vert;
+ }
+ break;
+
+ default:
+ _assert_msg_(false, "Invalid prim type: %d", (int)prev_prim_);
+ break;
+ }
+
+ uint32_t vertTypeID = GetVertTypeID(gstate.vertType | GE_VTYPE_POS_FLOAT, gstate.getUVGenMode());
+ // This now processes the step with shared logic, given the existing data_.
+ isImmDraw_ = true;
+ SubmitPrimitive(nullptr, nullptr, GE_PRIM_KEEP_PREVIOUS, 0, vertTypeID, nullptr, drawEngine);
+ isImmDraw_ = false;
+}
+
void TransformUnit::SendTriangle(CullType cullType, const VertexData *verts, int provoking) {
if (cullType == CullType::OFF) {
Clipper::ProcessTriangle(verts[0], verts[1], verts[2], verts[provoking], *binner_);
@@ -826,8 +857,12 @@ void TransformUnit::SendTriangle(CullType cullType, const VertexData *verts, int
}
void TransformUnit::Flush(const char *reason) {
+ if (!hasDraws_)
+ return;
+
binner_->Flush(reason);
GPUDebug::NotifyDraw();
+ hasDraws_ = false;
}
void TransformUnit::GetStats(char *buffer, size_t bufsize) {
@@ -836,6 +871,9 @@ void TransformUnit::GetStats(char *buffer, size_t bufsize) {
}
void TransformUnit::FlushIfOverlap(const char *reason, bool modifying, uint32_t addr, uint32_t stride, uint32_t w, uint32_t h) {
+ if (!hasDraws_)
+ return;
+
if (binner_->HasPendingWrite(addr, stride, w, h))
Flush(reason);
if (modifying && binner_->HasPendingRead(addr, stride, w, h))
diff --git a/GPU/Software/TransformUnit.h b/GPU/Software/TransformUnit.h
index fd6e047350..465f4bd8ab 100644
--- a/GPU/Software/TransformUnit.h
+++ b/GPU/Software/TransformUnit.h
@@ -90,6 +90,10 @@ struct VertexData {
color1 = LerpInt, 256>(Vec3::FromRGB(a.color1), Vec3::FromRGB(b.color1), t_int).ToRGB();
}
+ bool OutsideRange() const {
+ return screenpos.x == 0x7FFFFFFF;
+ }
+
ClipCoords clippos;
Vec2 texturecoords;
uint32_t color0;
@@ -125,6 +129,7 @@ public:
static ScreenCoords DrawingToScreen(const DrawingCoords &coords, u16 z);
void SubmitPrimitive(const void* vertices, const void* indices, GEPrimitiveType prim_type, int vertex_count, u32 vertex_type, int *bytesRead, SoftwareDrawEngine *drawEngine);
+ void SubmitImmVertex(const VertexData &vert, SoftwareDrawEngine *drawEngine);
bool GetCurrentSimpleVertices(int count, std::vector &vertices, std::vector &indices);
@@ -138,7 +143,7 @@ public:
SoftDirty GetDirty();
private:
- VertexData ReadVertex(VertexReader &vreader, const TransformState &lstate, bool &outside_range_flag);
+ VertexData ReadVertex(VertexReader &vreader, const TransformState &state);
void SendTriangle(CullType cullType, const VertexData *verts, int provoking = 2);
u8 *decoded_ = nullptr;
@@ -149,6 +154,8 @@ private:
// This is the index of the next vert in data (or higher, may need modulus.)
int data_index_ = 0;
GEPrimitiveType prev_prim_ = GE_PRIM_POINTS;
+ bool hasDraws_ = false;
+ bool isImmDraw_ = false;
};
class SoftwareDrawEngine : public DrawEngineCommon {
@@ -158,7 +165,7 @@ public:
void DispatchFlush() override;
void DispatchSubmitPrim(const void *verts, const void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int cullMode, int *bytesRead) override;
- void DispatchSubmitImm(const void *verts, const void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead) override;
+ void DispatchSubmitImm(GEPrimitiveType prim, TransformedVertex *buffer, int vertexCount, int cullMode, bool continuation) override;
VertexDecoder *FindVertexDecoder(u32 vtype);
diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp
index e8ebd485d1..f992df082f 100644
--- a/GPU/Vulkan/DrawEngineVulkan.cpp
+++ b/GPU/Vulkan/DrawEngineVulkan.cpp
@@ -71,6 +71,7 @@ enum {
DRAW_BINDING_TESS_STORAGE_BUF = 6,
DRAW_BINDING_TESS_STORAGE_BUF_WU = 7,
DRAW_BINDING_TESS_STORAGE_BUF_WV = 8,
+ DRAW_BINDING_INPUT_ATTACHMENT = 9,
};
enum {
@@ -94,7 +95,10 @@ DrawEngineVulkan::DrawEngineVulkan(Draw::DrawContext *draw)
void DrawEngineVulkan::InitDeviceObjects() {
// All resources we need for PSP drawing. Usually only bindings 0 and 2-4 are populated.
- VkDescriptorSetLayoutBinding bindings[9]{};
+
+ // TODO: Make things more flexible, so we at least have specialized layouts for input attachments and tess.
+ // Note that it becomes a support matrix..
+ VkDescriptorSetLayoutBinding bindings[10]{};
bindings[0].descriptorCount = 1;
bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
bindings[0].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
@@ -132,6 +136,10 @@ void DrawEngineVulkan::InitDeviceObjects() {
bindings[8].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
bindings[8].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
bindings[8].binding = DRAW_BINDING_TESS_STORAGE_BUF_WV;
+ bindings[9].descriptorCount = 1;
+ bindings[9].descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT;
+ bindings[9].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
+ bindings[9].binding = DRAW_BINDING_INPUT_ATTACHMENT;
VulkanContext *vulkan = (VulkanContext *)draw_->GetNativeObject(Draw::NativeObject::CONTEXT);
VkDevice device = vulkan->GetDevice();
@@ -145,13 +153,15 @@ void DrawEngineVulkan::InitDeviceObjects() {
static constexpr int DEFAULT_DESC_POOL_SIZE = 512;
std::vector dpTypes;
- dpTypes.resize(3);
+ dpTypes.resize(4);
dpTypes[0].descriptorCount = DEFAULT_DESC_POOL_SIZE * 3;
dpTypes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
dpTypes[1].descriptorCount = DEFAULT_DESC_POOL_SIZE * 3; // Don't use these for tess anymore, need max three per set.
dpTypes[1].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
dpTypes[2].descriptorCount = DEFAULT_DESC_POOL_SIZE * 3; // TODO: Use a separate layout when no spline stuff is needed to reduce the need for these.
dpTypes[2].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+ dpTypes[3].descriptorCount = DEFAULT_DESC_POOL_SIZE; // TODO: Use a separate layout when no input attachment is needed to reduce the need for these.
+ dpTypes[3].type = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT;
VkDescriptorPoolCreateInfo dp{ VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO };
// Don't want to mess around with individually freeing these.
@@ -379,6 +389,7 @@ VkDescriptorSet DrawEngineVulkan::GetOrCreateDescriptorSet(VkImageView imageView
key.base_ = base;
key.light_ = light;
key.bone_ = bone;
+ key.secondaryIsInputAttachment = boundSecondaryIsInputAttachment_;
FrameData &frame = GetCurFrame();
// See if we already have this descriptor set cached.
@@ -417,15 +428,15 @@ VkDescriptorSet DrawEngineVulkan::GetOrCreateDescriptorSet(VkImageView imageView
}
if (boundSecondary_) {
- tex[1].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+ tex[1].imageLayout = key.secondaryIsInputAttachment ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
tex[1].imageView = boundSecondary_;
tex[1].sampler = samplerSecondaryNearest_;
writes[n].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
writes[n].pNext = nullptr;
- writes[n].dstBinding = DRAW_BINDING_2ND_TEXTURE;
+ writes[n].dstBinding = key.secondaryIsInputAttachment ? DRAW_BINDING_INPUT_ATTACHMENT : DRAW_BINDING_2ND_TEXTURE;
writes[n].pImageInfo = &tex[1];
writes[n].descriptorCount = 1;
- writes[n].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
+ writes[n].descriptorType = key.secondaryIsInputAttachment ? VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT : VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
writes[n].dstSet = desc;
n++;
}
@@ -788,7 +799,7 @@ void DrawEngineVulkan::DoFlush() {
lastRenderStepId_ = curRenderStepId;
}
- renderManager->BindPipeline(pipeline->pipeline, (PipelineFlags)pipeline->flags, pipelineLayout_);
+ renderManager->BindPipeline(pipeline->pipeline, pipeline->pipelineFlags, pipelineLayout_);
if (pipeline != lastPipeline_) {
if (lastPipeline_ && !(lastPipeline_->UsesBlendConstant() && pipeline->UsesBlendConstant())) {
gstate_c.Dirty(DIRTY_BLEND_STATE);
@@ -916,7 +927,7 @@ void DrawEngineVulkan::DoFlush() {
lastRenderStepId_ = curRenderStepId;
}
- renderManager->BindPipeline(pipeline->pipeline, (PipelineFlags)pipeline->flags, pipelineLayout_);
+ renderManager->BindPipeline(pipeline->pipeline, pipeline->pipelineFlags, pipelineLayout_);
if (pipeline != lastPipeline_) {
if (lastPipeline_ && !lastPipeline_->UsesBlendConstant() && pipeline->UsesBlendConstant()) {
gstate_c.Dirty(DIRTY_BLEND_STATE);
diff --git a/GPU/Vulkan/DrawEngineVulkan.h b/GPU/Vulkan/DrawEngineVulkan.h
index 531e05c4ed..0de67940de 100644
--- a/GPU/Vulkan/DrawEngineVulkan.h
+++ b/GPU/Vulkan/DrawEngineVulkan.h
@@ -217,6 +217,8 @@ private:
// Secondary texture for shader blending
VkImageView boundSecondary_ = VK_NULL_HANDLE;
+ bool boundSecondaryIsInputAttachment_ = false;
+
// CLUT texture for shader depal
VkImageView boundDepal_ = VK_NULL_HANDLE;
bool boundDepalSmoothed_ = false;
@@ -234,6 +236,7 @@ private:
VkSampler sampler_;
VkBuffer base_, light_, bone_; // All three UBO slots will be set to this. This will usually be identical
// for all draws in a frame, except when the buffer has to grow.
+ bool secondaryIsInputAttachment;
};
// We alternate between these.
@@ -281,7 +284,7 @@ private:
VulkanDynamicState dynState_{};
int tessOffset_ = 0;
- bool fboTexNeedsBind_ = false;
+ FBOTexState fboTexBindState_ = FBO_TEX_NONE;
// Hardware tessellation
TessellationDataTransferVulkan *tessDataTransferVulkan;
diff --git a/GPU/Vulkan/FramebufferManagerVulkan.h b/GPU/Vulkan/FramebufferManagerVulkan.h
index 0f5d7c4f53..d3370fafb7 100644
--- a/GPU/Vulkan/FramebufferManagerVulkan.h
+++ b/GPU/Vulkan/FramebufferManagerVulkan.h
@@ -33,7 +33,7 @@ class VulkanPushBuffer;
class FramebufferManagerVulkan : public FramebufferManagerCommon {
public:
- FramebufferManagerVulkan(Draw::DrawContext *draw);
+ explicit FramebufferManagerVulkan(Draw::DrawContext *draw);
~FramebufferManagerVulkan();
// If within a render pass, this will just issue a regular clear. If beginning a new render pass,
diff --git a/GPU/Vulkan/GPU_Vulkan.cpp b/GPU/Vulkan/GPU_Vulkan.cpp
index f96d282fc7..b38201c8c5 100644
--- a/GPU/Vulkan/GPU_Vulkan.cpp
+++ b/GPU/Vulkan/GPU_Vulkan.cpp
@@ -52,7 +52,7 @@
GPU_Vulkan::GPU_Vulkan(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
: GPUCommon(gfxCtx, draw), drawEngine_(draw) {
- CheckGPUFeatures();
+ gstate_c.featureFlags = CheckGPUFeatures();
VulkanContext *vulkan = (VulkanContext *)gfxCtx->GetAPIContext();
@@ -182,8 +182,8 @@ GPU_Vulkan::~GPU_Vulkan() {
delete framebufferManagerVulkan_;
}
-void GPU_Vulkan::CheckGPUFeatures() {
- uint32_t features = 0;
+u32 GPU_Vulkan::CheckGPUFeatures() const {
+ uint32_t features = GPUCommon::CheckGPUFeatures();
VulkanContext *vulkan = (VulkanContext *)draw_->GetNativeObject(Draw::NativeObject::CONTEXT);
switch (vulkan->GetPhysicalDeviceProperties().properties.vendorID) {
@@ -222,44 +222,14 @@ void GPU_Vulkan::CheckGPUFeatures() {
// Mandatory features on Vulkan, which may be checked in "centralized" code
features |= GPU_SUPPORTS_TEXTURE_LOD_CONTROL;
- features |= GPU_SUPPORTS_BLEND_MINMAX;
- features |= GPU_SUPPORTS_TEXTURE_NPOT;
features |= GPU_SUPPORTS_INSTANCE_RENDERING;
features |= GPU_SUPPORTS_VERTEX_TEXTURE_FETCH;
features |= GPU_SUPPORTS_TEXTURE_FLOAT;
- features |= GPU_SUPPORTS_DEPTH_TEXTURE;
auto &enabledFeatures = vulkan->GetDeviceFeatures().enabled;
if (enabledFeatures.depthClamp) {
features |= GPU_SUPPORTS_DEPTH_CLAMP;
}
- if (enabledFeatures.shaderClipDistance) {
- features |= GPU_SUPPORTS_CLIP_DISTANCE;
- }
- if (enabledFeatures.shaderCullDistance) {
- // Must support at least 8 if feature supported, so we're fine.
- features |= GPU_SUPPORTS_CULL_DISTANCE;
- }
- if (!draw_->GetBugs().Has(Draw::Bugs::BROKEN_NAN_IN_CONDITIONAL)) {
- // Ignore the compat setting if clip and cull are both enabled.
- // When supported, we can do the depth side of range culling more correctly.
- const bool supported = draw_->GetDeviceCaps().clipDistanceSupported && draw_->GetDeviceCaps().cullDistanceSupported;
- const bool disabled = PSP_CoreParameter().compat.flags().DisableRangeCulling;
- if (supported || !disabled) {
- features |= GPU_SUPPORTS_VS_RANGE_CULLING;
- }
- }
- if (enabledFeatures.dualSrcBlend) {
- if (!g_Config.bVendorBugChecksEnabled || !draw_->GetBugs().Has(Draw::Bugs::DUAL_SOURCE_BLENDING_BROKEN)) {
- features |= GPU_SUPPORTS_DUALSOURCE_BLEND;
- }
- }
- if (draw_->GetDeviceCaps().logicOpSupported) {
- features |= GPU_SUPPORTS_LOGIC_OP;
- }
- if (draw_->GetDeviceCaps().anisoSupported) {
- features |= GPU_SUPPORTS_ANISOTROPY;
- }
// These are VULKAN_4444_FORMAT and friends.
uint32_t fmt4444 = draw_->GetDataFormatSupport(Draw::DataFormat::B4G4R4A4_UNORM_PACK16);
@@ -275,10 +245,6 @@ void GPU_Vulkan::CheckGPUFeatures() {
INFO_LOG(G3D, "Deficient texture format support: 4444: %d 1555: %d 565: %d", fmt4444, fmt1555, fmt565);
}
- if (PSP_CoreParameter().compat.flags().ClearToRAM) {
- features |= GPU_USE_CLEAR_RAM_HACK;
- }
-
if (!g_Config.bHighQualityDepth && (features & GPU_SUPPORTS_ACCURATE_DEPTH) != 0) {
features |= GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT;
}
@@ -290,7 +256,7 @@ void GPU_Vulkan::CheckGPUFeatures() {
features |= GPU_ROUND_DEPTH_TO_16BIT;
}
- gstate_c.featureFlags = features;
+ return features;
}
void GPU_Vulkan::BeginHostFrame() {
@@ -298,7 +264,7 @@ void GPU_Vulkan::BeginHostFrame() {
UpdateCmdInfo();
if (resized_) {
- CheckGPUFeatures();
+ gstate_c.featureFlags = CheckGPUFeatures();
// In case the GPU changed.
BuildReportingInfo();
framebufferManager_->Resized();
@@ -537,7 +503,7 @@ void GPU_Vulkan::DeviceRestore() {
GPUCommon::DeviceRestore();
InitDeviceObjects();
- CheckGPUFeatures();
+ gstate_c.featureFlags = CheckGPUFeatures();
BuildReportingInfo();
UpdateCmdInfo();
diff --git a/GPU/Vulkan/GPU_Vulkan.h b/GPU/Vulkan/GPU_Vulkan.h
index 3c13d57621..3fbd329aae 100644
--- a/GPU/Vulkan/GPU_Vulkan.h
+++ b/GPU/Vulkan/GPU_Vulkan.h
@@ -38,7 +38,7 @@ public:
~GPU_Vulkan();
// This gets called on startup and when we get back from settings.
- void CheckGPUFeatures() override;
+ u32 CheckGPUFeatures() const override;
bool IsReady() override;
void CancelReady() override;
diff --git a/GPU/Vulkan/PipelineManagerVulkan.cpp b/GPU/Vulkan/PipelineManagerVulkan.cpp
index a88fe235a5..e950dfc457 100644
--- a/GPU/Vulkan/PipelineManagerVulkan.cpp
+++ b/GPU/Vulkan/PipelineManagerVulkan.cpp
@@ -170,7 +170,7 @@ static std::string CutFromMain(std::string str) {
}
static VulkanPipeline *CreateVulkanPipeline(VulkanRenderManager *renderManager, VkPipelineCache pipelineCache,
- VkPipelineLayout layout, const VulkanPipelineRasterStateKey &key,
+ VkPipelineLayout layout, PipelineFlags pipelineFlags, const VulkanPipelineRasterStateKey &key,
const DecVtxFormat *decFmt, VulkanVertexShader *vs, VulkanFragmentShader *fs, bool useHwTransform, u32 variantBitmask) {
VulkanPipeline *vulkanPipeline = new VulkanPipeline();
VKRGraphicsPipelineDesc *desc = &vulkanPipeline->desc;
@@ -299,14 +299,14 @@ static VulkanPipeline *CreateVulkanPipeline(VulkanRenderManager *renderManager,
VKRGraphicsPipeline *pipeline = renderManager->CreateGraphicsPipeline(desc, variantBitmask, "game");
vulkanPipeline->pipeline = pipeline;
- vulkanPipeline->flags = 0;
if (useBlendConstant)
- vulkanPipeline->flags |= PIPELINE_FLAG_USES_BLEND_CONSTANT;
+ pipelineFlags |= PipelineFlags::USES_BLEND_CONSTANT;
if (key.topology == VK_PRIMITIVE_TOPOLOGY_LINE_LIST || key.topology == VK_PRIMITIVE_TOPOLOGY_LINE_STRIP)
- vulkanPipeline->flags |= PIPELINE_FLAG_USES_LINES;
+ pipelineFlags |= PipelineFlags::USES_LINES;
if (dss.depthTestEnable || dss.stencilTestEnable) {
- vulkanPipeline->flags |= PIPELINE_FLAG_USES_DEPTH_STENCIL;
+ pipelineFlags |= PipelineFlags::USES_DEPTH_STENCIL;
}
+ vulkanPipeline->pipelineFlags = pipelineFlags;
return vulkanPipeline;
}
@@ -329,8 +329,13 @@ VulkanPipeline *PipelineManagerVulkan::GetOrCreatePipeline(VulkanRenderManager *
if (iter)
return iter;
+ PipelineFlags pipelineFlags = (PipelineFlags)0;
+ if (fs->Flags() & FragmentShaderFlags::INPUT_ATTACHMENT) {
+ pipelineFlags |= PipelineFlags::USES_INPUT_ATTACHMENT;
+ }
+
VulkanPipeline *pipeline = CreateVulkanPipeline(
- renderManager, pipelineCache_, layout,
+ renderManager, pipelineCache_, layout, pipelineFlags,
rasterKey, decFmt, vs, fs, useHwTransform, variantBitmask);
pipelines_.Insert(key, pipeline);
diff --git a/GPU/Vulkan/PipelineManagerVulkan.h b/GPU/Vulkan/PipelineManagerVulkan.h
index af32aa81bd..08907e3b3e 100644
--- a/GPU/Vulkan/PipelineManagerVulkan.h
+++ b/GPU/Vulkan/PipelineManagerVulkan.h
@@ -55,11 +55,12 @@ struct VulkanPipelineKey {
struct VulkanPipeline {
VKRGraphicsPipeline *pipeline;
VKRGraphicsPipelineDesc desc;
- int flags; // PipelineFlags enum above.
+ PipelineFlags pipelineFlags; // PipelineFlags enum above.
- bool UsesBlendConstant() const { return (flags & PIPELINE_FLAG_USES_BLEND_CONSTANT) != 0; }
- bool UsesLines() const { return (flags & PIPELINE_FLAG_USES_LINES) != 0; }
- bool UsesDepthStencil() const { return (flags & PIPELINE_FLAG_USES_DEPTH_STENCIL) != 0; }
+ bool UsesBlendConstant() const { return (pipelineFlags & PipelineFlags::USES_BLEND_CONSTANT) != 0; }
+ bool UsesLines() const { return (pipelineFlags & PipelineFlags::USES_LINES) != 0; }
+ bool UsesDepthStencil() const { return (pipelineFlags & PipelineFlags::USES_DEPTH_STENCIL) != 0; }
+ bool UsesInputAttachment() const { return (pipelineFlags & PipelineFlags::USES_INPUT_ATTACHMENT) != 0; }
u32 GetVariantsBitmask() const;
};
diff --git a/GPU/Vulkan/StateMappingVulkan.cpp b/GPU/Vulkan/StateMappingVulkan.cpp
index 6457b99ad9..290c7b010c 100644
--- a/GPU/Vulkan/StateMappingVulkan.cpp
+++ b/GPU/Vulkan/StateMappingVulkan.cpp
@@ -153,7 +153,7 @@ void DrawEngineVulkan::ConvertStateToVulkanKey(FramebufferManagerVulkan &fbManag
GenericLogicState &logicState = pipelineState_.logicState;
if (pipelineState_.FramebufferRead()) {
- ApplyFramebufferRead(&fboTexNeedsBind_);
+ ApplyFramebufferRead(&fboTexBindState_);
// The shader takes over the responsibility for blending, so recompute.
// We might still end up using blend to write something to alpha.
ApplyStencilReplaceAndLogicOpIgnoreBlend(blendState.replaceAlphaWithStencil, blendState);
@@ -364,15 +364,23 @@ void DrawEngineVulkan::BindShaderBlendTex() {
// TODO: At this point, we know if the vertices are full alpha or not.
// Set the nearest/linear here (since we correctly know if alpha/color tests are needed)?
if (!gstate.isModeClear()) {
- if (fboTexNeedsBind_) {
+ if (fboTexBindState_ == FBO_TEX_COPY_BIND_TEX) {
bool bindResult = framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY);
_dbg_assert_(bindResult);
boundSecondary_ = (VkImageView)draw_->GetNativeObject(Draw::NativeObject::BOUND_TEXTURE1_IMAGEVIEW);
+ boundSecondaryIsInputAttachment_ = false;
fboTexBound_ = true;
- fboTexNeedsBind_ = false;
+ fboTexBindState_ = FBO_TEX_NONE;
// Must dirty blend state here so we re-copy next time. Example: Lunar's spell effects.
dirtyRequiresRecheck_ |= DIRTY_BLEND_STATE;
+ } else if (fboTexBindState_ == FBO_TEX_READ_FRAMEBUFFER) {
+ draw_->BindCurrentFramebufferForColorInput();
+ boundSecondary_ = (VkImageView)draw_->GetNativeObject(Draw::NativeObject::BOUND_FRAMEBUFFER_COLOR_IMAGEVIEW);
+ boundSecondaryIsInputAttachment_ = true;
+ fboTexBindState_ = FBO_TEX_NONE;
+ } else {
+ boundSecondary_ = VK_NULL_HANDLE;
}
}
}
diff --git a/SDL/SDLVulkanGraphicsContext.cpp b/SDL/SDLVulkanGraphicsContext.cpp
index 433f35b03f..b7eb932b11 100644
--- a/SDL/SDLVulkanGraphicsContext.cpp
+++ b/SDL/SDLVulkanGraphicsContext.cpp
@@ -116,7 +116,7 @@ bool SDLVulkanGraphicsContext::Init(SDL_Window *&window, int x, int y, int mode,
return false;
}
- draw_ = Draw::T3DCreateVulkanContext(vulkan_, false);
+ draw_ = Draw::T3DCreateVulkanContext(vulkan_);
SetGPUBackend(GPUBackend::VULKAN);
bool success = draw_->CreatePresets();
_assert_(success);
diff --git a/UI/ComboKeyMappingScreen.cpp b/UI/ComboKeyMappingScreen.cpp
index 3a4bc277d1..d4c8a814b6 100644
--- a/UI/ComboKeyMappingScreen.cpp
+++ b/UI/ComboKeyMappingScreen.cpp
@@ -56,6 +56,8 @@ public:
parent->Add(scroll);
}
+ const char *tag() const override { return "ButtonShape"; }
+
private:
int *setting_;
};
@@ -84,6 +86,8 @@ public:
parent->Add(scroll);
}
+ const char *tag() const override { return "ButtonIcon"; }
+
private:
int *setting_;
};
diff --git a/UI/ComboKeyMappingScreen.h b/UI/ComboKeyMappingScreen.h
index 6eb271211d..c2ed6396d8 100644
--- a/UI/ComboKeyMappingScreen.h
+++ b/UI/ComboKeyMappingScreen.h
@@ -28,6 +28,8 @@ class ComboKeyScreen : public UIDialogScreenWithBackground {
public:
ComboKeyScreen(int id): id_(id) {}
+ const char *tag() const override { return "ComboKey"; }
+
void CreateViews() override;
void onFinish(DialogResult result) override;
diff --git a/UI/ControlMappingScreen.cpp b/UI/ControlMappingScreen.cpp
index 7acdd36f05..f6255ad4e3 100644
--- a/UI/ControlMappingScreen.cpp
+++ b/UI/ControlMappingScreen.cpp
@@ -313,7 +313,7 @@ UI::EventReturn ControlMappingScreen::OnVisualizeMapping(UI::EventParams ¶ms
}
void ControlMappingScreen::dialogFinished(const Screen *dialog, DialogResult result) {
- if (result == DR_OK && dialog->tag() == "listpopup") {
+ if (result == DR_OK && std::string(dialog->tag()) == "listpopup") {
ListPopupScreen *popup = (ListPopupScreen *)dialog;
KeyMap::AutoConfForPad(popup->GetChoiceString());
}
diff --git a/UI/ControlMappingScreen.h b/UI/ControlMappingScreen.h
index add89bfdfa..29f1360fbe 100644
--- a/UI/ControlMappingScreen.h
+++ b/UI/ControlMappingScreen.h
@@ -35,7 +35,7 @@ class SingleControlMapper;
class ControlMappingScreen : public UIDialogScreenWithBackground {
public:
ControlMappingScreen() {}
- std::string tag() const override { return "control mapping"; }
+ const char *tag() const override { return "ControlMapping"; }
protected:
void CreateViews() override;
@@ -47,7 +47,7 @@ private:
UI::EventReturn OnAutoConfigure(UI::EventParams ¶ms);
UI::EventReturn OnVisualizeMapping(UI::EventParams ¶ms);
- virtual void dialogFinished(const Screen *dialog, DialogResult result) override;
+ void dialogFinished(const Screen *dialog, DialogResult result) override;
UI::ScrollView *rightScroll_;
std::vector mappers_;
@@ -61,17 +61,19 @@ public:
pspBtn_ = btn;
}
- virtual bool key(const KeyInput &key) override;
- virtual bool axis(const AxisInput &axis) override;
+ const char *tag() const override { return "KeyMappingNewKey"; }
+
+ bool key(const KeyInput &key) override;
+ bool axis(const AxisInput &axis) override;
void SetDelay(float t);
protected:
void CreatePopupContents(UI::ViewGroup *parent) override;
- virtual bool FillVertical() const override { return false; }
- virtual bool ShowButtons() const override { return true; }
- virtual void OnCompleted(DialogResult result) override {}
+ bool FillVertical() const override { return false; }
+ bool ShowButtons() const override { return true; }
+ void OnCompleted(DialogResult result) override {}
private:
int pspBtn_;
@@ -87,6 +89,8 @@ public:
pspBtn_ = btn;
}
+ const char *tag() const override { return "KeyMappingNewMouseKey"; }
+
bool key(const KeyInput &key) override;
bool axis(const AxisInput &axis) override;
@@ -114,6 +118,8 @@ public:
void update() override;
+ const char *tag() const override { return "AnalogSetup"; }
+
protected:
void CreateViews() override;
@@ -144,6 +150,8 @@ public:
bool key(const KeyInput &key) override;
bool axis(const AxisInput &axis) override;
+ const char *tag() const override { return "TouchTest"; }
+
protected:
struct TrackedTouch {
int id;
@@ -171,6 +179,8 @@ class VisualMappingScreen : public UIDialogScreenWithBackground {
public:
VisualMappingScreen() {}
+ const char *tag() const override { return "VisualMapping"; }
+
protected:
void CreateViews() override;
diff --git a/UI/CwCheatScreen.h b/UI/CwCheatScreen.h
index e2e167c771..27a28a1b51 100644
--- a/UI/CwCheatScreen.h
+++ b/UI/CwCheatScreen.h
@@ -41,6 +41,8 @@ public:
void update() override;
void onFinish(DialogResult result) override;
+ const char *tag() const override { return "CwCheat"; }
+
protected:
void CreateViews() override;
diff --git a/UI/DevScreens.cpp b/UI/DevScreens.cpp
index a804eb98f0..ce5bd3fada 100644
--- a/UI/DevScreens.cpp
+++ b/UI/DevScreens.cpp
@@ -85,7 +85,7 @@ static const char *logLevelList[] = {
"Verb."
};
-void DevMenu::CreatePopupContents(UI::ViewGroup *parent) {
+void DevMenuScreen::CreatePopupContents(UI::ViewGroup *parent) {
using namespace UI;
auto dev = GetI18NCategory("Developer");
auto sy = GetI18NCategory("System");
@@ -94,25 +94,25 @@ void DevMenu::CreatePopupContents(UI::ViewGroup *parent) {
LinearLayout *items = new LinearLayout(ORIENT_VERTICAL);
#if !defined(MOBILE_DEVICE)
- items->Add(new Choice(dev->T("Log View")))->OnClick.Handle(this, &DevMenu::OnLogView);
+ items->Add(new Choice(dev->T("Log View")))->OnClick.Handle(this, &DevMenuScreen::OnLogView);
#endif
- items->Add(new Choice(dev->T("Logging Channels")))->OnClick.Handle(this, &DevMenu::OnLogConfig);
- items->Add(new Choice(sy->T("Developer Tools")))->OnClick.Handle(this, &DevMenu::OnDeveloperTools);
- items->Add(new Choice(dev->T("Jit Compare")))->OnClick.Handle(this, &DevMenu::OnJitCompare);
- items->Add(new Choice(dev->T("Shader Viewer")))->OnClick.Handle(this, &DevMenu::OnShaderView);
+ items->Add(new Choice(dev->T("Logging Channels")))->OnClick.Handle(this, &DevMenuScreen::OnLogConfig);
+ items->Add(new Choice(sy->T("Developer Tools")))->OnClick.Handle(this, &DevMenuScreen::OnDeveloperTools);
+ items->Add(new Choice(dev->T("Jit Compare")))->OnClick.Handle(this, &DevMenuScreen::OnJitCompare);
+ items->Add(new Choice(dev->T("Shader Viewer")))->OnClick.Handle(this, &DevMenuScreen::OnShaderView);
if (g_Config.iGPUBackend == (int)GPUBackend::VULKAN) {
// TODO: Make a new allocator visualizer for VMA.
// items->Add(new CheckBox(&g_Config.bShowAllocatorDebug, dev->T("Allocator Viewer")));
items->Add(new CheckBox(&g_Config.bShowGpuProfile, dev->T("GPU Profile")));
}
- items->Add(new Choice(dev->T("Toggle Freeze")))->OnClick.Handle(this, &DevMenu::OnFreezeFrame);
- items->Add(new Choice(dev->T("Dump Frame GPU Commands")))->OnClick.Handle(this, &DevMenu::OnDumpFrame);
- items->Add(new Choice(dev->T("Toggle Audio Debug")))->OnClick.Handle(this, &DevMenu::OnToggleAudioDebug);
+ items->Add(new Choice(dev->T("Toggle Freeze")))->OnClick.Handle(this, &DevMenuScreen::OnFreezeFrame);
+ items->Add(new Choice(dev->T("Dump Frame GPU Commands")))->OnClick.Handle(this, &DevMenuScreen::OnDumpFrame);
+ items->Add(new Choice(dev->T("Toggle Audio Debug")))->OnClick.Handle(this, &DevMenuScreen::OnToggleAudioDebug);
#ifdef USE_PROFILER
items->Add(new CheckBox(&g_Config.bShowFrameProfiler, dev->T("Frame Profiler"), ""));
#endif
items->Add(new CheckBox(&g_Config.bDrawFrameGraph, dev->T("Draw Frametimes Graph")));
- items->Add(new Choice(dev->T("Reset limited logging")))->OnClick.Handle(this, &DevMenu::OnResetLimitedLogging);
+ items->Add(new Choice(dev->T("Reset limited logging")))->OnClick.Handle(this, &DevMenuScreen::OnResetLimitedLogging);
scroll->Add(items);
parent->Add(scroll);
@@ -123,48 +123,48 @@ void DevMenu::CreatePopupContents(UI::ViewGroup *parent) {
}
}
-UI::EventReturn DevMenu::OnToggleAudioDebug(UI::EventParams &e) {
+UI::EventReturn DevMenuScreen::OnToggleAudioDebug(UI::EventParams &e) {
g_Config.bShowAudioDebug = !g_Config.bShowAudioDebug;
return UI::EVENT_DONE;
}
-UI::EventReturn DevMenu::OnResetLimitedLogging(UI::EventParams &e) {
+UI::EventReturn DevMenuScreen::OnResetLimitedLogging(UI::EventParams &e) {
Reporting::ResetCounts();
return UI::EVENT_DONE;
}
-UI::EventReturn DevMenu::OnLogView(UI::EventParams &e) {
+UI::EventReturn DevMenuScreen::OnLogView(UI::EventParams &e) {
UpdateUIState(UISTATE_PAUSEMENU);
screenManager()->push(new LogScreen());
return UI::EVENT_DONE;
}
-UI::EventReturn DevMenu::OnLogConfig(UI::EventParams &e) {
+UI::EventReturn DevMenuScreen::OnLogConfig(UI::EventParams &e) {
UpdateUIState(UISTATE_PAUSEMENU);
screenManager()->push(new LogConfigScreen());
return UI::EVENT_DONE;
}
-UI::EventReturn DevMenu::OnDeveloperTools(UI::EventParams &e) {
+UI::EventReturn DevMenuScreen::OnDeveloperTools(UI::EventParams &e) {
UpdateUIState(UISTATE_PAUSEMENU);
screenManager()->push(new DeveloperToolsScreen());
return UI::EVENT_DONE;
}
-UI::EventReturn DevMenu::OnJitCompare(UI::EventParams &e) {
+UI::EventReturn DevMenuScreen::OnJitCompare(UI::EventParams &e) {
UpdateUIState(UISTATE_PAUSEMENU);
screenManager()->push(new JitCompareScreen());
return UI::EVENT_DONE;
}
-UI::EventReturn DevMenu::OnShaderView(UI::EventParams &e) {
+UI::EventReturn DevMenuScreen::OnShaderView(UI::EventParams &e) {
UpdateUIState(UISTATE_PAUSEMENU);
if (gpu) // Avoid crashing if chosen while the game is being loaded.
screenManager()->push(new ShaderListScreen());
return UI::EVENT_DONE;
}
-UI::EventReturn DevMenu::OnFreezeFrame(UI::EventParams &e) {
+UI::EventReturn DevMenuScreen::OnFreezeFrame(UI::EventParams &e) {
if (PSP_CoreParameter().frozen) {
PSP_CoreParameter().frozen = false;
} else {
@@ -173,12 +173,12 @@ UI::EventReturn DevMenu::OnFreezeFrame(UI::EventParams &e) {
return UI::EVENT_DONE;
}
-UI::EventReturn DevMenu::OnDumpFrame(UI::EventParams &e) {
+UI::EventReturn DevMenuScreen::OnDumpFrame(UI::EventParams &e) {
gpu->DumpNextFrame();
return UI::EVENT_DONE;
}
-void DevMenu::dialogFinished(const Screen *dialog, DialogResult result) {
+void DevMenuScreen::dialogFinished(const Screen *dialog, DialogResult result) {
UpdateUIState(UISTATE_INGAME);
// Close when a subscreen got closed.
// TODO: a bug in screenmanager causes this not to work here.
@@ -514,7 +514,15 @@ void SystemInfoScreen::CreateViews() {
const std::string apiNameKey = draw->GetInfoString(InfoField::APINAME);
const char *apiName = gr->T(apiNameKey);
deviceSpecs->Add(new InfoItem(si->T("3D API"), apiName));
- deviceSpecs->Add(new InfoItem(si->T("Vendor"), draw->GetInfoString(InfoField::VENDORSTRING)));
+
+ // TODO: This isn't really the vendor; on most APIs it's the device name (GL calls it vendor, though).
+ std::string vendorString;
+ if (draw->GetDeviceCaps().deviceID != 0) {
+ vendorString = StringFromFormat("%s (%08x)", draw->GetInfoString(InfoField::VENDORSTRING).c_str(), draw->GetDeviceCaps().deviceID);
+ } else {
+ vendorString = draw->GetInfoString(InfoField::VENDORSTRING);
+ }
+ deviceSpecs->Add(new InfoItem(si->T("Vendor"), vendorString));
std::string vendor = draw->GetInfoString(InfoField::VENDOR);
if (vendor.size())
deviceSpecs->Add(new InfoItem(si->T("Vendor (detected)"), vendor));
diff --git a/UI/DevScreens.h b/UI/DevScreens.h
index 4e7065307e..0df3d73ce3 100644
--- a/UI/DevScreens.h
+++ b/UI/DevScreens.h
@@ -28,9 +28,11 @@
#include "UI/MiscScreens.h"
#include "GPU/Common/ShaderCommon.h"
-class DevMenu : public PopupScreen {
+class DevMenuScreen : public PopupScreen {
public:
- DevMenu(std::shared_ptr i18n) : PopupScreen(i18n->T("Dev Tools")) {}
+ DevMenuScreen(std::shared_ptr i18n) : PopupScreen(i18n->T("Dev Tools")) {}
+
+ const char *tag() const override { return "DevMenu"; }
void CreatePopupContents(UI::ViewGroup *parent) override;
void dialogFinished(const Screen *dialog, DialogResult result) override;
@@ -50,7 +52,9 @@ protected:
class JitDebugScreen : public UIDialogScreenWithBackground {
public:
JitDebugScreen() {}
- virtual void CreateViews() override;
+ void CreateViews() override;
+
+ const char *tag() const override { return "JitDebug"; }
private:
UI::EventReturn OnEnableAll(UI::EventParams &e);
@@ -60,7 +64,9 @@ private:
class LogConfigScreen : public UIDialogScreenWithBackground {
public:
LogConfigScreen() {}
- virtual void CreateViews() override;
+ void CreateViews() override;
+
+ const char *tag() const override { return "LogConfig"; }
private:
UI::EventReturn OnToggleAll(UI::EventParams &e);
@@ -76,6 +82,8 @@ public:
void CreateViews() override;
void update() override;
+ const char *tag() const override { return "Log"; }
+
private:
void UpdateLog();
UI::EventReturn OnSubmit(UI::EventParams &e);
@@ -89,14 +97,16 @@ class LogLevelScreen : public ListPopupScreen {
public:
LogLevelScreen(const std::string &title);
-private:
- virtual void OnCompleted(DialogResult result);
+ const char *tag() const override { return "LogLevel"; }
+private:
+ void OnCompleted(DialogResult result) override;
};
class SystemInfoScreen : public UIDialogScreenWithBackground {
public:
- SystemInfoScreen() {}
+ const char *tag() const override { return "SystemInfo"; }
+
void CreateViews() override;
};
@@ -106,13 +116,15 @@ public:
memset(buttons_, 0, sizeof(buttons_));
}
- virtual bool key(const KeyInput &key) override;
+ const char *tag() const override { return "AddressPrompt"; }
+
+ bool key(const KeyInput &key) override;
UI::Event OnChoice;
protected:
- virtual void CreatePopupContents(UI::ViewGroup *parent) override;
- virtual void OnCompleted(DialogResult result) override;
+ void CreatePopupContents(UI::ViewGroup *parent) override;
+ void OnCompleted(DialogResult result) override;
UI::EventReturn OnDigitButton(UI::EventParams &e);
UI::EventReturn OnBackspace(UI::EventParams &e);
@@ -128,8 +140,9 @@ private:
class JitCompareScreen : public UIDialogScreenWithBackground {
public:
- JitCompareScreen() : currentBlock_(-1) {}
- virtual void CreateViews() override;
+ void CreateViews() override;
+
+ const char *tag() const override { return "JitCompare"; }
private:
void UpdateDisasm();
@@ -146,7 +159,7 @@ private:
UI::EventReturn OnAddressChange(UI::EventParams &e);
UI::EventReturn OnShowStats(UI::EventParams &e);
- int currentBlock_;
+ int currentBlock_ = -1;
UI::TextView *blockName_;
UI::TextEdit *blockAddr_;
@@ -158,9 +171,10 @@ private:
class ShaderListScreen : public UIDialogScreenWithBackground {
public:
- ShaderListScreen() {}
void CreateViews() override;
+ const char *tag() const override { return "ShaderList"; }
+
private:
int ListShaders(DebugShaderType shaderType, UI::LinearLayout *view);
@@ -175,6 +189,9 @@ public:
: id_(id), type_(type) {}
void CreateViews() override;
+
+ const char *tag() const override { return "ShaderView"; }
+
private:
std::string id_;
DebugShaderType type_;
@@ -188,6 +205,8 @@ public:
void CreateViews() override;
void update() override;
+ const char *tag() const override { return "FrameDumpTest"; }
+
private:
UI::EventReturn OnLoadDump(UI::EventParams &e);
diff --git a/UI/DisplayLayoutScreen.h b/UI/DisplayLayoutScreen.h
index 15835258eb..06730742d4 100644
--- a/UI/DisplayLayoutScreen.h
+++ b/UI/DisplayLayoutScreen.h
@@ -31,7 +31,7 @@ public:
virtual void dialogFinished(const Screen *dialog, DialogResult result) override;
virtual void onFinish(DialogResult reason) override;
virtual void resized() override;
- std::string tag() const override { return "display layout screen"; }
+ const char *tag() const override { return "DisplayLayout"; }
protected:
virtual UI::EventReturn OnCenter(UI::EventParams &e);
diff --git a/UI/EmuScreen.cpp b/UI/EmuScreen.cpp
index 1b98b91bb8..14ee1ced6c 100644
--- a/UI/EmuScreen.cpp
+++ b/UI/EmuScreen.cpp
@@ -955,7 +955,7 @@ void EmuScreen::CreateViews() {
UI::EventReturn EmuScreen::OnDevTools(UI::EventParams ¶ms) {
auto dev = GetI18NCategory("Developer");
- DevMenu *devMenu = new DevMenu(dev);
+ DevMenuScreen *devMenu = new DevMenuScreen(dev);
if (params.v)
devMenu->SetPopupOrigin(params.v);
screenManager()->push(devMenu);
diff --git a/UI/EmuScreen.h b/UI/EmuScreen.h
index ff41bd81a4..1fd41ffb9d 100644
--- a/UI/EmuScreen.h
+++ b/UI/EmuScreen.h
@@ -40,6 +40,8 @@ public:
EmuScreen(const Path &filename);
~EmuScreen();
+ const char *tag() const override { return "Emu"; }
+
void update() override;
void render() override;
void preRender() override;
diff --git a/UI/GPUDriverTestScreen.h b/UI/GPUDriverTestScreen.h
index f2c5796be1..38ec2c33e1 100644
--- a/UI/GPUDriverTestScreen.h
+++ b/UI/GPUDriverTestScreen.h
@@ -17,6 +17,8 @@ public:
void CreateViews() override;
void render() override;
+ const char *tag() const override { return "GPUDriverTest"; }
+
private:
void DiscardTest();
void ShaderTest();
diff --git a/UI/GameScreen.cpp b/UI/GameScreen.cpp
index 15d4073f98..edf519f46b 100644
--- a/UI/GameScreen.cpp
+++ b/UI/GameScreen.cpp
@@ -443,6 +443,7 @@ UI::EventReturn GameScreen::OnRemoveFromRecent(UI::EventParams &e) {
class SetBackgroundPopupScreen : public PopupScreen {
public:
SetBackgroundPopupScreen(const std::string &title, const Path &gamePath);
+ const char *tag() const override { return "SetBackgroundPopup"; }
protected:
bool FillVertical() const override { return false; }
diff --git a/UI/GameScreen.h b/UI/GameScreen.h
index 9379c8f134..cff0c48366 100644
--- a/UI/GameScreen.h
+++ b/UI/GameScreen.h
@@ -38,7 +38,7 @@ public:
void render() override;
- std::string tag() const override { return "game"; }
+ const char *tag() const override { return "Game"; }
protected:
void CreateViews() override;
diff --git a/UI/GameSettingsScreen.cpp b/UI/GameSettingsScreen.cpp
index 6c702bd7f0..0c1c0d009e 100644
--- a/UI/GameSettingsScreen.cpp
+++ b/UI/GameSettingsScreen.cpp
@@ -469,7 +469,7 @@ void GameSettingsScreen::CreateViews() {
if (GetGPUBackend() == GPUBackend::VULKAN || GetGPUBackend() == GPUBackend::OPENGL) {
static const char *bufferOptions[] = { "No buffer", "Up to 1", "Up to 2" };
- PopupMultiChoice *inflightChoice = graphicsSettings->Add(new PopupMultiChoice(&g_Config.iInflightFrames, gr->T("Buffer graphics commands (faster, input lag)"), bufferOptions, 0, ARRAY_SIZE(bufferOptions), gr->GetName(), screenManager()));
+ PopupMultiChoice *inflightChoice = graphicsSettings->Add(new PopupMultiChoice(&g_Config.iInflightFrames, gr->T("Buffer graphics commands (faster, input lag)"), bufferOptions, 1, ARRAY_SIZE(bufferOptions), gr->GetName(), screenManager()));
inflightChoice->OnChoice.Handle(this, &GameSettingsScreen::OnInflightFramesChoice);
}
diff --git a/UI/GameSettingsScreen.h b/UI/GameSettingsScreen.h
index 087b05740e..82b4cd859e 100644
--- a/UI/GameSettingsScreen.h
+++ b/UI/GameSettingsScreen.h
@@ -32,7 +32,7 @@ public:
void update() override;
void onFinish(DialogResult result) override;
- std::string tag() const override { return "settings"; }
+ const char *tag() const override { return "GameSettings"; }
protected:
void sendMessage(const char *message, const char *value) override;
@@ -157,10 +157,11 @@ private:
class DeveloperToolsScreen : public UIDialogScreenWithBackground {
public:
- DeveloperToolsScreen() {}
void update() override;
void onFinish(DialogResult result) override;
+ const char *tag() const override { return "DeveloperTools"; }
+
protected:
void CreateViews() override;
@@ -205,6 +206,8 @@ public:
void CreatePopupContents(UI::ViewGroup *parent) override;
+ const char *tag() const override { return "HostnameSelect"; }
+
protected:
void OnCompleted(DialogResult result) override;
bool CanComplete(DialogResult result) override;
@@ -247,4 +250,6 @@ private:
class GestureMappingScreen : public UIDialogScreenWithBackground {
public:
void CreateViews() override;
+
+ const char *tag() const override { return "GestureMapping"; }
};
diff --git a/UI/InstallZipScreen.h b/UI/InstallZipScreen.h
index 6daa13bf7f..140b82fec2 100644
--- a/UI/InstallZipScreen.h
+++ b/UI/InstallZipScreen.h
@@ -30,6 +30,8 @@ public:
virtual void update() override;
virtual bool key(const KeyInput &key) override;
+ const char *tag() const override { return "InstallZip"; }
+
protected:
virtual void CreateViews() override;
diff --git a/UI/MainScreen.cpp b/UI/MainScreen.cpp
index 3e025d06ba..d5ed3ab0a4 100644
--- a/UI/MainScreen.cpp
+++ b/UI/MainScreen.cpp
@@ -1432,11 +1432,12 @@ UI::EventReturn MainScreen::OnExit(UI::EventParams &e) {
}
void MainScreen::dialogFinished(const Screen *dialog, DialogResult result) {
- if (dialog->tag() == "store") {
+ std::string tag = dialog->tag();
+ if (tag == "Store") {
backFromStore_ = true;
RecreateViews();
}
- if (dialog->tag() == "game") {
+ if (tag == "Game") {
if (!restoreFocusGamePath_.empty() && UI::IsFocusMovementEnabled()) {
// Prevent the background from fading, since we just were displaying it.
highlightedGamePath_ = restoreFocusGamePath_;
diff --git a/UI/MainScreen.h b/UI/MainScreen.h
index 6b15f06427..b81e193b93 100644
--- a/UI/MainScreen.h
+++ b/UI/MainScreen.h
@@ -102,6 +102,8 @@ public:
bool isTopLevel() const override { return true; }
+ const char *tag() const override { return "Main"; }
+
// Horrible hack to show the demos & homebrew tab after having installed a game from a zip file.
static bool showHomebrewTab;
@@ -153,7 +155,7 @@ protected:
class UmdReplaceScreen : public UIDialogScreenWithBackground {
public:
- UmdReplaceScreen() {}
+ const char *tag() const override { return "UmdReplace"; }
protected:
void CreateViews() override;
@@ -174,6 +176,8 @@ public:
void CreatePopupContents(UI::ViewGroup *parent) override;
UI::Event OnRecentChanged;
+ const char *tag() const override { return "GridSettings"; }
+
private:
UI::EventReturn GridPlusClick(UI::EventParams &e);
UI::EventReturn GridMinusClick(UI::EventParams &e);
diff --git a/UI/MemStickScreen.h b/UI/MemStickScreen.h
index f315467c56..378f4f3ed0 100644
--- a/UI/MemStickScreen.h
+++ b/UI/MemStickScreen.h
@@ -36,7 +36,7 @@ public:
MemStickScreen(bool initialSetup);
~MemStickScreen() {}
- std::string tag() const override { return "game"; }
+ const char *tag() const override { return "MemStick"; }
enum Choice {
CHOICE_BROWSE_FOLDER,
@@ -112,6 +112,9 @@ class ConfirmMemstickMoveScreen : public UIDialogScreenWithBackground {
public:
ConfirmMemstickMoveScreen(Path newMemstickFolder, bool initialSetup);
~ConfirmMemstickMoveScreen();
+
+ const char *tag() const override { return "ConfirmMemstickMove"; }
+
protected:
void update() override;
void CreateViews() override;
diff --git a/UI/MiscScreens.cpp b/UI/MiscScreens.cpp
index 2a0ea9bfe7..d90d92f76d 100644
--- a/UI/MiscScreens.cpp
+++ b/UI/MiscScreens.cpp
@@ -387,13 +387,13 @@ void HandleCommonMessages(const char *message, const char *value, ScreenManager
MIPSComp::jit->ClearCache();
}
currentMIPS->UpdateCore((CPUCore)g_Config.iCpuCore);
- } else if (!strcmp(message, "control mapping") && isActiveScreen && activeScreen->tag() != "control mapping") {
+ } else if (!strcmp(message, "control mapping") && isActiveScreen && std::string(activeScreen->tag()) != "ControlMapping") {
UpdateUIState(UISTATE_MENU);
manager->push(new ControlMappingScreen());
- } else if (!strcmp(message, "display layout editor") && isActiveScreen && activeScreen->tag() != "display layout screen") {
+ } else if (!strcmp(message, "display layout editor") && isActiveScreen && std::string(activeScreen->tag()) != "DisplayLayout") {
UpdateUIState(UISTATE_MENU);
manager->push(new DisplayLayoutScreen());
- } else if (!strcmp(message, "settings") && isActiveScreen && activeScreen->tag() != "settings") {
+ } else if (!strcmp(message, "settings") && isActiveScreen && std::string(activeScreen->tag()) != "GameSettings") {
UpdateUIState(UISTATE_MENU);
manager->push(new GameSettingsScreen(Path()));
} else if (!strcmp(message, "language screen") && isActiveScreen) {
diff --git a/UI/MiscScreens.h b/UI/MiscScreens.h
index d094b6d3db..164a06cfec 100644
--- a/UI/MiscScreens.h
+++ b/UI/MiscScreens.h
@@ -82,6 +82,8 @@ public:
void TriggerFinish(DialogResult result) override;
+ const char *tag() const override { return "Prompt"; }
+
private:
UI::EventReturn OnYes(UI::EventParams &e);
UI::EventReturn OnNo(UI::EventParams &e);
@@ -96,6 +98,8 @@ class NewLanguageScreen : public ListPopupScreen {
public:
NewLanguageScreen(const std::string &title);
+ const char *tag() const override { return "NewLanguage"; }
+
private:
void OnCompleted(DialogResult result) override;
bool ShowButtons() const override { return true; }
@@ -110,6 +114,8 @@ public:
void CreateViews() override;
+ const char *tag() const override { return "PostProc"; }
+
private:
void OnCompleted(DialogResult result) override;
bool ShowButtons() const override { return true; }
@@ -123,6 +129,8 @@ public:
void CreateViews() override;
+ const char *tag() const override { return "TextureShader"; }
+
private:
void OnCompleted(DialogResult result) override;
bool ShowButtons() const override { return true; }
@@ -146,6 +154,8 @@ public:
void sendMessage(const char *message, const char *value) override;
void CreateViews() override {}
+ const char *tag() const override { return "Logo"; }
+
private:
void Next();
int frames_ = 0;
@@ -162,6 +172,8 @@ public:
void CreateViews() override;
+ const char *tag() const override { return "Credits"; }
+
private:
UI::EventReturn OnOK(UI::EventParams &e);
diff --git a/UI/PauseScreen.cpp b/UI/PauseScreen.cpp
index 339409ed6e..748fcdf86d 100644
--- a/UI/PauseScreen.cpp
+++ b/UI/PauseScreen.cpp
@@ -172,9 +172,7 @@ public:
return slot_;
}
- std::string tag() const override {
- return "screenshot";
- }
+ const char *tag() const override { return "ScreenshotView"; }
protected:
bool FillVertical() const override { return false; }
@@ -460,7 +458,7 @@ UI::EventReturn GamePauseScreen::OnState(UI::EventParams &e) {
void GamePauseScreen::dialogFinished(const Screen *dialog, DialogResult dr) {
std::string tag = dialog->tag();
- if (tag == "screenshot" && dr == DR_OK) {
+ if (tag == "ScreenshotView" && dr == DR_OK) {
finishNextFrame_ = true;
} else {
// There may have been changes to our savestates, so let's recreate.
diff --git a/UI/PauseScreen.h b/UI/PauseScreen.h
index 15b22174e2..3c66609285 100644
--- a/UI/PauseScreen.h
+++ b/UI/PauseScreen.h
@@ -33,6 +33,8 @@ public:
virtual void dialogFinished(const Screen *dialog, DialogResult dr) override;
+ const char *tag() const override { return "GamePause"; }
+
protected:
virtual void CreateViews() override;
virtual void update() override;
diff --git a/UI/RemoteISOScreen.h b/UI/RemoteISOScreen.h
index bfcac815be..b8676e4832 100644
--- a/UI/RemoteISOScreen.h
+++ b/UI/RemoteISOScreen.h
@@ -29,6 +29,8 @@ class RemoteISOScreen : public UIScreenWithBackground {
public:
RemoteISOScreen();
+ const char *tag() const override { return "RemoteISO"; }
+
protected:
void update() override;
void CreateViews() override;
@@ -57,6 +59,8 @@ public:
RemoteISOConnectScreen();
~RemoteISOConnectScreen() override;
+ const char *tag() const override { return "RemoteISOConnect"; }
+
protected:
void update() override;
void CreateViews() override;
@@ -83,6 +87,8 @@ class RemoteISOBrowseScreen : public MainScreen {
public:
RemoteISOBrowseScreen(const std::string &url, const std::vector &games);
+ const char *tag() const override { return "RemoteISOBrowse"; }
+
protected:
void CreateViews() override;
@@ -94,6 +100,8 @@ class RemoteISOSettingsScreen : public UIDialogScreenWithBackground {
public:
RemoteISOSettingsScreen();
+ const char *tag() const override { return "RemoteISOSettings"; }
+
UI::EventReturn OnClickRemoteISOSubdir(UI::EventParams &e);
UI::EventReturn OnClickRemoteServer(UI::EventParams &e);
protected:
diff --git a/UI/ReportScreen.h b/UI/ReportScreen.h
index a7d132b2c8..006d90e6f5 100644
--- a/UI/ReportScreen.h
+++ b/UI/ReportScreen.h
@@ -37,6 +37,8 @@ class ReportScreen : public UIDialogScreenWithGameBackground {
public:
ReportScreen(const Path &gamePath);
+ const char *tag() const override { return "Report"; }
+
protected:
void postRender() override;
void update() override;
@@ -75,6 +77,8 @@ class ReportFinishScreen : public UIDialogScreenWithGameBackground {
public:
ReportFinishScreen(const Path &gamePath, ReportingOverallScore score);
+ const char *tag() const override { return "ReportFinish"; }
+
protected:
void update() override;
void CreateViews() override;
diff --git a/UI/SavedataScreen.cpp b/UI/SavedataScreen.cpp
index 731e3d5d95..0172b6a19b 100644
--- a/UI/SavedataScreen.cpp
+++ b/UI/SavedataScreen.cpp
@@ -78,8 +78,9 @@ static std::string TrimString(const std::string &str) {
class SavedataPopupScreen : public PopupScreen {
public:
- SavedataPopupScreen(std::string savePath, std::string title) : PopupScreen(TrimString(title)), savePath_(savePath) {
- }
+ SavedataPopupScreen(std::string savePath, std::string title) : PopupScreen(TrimString(title)), savePath_(savePath) { }
+
+ const char *tag() const override { return "SavedataPopup"; }
void CreatePopupContents(UI::ViewGroup *parent) override {
using namespace UI;
diff --git a/UI/SavedataScreen.h b/UI/SavedataScreen.h
index 1f9509e687..1bb2f42140 100644
--- a/UI/SavedataScreen.h
+++ b/UI/SavedataScreen.h
@@ -74,6 +74,8 @@ public:
void dialogFinished(const Screen *dialog, DialogResult result) override;
void sendMessage(const char *message, const char *value) override;
+ const char *tag() const override { return "Savedata"; }
+
protected:
UI::EventReturn OnSavedataButtonClick(UI::EventParams &e);
UI::EventReturn OnSortClick(UI::EventParams &e);
diff --git a/UI/Store.h b/UI/Store.h
index 263e5dbde1..8f02f1a615 100644
--- a/UI/Store.h
+++ b/UI/Store.h
@@ -67,7 +67,7 @@ public:
~StoreScreen();
void update() override;
- std::string tag() const override { return "store"; }
+ const char *tag() const override { return "Store"; }
protected:
void CreateViews() override;
diff --git a/UI/TiltAnalogSettingsScreen.h b/UI/TiltAnalogSettingsScreen.h
index 3aeece6eb9..832a436842 100644
--- a/UI/TiltAnalogSettingsScreen.h
+++ b/UI/TiltAnalogSettingsScreen.h
@@ -27,6 +27,8 @@ public:
void CreateViews() override;
bool axis(const AxisInput &axis) override;
+ const char *tag() const override { return "TiltAnalogSettings"; }
+
private:
UI::EventReturn OnCalibrate(UI::EventParams &e);
float currentTiltX_ = 0.0f;
diff --git a/UI/TouchControlLayoutScreen.h b/UI/TouchControlLayoutScreen.h
index 3db9394258..9834f217f0 100644
--- a/UI/TouchControlLayoutScreen.h
+++ b/UI/TouchControlLayoutScreen.h
@@ -33,6 +33,8 @@ public:
virtual void update() override;
virtual void resized() override;
+ const char *tag() const override { return "TouchControlLayout"; }
+
protected:
virtual UI::EventReturn OnReset(UI::EventParams &e);
virtual UI::EventReturn OnVisibility(UI::EventParams &e);
diff --git a/UI/TouchControlVisibilityScreen.h b/UI/TouchControlVisibilityScreen.h
index f44e333395..52d6d00eea 100644
--- a/UI/TouchControlVisibilityScreen.h
+++ b/UI/TouchControlVisibilityScreen.h
@@ -36,6 +36,8 @@ public:
void CreateViews() override;
void onFinish(DialogResult result) override;
+ const char *tag() const override { return "TouchControlVisibility"; }
+
protected:
UI::EventReturn OnToggleAll(UI::EventParams &e);
@@ -47,4 +49,6 @@ private:
class RightAnalogMappingScreen : public UIDialogScreenWithBackground {
public:
void CreateViews() override;
+
+ const char *tag() const override { return "RightAnalogMapping"; }
};
diff --git a/Windows/GPU/WindowsVulkanContext.cpp b/Windows/GPU/WindowsVulkanContext.cpp
index 5618e2f1f5..818f398420 100644
--- a/Windows/GPU/WindowsVulkanContext.cpp
+++ b/Windows/GPU/WindowsVulkanContext.cpp
@@ -131,9 +131,7 @@ bool WindowsVulkanContext::Init(HINSTANCE hInst, HWND hWnd, std::string *error_m
return false;
}
- bool splitSubmit = g_Config.bGfxDebugSplitSubmit;
-
- draw_ = Draw::T3DCreateVulkanContext(vulkan_, splitSubmit);
+ draw_ = Draw::T3DCreateVulkanContext(vulkan_);
SetGPUBackend(GPUBackend::VULKAN, vulkan_->GetPhysicalDeviceProperties(deviceNum).properties.deviceName);
bool success = draw_->CreatePresets();
_assert_msg_(success, "Failed to compile preset shaders");
diff --git a/Windows/W32Util/Misc.cpp b/Windows/W32Util/Misc.cpp
index 657a0193d2..1849070afc 100644
--- a/Windows/W32Util/Misc.cpp
+++ b/Windows/W32Util/Misc.cpp
@@ -288,9 +288,49 @@ int GenericListControl::HandleNotify(LPARAM lParam) {
return 0;
}
+ if (mhdr->code == LVN_INCREMENTALSEARCH) {
+ NMLVFINDITEM *request = (NMLVFINDITEM *)lParam;
+ uint32_t supported = LVFI_WRAP | LVFI_STRING | LVFI_PARTIAL | LVFI_SUBSTRING;
+ if ((request->lvfi.flags & ~supported) == 0 && (request->lvfi.flags & LVFI_STRING) != 0) {
+ bool wrap = (request->lvfi.flags & LVFI_WRAP) != 0;
+ bool partial = (request->lvfi.flags & (LVFI_PARTIAL | LVFI_SUBSTRING)) != 0;
+
+ // It seems like 0 is always sent for start, let's override.
+ int startRow = request->iStart;
+ if (startRow == 0)
+ startRow = GetSelectedIndex();
+ int result = OnIncrementalSearch(startRow, request->lvfi.psz, wrap, partial);
+ if (result != -1) {
+ request->lvfi.flags = LVFI_PARAM;
+ request->lvfi.lParam = (LPARAM)result;
+ }
+ }
+ }
+
return 0;
}
+// Default incremental (type-to-find) search over the text of the list's cells.
+//
+// startRow: row to begin searching at; negative values are treated as row 0.
+// str:      text to look for, compared case-insensitively.
+// wrap:     when true every row is visited, continuing at row 0 after the last row;
+//           when false only the rows from startRow to the end are visited.
+// partial:  when true only the first wcslen(str) characters of each cell are compared
+//           (prefix match); otherwise the whole cell text must match.
+//
+// Returns the index of the first matching row, or -1 if nothing matched.
+int GenericListControl::OnIncrementalSearch(int startRow, const wchar_t *str, bool wrap, bool partial) {
+	const int rowCount = GetRowCount();
+	const size_t searchlen = wcslen(str);
+
+	// NOTE(review): HandleNotify substitutes GetSelectedIndex() for the start row, which
+	// presumably can be negative when nothing is selected - confirm.  Clamp so the modulo
+	// below can never produce a negative row index.
+	if (startRow < 0)
+		startRow = 0;
+
+	// Without wrapping only the tail [startRow, rowCount) is searched.  The row index must
+	// still be reduced modulo the real row count: reducing it modulo the shortened span
+	// could visit rows before startRow and miss rows after it.
+	const int span = wrap ? rowCount : rowCount - startRow;
+
+	// We start with the earliest column, preferring matches on the leftmost columns by default.
+	for (int c = 0; c < columnCount; ++c) {
+		for (int i = 0; i < span; ++i) {
+			const int r = (startRow + i) % rowCount;
+			stringBuffer[0] = 0;
+			GetColumnText(stringBuffer, r, c);
+			const int difference = partial ? _wcsnicmp(str, stringBuffer, searchlen) : _wcsicmp(str, stringBuffer);
+			if (difference == 0)
+				return r;
+		}
+	}
+
+	return -1;
+}
+
void GenericListControl::Update() {
if (!updateScheduled_) {
SetTimer(handle, IDT_UPDATE, UPDATE_DELAY, nullptr);
diff --git a/Windows/W32Util/Misc.h b/Windows/W32Util/Misc.h
index 8a43c55b59..1b723cc241 100644
--- a/Windows/W32Util/Misc.h
+++ b/Windows/W32Util/Misc.h
@@ -68,6 +68,8 @@ protected:
virtual bool OnRowPrePaint(int row, LPNMLVCUSTOMDRAW msg) { return false; }
virtual bool OnColPrePaint(int row, int col, LPNMLVCUSTOMDRAW msg) { return false; }
+ virtual int OnIncrementalSearch(int startRow, const wchar_t *str, bool wrap, bool partial);
+
private:
static LRESULT CALLBACK wndProc(HWND hwnd, UINT msg, WPARAM wParam, LPARAM lParam);
void ProcessUpdate();
diff --git a/Windows/main.cpp b/Windows/main.cpp
index 4c0752bda8..e3888dfe4c 100644
--- a/Windows/main.cpp
+++ b/Windows/main.cpp
@@ -34,6 +34,7 @@
#include "Common/System/Display.h"
#include "Common/System/NativeApp.h"
#include "Common/System/System.h"
+#include "Common/File/FileUtil.h"
#include "Common/File/VFS/VFS.h"
#include "Common/File/VFS/AssetReader.h"
#include "Common/Data/Text/I18n.h"
@@ -109,9 +110,12 @@ static std::thread inputBoxThread;
static bool inputBoxRunning = false;
void OpenDirectory(const char *path) {
+ // SHParseDisplayName can't handle relative paths, so normalize first.
+ std::string resolved = ReplaceAll(File::ResolvePath(path), "/", "\\");
+
SFGAOF flags;
PIDLIST_ABSOLUTE pidl = nullptr;
- HRESULT hr = SHParseDisplayName(ConvertUTF8ToWString(ReplaceAll(path, "/", "\\")).c_str(), nullptr, &pidl, 0, &flags);
+ HRESULT hr = SHParseDisplayName(ConvertUTF8ToWString(resolved).c_str(), nullptr, &pidl, 0, &flags);
if (pidl) {
if (SUCCEEDED(hr))
diff --git a/android/jni/Android.mk b/android/jni/Android.mk
index b47db9d5cc..f317315e0a 100644
--- a/android/jni/Android.mk
+++ b/android/jni/Android.mk
@@ -51,6 +51,7 @@ VULKAN_FILES := \
$(SRC)/Common/GPU/Vulkan/thin3d_vulkan.cpp \
$(SRC)/Common/GPU/Vulkan/VulkanQueueRunner.cpp \
$(SRC)/Common/GPU/Vulkan/VulkanRenderManager.cpp \
+ $(SRC)/Common/GPU/Vulkan/VulkanFrameData.cpp \
$(SRC)/Common/GPU/Vulkan/VulkanLoader.cpp \
$(SRC)/Common/GPU/Vulkan/VulkanContext.cpp \
$(SRC)/Common/GPU/Vulkan/VulkanDebug.cpp \
diff --git a/android/jni/AndroidVulkanContext.cpp b/android/jni/AndroidVulkanContext.cpp
index dc68a8d75a..af8a87e751 100644
--- a/android/jni/AndroidVulkanContext.cpp
+++ b/android/jni/AndroidVulkanContext.cpp
@@ -101,7 +101,7 @@ bool AndroidVulkanContext::InitFromRenderThread(ANativeWindow *wnd, int desiredB
bool success = true;
if (g_Vulkan->InitSwapchain()) {
- draw_ = Draw::T3DCreateVulkanContext(g_Vulkan, g_Config.bGfxDebugSplitSubmit);
+ draw_ = Draw::T3DCreateVulkanContext(g_Vulkan);
SetGPUBackend(GPUBackend::VULKAN);
success = draw_->CreatePresets(); // Doesn't fail, we ship the compiler.
_assert_msg_(success, "Failed to compile preset shaders");
diff --git a/assets/compat.ini b/assets/compat.ini
index f79978e9fd..f153b93933 100644
--- a/assets/compat.ini
+++ b/assets/compat.ini
@@ -584,54 +584,6 @@ NPJH50304 = true
ULES00703 = true
ULAS42095 = true
-[MaliDepthStencilBugWorkaround]
-# See issue #13833 where the map is supposed to be round but is not.
-
-# Midnight Club: LA Remix
-ULUS10383 = true
-ULES01144 = true
-ULJS00180 = true
-ULJS00267 = true
-ULJM05904 = true
-NPJH50440 = true
-# Midnight Club 3 : DUB edition
-ULUS10021 = true
-ULES00108 = true
-
-# Tales of Phantasia - Narikiri Dungeon X. See #15526
-ULJS00293 = true
-
-# The Simpsons Game - issue #9830
-ULUS10295 = true
-ULES00975 = true
-ULES00979 = true
-ULES00978 = true
-ULES00977 = true
-ULES00976 = true
-
-# Surf's Up - see issue #15016
-ULES00816 = true
-ULES00817 = true
-ULUS10262 = true
-
-# Kenka Bancho - Badass Rumble - see issue #15016
-ULUS10442 = true
-ULJS00166 = true
-UCAS40231 = true
-ULJS19030 = true
-NPJH50223 = true
-ULJS19082 = true
-
-# Nanoha Magical Girl - see issue #15016
-ULJS00384 = true
-ULJS00385 = true
-ULJS19078 = true
-
-# Suikoden Woven Web of The Centuries / Genso Suikoden: Tsumugareshi Hyakunen no Toki
-ULJM05886 = true
-ULJM08063 = true
-NPJH50535 = true
-
[RequireDefaultCPUClock]
# GOW : Ghost of Sparta
UCUS98737 = true
diff --git a/ffmpeg b/ffmpeg
index 3ad7ddb9eb..cea6dd17be 160000
--- a/ffmpeg
+++ b/ffmpeg
@@ -1 +1 @@
-Subproject commit 3ad7ddb9eb2af898dce8c4b5e9a28b77b3f7ddd7
+Subproject commit cea6dd17be4ea045946dc991ef0bca91f9005345
diff --git a/headless/Compare.cpp b/headless/Compare.cpp
index f5127e5ee0..76254d005a 100644
--- a/headless/Compare.cpp
+++ b/headless/Compare.cpp
@@ -305,11 +305,18 @@ bool CompareOutput(const Path &bootFilename, const std::string &output, bool ver
}
}
-inline int ComparePixel(u32 pix1, u32 pix2) {
- // For now, if they're different at all except alpha, it's an error.
- if ((pix1 & 0xFFFFFF) != (pix2 & 0xFFFFFF))
- return 1;
- return 0;
+static inline double CompareChannel(int pix1, int pix2) { // Squared difference between two channel values.
+ double diff = pix1 - pix2; // Integer subtraction, then widened to double before squaring.
+ return diff * diff;
+}
+
+static inline double ComparePixel(u32 pix1, u32 pix2) { // Sum of the squared per-channel differences of two packed pixels.
+ // Ignore alpha: only bits 0-23 (three 8-bit channels) are read; bits 24-31 never contribute.
+ double r = CompareChannel(pix1 & 0xFF, pix2 & 0xFF);
+ double g = CompareChannel((pix1 >> 8) & 0xFF, (pix2 >> 8) & 0xFF);
+ double b = CompareChannel((pix1 >> 16) & 0xFF, (pix2 >> 16) & 0xFF);
+
+ return r + g + b; // NOTE(review): presumably accumulated by ScreenshotComparer::Compare, which divides by w_ * h_ * 3 - confirm.
}
std::vector TranslateDebugBufferToCompare(const GPUDebugBuffer *buffer, u32 stride, u32 h) {
@@ -338,7 +345,6 @@ std::vector TranslateDebugBufferToCompare(const GPUDebugBuffer *buffer, u32
dst += (h - safeH) * stride;
}
- u32 errors = 0;
for (u32 y = 0; y < safeH; ++y) {
switch (buffer->GetFormat()) {
case GPU_DBG_FORMAT_8888:
@@ -429,7 +435,7 @@ double ScreenshotComparer::Compare(const Path &screenshotFilename) {
return -1.0f;
}
- u32 errors = 0;
+ double errors = 0;
if (asBitmap_) {
// The reference is flipped and BGRA by default for the common BMP compare case.
for (u32 y = 0; y < h_; ++y) {
@@ -447,7 +453,8 @@ double ScreenshotComparer::Compare(const Path &screenshotFilename) {
}
}
- return (double) errors / (double) (w_ * h_);
+ // Convert to MSE, accounting for all three channels (RGB.)
+ return errors / (double)(w_ * h_ * 3);
}
bool ScreenshotComparer::SaveActualBitmap(const Path &resultFilename) {
diff --git a/headless/Headless.cpp b/headless/Headless.cpp
index 0be0518e8b..1deebd95cd 100644
--- a/headless/Headless.cpp
+++ b/headless/Headless.cpp
@@ -126,15 +126,12 @@ int printUsage(const char *progname, const char *reason)
fprintf(stderr, " -m, --mount umd.cso mount iso on umd1:\n");
fprintf(stderr, " -r, --root some/path mount path on host0: (elfs must be in here)\n");
fprintf(stderr, " -l, --log full log output, not just emulated printfs\n");
- fprintf(stderr, " --debugger=PORT enable websocket debugger and break at start\n");
+ fprintf(stderr, " --debugger=PORT enable websocket debugger and break at start\n");
-#if defined(HEADLESSHOST_CLASS)
- {
- fprintf(stderr, " --graphics=BACKEND use the full gpu backend (slower)\n");
- fprintf(stderr, " options: gles, software, directx9, etc.\n");
- fprintf(stderr, " --screenshot=FILE compare against a screenshot\n");
- }
-#endif
+ fprintf(stderr, " --graphics=BACKEND use a different gpu backend\n");
+ fprintf(stderr, " options: gles, software, directx9, etc.\n");
+ fprintf(stderr, " --screenshot=FILE compare against a screenshot\n");
+ fprintf(stderr, " --max-mse=NUMBER maximum allowed MSE error for screenshot\n");
fprintf(stderr, " --timeout=SECONDS abort test it if takes longer than SECONDS\n");
fprintf(stderr, " -v, --verbose show the full passed/failed result\n");
@@ -142,6 +139,7 @@ int printUsage(const char *progname, const char *reason)
fprintf(stderr, " --ir use ir interpreter\n");
fprintf(stderr, " -j use jit (default)\n");
fprintf(stderr, " -c, --compare compare with output in file.expected\n");
+ fprintf(stderr, " --bench run multiple times and output speed\n");
fprintf(stderr, "\nSee headless.txt for details.\n");
return 1;
@@ -161,13 +159,20 @@ static HeadlessHost *getHost(GPUCore gpuCore) {
}
}
-bool RunAutoTest(HeadlessHost *headlessHost, CoreParameter &coreParameter, bool autoCompare, bool verbose, double timeout)
-{
+struct AutoTestOptions { // Options for one RunAutoTest() call; filled from the command line in main().
+ double timeout; // Seconds added to time_now_d() to form the run deadline (--timeout=SECONDS).
+ double maxScreenshotError; // Value of --max-mse=NUMBER; forwarded to SetComparisonScreenshot().
+ bool compare : 1; // -c / --compare: collect the output and check it with CompareOutput().
+ bool verbose : 1; // -v / --verbose: passed through to CompareOutput().
+ bool bench : 1; // --bench: rerun the test and print the average time; skips timeout reporting and FlushDebugOutput().
+};
+
+bool RunAutoTest(HeadlessHost *headlessHost, CoreParameter &coreParameter, const AutoTestOptions &opt) {
// Kinda ugly, trying to guesstimate the test name from filename...
currentTestName = GetTestName(coreParameter.fileToStart);
std::string output;
- if (autoCompare)
+ if (opt.compare || opt.bench)
coreParameter.collectEmuLog = &output;
std::string error_string;
@@ -181,23 +186,19 @@ bool RunAutoTest(HeadlessHost *headlessHost, CoreParameter &coreParameter, bool
TeamCityPrint("testStarted name='%s' captureStandardOutput='true'", currentTestName.c_str());
- host->BootDone();
-
- if (autoCompare)
- headlessHost->SetComparisonScreenshot(ExpectedScreenshotFromFilename(coreParameter.fileToStart));
+ if (opt.compare)
+ headlessHost->SetComparisonScreenshot(ExpectedScreenshotFromFilename(coreParameter.fileToStart), opt.maxScreenshotError);
while (!PSP_InitUpdate(&error_string))
sleep_ms(1);
if (!PSP_IsInited()) {
TeamCityPrint("testFailed name='%s' message='Startup failed'", currentTestName.c_str());
TeamCityPrint("testFinished name='%s'", currentTestName.c_str());
- GitHubActionsPrint("error", "Test timeout for %s", currentTestName.c_str());
+ GitHubActionsPrint("error", "Test init failed for %s", currentTestName.c_str());
return false;
}
- bool passed = true;
- double deadline;
- deadline = time_now_d() + timeout;
+ host->BootDone();
Core_UpdateDebugStats(g_Config.bShowDebugStats || g_Config.bLogFrameDrops);
@@ -205,6 +206,8 @@ bool RunAutoTest(HeadlessHost *headlessHost, CoreParameter &coreParameter, bool
if (coreParameter.graphicsContext && coreParameter.graphicsContext->GetDrawContext())
coreParameter.graphicsContext->GetDrawContext()->BeginFrame();
+ bool passed = true;
+ double deadline = time_now_d() + opt.timeout;
coreState = coreParameter.startBreak ? CORE_STEPPING : CORE_RUNNING;
while (coreState == CORE_RUNNING || coreState == CORE_STEPPING)
{
@@ -221,12 +224,15 @@ bool RunAutoTest(HeadlessHost *headlessHost, CoreParameter &coreParameter, bool
}
if (time_now_d() > deadline) {
// Don't compare, print the output at least up to this point, and bail.
- printf("%s", output.c_str());
- passed = false;
+ if (!opt.bench) {
+ printf("%s", output.c_str());
- host->SendDebugOutput("TIMEOUT\n");
- TeamCityPrint("testFailed name='%s' message='Test timeout'", currentTestName.c_str());
- GitHubActionsPrint("error", "Test timeout for %s", currentTestName.c_str());
+ host->SendDebugOutput("TIMEOUT\n");
+ TeamCityPrint("testFailed name='%s' message='Test timeout'", currentTestName.c_str());
+ GitHubActionsPrint("error", "Test timeout for %s", currentTestName.c_str());
+ }
+
+ passed = false;
Core_Stop();
}
}
@@ -237,10 +243,11 @@ bool RunAutoTest(HeadlessHost *headlessHost, CoreParameter &coreParameter, bool
PSP_Shutdown();
- headlessHost->FlushDebugOutput();
+ if (!opt.bench)
+ headlessHost->FlushDebugOutput();
- if (autoCompare && passed)
- passed = CompareOutput(coreParameter.fileToStart, output, verbose);
+ if (opt.compare && passed)
+ passed = CompareOutput(coreParameter.fileToStart, output, opt.verbose);
TeamCityPrint("testFinished name='%s'", currentTestName.c_str());
@@ -263,9 +270,9 @@ int main(int argc, const char* argv[])
_CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
#endif
+ AutoTestOptions testOptions{};
+ testOptions.timeout = std::numeric_limits::infinity();
bool fullLog = false;
- bool autoCompare = false;
- bool verbose = false;
const char *stateToLoad = 0;
GPUCore gpuCore = GPUCORE_SOFTWARE;
CPUCore cpuCore = CPUCore::JIT;
@@ -275,7 +282,6 @@ int main(int argc, const char* argv[])
const char *mountIso = nullptr;
const char *mountRoot = nullptr;
const char *screenshotFilename = nullptr;
- float timeout = std::numeric_limits::infinity();
for (int i = 1; i < argc; i++)
{
@@ -300,9 +306,11 @@ int main(int argc, const char* argv[])
else if (!strcmp(argv[i], "--ir"))
cpuCore = CPUCore::IR_JIT;
else if (!strcmp(argv[i], "-c") || !strcmp(argv[i], "--compare"))
- autoCompare = true;
+ testOptions.compare = true;
+ else if (!strcmp(argv[i], "--bench"))
+ testOptions.bench = true;
else if (!strcmp(argv[i], "-v") || !strcmp(argv[i], "--verbose"))
- verbose = true;
+ testOptions.verbose = true;
else if (!strncmp(argv[i], "--graphics=", strlen("--graphics=")) && strlen(argv[i]) > strlen("--graphics="))
{
const char *gpuName = argv[i] + strlen("--graphics=");
@@ -330,7 +338,9 @@ int main(int argc, const char* argv[])
} else if (!strncmp(argv[i], "--screenshot=", strlen("--screenshot=")) && strlen(argv[i]) > strlen("--screenshot="))
screenshotFilename = argv[i] + strlen("--screenshot=");
else if (!strncmp(argv[i], "--timeout=", strlen("--timeout=")) && strlen(argv[i]) > strlen("--timeout="))
- timeout = (float)strtod(argv[i] + strlen("--timeout="), NULL);
+ testOptions.timeout = strtod(argv[i] + strlen("--timeout="), nullptr);
+ else if (!strncmp(argv[i], "--max-mse=", strlen("--max-mse=")) && strlen(argv[i]) > strlen("--max-mse="))
+ testOptions.maxScreenshotError = strtod(argv[i] + strlen("--max-mse="), nullptr);
else if (!strncmp(argv[i], "--debugger=", strlen("--debugger=")) && strlen(argv[i]) > strlen("--debugger="))
debuggerPort = (int)strtoul(argv[i] + strlen("--debugger="), NULL, 10);
else if (!strcmp(argv[i], "--teamcity"))
@@ -388,7 +398,7 @@ int main(int argc, const char* argv[])
coreParameter.mountIso = mountIso ? Path(std::string(mountIso)) : Path();
coreParameter.mountRoot = mountRoot ? Path(std::string(mountRoot)) : Path();
coreParameter.startBreak = false;
- coreParameter.printfEmuLog = !autoCompare;
+ coreParameter.printfEmuLog = !testOptions.compare;
coreParameter.headLess = true;
coreParameter.renderScaleFactor = 1;
coreParameter.renderWidth = 480;
@@ -456,8 +466,9 @@ int main(int argc, const char* argv[])
if (!File::Exists(g_Config.flash0Directory))
g_Config.flash0Directory = File::GetExeDirectory() / "assets/flash0";
- if (screenshotFilename != 0)
- headlessHost->SetComparisonScreenshot(Path(std::string(screenshotFilename)));
+ if (screenshotFilename)
+ headlessHost->SetComparisonScreenshot(Path(std::string(screenshotFilename)), testOptions.maxScreenshotError);
+ headlessHost->SetWriteFailureScreenshot(!teamCityMode && !getenv("GITHUB_ACTIONS") && !testOptions.bench);
#if PPSSPP_PLATFORM(ANDROID)
// For some reason the debugger installs it with this name?
@@ -487,14 +498,28 @@ int main(int argc, const char* argv[])
for (size_t i = 0; i < testFilenames.size(); ++i)
{
coreParameter.fileToStart = Path(testFilenames[i]);
- if (autoCompare)
+ if (testOptions.compare)
printf("%s:\n", coreParameter.fileToStart.c_str());
- bool passed = RunAutoTest(headlessHost, coreParameter, autoCompare, verbose, timeout);
- if (autoCompare)
- {
+ bool passed = RunAutoTest(headlessHost, coreParameter, testOptions);
+ if (testOptions.bench) {
+ double st = time_now_d();
+ double deadline = st + testOptions.timeout;
+ double runs = 0.0;
+ for (int i = 0; i < 100; ++i) {
+ RunAutoTest(headlessHost, coreParameter, testOptions);
+ runs++;
+
+ if (time_now_d() > deadline)
+ break;
+ }
+ double et = time_now_d();
+
std::string testName = GetTestName(coreParameter.fileToStart);
- if (passed)
- {
+ printf(" %s - %f seconds average\n", testName.c_str(), (et - st) / runs);
+ }
+ if (testOptions.compare) {
+ std::string testName = GetTestName(coreParameter.fileToStart);
+ if (passed) {
passedTests.push_back(testName);
printf(" %s - passed!\n", testName.c_str());
}
@@ -503,8 +528,7 @@ int main(int argc, const char* argv[])
}
}
- if (autoCompare)
- {
+ if (testOptions.compare) {
printf("%d tests passed, %d tests failed.\n", (int)passedTests.size(), (int)failedTests.size());
if (!failedTests.empty())
{
diff --git a/headless/StubHost.cpp b/headless/StubHost.cpp
index e7fbd03f9a..b3896df478 100644
--- a/headless/StubHost.cpp
+++ b/headless/StubHost.cpp
@@ -54,10 +54,10 @@ void HeadlessHost::SendDebugScreenshot(const u8 *pixbuf, u32 w, u32 h) {
if (errors < 0)
SendOrCollectDebugOutput(comparer.GetError() + "\n");
- if (errors > 0)
- SendOrCollectDebugOutput(StringFromFormat("Screenshot error: %f%%\n", errors * 100.0f));
+ if (errors > maxScreenshotError_)
+ SendOrCollectDebugOutput(StringFromFormat("Screenshot MSE: %f\n", errors));
- if (errors > 0 && !teamCityMode && !getenv("GITHUB_ACTIONS")) {
+ if (errors > maxScreenshotError_ && writeFailureScreenshot_) {
if (comparer.SaveActualBitmap(Path("__testfailure.bmp")))
SendOrCollectDebugOutput("Actual output written to: __testfailure.bmp\n");
comparer.SaveVisualComparisonPNG(Path("__testcompare.png"));
diff --git a/headless/StubHost.h b/headless/StubHost.h
index c914f025b2..a77a42424d 100644
--- a/headless/StubHost.h
+++ b/headless/StubHost.h
@@ -68,8 +68,12 @@ public:
}
}
- virtual void SetComparisonScreenshot(const Path &filename) {
+ void SetComparisonScreenshot(const Path &filename, double maxError) { // Stores the reference screenshot path and the allowed error.
comparisonScreenshot_ = filename;
+ maxScreenshotError_ = maxError; // SendDebugScreenshot() only reports a screenshot failure when the error exceeds this.
+ }
+ void SetWriteFailureScreenshot(bool flag) { // Enables or disables saving images for a failed comparison.
+ writeFailureScreenshot_ = flag; // Checked in SendDebugScreenshot() before writing __testfailure.bmp / __testcompare.png.
}
void SendDebugScreenshot(const u8 *pixbuf, u32 w, u32 h) override;
@@ -83,7 +87,9 @@ protected:
void SendOrCollectDebugOutput(const std::string &output);
Path comparisonScreenshot_;
+ double maxScreenshotError_ = 0.0;
std::string debugOutputBuffer_;
GPUCore gpuCore_;
GraphicsContext *gfx_ = nullptr;
+ bool writeFailureScreenshot_ = true;
};
diff --git a/libretro/LibretroVulkanContext.cpp b/libretro/LibretroVulkanContext.cpp
index 571b7bba9d..ff9180cecf 100644
--- a/libretro/LibretroVulkanContext.cpp
+++ b/libretro/LibretroVulkanContext.cpp
@@ -137,7 +137,7 @@ void LibretroVulkanContext::CreateDrawContext() {
return;
}
- draw_ = Draw::T3DCreateVulkanContext(vk, false);
+ draw_ = Draw::T3DCreateVulkanContext(vk);
((VulkanRenderManager*)draw_->GetNativeObject(Draw::NativeObject::RENDER_MANAGER))->SetInflightFrames(g_Config.iInflightFrames);
SetGPUBackend(GPUBackend::VULKAN);
}
diff --git a/libretro/Makefile.common b/libretro/Makefile.common
index ab2c4bfe1c..f08bd45633 100644
--- a/libretro/Makefile.common
+++ b/libretro/Makefile.common
@@ -253,6 +253,7 @@ SOURCES_CXX += \
$(COMMONDIR)/GPU/Vulkan/thin3d_vulkan.cpp \
$(COMMONDIR)/GPU/Vulkan/VulkanQueueRunner.cpp \
$(COMMONDIR)/GPU/Vulkan/VulkanRenderManager.cpp \
+ $(COMMONDIR)/GPU/Vulkan/VulkanFrameData.cpp \
$(COMMONDIR)/GPU/Vulkan/VulkanLoader.cpp \
$(COMMONDIR)/GPU/Vulkan/VulkanContext.cpp \
$(COMMONDIR)/GPU/Vulkan/VulkanDebug.cpp \
diff --git a/pspautotests b/pspautotests
index 682a4303ab..d7a55d5487 160000
--- a/pspautotests
+++ b/pspautotests
@@ -1 +1 @@
-Subproject commit 682a4303aba63a50c91ae0fa6928c9dac8ca9b92
+Subproject commit d7a55d5487b1fa1a688178206abaad4aea5be18a
diff --git a/test.py b/test.py
index 72c6302eb2..47f5c36034 100755
--- a/test.py
+++ b/test.py
@@ -147,6 +147,7 @@ tests_good = [
"gpu/commands/blend",
"gpu/commands/blend565",
"gpu/commands/blocktransfer",
+ "gpu/commands/fog",
"gpu/commands/material",
"gpu/displaylist/alignment",
"gpu/dither/dither",
@@ -157,7 +158,11 @@ tests_good = [
"gpu/ge/enqueueparam",
"gpu/ge/queue",
"gpu/primitives/indices",
+ "gpu/primitives/invalidprim",
+ "gpu/primitives/trianglefan",
+ "gpu/primitives/trianglestrip",
"gpu/primitives/triangles",
+ "gpu/rendertarget/copy",
"gpu/rendertarget/depal",
"gpu/signals/pause",
"gpu/signals/pause2",
@@ -204,6 +209,7 @@ tests_good = [
"sysmem/freesize",
"sysmem/memblock",
"sysmem/sysmem",
+ "sysmem/volatile",
"threads/alarm/alarm",
"threads/alarm/cancel/cancel",
"threads/alarm/refer/refer",
@@ -269,6 +275,7 @@ tests_good = [
"threads/mutex/refer",
"threads/mutex/try",
"threads/mutex/unlock",
+ "threads/mutex/unlock2",
"threads/semaphores/semaphores",
"threads/semaphores/cancel",
"threads/semaphores/create",
@@ -394,21 +401,19 @@ tests_next = [
"gpu/ge/get",
"gpu/primitives/bezier",
"gpu/primitives/continue",
- "gpu/primitives/invalidprim",
+ "gpu/primitives/immediate",
"gpu/primitives/lines",
"gpu/primitives/linestrip",
"gpu/primitives/points",
"gpu/primitives/rectangles",
"gpu/primitives/spline",
- "gpu/primitives/trianglefan",
- "gpu/primitives/trianglestrip",
"gpu/reflection/reflection",
- "gpu/rendertarget/copy",
"gpu/rendertarget/rendertarget",
"gpu/signals/continue",
"gpu/signals/jumps",
"gpu/signals/simple",
"gpu/simple/simple",
+ "gpu/textures/size",
"gpu/triangle/triangle",
"gpu/vertices/colors",
"gpu/vertices/texcoords",