Merge branch 'master' into compat_openxr_gta

This commit is contained in:
Lubos 2022-09-20 21:46:33 +02:00
commit c4dbd6d045
141 changed files with 2419 additions and 1686 deletions

View File

@ -623,6 +623,8 @@ add_library(Common STATIC
Common/GPU/Vulkan/VulkanRenderManager.h
Common/GPU/Vulkan/VulkanQueueRunner.cpp
Common/GPU/Vulkan/VulkanQueueRunner.h
Common/GPU/Vulkan/VulkanFrameData.cpp
Common/GPU/Vulkan/VulkanFrameData.h
Common/Input/GestureDetector.cpp
Common/Input/GestureDetector.h
Common/Input/KeyCodes.h

View File

@ -441,6 +441,7 @@
<ClInclude Include="GPU\Vulkan\VulkanBarrier.h" />
<ClInclude Include="GPU\Vulkan\VulkanContext.h" />
<ClInclude Include="GPU\Vulkan\VulkanDebug.h" />
<ClInclude Include="GPU\Vulkan\VulkanFrameData.h" />
<ClInclude Include="GPU\Vulkan\VulkanImage.h" />
<ClInclude Include="GPU\Vulkan\VulkanLoader.h" />
<ClInclude Include="GPU\Vulkan\VulkanMemory.h" />
@ -861,6 +862,7 @@
<ClCompile Include="GPU\Vulkan\VulkanBarrier.cpp" />
<ClCompile Include="GPU\Vulkan\VulkanContext.cpp" />
<ClCompile Include="GPU\Vulkan\VulkanDebug.cpp" />
<ClCompile Include="GPU\Vulkan\VulkanFrameData.cpp" />
<ClCompile Include="GPU\Vulkan\VulkanImage.cpp" />
<ClCompile Include="GPU\Vulkan\VulkanLoader.cpp" />
<ClCompile Include="GPU\Vulkan\VulkanMemory.cpp" />

View File

@ -419,6 +419,9 @@
<Filter>GPU\Vulkan</Filter>
</ClInclude>
<ClInclude Include="RiscVEmitter.h" />
<ClInclude Include="GPU\Vulkan\VulkanFrameData.h">
<Filter>GPU\Vulkan</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="ABI.cpp" />
@ -791,6 +794,9 @@
<Filter>GPU\Vulkan</Filter>
</ClCompile>
<ClCompile Include="RiscVEmitter.cpp" />
<ClCompile Include="GPU\Vulkan\VulkanFrameData.cpp">
<Filter>GPU\Vulkan</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<Filter Include="Crypto">

View File

@ -270,6 +270,7 @@ D3D11DrawContext::D3D11DrawContext(ID3D11Device *device, ID3D11DeviceContext *de
caps_.anisoSupported = true;
caps_.textureNPOTFullySupported = true;
caps_.fragmentShaderDepthWriteSupported = true;
caps_.blendMinMaxSupported = true;
D3D11_FEATURE_DATA_D3D11_OPTIONS options{};
HRESULT result = device_->CheckFeatureSupport(D3D11_FEATURE_D3D11_OPTIONS, &options, sizeof(options));

View File

@ -646,6 +646,63 @@ void D3D9Context::InvalidateCachedState() {
curPipeline_ = nullptr;
}
// TODO: Move this detection elsewhere when it's needed elsewhere, not before. It's ugly.
// Source: https://envytools.readthedocs.io/en/latest/hw/pciid.html#gf100
enum NVIDIAGeneration {
NV_PRE_KEPLER,
NV_KEPLER,
NV_MAXWELL,
NV_PASCAL,
NV_VOLTA,
NV_TURING, // or later
};
static NVIDIAGeneration NVIDIAGetDeviceGeneration(int deviceID) {
if (deviceID >= 0x1180 && deviceID <= 0x11bf)
return NV_KEPLER; // GK104
if (deviceID >= 0x11c0 && deviceID <= 0x11fa)
return NV_KEPLER; // GK106
if (deviceID >= 0x0fc0 && deviceID <= 0x0fff)
return NV_KEPLER; // GK107
if (deviceID >= 0x1003 && deviceID <= 0x1028)
return NV_KEPLER; // GK110(B)
if (deviceID >= 0x1280 && deviceID <= 0x12ba)
return NV_KEPLER; // GK208
if (deviceID >= 0x1381 && deviceID <= 0x13b0)
return NV_MAXWELL; // GM107
if (deviceID >= 0x1340 && deviceID <= 0x134d)
return NV_MAXWELL; // GM108
if (deviceID >= 0x13c0 && deviceID <= 0x13d9)
return NV_MAXWELL; // GM204
if (deviceID >= 0x1401 && deviceID <= 0x1427)
return NV_MAXWELL; // GM206
if (deviceID >= 0x15f7 && deviceID <= 0x15f9)
return NV_PASCAL; // GP100
if (deviceID >= 0x1b00 && deviceID <= 0x1b38)
return NV_PASCAL; // GP102
if (deviceID >= 0x1b80 && deviceID <= 0x1be1)
return NV_PASCAL; // GP104
if (deviceID >= 0x1c02 && deviceID <= 0x1c62)
return NV_PASCAL; // GP106
if (deviceID >= 0x1c81 && deviceID <= 0x1c92)
return NV_PASCAL; // GP107
if (deviceID >= 0x1d01 && deviceID <= 0x1d12)
return NV_PASCAL; // GP108
if (deviceID >= 0x1d81 && deviceID <= 0x1dba)
return NV_VOLTA; // GV100
if (deviceID >= 0x1e02 && deviceID <= 0x1e3c)
return NV_TURING; // TU102
if (deviceID >= 0x1e82 && deviceID <= 0x1ed0)
return NV_TURING; // TU104
if (deviceID >= 0x1f02 && deviceID <= 0x1f51)
return NV_TURING; // TU106
if (deviceID >= 0x1e02)
return NV_TURING; // More TU models or later, probably.
return NV_PRE_KEPLER;
}
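A quick spot-check of the mapping above, using IDs from the envytools table it cites (hypothetical asserts, not part of the commit):
// Assumes <cassert>. IDs chosen to hit a listed range, the pre-Kepler fallthrough,
// and the ">= 0x1e02 means Turing or later" tail case.
assert(NVIDIAGetDeviceGeneration(0x0fc0) == NV_KEPLER);     // GK107, low end of its range
assert(NVIDIAGetDeviceGeneration(0x1c02) == NV_PASCAL);     // GP106
assert(NVIDIAGetDeviceGeneration(0x0de0) == NV_PRE_KEPLER); // GF108 (Fermi), below every range
assert(NVIDIAGetDeviceGeneration(0x2204) == NV_TURING);     // "more TU models or later"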
#define FB_DIV 1
#define FOURCC_INTZ ((D3DFORMAT)(MAKEFOURCC('I', 'N', 'T', 'Z')))
@ -665,14 +722,24 @@ D3D9Context::D3D9Context(IDirect3D9 *d3d, IDirect3D9Ex *d3dEx, int adapterId, ID
caps_.vendor = GPUVendor::VENDOR_UNKNOWN;
}
if (!FAILED(device->GetDeviceCaps(&d3dCaps_))) {
D3DCAPS9 caps;
ZeroMemory(&caps, sizeof(caps));
HRESULT result = 0;
if (deviceEx_) {
result = deviceEx_->GetDeviceCaps(&caps);
} else {
result = device_->GetDeviceCaps(&caps);
}
if (SUCCEEDED(result)) {
sprintf(shadeLangVersion_, "PS: %04x VS: %04x", d3dCaps_.PixelShaderVersion & 0xFFFF, d3dCaps_.VertexShaderVersion & 0xFFFF);
} else {
WARN_LOG(G3D, "Direct3D9: Failed to get the device caps!");
strcpy(shadeLangVersion_, "N/A");
}
caps_.deviceID = identifier_.DeviceId;
caps_.multiViewport = false;
caps_.anisoSupported = true;
caps_.depthRangeMinusOneToOne = false;
caps_.preferredDepthBufferFormat = DataFormat::D24_S8;
caps_.dualSourceBlend = false;
@ -684,8 +751,30 @@ D3D9Context::D3D9Context(IDirect3D9 *d3d, IDirect3D9Ex *d3dEx, int adapterId, ID
caps_.framebufferDepthCopySupported = false;
caps_.framebufferSeparateDepthCopySupported = false;
caps_.texture3DSupported = true;
caps_.textureNPOTFullySupported = true;
caps_.fragmentShaderDepthWriteSupported = true;
caps_.blendMinMaxSupported = true;
if ((caps.RasterCaps & D3DPRASTERCAPS_ANISOTROPY) != 0 && caps.MaxAnisotropy > 1) {
caps_.anisoSupported = true;
}
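// Full NPOT support: neither the conditional-NPOT restriction nor the pow2-only caps bit is set.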
if ((caps.TextureCaps & (D3DPTEXTURECAPS_NONPOW2CONDITIONAL | D3DPTEXTURECAPS_POW2)) == 0) {
caps_.textureNPOTFullySupported = true;
}
// VS range culling (killing triangles in the vertex shader using NaN) causes problems on Intel.
// Also causes problems on old NVIDIA.
switch (caps_.vendor) {
case Draw::GPUVendor::VENDOR_INTEL:
bugs_.Infest(Bugs::BROKEN_NAN_IN_CONDITIONAL);
break;
case Draw::GPUVendor::VENDOR_NVIDIA:
// Older NVIDIAs don't seem to like NaNs in their DX9 vertex shaders.
// No idea if KEPLER is the right cutoff, but let's go with it.
if (NVIDIAGetDeviceGeneration(caps_.deviceID) < NV_KEPLER) {
bugs_.Infest(Bugs::BROKEN_NAN_IN_CONDITIONAL);
}
break;
}
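Downstream, the shader generators are expected to consult this flag before emitting NaN-based culling; a minimal sketch of such a check (hypothetical call site, the real ones are not in this diff):
// Hypothetical consumer: fall back to clip/scissor-based culling instead of
// writing NaN to the output position in the vertex shader.
bool useNanCulling = !draw->GetBugs().Has(Draw::Bugs::BROKEN_NAN_IN_CONDITIONAL);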
if (d3d) {
D3DDISPLAYMODE displayMode;

View File

@ -814,7 +814,7 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last
int logicOp = -1;
bool logicEnabled = false;
#endif
bool clipDistance0Enabled = false;
bool clipDistanceEnabled[8]{};
GLuint blendEqColor = (GLuint)-1;
GLuint blendEqAlpha = (GLuint)-1;
@ -1123,14 +1123,18 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last
{
if (curProgram != c.program.program) {
glUseProgram(c.program.program->program);
if (c.program.program->use_clip_distance0 != clipDistance0Enabled) {
if (c.program.program->use_clip_distance0)
glEnable(GL_CLIP_DISTANCE0);
else
glDisable(GL_CLIP_DISTANCE0);
clipDistance0Enabled = c.program.program->use_clip_distance0;
}
curProgram = c.program.program;
for (size_t i = 0; i < ARRAY_SIZE(clipDistanceEnabled); ++i) {
if (c.program.program->use_clip_distance[i] == clipDistanceEnabled[i])
continue;
if (c.program.program->use_clip_distance[i])
glEnable(GL_CLIP_DISTANCE0 + (GLenum)i);
else
glDisable(GL_CLIP_DISTANCE0 + (GLenum)i);
clipDistanceEnabled[i] = c.program.program->use_clip_distance[i];
}
}
CHECK_GL_ERROR_IF_DEBUG();
break;
@ -1371,8 +1375,10 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last
glDisable(GL_COLOR_LOGIC_OP);
}
#endif
if (clipDistance0Enabled)
glDisable(GL_CLIP_DISTANCE0);
for (size_t i = 0; i < ARRAY_SIZE(clipDistanceEnabled); ++i) {
if (clipDistanceEnabled[i])
glDisable(GL_CLIP_DISTANCE0 + (GLenum)i);
}
if ((colorMask & 15) != 15)
glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
CHECK_GL_ERROR_IF_DEBUG();
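Both loops above are instances of the shadow-state idiom: mirror what GL currently has enabled and only call into the driver on transitions. The same pattern, distilled into a hypothetical helper:
template <size_t N>
static void SyncClipDistances(const bool (&wanted)[N], bool (&current)[N]) {
	for (size_t i = 0; i < N; ++i) {
		if (wanted[i] == current[i])
			continue;  // state already matches; skip the redundant GL call
		if (wanted[i])
			glEnable(GL_CLIP_DISTANCE0 + (GLenum)i);
		else
			glDisable(GL_CLIP_DISTANCE0 + (GLenum)i);
		current[i] = wanted[i];
	}
}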

View File

@ -579,7 +579,6 @@ void GLRenderManager::EndSubmitFrame(int frame) {
void GLRenderManager::Run(int frame) {
BeginSubmitFrame(frame);
FrameData &frameData = frameData_[frame];
auto &stepsOnThread = frameData_[frame].steps;

View File

@ -91,6 +91,13 @@ public:
std::string error;
};
struct GLRProgramFlags {
bool supportDualSource : 1;
bool useClipDistance0 : 1;
bool useClipDistance1 : 1;
bool useClipDistance2 : 1;
};
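The flags struct replaces the growing tail of bool parameters on CreateProgram; a typical call site then reads (cf. OpenGLPipeline::LinkShaders further down):
GLRProgramFlags flags{};        // all flags default to false
flags.supportDualSource = true; // hypothetical: dual-source blending is available
flags.useClipDistance0 = true;  // hypothetical: the shader writes gl_ClipDistance[0]
program_ = render_->CreateProgram(shaders, semantics, queries, initializers, flags);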
class GLRProgram {
public:
~GLRProgram() {
@ -119,7 +126,7 @@ public:
std::vector<Semantic> semantics_;
std::vector<UniformLocQuery> queries_;
std::vector<Initializer> initialize_;
bool use_clip_distance0 = false;
bool use_clip_distance[8]{};
struct UniformInfo {
int loc_;
@ -427,15 +434,17 @@ public:
// not be an active render pass.
GLRProgram *CreateProgram(
std::vector<GLRShader *> shaders, std::vector<GLRProgram::Semantic> semantics, std::vector<GLRProgram::UniformLocQuery> queries,
std::vector<GLRProgram::Initializer> initializers, bool supportDualSource, bool useClipDistance0) {
std::vector<GLRProgram::Initializer> initializers, const GLRProgramFlags &flags) {
GLRInitStep step{ GLRInitStepType::CREATE_PROGRAM };
_assert_(shaders.size() <= ARRAY_SIZE(step.create_program.shaders));
step.create_program.program = new GLRProgram();
step.create_program.program->semantics_ = semantics;
step.create_program.program->queries_ = queries;
step.create_program.program->initialize_ = initializers;
step.create_program.program->use_clip_distance0 = useClipDistance0;
step.create_program.support_dual_source = supportDualSource;
step.create_program.program->use_clip_distance[0] = flags.useClipDistance0;
step.create_program.program->use_clip_distance[1] = flags.useClipDistance1;
step.create_program.program->use_clip_distance[2] = flags.useClipDistance2;
step.create_program.support_dual_source = flags.supportDualSource;
_assert_msg_(shaders.size() > 0, "Can't create a program with zero shaders");
for (size_t i = 0; i < shaders.size(); i++) {
step.create_program.shaders[i] = shaders[i];
@ -1003,6 +1012,7 @@ private:
bool readyForFence = true;
bool readyForRun = false;
bool readyForSubmit = false;
bool skipSwap = false;
GLRRunType type = GLRRunType::END;

View File

@ -552,6 +552,8 @@ OpenGLContext::OpenGLContext() {
caps_.framebufferDepthBlitSupported = caps_.framebufferBlitSupported;
caps_.framebufferStencilBlitSupported = caps_.framebufferBlitSupported;
caps_.depthClampSupported = gl_extensions.ARB_depth_clamp;
caps_.blendMinMaxSupported = gl_extensions.EXT_blend_minmax;
if (gl_extensions.IsGLES) {
caps_.clipDistanceSupported = gl_extensions.EXT_clip_cull_distance || gl_extensions.APPLE_clip_distance;
caps_.cullDistanceSupported = gl_extensions.EXT_clip_cull_distance;
@ -711,8 +713,10 @@ OpenGLContext::OpenGLContext() {
}
}
if (gl_extensions.IsGLES) {
// NOTE: We only support framebuffer fetch on ES3 due to past issues.
if (gl_extensions.IsGLES && gl_extensions.GLES3) {
caps_.framebufferFetchSupported = (gl_extensions.EXT_shader_framebuffer_fetch || gl_extensions.ARM_shader_framebuffer_fetch);
if (gl_extensions.EXT_shader_framebuffer_fetch) {
shaderLanguageDesc_.framebufferFetchExtension = "#extension GL_EXT_shader_framebuffer_fetch : require";
shaderLanguageDesc_.lastFragData = gl_extensions.GLES3 ? "fragColor0" : "gl_LastFragData[0]";
@ -1234,7 +1238,8 @@ bool OpenGLPipeline::LinkShaders() {
}
}
program_ = render_->CreateProgram(linkShaders, semantics, queries, initialize, false, false);
GLRProgramFlags flags{};
program_ = render_->CreateProgram(linkShaders, semantics, queries, initialize, flags);
return true;
}

View File

@ -4,7 +4,7 @@
void VulkanBarrier::Flush(VkCommandBuffer cmd) {
if (!imageBarriers_.empty()) {
vkCmdPipelineBarrier(cmd, srcStageMask_, dstStageMask_, 0, 0, nullptr, 0, nullptr, (uint32_t)imageBarriers_.size(), imageBarriers_.data());
vkCmdPipelineBarrier(cmd, srcStageMask_, dstStageMask_, dependencyFlags_, 0, nullptr, 0, nullptr, (uint32_t)imageBarriers_.size(), imageBarriers_.data());
}
imageBarriers_.clear();
srcStageMask_ = 0;

View File

@ -21,6 +21,7 @@ public:
) {
srcStageMask_ |= srcStageMask;
dstStageMask_ |= dstStageMask;
dependencyFlags_ |= VK_DEPENDENCY_BY_REGION_BIT;
VkImageMemoryBarrier imageBarrier;
imageBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
@ -112,4 +113,5 @@ private:
VkPipelineStageFlags srcStageMask_ = 0;
VkPipelineStageFlags dstStageMask_ = 0;
std::vector<VkImageMemoryBarrier> imageBarriers_;
VkDependencyFlags dependencyFlags_ = 0;
};
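The new dependencyFlags_ matters when a barrier is recorded inside a render pass: Vulkan requires it to match a declared subpass self-dependency, including VK_DEPENDENCY_BY_REGION_BIT. Roughly what Flush() now issues for that case (a sketch; the masks mirror SelfDependencyBarrier further down):
vkCmdPipelineBarrier(cmd,
	VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,  // accumulated srcStageMask_
	VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,          // accumulated dstStageMask_
	VK_DEPENDENCY_BY_REGION_BIT,                    // dependencyFlags_, required in-renderpass
	0, nullptr, 0, nullptr,
	(uint32_t)imageBarriers_.size(), imageBarriers_.data());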

View File

@ -667,7 +667,10 @@ VkResult VulkanContext::CreateDevice() {
extensionsLookup_.KHR_create_renderpass2 = true;
extensionsLookup_.KHR_depth_stencil_resolve = EnableDeviceExtension(VK_KHR_DEPTH_STENCIL_RESOLVE_EXTENSION_NAME);
}
extensionsLookup_.EXT_shader_stencil_export = EnableDeviceExtension(VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME);
extensionsLookup_.EXT_fragment_shader_interlock = EnableDeviceExtension(VK_EXT_FRAGMENT_SHADER_INTERLOCK_EXTENSION_NAME);
extensionsLookup_.ARM_rasterization_order_attachment_access = EnableDeviceExtension(VK_ARM_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_EXTENSION_NAME);
VkDeviceCreateInfo device_info{ VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO };
device_info.queueCreateInfoCount = 1;

View File

@ -86,7 +86,6 @@ VKAPI_ATTR VkBool32 VKAPI_CALL VulkanDebugUtilsCallback(
} else {
WARN_LOG(G3D, "VKDEBUG: %s", msg.c_str());
}
// false indicates that the layer should not bail out of an
// API call that had validation failures. This may mean that the
// app dies inside the driver due to invalid parameter(s).
@ -94,3 +93,4 @@ VKAPI_ATTR VkBool32 VKAPI_CALL VulkanDebugUtilsCallback(
// keep that behavior here.
return false;
}

View File

@ -0,0 +1,207 @@
#include "VulkanFrameData.h"
#include "Common/Log.h"
void FrameData::Init(VulkanContext *vulkan, int index) {
this->index = index;
VkDevice device = vulkan->GetDevice();
VkCommandPoolCreateInfo cmd_pool_info = { VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO };
cmd_pool_info.queueFamilyIndex = vulkan->GetGraphicsQueueFamilyIndex();
cmd_pool_info.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT;
VkResult res = vkCreateCommandPool(device, &cmd_pool_info, nullptr, &cmdPoolInit);
_dbg_assert_(res == VK_SUCCESS);
res = vkCreateCommandPool(device, &cmd_pool_info, nullptr, &cmdPoolMain);
_dbg_assert_(res == VK_SUCCESS);
VkCommandBufferAllocateInfo cmd_alloc = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO };
cmd_alloc.commandPool = cmdPoolInit;
cmd_alloc.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
cmd_alloc.commandBufferCount = 1;
res = vkAllocateCommandBuffers(device, &cmd_alloc, &initCmd);
_dbg_assert_(res == VK_SUCCESS);
cmd_alloc.commandPool = cmdPoolMain;
res = vkAllocateCommandBuffers(device, &cmd_alloc, &mainCmd);
_dbg_assert_(res == VK_SUCCESS);
res = vkAllocateCommandBuffers(device, &cmd_alloc, &presentCmd);
_dbg_assert_(res == VK_SUCCESS);
// Create the frame fence pre-signaled (true) so it can be waited on immediately, even on the first frame.
fence = vulkan->CreateFence(true);
// This fence is used for synchronizing readbacks. It does not need to be created signaled.
readbackFence = vulkan->CreateFence(false);
VkQueryPoolCreateInfo query_ci{ VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO };
query_ci.queryCount = MAX_TIMESTAMP_QUERIES;
query_ci.queryType = VK_QUERY_TYPE_TIMESTAMP;
res = vkCreateQueryPool(device, &query_ci, nullptr, &profile.queryPool);
}
void FrameData::Destroy(VulkanContext *vulkan) {
VkDevice device = vulkan->GetDevice();
// TODO: I don't think freeing command buffers is necessary before destroying a pool.
vkFreeCommandBuffers(device, cmdPoolInit, 1, &initCmd);
vkFreeCommandBuffers(device, cmdPoolMain, 1, &mainCmd);
vkDestroyCommandPool(device, cmdPoolInit, nullptr);
vkDestroyCommandPool(device, cmdPoolMain, nullptr);
vkDestroyFence(device, fence, nullptr);
vkDestroyFence(device, readbackFence, nullptr);
vkDestroyQueryPool(device, profile.queryPool, nullptr);
}
void FrameData::AcquireNextImage(VulkanContext *vulkan, FrameDataShared &shared) {
_dbg_assert_(!hasAcquired);
// Get the index of the next available swapchain image, and a semaphore to block command buffer execution on.
VkResult res = vkAcquireNextImageKHR(vulkan->GetDevice(), vulkan->GetSwapchain(), UINT64_MAX, shared.acquireSemaphore, (VkFence)VK_NULL_HANDLE, &curSwapchainImage);
switch (res) {
case VK_SUCCESS:
hasAcquired = true;
break;
case VK_SUBOPTIMAL_KHR:
hasAcquired = true;
// Hopefully the resize will happen shortly. Ignore - one frame might look bad or something.
WARN_LOG(G3D, "VK_SUBOPTIMAL_KHR returned - ignoring");
break;
case VK_ERROR_OUT_OF_DATE_KHR:
// We do not set hasAcquired here!
WARN_LOG(G3D, "VK_ERROR_OUT_OF_DATE_KHR returned from AcquireNextImage - processing the frame, but not presenting");
skipSwap = true;
break;
default:
// Weird, shouldn't get any other values. Maybe lost device?
_assert_msg_(false, "vkAcquireNextImageKHR failed! result=%s", VulkanResultToString(res));
break;
}
}
VkResult FrameData::QueuePresent(VulkanContext *vulkan, FrameDataShared &shared) {
_dbg_assert_(hasAcquired);
hasAcquired = false;
_dbg_assert_(!skipSwap);
VkSwapchainKHR swapchain = vulkan->GetSwapchain();
VkPresentInfoKHR present = { VK_STRUCTURE_TYPE_PRESENT_INFO_KHR };
present.swapchainCount = 1;
present.pSwapchains = &swapchain;
present.pImageIndices = &curSwapchainImage;
present.pWaitSemaphores = &shared.renderingCompleteSemaphore;
present.waitSemaphoreCount = 1;
return vkQueuePresentKHR(vulkan->GetGraphicsQueue(), &present);
}
VkCommandBuffer FrameData::GetInitCmd(VulkanContext *vulkan) {
if (!hasInitCommands) {
VkCommandBufferBeginInfo begin = {
VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
nullptr,
VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT
};
vkResetCommandPool(vulkan->GetDevice(), cmdPoolInit, 0);
VkResult res = vkBeginCommandBuffer(initCmd, &begin);
if (res != VK_SUCCESS) {
return VK_NULL_HANDLE;
}
hasInitCommands = true;
}
return initCmd;
}
void FrameData::SubmitPending(VulkanContext *vulkan, FrameSubmitType type, FrameDataShared &sharedData) {
VkCommandBuffer cmdBufs[2];
int numCmdBufs = 0;
VkFence fenceToTrigger = VK_NULL_HANDLE;
if (hasInitCommands) {
if (profilingEnabled_) {
// Pre-allocated query ID 1 - end of init cmdbuf.
vkCmdWriteTimestamp(initCmd, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, profile.queryPool, 1);
}
VkResult res = vkEndCommandBuffer(initCmd);
cmdBufs[numCmdBufs++] = initCmd;
_assert_msg_(res == VK_SUCCESS, "vkEndCommandBuffer failed (init)! result=%s", VulkanResultToString(res));
hasInitCommands = false;
}
if ((hasMainCommands || hasPresentCommands) && type == FrameSubmitType::Sync) {
fenceToTrigger = readbackFence;
}
if (hasMainCommands) {
VkResult res = vkEndCommandBuffer(mainCmd);
_assert_msg_(res == VK_SUCCESS, "vkEndCommandBuffer failed (main)! result=%s", VulkanResultToString(res));
cmdBufs[numCmdBufs++] = mainCmd;
hasMainCommands = false;
}
if (hasPresentCommands) {
VkResult res = vkEndCommandBuffer(presentCmd);
_assert_msg_(res == VK_SUCCESS, "vkEndCommandBuffer failed (present)! result=%s", VulkanResultToString(res));
cmdBufs[numCmdBufs++] = presentCmd;
hasPresentCommands = false;
if (type == FrameSubmitType::Present) {
fenceToTrigger = fence;
}
}
if (!numCmdBufs && fenceToTrigger == VK_NULL_HANDLE) {
// Nothing to do.
return;
}
VkSubmitInfo submit_info{ VK_STRUCTURE_TYPE_SUBMIT_INFO };
VkPipelineStageFlags waitStage[1]{ VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT };
if (type == FrameSubmitType::Present && !skipSwap) {
_dbg_assert_(hasAcquired);
submit_info.waitSemaphoreCount = 1;
submit_info.pWaitSemaphores = &sharedData.acquireSemaphore;
submit_info.pWaitDstStageMask = waitStage;
}
submit_info.commandBufferCount = (uint32_t)numCmdBufs;
submit_info.pCommandBuffers = cmdBufs;
if (type == FrameSubmitType::Present && !skipSwap) {
submit_info.signalSemaphoreCount = 1;
submit_info.pSignalSemaphores = &sharedData.renderingCompleteSemaphore;
}
VkResult res = vkQueueSubmit(vulkan->GetGraphicsQueue(), 1, &submit_info, fenceToTrigger);
if (res == VK_ERROR_DEVICE_LOST) {
_assert_msg_(false, "Lost the Vulkan device in vkQueueSubmit! If this happens again, switch Graphics Backend away from Vulkan");
} else {
_assert_msg_(res == VK_SUCCESS, "vkQueueSubmit failed (main)! result=%s", VulkanResultToString(res));
}
if (type == FrameSubmitType::Sync) {
// Hard stall of the GPU, not ideal, but necessary so the CPU has the contents of the readback.
vkWaitForFences(vulkan->GetDevice(), 1, &readbackFence, true, UINT64_MAX);
vkResetFences(vulkan->GetDevice(), 1, &readbackFence);
}
// Notify anyone waiting on this frame; in the sync case this only happens after the wait above.
if (type == FrameSubmitType::Present || type == FrameSubmitType::Sync) {
VERBOSE_LOG(G3D, "PULL: Frame %d.readyForFence = true", index);
std::unique_lock<std::mutex> lock(push_mutex);
readyForFence = true; // misnomer in sync mode!
push_condVar.notify_all();
}
}
void FrameDataShared::Init(VulkanContext *vulkan) {
VkSemaphoreCreateInfo semaphoreCreateInfo = { VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO };
semaphoreCreateInfo.flags = 0;
VkResult res = vkCreateSemaphore(vulkan->GetDevice(), &semaphoreCreateInfo, nullptr, &acquireSemaphore);
_dbg_assert_(res == VK_SUCCESS);
res = vkCreateSemaphore(vulkan->GetDevice(), &semaphoreCreateInfo, nullptr, &renderingCompleteSemaphore);
_dbg_assert_(res == VK_SUCCESS);
}
void FrameDataShared::Destroy(VulkanContext *vulkan) {
VkDevice device = vulkan->GetDevice();
vkDestroySemaphore(device, acquireSemaphore, nullptr);
vkDestroySemaphore(device, renderingCompleteSemaphore, nullptr);
}
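Taken together, these helpers carve one frame's lifecycle out of VulkanRenderManager; the expected per-frame sequence is roughly (a simplified sketch; the real sequencing lives in VulkanRenderManager and VulkanQueueRunner below):
// Assuming `frame` is a FrameData and `shared` the FrameDataShared:
frame.AcquireNextImage(vulkan, shared);            // swapchain image + acquire semaphore
VkCommandBuffer init = frame.GetInitCmd(vulkan);   // lazily begins the init cmdbuf
// ... record uploads into init, rendering into mainCmd / presentCmd ...
frame.SubmitPending(vulkan, FrameSubmitType::Present, shared);
if (!frame.skipSwap)
	frame.QueuePresent(vulkan, shared);            // waits on renderingCompleteSemaphore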

View File

@ -0,0 +1,101 @@
#pragma once
#include <cstdint>
#include <mutex>
#include <condition_variable>
#include "Common/GPU/Vulkan/VulkanContext.h"
struct VKRStep;
enum {
MAX_TIMESTAMP_QUERIES = 128,
};
enum class VKRRunType {
END,
SYNC,
};
struct QueueProfileContext {
VkQueryPool queryPool;
std::vector<std::string> timestampDescriptions;
std::string profileSummary;
double cpuStartTime;
double cpuEndTime;
};
struct FrameDataShared {
// Permanent objects
VkSemaphore acquireSemaphore = VK_NULL_HANDLE;
VkSemaphore renderingCompleteSemaphore = VK_NULL_HANDLE;
void Init(VulkanContext *vulkan);
void Destroy(VulkanContext *vulkan);
};
enum class FrameSubmitType {
Pending,
Sync,
Present,
};
// Per-frame data, round-robin so we can overlap submission with execution of the previous frame.
struct FrameData {
std::mutex push_mutex;
std::condition_variable push_condVar;
std::mutex pull_mutex;
std::condition_variable pull_condVar;
bool readyForFence = true;
bool readyForRun = false; // protected by pull_mutex
bool skipSwap = false;
VkFence fence;
VkFence readbackFence; // Strictly speaking we might only need a single one of these, globally.
// These are on different threads so need separate pools.
VkCommandPool cmdPoolInit; // Written to from main thread
VkCommandPool cmdPoolMain; // Written to from render thread, which also submits
VkCommandBuffer initCmd;
VkCommandBuffer mainCmd;
VkCommandBuffer presentCmd;
bool hasInitCommands = false;
bool hasMainCommands = false;
bool hasPresentCommands = false;
bool hasAcquired = false;
std::vector<VKRStep *> steps;
// Swapchain.
uint32_t curSwapchainImage = -1;
// Profiling.
QueueProfileContext profile;
bool profilingEnabled_;
void Init(VulkanContext *vulkan, int index);
void Destroy(VulkanContext *vulkan);
void AcquireNextImage(VulkanContext *vulkan, FrameDataShared &shared);
VkResult QueuePresent(VulkanContext *vulkan, FrameDataShared &shared);
VkCommandBuffer GetInitCmd(VulkanContext *vulkan);
// This will only submit if we are actually recording init commands.
void SubmitPending(VulkanContext *vulkan, FrameSubmitType type, FrameDataShared &shared);
VKRRunType RunType() const {
return runType_;
}
VKRRunType runType_ = VKRRunType::END;
private:
// Metadata for logging etc
int index;
};
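The two mutex/condvar pairs implement the handoff between the emu thread and the render thread: push_mutex guards readyForFence (the emu thread waits until the frame slot can be reused), pull_mutex guards readyForRun (the render thread waits for queued work). Condensed from BeginFrame and ThreadFunc further down:
// Emu thread, start of frame (cf. VulkanRenderManager::BeginFrame):
{
	std::unique_lock<std::mutex> lock(frame.push_mutex);
	while (!frame.readyForFence)
		frame.push_condVar.wait(lock);  // render thread not done with this slot yet
	frame.readyForFence = false;
}
// Render thread (cf. VulkanRenderManager::ThreadFunc):
{
	std::unique_lock<std::mutex> lock(frame.pull_mutex);
	while (!frame.readyForRun && run_)
		frame.pull_condVar.wait(lock);  // nothing queued for this slot yet
	frame.readyForRun = false;
}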

View File

@ -241,6 +241,8 @@ struct VulkanExtensions {
bool KHR_depth_stencil_resolve;
bool EXT_shader_stencil_export;
bool EXT_swapchain_colorspace;
bool ARM_rasterization_order_attachment_access;
bool EXT_fragment_shader_interlock;
// bool EXT_depth_range_unrestricted; // Allows depth outside [0.0, 1.0] in 32-bit float depth buffers.
};

View File

@ -34,8 +34,11 @@ RenderPassType MergeRPTypes(RenderPassType a, RenderPassType b) {
if (a == b) {
// Trivial merging case.
return a;
} else if (a == RP_TYPE_COLOR_DEPTH && b == RP_TYPE_COLOR_DEPTH_INPUT) {
return RP_TYPE_COLOR_DEPTH_INPUT;
} else if (a == RP_TYPE_COLOR_DEPTH_INPUT && b == RP_TYPE_COLOR_DEPTH) {
return RP_TYPE_COLOR_DEPTH_INPUT;
}
// More cases to be added later.
return a;
}
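In other words, the input-attachment variant is sticky under merging:
// Merging a plain color/depth pass with an input-attachment pass promotes the pair:
// MergeRPTypes(RP_TYPE_COLOR_DEPTH, RP_TYPE_COLOR_DEPTH_INPUT) == RP_TYPE_COLOR_DEPTH_INPUT
// MergeRPTypes(RP_TYPE_COLOR_DEPTH_INPUT, RP_TYPE_COLOR_DEPTH) == RP_TYPE_COLOR_DEPTH_INPUT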
@ -138,6 +141,171 @@ void VulkanQueueRunner::DestroyDeviceObjects() {
renderPasses_.Clear();
}
bool VulkanQueueRunner::CreateSwapchain(VkCommandBuffer cmdInit) {
VkResult res = vkGetSwapchainImagesKHR(vulkan_->GetDevice(), vulkan_->GetSwapchain(), &swapchainImageCount_, nullptr);
_dbg_assert_(res == VK_SUCCESS);
VkImage *swapchainImages = new VkImage[swapchainImageCount_];
res = vkGetSwapchainImagesKHR(vulkan_->GetDevice(), vulkan_->GetSwapchain(), &swapchainImageCount_, swapchainImages);
if (res != VK_SUCCESS) {
ERROR_LOG(G3D, "vkGetSwapchainImagesKHR failed");
delete[] swapchainImages;
return false;
}
for (uint32_t i = 0; i < swapchainImageCount_; i++) {
SwapchainImageData sc_buffer{};
sc_buffer.image = swapchainImages[i];
VkImageViewCreateInfo color_image_view = { VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO };
color_image_view.format = vulkan_->GetSwapchainFormat();
color_image_view.components.r = VK_COMPONENT_SWIZZLE_IDENTITY;
color_image_view.components.g = VK_COMPONENT_SWIZZLE_IDENTITY;
color_image_view.components.b = VK_COMPONENT_SWIZZLE_IDENTITY;
color_image_view.components.a = VK_COMPONENT_SWIZZLE_IDENTITY;
color_image_view.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
color_image_view.subresourceRange.baseMipLevel = 0;
color_image_view.subresourceRange.levelCount = 1;
color_image_view.subresourceRange.baseArrayLayer = 0;
color_image_view.subresourceRange.layerCount = 1;
color_image_view.viewType = VK_IMAGE_VIEW_TYPE_2D;
color_image_view.flags = 0;
color_image_view.image = sc_buffer.image;
// We leave the images as UNDEFINED; there's no need to pre-transition them, as
// the backbuffer renderpass starts out with them being auto-transitioned from UNDEFINED anyway.
// Also, turns out it's illegal to transition un-acquired images, thanks Hans-Kristian. See #11417.
res = vkCreateImageView(vulkan_->GetDevice(), &color_image_view, nullptr, &sc_buffer.view);
swapchainImages_.push_back(sc_buffer);
_dbg_assert_(res == VK_SUCCESS);
}
delete[] swapchainImages;
// Must be before InitBackbufferRenderPass.
if (InitDepthStencilBuffer(cmdInit)) {
InitBackbufferFramebuffers(vulkan_->GetBackbufferWidth(), vulkan_->GetBackbufferHeight());
}
return true;
}
bool VulkanQueueRunner::InitBackbufferFramebuffers(int width, int height) {
VkResult res;
// We share the same depth buffer but have multiple color buffers, see the loop below.
VkImageView attachments[2] = { VK_NULL_HANDLE, depth_.view };
VkFramebufferCreateInfo fb_info = { VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO };
fb_info.renderPass = GetCompatibleRenderPass()->Get(vulkan_, RP_TYPE_BACKBUFFER);
fb_info.attachmentCount = 2;
fb_info.pAttachments = attachments;
fb_info.width = width;
fb_info.height = height;
fb_info.layers = 1;
framebuffers_.resize(swapchainImageCount_);
for (uint32_t i = 0; i < swapchainImageCount_; i++) {
attachments[0] = swapchainImages_[i].view;
res = vkCreateFramebuffer(vulkan_->GetDevice(), &fb_info, nullptr, &framebuffers_[i]);
_dbg_assert_(res == VK_SUCCESS);
if (res != VK_SUCCESS) {
framebuffers_.clear();
return false;
}
}
return true;
}
bool VulkanQueueRunner::InitDepthStencilBuffer(VkCommandBuffer cmd) {
const VkFormat depth_format = vulkan_->GetDeviceInfo().preferredDepthStencilFormat;
int aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
VkImageCreateInfo image_info = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
image_info.imageType = VK_IMAGE_TYPE_2D;
image_info.format = depth_format;
image_info.extent.width = vulkan_->GetBackbufferWidth();
image_info.extent.height = vulkan_->GetBackbufferHeight();
image_info.extent.depth = 1;
image_info.mipLevels = 1;
image_info.arrayLayers = 1;
image_info.samples = VK_SAMPLE_COUNT_1_BIT;
image_info.queueFamilyIndexCount = 0;
image_info.pQueueFamilyIndices = nullptr;
image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
image_info.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
image_info.flags = 0;
depth_.format = depth_format;
VmaAllocationCreateInfo allocCreateInfo{};
VmaAllocationInfo allocInfo{};
allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
VkResult res = vmaCreateImage(vulkan_->Allocator(), &image_info, &allocCreateInfo, &depth_.image, &depth_.alloc, &allocInfo);
_dbg_assert_(res == VK_SUCCESS);
if (res != VK_SUCCESS)
return false;
vulkan_->SetDebugName(depth_.image, VK_OBJECT_TYPE_IMAGE, "BackbufferDepth");
TransitionImageLayout2(cmd, depth_.image, 0, 1,
aspectMask,
VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
0, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT);
VkImageViewCreateInfo depth_view_info = { VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO };
depth_view_info.image = depth_.image;
depth_view_info.format = depth_format;
depth_view_info.components.r = VK_COMPONENT_SWIZZLE_IDENTITY;
depth_view_info.components.g = VK_COMPONENT_SWIZZLE_IDENTITY;
depth_view_info.components.b = VK_COMPONENT_SWIZZLE_IDENTITY;
depth_view_info.components.a = VK_COMPONENT_SWIZZLE_IDENTITY;
depth_view_info.subresourceRange.aspectMask = aspectMask;
depth_view_info.subresourceRange.baseMipLevel = 0;
depth_view_info.subresourceRange.levelCount = 1;
depth_view_info.subresourceRange.baseArrayLayer = 0;
depth_view_info.subresourceRange.layerCount = 1;
depth_view_info.viewType = VK_IMAGE_VIEW_TYPE_2D;
depth_view_info.flags = 0;
VkDevice device = vulkan_->GetDevice();
res = vkCreateImageView(device, &depth_view_info, NULL, &depth_.view);
_dbg_assert_(res == VK_SUCCESS);
if (res != VK_SUCCESS)
return false;
return true;
}
void VulkanQueueRunner::DestroyBackBuffers() {
for (auto &image : swapchainImages_) {
vulkan_->Delete().QueueDeleteImageView(image.view);
}
swapchainImages_.clear();
if (depth_.view) {
vulkan_->Delete().QueueDeleteImageView(depth_.view);
}
if (depth_.image) {
_dbg_assert_(depth_.alloc);
vulkan_->Delete().QueueDeleteImageAllocation(depth_.image, depth_.alloc);
}
depth_ = {};
for (uint32_t i = 0; i < framebuffers_.size(); i++) {
_dbg_assert_(framebuffers_[i] != VK_NULL_HANDLE);
vulkan_->Delete().QueueDeleteFramebuffer(framebuffers_[i]);
}
framebuffers_.clear();
INFO_LOG(G3D, "Backbuffers destroyed");
}
static VkAttachmentLoadOp ConvertLoadAction(VKRRenderPassLoadAction action) {
switch (action) {
case VKRRenderPassLoadAction::CLEAR: return VK_ATTACHMENT_LOAD_OP_CLEAR;
@ -155,7 +323,12 @@ static VkAttachmentStoreOp ConvertStoreAction(VKRRenderPassStoreAction action) {
return VK_ATTACHMENT_STORE_OP_DONT_CARE; // avoid compiler warning
}
// Self-dependency: https://github.com/gpuweb/gpuweb/issues/442#issuecomment-547604827
// Also see https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-pipeline-barriers-subpass-self-dependencies
VkRenderPass CreateRP(VulkanContext *vulkan, const RPKey &key, RenderPassType rpType) {
bool selfDependency = rpType == RP_TYPE_COLOR_DEPTH_INPUT;
VkAttachmentDescription attachments[2] = {};
attachments[0].format = rpType == RP_TYPE_BACKBUFFER ? vulkan->GetSwapchainFormat() : VK_FORMAT_R8G8B8A8_UNORM;
attachments[0].samples = VK_SAMPLE_COUNT_1_BIT;
@ -179,7 +352,7 @@ VkRenderPass CreateRP(VulkanContext *vulkan, const RPKey &key, RenderPassType rp
VkAttachmentReference color_reference{};
color_reference.attachment = 0;
color_reference.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
color_reference.layout = selfDependency ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
VkAttachmentReference depth_reference{};
depth_reference.attachment = 1;
@ -188,8 +361,13 @@ VkRenderPass CreateRP(VulkanContext *vulkan, const RPKey &key, RenderPassType rp
VkSubpassDescription subpass{};
subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
subpass.flags = 0;
subpass.inputAttachmentCount = 0;
subpass.pInputAttachments = nullptr;
if (selfDependency) {
subpass.inputAttachmentCount = 1;
subpass.pInputAttachments = &color_reference;
} else {
subpass.inputAttachmentCount = 0;
subpass.pInputAttachments = nullptr;
}
subpass.colorAttachmentCount = 1;
subpass.pColorAttachments = &color_reference;
subpass.pResolveAttachments = nullptr;
@ -198,22 +376,40 @@ VkRenderPass CreateRP(VulkanContext *vulkan, const RPKey &key, RenderPassType rp
subpass.pPreserveAttachments = nullptr;
// Not sure if this is really necessary.
VkSubpassDependency dep{};
dep.srcSubpass = VK_SUBPASS_EXTERNAL;
dep.dstSubpass = 0;
dep.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
dep.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
dep.srcAccessMask = 0;
dep.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
VkSubpassDependency deps[2]{};
size_t numDeps = 0;
VkRenderPassCreateInfo rp{ VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO };
rp.attachmentCount = 2;
rp.pAttachments = attachments;
rp.subpassCount = 1;
rp.pSubpasses = &subpass;
if (rpType == RP_TYPE_BACKBUFFER) {
deps[numDeps].srcSubpass = VK_SUBPASS_EXTERNAL;
deps[numDeps].dstSubpass = 0;
deps[numDeps].srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
deps[numDeps].dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
deps[numDeps].srcAccessMask = 0;
deps[numDeps].dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
numDeps++;
rp.dependencyCount = 1;
rp.pDependencies = &dep;
}
if (selfDependency) {
deps[numDeps].dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT;
deps[numDeps].srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
deps[numDeps].dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
deps[numDeps].srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
deps[numDeps].dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
deps[numDeps].srcSubpass = 0;
deps[numDeps].dstSubpass = 0;
numDeps++;
}
if (numDeps > 0) {
rp.dependencyCount = (u32)numDeps;
rp.pDependencies = deps;
}
VkRenderPass pass;
@ -246,6 +442,30 @@ VKRRenderPass *VulkanQueueRunner::GetRenderPass(const RPKey &key) {
return pass;
}
// Must match the subpass self-dependency declared above.
void VulkanQueueRunner::SelfDependencyBarrier(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrier *recordBarrier) {
if (aspect & VK_IMAGE_ASPECT_COLOR_BIT) {
VkAccessFlags srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
VkAccessFlags dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
VkPipelineStageFlags srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
VkPipelineStageFlags dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
recordBarrier->TransitionImage(
img.image,
0,
1,
aspect,
VK_IMAGE_LAYOUT_GENERAL,
VK_IMAGE_LAYOUT_GENERAL,
srcAccessMask,
dstAccessMask,
srcStageMask,
dstStageMask
);
} else {
_assert_msg_(false, "Depth self-dependencies not yet supported");
}
}
void VulkanQueueRunner::PreprocessSteps(std::vector<VKRStep *> &steps) {
// Optimizes renderpasses, then sequences them.
// Planned optimizations:
@ -321,23 +541,47 @@ void VulkanQueueRunner::PreprocessSteps(std::vector<VKRStep *> &steps) {
}
}
void VulkanQueueRunner::RunSteps(VkCommandBuffer cmd, std::vector<VKRStep *> &steps, QueueProfileContext *profile) {
void VulkanQueueRunner::RunSteps(FrameData &frameData, FrameDataShared &frameDataShared) {
QueueProfileContext *profile = frameData.profilingEnabled_ ? &frameData.profile : nullptr;
if (profile)
profile->cpuStartTime = time_now_d();
bool emitLabels = vulkan_->Extensions().EXT_debug_utils;
for (size_t i = 0; i < steps.size(); i++) {
const VKRStep &step = *steps[i];
VkCommandBuffer cmd = frameData.hasPresentCommands ? frameData.presentCmd : frameData.mainCmd;
for (size_t i = 0; i < frameData.steps.size(); i++) {
const VKRStep &step = *frameData.steps[i];
if (emitLabels) {
VkDebugUtilsLabelEXT labelInfo{ VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT };
labelInfo.pLabelName = step.tag;
vkCmdBeginDebugUtilsLabelEXT(cmd, &labelInfo);
vkCmdBeginDebugUtilsLabelEXT(frameData.mainCmd, &labelInfo);
}
switch (step.stepType) {
case VKRStepType::RENDER:
if (!step.render.framebuffer) {
frameData.SubmitPending(vulkan_, FrameSubmitType::Pending, frameDataShared);
// When stepping in the GE debugger, we can end up here multiple times in a "frame".
// So only acquire once.
if (!frameData.hasAcquired) {
frameData.AcquireNextImage(vulkan_, frameDataShared);
SetBackbuffer(framebuffers_[frameData.curSwapchainImage], swapchainImages_[frameData.curSwapchainImage].image);
}
_dbg_assert_(!frameData.hasPresentCommands);
// A RENDER step rendering to the backbuffer is normally the last step that happens in a frame,
// unless taking a screenshot, in which case there might be a READBACK_IMAGE after it.
// This is why we have to switch cmd to presentCmd in this case.
VkCommandBufferBeginInfo begin{ VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };
begin.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
vkBeginCommandBuffer(frameData.presentCmd, &begin);
frameData.hasPresentCommands = true;
cmd = frameData.presentCmd;
}
PerformRenderPass(step, cmd);
break;
case VKRStepType::COPY:
@ -368,10 +612,12 @@ void VulkanQueueRunner::RunSteps(VkCommandBuffer cmd, std::vector<VKRStep *> &st
// Deleting all in one go should be easier on the instruction cache than deleting
// them as we go - and easier to debug because we can look backwards in the frame.
for (size_t i = 0; i < steps.size(); i++) {
delete steps[i];
for (auto step : frameData.steps) {
delete step;
}
frameData.steps.clear();
if (profile)
profile->cpuEndTime = time_now_d();
}
@ -628,6 +874,7 @@ std::string VulkanQueueRunner::StepToString(const VKRStep &step) const {
switch (step.render.renderPassType) {
case RP_TYPE_BACKBUFFER: renderCmd = "BACKBUF"; break;
case RP_TYPE_COLOR_DEPTH: renderCmd = "RENDER"; break;
case RP_TYPE_COLOR_DEPTH_INPUT: renderCmd = "RENDER_INPUT"; break;
default: renderCmd = "N/A";
}
snprintf(buffer, sizeof(buffer), "%s %s (draws: %d, %dx%d/%dx%d, fb: %p, )", renderCmd, step.tag, step.render.numDraws, actual_w, actual_h, w, h, step.render.framebuffer);
@ -817,6 +1064,9 @@ void VulkanQueueRunner::LogRenderPass(const VKRStep &pass, bool verbose) {
case VKRRenderCommand::REMOVED:
INFO_LOG(G3D, " (Removed)");
break;
case VKRRenderCommand::SELF_DEPENDENCY_BARRIER:
INFO_LOG(G3D, " SelfBarrier()");
break;
case VKRRenderCommand::BIND_GRAPHICS_PIPELINE:
INFO_LOG(G3D, " BindGraphicsPipeline(%x)", (int)(intptr_t)cmd.graphics_pipeline.pipeline);
break;
@ -1070,7 +1320,6 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c
}
}
// Don't execute empty renderpasses that keep the contents.
if (step.commands.empty() && step.render.colorLoad == VKRRenderPassLoadAction::KEEP && step.render.depthLoad == VKRRenderPassLoadAction::KEEP && step.render.stencilLoad == VKRRenderPassLoadAction::KEEP) {
// Flush the pending barrier
@ -1120,6 +1369,7 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c
// This reads the layout of the color and depth images, and chooses a render pass using them that
// will transition to the desired final layout.
//
// NOTE: Flushes recordBarrier_.
VKRRenderPass *renderPass = PerformBindFramebufferAsRenderTarget(step, cmd);
@ -1235,6 +1485,15 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c
break;
}
case VKRRenderCommand::SELF_DEPENDENCY_BARRIER:
{
_assert_(step.render.pipelineFlags & PipelineFlags::USES_INPUT_ATTACHMENT);
VulkanBarrier barrier;
SelfDependencyBarrier(step.render.framebuffer->color, VK_IMAGE_ASPECT_COLOR_BIT, &barrier);
barrier.Flush(cmd);
break;
}
case VKRRenderCommand::PUSH_CONSTANTS:
vkCmdPushConstants(cmd, pipelineLayout, c.push.stages, c.push.offset, c.push.size, c.push.data);
break;

View File

@ -8,6 +8,7 @@
#include "Common/Data/Collections/Hashmaps.h"
#include "Common/GPU/Vulkan/VulkanContext.h"
#include "Common/GPU/Vulkan/VulkanBarrier.h"
#include "Common/GPU/Vulkan/VulkanFrameData.h"
#include "Common/Data/Convert/SmallDataConvert.h"
#include "Common/Data/Collections/TinySet.h"
#include "Common/GPU/DataFormat.h"
@ -16,11 +17,11 @@ class VKRFramebuffer;
struct VKRGraphicsPipeline;
struct VKRComputePipeline;
struct VKRImage;
struct FrameData;
enum {
QUEUE_HACK_MGS2_ACID = 1,
QUEUE_HACK_SONIC = 2,
// Killzone PR = 4.
QUEUE_HACK_RENDERPASS_MERGE = 8,
};
@ -36,20 +37,24 @@ enum class VKRRenderCommand : uint8_t {
DRAW,
DRAW_INDEXED,
PUSH_CONSTANTS,
SELF_DEPENDENCY_BARRIER,
NUM_RENDER_COMMANDS,
};
enum PipelineFlags {
PIPELINE_FLAG_NONE = 0,
PIPELINE_FLAG_USES_LINES = (1 << 2),
PIPELINE_FLAG_USES_BLEND_CONSTANT = (1 << 3),
PIPELINE_FLAG_USES_DEPTH_STENCIL = (1 << 4), // Reads or writes the depth buffer.
enum class PipelineFlags {
NONE = 0,
USES_LINES = (1 << 2),
USES_BLEND_CONSTANT = (1 << 3),
USES_DEPTH_STENCIL = (1 << 4), // Reads or writes the depth buffer.
USES_INPUT_ATTACHMENT = (1 << 5),
};
ENUM_CLASS_BITOPS(PipelineFlags);
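Assuming ENUM_CLASS_BITOPS generates the usual bitwise operator overloads for the scoped enum, call sites keep the plain-enum ergonomics while gaining type safety:
PipelineFlags f = PipelineFlags::USES_LINES | PipelineFlags::USES_DEPTH_STENCIL;
if (f & PipelineFlags::USES_DEPTH_STENCIL) {
	// e.g. request depth/stencil load/store for the render pass
}
f |= PipelineFlags::USES_INPUT_ATTACHMENT;  // accumulates per pipeline, per step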
// Pipelines need to be created for the right type of render pass.
enum RenderPassType {
RP_TYPE_BACKBUFFER,
RP_TYPE_COLOR_DEPTH,
RP_TYPE_COLOR_DEPTH_INPUT,
// Later will add pure-color render passes.
RP_TYPE_COUNT,
};
@ -146,14 +151,6 @@ struct TransitionRequest {
VkImageLayout targetLayout;
};
struct QueueProfileContext {
VkQueryPool queryPool;
std::vector<std::string> timestampDescriptions;
std::string profileSummary;
double cpuStartTime;
double cpuEndTime;
};
class VKRRenderPass;
struct VKRStep {
@ -168,7 +165,6 @@ struct VKRStep {
union {
struct {
VKRFramebuffer *framebuffer;
// TODO: Look these up through renderPass?
VKRRenderPassLoadAction colorLoad;
VKRRenderPassLoadAction depthLoad;
VKRRenderPassLoadAction stencilLoad;
@ -183,7 +179,7 @@ struct VKRStep {
int numReads;
VkImageLayout finalColorLayout;
VkImageLayout finalDepthStencilLayout;
u32 pipelineFlags;
PipelineFlags pipelineFlags; // contains the self-dependency flag, in the form of USES_INPUT_ATTACHMENT
VkRect2D renderArea;
// Render pass type. Deduced after finishing recording the pass, from the used pipelines.
// NOTE: Storing the render pass here doesn't do much good, we change the compatible parameters (load/store ops) during step optimization.
@ -255,7 +251,7 @@ public:
}
void PreprocessSteps(std::vector<VKRStep *> &steps);
void RunSteps(VkCommandBuffer cmd, std::vector<VKRStep *> &steps, QueueProfileContext *profile);
void RunSteps(FrameData &frameData, FrameDataShared &frameDataShared);
void LogSteps(const std::vector<VKRStep *> &steps, bool verbose);
std::string StepToString(const VKRStep &step) const;
@ -263,6 +259,14 @@ public:
void CreateDeviceObjects();
void DestroyDeviceObjects();
// Swapchain
void DestroyBackBuffers();
bool CreateSwapchain(VkCommandBuffer cmdInit);
bool HasBackbuffers() const {
return !framebuffers_.empty();
}
// Get a render pass that's compatible with all our framebuffers.
// Note that it's precached; we can't look it up in the map as this might be called on another thread.
VKRRenderPass *GetCompatibleRenderPass() const {
@ -302,6 +306,9 @@ public:
}
private:
bool InitBackbufferFramebuffers(int width, int height);
bool InitDepthStencilBuffer(VkCommandBuffer cmd); // Used for non-buffered rendering.
VKRRenderPass *PerformBindFramebufferAsRenderTarget(const VKRStep &pass, VkCommandBuffer cmd);
void PerformRenderPass(const VKRStep &pass, VkCommandBuffer cmd);
void PerformCopy(const VKRStep &pass, VkCommandBuffer cmd);
@ -324,6 +331,8 @@ private:
static void SetupTransitionToTransferSrc(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrier *recordBarrier);
static void SetupTransitionToTransferDst(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrier *recordBarrier);
static void SelfDependencyBarrier(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrier *recordBarrier);
VulkanContext *vulkan_;
VkFramebuffer backbuffer_ = VK_NULL_HANDLE;
@ -354,4 +363,20 @@ private:
// Stored here to help reuse the allocation.
VulkanBarrier recordBarrier_;
// Swap chain management
struct SwapchainImageData {
VkImage image;
VkImageView view;
};
std::vector<VkFramebuffer> framebuffers_;
std::vector<SwapchainImageData> swapchainImages_;
uint32_t swapchainImageCount_ = 0;
struct DepthBufferInfo {
VkFormat format = VK_FORMAT_UNDEFINED;
VkImage image = VK_NULL_HANDLE;
VmaAllocation alloc = VK_NULL_HANDLE;
VkImageView view = VK_NULL_HANDLE;
};
DepthBufferInfo depth_;
};

View File

@ -223,7 +223,7 @@ void CreateImage(VulkanContext *vulkan, VkCommandBuffer cmd, VKRImage &img, int
// Strictly speaking we don't yet need VK_IMAGE_USAGE_SAMPLED_BIT for depth buffers since we do not yet sample depth buffers.
ici.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
if (color) {
ici.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
ici.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT;
} else {
ici.usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
}
@ -288,53 +288,15 @@ void CreateImage(VulkanContext *vulkan, VkCommandBuffer cmd, VKRImage &img, int
}
VulkanRenderManager::VulkanRenderManager(VulkanContext *vulkan) : vulkan_(vulkan), queueRunner_(vulkan) {
VkSemaphoreCreateInfo semaphoreCreateInfo = { VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO };
semaphoreCreateInfo.flags = 0;
VkResult res = vkCreateSemaphore(vulkan_->GetDevice(), &semaphoreCreateInfo, nullptr, &acquireSemaphore_);
_dbg_assert_(res == VK_SUCCESS);
res = vkCreateSemaphore(vulkan_->GetDevice(), &semaphoreCreateInfo, nullptr, &renderingCompleteSemaphore_);
_dbg_assert_(res == VK_SUCCESS);
inflightFramesAtStart_ = vulkan_->GetInflightFrames();
frameDataShared_.Init(vulkan);
for (int i = 0; i < inflightFramesAtStart_; i++) {
VkCommandPoolCreateInfo cmd_pool_info = { VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO };
cmd_pool_info.queueFamilyIndex = vulkan_->GetGraphicsQueueFamilyIndex();
cmd_pool_info.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT;
VkResult res = vkCreateCommandPool(vulkan_->GetDevice(), &cmd_pool_info, nullptr, &frameData_[i].cmdPoolInit);
_dbg_assert_(res == VK_SUCCESS);
res = vkCreateCommandPool(vulkan_->GetDevice(), &cmd_pool_info, nullptr, &frameData_[i].cmdPoolMain);
_dbg_assert_(res == VK_SUCCESS);
VkCommandBufferAllocateInfo cmd_alloc = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO };
cmd_alloc.commandPool = frameData_[i].cmdPoolInit;
cmd_alloc.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
cmd_alloc.commandBufferCount = 1;
res = vkAllocateCommandBuffers(vulkan_->GetDevice(), &cmd_alloc, &frameData_[i].initCmd);
_dbg_assert_(res == VK_SUCCESS);
cmd_alloc.commandPool = frameData_[i].cmdPoolMain;
res = vkAllocateCommandBuffers(vulkan_->GetDevice(), &cmd_alloc, &frameData_[i].mainCmd);
_dbg_assert_(res == VK_SUCCESS);
// Creating the frame fence with true so they can be instantly waited on the first frame
frameData_[i].fence = vulkan_->CreateFence(true);
// This fence one is used for synchronizing readbacks. Does not need preinitialization.
frameData_[i].readbackFence = vulkan_->CreateFence(false);
VkQueryPoolCreateInfo query_ci{ VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO };
query_ci.queryCount = MAX_TIMESTAMP_QUERIES;
query_ci.queryType = VK_QUERY_TYPE_TIMESTAMP;
res = vkCreateQueryPool(vulkan_->GetDevice(), &query_ci, nullptr, &frameData_[i].profile.queryPool);
frameData_[i].Init(vulkan, i);
}
queueRunner_.CreateDeviceObjects();
// AMD hack for issue #10097 (older drivers only.)
const auto &props = vulkan_->GetPhysicalDeviceProperties().properties;
if (props.vendorID == VULKAN_VENDOR_AMD && props.apiVersion < VK_API_VERSION_1_1) {
useThread_ = false;
}
}
bool VulkanRenderManager::CreateBackbuffers() {
@ -342,52 +304,14 @@ bool VulkanRenderManager::CreateBackbuffers() {
ERROR_LOG(G3D, "No swapchain - can't create backbuffers");
return false;
}
VkResult res = vkGetSwapchainImagesKHR(vulkan_->GetDevice(), vulkan_->GetSwapchain(), &swapchainImageCount_, nullptr);
_dbg_assert_(res == VK_SUCCESS);
VkImage *swapchainImages = new VkImage[swapchainImageCount_];
res = vkGetSwapchainImagesKHR(vulkan_->GetDevice(), vulkan_->GetSwapchain(), &swapchainImageCount_, swapchainImages);
if (res != VK_SUCCESS) {
ERROR_LOG(G3D, "vkGetSwapchainImagesKHR failed");
delete[] swapchainImages;
return false;
}
VkCommandBuffer cmdInit = GetInitCmd();
for (uint32_t i = 0; i < swapchainImageCount_; i++) {
SwapchainImageData sc_buffer{};
sc_buffer.image = swapchainImages[i];
VkImageViewCreateInfo color_image_view = { VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO };
color_image_view.format = vulkan_->GetSwapchainFormat();
color_image_view.components.r = VK_COMPONENT_SWIZZLE_IDENTITY;
color_image_view.components.g = VK_COMPONENT_SWIZZLE_IDENTITY;
color_image_view.components.b = VK_COMPONENT_SWIZZLE_IDENTITY;
color_image_view.components.a = VK_COMPONENT_SWIZZLE_IDENTITY;
color_image_view.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
color_image_view.subresourceRange.baseMipLevel = 0;
color_image_view.subresourceRange.levelCount = 1;
color_image_view.subresourceRange.baseArrayLayer = 0;
color_image_view.subresourceRange.layerCount = 1;
color_image_view.viewType = VK_IMAGE_VIEW_TYPE_2D;
color_image_view.flags = 0;
color_image_view.image = sc_buffer.image;
// We leave the images as UNDEFINED, there's no need to pre-transition them as
// the backbuffer renderpass starts out with them being auto-transitioned from UNDEFINED anyway.
// Also, turns out it's illegal to transition un-acquired images, thanks Hans-Kristian. See #11417.
res = vkCreateImageView(vulkan_->GetDevice(), &color_image_view, nullptr, &sc_buffer.view);
swapchainImages_.push_back(sc_buffer);
_dbg_assert_(res == VK_SUCCESS);
if (!queueRunner_.CreateSwapchain(cmdInit)) {
return false;
}
delete[] swapchainImages;
// Must be before InitBackbufferRenderPass.
if (InitDepthStencilBuffer(cmdInit)) {
InitBackbufferFramebuffers(vulkan_->GetBackbufferWidth(), vulkan_->GetBackbufferHeight());
}
curWidthRaw_ = -1;
curHeightRaw_ = -1;
@ -404,7 +328,7 @@ bool VulkanRenderManager::CreateBackbuffers() {
outOfDateFrames_ = 0;
// Start the thread.
if (useThread_ && HasBackbuffers()) {
if (HasBackbuffers()) {
run_ = true;
// Won't necessarily be 0.
threadInitFrame_ = vulkan_->GetCurFrame();
@ -417,57 +341,58 @@ bool VulkanRenderManager::CreateBackbuffers() {
}
void VulkanRenderManager::StopThread() {
if (useThread_ && run_) {
run_ = false;
// Stop the thread.
for (int i = 0; i < vulkan_->GetInflightFrames(); i++) {
auto &frameData = frameData_[i];
{
std::unique_lock<std::mutex> lock(frameData.push_mutex);
frameData.push_condVar.notify_all();
}
{
std::unique_lock<std::mutex> lock(frameData.pull_mutex);
frameData.pull_condVar.notify_all();
}
// Zero the queries so we don't try to pull them later.
frameData.profile.timestampDescriptions.clear();
}
thread_.join();
INFO_LOG(G3D, "Vulkan submission thread joined. Frame=%d", vulkan_->GetCurFrame());
compileCond_.notify_all();
compileThread_.join();
INFO_LOG(G3D, "Vulkan compiler thread joined.");
// Eat whatever has been queued up for this frame if anything.
Wipe();
// Wait for any fences to finish and be resignaled, so we don't have sync issues.
// Also clean out any queued data, which might refer to things that might not be valid
// when we restart...
for (int i = 0; i < vulkan_->GetInflightFrames(); i++) {
auto &frameData = frameData_[i];
_assert_(!frameData.readyForRun);
_assert_(frameData.steps.empty());
if (frameData.hasInitCommands) {
// Clear 'em out. This can happen on restart sometimes.
vkEndCommandBuffer(frameData.initCmd);
frameData.hasInitCommands = false;
}
frameData.readyForRun = false;
for (size_t i = 0; i < frameData.steps.size(); i++) {
delete frameData.steps[i];
}
frameData.steps.clear();
std::unique_lock<std::mutex> lock(frameData.push_mutex);
while (!frameData.readyForFence) {
VLOG("PUSH: Waiting for frame[%d].readyForFence = 1 (stop)", i);
frameData.push_condVar.wait(lock);
}
}
} else {
if (!run_) {
INFO_LOG(G3D, "Vulkan submission thread was already stopped.");
return;
}
run_ = false;
// Stop the thread.
for (int i = 0; i < vulkan_->GetInflightFrames(); i++) {
auto &frameData = frameData_[i];
{
std::unique_lock<std::mutex> lock(frameData.push_mutex);
frameData.push_condVar.notify_all();
}
{
std::unique_lock<std::mutex> lock(frameData.pull_mutex);
frameData.pull_condVar.notify_all();
}
// Zero the queries so we don't try to pull them later.
frameData.profile.timestampDescriptions.clear();
}
thread_.join();
INFO_LOG(G3D, "Vulkan submission thread joined. Frame=%d", vulkan_->GetCurFrame());
compileCond_.notify_all();
compileThread_.join();
INFO_LOG(G3D, "Vulkan compiler thread joined.");
// Eat whatever has been queued up for this frame if anything.
Wipe();
// Wait for any fences to finish and be resignaled, so we don't have sync issues.
// Also clean out any queued data, which might refer to things that might not be valid
// when we restart...
for (int i = 0; i < vulkan_->GetInflightFrames(); i++) {
auto &frameData = frameData_[i];
_assert_(!frameData.readyForRun);
_assert_(frameData.steps.empty());
if (frameData.hasInitCommands) {
// Clear 'em out. This can happen on restart sometimes.
vkEndCommandBuffer(frameData.initCmd);
frameData.hasInitCommands = false;
}
frameData.readyForRun = false;
for (size_t i = 0; i < frameData.steps.size(); i++) {
delete frameData.steps[i];
}
frameData.steps.clear();
std::unique_lock<std::mutex> lock(frameData.push_mutex);
while (!frameData.readyForFence) {
VLOG("PUSH: Waiting for frame[%d].readyForFence = 1 (stop)", i);
frameData.push_condVar.wait(lock);
}
}
}
@ -475,26 +400,7 @@ void VulkanRenderManager::DestroyBackbuffers() {
StopThread();
vulkan_->WaitUntilQueueIdle();
for (auto &image : swapchainImages_) {
vulkan_->Delete().QueueDeleteImageView(image.view);
}
swapchainImages_.clear();
if (depth_.view) {
vulkan_->Delete().QueueDeleteImageView(depth_.view);
}
if (depth_.image) {
_dbg_assert_(depth_.alloc);
vulkan_->Delete().QueueDeleteImageAllocation(depth_.image, depth_.alloc);
}
depth_ = {};
for (uint32_t i = 0; i < framebuffers_.size(); i++) {
_dbg_assert_(framebuffers_[i] != VK_NULL_HANDLE);
vulkan_->Delete().QueueDeleteFramebuffer(framebuffers_[i]);
}
framebuffers_.clear();
INFO_LOG(G3D, "Backbuffers destroyed");
queueRunner_.DestroyBackBuffers();
}
VulkanRenderManager::~VulkanRenderManager() {
@ -504,16 +410,9 @@ VulkanRenderManager::~VulkanRenderManager() {
DrainCompileQueue();
VkDevice device = vulkan_->GetDevice();
vkDestroySemaphore(device, acquireSemaphore_, nullptr);
vkDestroySemaphore(device, renderingCompleteSemaphore_, nullptr);
frameDataShared_.Destroy(vulkan_);
for (int i = 0; i < inflightFramesAtStart_; i++) {
vkFreeCommandBuffers(device, frameData_[i].cmdPoolInit, 1, &frameData_[i].initCmd);
vkFreeCommandBuffers(device, frameData_[i].cmdPoolMain, 1, &frameData_[i].mainCmd);
vkDestroyCommandPool(device, frameData_[i].cmdPoolInit, nullptr);
vkDestroyCommandPool(device, frameData_[i].cmdPoolMain, nullptr);
vkDestroyFence(device, frameData_[i].fence, nullptr);
vkDestroyFence(device, frameData_[i].readbackFence, nullptr);
vkDestroyQueryPool(device, frameData_[i].profile.queryPool, nullptr);
frameData_[i].Destroy(vulkan_);
}
queueRunner_.DestroyDeviceObjects();
}
@ -534,7 +433,9 @@ void VulkanRenderManager::CompileThreadFunc() {
break;
}
INFO_LOG(G3D, "Compilation thread has %d pipelines to create", (int)toCompile.size());
if (!toCompile.empty()) {
INFO_LOG(G3D, "Compilation thread has %d pipelines to create", (int)toCompile.size());
}
// TODO: Here we can sort the pending pipelines by vertex and fragment shaders,
// and split up further.
@ -574,6 +475,7 @@ void VulkanRenderManager::ThreadFunc() {
threadFrame = 0;
}
FrameData &frameData = frameData_[threadFrame];
std::unique_lock<std::mutex> lock(frameData.pull_mutex);
while (!frameData.readyForRun && run_) {
VLOG("PULL: Waiting for frame[%d].readyForRun", threadFrame);
@ -589,8 +491,7 @@ void VulkanRenderManager::ThreadFunc() {
// but that created a race condition where frames could end up not finished properly on resize etc.
// Only increment next time if we're done.
nextFrame = frameData.type == VKRRunType::END;
_dbg_assert_(frameData.type == VKRRunType::END || frameData.type == VKRRunType::SYNC);
nextFrame = frameData.RunType() == VKRRunType::END;
}
VLOG("PULL: Running frame %d", threadFrame);
if (firstFrame) {
@ -615,7 +516,7 @@ void VulkanRenderManager::BeginFrame(bool enableProfiling, bool enableLogProfile
FrameData &frameData = frameData_[curFrame];
// Make sure the very last command buffer from the frame before the previous has been fully executed.
if (useThread_) {
{
std::unique_lock<std::mutex> lock(frameData.push_mutex);
while (!frameData.readyForFence) {
VLOG("PUSH: Waiting for frame[%d].readyForFence = 1", curFrame);
@ -633,7 +534,6 @@ void VulkanRenderManager::BeginFrame(bool enableProfiling, bool enableLogProfile
// Can't set this until after the fence.
frameData.profilingEnabled_ = enableProfiling;
frameData.readbackFenceUsed = false;
uint64_t queryResults[MAX_TIMESTAMP_QUERIES];
@ -698,21 +598,7 @@ void VulkanRenderManager::BeginFrame(bool enableProfiling, bool enableLogProfile
VkCommandBuffer VulkanRenderManager::GetInitCmd() {
int curFrame = vulkan_->GetCurFrame();
FrameData &frameData = frameData_[curFrame];
if (!frameData.hasInitCommands) {
VkCommandBufferBeginInfo begin = {
VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
nullptr,
VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT
};
vkResetCommandPool(vulkan_->GetDevice(), frameData.cmdPoolInit, 0);
VkResult res = vkBeginCommandBuffer(frameData.initCmd, &begin);
if (res != VK_SUCCESS) {
return VK_NULL_HANDLE;
}
frameData.hasInitCommands = true;
}
return frameData_[curFrame].initCmd;
return frameData_[curFrame].GetInitCmd(vulkan_);
}
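// A plausible sketch of the new FrameData::GetInitCmd, assuming it simply
// wraps the begin-on-demand logic removed above (the real code lives in the
// new Common/GPU/Vulkan/VulkanFrameData.cpp, which is not shown here):
//
// VkCommandBuffer FrameData::GetInitCmd(VulkanContext *vulkan) {
//     if (!hasInitCommands) {
//         VkCommandBufferBeginInfo begin{ VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };
//         begin.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
//         vkResetCommandPool(vulkan->GetDevice(), cmdPoolInit, 0);
//         if (vkBeginCommandBuffer(initCmd, &begin) != VK_SUCCESS)
//             return VK_NULL_HANDLE;  // caller treats this as "no init cmd available"
//         hasInitCommands = true;
//     }
//     return initCmd;
// }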
VKRGraphicsPipeline *VulkanRenderManager::CreateGraphicsPipeline(VKRGraphicsPipelineDesc *desc, uint32_t variantBitmask, const char *tag) {
@ -771,16 +657,20 @@ void VulkanRenderManager::EndCurRenderStep() {
curRenderStep_->render.colorStore, curRenderStep_->render.depthStore, curRenderStep_->render.stencilStore,
};
RenderPassType rpType = RP_TYPE_COLOR_DEPTH;
// Save the accumulated pipeline flags so we can use that to configure the render pass.
// We'll often be able to avoid loading/saving the depth/stencil buffer.
curRenderStep_->render.pipelineFlags = curPipelineFlags_;
if (!curRenderStep_->render.framebuffer) {
rpType = RP_TYPE_BACKBUFFER;
} else if (curPipelineFlags_ & PipelineFlags::USES_INPUT_ATTACHMENT) {
// Not allowed on backbuffers.
rpType = RP_TYPE_COLOR_DEPTH_INPUT;
}
// TODO: Also add render pass types for depth/stencil-less.
VKRRenderPass *renderPass = queueRunner_.GetRenderPass(key);
curRenderStep_->render.renderPassType = rpType;
// Save the accumulated pipeline flags so we can use that to configure the render pass.
// We'll often be able to avoid loading/saving the depth/stencil buffer.
compileMutex_.lock();
bool needsCompile = false;
for (VKRGraphicsPipeline *pipeline : pipelinesToCheck_) {
@ -806,7 +696,12 @@ void VulkanRenderManager::EndCurRenderStep() {
// We no longer have a current render step.
curRenderStep_ = nullptr;
curPipelineFlags_ = 0;
curPipelineFlags_ = (PipelineFlags)0;
}
void VulkanRenderManager::BindCurrentFramebufferAsInputAttachment0(VkImageAspectFlags aspectBits) {
_dbg_assert_(curRenderStep_);
curRenderStep_->commands.push_back(VkRenderData{ VKRRenderCommand::SELF_DEPENDENCY_BARRIER });
}
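// Illustrative only: when the queue runner later executes SELF_DEPENDENCY_BARRIER,
// the natural implementation (assuming the render pass declares a by-region
// self-dependency on the subpass) is a vkCmdPipelineBarrier along these lines:
//
// VkMemoryBarrier barrier{ VK_STRUCTURE_TYPE_MEMORY_BARRIER };
// barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
// barrier.dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
// vkCmdPipelineBarrier(cmd,
//     VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
//     VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
//     VK_DEPENDENCY_BY_REGION_BIT,
//     1, &barrier, 0, nullptr, 0, nullptr);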
void VulkanRenderManager::BindFramebufferAsRenderTarget(VKRFramebuffer *fb, VKRRenderPassLoadAction color, VKRRenderPassLoadAction depth, VKRRenderPassLoadAction stencil, uint32_t clearColor, float clearDepth, uint8_t clearStencil, const char *tag) {
@ -1028,98 +923,6 @@ void VulkanRenderManager::CopyImageToMemorySync(VkImage image, int mipLevel, int
queueRunner_.CopyReadbackBuffer(w, h, destFormat, destFormat, pixelStride, pixels);
}
bool VulkanRenderManager::InitBackbufferFramebuffers(int width, int height) {
VkResult res;
// We share the same depth buffer but have multiple color buffers; see the loop below.
VkImageView attachments[2] = { VK_NULL_HANDLE, depth_.view };
VkFramebufferCreateInfo fb_info = { VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO };
fb_info.renderPass = queueRunner_.GetCompatibleRenderPass()->Get(vulkan_, RP_TYPE_BACKBUFFER);
fb_info.attachmentCount = 2;
fb_info.pAttachments = attachments;
fb_info.width = width;
fb_info.height = height;
fb_info.layers = 1;
framebuffers_.resize(swapchainImageCount_);
for (uint32_t i = 0; i < swapchainImageCount_; i++) {
attachments[0] = swapchainImages_[i].view;
res = vkCreateFramebuffer(vulkan_->GetDevice(), &fb_info, nullptr, &framebuffers_[i]);
_dbg_assert_(res == VK_SUCCESS);
if (res != VK_SUCCESS) {
framebuffers_.clear();
return false;
}
}
return true;
}
bool VulkanRenderManager::InitDepthStencilBuffer(VkCommandBuffer cmd) {
const VkFormat depth_format = vulkan_->GetDeviceInfo().preferredDepthStencilFormat;
int aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
VkImageCreateInfo image_info = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
image_info.imageType = VK_IMAGE_TYPE_2D;
image_info.format = depth_format;
image_info.extent.width = vulkan_->GetBackbufferWidth();
image_info.extent.height = vulkan_->GetBackbufferHeight();
image_info.extent.depth = 1;
image_info.mipLevels = 1;
image_info.arrayLayers = 1;
image_info.samples = VK_SAMPLE_COUNT_1_BIT;
image_info.queueFamilyIndexCount = 0;
image_info.pQueueFamilyIndices = nullptr;
image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
image_info.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
image_info.flags = 0;
depth_.format = depth_format;
VmaAllocationCreateInfo allocCreateInfo{};
VmaAllocationInfo allocInfo{};
allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
VkResult res = vmaCreateImage(vulkan_->Allocator(), &image_info, &allocCreateInfo, &depth_.image, &depth_.alloc, &allocInfo);
_dbg_assert_(res == VK_SUCCESS);
if (res != VK_SUCCESS)
return false;
vulkan_->SetDebugName(depth_.image, VK_OBJECT_TYPE_IMAGE, "BackbufferDepth");
TransitionImageLayout2(cmd, depth_.image, 0, 1,
aspectMask,
VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
0, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT);
VkImageViewCreateInfo depth_view_info = { VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO };
depth_view_info.image = depth_.image;
depth_view_info.format = depth_format;
depth_view_info.components.r = VK_COMPONENT_SWIZZLE_IDENTITY;
depth_view_info.components.g = VK_COMPONENT_SWIZZLE_IDENTITY;
depth_view_info.components.b = VK_COMPONENT_SWIZZLE_IDENTITY;
depth_view_info.components.a = VK_COMPONENT_SWIZZLE_IDENTITY;
depth_view_info.subresourceRange.aspectMask = aspectMask;
depth_view_info.subresourceRange.baseMipLevel = 0;
depth_view_info.subresourceRange.levelCount = 1;
depth_view_info.subresourceRange.baseArrayLayer = 0;
depth_view_info.subresourceRange.layerCount = 1;
depth_view_info.viewType = VK_IMAGE_VIEW_TYPE_2D;
depth_view_info.flags = 0;
VkDevice device = vulkan_->GetDevice();
res = vkCreateImageView(device, &depth_view_info, NULL, &depth_.view);
_dbg_assert_(res == VK_SUCCESS);
if (res != VK_SUCCESS)
return false;
return true;
}
static void RemoveDrawCommands(std::vector<VkRenderData> *cmds) {
// Here we remove any DRAW type commands when we hit a CLEAR.
for (auto &c : *cmds) {
@ -1359,6 +1162,9 @@ VkImageView VulkanRenderManager::BindFramebufferAsTexture(VKRFramebuffer *fb, in
}
}
// Called on main thread.
// Sends the collected commands to the render thread. Submit-latency should be
// measured from here, probably.
void VulkanRenderManager::Finish() {
EndCurRenderStep();
@ -1371,18 +1177,14 @@ void VulkanRenderManager::Finish() {
int curFrame = vulkan_->GetCurFrame();
FrameData &frameData = frameData_[curFrame];
if (!useThread_) {
frameData.steps = std::move(steps_);
steps_.clear();
frameData.type = VKRRunType::END;
Run(curFrame);
} else {
{
std::unique_lock<std::mutex> lock(frameData.pull_mutex);
VLOG("PUSH: Frame[%d].readyForRun = true", curFrame);
frameData.steps = std::move(steps_);
steps_.clear();
frameData.readyForRun = true;
frameData.type = VKRRunType::END;
frameData.runType_ = VKRRunType::END;
frameData.pull_condVar.notify_all();
}
vulkan_->EndFrame();
@ -1397,118 +1199,39 @@ void VulkanRenderManager::Wipe() {
steps_.clear();
}
// Called on the render thread.
//
// Can be called multiple times with no bad side effects. This is so that we can either begin a frame the normal way,
// or stop it in the middle for a synchronous readback, then start over again mostly normally but without repeating
// the backbuffer image acquisition.
void VulkanRenderManager::BeginSubmitFrame(int frame) {
FrameData &frameData = frameData_[frame];
if (!frameData.hasBegun) {
// Get the index of the next available swapchain image, and a semaphore to block command buffer execution on.
VkResult res = vkAcquireNextImageKHR(vulkan_->GetDevice(), vulkan_->GetSwapchain(), UINT64_MAX, acquireSemaphore_, (VkFence)VK_NULL_HANDLE, &frameData.curSwapchainImage);
if (res == VK_SUBOPTIMAL_KHR) {
// Hopefully the resize will happen shortly. Ignore - at worst, one frame will look wrong.
WARN_LOG(G3D, "VK_SUBOPTIMAL_KHR returned - ignoring");
} else if (res == VK_ERROR_OUT_OF_DATE_KHR) {
WARN_LOG(G3D, "VK_ERROR_OUT_OF_DATE_KHR returned - processing the frame, but not presenting");
frameData.skipSwap = true;
} else {
_assert_msg_(res == VK_SUCCESS, "vkAcquireNextImageKHR failed! result=%s", VulkanResultToString(res));
}
// At most the init command buffer should be pending here (that one came from the other thread).
_dbg_assert_(!frameData.hasPresentCommands);
frameData.SubmitPending(vulkan_, FrameSubmitType::Pending, frameDataShared_);
if (!frameData.hasMainCommands) {
// Effectively resets both main and present command buffers, since they both live in this pool.
// We always record main commands first, so we don't need to reset the present command buffer separately.
vkResetCommandPool(vulkan_->GetDevice(), frameData.cmdPoolMain, 0);
VkCommandBufferBeginInfo begin{ VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };
begin.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
res = vkBeginCommandBuffer(frameData.mainCmd, &begin);
VkResult res = vkBeginCommandBuffer(frameData.mainCmd, &begin);
frameData.hasMainCommands = true;
_assert_msg_(res == VK_SUCCESS, "vkBeginCommandBuffer failed! result=%s", VulkanResultToString(res));
queueRunner_.SetBackbuffer(framebuffers_[frameData.curSwapchainImage], swapchainImages_[frameData.curSwapchainImage].image);
frameData.hasBegun = true;
}
}
void VulkanRenderManager::Submit(int frame, bool triggerFrameFence) {
FrameData &frameData = frameData_[frame];
if (frameData.hasInitCommands) {
if (frameData.profilingEnabled_ && triggerFrameFence) {
// Pre-allocated query ID 1.
vkCmdWriteTimestamp(frameData.initCmd, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, frameData.profile.queryPool, 1);
}
VkResult res = vkEndCommandBuffer(frameData.initCmd);
_assert_msg_(res == VK_SUCCESS, "vkEndCommandBuffer failed (init)! result=%s", VulkanResultToString(res));
}
VkResult res = vkEndCommandBuffer(frameData.mainCmd);
_assert_msg_(res == VK_SUCCESS, "vkEndCommandBuffer failed (main)! result=%s", VulkanResultToString(res));
VkCommandBuffer cmdBufs[2];
int numCmdBufs = 0;
if (frameData.hasInitCommands) {
cmdBufs[numCmdBufs++] = frameData.initCmd;
if (splitSubmit_) {
// Send the init commands off separately. We used this once to confirm that the cause of a device loss was in the init cmdbuf.
VkSubmitInfo submit_info{ VK_STRUCTURE_TYPE_SUBMIT_INFO };
submit_info.commandBufferCount = (uint32_t)numCmdBufs;
submit_info.pCommandBuffers = cmdBufs;
res = vkQueueSubmit(vulkan_->GetGraphicsQueue(), 1, &submit_info, VK_NULL_HANDLE);
if (res == VK_ERROR_DEVICE_LOST) {
_assert_msg_(false, "Lost the Vulkan device in split submit! If this happens again, switch Graphics Backend away from Vulkan");
} else {
_assert_msg_(res == VK_SUCCESS, "vkQueueSubmit failed (init)! result=%s", VulkanResultToString(res));
}
numCmdBufs = 0;
}
}
cmdBufs[numCmdBufs++] = frameData.mainCmd;
VkSubmitInfo submit_info{ VK_STRUCTURE_TYPE_SUBMIT_INFO };
VkPipelineStageFlags waitStage[1]{ VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT };
if (triggerFrameFence && !frameData.skipSwap) {
submit_info.waitSemaphoreCount = 1;
submit_info.pWaitSemaphores = &acquireSemaphore_;
submit_info.pWaitDstStageMask = waitStage;
}
submit_info.commandBufferCount = (uint32_t)numCmdBufs;
submit_info.pCommandBuffers = cmdBufs;
if (triggerFrameFence && !frameData.skipSwap) {
submit_info.signalSemaphoreCount = 1;
submit_info.pSignalSemaphores = &renderingCompleteSemaphore_;
}
res = vkQueueSubmit(vulkan_->GetGraphicsQueue(), 1, &submit_info, triggerFrameFence ? frameData.fence : frameData.readbackFence);
if (res == VK_ERROR_DEVICE_LOST) {
_assert_msg_(false, "Lost the Vulkan device in vkQueueSubmit! If this happens again, switch Graphics Backend away from Vulkan");
} else {
_assert_msg_(res == VK_SUCCESS, "vkQueueSubmit failed (main, split=%d)! result=%s", (int)splitSubmit_, VulkanResultToString(res));
}
// When !triggerFrameFence, we notify after syncing with Vulkan.
if (useThread_ && triggerFrameFence) {
VLOG("PULL: Frame %d.readyForFence = true", frame);
std::unique_lock<std::mutex> lock(frameData.push_mutex);
frameData.readyForFence = true;
frameData.push_condVar.notify_all();
}
frameData.hasInitCommands = false;
}
// Called on the render thread.
void VulkanRenderManager::EndSubmitFrame(int frame) {
FrameData &frameData = frameData_[frame];
frameData.hasBegun = false;
Submit(frame, true);
frameData.SubmitPending(vulkan_, FrameSubmitType::Present, frameDataShared_);
if (!frameData.skipSwap) {
VkSwapchainKHR swapchain = vulkan_->GetSwapchain();
VkPresentInfoKHR present = { VK_STRUCTURE_TYPE_PRESENT_INFO_KHR };
present.swapchainCount = 1;
present.pSwapchains = &swapchain;
present.pImageIndices = &frameData.curSwapchainImage;
present.pWaitSemaphores = &renderingCompleteSemaphore_;
present.waitSemaphoreCount = 1;
VkResult res = vkQueuePresentKHR(vulkan_->GetGraphicsQueue(), &present);
VkResult res = frameData.QueuePresent(vulkan_, frameDataShared_);
if (res == VK_ERROR_OUT_OF_DATE_KHR) {
// We clearly didn't get this in vkAcquireNextImageKHR because of the skipSwap check above.
// Do the increment.
@ -1528,18 +1251,29 @@ void VulkanRenderManager::EndSubmitFrame(int frame) {
}
}
void VulkanRenderManager::EndSyncFrame(int frame) {
FrameData &frameData = frameData_[frame];
// The submit will trigger the readbackFence, and also do the wait for it.
frameData.SubmitPending(vulkan_, FrameSubmitType::Sync, frameDataShared_);
// At this point we can resume filling the command buffers for the current frame since
// we know the device is idle - and thus all previously enqueued command buffers have been processed.
// No need to switch to the next frame number; that would just be confusing.
std::unique_lock<std::mutex> lock(frameData.push_mutex);
frameData.readyForFence = true;
frameData.push_condVar.notify_all();
}
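// For reference, a sketch of what SubmitPending(..., FrameSubmitType::Sync, ...)
// presumably does internally, matching the inline code it replaces further down:
//
// vkQueueSubmit(vulkan_->GetGraphicsQueue(), 1, &submit_info, frameData.readbackFence);
// vkWaitForFences(vulkan_->GetDevice(), 1, &frameData.readbackFence, true, UINT64_MAX);
// vkResetFences(vulkan_->GetDevice(), 1, &frameData.readbackFence);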
void VulkanRenderManager::Run(int frame) {
BeginSubmitFrame(frame);
FrameData &frameData = frameData_[frame];
auto &stepsOnThread = frameData_[frame].steps;
VkCommandBuffer cmd = frameData.mainCmd;
queueRunner_.PreprocessSteps(stepsOnThread);
queueRunner_.PreprocessSteps(frameData_[frame].steps);
//queueRunner_.LogSteps(stepsOnThread, false);
queueRunner_.RunSteps(cmd, stepsOnThread, frameData.profilingEnabled_ ? &frameData.profile : nullptr);
stepsOnThread.clear();
queueRunner_.RunSteps(frameData, frameDataShared_);
switch (frameData.type) {
switch (frameData.runType_) {
case VKRRunType::END:
EndSubmitFrame(frame);
break;
@ -1555,59 +1289,24 @@ void VulkanRenderManager::Run(int frame) {
VLOG("PULL: Finished running frame %d", frame);
}
void VulkanRenderManager::EndSyncFrame(int frame) {
FrameData &frameData = frameData_[frame];
frameData.readbackFenceUsed = true;
// The submit will trigger the readbackFence.
Submit(frame, false);
// Hard stall of the GPU, not ideal, but necessary so the CPU has the contents of the readback.
vkWaitForFences(vulkan_->GetDevice(), 1, &frameData.readbackFence, true, UINT64_MAX);
vkResetFences(vulkan_->GetDevice(), 1, &frameData.readbackFence);
// At this point we can resume filling the command buffers for the current frame since
// we know the device is idle - and thus all previously enqueued command buffers have been processed.
// No need to switch to the next frame number.
VkCommandBufferBeginInfo begin{
VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
nullptr,
VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT
};
vkResetCommandPool(vulkan_->GetDevice(), frameData.cmdPoolMain, 0);
VkResult res = vkBeginCommandBuffer(frameData.mainCmd, &begin);
_assert_(res == VK_SUCCESS);
if (useThread_) {
std::unique_lock<std::mutex> lock(frameData.push_mutex);
frameData.readyForFence = true;
frameData.push_condVar.notify_all();
}
}
void VulkanRenderManager::FlushSync() {
renderStepOffset_ += (int)steps_.size();
int curFrame = vulkan_->GetCurFrame();
FrameData &frameData = frameData_[curFrame];
if (!useThread_) {
frameData.steps = std::move(steps_);
steps_.clear();
frameData.type = VKRRunType::SYNC;
Run(curFrame);
} else {
{
std::unique_lock<std::mutex> lock(frameData.pull_mutex);
VLOG("PUSH: Frame[%d].readyForRun = true (sync)", curFrame);
frameData.steps = std::move(steps_);
steps_.clear();
frameData.readyForRun = true;
_dbg_assert_(!frameData.readyForFence);
frameData.type = VKRRunType::SYNC;
frameData.runType_ = VKRRunType::SYNC;
frameData.pull_condVar.notify_all();
}
if (useThread_) {
{
std::unique_lock<std::mutex> lock(frameData.push_mutex);
// Wait for the flush to be hit, since we're syncing.
while (!frameData.readyForFence) {

View File

@ -65,15 +65,6 @@ private:
std::string tag_;
};
enum class VKRRunType {
END,
SYNC,
};
enum {
MAX_TIMESTAMP_QUERIES = 128,
};
struct BoundingRect {
int x1;
int y1;
@ -236,6 +227,8 @@ public:
// as the other backends, even though there's no actual binding happening here.
VkImageView BindFramebufferAsTexture(VKRFramebuffer *fb, int binding, VkImageAspectFlags aspectBits, int attachment);
void BindCurrentFramebufferAsInputAttachment0(VkImageAspectFlags aspectBits);
bool CopyFramebufferToMemorySync(VKRFramebuffer *src, VkImageAspectFlags aspectBits, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, const char *tag);
void CopyImageToMemorySync(VkImage image, int mipLevel, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, const char *tag);
@ -440,11 +433,7 @@ public:
void DestroyBackbuffers();
bool HasBackbuffers() {
return !framebuffers_.empty();
}
void SetSplitSubmit(bool split) {
splitSubmit_ = split;
return queueRunner_.HasBackbuffers();
}
void SetInflightFrames(int f) {
@ -470,13 +459,10 @@ public:
}
private:
bool InitBackbufferFramebuffers(int width, int height);
bool InitDepthStencilBuffer(VkCommandBuffer cmd); // Used for non-buffered rendering.
void EndCurRenderStep();
void BeginSubmitFrame(int frame);
void EndSubmitFrame(int frame);
void Submit(int frame, bool triggerFence);
// Bad for performance but sometimes necessary for synchronous CPU readbacks (screenshots and whatnot).
void FlushSync();
@ -484,43 +470,7 @@ private:
void StopThread();
// Permanent objects
VkSemaphore acquireSemaphore_;
VkSemaphore renderingCompleteSemaphore_;
// Per-frame data, round-robin so we can overlap submission with execution of the previous frame.
struct FrameData {
std::mutex push_mutex;
std::condition_variable push_condVar;
std::mutex pull_mutex;
std::condition_variable pull_condVar;
bool readyForFence = true;
bool readyForRun = false;
bool skipSwap = false;
VKRRunType type = VKRRunType::END;
VkFence fence;
VkFence readbackFence; // Strictly speaking we might only need one of these.
bool readbackFenceUsed = false;
// These are on different threads so need separate pools.
VkCommandPool cmdPoolInit;
VkCommandPool cmdPoolMain;
VkCommandBuffer initCmd;
VkCommandBuffer mainCmd;
bool hasInitCommands = false;
std::vector<VKRStep *> steps;
// Swapchain.
bool hasBegun = false;
uint32_t curSwapchainImage = -1;
// Profiling.
QueueProfileContext profile;
bool profilingEnabled_;
};
FrameDataShared frameDataShared_;
FrameData frameData_[VulkanContext::MAX_INFLIGHT_FRAMES];
int newInflightFrames_ = -1;
@ -544,11 +494,10 @@ private:
VKRStep *curRenderStep_ = nullptr;
bool curStepHasViewport_ = false;
bool curStepHasScissor_ = false;
u32 curPipelineFlags_ = 0;
PipelineFlags curPipelineFlags_{};
BoundingRect curRenderArea_;
std::vector<VKRStep *> steps_;
bool splitSubmit_ = false;
// Execution time state
bool run_ = true;
@ -568,23 +517,4 @@ private:
// pipelines to check and possibly create at the end of the current render pass.
std::vector<VKRGraphicsPipeline *> pipelinesToCheck_;
// Swap chain management
struct SwapchainImageData {
VkImage image;
VkImageView view;
};
std::vector<VkFramebuffer> framebuffers_;
std::vector<SwapchainImageData> swapchainImages_;
uint32_t swapchainImageCount_ = 0;
struct DepthBufferInfo {
VkFormat format = VK_FORMAT_UNDEFINED;
VkImage image = VK_NULL_HANDLE;
VmaAllocation alloc = VK_NULL_HANDLE;
VkImageView view = VK_NULL_HANDLE;
};
DepthBufferInfo depth_;
// This works great - except see issue #10097. WTF?
bool useThread_ = true;
};

View File

@ -361,7 +361,7 @@ class VKFramebuffer;
class VKContext : public DrawContext {
public:
VKContext(VulkanContext *vulkan, bool splitSubmit);
VKContext(VulkanContext *vulkan);
virtual ~VKContext();
const DeviceCaps &GetDeviceCaps() const override {
@ -401,9 +401,10 @@ public:
// These functions should be self explanatory.
void BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPassInfo &rp, const char *tag) override;
Framebuffer *GetCurrentRenderTarget() override {
return curFramebuffer_;
return (Framebuffer *)curFramebuffer_.ptr;
}
void BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int attachment) override;
void BindCurrentFramebufferForColorInput() override;
void GetFramebufferDimensions(Framebuffer *fbo, int *w, int *h) override;
@ -473,27 +474,7 @@ public:
std::vector<std::string> GetFeatureList() const override;
std::vector<std::string> GetExtensionList() const override;
uint64_t GetNativeObject(NativeObject obj, void *srcObject) override {
switch (obj) {
case NativeObject::CONTEXT:
return (uint64_t)vulkan_;
case NativeObject::INIT_COMMANDBUFFER:
return (uint64_t)renderManager_.GetInitCmd();
case NativeObject::BOUND_TEXTURE0_IMAGEVIEW:
return (uint64_t)boundImageView_[0];
case NativeObject::BOUND_TEXTURE1_IMAGEVIEW:
return (uint64_t)boundImageView_[1];
case NativeObject::RENDER_MANAGER:
return (uint64_t)(uintptr_t)&renderManager_;
case NativeObject::NULL_IMAGEVIEW:
return (uint64_t)GetNullTexture()->GetImageView();
case NativeObject::TEXTURE_VIEW:
return (uint64_t)(((VKTexture *)srcObject)->GetImageView());
default:
Crash();
return 0;
}
}
uint64_t GetNativeObject(NativeObject obj, void *srcObject) override;
void HandleEvent(Event ev, int width, int height, void *param1, void *param2) override;
@ -522,7 +503,7 @@ private:
VkDescriptorSetLayout descriptorSetLayout_ = VK_NULL_HANDLE;
VkPipelineLayout pipelineLayout_ = VK_NULL_HANDLE;
VkPipelineCache pipelineCache_ = VK_NULL_HANDLE;
AutoRef<Framebuffer> curFramebuffer_;
AutoRef<VKFramebuffer> curFramebuffer_;
VkDevice device_;
VkQueue queue_;
@ -781,7 +762,7 @@ bool VKTexture::Create(VkCommandBuffer cmd, VulkanPushBuffer *push, const Textur
return true;
}
VKContext::VKContext(VulkanContext *vulkan, bool splitSubmit)
VKContext::VKContext(VulkanContext *vulkan)
: vulkan_(vulkan), renderManager_(vulkan) {
shaderLanguageDesc_.Init(GLSL_VULKAN);
@ -807,9 +788,11 @@ VKContext::VKContext(VulkanContext *vulkan, bool splitSubmit)
caps_.fragmentShaderInt32Supported = true;
caps_.textureNPOTFullySupported = true;
caps_.fragmentShaderDepthWriteSupported = true;
caps_.blendMinMaxSupported = true;
caps_.logicOpSupported = vulkan->GetDeviceFeatures().enabled.logicOp != 0;
auto deviceProps = vulkan->GetPhysicalDeviceProperties(vulkan_->GetCurrentPhysicalDeviceIndex()).properties;
switch (deviceProps.vendorID) {
case VULKAN_VENDOR_AMD: caps_.vendor = GPUVendor::VENDOR_AMD; break;
case VULKAN_VENDOR_ARM: caps_.vendor = GPUVendor::VENDOR_ARM; break;
@ -831,6 +814,11 @@ VKContext::VKContext(VulkanContext *vulkan, bool splitSubmit)
// Color write mask not masking write in certain scenarios with a depth test, see #10421.
// Known still present on driver 0x80180000 and Adreno 5xx (possibly more.)
bugs_.Infest(Bugs::COLORWRITEMASK_BROKEN_WITH_DEPTHTEST);
// Trying to follow all the rules in https://registry.khronos.org/vulkan/specs/1.3/html/vkspec.html#synchronization-pipeline-barriers-subpass-self-dependencies
// and https://registry.khronos.org/vulkan/specs/1.3/html/vkspec.html#renderpass-feedbackloop, but still it doesn't
// quite work - artifacts on triangle boundaries on Adreno.
bugs_.Infest(Bugs::SUBPASS_FEEDBACK_BROKEN);
} else if (caps_.vendor == GPUVendor::VENDOR_AMD) {
// See issue #10074, and also #10065 (AMD) and #10109 for the choice of the driver version to check for.
if (deviceProps.driverVersion < 0x00407000) {
@ -840,19 +828,27 @@ VKContext::VKContext(VulkanContext *vulkan, bool splitSubmit)
// Workaround for Intel driver bug. TODO: Re-enable after some driver version
bugs_.Infest(Bugs::DUAL_SOURCE_BLENDING_BROKEN);
} else if (caps_.vendor == GPUVendor::VENDOR_ARM) {
int majorVersion = VK_API_VERSION_MAJOR(deviceProps.driverVersion);
// These GPUs (up to a certain hardware version?) have a bug where draws in which gl_Position.w == gl_Position.z
// corrupt the depth buffer. This is easily worked around by simply scaling Z down a tiny bit when this case
// is detected. See: https://github.com/hrydgard/ppsspp/issues/11937
bugs_.Infest(Bugs::EQUAL_WZ_CORRUPTS_DEPTH);
// At least one driver at the upper end of the range is likely to suffer from the bug causing issue #13833 (Midnight Club map broken).
bugs_.Infest(Bugs::MALI_STENCIL_DISCARD_BUG);
// This started in driver 31 or 32.
if (VK_API_VERSION_MAJOR(deviceProps.driverVersion) >= 32) {
// Nearly identical to the Adreno bug, see #13833 (Midnight Club map broken) and other issues.
// Reported fixed in major version 40 - let's add a check once confirmed.
bugs_.Infest(Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL);
// This started in driver 31 or 32, fixed in 40 - let's add a check once confirmed.
if (majorVersion >= 32) {
bugs_.Infest(Bugs::MALI_CONSTANT_LOAD_BUG); // See issue #15661
}
}
// Limited, through input attachments and self-dependencies.
// We turn it off here already if buggy.
caps_.framebufferFetchSupported = !bugs_.Has(Bugs::SUBPASS_FEEDBACK_BROKEN);
caps_.deviceID = deviceProps.deviceID;
device_ = vulkan->GetDevice();
@ -920,8 +916,6 @@ VKContext::VKContext(VulkanContext *vulkan, bool splitSubmit)
VkPipelineCacheCreateInfo pc{ VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO };
res = vkCreatePipelineCache(vulkan_->GetDevice(), &pc, nullptr, &pipelineCache_);
_assert_(VK_SUCCESS == res);
renderManager_.SetSplitSubmit(splitSubmit);
}
VKContext::~VKContext() {
@ -1058,12 +1052,12 @@ Pipeline *VKContext::CreateGraphicsPipeline(const PipelineDesc &desc, const char
VKDepthStencilState *depth = (VKDepthStencilState *)desc.depthStencil;
VKRasterState *raster = (VKRasterState *)desc.raster;
u32 pipelineFlags = 0;
PipelineFlags pipelineFlags = (PipelineFlags)0;
if (depth->info.depthTestEnable || depth->info.stencilTestEnable) {
pipelineFlags |= PIPELINE_FLAG_USES_DEPTH_STENCIL;
pipelineFlags |= PipelineFlags::USES_DEPTH_STENCIL;
}
VKPipeline *pipeline = new VKPipeline(vulkan_, desc.uniformDesc ? desc.uniformDesc->uniformBufferSize : 16 * sizeof(float), (PipelineFlags)pipelineFlags, tag);
VKPipeline *pipeline = new VKPipeline(vulkan_, desc.uniformDesc ? desc.uniformDesc->uniformBufferSize : 16 * sizeof(float), pipelineFlags, tag);
VKRGraphicsPipelineDesc &gDesc = pipeline->vkrDesc;
@ -1401,8 +1395,8 @@ void VKContext::Clear(int clearMask, uint32_t colorval, float depthVal, int sten
renderManager_.Clear(colorval, depthVal, stencilVal, mask);
}
DrawContext *T3DCreateVulkanContext(VulkanContext *vulkan, bool split) {
return new VKContext(vulkan, split);
DrawContext *T3DCreateVulkanContext(VulkanContext *vulkan) {
return new VKContext(vulkan);
}
void AddFeature(std::vector<std::string> &features, const char *name, VkBool32 available, VkBool32 enabled) {
@ -1584,6 +1578,10 @@ void VKContext::BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChanne
boundImageView_[binding] = renderManager_.BindFramebufferAsTexture(fb->GetFB(), binding, aspect, attachment);
}
void VKContext::BindCurrentFramebufferForColorInput() {
renderManager_.BindCurrentFramebufferAsInputAttachment0(VK_IMAGE_ASPECT_COLOR_BIT);
}
void VKContext::GetFramebufferDimensions(Framebuffer *fbo, int *w, int *h) {
VKFramebuffer *fb = (VKFramebuffer *)fbo;
if (fb) {
@ -1624,4 +1622,28 @@ void VKContext::InvalidateFramebuffer(FBInvalidationStage stage, uint32_t channe
}
}
uint64_t VKContext::GetNativeObject(NativeObject obj, void *srcObject) {
switch (obj) {
case NativeObject::CONTEXT:
return (uint64_t)vulkan_;
case NativeObject::INIT_COMMANDBUFFER:
return (uint64_t)renderManager_.GetInitCmd();
case NativeObject::BOUND_TEXTURE0_IMAGEVIEW:
return (uint64_t)boundImageView_[0];
case NativeObject::BOUND_TEXTURE1_IMAGEVIEW:
return (uint64_t)boundImageView_[1];
case NativeObject::RENDER_MANAGER:
return (uint64_t)(uintptr_t)&renderManager_;
case NativeObject::NULL_IMAGEVIEW:
return (uint64_t)GetNullTexture()->GetImageView();
case NativeObject::TEXTURE_VIEW:
return (uint64_t)(((VKTexture *)srcObject)->GetImageView());
case NativeObject::BOUND_FRAMEBUFFER_COLOR_IMAGEVIEW:
return (uint64_t)curFramebuffer_->GetFB()->color.imageView;
default:
Crash();
return 0;
}
}
} // namespace Draw

View File

@ -678,9 +678,9 @@ const char *Bugs::GetBugName(uint32_t bug) {
case COLORWRITEMASK_BROKEN_WITH_DEPTHTEST: return "COLORWRITEMASK_BROKEN_WITH_DEPTHTEST";
case BROKEN_FLAT_IN_SHADER: return "BROKEN_FLAT_IN_SHADER";
case EQUAL_WZ_CORRUPTS_DEPTH: return "EQUAL_WZ_CORRUPTS_DEPTH";
case MALI_STENCIL_DISCARD_BUG: return "MALI_STENCIL_DISCARD_BUG";
case RASPBERRY_SHADER_COMP_HANG: return "RASPBERRY_SHADER_COMP_HANG";
case MALI_CONSTANT_LOAD_BUG: return "MALI_CONSTANT_LOAD_BUG";
case SUBPASS_FEEDBACK_BROKEN: return "SUBPASS_FEEDBACK_BROKEN";
default: return "(N/A)";
}
}

View File

@ -242,6 +242,7 @@ enum class NativeObject {
INIT_COMMANDBUFFER,
BOUND_TEXTURE0_IMAGEVIEW,
BOUND_TEXTURE1_IMAGEVIEW,
BOUND_FRAMEBUFFER_COLOR_IMAGEVIEW,
RENDER_MANAGER,
TEXTURE_VIEW,
NULL_IMAGEVIEW,
@ -328,9 +329,9 @@ public:
COLORWRITEMASK_BROKEN_WITH_DEPTHTEST = 5,
BROKEN_FLAT_IN_SHADER = 6,
EQUAL_WZ_CORRUPTS_DEPTH = 7,
MALI_STENCIL_DISCARD_BUG = 8,
RASPBERRY_SHADER_COMP_HANG = 9,
MALI_CONSTANT_LOAD_BUG = 10,
RASPBERRY_SHADER_COMP_HANG = 8,
MALI_CONSTANT_LOAD_BUG = 9,
SUBPASS_FEEDBACK_BROKEN = 10,
MAX_BUG,
};
@ -546,6 +547,7 @@ struct DeviceCaps {
bool textureNPOTFullySupported;
bool fragmentShaderDepthWriteSupported;
bool textureDepthSupported;
bool blendMinMaxSupported;
std::string deviceName; // The device name to use when creating the thin3d context, to get the same one.
};
@ -651,6 +653,9 @@ public:
// binding must be < MAX_TEXTURE_SLOTS (0, 1 are okay if it's 2).
virtual void BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int attachment) = 0;
// Framebuffer fetch / input attachment support, which needs to be explicit in Vulkan.
virtual void BindCurrentFramebufferForColorInput() {}
// deprecated, only used by D3D9
virtual uintptr_t GetFramebufferAPITexture(Framebuffer *fbo, int channelBits, int attachment) {
return 0;

View File

@ -31,6 +31,6 @@ DrawContext *T3DCreateDX9Context(IDirect3D9 *d3d, IDirect3D9Ex *d3dEx, int adapt
DrawContext *T3DCreateD3D11Context(ID3D11Device *device, ID3D11DeviceContext *context, ID3D11Device1 *device1, ID3D11DeviceContext1 *context1, D3D_FEATURE_LEVEL featureLevel, HWND hWnd, std::vector<std::string> adapterNames);
#endif
DrawContext *T3DCreateVulkanContext(VulkanContext *context, bool splitSubmit);
DrawContext *T3DCreateVulkanContext(VulkanContext *context);
} // namespace Draw

View File

@ -170,7 +170,7 @@ void UIContext::ActivateTopScissor() {
int h = std::max(0.0f, ceilf(scale_y * bounds.h));
if (x < 0 || y < 0 || x + w > pixel_xres || y + h > pixel_yres) {
// This won't actually report outside a game, but we can try.
ERROR_LOG_REPORT(G3D, "UI scissor out of bounds: %d,%d-%d,%d / %d,%d", x, y, w, h, pixel_xres, pixel_yres);
ERROR_LOG_REPORT(G3D, "UI scissor out of bounds in %sScreen: %d,%d-%d,%d / %d,%d", screenTag_ ? screenTag_ : "N/A", x, y, w, h, pixel_xres, pixel_yres);
x = std::max(0, x);
y = std::max(0, y);
w = std::min(w, pixel_xres - x);

View File

@ -74,7 +74,6 @@ public:
const UI::Theme *theme;
// Utility methods
TextDrawer *Text() const { return textDrawer_; }
void SetFontStyle(const UI::FontStyle &style);
@ -103,6 +102,10 @@ public:
void setUIAtlas(const std::string &name);
void SetScreenTag(const char *tag) {
screenTag_ = tag;
}
private:
Draw::DrawContext *draw_ = nullptr;
Bounds bounds_;
@ -126,4 +129,6 @@ private:
std::string lastUIAtlas_;
std::string UIAtlas_ = "ui_atlas.zim";
const char *screenTag_ = nullptr;
};

View File

@ -71,7 +71,7 @@ public:
// what screen it is.
virtual void *dialogData() { return 0; }
virtual std::string tag() const { return std::string(""); }
virtual const char *tag() const = 0;
virtual bool isTransparent() const { return false; }
virtual bool isTopLevel() const { return false; }

View File

@ -117,6 +117,9 @@ void UIScreen::render() {
if (root_) {
UIContext *uiContext = screenManager()->getUIContext();
uiContext->SetScreenTag(tag());
UI::LayoutViewHierarchy(*uiContext, root_, ignoreInsets_);
uiContext->PushTransform({translation_, scale_, alpha_});

View File

@ -136,7 +136,7 @@ public:
void SetHiddenChoices(std::set<int> hidden) {
hidden_ = hidden;
}
virtual std::string tag() const override { return std::string("listpopup"); }
const char *tag() const override { return "listpopup"; }
UI::Event OnChoice;
@ -187,6 +187,8 @@ public:
disabled_ = *value_ < 0;
}
const char *tag() const override { return "SliderPopup"; }
Event OnChange;
private:
@ -214,6 +216,8 @@ public:
: PopupScreen(title, "OK", "Cancel"), units_(units), value_(value), originalValue_(*value), minValue_(minValue), maxValue_(maxValue), step_(step), changing_(false), liveUpdate_(liveUpdate) {}
void CreatePopupContents(UI::ViewGroup *parent) override;
const char *tag() const override { return "SliderFloatPopup"; }
Event OnChange;
private:
@ -241,6 +245,8 @@ public:
: PopupScreen(title, "OK", "Cancel"), value_(value), placeholder_(placeholder), maxLen_(maxLen) {}
virtual void CreatePopupContents(ViewGroup *parent) override;
const char *tag() const override { return "TextEditPopup"; }
Event OnChange;
private:

View File

@ -9,6 +9,7 @@
#include <GLES3/gl3.h>
#include <GLES3/gl3ext.h>
XrFovf fov;
XrView* projections;
XrPosef invViewTransform[2];
XrFrameState frameState = {};
@ -293,7 +294,12 @@ bool VR_InitFrame( engine_t* engine ) {
projections));
//
fov = {};
for (int eye = 0; eye < ovrMaxNumEyes; eye++) {
fov.angleLeft += projections[eye].fov.angleLeft / 2.0f;
fov.angleRight += projections[eye].fov.angleRight / 2.0f;
fov.angleUp += projections[eye].fov.angleUp / 2.0f;
fov.angleDown += projections[eye].fov.angleDown / 2.0f;
invViewTransform[eye] = projections[eye].pose;
}
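// Summing half of each eye's angles gives the average of the two eyes' FOVs;
// this combined fov is what VR_GetMatrix() uses for the projection further down.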
@ -353,10 +359,7 @@ void VR_FinishFrame( engine_t* engine ) {
for (int eye = 0; eye < ovrMaxNumEyes; eye++) {
int imageLayer = engine->appState.Renderer.Multiview ? eye : 0;
ovrFramebuffer* frameBuffer = &engine->appState.Renderer.FrameBuffer[0];
XrFovf fov = projections[eye].fov;
if (vrMode == VR_MODE_MONO_6DOF) {
fov = projections[0].fov;
} else if (!engine->appState.Renderer.Multiview) {
if ((vrMode != VR_MODE_MONO_6DOF) && !engine->appState.Renderer.Multiview) {
frameBuffer = &engine->appState.Renderer.FrameBuffer[eye];
}
@ -463,7 +466,6 @@ void VR_BindFramebuffer(engine_t *engine) {
ovrMatrix4f VR_GetMatrix( VRMatrix matrix ) {
ovrMatrix4f output;
if ((matrix == VR_PROJECTION_MATRIX_LEFT_EYE) || (matrix == VR_PROJECTION_MATRIX_RIGHT_EYE)) {
XrFovf fov = matrix == VR_PROJECTION_MATRIX_LEFT_EYE ? projections[0].fov : projections[1].fov;
float near = (float)vrConfig[VR_CONFIG_FOV_SCALE] / 200.0f;
output = ovrMatrix4f_CreateProjectionFov(fov.angleLeft, fov.angleRight, fov.angleUp, fov.angleDown, near, 0.0f );
} else if ((matrix == VR_VIEW_MATRIX_LEFT_EYE) || (matrix == VR_VIEW_MATRIX_RIGHT_EYE)) {

View File

@ -938,7 +938,6 @@ static ConfigSetting graphicsSettings[] = {
ReportedConfigSetting("FragmentTestCache", &g_Config.bFragmentTestCache, true, true, true),
ConfigSetting("GfxDebugOutput", &g_Config.bGfxDebugOutput, false, false, false),
ConfigSetting("GfxDebugSplitSubmit", &g_Config.bGfxDebugSplitSubmit, false, false, false),
ConfigSetting("LogFrameDrops", &g_Config.bLogFrameDrops, false, true, false),
ConfigSetting("InflightFrames", &g_Config.iInflightFrames, 3, true, false),

View File

@ -176,6 +176,7 @@ public:
bool bSustainedPerformanceMode; // Android: Slows clocks down to avoid overheating/speed fluctuations.
bool bIgnoreScreenInsets; // Android: Center screen disregarding insets if this is enabled.
bool bVSync;
int iFrameSkip;
int iFrameSkipType;
int iFastForwardMode; // See FastForwardMode in ConfigValues.h.
@ -242,7 +243,6 @@ public:
bool bShaderChainRequires60FPS;
std::string sTextureShaderName;
bool bGfxDebugOutput;
bool bGfxDebugSplitSubmit;
int iInflightFrames;
bool bRenderDuplicateFrames;

View File

@ -36,8 +36,9 @@ const static u32 GAMEDATA_BYTES_PER_READ = 32768;
// If this is too high, some games (e.g. Senjou no Valkyria 3) will lag.
const static u32 GAMEDATA_READS_PER_UPDATE = 20;
const u32 ERROR_UTILITY_GAMEDATA_MEMSTRICK_WRITE_PROTECTED = 0x80111903;
const u32 ERROR_UTILITY_GAMEDATA_MEMSTRICK_REMOVED = 0x80111901;
const u32 ERROR_UTILITY_GAMEDATA_MEMSTRICK_WRITE_PROTECTED = 0x80111903;
const u32 ERROR_UTILITY_GAMEDATA_INVALID_MODE = 0x80111908;
static const std::string SFO_FILENAME = "PARAM.SFO";
@ -88,9 +89,14 @@ int PSPGamedataInstallDialog::Init(u32 paramAddr) {
}
int size = Memory::Read_U32(paramAddr);
if (size != 1424 && size != 1432) {
ERROR_LOG_REPORT(SCEUTILITY, "sceGamedataInstallInitStart: invalid param size %d", size);
return SCE_ERROR_UTILITY_INVALID_PARAM_SIZE;
}
memset(&request, 0, sizeof(request));
// Only copy the right size to support the different request formats.
Memory::Memcpy(&request, paramAddr, size);
Memory::Memcpy(&request, paramAddr, size, "sceGamedataInstallInitStart");
ChangeStatusInit(GAMEDATA_INIT_DELAY_US);
return 0;
@ -100,6 +106,17 @@ int PSPGamedataInstallDialog::Update(int animSpeed) {
if (GetStatus() != SCE_UTILITY_STATUS_RUNNING)
return SCE_ERROR_UTILITY_INVALID_STATUS;
if (param->mode >= 2) {
param->common.result = ERROR_UTILITY_GAMEDATA_INVALID_MODE;
param.NotifyWrite("DialogResult");
ChangeStatus(SCE_UTILITY_STATUS_FINISHED, 0);
WARN_LOG_REPORT(SCEUTILITY, "sceUtilityGamedataInstallUpdate: invalid mode %d", param->mode);
return 0;
}
// TODO: param->mode == 1 should show a prompt to confirm, then a progress bar.
// Any other mode (i.e. 0 or negative) should proceed and show no UI.
// TODO: This should return error codes in some cases, like write failure.
// request.common.result must be updated for errors as well.
@ -222,6 +239,9 @@ void PSPGamedataInstallDialog::WriteSfoFile() {
}
int PSPGamedataInstallDialog::Abort() {
param->common.result = 1;
param.NotifyWrite("DialogResult");
// TODO: Delete the files or anything?
return PSPDialog::Shutdown();
}

View File

@ -22,7 +22,7 @@
struct SceUtilityGamedataInstallParam {
pspUtilityDialogCommon common;
u32_le unknown1;
s32_le mode;
char gameName[13];
char ignore1[3];
char dataName[20];

View File

@ -820,7 +820,7 @@ const HLEFunction ThreadManForUser[] =
{0X87D4DD36, &WrapI_IU<sceKernelCancelReceiveMbx>, "sceKernelCancelReceiveMbx", 'i', "ix" },
{0XA8E8C846, &WrapI_IU<sceKernelReferMbxStatus>, "sceKernelReferMbxStatus", 'i', "ip" },
{0X7C0DC2A0, &WrapI_CIUUU<sceKernelCreateMsgPipe>, "sceKernelCreateMsgPipe", 'i', "sixxx" },
{0X7C0DC2A0, &WrapI_CIUUU<sceKernelCreateMsgPipe>, "sceKernelCreateMsgPipe", 'i', "sixxp" },
{0XF0B7DA1C, &WrapI_I<sceKernelDeleteMsgPipe>, "sceKernelDeleteMsgPipe", 'i', "i" },
{0X876DBFAD, &WrapI_IUUUUU<sceKernelSendMsgPipe>, "sceKernelSendMsgPipe", 'i', "ixxxxx" },
{0X7C41F2C2, &WrapI_IUUUUU<sceKernelSendMsgPipeCB>, "sceKernelSendMsgPipeCB", 'i', "ixxxxx" },
@ -831,7 +831,7 @@ const HLEFunction ThreadManForUser[] =
{0X349B864D, &WrapI_IUU<sceKernelCancelMsgPipe>, "sceKernelCancelMsgPipe", 'i', "ixx" },
{0X33BE4024, &WrapI_IU<sceKernelReferMsgPipeStatus>, "sceKernelReferMsgPipeStatus", 'i', "ip" },
{0X56C039B5, &WrapI_CIUUU<sceKernelCreateVpl>, "sceKernelCreateVpl", 'i', "sixxx" },
{0X56C039B5, &WrapI_CIUUU<sceKernelCreateVpl>, "sceKernelCreateVpl", 'i', "sixxp" },
{0X89B3D48C, &WrapI_I<sceKernelDeleteVpl>, "sceKernelDeleteVpl", 'i', "i" },
{0XBED27435, &WrapI_IUUU<sceKernelAllocateVpl>, "sceKernelAllocateVpl", 'i', "ixxx", HLE_NOT_IN_INTERRUPT | HLE_NOT_DISPATCH_SUSPENDED },
{0XEC0A693F, &WrapI_IUUU<sceKernelAllocateVplCB>, "sceKernelAllocateVplCB", 'i', "ixxx", HLE_NOT_IN_INTERRUPT | HLE_NOT_DISPATCH_SUSPENDED },
@ -840,7 +840,7 @@ const HLEFunction ThreadManForUser[] =
{0X1D371B8A, &WrapI_IU<sceKernelCancelVpl>, "sceKernelCancelVpl", 'i', "ix" },
{0X39810265, &WrapI_IU<sceKernelReferVplStatus>, "sceKernelReferVplStatus", 'i', "ip" },
{0XC07BB470, &WrapI_CUUUUU<sceKernelCreateFpl>, "sceKernelCreateFpl", 'i', "sxxxxx" },
{0XC07BB470, &WrapI_CUUUUU<sceKernelCreateFpl>, "sceKernelCreateFpl", 'i', "sixxxp" },
{0XED1410E0, &WrapI_I<sceKernelDeleteFpl>, "sceKernelDeleteFpl", 'i', "i" },
{0XD979E9BF, &WrapI_IUU<sceKernelAllocateFpl>, "sceKernelAllocateFpl", 'i', "ixx", HLE_NOT_IN_INTERRUPT | HLE_NOT_DISPATCH_SUSPENDED },
{0XE7282CB6, &WrapI_IUU<sceKernelAllocateFplCB>, "sceKernelAllocateFplCB", 'i', "ixx", HLE_NOT_IN_INTERRUPT | HLE_NOT_DISPATCH_SUSPENDED },
@ -864,7 +864,7 @@ const HLEFunction ThreadManForUser[] =
{0XD8B299AE, &WrapU_IUUU<sceKernelSetVTimerHandler>, "sceKernelSetVTimerHandler", 'x', "ixxx" },
{0X53B00E9A, &WrapU_IU64UU<sceKernelSetVTimerHandlerWide>, "sceKernelSetVTimerHandlerWide", 'x', "iXxx" },
{0X8DAFF657, &WrapI_CUUUUU<sceKernelCreateTlspl>, "sceKernelCreateTlspl", 'i', "sxxxxx" },
{0X8DAFF657, &WrapI_CUUUUU<sceKernelCreateTlspl>, "sceKernelCreateTlspl", 'i', "sixxxp" },
{0X32BF938E, &WrapI_I<sceKernelDeleteTlspl>, "sceKernelDeleteTlspl", 'i', "i" },
{0X721067F3, &WrapI_IU<sceKernelReferTlsplStatus>, "sceKernelReferTlsplStatus", 'i', "xp" },
// Not completely certain about args.
@ -908,7 +908,7 @@ const HLEFunction ThreadManForKernel[] =
{0x1fb15a32, &WrapU_IU<sceKernelSetEventFlag>, "sceKernelSetEventFlag", 'x', "ix", HLE_KERNEL_SYSCALL },
{0x812346e4, &WrapU_IU<sceKernelClearEventFlag>, "sceKernelClearEventFlag", 'x', "ix", HLE_KERNEL_SYSCALL },
{0x402fcf22, &WrapI_IUUUU<sceKernelWaitEventFlag>, "sceKernelWaitEventFlag", 'i', "ixxpp", HLE_NOT_IN_INTERRUPT | HLE_KERNEL_SYSCALL},
{0xc07bb470, &WrapI_CUUUUU<sceKernelCreateFpl>, "sceKernelCreateFpl", 'i', "sxxxxx" ,HLE_KERNEL_SYSCALL },
{0xc07bb470, &WrapI_CUUUUU<sceKernelCreateFpl>, "sceKernelCreateFpl", 'i', "sixxxp" ,HLE_KERNEL_SYSCALL },
{0xed1410e0, &WrapI_I<sceKernelDeleteFpl>, "sceKernelDeleteFpl", 'i', "i" ,HLE_KERNEL_SYSCALL },
{0x623ae665, &WrapI_IU<sceKernelTryAllocateFpl>, "sceKernelTryAllocateFpl", 'i', "ix" ,HLE_KERNEL_SYSCALL },
{0x616403ba, &WrapI_I<sceKernelTerminateThread>, "sceKernelTerminateThread", 'i', "i" ,HLE_KERNEL_SYSCALL },
@ -932,7 +932,7 @@ const HLEFunction ThreadManForKernel[] =
{0x0D81716A, &WrapI_IU<sceKernelPollMbx>, "sceKernelPollMbx", 'i', "ix", HLE_KERNEL_SYSCALL },
{0x87D4DD36, &WrapI_IU<sceKernelCancelReceiveMbx>, "sceKernelCancelReceiveMbx", 'i', "ix", HLE_KERNEL_SYSCALL },
{0xA8E8C846, &WrapI_IU<sceKernelReferMbxStatus>, "sceKernelReferMbxStatus", 'i', "ip", HLE_KERNEL_SYSCALL },
{0x56C039B5, &WrapI_CIUUU<sceKernelCreateVpl>, "sceKernelCreateVpl", 'i', "sixxx", HLE_KERNEL_SYSCALL },
{0x56C039B5, &WrapI_CIUUU<sceKernelCreateVpl>, "sceKernelCreateVpl", 'i', "sixxp", HLE_KERNEL_SYSCALL },
{0x89B3D48C, &WrapI_I<sceKernelDeleteVpl>, "sceKernelDeleteVpl", 'i', "i", HLE_KERNEL_SYSCALL },
{0xBED27435, &WrapI_IUUU<sceKernelAllocateVpl>, "sceKernelAllocateVpl", 'i', "ixxx", HLE_KERNEL_SYSCALL | HLE_NOT_IN_INTERRUPT | HLE_NOT_DISPATCH_SUSPENDED },
{0xEC0A693F, &WrapI_IUUU<sceKernelAllocateVplCB>, "sceKernelAllocateVplCB", 'i', "ixxx", HLE_KERNEL_SYSCALL | HLE_NOT_IN_INTERRUPT | HLE_NOT_DISPATCH_SUSPENDED },

View File

@ -46,6 +46,7 @@ const int TLSPL_NUM_INDEXES = 16;
// STATE BEGIN
BlockAllocator userMemory(256);
BlockAllocator kernelMemory(256);
BlockAllocator volatileMemory(256);
static int vplWaitTimer = -1;
static int fplWaitTimer = -1;
@ -432,6 +433,7 @@ void __KernelMemoryInit()
MemBlockInfoInit();
kernelMemory.Init(PSP_GetKernelMemoryBase(), PSP_GetKernelMemoryEnd() - PSP_GetKernelMemoryBase(), false);
userMemory.Init(PSP_GetUserMemoryBase(), PSP_GetUserMemoryEnd() - PSP_GetUserMemoryBase(), false);
volatileMemory.Init(PSP_GetVolatileMemoryStart(), PSP_GetVolatileMemoryEnd() - PSP_GetVolatileMemoryStart(), false);
ParallelMemset(&g_threadManager, Memory::GetPointerWrite(PSP_GetKernelMemoryBase()), 0, PSP_GetUserMemoryEnd() - PSP_GetKernelMemoryBase());
NotifyMemInfo(MemBlockFlags::WRITE, PSP_GetKernelMemoryBase(), PSP_GetUserMemoryEnd() - PSP_GetKernelMemoryBase(), "MemInit");
INFO_LOG(SCEKERNEL, "Kernel and user memory pools initialized");
@ -457,12 +459,14 @@ void __KernelMemoryInit()
void __KernelMemoryDoState(PointerWrap &p)
{
auto s = p.Section("sceKernelMemory", 1, 2);
auto s = p.Section("sceKernelMemory", 1, 3);
if (!s)
return;
kernelMemory.DoState(p);
userMemory.DoState(p);
if (s >= 3)
volatileMemory.DoState(p);
Do(p, vplWaitTimer);
CoreTiming::RestoreRegisterEvent(vplWaitTimer, "VplTimeout", __KernelVplTimeout);
@ -481,6 +485,11 @@ void __KernelMemoryDoState(PointerWrap &p)
void __KernelMemoryShutdown()
{
#ifdef _DEBUG
INFO_LOG(SCEKERNEL, "Shutting down volatile memory pool: ");
volatileMemory.ListBlocks();
#endif
volatileMemory.Shutdown();
#ifdef _DEBUG
INFO_LOG(SCEKERNEL,"Shutting down user memory pool: ");
userMemory.ListBlocks();
@ -495,6 +504,56 @@ void __KernelMemoryShutdown()
MemBlockInfoShutdown();
}
BlockAllocator *BlockAllocatorFromID(int id) {
switch (id) {
case 1:
case 3:
case 4:
if (hleIsKernelMode())
return &kernelMemory;
return nullptr;
case 2:
case 6:
return &userMemory;
case 8:
case 10:
if (hleIsKernelMode())
return &userMemory;
return nullptr;
case 5:
return &volatileMemory;
default:
break;
}
return nullptr;
}
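// Summary of the mapping above (PSP memory partition IDs):
//   1, 3, 4 -> kernelMemory (kernel mode only)
//   2, 6    -> userMemory
//   8, 10   -> userMemory (kernel mode only)
//   5       -> volatileMemory
// e.g. BlockAllocatorFromID(2) returns &userMemory in any mode, while
// BlockAllocatorFromID(1) returns nullptr unless hleIsKernelMode().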
int BlockAllocatorToID(const BlockAllocator *alloc) {
if (alloc == &kernelMemory)
return 1;
if (alloc == &userMemory)
return 2;
if (alloc == &volatileMemory)
return 5;
return 0;
}
BlockAllocator *BlockAllocatorFromAddr(u32 addr) {
addr &= 0x3FFFFFFF;
if (Memory::IsKernelAndNotVolatileAddress(addr))
return &kernelMemory;
if (Memory::IsKernelAddress(addr))
return &volatileMemory;
if (Memory::IsRAMAddress(addr))
return &userMemory;
return nullptr;
}
enum SceKernelFplAttr
{
PSP_FPL_ATTR_FIFO = 0x0000,
@ -580,29 +639,18 @@ static void __KernelSortFplThreads(FPL *fpl)
std::stable_sort(fpl->waitingThreads.begin(), fpl->waitingThreads.end(), __FplThreadSortPriority);
}
int sceKernelCreateFpl(const char *name, u32 mpid, u32 attr, u32 blockSize, u32 numBlocks, u32 optPtr)
{
int sceKernelCreateFpl(const char *name, u32 mpid, u32 attr, u32 blockSize, u32 numBlocks, u32 optPtr) {
if (!name)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateFpl(): invalid name", SCE_KERNEL_ERROR_NO_MEMORY);
return SCE_KERNEL_ERROR_NO_MEMORY;
}
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_NO_MEMORY, "invalid name");
if (mpid < 1 || mpid > 9 || mpid == 7)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateFpl(): invalid partition %d", SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, mpid);
return SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT;
}
// We only support user right now.
if (mpid != 2 && mpid != 6)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateFpl(): invalid partition %d", SCE_KERNEL_ERROR_ILLEGAL_PERM, mpid);
return SCE_KERNEL_ERROR_ILLEGAL_PERM;
}
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, "invalid partition %d", mpid);
BlockAllocator *allocator = BlockAllocatorFromID(mpid);
if (allocator == nullptr)
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_PERM, "invalid partition %d", mpid);
if (((attr & ~PSP_FPL_ATTR_KNOWN) & ~0xFF) != 0)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateFpl(): invalid attr parameter: %08x", SCE_KERNEL_ERROR_ILLEGAL_ATTR, attr);
return SCE_KERNEL_ERROR_ILLEGAL_ATTR;
}
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ATTR, "invalid attr parameter: %08x", attr);
// There's probably a simpler way to get this same basic formula...
// This is based on results from a PSP.
bool illegalMemSize = blockSize == 0 || numBlocks == 0;
@ -611,25 +659,16 @@ int sceKernelCreateFpl(const char *name, u32 mpid, u32 attr, u32 blockSize, u32
if (!illegalMemSize && (u64) numBlocks >= 0x100000000ULL / (((u64) blockSize + 3ULL) & ~3ULL))
illegalMemSize = true;
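// That is: reject any numBlocks whose product with the 4-byte-aligned
// blockSize would no longer fit in 32 bits.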
if (illegalMemSize)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateFpl(): invalid blockSize/count", SCE_KERNEL_ERROR_ILLEGAL_MEMSIZE);
return SCE_KERNEL_ERROR_ILLEGAL_MEMSIZE;
}
return hleReportWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_MEMSIZE, "invalid blockSize/count");
int alignment = 4;
if (optPtr != 0)
{
u32 size = Memory::Read_U32(optPtr);
if (size > 8)
WARN_LOG_REPORT(SCEKERNEL, "sceKernelCreateFpl(): unsupported extra options, size = %d", size);
if (Memory::IsValidRange(optPtr, 4)) {
u32 size = Memory::ReadUnchecked_U32(optPtr);
if (size >= 4)
alignment = Memory::Read_U32(optPtr + 4);
// Must be a power of 2 to be valid.
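// ((x & (x - 1)) == 0 exactly when x has a single bit set: 8 & 7 == 0 passes,
// while 6 & 5 == 4 fails.)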
if ((alignment & (alignment - 1)) != 0)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateFpl(): invalid alignment %d", SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, alignment);
return SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT;
}
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, "invalid alignment %d", alignment);
}
if (alignment < 4)
@ -638,9 +677,8 @@ int sceKernelCreateFpl(const char *name, u32 mpid, u32 attr, u32 blockSize, u32
int alignedSize = ((int)blockSize + alignment - 1) & ~(alignment - 1);
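// Standard round-up-to-alignment: e.g. blockSize 18 with alignment 8 gives (18 + 7) & ~7 = 24.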
u32 totalSize = alignedSize * numBlocks;
bool atEnd = (attr & PSP_FPL_ATTR_HIGHMEM) != 0;
u32 address = userMemory.Alloc(totalSize, atEnd, "FPL");
if (address == (u32)-1)
{
u32 address = allocator->Alloc(totalSize, atEnd, "FPL");
if (address == (u32)-1) {
DEBUG_LOG(SCEKERNEL, "sceKernelCreateFpl(\"%s\", partition=%i, attr=%08x, bsize=%i, nb=%i) FAILED - out of ram",
name, mpid, attr, blockSize, numBlocks);
return SCE_KERNEL_ERROR_NO_MEMORY;
@ -682,7 +720,10 @@ int sceKernelDeleteFpl(SceUID uid)
if (wokeThreads)
hleReSchedule("fpl deleted");
userMemory.Free(fpl->address);
BlockAllocator *alloc = BlockAllocatorFromAddr(fpl->address);
_assert_msg_(alloc != nullptr, "Should always have a valid allocator/address");
if (alloc)
alloc->Free(fpl->address);
return kernelObjects.Destroy<FPL>(uid);
}
else
@ -955,18 +996,23 @@ public:
alloc->Free(address);
}
bool IsValid() {return address != (u32)-1;}
BlockAllocator *alloc;
void DoState(PointerWrap &p) override
{
auto s = p.Section("PMB", 1);
auto s = p.Section("PMB", 1, 2);
if (!s)
return;
Do(p, address);
DoArray(p, name, sizeof(name));
if (s >= 2) {
int allocType = BlockAllocatorToID(alloc);
Do(p, allocType);
alloc = BlockAllocatorFromID(allocType);
}
}
BlockAllocator *alloc;
u32 address;
char name[32];
};
@ -986,44 +1032,28 @@ static u32 sceKernelTotalFreeMemSize()
return retVal;
}
int sceKernelAllocPartitionMemory(int partition, const char *name, int type, u32 size, u32 addr)
{
if (name == NULL)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelAllocPartitionMemory(): invalid name", SCE_KERNEL_ERROR_ERROR);
return SCE_KERNEL_ERROR_ERROR;
}
if (size == 0)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelAllocPartitionMemory(): invalid size %x", SCE_KERNEL_ERROR_MEMBLOCK_ALLOC_FAILED, size);
return SCE_KERNEL_ERROR_MEMBLOCK_ALLOC_FAILED;
int sceKernelAllocPartitionMemory(int partition, const char *name, int type, u32 size, u32 addr) {
if (type < PSP_SMEM_Low || type > PSP_SMEM_HighAligned)
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_MEMBLOCKTYPE, "invalid type %x", type);
// Alignment is only allowed for powers of 2.
if (type == PSP_SMEM_LowAligned || type == PSP_SMEM_HighAligned) {
if ((addr & (addr - 1)) != 0 || addr == 0)
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ALIGNMENT_SIZE, "invalid alignment %x", addr);
}
if (partition < 1 || partition > 9 || partition == 7)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelAllocPartitionMemory(): invalid partition %x", SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, partition);
return SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT;
}
// We only support user right now.
if (partition != 2 && partition != 5 && partition != 6)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelAllocPartitionMemory(): invalid partition %x", SCE_KERNEL_ERROR_ILLEGAL_PARTITION, partition);
return SCE_KERNEL_ERROR_ILLEGAL_PARTITION;
}
if (type < PSP_SMEM_Low || type > PSP_SMEM_HighAligned)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelAllocPartitionMemory(): invalid type %x", SCE_KERNEL_ERROR_ILLEGAL_MEMBLOCKTYPE, type);
return SCE_KERNEL_ERROR_ILLEGAL_MEMBLOCKTYPE;
}
// Alignment is only allowed for powers of 2.
if ((type == PSP_SMEM_LowAligned || type == PSP_SMEM_HighAligned) && ((addr & (addr - 1)) != 0 || addr == 0))
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelAllocPartitionMemory(): invalid alignment %x", SCE_KERNEL_ERROR_ILLEGAL_ALIGNMENT_SIZE, addr);
return SCE_KERNEL_ERROR_ILLEGAL_ALIGNMENT_SIZE;
}
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, "invalid partition %x", partition);
PartitionMemoryBlock *block = new PartitionMemoryBlock(&userMemory, name, size, (MemblockType)type, addr);
if (!block->IsValid())
{
BlockAllocator *allocator = BlockAllocatorFromID(partition);
if (allocator == nullptr)
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_PARTITION, "invalid partition %x", partition);
if (name == nullptr)
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ERROR, "invalid name");
if (size == 0)
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_MEMBLOCK_ALLOC_FAILED, "invalid size %x", size);
PartitionMemoryBlock *block = new PartitionMemoryBlock(allocator, name, size, (MemblockType)type, addr);
if (!block->IsValid()) {
delete block;
ERROR_LOG(SCEKERNEL, "sceKernelAllocPartitionMemory(partition = %i, %s, type= %i, size= %i, addr= %08x): allocation failed", partition, name, type, size, addr);
return SCE_KERNEL_ERROR_MEMBLOCK_ALLOC_FAILED;
@ -1451,40 +1481,23 @@ static void __KernelSortVplThreads(VPL *vpl)
std::stable_sort(vpl->waitingThreads.begin(), vpl->waitingThreads.end(), __VplThreadSortPriority);
}
SceUID sceKernelCreateVpl(const char *name, int partition, u32 attr, u32 vplSize, u32 optPtr)
{
SceUID sceKernelCreateVpl(const char *name, int partition, u32 attr, u32 vplSize, u32 optPtr) {
if (!name)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateVpl(): invalid name", SCE_KERNEL_ERROR_ERROR);
return SCE_KERNEL_ERROR_ERROR;
}
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ERROR, "invalid name");
if (partition < 1 || partition > 9 || partition == 7)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateVpl(): invalid partition %d", SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, partition);
return SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT;
}
// We only support user right now.
if (partition != 2 && partition != 6)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateVpl(): invalid partition %d", SCE_KERNEL_ERROR_ILLEGAL_PERM, partition);
return SCE_KERNEL_ERROR_ILLEGAL_PERM;
}
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, "invalid partition %d", partition);
BlockAllocator *allocator = BlockAllocatorFromID(partition);
if (allocator == nullptr)
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_PERM, "invalid partition %d", partition);
if (((attr & ~PSP_VPL_ATTR_KNOWN) & ~0xFF) != 0)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateVpl(): invalid attr parameter: %08x", SCE_KERNEL_ERROR_ILLEGAL_ATTR, attr);
return SCE_KERNEL_ERROR_ILLEGAL_ATTR;
}
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ATTR, "invalid attr parameter: %08x", attr);
if (vplSize == 0)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateVpl(): invalid size", SCE_KERNEL_ERROR_ILLEGAL_MEMSIZE);
return SCE_KERNEL_ERROR_ILLEGAL_MEMSIZE;
}
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_MEMSIZE, "invalid size");
// The block allocator seems to A-OK this; let's stop it here.
if (vplSize >= 0x80000000)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateVpl(): way too big size", SCE_KERNEL_ERROR_NO_MEMORY);
return SCE_KERNEL_ERROR_NO_MEMORY;
}
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_NO_MEMORY, "way too big size");
// Can't have that little space in a Vpl, sorry.
if (vplSize <= 0x30)
@ -1493,12 +1506,9 @@ SceUID sceKernelCreateVpl(const char *name, int partition, u32 attr, u32 vplSize
// We ignore the upalign to 256 and do it ourselves by 8.
u32 allocSize = vplSize;
u32 memBlockPtr = userMemory.Alloc(allocSize, (attr & PSP_VPL_ATTR_HIGHMEM) != 0, "VPL");
u32 memBlockPtr = allocator->Alloc(allocSize, (attr & PSP_VPL_ATTR_HIGHMEM) != 0, "VPL");
if (memBlockPtr == (u32)-1)
{
ERROR_LOG(SCEKERNEL, "sceKernelCreateVpl(): Failed to allocate %i bytes of pool data", vplSize);
return SCE_KERNEL_ERROR_NO_MEMORY;
}
return hleLogError(SCEKERNEL, SCE_KERNEL_ERROR_NO_MEMORY, "failed to allocate %i bytes of pool data", vplSize);
VPL *vpl = new VPL;
SceUID id = kernelObjects.Create(vpl);
@ -1542,7 +1552,10 @@ int sceKernelDeleteVpl(SceUID uid)
if (wokeThreads)
hleReSchedule("vpl deleted");
userMemory.Free(vpl->address);
BlockAllocator *alloc = BlockAllocatorFromAddr(vpl->address);
_assert_msg_(alloc != nullptr, "Should always have a valid allocator/address");
if (alloc)
alloc->Free(vpl->address);
kernelObjects.Destroy<VPL>(uid);
return 0;
}
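The deletion path above now recovers the owning allocator from the block's address instead of assuming userMemory. A sketch of what an address-based lookup like BlockAllocatorFromAddr might do; the ranges and IDs below are purely illustrative, not PPSSPP's actual partition map:

#include <cstdint>

// Hypothetical address-based allocator lookup: pick the partition whose
// address range contains addr. Ranges/IDs here are illustrative only.
struct AllocatorRange {
    uint32_t start, end;
    int allocatorId;
};

inline int AllocatorIdFromAddr(uint32_t addr) {
    static const AllocatorRange kRanges[] = {
        { 0x08800000, 0x0A000000, 2 },  // user memory (illustrative)
        { 0x04000000, 0x04200000, 5 },  // VRAM (illustrative)
    };
    for (const AllocatorRange &r : kRanges) {
        if (addr >= r.start && addr < r.end)
            return r.allocatorId;
    }
    return -1;  // mirrors the nullptr case the _assert_msg_ above guards
}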
@ -2044,29 +2057,17 @@ void __KernelTlsplThreadEnd(SceUID threadID)
tlsplThreadEndChecks.erase(locked.first, locked.second);
}
SceUID sceKernelCreateTlspl(const char *name, u32 partition, u32 attr, u32 blockSize, u32 count, u32 optionsPtr)
{
SceUID sceKernelCreateTlspl(const char *name, u32 partition, u32 attr, u32 blockSize, u32 count, u32 optionsPtr) {
if (!name)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateTlspl(): invalid name", SCE_KERNEL_ERROR_NO_MEMORY);
return SCE_KERNEL_ERROR_NO_MEMORY;
}
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_NO_MEMORY, "invalid name");
if ((attr & ~PSP_TLSPL_ATTR_KNOWN) >= 0x100)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateTlspl(): invalid attr parameter: %08x", SCE_KERNEL_ERROR_ILLEGAL_ATTR, attr);
return SCE_KERNEL_ERROR_ILLEGAL_ATTR;
}
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ATTR, "invalid attr parameter: %08x", attr);
if (partition < 1 || partition > 9 || partition == 7)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateTlspl(): invalid partition %d", SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, partition);
return SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT;
}
// We only support user right now.
if (partition != 2 && partition != 6)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateTlspl(): invalid partition %d", SCE_KERNEL_ERROR_ILLEGAL_PERM, partition);
return SCE_KERNEL_ERROR_ILLEGAL_PERM;
}
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, "invalid partition %d", partition);
BlockAllocator *allocator = BlockAllocatorFromID(partition);
if (allocator == nullptr)
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_PERM, "invalid partition %x", partition);
// There's probably a simpler way to get this same basic formula...
// This is based on results from a PSP.
@ -2076,41 +2077,29 @@ SceUID sceKernelCreateTlspl(const char *name, u32 partition, u32 attr, u32 block
if (!illegalMemSize && (u64) count >= 0x100000000ULL / (((u64) blockSize + 3ULL) & ~3ULL))
illegalMemSize = true;
if (illegalMemSize)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateTlspl(): invalid blockSize/count", SCE_KERNEL_ERROR_ILLEGAL_MEMSIZE);
return SCE_KERNEL_ERROR_ILLEGAL_MEMSIZE;
}
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_MEMSIZE, "invalid blockSize/count");
int index = -1;
for (int i = 0; i < TLSPL_NUM_INDEXES; ++i)
if (tlsplUsedIndexes[i] == false)
{
for (int i = 0; i < TLSPL_NUM_INDEXES; ++i) {
if (tlsplUsedIndexes[i] == false) {
index = i;
break;
}
}
if (index == -1)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateTlspl(): ran out of indexes for TLS pools", PSP_ERROR_TOO_MANY_TLSPL);
return PSP_ERROR_TOO_MANY_TLSPL;
}
return hleLogWarning(SCEKERNEL, PSP_ERROR_TOO_MANY_TLSPL, "ran out of indexes for TLS pools");
// Unless otherwise specified, we align to 4 bytes (a MIPS word).
u32 alignment = 4;
if (optionsPtr != 0)
{
u32 size = Memory::Read_U32(optionsPtr);
if (size > 8)
WARN_LOG_REPORT(SCEKERNEL, "sceKernelCreateTlspl(%s) unsupported options parameter, size = %d", name, size);
if (Memory::IsValidRange(optionsPtr, 4)) {
u32 size = Memory::ReadUnchecked_U32(optionsPtr);
if (size >= 8)
alignment = Memory::Read_U32(optionsPtr + 4);
// Note that 0 is intentionally allowed.
if ((alignment & (alignment - 1)) != 0)
{
ERROR_LOG_REPORT(SCEKERNEL, "sceKernelCreateTlspl(%s): alignment is not a power of 2: %d", name, alignment);
return SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT;
}
return hleLogError(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, "alignment is not a power of 2: %d", alignment);
// This goes for 0, 1, and 2. Can't have less than 4 byte alignment.
if (alignment < 4)
alignment = 4;
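Two bit tricks carry this function: the blockSize/count guard near the top of the hunk, and the power-of-two alignment handling just above. A standalone restatement of both (a sketch; blockSize == 0 is presumably caught by the earlier illegalMemSize setup outside this hunk):

#include <cassert>
#include <cstdint>

// The blockSize/count guard: with the block size rounded up to the minimum
// 4-byte alignment, count must stay below 2^32 / alignedBlock or the later
// alignedSize * count computation would wrap around 32 bits.
static bool TlsplSizeIsIllegal(uint32_t blockSize, uint32_t count) {
    uint64_t alignedBlock = (uint64_t(blockSize) + 3ull) & ~3ull;
    return alignedBlock == 0 || uint64_t(count) >= 0x100000000ull / alignedBlock;
}

// (x & (x - 1)) == 0 clears the lowest set bit, so it holds only for powers
// of two (and zero, which the code above deliberately allows, then clamps
// to 4 along with 1 and 2).
static bool IsPow2OrZero(uint32_t x) {
    return (x & (x - 1)) == 0;
}

// Rounds size up to a multiple of a power-of-two alignment, mirroring the
// alignedSize computation in the next hunk.
static uint32_t AlignUp(uint32_t size, uint32_t alignment) {
    assert(IsPow2OrZero(alignment) && alignment >= 4);
    return (size + alignment - 1) & ~(alignment - 1);
}

Example: blockSize 16 allows counts up to 0x0FFFFFFF, since 16 * 0x10000000 is exactly 2^32.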
@ -2120,16 +2109,13 @@ SceUID sceKernelCreateTlspl(const char *name, u32 partition, u32 attr, u32 block
u32 alignedSize = (blockSize + alignment - 1) & ~(alignment - 1);
u32 totalSize = alignedSize * count;
u32 blockPtr = userMemory.Alloc(totalSize, (attr & PSP_TLSPL_ATTR_HIGHMEM) != 0, name);
u32 blockPtr = allocator->Alloc(totalSize, (attr & PSP_TLSPL_ATTR_HIGHMEM) != 0, name);
#ifdef _DEBUG
userMemory.ListBlocks();
allocator->ListBlocks();
#endif
if (blockPtr == (u32) -1)
{
ERROR_LOG(SCEKERNEL, "%08x=sceKernelCreateTlspl(%s, %d, %08x, %d, %d, %08x): failed to allocate memory", SCE_KERNEL_ERROR_NO_MEMORY, name, partition, attr, blockSize, count, optionsPtr);
return SCE_KERNEL_ERROR_NO_MEMORY;
}
if (blockPtr == (u32)-1)
return hleLogError(SCEKERNEL, SCE_KERNEL_ERROR_NO_MEMORY, "failed to allocate memory");
TLSPL *tls = new TLSPL();
SceUID id = kernelObjects.Create(tls);
@ -2148,9 +2134,7 @@ SceUID sceKernelCreateTlspl(const char *name, u32 partition, u32 attr, u32 block
tls->alignment = alignment;
tls->usage.resize(count, 0);
WARN_LOG(SCEKERNEL, "%08x=sceKernelCreateTlspl(%s, %d, %08x, %d, %d, %08x)", id, name, partition, attr, blockSize, count, optionsPtr);
return id;
return hleLogSuccessInfoI(SCEKERNEL, id);
}
int sceKernelDeleteTlspl(SceUID uid)
@ -2178,7 +2162,10 @@ int sceKernelDeleteTlspl(SceUID uid)
HLEKernel::ResumeFromWait(threadID, WAITTYPE_TLSPL, uid, 0);
hleReSchedule("deleted tlspl");
userMemory.Free(tls->address);
BlockAllocator *allocator = BlockAllocatorFromAddr(tls->address);
_assert_msg_(allocator != nullptr, "Should always have a valid allocator/address");
if (allocator)
allocator->Free(tls->address);
tlsplUsedIndexes[tls->ntls.index] = false;
kernelObjects.Destroy<TLSPL>(uid);
}

View File

@ -40,6 +40,10 @@ KernelObject *__KernelMemoryVPLObject();
KernelObject *__KernelMemoryPMBObject();
KernelObject *__KernelTlsplObject();
BlockAllocator *BlockAllocatorFromID(int id);
int BlockAllocatorToID(const BlockAllocator *alloc);
BlockAllocator *BlockAllocatorFromAddr(u32 addr);
SceUID sceKernelCreateVpl(const char *name, int partition, u32 attr, u32 vplSize, u32 optPtr);
int sceKernelDeleteVpl(SceUID uid);
int sceKernelAllocateVpl(SceUID uid, u32 size, u32 addrPtr, u32 timeoutPtr);

View File

@ -140,10 +140,13 @@ struct MsgPipe : public KernelObject
int GetIDType() const override { return SCE_KERNEL_TMID_Mpipe; }
MsgPipe() : buffer(0) {}
~MsgPipe()
{
if (buffer != 0)
userMemory.Free(buffer);
~MsgPipe() {
if (buffer != 0) {
BlockAllocator *alloc = BlockAllocatorFromAddr(buffer);
_assert_msg_(alloc != nullptr, "Should always have a valid allocator/address");
if (alloc)
alloc->Free(buffer);
}
}
u32 GetUsedSize()
@ -667,41 +670,26 @@ void __KernelMsgPipeDoState(PointerWrap &p)
CoreTiming::RestoreRegisterEvent(waitTimer, "MsgPipeTimeout", __KernelMsgPipeTimeout);
}
int sceKernelCreateMsgPipe(const char *name, int partition, u32 attr, u32 size, u32 optionsPtr)
{
int sceKernelCreateMsgPipe(const char *name, int partition, u32 attr, u32 size, u32 optionsPtr) {
if (!name)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateMsgPipe(): invalid name", SCE_KERNEL_ERROR_NO_MEMORY);
return SCE_KERNEL_ERROR_NO_MEMORY;
}
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_NO_MEMORY, "invalid name");
if (partition < 1 || partition > 9 || partition == 7)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateMsgPipe(): invalid partition %d", SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, partition);
return SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT;
}
// We only support user right now.
if (partition != 2 && partition != 6)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateMsgPipe(): invalid partition %d", SCE_KERNEL_ERROR_ILLEGAL_PERM, partition);
return SCE_KERNEL_ERROR_ILLEGAL_PERM;
}
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, "invalid partition %d", partition);
BlockAllocator *allocator = BlockAllocatorFromID(partition);
if (allocator == nullptr)
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_PERM, "invalid partition %d", partition);
if ((attr & ~SCE_KERNEL_MPA_KNOWN) >= 0x100)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateEventFlag(%s): invalid attr parameter: %08x", SCE_KERNEL_ERROR_ILLEGAL_ATTR, name, attr);
return SCE_KERNEL_ERROR_ILLEGAL_ATTR;
}
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ATTR, "invalid attr parameter: %08x", attr);
u32 memBlockPtr = 0;
if (size != 0)
{
if (size != 0) {
// We ignore the upalign to 256.
u32 allocSize = size;
memBlockPtr = userMemory.Alloc(allocSize, (attr & SCE_KERNEL_MPA_HIGHMEM) != 0, "MsgPipe");
memBlockPtr = allocator->Alloc(allocSize, (attr & SCE_KERNEL_MPA_HIGHMEM) != 0, "MsgPipe");
if (memBlockPtr == (u32)-1)
{
ERROR_LOG(SCEKERNEL, "%08x=sceKernelCreateEventFlag(%s): Failed to allocate %i bytes for buffer", SCE_KERNEL_ERROR_NO_MEMORY, name, size);
return SCE_KERNEL_ERROR_NO_MEMORY;
}
return hleLogError(SCEKERNEL, SCE_KERNEL_ERROR_NO_MEMORY, "failed to allocate %i bytes for buffer", size);
}
MsgPipe *m = new MsgPipe();

View File

@ -744,11 +744,14 @@ static int sceUtilityGamedataInstallInitStart(u32 paramsAddr) {
}
ActivateDialog(UtilityDialogType::GAMEDATAINSTALL);
return hleLogSuccessInfoX(SCEUTILITY, gamedataInstallDialog->Init(paramsAddr));
int result = gamedataInstallDialog->Init(paramsAddr);
if (result < 0)
DeactivateDialog();
return hleLogSuccessInfoX(SCEUTILITY, result);
}
static int sceUtilityGamedataInstallShutdownStart() {
if (currentDialogType != UtilityDialogType::GAMEDATAINSTALL) {
if (!currentDialogActive || currentDialogType != UtilityDialogType::GAMEDATAINSTALL) {
return hleLogWarning(SCEUTILITY, SCE_ERROR_UTILITY_WRONG_TYPE, "wrong dialog type");
}
@ -757,7 +760,7 @@ static int sceUtilityGamedataInstallShutdownStart() {
}
static int sceUtilityGamedataInstallUpdate(int animSpeed) {
if (currentDialogType != UtilityDialogType::GAMEDATAINSTALL) {
if (!currentDialogActive || currentDialogType != UtilityDialogType::GAMEDATAINSTALL) {
return hleLogWarning(SCEUTILITY, SCE_ERROR_UTILITY_WRONG_TYPE, "wrong dialog type");
}
@ -765,8 +768,9 @@ static int sceUtilityGamedataInstallUpdate(int animSpeed) {
}
static int sceUtilityGamedataInstallGetStatus() {
if (currentDialogType != UtilityDialogType::GAMEDATAINSTALL) {
if (!currentDialogActive || currentDialogType != UtilityDialogType::GAMEDATAINSTALL) {
// This is called incorrectly all the time by some games. So let's not bother warning.
hleEatCycles(200);
return hleLogDebug(SCEUTILITY, SCE_ERROR_UTILITY_WRONG_TYPE, "wrong dialog type");
}
@ -776,7 +780,7 @@ static int sceUtilityGamedataInstallGetStatus() {
}
static int sceUtilityGamedataInstallAbort() {
if (currentDialogType != UtilityDialogType::GAMEDATAINSTALL) {
if (!currentDialogActive || currentDialogType != UtilityDialogType::GAMEDATAINSTALL) {
return hleLogWarning(SCEUTILITY, SCE_ERROR_UTILITY_WRONG_TYPE, "wrong dialog type");
}

View File

@ -413,11 +413,11 @@ bool PSP_InitStart(const CoreParameter &coreParam, std::string *error_string) {
}
#if defined(_WIN32) && PPSSPP_ARCH(AMD64)
INFO_LOG(BOOT, "PPSSPP %s Windows 64 bit", PPSSPP_GIT_VERSION);
NOTICE_LOG(BOOT, "PPSSPP %s Windows 64 bit", PPSSPP_GIT_VERSION);
#elif defined(_WIN32) && !PPSSPP_ARCH(AMD64)
INFO_LOG(BOOT, "PPSSPP %s Windows 32 bit", PPSSPP_GIT_VERSION);
NOTICE_LOG(BOOT, "PPSSPP %s Windows 32 bit", PPSSPP_GIT_VERSION);
#else
INFO_LOG(BOOT, "PPSSPP %s", PPSSPP_GIT_VERSION);
NOTICE_LOG(BOOT, "PPSSPP %s", PPSSPP_GIT_VERSION);
#endif
Core_NotifyLifecycle(CoreLifecycle::STARTING);

View File

@ -19,6 +19,7 @@
#include "Common/Data/Convert/ColorConv.h"
#include "Common/Profiler/Profiler.h"
#include "Common/LogReporting.h"
#include "Core/Config.h"
#include "GPU/Common/DrawEngineCommon.h"
#include "GPU/Common/SplineCommon.h"
@ -188,6 +189,57 @@ u32 DrawEngineCommon::NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr,
return DrawEngineCommon::NormalizeVertices(outPtr, bufPtr, inPtr, dec, lowerBound, upperBound, vertType);
}
void DrawEngineCommon::DispatchSubmitImm(GEPrimitiveType prim, TransformedVertex *buffer, int vertexCount, int cullMode, bool continuation) {
// Instead of plumbing through properly (we'd need to inject these pretransformed vertices in the middle
// of SoftwareTransform(), which would take a lot of refactoring), we'll cheat and just turn these into
// through vertices.
// Since the only known use is Thrillville and it only uses it to clear, we just use color and pos.
struct ImmVertex {
float uv[2];
uint32_t color;
float xyz[3];
};
std::vector<ImmVertex> temp;
temp.resize(vertexCount);
uint32_t color1Used = 0;
for (int i = 0; i < vertexCount; i++) {
// Since we're sending through, scale back up to w/h.
temp[i].uv[0] = buffer[i].u * gstate.getTextureWidth(0);
temp[i].uv[1] = buffer[i].v * gstate.getTextureHeight(0);
temp[i].color = buffer[i].color0_32;
temp[i].xyz[0] = buffer[i].pos[0];
temp[i].xyz[1] = buffer[i].pos[1];
temp[i].xyz[2] = buffer[i].pos[2];
color1Used |= buffer[i].color1_32;
}
int vtype = GE_VTYPE_TC_FLOAT | GE_VTYPE_POS_FLOAT | GE_VTYPE_COL_8888 | GE_VTYPE_THROUGH;
// TODO: Handle fog and secondary color somehow?
if (gstate.isFogEnabled() && !gstate.isModeThrough()) {
WARN_LOG_REPORT_ONCE(geimmfog, G3D, "Imm vertex used fog");
}
if (color1Used != 0 && gstate.isUsingSecondaryColor() && !gstate.isModeThrough()) {
WARN_LOG_REPORT_ONCE(geimmcolor1, G3D, "Imm vertex used secondary color");
}
bool prevThrough = gstate.isModeThrough();
// Code checks this reg directly, not just the vtype ID.
if (!prevThrough) {
gstate.vertType |= GE_VTYPE_THROUGH;
gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_UVSCALEOFFSET | DIRTY_CULLRANGE);
}
int bytesRead;
uint32_t vertTypeID = GetVertTypeID(vtype, 0);
SubmitPrim(&temp[0], nullptr, prim, vertexCount, vertTypeID, cullMode, &bytesRead);
DispatchFlush();
if (!prevThrough) {
gstate.vertType &= ~GE_VTYPE_THROUGH;
gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_UVSCALEOFFSET | DIRTY_CULLRANGE);
}
}
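Worth noting: the temporary layout above has to match the vtype declared inside the function (GE_VTYPE_TC_FLOAT | GE_VTYPE_COL_8888 | GE_VTYPE_POS_FLOAT), i.e. texcoord, then color, then position, tightly packed. A standalone sketch of that dependency (PPSSPP does not currently assert this; illustrative only):

#include <cstdint>

// Mirrors the ImmVertex struct above: TC_FLOAT (8 bytes) + COL_8888 (4) +
// POS_FLOAT (12) must pack to exactly 24 bytes with no padding for the
// through-mode vertex decoder to read it back correctly.
struct ImmVertexSketch {
    float uv[2];
    uint32_t color;
    float xyz[3];
};
static_assert(sizeof(ImmVertexSketch) == 24,
              "vtype TC_FLOAT|COL_8888|POS_FLOAT expects tight packing");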
// This code has plenty of potential for optimization.
//
// It does the simplest and safest test possible: If all points of a bbox are outside a single of
@ -484,12 +536,12 @@ u32 DrawEngineCommon::NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr,
return GE_VTYPE_TC_FLOAT | GE_VTYPE_COL_8888 | GE_VTYPE_NRM_FLOAT | GE_VTYPE_POS_FLOAT | (vertType & (GE_VTYPE_IDX_MASK | GE_VTYPE_THROUGH));
}
void DrawEngineCommon::ApplyFramebufferRead(bool *fboTexNeedsBind) {
void DrawEngineCommon::ApplyFramebufferRead(FBOTexState *fboTexState) {
if (gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH)) {
*fboTexNeedsBind = false;
*fboTexState = FBO_TEX_READ_FRAMEBUFFER;
} else {
gpuStats.numCopiesForShaderBlend++;
*fboTexNeedsBind = true;
*fboTexState = FBO_TEX_COPY_BIND_TEX;
}
gstate_c.Dirty(DIRTY_SHADERBLEND);

View File

@ -46,6 +46,12 @@ enum {
TEX_SLOT_SPLINE_WEIGHTS_V = 6,
};
enum FBOTexState {
FBO_TEX_NONE,
FBO_TEX_COPY_BIND_TEX,
FBO_TEX_READ_FRAMEBUFFER,
};
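A sketch of the consumer side of this enum, mirroring the D3D11/D3D9 ApplyDrawState hunks later in this commit: only the copy-and-bind state needs backend work, and the backends reset the state to FBO_TEX_NONE once it has been handled.

#include <cstdio>

// Self-contained copy of the enum above, for illustration only.
enum FBOTexState { FBO_TEX_NONE, FBO_TEX_COPY_BIND_TEX, FBO_TEX_READ_FRAMEBUFFER };

static void HandleFBOTexState(FBOTexState &state) {
    switch (state) {
    case FBO_TEX_NONE:
        break;  // no framebuffer read this draw
    case FBO_TEX_COPY_BIND_TEX:
        printf("copy current render target, bind it as a texture (slot 1)\n");
        state = FBO_TEX_NONE;  // handled; reset like the backends do
        break;
    case FBO_TEX_READ_FRAMEBUFFER:
        printf("shader fetches the framebuffer directly; nothing to bind\n");
        state = FBO_TEX_NONE;
        break;
    }
}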
inline uint32_t GetVertTypeID(uint32_t vertType, int uvGenMode) {
// As the decoder depends on the UVGenMode when we use UV prescale, we simply mash it
// into the top of the verttype where there are unused bits.
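The function body is cut off by the hunk here; presumably (an assumption, sketched for clarity, not copied from the source) the packing the comment describes looks something like:

#include <cstdint>

// Hypothetical sketch: keep the low 24 bits of the vertex type and stash
// uvGenMode in the otherwise-unused top byte, so decoders keyed by this ID
// differ whenever UV prescale behavior differs.
inline uint32_t GetVertTypeIDSketch(uint32_t vertType, int uvGenMode) {
    return (vertType & 0x00FFFFFF) | (uint32_t(uvGenMode) << 24);
}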
@ -84,10 +90,7 @@ public:
SubmitPrim(verts, inds, prim, vertexCount, vertTypeID, cullMode, bytesRead);
}
virtual void DispatchSubmitImm(const void *verts, const void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead) {
SubmitPrim(verts, inds, prim, vertexCount, vertTypeID, cullMode, bytesRead);
DispatchFlush();
}
virtual void DispatchSubmitImm(GEPrimitiveType prim, TransformedVertex *buffer, int vertexCount, int cullMode, bool continuation);
bool TestBoundingBox(const void* control_points, int vertexCount, u32 vertType, int *bytesRead);
@ -130,7 +133,7 @@ protected:
// Vertex decoding
void DecodeVertsStep(u8 *dest, int &i, int &decodedVerts);
void ApplyFramebufferRead(bool *fboTexNeedsBind);
void ApplyFramebufferRead(FBOTexState *fboTexState);
inline int IndexSize(u32 vtype) const {
const u32 indexType = (vtype & GE_VTYPE_IDX_MASK);

View File

@ -134,10 +134,12 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
GELogicOp replaceLogicOpType = isModeClear ? GE_LOGIC_COPY : (GELogicOp)id.Bits(FS_BIT_REPLACE_LOGIC_OP, 4);
bool replaceLogicOp = replaceLogicOpType != GE_LOGIC_COPY && compat.bitwiseOps;
bool readFramebuffer = replaceBlend == REPLACE_BLEND_READ_FRAMEBUFFER || colorWriteMask || replaceLogicOp;
bool readFramebufferTex = readFramebuffer && !gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH);
bool needFramebufferRead = replaceBlend == REPLACE_BLEND_READ_FRAMEBUFFER || colorWriteMask || replaceLogicOp;
bool needFragCoord = readFramebuffer || gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT);
bool fetchFramebuffer = needFramebufferRead && gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH);
bool readFramebufferTex = needFramebufferRead && !gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH);
bool needFragCoord = readFramebufferTex || gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT);
bool writeDepth = gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT);
if (shaderDepalMode != ShaderDepalMode::OFF && !doTexture) {
@ -157,6 +159,11 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
if (readFramebufferTex) {
WRITE(p, "layout (binding = 1) uniform sampler2D fbotex;\n");
} else if (fetchFramebuffer) {
WRITE(p, "layout (input_attachment_index = 0, binding = 9) uniform subpassInput inputColor;\n");
if (fragmentShaderFlags) {
*fragmentShaderFlags |= FragmentShaderFlags::INPUT_ATTACHMENT;
}
}
if (shaderDepalMode != ShaderDepalMode::OFF) {
@ -416,7 +423,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
if (!strcmp(compat.fragColor0, "fragColor0")) {
const char *qualifierColor0 = "out";
if (readFramebuffer && compat.lastFragData && !strcmp(compat.lastFragData, compat.fragColor0)) {
if (fetchFramebuffer && compat.lastFragData && !strcmp(compat.lastFragData, compat.fragColor0)) {
qualifierColor0 = "inout";
}
// Output the output color definitions.
@ -492,20 +499,26 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
}
// Two things read from the old framebuffer - shader replacement blending and bit-level masking.
if (readFramebuffer) {
if (readFramebufferTex) {
if (compat.shaderLanguage == HLSL_D3D11) {
WRITE(p, " vec4 destColor = fbotex.Load(int3((int)gl_FragCoord.x, (int)gl_FragCoord.y, 0));\n");
} else if (compat.shaderLanguage == HLSL_D3D9) {
WRITE(p, " vec4 destColor = tex2D(fbotex, gl_FragCoord.xy * u_fbotexSize.xy);\n", compat.texture);
} else if (gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH)) {
// If we have EXT_shader_framebuffer_fetch / ARM_shader_framebuffer_fetch, we skip the blit.
// We can just read the prev value more directly.
WRITE(p, " lowp vec4 destColor = %s;\n", compat.lastFragData);
} else if (!compat.texelFetch) {
WRITE(p, " lowp vec4 destColor = %s(fbotex, gl_FragCoord.xy * u_fbotexSize.xy);\n", compat.texture);
} else {
WRITE(p, " lowp vec4 destColor = %s(fbotex, ivec2(gl_FragCoord.x, gl_FragCoord.y), 0);\n", compat.texelFetch);
}
} else if (fetchFramebuffer) {
// If we have EXT_shader_framebuffer_fetch / ARM_shader_framebuffer_fetch, we skip the blit.
// We can just read the prev value more directly.
if (compat.shaderLanguage == GLSL_3xx) {
WRITE(p, " lowp vec4 destColor = %s;\n", compat.lastFragData);
} else if (compat.shaderLanguage == GLSL_VULKAN) {
WRITE(p, " lowp vec4 destColor = subpassLoad(inputColor);\n", compat.lastFragData);
} else {
_assert_msg_(false, "Need fetch destColor, but not a compatible language");
}
}
if (isModeClear) {

View File

@ -42,7 +42,7 @@ struct FShaderID;
// Can technically be deduced from the fragment shader ID, but this is safer.
enum class FragmentShaderFlags : u32 {
FS_FLAG_INPUT_ATTACHMENT = 1,
INPUT_ATTACHMENT = 1,
};
ENUM_CLASS_BITOPS(FragmentShaderFlags);

View File

@ -231,6 +231,7 @@ StencilValueType ReplaceAlphaWithStencilType() {
case GE_FORMAT_8888:
case GE_FORMAT_INVALID:
case GE_FORMAT_DEPTH16:
case GE_FORMAT_CLUT8:
switch (gstate.getStencilOpZPass()) {
case GE_STENCILOP_REPLACE:
// TODO: Could detect zero here and force ZERO - less uniform updates?
@ -859,66 +860,63 @@ static inline bool blendColorSimilar(uint32_t a, uint32_t b, int margin = 25) {
static bool SimulateLogicOpIfNeeded(BlendFactor &srcBlend, BlendFactor &dstBlend, BlendEq &blendEq) {
// Note: our shader solution applies logic ops BEFORE blending, not correctly after.
// This is however fine for the most common ones, like CLEAR/NOOP/SET, etc.
if (!gstate_c.Supports(GPU_SUPPORTS_LOGIC_OP)) {
if (gstate.isLogicOpEnabled()) {
switch (gstate.getLogicOp()) {
case GE_LOGIC_CLEAR:
srcBlend = BlendFactor::ZERO;
dstBlend = BlendFactor::ZERO;
blendEq = BlendEq::ADD;
return true;
case GE_LOGIC_AND:
case GE_LOGIC_AND_REVERSE:
WARN_LOG_REPORT_ONCE(d3dLogicOpAnd, G3D, "Unsupported AND logic op: %x", gstate.getLogicOp());
break;
case GE_LOGIC_COPY:
// This is the same as off.
break;
case GE_LOGIC_COPY_INVERTED:
// Handled in the shader.
break;
case GE_LOGIC_AND_INVERTED:
case GE_LOGIC_NOR:
case GE_LOGIC_NAND:
case GE_LOGIC_EQUIV:
// Handled in the shader.
WARN_LOG_REPORT_ONCE(d3dLogicOpAndInverted, G3D, "Attempted invert for logic op: %x", gstate.getLogicOp());
break;
case GE_LOGIC_INVERTED:
srcBlend = BlendFactor::ONE;
dstBlend = BlendFactor::ONE;
blendEq = BlendEq::SUBTRACT;
WARN_LOG_REPORT_ONCE(d3dLogicOpInverted, G3D, "Attempted inverse for logic op: %x", gstate.getLogicOp());
return true;
case GE_LOGIC_NOOP:
srcBlend = BlendFactor::ZERO;
dstBlend = BlendFactor::ONE;
blendEq = BlendEq::ADD;
return true;
case GE_LOGIC_XOR:
WARN_LOG_REPORT_ONCE(d3dLogicOpOrXor, G3D, "Unsupported XOR logic op: %x", gstate.getLogicOp());
break;
case GE_LOGIC_OR:
case GE_LOGIC_OR_INVERTED:
// Inverted in shader.
srcBlend = BlendFactor::ONE;
dstBlend = BlendFactor::ONE;
blendEq = BlendEq::ADD;
WARN_LOG_REPORT_ONCE(d3dLogicOpOr, G3D, "Attempted or for logic op: %x", gstate.getLogicOp());
return true;
case GE_LOGIC_OR_REVERSE:
WARN_LOG_REPORT_ONCE(d3dLogicOpOrReverse, G3D, "Unsupported OR REVERSE logic op: %x", gstate.getLogicOp());
break;
case GE_LOGIC_SET:
srcBlend = BlendFactor::ONE;
dstBlend = BlendFactor::ONE;
blendEq = BlendEq::ADD;
WARN_LOG_REPORT_ONCE(d3dLogicOpSet, G3D, "Attempted set for logic op: %x", gstate.getLogicOp());
return true;
}
if (!gstate_c.Supports(GPU_SUPPORTS_LOGIC_OP) && gstate.isLogicOpEnabled()) {
switch (gstate.getLogicOp()) {
case GE_LOGIC_CLEAR:
srcBlend = BlendFactor::ZERO;
dstBlend = BlendFactor::ZERO;
blendEq = BlendEq::ADD;
return true;
case GE_LOGIC_AND:
case GE_LOGIC_AND_REVERSE:
WARN_LOG_REPORT_ONCE(d3dLogicOpAnd, G3D, "Unsupported AND logic op: %x", gstate.getLogicOp());
break;
case GE_LOGIC_COPY:
// This is the same as off.
break;
case GE_LOGIC_COPY_INVERTED:
// Handled in the shader.
break;
case GE_LOGIC_AND_INVERTED:
case GE_LOGIC_NOR:
case GE_LOGIC_NAND:
case GE_LOGIC_EQUIV:
// Handled in the shader.
WARN_LOG_REPORT_ONCE(d3dLogicOpAndInverted, G3D, "Attempted invert for logic op: %x", gstate.getLogicOp());
break;
case GE_LOGIC_INVERTED:
srcBlend = BlendFactor::ONE;
dstBlend = BlendFactor::ONE;
blendEq = BlendEq::SUBTRACT;
WARN_LOG_REPORT_ONCE(d3dLogicOpInverted, G3D, "Attempted inverse for logic op: %x", gstate.getLogicOp());
return true;
case GE_LOGIC_NOOP:
srcBlend = BlendFactor::ZERO;
dstBlend = BlendFactor::ONE;
blendEq = BlendEq::ADD;
return true;
case GE_LOGIC_XOR:
WARN_LOG_REPORT_ONCE(d3dLogicOpOrXor, G3D, "Unsupported XOR logic op: %x", gstate.getLogicOp());
break;
case GE_LOGIC_OR:
case GE_LOGIC_OR_INVERTED:
// Inverted in shader.
srcBlend = BlendFactor::ONE;
dstBlend = BlendFactor::ONE;
blendEq = BlendEq::ADD;
WARN_LOG_REPORT_ONCE(d3dLogicOpOr, G3D, "Attempted or for logic op: %x", gstate.getLogicOp());
return true;
case GE_LOGIC_OR_REVERSE:
WARN_LOG_REPORT_ONCE(d3dLogicOpOrReverse, G3D, "Unsupported OR REVERSE logic op: %x", gstate.getLogicOp());
break;
case GE_LOGIC_SET:
srcBlend = BlendFactor::ONE;
dstBlend = BlendFactor::ONE;
blendEq = BlendEq::ADD;
WARN_LOG_REPORT_ONCE(d3dLogicOpSet, G3D, "Attempted set for logic op: %x", gstate.getLogicOp());
return true;
}
}
return false;
}
@ -1080,6 +1078,12 @@ static void ConvertBlendState(GenericBlendState &blendState, bool forceReplaceBl
case REPLACE_BLEND_NO:
// We may still want to do something about stencil -> alpha.
ApplyStencilReplaceAndLogicOpIgnoreBlend(replaceAlphaWithStencil, blendState);
if (forceReplaceBlend) {
// If this is true, the logic and mask replacements will be applied, at least. In that case,
// we should not apply any logic op simulation.
blendState.simulateLogicOpType = LOGICOPTYPE_NORMAL;
}
return;
case REPLACE_BLEND_BLUE_TO_ALPHA:

View File

@ -349,13 +349,10 @@ void ComputeFragmentShaderID(FShaderID *id_out, const ComputedPipelineState &pip
id.SetBit(FS_BIT_COLOR_WRITEMASK, colorWriteMask);
if (g_Config.bVendorBugChecksEnabled) {
if (bugs.Has(Draw::Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL)) {
id.SetBit(FS_BIT_NO_DEPTH_CANNOT_DISCARD_STENCIL, !IsStencilTestOutputDisabled() && !gstate.isDepthWriteEnabled());
} else if (bugs.Has(Draw::Bugs::MALI_STENCIL_DISCARD_BUG) && PSP_CoreParameter().compat.flags().MaliDepthStencilBugWorkaround) {
// Very similar driver bug to the Adreno one, with the same workaround (though might look into if there are cheaper ones!)
// Keeping the conditions separate since it can probably be made tighter.
id.SetBit(FS_BIT_NO_DEPTH_CANNOT_DISCARD_STENCIL, !IsStencilTestOutputDisabled() && (!gstate.isDepthTestEnabled() || !gstate.isDepthWriteEnabled()));
if (g_Config.bVendorBugChecksEnabled && bugs.Has(Draw::Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL)) {
bool stencilWithoutDepth = !IsStencilTestOutputDisabled() && (!gstate.isDepthTestEnabled() || !gstate.isDepthWriteEnabled());
if (stencilWithoutDepth) {
id.SetBit(FS_BIT_NO_DEPTH_CANNOT_DISCARD_STENCIL, stencilWithoutDepth);
}
}
}

View File

@ -171,6 +171,7 @@ bool FramebufferManagerCommon::PerformStencilUpload(u32 addr, int size, StencilU
break;
case GE_FORMAT_INVALID:
case GE_FORMAT_DEPTH16:
case GE_FORMAT_CLUT8:
// Inconceivable.
_assert_(false);
break;

View File

@ -37,6 +37,7 @@
#include "GPU/Common/ShaderId.h"
#include "GPU/Common/GPUStateUtils.h"
#include "GPU/Debugger/Debugger.h"
#include "GPU/Debugger/Record.h"
#include "GPU/GPUCommon.h"
#include "GPU/GPUInterface.h"
#include "GPU/GPUState.h"
@ -292,11 +293,18 @@ SamplerCacheKey TextureCacheCommon::GetSamplingParams(int maxLevel, const TexCac
SamplerCacheKey TextureCacheCommon::GetFramebufferSamplingParams(u16 bufferWidth, u16 bufferHeight) {
SamplerCacheKey key = GetSamplingParams(0, nullptr);
// In case auto max quality was on, restore min filt. Another fix for water in Outrun.
if (g_Config.iTexFiltering == TEX_FILTER_AUTO_MAX_QUALITY) {
int minFilt = gstate.texfilter & 0x7;
key.minFilt = minFilt & 1;
}
// Kill any mipmapping settings.
key.mipEnable = false;
key.mipFilt = false;
key.aniso = 0.0;
key.maxLevel = 0.0f;
key.lodBias = 0.0f;
// Often the framebuffer will not match the texture size. We'll wrap/clamp in the shader in that case.
int w = gstate.getTextureWidth(0);
@ -1260,14 +1268,17 @@ void TextureCacheCommon::LoadClut(u32 clutAddr, u32 loadBytes) {
// It's possible for a game to load CLUT outside valid memory without crashing, should result in zeroes.
u32 bytes = Memory::ValidSize(clutAddr, loadBytes);
if (clutRenderAddress_ != 0xFFFFFFFF && PSP_CoreParameter().compat.flags().AllowDownloadCLUT) {
bool performDownload = PSP_CoreParameter().compat.flags().AllowDownloadCLUT;
if (GPURecord::IsActive())
performDownload = true;
if (clutRenderAddress_ != 0xFFFFFFFF && performDownload) {
framebufferManager_->DownloadFramebufferForClut(clutRenderAddress_, clutRenderOffset_ + bytes);
Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
if (bytes < loadBytes) {
memset((u8 *)clutBufRaw_ + bytes, 0x00, loadBytes - bytes);
}
} else {
// Here we could check for clutRenderAddres_ != 0xFFFFFFFF and zero the CLUT or something,
// Here we could check for clutRenderAddress_ != 0xFFFFFFFF and zero the CLUT or something,
// but choosing not to for now. Though the results of loading the CLUT from RAM here is
// almost certainly going to be bogus.
#ifdef _M_SSE
@ -1986,6 +1997,9 @@ static bool CanDepalettize(GETextureFormat texFormat, GEBufferFormat bufferForma
return true;
}
break;
case GE_FORMAT_CLUT8:
// Shouldn't happen here.
return false;
}
WARN_LOG(G3D, "Invalid CLUT/framebuffer combination: %s vs %s", GeTextureFormatToString(texFormat), GeBufferFormatToString(bufferFormat));
return false;

View File

@ -872,22 +872,14 @@ void VertexDecoderJitCache::Jit_NormalFloat() {
STMIA(scratchReg, false, 3, tempReg1, tempReg2, tempReg3);
}
// Through expands into floats, always. Might want to look at changing this.
void VertexDecoderJitCache::Jit_PosS8Through() {
DEBUG_LOG_REPORT_ONCE(vertexS8Through, G3D, "Using S8 positions in throughmode");
_dbg_assert_msg_(fpScratchReg + 1 == fpScratchReg2, "VertexDecoder fpScratchRegs must be in order.");
_dbg_assert_msg_(fpScratchReg2 + 1 == fpScratchReg3, "VertexDecoder fpScratchRegs must be in order.");
// TODO: SIMD
LDRSB(tempReg1, srcReg, dec_->posoff);
LDRSB(tempReg2, srcReg, dec_->posoff + 1);
LDRB(tempReg3, srcReg, dec_->posoff + 2);
static const ARMReg tr[3] = { tempReg1, tempReg2, tempReg3 };
static const ARMReg fr[3] = { fpScratchReg, fpScratchReg2, fpScratchReg3 };
// 8-bit positions in throughmode always decode to 0, depth included.
VEOR(neonScratchReg, neonScratchReg, neonScratchReg);
VEOR(neonScratchReg2, neonScratchReg, neonScratchReg);
ADD(scratchReg, dstReg, dec_->decFmt.posoff);
VMOV(neonScratchReg, tempReg1, tempReg2);
VMOV(neonScratchReg2, tempReg3, tempReg3);
VCVT(F_32 | I_SIGNED, neonScratchRegQ, neonScratchRegQ);
VST1(F_32, neonScratchReg, scratchReg, 2, ALIGN_NONE);
}

View File

@ -668,15 +668,11 @@ void VertexDecoderJitCache::Jit_PosFloat() {
}
void VertexDecoderJitCache::Jit_PosS8Through() {
LDRSB(INDEX_UNSIGNED, tempReg1, srcReg, dec_->posoff);
LDRSB(INDEX_UNSIGNED, tempReg2, srcReg, dec_->posoff + 1);
LDRB(INDEX_UNSIGNED, tempReg3, srcReg, dec_->posoff + 2);
fp.SCVTF(fpScratchReg, tempReg1);
fp.SCVTF(fpScratchReg2, tempReg2);
fp.SCVTF(fpScratchReg3, tempReg3);
// 8-bit positions in throughmode always decode to 0, depth included.
fp.EOR(fpScratchReg, fpScratchReg, fpScratchReg);
STR(INDEX_UNSIGNED, fpScratchReg, dstReg, dec_->decFmt.posoff);
STR(INDEX_UNSIGNED, fpScratchReg2, dstReg, dec_->decFmt.posoff + 4);
STR(INDEX_UNSIGNED, fpScratchReg3, dstReg, dec_->decFmt.posoff + 8);
STR(INDEX_UNSIGNED, fpScratchReg, dstReg, dec_->decFmt.posoff + 4);
STR(INDEX_UNSIGNED, fpScratchReg, dstReg, dec_->decFmt.posoff + 8);
}
void VertexDecoderJitCache::Jit_PosS16Through() {

View File

@ -773,14 +773,20 @@ void VertexDecoder::Step_PosFloatSkin() const
Vec3ByMatrix43(pos, fn, skinMatrix);
}
void VertexDecoder::Step_PosS8Through() const
{
void VertexDecoder::Step_PosInvalid() const {
// Invalid positions are just culled. Simulate by forcing invalid values.
float *v = (float *)(decoded_ + decFmt.posoff);
const s8 *sv = (const s8 *)(ptr_ + posoff);
const u8 *uv = (const u8 *)(ptr_ + posoff);
v[0] = sv[0];
v[1] = sv[1];
v[2] = uv[2];
v[0] = std::numeric_limits<float>::infinity();
v[1] = std::numeric_limits<float>::infinity();
v[2] = std::numeric_limits<float>::infinity();
}
void VertexDecoder::Step_PosS8Through() const {
// 8-bit positions in throughmode always decode to 0, depth included.
float *v = (float *)(decoded_ + decFmt.posoff);
v[0] = 0;
v[1] = 0;
v[2] = 0;
}
void VertexDecoder::Step_PosS16Through() const
@ -1023,35 +1029,35 @@ static const StepFunction nrmstep_morphskin[4] = {
};
static const StepFunction posstep[4] = {
&VertexDecoder::Step_PosS8,
&VertexDecoder::Step_PosInvalid,
&VertexDecoder::Step_PosS8,
&VertexDecoder::Step_PosS16,
&VertexDecoder::Step_PosFloat,
};
static const StepFunction posstep_skin[4] = {
&VertexDecoder::Step_PosS8Skin,
&VertexDecoder::Step_PosInvalid,
&VertexDecoder::Step_PosS8Skin,
&VertexDecoder::Step_PosS16Skin,
&VertexDecoder::Step_PosFloatSkin,
};
static const StepFunction posstep_morph[4] = {
&VertexDecoder::Step_PosS8Morph,
&VertexDecoder::Step_PosInvalid,
&VertexDecoder::Step_PosS8Morph,
&VertexDecoder::Step_PosS16Morph,
&VertexDecoder::Step_PosFloatMorph,
};
static const StepFunction posstep_morph_skin[4] = {
&VertexDecoder::Step_PosS8MorphSkin,
&VertexDecoder::Step_PosInvalid,
&VertexDecoder::Step_PosS8MorphSkin,
&VertexDecoder::Step_PosS16MorphSkin,
&VertexDecoder::Step_PosFloatMorphSkin,
};
static const StepFunction posstep_through[4] = {
&VertexDecoder::Step_PosS8Through,
&VertexDecoder::Step_PosInvalid,
&VertexDecoder::Step_PosS8Through,
&VertexDecoder::Step_PosS16Through,
&VertexDecoder::Step_PosFloatThrough,
@ -1224,9 +1230,8 @@ void VertexDecoder::SetVertexType(u32 fmt, const VertexDecoderOptions &options,
bool reportNoPos = false;
if (!pos) {
reportNoPos = true;
pos = 1;
}
if (pos) { // there's always a position
if (pos >= 0) { // there's always a position
size = align(size, posalign[pos]);
posoff = size;
size += possize[pos];

View File

@ -433,6 +433,7 @@ public:
void Step_PosS16MorphSkin() const;
void Step_PosFloatMorphSkin() const;
void Step_PosInvalid() const;
void Step_PosS8Through() const;
void Step_PosS16Through() const;
void Step_PosFloatThrough() const;

View File

@ -1345,14 +1345,9 @@ void VertexDecoderJitCache::Jit_NormalFloatSkin() {
// Through expands into floats, always. Might want to look at changing this.
void VertexDecoderJitCache::Jit_PosS8Through() {
DEBUG_LOG_REPORT_ONCE(vertexS8Through, G3D, "Using S8 positions in throughmode");
// SIMD doesn't really matter since this isn't useful on hardware.
XORPS(fpScratchReg, R(fpScratchReg));
for (int i = 0; i < 3; i++) {
if (i == 2)
MOVZX(32, 8, tempReg1, MDisp(srcReg, dec_->posoff + i));
else
MOVSX(32, 8, tempReg1, MDisp(srcReg, dec_->posoff + i));
CVTSI2SS(fpScratchReg, R(tempReg1));
MOVSS(MDisp(dstReg, dec_->decFmt.posoff + i * 4), fpScratchReg);
}
}

View File

@ -142,10 +142,11 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
if (gl_extensions.EXT_gpu_shader4) {
gl_exts.push_back("#extension GL_EXT_gpu_shader4 : enable");
}
if (gl_extensions.EXT_clip_cull_distance && id.Bit(VS_BIT_VERTEX_RANGE_CULLING)) {
bool useClamp = gstate_c.Supports(GPU_SUPPORTS_DEPTH_CLAMP) && !id.Bit(VS_BIT_IS_THROUGH);
if (gl_extensions.EXT_clip_cull_distance && (id.Bit(VS_BIT_VERTEX_RANGE_CULLING) || useClamp)) {
gl_exts.push_back("#extension GL_EXT_clip_cull_distance : enable");
}
if (gl_extensions.APPLE_clip_distance && id.Bit(VS_BIT_VERTEX_RANGE_CULLING)) {
if (gl_extensions.APPLE_clip_distance && (id.Bit(VS_BIT_VERTEX_RANGE_CULLING) || useClamp)) {
gl_exts.push_back("#extension GL_APPLE_clip_distance : enable");
}
if (gl_extensions.ARB_cull_distance && id.Bit(VS_BIT_VERTEX_RANGE_CULLING)) {
@ -227,6 +228,10 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
bool texCoordInVec3 = false;
bool vertexRangeCulling = id.Bit(VS_BIT_VERTEX_RANGE_CULLING) && !isModeThrough;
bool clipClampedDepth = !isModeThrough && gstate_c.Supports(GPU_SUPPORTS_DEPTH_CLAMP) && gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE);
const char *vertexRangeClipSuffix = "[0]";
if (vertexRangeCulling && clipClampedDepth)
vertexRangeClipSuffix = "[2]";
if (compat.shaderLanguage == GLSL_VULKAN) {
WRITE(p, "\n");
@ -419,8 +424,15 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
WRITE(p, " vec4 gl_Position : POSITION;\n");
} else {
WRITE(p, " vec4 gl_Position : SV_Position;\n");
if (vertexRangeCulling && gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE)) {
WRITE(p, " float gl_ClipDistance : SV_ClipDistance0;\n");
bool clipRange = vertexRangeCulling && gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE);
if (clipClampedDepth && clipRange) {
WRITE(p, " float3 gl_ClipDistance : SV_ClipDistance;\n");
vertexRangeClipSuffix = ".z";
} else if (clipClampedDepth) {
WRITE(p, " float2 gl_ClipDistance : SV_ClipDistance;\n");
} else if (clipRange) {
WRITE(p, " float gl_ClipDistance : SV_ClipDistance;\n");
vertexRangeClipSuffix = "";
}
if (vertexRangeCulling && gstate_c.Supports(GPU_SUPPORTS_CULL_DISTANCE)) {
WRITE(p, " float2 gl_CullDistance : SV_CullDistance0;\n");
@ -1177,8 +1189,37 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
WRITE(p, " %sv_fogdepth = (viewPos.z + u_fogcoef.x) * u_fogcoef.y;\n", compat.vsOutPrefix);
}
if (vertexRangeCulling && !IsVRBuild()) {
if (clipClampedDepth || (vertexRangeCulling && !IsVRBuild())) {
WRITE(p, " vec3 projPos = outPos.xyz / outPos.w;\n");
}
if (clipClampedDepth) {
const char *clip0 = compat.shaderLanguage == HLSL_D3D11 ? ".x" : "[0]";
const char *clip1 = compat.shaderLanguage == HLSL_D3D11 ? ".y" : "[1]";
WRITE(p, " mediump float integerZ = projPos.z * u_depthRange.x + u_depthRange.y;\n");
// This should clip against minz, but only when it's above zero.
if (ShaderLanguageIsOpenGL(compat.shaderLanguage)) {
// On OpenGL/GLES, these values account for the -1 -> 1 range.
WRITE(p, " if (u_depthRange.y - u_depthRange.x >= 1.0) {\n");
} else {
// Everywhere else, it's 0 -> 1, simpler.
WRITE(p, " if (u_depthRange.y >= 1.0) {\n");
}
WRITE(p, " %sgl_ClipDistance%s = integerZ;\n", compat.vsOutPrefix, clip0);
WRITE(p, " } else {\n");
WRITE(p, " %sgl_ClipDistance%s = 0.0;\n", compat.vsOutPrefix, clip0);
WRITE(p, " }\n");
// This is similar, but for maxz when it's below 65535.0. -1/0 don't matter here.
WRITE(p, " if (u_depthRange.x + u_depthRange.y <= 65534.0) {\n");
WRITE(p, " %sgl_ClipDistance%s = 65535.0 - integerZ;\n", compat.vsOutPrefix, clip1);
WRITE(p, " } else {\n");
WRITE(p, " %sgl_ClipDistance%s = 0.0;\n", compat.vsOutPrefix, clip1);
WRITE(p, " }\n");
}
if (vertexRangeCulling && !IsVRBuild()) {
WRITE(p, " float projZ = (projPos.z - u_depthRange.z) * u_depthRange.w;\n");
// Vertex range culling doesn't happen when Z clips, note sign of w is important.
WRITE(p, " if (u_cullRangeMin.w <= 0.0 || projZ * outPos.w > -outPos.w) {\n");
@ -1194,12 +1235,11 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
WRITE(p, " }\n");
WRITE(p, " }\n");
const char *clip0 = compat.shaderLanguage == HLSL_D3D11 ? "" : "[0]";
const char *cull0 = compat.shaderLanguage == HLSL_D3D11 ? ".x" : "[0]";
const char *cull1 = compat.shaderLanguage == HLSL_D3D11 ? ".y" : "[1]";
if (gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE)) {
// TODO: Not rectangles...
WRITE(p, " %sgl_ClipDistance%s = projZ * outPos.w + outPos.w;\n", compat.vsOutPrefix, clip0);
WRITE(p, " %sgl_ClipDistance%s = projZ * outPos.w + outPos.w;\n", compat.vsOutPrefix, vertexRangeClipSuffix);
}
if (gstate_c.Supports(GPU_SUPPORTS_CULL_DISTANCE)) {
// Cull any triangle fully outside in the same direction when depth clamp enabled.

View File

@ -82,7 +82,7 @@ GPU_D3D11::GPU_D3D11(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
// No need to flush before the tex scale/offset commands if we are baking
// the tex scale/offset into the vertices anyway.
UpdateCmdInfo();
CheckGPUFeatures();
gstate_c.featureFlags = CheckGPUFeatures();
BuildReportingInfo();
@ -100,40 +100,16 @@ GPU_D3D11::~GPU_D3D11() {
stockD3D11.Destroy();
}
void GPU_D3D11::CheckGPUFeatures() {
u32 features = 0;
features |= GPU_SUPPORTS_BLEND_MINMAX;
u32 GPU_D3D11::CheckGPUFeatures() const {
u32 features = GPUCommon::CheckGPUFeatures();
// Accurate depth is required because the Direct3D API does not support inverse Z.
// So we cannot incorrectly use the viewport transform as the depth range on Direct3D.
// TODO: Breaks text in PaRappa for some reason?
features |= GPU_SUPPORTS_ACCURATE_DEPTH;
#ifndef _M_ARM
// TODO: Do proper feature detection
features |= GPU_SUPPORTS_ANISOTROPY;
#endif
features |= GPU_SUPPORTS_DEPTH_TEXTURE;
features |= GPU_SUPPORTS_TEXTURE_NPOT;
if (draw_->GetDeviceCaps().dualSourceBlend)
features |= GPU_SUPPORTS_DUALSOURCE_BLEND;
if (draw_->GetDeviceCaps().depthClampSupported)
features |= GPU_SUPPORTS_DEPTH_CLAMP;
if (draw_->GetDeviceCaps().clipDistanceSupported)
features |= GPU_SUPPORTS_CLIP_DISTANCE;
if (draw_->GetDeviceCaps().cullDistanceSupported)
features |= GPU_SUPPORTS_CULL_DISTANCE;
if (!draw_->GetBugs().Has(Draw::Bugs::BROKEN_NAN_IN_CONDITIONAL)) {
// Ignore the compat setting if clip and cull are both enabled.
// When supported, we can do the depth side of range culling more correctly.
const bool supported = draw_->GetDeviceCaps().clipDistanceSupported && draw_->GetDeviceCaps().cullDistanceSupported;
const bool disabled = PSP_CoreParameter().compat.flags().DisableRangeCulling;
if (supported || !disabled) {
features |= GPU_SUPPORTS_VS_RANGE_CULLING;
}
}
features |= GPU_SUPPORTS_TEXTURE_FLOAT;
features |= GPU_SUPPORTS_INSTANCE_RENDERING;
@ -146,10 +122,6 @@ void GPU_D3D11::CheckGPUFeatures() {
features |= GPU_SUPPORTS_16BIT_FORMATS;
}
if (draw_->GetDeviceCaps().logicOpSupported) {
features |= GPU_SUPPORTS_LOGIC_OP;
}
if (!g_Config.bHighQualityDepth && (features & GPU_SUPPORTS_ACCURATE_DEPTH) != 0) {
features |= GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT;
} else if (PSP_CoreParameter().compat.flags().PixelDepthRounding) {
@ -164,11 +136,7 @@ void GPU_D3D11::CheckGPUFeatures() {
features |= GPU_USE_DEPTH_RANGE_HACK;
}
if (PSP_CoreParameter().compat.flags().ClearToRAM) {
features |= GPU_USE_CLEAR_RAM_HACK;
}
gstate_c.featureFlags = features;
return features;
}
// Needs to be called on GPU thread, not reporting thread.
@ -206,7 +174,7 @@ void GPU_D3D11::BeginHostFrame() {
GPUCommon::BeginHostFrame();
UpdateCmdInfo();
if (resized_) {
CheckGPUFeatures();
gstate_c.featureFlags = CheckGPUFeatures();
framebufferManager_->Resized();
drawEngine_.Resized();
textureCache_->NotifyConfigChanged();

View File

@ -36,7 +36,7 @@ public:
GPU_D3D11(GraphicsContext *gfxCtx, Draw::DrawContext *draw);
~GPU_D3D11();
void CheckGPUFeatures() override;
u32 CheckGPUFeatures() const override;
void PreExecuteOp(u32 op, u32 diff) override;
void ExecuteOp(u32 op, u32 diff) override;

View File

@ -153,15 +153,16 @@ void DrawEngineD3D11::ApplyDrawState(int prim) {
// We ignore the logicState on D3D since there's no support, the emulation of it is blend-and-shader only.
if (pipelineState_.FramebufferRead()) {
bool fboTexNeedsBind = false;
ApplyFramebufferRead(&fboTexNeedsBind);
FBOTexState fboTexBindState = FBO_TEX_NONE;
ApplyFramebufferRead(&fboTexBindState);
// The shader takes over the responsibility for blending, so recompute.
ApplyStencilReplaceAndLogicOpIgnoreBlend(blendState.replaceAlphaWithStencil, blendState);
if (fboTexNeedsBind) {
if (fboTexBindState == FBO_TEX_COPY_BIND_TEX) {
framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY);
// No sampler required, we do a plain Load in the pixel shader.
fboTexBound_ = true;
fboTexBindState = FBO_TEX_NONE;
framebufferManager_->RebindFramebuffer("RebindFramebuffer - ApplyDrawState");
// Must dirty blend state here so we re-copy next time. Example: Lunar's spell effects.

View File

@ -18,6 +18,7 @@
#include <vector>
#include "Common/Log.h"
#include "Common/StringUtils.h"
#include "Common/TimeUtil.h"
#include "GPU/GPU.h"
#include "GPU/Debugger/Breakpoints.h"
#include "GPU/Debugger/Debugger.h"
@ -35,6 +36,8 @@ static int primsLastFrame = 0;
static int primsThisFrame = 0;
static int thisFlipNum = 0;
static double lastStepTime = -1.0;
static std::vector<std::pair<int, int>> restrictPrimRanges;
static std::string restrictPrimRule;
@ -56,6 +59,7 @@ void SetActive(bool flag) {
breakNext = BreakNext::NONE;
breakAtCount = -1;
GPUStepping::ResumeFromStepping();
lastStepTime = -1.0;
}
}
@ -79,6 +83,7 @@ void SetBreakNext(BreakNext next) {
GPUBreakpoints::AddCmdBreakpoint(GE_CMD_SPLINE, true);
}
GPUStepping::ResumeFromStepping();
lastStepTime = next == BreakNext::NONE ? -1.0 : time_now_d();
}
void SetBreakCount(int c, bool relative) {
@ -130,7 +135,12 @@ bool NotifyCommand(u32 pc) {
GPUBreakpoints::ClearTempBreakpoints();
auto info = gpuDebug->DissassembleOp(pc);
NOTICE_LOG(G3D, "Waiting at %08x, %s", pc, info.desc.c_str());
if (lastStepTime >= 0.0) {
NOTICE_LOG(G3D, "Waiting at %08x, %s (%fms)", pc, info.desc.c_str(), (time_now_d() - lastStepTime) * 1000.0);
lastStepTime = -1.0;
} else {
NOTICE_LOG(G3D, "Waiting at %08x, %s", pc, info.desc.c_str());
}
GPUStepping::EnterStepping();
}
@ -141,7 +151,12 @@ void NotifyDraw() {
if (!active)
return;
if (breakNext == BreakNext::DRAW && !GPUStepping::IsStepping()) {
NOTICE_LOG(G3D, "Waiting at a draw");
if (lastStepTime >= 0.0) {
NOTICE_LOG(G3D, "Waiting at a draw (%fms)", (time_now_d() - lastStepTime) * 1000.0);
lastStepTime = -1.0;
} else {
NOTICE_LOG(G3D, "Waiting at a draw");
}
GPUStepping::EnterStepping();
}
}

View File

@ -298,6 +298,7 @@ private:
void Registers(u32 ptr, u32 sz);
void Vertices(u32 ptr, u32 sz);
void Indices(u32 ptr, u32 sz);
void ClutAddr(u32 ptr, u32 sz);
void Clut(u32 ptr, u32 sz);
void TransferSrc(u32 ptr, u32 sz);
void Memset(u32 ptr, u32 sz);
@ -308,6 +309,8 @@ private:
void Display(u32 ptr, u32 sz);
u32 execMemcpyDest = 0;
u32 execClutAddr = 0;
u32 execClutFlags = 0;
u32 execListBuf = 0;
u32 execListPos = 0;
u32 execListID = 0;
@ -472,15 +475,40 @@ void DumpExecute::Indices(u32 ptr, u32 sz) {
execListQueue.push_back((GE_CMD_IADDR << 24) | (psp & 0x00FFFFFF));
}
void DumpExecute::Clut(u32 ptr, u32 sz) {
u32 psp = mapping_.Map(ptr, sz, std::bind(&DumpExecute::SyncStall, this));
if (psp == 0) {
ERROR_LOG(SYSTEM, "Unable to allocate for clut");
return;
}
void DumpExecute::ClutAddr(u32 ptr, u32 sz) {
struct ClutAddrData {
u32 addr;
u32 flags;
};
const ClutAddrData *data = (const ClutAddrData *)(pushbuf_.data() + ptr);
execClutAddr = data->addr;
execClutFlags = data->flags;
}
execListQueue.push_back((GE_CMD_CLUTADDRUPPER << 24) | ((psp >> 8) & 0x00FF0000));
execListQueue.push_back((GE_CMD_CLUTADDR << 24) | (psp & 0x00FFFFFF));
void DumpExecute::Clut(u32 ptr, u32 sz) {
// This is always run when we have the actual address set.
if (execClutAddr != 0) {
const bool isTarget = (execClutFlags & 1) != 0;
const bool unchangedVRAM = (execClutFlags & 2) != 0;
// TODO: Could use drawnVRAM flag, but it can be wrong.
// Could potentially always skip if !isTarget, but playing it safe for offset texture behavior.
if (Memory::IsValidRange(execClutAddr, sz) && !unchangedVRAM && (!isTarget || !g_Config.bSoftwareRendering)) {
// Intentionally don't trigger an upload here.
Memory::MemcpyUnchecked(execClutAddr, pushbuf_.data() + ptr, sz);
}
execClutAddr = 0;
} else {
u32 psp = mapping_.Map(ptr, sz, std::bind(&DumpExecute::SyncStall, this));
if (psp == 0) {
ERROR_LOG(SYSTEM, "Unable to allocate for clut");
return;
}
execListQueue.push_back((GE_CMD_CLUTADDRUPPER << 24) | ((psp >> 8) & 0x00FF0000));
execListQueue.push_back((GE_CMD_CLUTADDR << 24) | (psp & 0x00FFFFFF));
}
}
void DumpExecute::TransferSrc(u32 ptr, u32 sz) {
@ -619,6 +647,10 @@ bool DumpExecute::Run() {
Indices(cmd.ptr, cmd.sz);
break;
case CommandType::CLUTADDR:
ClutAddr(cmd.ptr, cmd.sz);
break;
case CommandType::CLUT:
Clut(cmd.ptr, cmd.sz);
break;

View File

@ -37,6 +37,7 @@
#include "Core/MemMap.h"
#include "Core/System.h"
#include "Core/ThreadPools.h"
#include "GPU/Common/GPUDebugInterface.h"
#include "GPU/GPUInterface.h"
#include "GPU/GPUState.h"
#include "GPU/ge_constants.h"
@ -152,8 +153,19 @@ static void BeginRecording() {
u32 sz = 512 * 4;
pushbuf.resize(pushbuf.size() + sz);
gstate.Save((u32_le *)(pushbuf.data() + ptr));
commands.push_back({CommandType::INIT, sz, ptr});
// Also save the initial CLUT.
GPUDebugBuffer clut;
if (gpuDebug->GetCurrentClut(clut)) {
sz = clut.GetStride() * clut.PixelSize();
_assert_msg_(sz == 1024, "CLUT should be 1024 bytes");
ptr = (u32)pushbuf.size();
pushbuf.resize(pushbuf.size() + sz);
memcpy(pushbuf.data() + ptr, clut.GetData(), sz);
commands.push_back({ CommandType::CLUT, sz, ptr });
}
DirtyAllVRAM(DirtyVRAMFlag::DIRTY);
}
@ -308,6 +320,34 @@ static Command EmitCommandWithRAM(CommandType t, const void *p, u32 sz, u32 alig
return cmd;
}
static u32 GetTargetFlags(u32 addr, u32 sizeInRAM) {
const bool isTarget = lastRenderTargets.find(addr) != lastRenderTargets.end();
bool isDirtyVRAM = false;
bool isDrawnVRAM = false;
uint32_t start = (addr >> DIRTY_VRAM_SHIFT) & DIRTY_VRAM_MASK;
for (uint32_t i = 0; i < (sizeInRAM + DIRTY_VRAM_ROUND) >> DIRTY_VRAM_SHIFT; ++i) {
DirtyVRAMFlag flag = dirtyVRAM[start + i];
isDirtyVRAM = isDirtyVRAM || flag != DirtyVRAMFlag::CLEAN;
isDrawnVRAM = isDrawnVRAM || flag == DirtyVRAMFlag::DRAWN;
// Mark the VRAM clean now that it's been copied into the dump.
if (flag == DirtyVRAMFlag::DIRTY)
dirtyVRAM[start + i] = DirtyVRAMFlag::CLEAN;
}
// The isTarget flag is mostly used for replay of dumps on a PSP.
u32 flags = isTarget ? 1 : 0;
// The unchangedVRAM flag tells us we can skip recopying.
if (!isDirtyVRAM)
flags |= 2;
// And the drawn flag tells us this data was potentially drawn to.
if (isDrawnVRAM)
flags |= 4;
return flags;
}
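For reference, the bit layout produced here, as consumed on the replay side (see DumpExecute::Clut earlier in this commit): bit 0 = was a render target, bit 1 = VRAM unchanged since the last copy, bit 2 = potentially drawn to.

#include <cstdint>

// Decodes the flags emitted by GetTargetFlags, matching the comments above.
struct TargetFlags {
    bool isTarget;
    bool unchangedVRAM;
    bool drawnVRAM;
};

static TargetFlags DecodeTargetFlags(uint32_t flags) {
    return { (flags & 1) != 0, (flags & 2) != 0, (flags & 4) != 0 };
}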
static void EmitTextureData(int level, u32 texaddr) {
GETextureFormat format = gstate.getTextureFormat();
int w = gstate.getTextureWidth(level);
@ -315,7 +355,6 @@ static void EmitTextureData(int level, u32 texaddr) {
int bufw = GetTextureBufw(level, texaddr, format);
int extraw = w > bufw ? w - bufw : 0;
u32 sizeInRAM = (textureBitsPerPixel[format] * (bufw * h + extraw)) / 8;
const bool isTarget = lastRenderTargets.find(texaddr) != lastRenderTargets.end();
CommandType type = CommandType((int)CommandType::TEXTURE0 + level);
const u8 *p = Memory::GetPointerUnchecked(texaddr);
@ -330,27 +369,7 @@ static void EmitTextureData(int level, u32 texaddr) {
u32 pad;
};
bool isDirtyVRAM = false;
bool isDrawnVRAM = false;
uint32_t start = (texaddr >> DIRTY_VRAM_SHIFT) & DIRTY_VRAM_MASK;
for (uint32_t i = 0; i < (sizeInRAM + DIRTY_VRAM_ROUND) >> DIRTY_VRAM_SHIFT; ++i) {
DirtyVRAMFlag flag = dirtyVRAM[start + i];
isDirtyVRAM = isDirtyVRAM || flag != DirtyVRAMFlag::CLEAN;
isDrawnVRAM = isDrawnVRAM || flag == DirtyVRAMFlag::DRAWN;
// Mark the VRAM clean now that it's been copied to VRAM.
if (flag == DirtyVRAMFlag::DIRTY)
dirtyVRAM[start + i] = DirtyVRAMFlag::CLEAN;
}
// The isTarget flag is mostly used for replay of dumps on a PSP.
u32 flags = isTarget ? 1 : 0;
// The unchangedVRAM flag tells us we can skip recopying.
if (!isDirtyVRAM)
flags |= 2;
// And the drawn flag tells us this data was potentially drawn to.
if (isDrawnVRAM)
flags |= 4;
u32 flags = GetTargetFlags(texaddr, sizeInRAM);
FramebufData framebuf{ texaddr, bufw, flags };
framebufData.resize(sizeof(framebuf) + bytes);
memcpy(&framebufData[0], &framebuf, sizeof(framebuf));
@ -456,12 +475,33 @@ static void EmitTransfer(u32 op) {
static void EmitClut(u32 op) {
u32 addr = gstate.getClutAddress();
// Hardware rendering may be using a framebuffer as CLUT.
// To get at this, we first run the command (normally we're called right before it has run.)
if (Memory::IsVRAMAddress(addr))
gpuDebug->SetCmdValue(op);
// Actually should only be 0x3F, but we allow enhanced CLUTs. See #15727.
u32 blocks = (op & 0x7F) == 0x40 ? 0x40 : (op & 0x3F);
u32 bytes = blocks * 32;
bytes = Memory::ValidSize(addr, bytes);
if (bytes != 0) {
// Send the original address so VRAM can be reasoned about.
if (Memory::IsVRAMAddress(addr)) {
struct ClutAddrData {
u32 addr;
u32 flags;
};
u32 flags = GetTargetFlags(addr, bytes);
ClutAddrData data{ addr, flags };
FlushRegisters();
Command cmd{CommandType::CLUTADDR, sizeof(data), (u32)pushbuf.size()};
pushbuf.resize(pushbuf.size() + sizeof(data));
memcpy(pushbuf.data() + cmd.ptr, &data, sizeof(data));
commands.push_back(cmd);
}
EmitCommandWithRAM(CommandType::CLUT, Memory::GetPointerUnchecked(addr), bytes, 16);
}

View File

@ -49,6 +49,7 @@ enum class CommandType : u8 {
MEMCPYDEST = 7,
MEMCPYDATA = 8,
DISPLAY = 9,
CLUTADDR = 10,
TEXTURE0 = 0x10,
TEXTURE1 = 0x11,

View File

@ -170,6 +170,8 @@ private:
// Hardware tessellation
TessellationDataTransferDX9 *tessDataTransferDX9;
FBOTexState fboTexBindState_ = FBO_TEX_NONE;
int lastRenderStepId_ = -1;
bool fboTexNeedsBind_ = false;

View File

@ -81,7 +81,7 @@ GPU_DX9::GPU_DX9(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
// No need to flush before the tex scale/offset commands if we are baking
// the tex scale/offset into the vertices anyway.
UpdateCmdInfo();
CheckGPUFeatures();
gstate_c.featureFlags = CheckGPUFeatures();
BuildReportingInfo();
@ -98,68 +98,9 @@ GPU_DX9::GPU_DX9(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
}
}
// TODO: Move this detection elsewhere when it's needed elsewhere, not before. It's ugly.
// Source: https://envytools.readthedocs.io/en/latest/hw/pciid.html#gf100
enum NVIDIAGeneration {
NV_PRE_KEPLER,
NV_KEPLER,
NV_MAXWELL,
NV_PASCAL,
NV_VOLTA,
NV_TURING, // or later
};
static NVIDIAGeneration NVIDIAGetDeviceGeneration(int deviceID) {
if (deviceID >= 0x1180 && deviceID <= 0x11bf)
return NV_KEPLER; // GK104
if (deviceID >= 0x11c0 && deviceID <= 0x11fa)
return NV_KEPLER; // GK106
if (deviceID >= 0x0fc0 && deviceID <= 0x0fff)
return NV_KEPLER; // GK107
if (deviceID >= 0x1003 && deviceID <= 0x1028)
return NV_KEPLER; // GK110(B)
if (deviceID >= 0x1280 && deviceID <= 0x12ba)
return NV_KEPLER; // GK208
if (deviceID >= 0x1381 && deviceID <= 0x13b0)
return NV_MAXWELL; // GM107
if (deviceID >= 0x1340 && deviceID <= 0x134d)
return NV_MAXWELL; // GM108
if (deviceID >= 0x13c0 && deviceID <= 0x13d9)
return NV_MAXWELL; // GM204
if (deviceID >= 0x1401 && deviceID <= 0x1427)
return NV_MAXWELL; // GM206
if (deviceID >= 0x15f7 && deviceID <= 0x15f9)
return NV_PASCAL; // GP100
if (deviceID >= 0x15f7 && deviceID <= 0x15f9)
return NV_PASCAL; // GP100
if (deviceID >= 0x1b00 && deviceID <= 0x1b38)
return NV_PASCAL; // GP102
if (deviceID >= 0x1b80 && deviceID <= 0x1be1)
return NV_PASCAL; // GP104
if (deviceID >= 0x1c02 && deviceID <= 0x1c62)
return NV_PASCAL; // GP106
if (deviceID >= 0x1c81 && deviceID <= 0x1c92)
return NV_PASCAL; // GP107
if (deviceID >= 0x1d01 && deviceID <= 0x1d12)
return NV_PASCAL; // GP108
if (deviceID >= 0x1d81 && deviceID <= 0x1dba)
return NV_VOLTA; // GV100
if (deviceID >= 0x1e02 && deviceID <= 0x1e3c)
return NV_TURING; // TU102
if (deviceID >= 0x1e82 && deviceID <= 0x1ed0)
return NV_TURING; // TU104
if (deviceID >= 0x1f02 && deviceID <= 0x1f51)
return NV_TURING; // TU104
if (deviceID >= 0x1e02)
return NV_TURING; // More TU models or later, probably.
return NV_PRE_KEPLER;
}
void GPU_DX9::CheckGPUFeatures() {
u32 features = 0;
u32 GPU_DX9::CheckGPUFeatures() const {
u32 features = GPUCommon::CheckGPUFeatures();
features |= GPU_SUPPORTS_16BIT_FORMATS;
features |= GPU_SUPPORTS_BLEND_MINMAX;
features |= GPU_SUPPORTS_DEPTH_TEXTURE;
features |= GPU_SUPPORTS_TEXTURE_LOD_CONTROL;
// Accurate depth is required because the Direct3D API does not support inverse Z.
@ -168,41 +109,6 @@ void GPU_DX9::CheckGPUFeatures() {
features |= GPU_SUPPORTS_ACCURATE_DEPTH;
auto vendor = draw_->GetDeviceCaps().vendor;
if (!PSP_CoreParameter().compat.flags().DisableRangeCulling) {
// VS range culling (killing triangles in the vertex shader using NaN) causes problems on Intel.
// Also causes problems on old NVIDIA.
switch (vendor) {
case Draw::GPUVendor::VENDOR_INTEL:
break;
case Draw::GPUVendor::VENDOR_NVIDIA:
// Older NVIDIAs don't seem to like NaNs in their DX9 vertex shaders.
// No idea if KEPLER is the right cutoff, but let's go with it.
if (NVIDIAGetDeviceGeneration(draw_->GetDeviceCaps().deviceID) >= NV_KEPLER) {
features |= GPU_SUPPORTS_VS_RANGE_CULLING;
}
break;
default:
features |= GPU_SUPPORTS_VS_RANGE_CULLING;
break;
}
}
D3DCAPS9 caps;
ZeroMemory(&caps, sizeof(caps));
HRESULT result = 0;
if (deviceEx_) {
result = deviceEx_->GetDeviceCaps(&caps);
} else {
result = device_->GetDeviceCaps(&caps);
}
if (FAILED(result)) {
WARN_LOG_REPORT(G3D, "Direct3D9: Failed to get the device caps!");
} else {
if ((caps.RasterCaps & D3DPRASTERCAPS_ANISOTROPY) != 0 && caps.MaxAnisotropy > 1)
features |= GPU_SUPPORTS_ANISOTROPY;
if ((caps.TextureCaps & (D3DPTEXTURECAPS_NONPOW2CONDITIONAL | D3DPTEXTURECAPS_POW2)) == 0)
features |= GPU_SUPPORTS_TEXTURE_NPOT;
}
if (!g_Config.bHighQualityDepth) {
features |= GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT;
@ -213,11 +119,7 @@ void GPU_DX9::CheckGPUFeatures() {
features |= GPU_ROUND_DEPTH_TO_16BIT;
}
if (PSP_CoreParameter().compat.flags().ClearToRAM) {
features |= GPU_USE_CLEAR_RAM_HACK;
}
gstate_c.featureFlags = features;
return features;
}
GPU_DX9::~GPU_DX9() {
@ -261,7 +163,7 @@ void GPU_DX9::BeginHostFrame() {
GPUCommon::BeginHostFrame();
UpdateCmdInfo();
if (resized_) {
CheckGPUFeatures();
gstate_c.featureFlags = CheckGPUFeatures();
framebufferManager_->Resized();
drawEngine_.Resized();
shaderManagerDX9_->DirtyShader();

View File

@ -35,7 +35,7 @@ public:
GPU_DX9(GraphicsContext *gfxCtx, Draw::DrawContext *draw);
~GPU_DX9();
void CheckGPUFeatures() override;
u32 CheckGPUFeatures() const override;
void PreExecuteOp(u32 op, u32 diff) override;
void ExecuteOp(u32 op, u32 diff) override;

View File

@ -99,14 +99,14 @@ void DrawEngineDX9::ApplyDrawState(int prim) {
if (!gstate.isModeClear()) {
textureCache_->ApplyTexture();
if (fboTexNeedsBind_) {
if (fboTexBindState_ == FBO_TEX_COPY_BIND_TEX) {
// Note that this is positions, not UVs, that we need the copy from.
framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY);
// If we are rendering at a higher resolution, linear is probably best for the dest color.
device_->SetSamplerState(1, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR);
device_->SetSamplerState(1, D3DSAMP_MINFILTER, D3DTEXF_LINEAR);
fboTexBound_ = true;
fboTexNeedsBind_ = false;
fboTexBindState_ = FBO_TEX_NONE;
}
// TODO: Test texture?
@ -133,20 +133,23 @@ void DrawEngineDX9::ApplyDrawState(int prim) {
// We ignore the logicState on D3D since there's no support, the emulation of it is blend-and-shader only.
if (pipelineState_.FramebufferRead()) {
bool fboTexNeedsBind = false;
ApplyFramebufferRead(&fboTexNeedsBind);
ApplyFramebufferRead(&fboTexBindState_);
// The shader takes over the responsibility for blending, so recompute.
ApplyStencilReplaceAndLogicOpIgnoreBlend(blendState.replaceAlphaWithStencil, blendState);
if (fboTexNeedsBind) {
if (fboTexBindState_ == FBO_TEX_COPY_BIND_TEX) {
// Note that this is positions, not UVs, that we need the copy from.
framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY);
// If we are rendering at a higher resolution, linear is probably best for the dest color.
device_->SetSamplerState(1, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR);
device_->SetSamplerState(1, D3DSAMP_MINFILTER, D3DTEXF_LINEAR);
fboTexBound_ = true;
fboTexBindState_ = FBO_TEX_NONE;
dirtyRequiresRecheck_ |= DIRTY_BLEND_STATE;
gstate_c.Dirty(DIRTY_BLEND_STATE);
} else if (fboTexBindState_ == FBO_TEX_READ_FRAMEBUFFER) {
// Not supported.
fboTexBindState_ = FBO_TEX_NONE;
}
dirtyRequiresRecheck_ |= DIRTY_FRAGMENTSHADER_STATE;
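For reference, a minimal sketch of the tri-state that replaces the old fboTexNeedsBind_ bool (the exact declaration is an assumption, inferred from how the three values are used above):
enum FBOTexState {
	FBO_TEX_NONE,              // nothing pending
	FBO_TEX_COPY_BIND_TEX,     // copy the framebuffer, then bind the copy as a texture
	FBO_TEX_READ_FRAMEBUFFER,  // read the target directly, no copy (not supported on D3D9)
};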

View File

@ -118,7 +118,8 @@ void FramebufferManagerGLES::PackDepthbuffer(VirtualFramebuffer *vfb, int x, int
queries.push_back({ &u_depthDownloadTo8, "u_depthTo8" });
std::vector<GLRProgram::Initializer> inits;
inits.push_back({ &u_depthDownloadTex, 0, TEX_SLOT_PSP_TEXTURE });
depthDownloadProgram_ = render->CreateProgram(shaders, semantics, queries, inits, false, false);
GLRProgramFlags flags{};
depthDownloadProgram_ = render->CreateProgram(shaders, semantics, queries, inits, flags);
for (auto iter : shaders) {
render->DeleteShader(iter);
}

View File

@ -54,7 +54,7 @@
GPU_GLES::GPU_GLES(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
: GPUCommon(gfxCtx, draw), drawEngine_(draw), fragmentTestCache_(draw) {
UpdateVsyncInterval(true);
CheckGPUFeatures();
gstate_c.featureFlags = CheckGPUFeatures();
shaderManagerGL_ = new ShaderManagerGLES(draw);
framebufferManagerGL_ = new FramebufferManagerGLES(draw);
@ -148,42 +148,17 @@ GPU_GLES::~GPU_GLES() {
// Take the raw GL extension and versioning data and turn into feature flags.
// TODO: This should use DrawContext::GetDeviceCaps() more and more, and eventually
// this can be shared between all the backends.
void GPU_GLES::CheckGPUFeatures() {
u32 features = 0;
u32 GPU_GLES::CheckGPUFeatures() const {
u32 features = GPUCommon::CheckGPUFeatures();
features |= GPU_SUPPORTS_16BIT_FORMATS;
if (draw_->GetDeviceCaps().dualSourceBlend) {
if (!g_Config.bVendorBugChecksEnabled || !draw_->GetBugs().Has(Draw::Bugs::DUAL_SOURCE_BLENDING_BROKEN)) {
features |= GPU_SUPPORTS_DUALSOURCE_BLEND;
}
}
if (gl_extensions.EXT_shader_framebuffer_fetch || gl_extensions.ARM_shader_framebuffer_fetch) {
// This has caused problems in the past. Let's only enable on GLES3.
if (gl_extensions.GLES3) {
features |= GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH;
}
}
if ((gl_extensions.gpuVendor == GPU_VENDOR_NVIDIA) || (gl_extensions.gpuVendor == GPU_VENDOR_AMD))
features |= GPU_PREFER_REVERSE_COLOR_ORDER;
if (draw_->GetDeviceCaps().textureNPOTFullySupported)
features |= GPU_SUPPORTS_TEXTURE_NPOT;
if (gl_extensions.EXT_blend_minmax)
features |= GPU_SUPPORTS_BLEND_MINMAX;
if (draw_->GetDeviceCaps().logicOpSupported)
features |= GPU_SUPPORTS_LOGIC_OP;
if (gl_extensions.GLES3 || !gl_extensions.IsGLES)
features |= GPU_SUPPORTS_TEXTURE_LOD_CONTROL;
if (draw_->GetDeviceCaps().anisoSupported)
features |= GPU_SUPPORTS_ANISOTROPY;
bool canUseInstanceID = gl_extensions.EXT_draw_instanced || gl_extensions.ARB_draw_instanced;
bool canDefInstanceID = gl_extensions.IsGLES || gl_extensions.EXT_gpu_shader4 || gl_extensions.VersionGEThan(3, 1);
bool instanceRendering = gl_extensions.GLES3 || (canUseInstanceID && canDefInstanceID);
@ -202,21 +177,6 @@ void GPU_GLES::CheckGPUFeatures() {
// Our implementation of depth texturing needs simple Z range, so can't
// use the extension hacks (yet).
}
if (draw_->GetDeviceCaps().textureDepthSupported)
features |= GPU_SUPPORTS_DEPTH_TEXTURE;
if (draw_->GetDeviceCaps().clipDistanceSupported)
features |= GPU_SUPPORTS_CLIP_DISTANCE;
if (draw_->GetDeviceCaps().cullDistanceSupported)
features |= GPU_SUPPORTS_CULL_DISTANCE;
if (!draw_->GetBugs().Has(Draw::Bugs::BROKEN_NAN_IN_CONDITIONAL)) {
// Ignore the compat setting if clip and cull are both enabled.
// When supported, we can do the depth side of range culling more correctly.
const bool supported = draw_->GetDeviceCaps().clipDistanceSupported && draw_->GetDeviceCaps().cullDistanceSupported;
const bool disabled = PSP_CoreParameter().compat.flags().DisableRangeCulling;
if (supported || !disabled) {
features |= GPU_SUPPORTS_VS_RANGE_CULLING;
}
}
// If we already have a 16-bit depth buffer, we don't need to round.
bool prefer24 = draw_->GetDeviceCaps().preferredDepthBufferFormat == Draw::DataFormat::D24_S8;
@ -245,11 +205,7 @@ void GPU_GLES::CheckGPUFeatures() {
features |= GPU_USE_DEPTH_RANGE_HACK;
}
if (PSP_CoreParameter().compat.flags().ClearToRAM) {
features |= GPU_USE_CLEAR_RAM_HACK;
}
gstate_c.featureFlags = features;
return features;
}
bool GPU_GLES::IsReady() {
@ -321,7 +277,7 @@ void GPU_GLES::BeginHostFrame() {
GPUCommon::BeginHostFrame();
UpdateCmdInfo();
if (resized_) {
CheckGPUFeatures();
gstate_c.featureFlags = CheckGPUFeatures();
framebufferManager_->Resized();
drawEngine_.Resized();
shaderManagerGL_->DirtyShader();

View File

@ -38,7 +38,7 @@ public:
~GPU_GLES();
// This gets called on startup and when we get back from settings.
void CheckGPUFeatures() override;
u32 CheckGPUFeatures() const override;
bool IsReady() override;
void CancelReady() override;

View File

@ -192,9 +192,18 @@ LinkedShader::LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs,
initialize.push_back({ &u_tess_weights_u, 0, TEX_SLOT_SPLINE_WEIGHTS_U });
initialize.push_back({ &u_tess_weights_v, 0, TEX_SLOT_SPLINE_WEIGHTS_V });
bool useDualSource = (gstate_c.featureFlags & GPU_SUPPORTS_DUALSOURCE_BLEND) != 0;
bool useClip0 = VSID.Bit(VS_BIT_VERTEX_RANGE_CULLING) && gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE);
program = render->CreateProgram(shaders, semantics, queries, initialize, useDualSource, useClip0);
GLRProgramFlags flags{};
flags.supportDualSource = (gstate_c.featureFlags & GPU_SUPPORTS_DUALSOURCE_BLEND) != 0;
if (!VSID.Bit(VS_BIT_IS_THROUGH) && gstate_c.Supports(GPU_SUPPORTS_DEPTH_CLAMP)) {
flags.useClipDistance0 = true;
flags.useClipDistance1 = true;
if (VSID.Bit(VS_BIT_VERTEX_RANGE_CULLING) && gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE))
flags.useClipDistance2 = true;
} else if (VSID.Bit(VS_BIT_VERTEX_RANGE_CULLING) && gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE)) {
flags.useClipDistance0 = true;
}
program = render->CreateProgram(shaders, semantics, queries, initialize, flags);
// The rest, use the "dirty" mechanism.
dirtyUniforms = DIRTY_ALL_UNIFORMS;
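A sketch of the flags struct that replaces the old (useDualSource, useClip0) bool parameters; the exact definition is an assumption, inferred from the fields set above:
struct GLRProgramFlags {
	bool supportDualSource = false;
	bool useClipDistance0 = false;
	bool useClipDistance1 = false;
	bool useClipDistance2 = false;
};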

View File

@ -149,13 +149,14 @@ void DrawEngineGLES::ApplyDrawState(int prim) {
GenericLogicState &logicState = pipelineState_.logicState;
if (pipelineState_.FramebufferRead()) {
bool fboTexNeedsBind = false;
ApplyFramebufferRead(&fboTexNeedsBind);
FBOTexState fboTexBindState = FBO_TEX_NONE;
ApplyFramebufferRead(&fboTexBindState);
// The shader takes over the responsibility for blending, so recompute.
ApplyStencilReplaceAndLogicOpIgnoreBlend(blendState.replaceAlphaWithStencil, blendState);
// We copy the framebuffer here, as doing so will wipe any blend state if we do it later.
if (fboTexNeedsBind) {
// fboTexNeedsBind_ won't be set if we can read directly from the target.
if (fboTexBindState == FBO_TEX_COPY_BIND_TEX) {
// Note that this is positions, not UVs, that we need the copy from.
framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY);
// If we are rendering at a higher resolution, linear is probably best for the dest color.
@ -166,6 +167,9 @@ void DrawEngineGLES::ApplyDrawState(int prim) {
// Must dirty blend state here so we re-copy next time. Example: Lunar's spell effects.
dirtyRequiresRecheck_ |= DIRTY_BLEND_STATE;
gstate_c.Dirty(DIRTY_BLEND_STATE);
} else if (fboTexBindState == FBO_TEX_READ_FRAMEBUFFER) {
// No action needed here.
fboTexBindState = FBO_TEX_NONE;
}
dirtyRequiresRecheck_ |= DIRTY_FRAGMENTSHADER_STATE;
gstate_c.Dirty(DIRTY_FRAGMENTSHADER_STATE);

View File

@ -89,7 +89,7 @@ const CommonCommandTableEntry commonCommandTable[] = {
// These affect the fragment shader so need flushing.
{ GE_CMD_CLEARMODE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE },
{ GE_CMD_TEXTUREMAPENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE },
{ GE_CMD_FOGENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE},
{ GE_CMD_FOGENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE },
{ GE_CMD_TEXMODE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS | DIRTY_FRAGMENTSHADER_STATE },
{ GE_CMD_TEXSHADELS, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE },
// Raster state for Direct3D 9, uncommon.
@ -2414,10 +2414,10 @@ void GPUCommon::Execute_ImmVertexAlphaPrim(u32 op, u32 diff) {
immPrim_ = (GEPrimitiveType)prim;
// Flags seem to only be respected from the first prim.
immFlags_ = op & 0x00FFF800;
immFirstSent_ = false;
} else if (prim == GE_PRIM_KEEP_PREVIOUS && immPrim_ != GE_PRIM_INVALID) {
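// Indexed by GEPrimitiveType: POINTS=1, LINES=2, TRIANGLES=3, RECTANGLES=2
// verts per prim; strips and fans stay 0, since they are flushed elsewhere.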
static constexpr int flushPrimCount[] = { 1, 2, 0, 3, 0, 0, 2, 0 };
// Instead of finding a proper point to flush, we just emit a full rectangle every time one
// is finished.
// Instead of finding a proper point to flush, we just emit prims when we can.
if (immCount_ == flushPrimCount[immPrim_ & 7])
FlushImm();
} else {
@ -2439,31 +2439,6 @@ void GPUCommon::FlushImm() {
}
UpdateUVScaleOffset();
// Instead of plumbing through properly (we'd need to inject these pretransformed vertices in the middle
// of SoftwareTransform(), which would take a lot of refactoring), we'll cheat and just turn these into
// through vertices.
// Since the only known use is Thrillville and it only uses it to clear, we just use color and pos.
struct ImmVertex {
float uv[2];
uint32_t color;
float xyz[3];
};
ImmVertex temp[MAX_IMMBUFFER_SIZE];
uint32_t color1Used = 0;
for (int i = 0; i < immCount_; i++) {
// Since we're sending through, scale back up to w/h.
temp[i].uv[0] = immBuffer_[i].u * gstate.getTextureWidth(0);
temp[i].uv[1] = immBuffer_[i].v * gstate.getTextureHeight(0);
temp[i].color = immBuffer_[i].color0_32;
temp[i].xyz[0] = immBuffer_[i].pos[0];
temp[i].xyz[1] = immBuffer_[i].pos[1];
temp[i].xyz[2] = immBuffer_[i].pos[2];
color1Used |= immBuffer_[i].color1_32;
}
int vtype = GE_VTYPE_TC_FLOAT | GE_VTYPE_POS_FLOAT | GE_VTYPE_COL_8888 | GE_VTYPE_THROUGH;
// TODO: Handle fog and secondary color somehow?
bool antialias = (immFlags_ & GE_IMM_ANTIALIAS) != 0;
bool prevAntialias = gstate.isAntiAliasEnabled();
bool shading = (immFlags_ & GE_IMM_SHADING) != 0;
@ -2473,40 +2448,42 @@ void GPUCommon::FlushImm() {
int cullMode = (immFlags_ & GE_IMM_CULLFACE) != 0 ? 1 : 0;
bool texturing = (immFlags_ & GE_IMM_TEXTURE) != 0;
bool prevTexturing = gstate.isTextureMapEnabled();
bool fog = (immFlags_ & GE_IMM_FOG) != 0;
bool prevFog = gstate.isFogEnabled();
bool dither = (immFlags_ & GE_IMM_DITHER) != 0;
bool prevDither = gstate.isDitherEnabled();
if ((immFlags_ & GE_IMM_CLIPMASK) != 0) {
WARN_LOG_REPORT_ONCE(geimmclipvalue, G3D, "Imm vertex used clip value, flags=%06x", immFlags_);
} else if ((immFlags_ & GE_IMM_FOG) != 0) {
WARN_LOG_REPORT_ONCE(geimmfog, G3D, "Imm vertex used fog, flags=%06x", immFlags_);
} else if (color1Used != 0 && gstate.isUsingSecondaryColor()) {
WARN_LOG_REPORT_ONCE(geimmcolor1, G3D, "Imm vertex used secondary color, flags=%06x", immFlags_);
}
if (texturing != prevTexturing || cullEnable != prevCullEnable || dither != prevDither || prevShading != shading) {
bool changed = texturing != prevTexturing || cullEnable != prevCullEnable || dither != prevDither;
changed = changed || prevShading != shading || prevFog != fog;
if (changed) {
DispatchFlush();
gstate.antiAliasEnable = (GE_CMD_ANTIALIASENABLE << 24) | (int)antialias;
gstate.shademodel = (GE_CMD_SHADEMODE << 24) | (int)shading;
gstate.cullfaceEnable = (GE_CMD_CULLFACEENABLE << 24) | (int)cullEnable;
gstate.textureMapEnable = (GE_CMD_TEXTUREMAPENABLE << 24) | (int)texturing;
gstate.fogEnable = (GE_CMD_FOGENABLE << 24) | (int)fog;
gstate.ditherEnable = (GE_CMD_DITHERENABLE << 24) | (int)dither;
gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE);
gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_UVSCALEOFFSET | DIRTY_CULLRANGE);
}
int bytesRead;
uint32_t vertTypeID = GetVertTypeID(vtype, 0);
drawEngineCommon_->DispatchSubmitImm(temp, nullptr, immPrim_, immCount_, vertTypeID, cullMode, &bytesRead);
// TODO: In the future, make a special path for these.
// drawEngineCommon_->DispatchSubmitImm(immBuffer_, immCount_);
drawEngineCommon_->DispatchSubmitImm(immPrim_, immBuffer_, immCount_, cullMode, immFirstSent_);
immCount_ = 0;
immFirstSent_ = true;
gstate.antiAliasEnable = (GE_CMD_ANTIALIASENABLE << 24) | (int)prevAntialias;
gstate.shademodel = (GE_CMD_SHADEMODE << 24) | (int)prevShading;
gstate.cullfaceEnable = (GE_CMD_CULLFACEENABLE << 24) | (int)prevCullEnable;
gstate.textureMapEnable = (GE_CMD_TEXTUREMAPENABLE << 24) | (int)prevTexturing;
gstate.ditherEnable = (GE_CMD_DITHERENABLE << 24) | (int)prevDither;
gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE);
if (changed) {
DispatchFlush();
gstate.antiAliasEnable = (GE_CMD_ANTIALIASENABLE << 24) | (int)prevAntialias;
gstate.shademodel = (GE_CMD_SHADEMODE << 24) | (int)prevShading;
gstate.cullfaceEnable = (GE_CMD_CULLFACEENABLE << 24) | (int)prevCullEnable;
gstate.textureMapEnable = (GE_CMD_TEXTUREMAPENABLE << 24) | (int)prevTexturing;
gstate.fogEnable = (GE_CMD_FOGENABLE << 24) | (int)prevFog;
gstate.ditherEnable = (GE_CMD_DITHERENABLE << 24) | (int)prevDither;
gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_UVSCALEOFFSET | DIRTY_CULLRANGE);
}
}
void GPUCommon::ExecuteOp(u32 op, u32 diff) {
@ -3176,3 +3153,56 @@ size_t GPUCommon::FormatGPUStatsCommon(char *buffer, size_t size) {
vertexAverageCycles
);
}
u32 GPUCommon::CheckGPUFeatures() const {
u32 features = 0;
if (draw_->GetDeviceCaps().logicOpSupported) {
features |= GPU_SUPPORTS_LOGIC_OP;
}
if (draw_->GetDeviceCaps().anisoSupported) {
features |= GPU_SUPPORTS_ANISOTROPY;
}
if (draw_->GetDeviceCaps().textureNPOTFullySupported) {
features |= GPU_SUPPORTS_TEXTURE_NPOT;
}
if (draw_->GetDeviceCaps().dualSourceBlend) {
if (!g_Config.bVendorBugChecksEnabled || !draw_->GetBugs().Has(Draw::Bugs::DUAL_SOURCE_BLENDING_BROKEN)) {
features |= GPU_SUPPORTS_DUALSOURCE_BLEND;
}
}
if (draw_->GetDeviceCaps().blendMinMaxSupported) {
features |= GPU_SUPPORTS_BLEND_MINMAX;
}
if (draw_->GetDeviceCaps().clipDistanceSupported) {
features |= GPU_SUPPORTS_CLIP_DISTANCE;
}
if (draw_->GetDeviceCaps().cullDistanceSupported) {
features |= GPU_SUPPORTS_CULL_DISTANCE;
}
if (draw_->GetDeviceCaps().textureDepthSupported) {
features |= GPU_SUPPORTS_DEPTH_TEXTURE;
}
if (!draw_->GetBugs().Has(Draw::Bugs::BROKEN_NAN_IN_CONDITIONAL)) {
// Ignore the compat setting if clip and cull are both enabled.
// When supported, we can do the depth side of range culling more correctly.
const bool supported = draw_->GetDeviceCaps().clipDistanceSupported && draw_->GetDeviceCaps().cullDistanceSupported;
const bool disabled = PSP_CoreParameter().compat.flags().DisableRangeCulling;
if (supported || !disabled) {
features |= GPU_SUPPORTS_VS_RANGE_CULLING;
}
}
if (draw_->GetDeviceCaps().framebufferFetchSupported) {
features |= GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH;
}
if (PSP_CoreParameter().compat.flags().ClearToRAM) {
features |= GPU_USE_CLEAR_RAM_HACK;
}
return features;
}
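Sketched, the new contract: each backend override is a const query that starts from this shared mask and ORs in API-specific bits, and the caller (not the check itself) writes gstate_c.featureFlags. The backend name below is hypothetical:
u32 GPU_SomeBackend::CheckGPUFeatures() const {
	u32 features = GPUCommon::CheckGPUFeatures();  // shared, caps-driven bits
	features |= GPU_SUPPORTS_16BIT_FORMATS;        // backend-specific additions
	return features;                               // caller assigns gstate_c.featureFlags
}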

View File

@ -76,7 +76,7 @@ public:
Draw::DrawContext *GetDrawContext() override {
return draw_;
}
virtual void CheckGPUFeatures() = 0;
virtual u32 CheckGPUFeatures() const;
void UpdateCmdInfo();
@ -103,7 +103,7 @@ public:
void ExecuteOp(u32 op, u32 diff) override;
void PreExecuteOp(u32 op, u32 diff) override;
bool InterpretList(DisplayList &list) override;
bool InterpretList(DisplayList &list);
void ProcessDLQueue();
u32 UpdateStall(int listid, u32 newstall) override;
u32 EnqueueList(u32 listpc, u32 stall, int subIntrBase, PSPPointer<PspGeListArgs> args, bool head) override;
@ -355,6 +355,7 @@ protected:
int immCount_ = 0;
GEPrimitiveType immPrim_ = GE_PRIM_INVALID;
uint32_t immFlags_ = 0;
bool immFirstSent_ = false;
std::string reportingPrimaryInfo_;
std::string reportingFullInfo_;

View File

@ -205,7 +205,6 @@ public:
virtual void PreExecuteOp(u32 op, u32 diff) = 0;
virtual void ExecuteOp(u32 op, u32 diff) = 0;
virtual bool InterpretList(DisplayList& list) = 0;
// Framebuffer management
virtual void SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) = 0;

View File

@ -485,7 +485,8 @@ enum {
// Free bit: 15
GPU_SUPPORTS_DEPTH_TEXTURE = FLAG_BIT(16),
GPU_SUPPORTS_ACCURATE_DEPTH = FLAG_BIT(17),
// Free bits: 18-19
GPU_SUPPORTS_FRAGMENT_SHADER_INTERLOCK = FLAG_BIT(18),
// Free bits: 19
GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH = FLAG_BIT(20),
GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT = FLAG_BIT(21),
GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT = FLAG_BIT(22),
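For reference, FLAG_BIT is assumed to be the usual single-bit macro, which is why taking bit 18 leaves only bit 19 free in this range:
#define FLAG_BIT(x) (1 << (x))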

View File

@ -167,7 +167,7 @@ void BinManager::UpdateState(bool throughMode) {
if (states_.Full())
Flush("states");
stateIndex_ = (uint16_t)states_.Push(RasterizerState());
ComputeRasterizerState(&states_[stateIndex_], throughMode);
ComputeRasterizerState(&states_[stateIndex_]);
states_[stateIndex_].samplerID.cached.clut = cluts_[clutIndex_].readable;
ClearDirty(SoftDirty::PIXEL_ALL | SoftDirty::SAMPLER_ALL | SoftDirty::RAST_ALL);
@ -326,7 +326,7 @@ void BinManager::AddTriangle(const VertexData &v0, const VertexData &v1, const V
if (d01.x * d02.y - d01.y * d02.x < 0)
return;
// If all points have identical coords, we'll have 0 weights and not skip properly, so skip here.
if (d01.x == 0 && d01.y == 0 && d02.x == 0 && d02.y == 0)
if ((d01.x == 0 && d02.x == 0) || (d01.y == 0 && d02.y == 0))
return;
// Was it fully outside the scissor?
@ -474,6 +474,9 @@ void BinManager::Drain() {
}
void BinManager::Flush(const char *reason) {
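// Assumption: 0x7FFFFFFF is the queue range's reset value, so if it is still
// set, nothing has been binned since the last flush and there is nothing to do.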
if (queueRange_.x1 == 0x7FFFFFFF)
return;
double st;
if (coreCollectDebugStats)
st = time_now_d();

View File

@ -133,6 +133,10 @@ static inline bool CheckOutsideZ(ClipCoords p, int &pos, int &neg) {
void ProcessRect(const VertexData &v0, const VertexData &v1, BinManager &binner) {
if (!binner.State().throughMode) {
// If any verts were outside range, throw the entire prim away.
if (v0.OutsideRange() || v1.OutsideRange())
return;
// We may discard the entire rect based on depth values.
int outsidePos = 0, outsideNeg = 0;
CheckOutsideZ(v0.clippos, outsidePos, outsideNeg);
@ -176,6 +180,12 @@ void ProcessRect(const VertexData &v0, const VertexData &v1, BinManager &binner)
}
void ProcessPoint(const VertexData &v0, BinManager &binner) {
// If any verts were outside range, throw the entire prim away.
if (!binner.State().throughMode) {
if (v0.OutsideRange())
return;
}
// Points need no clipping. Will be bounds checked in the rasterizer (which seems backwards?)
binner.AddPoint(v0);
}
@ -187,6 +197,10 @@ void ProcessLine(const VertexData &v0, const VertexData &v1, BinManager &binner)
return;
}
// If any verts were outside range, throw the entire prim away.
if (v0.OutsideRange() || v1.OutsideRange())
return;
int outsidePos = 0, outsideNeg = 0;
CheckOutsideZ(v0.clippos, outsidePos, outsideNeg);
CheckOutsideZ(v1.clippos, outsidePos, outsideNeg);
@ -222,6 +236,10 @@ void ProcessLine(const VertexData &v0, const VertexData &v1, BinManager &binner)
void ProcessTriangle(const VertexData &v0, const VertexData &v1, const VertexData &v2, const VertexData &provoking, BinManager &binner) {
int mask = 0;
if (!binner.State().throughMode) {
// If any verts were outside range, throw the entire prim away.
if (v0.OutsideRange() || v1.OutsideRange() || v2.OutsideRange())
return;
mask |= CalcClipMask(v0.clippos);
mask |= CalcClipMask(v1.clippos);
mask |= CalcClipMask(v2.clippos);

View File

@ -48,11 +48,11 @@ static inline PixelBlendFactor OptimizeAlphaFactor(uint32_t color) {
return PixelBlendFactor::FIX;
}
void ComputePixelFuncID(PixelFuncID *id, bool throughMode) {
void ComputePixelFuncID(PixelFuncID *id) {
id->fullKey = 0;
// TODO: Could this be minz > 0x0000 || maxz < 0xFFFF? Maybe unsafe, depending on verts...
id->applyDepthRange = !throughMode;
id->applyDepthRange = !gstate.isModeThrough();
// Dither happens even in clear mode.
id->dithering = gstate.isDitherEnabled();
id->fbFormat = gstate.FrameBufFormat();
@ -169,7 +169,7 @@ void ComputePixelFuncID(PixelFuncID *id, bool throughMode) {
}
id->applyLogicOp = gstate.isLogicOpEnabled() && gstate.getLogicOp() != GE_LOGIC_COPY;
id->applyFog = gstate.isFogEnabled() && !throughMode;
id->applyFog = gstate.isFogEnabled() && !gstate.isModeThrough();
id->earlyZChecks = id->DepthTestFunc() != GE_COMP_ALWAYS;
if (id->stencilTest && id->earlyZChecks) {

View File

@ -244,7 +244,7 @@ struct hash<SamplerID> {
};
void ComputePixelFuncID(PixelFuncID *id, bool throughMode);
void ComputePixelFuncID(PixelFuncID *id);
std::string DescribePixelFuncID(const PixelFuncID &id);
void ComputeSamplerID(SamplerID *id);

View File

@ -93,8 +93,8 @@ static inline Vec4<float> Interpolate(const float &c0, const float &c1, const fl
return Interpolate(c0, c1, c2, w0.Cast<float>(), w1.Cast<float>(), w2.Cast<float>(), wsum_recip);
}
void ComputeRasterizerState(RasterizerState *state, bool throughMode) {
ComputePixelFuncID(&state->pixelID, throughMode);
void ComputeRasterizerState(RasterizerState *state) {
ComputePixelFuncID(&state->pixelID);
state->drawPixel = Rasterizer::GetSingleFunc(state->pixelID);
state->enableTextures = gstate.isTextureMapEnabled() && !state->pixelID.clearMode;
@ -132,7 +132,7 @@ void ComputeRasterizerState(RasterizerState *state, bool throughMode) {
}
state->shadeGouraud = gstate.getShadeMode() == GE_SHADE_GOURAUD;
state->throughMode = throughMode;
state->throughMode = gstate.isModeThrough();
state->antialiasLines = gstate.isAntiAliasEnabled();
#if defined(SOFTGPU_MEMORY_TAGGING_DETAILED) || defined(SOFTGPU_MEMORY_TAGGING_BASIC)
@ -1235,6 +1235,7 @@ void ClearRectangle(const VertexData &v0, const VertexData &v1, const BinCoords
case GE_FORMAT_INVALID:
case GE_FORMAT_DEPTH16:
case GE_FORMAT_CLUT8:
_dbg_assert_msg_(false, "Software: invalid framebuf format.");
break;
}
@ -1365,7 +1366,7 @@ void DrawLine(const VertexData &v0, const VertexData &v1, const BinCoords &range
maskOK = false;
}
if (!CheckDepthTestPassed(pixelID.DepthTestFunc(), x, y, pixelID.cached.depthbufStride, z)) {
if (!CheckDepthTestPassed(pixelID.DepthTestFunc(), p.x, p.y, pixelID.cached.depthbufStride, z)) {
maskOK = false;
}
}

View File

@ -65,7 +65,7 @@ struct RasterizerState {
}
};
void ComputeRasterizerState(RasterizerState *state, bool throughMode);
void ComputeRasterizerState(RasterizerState *state);
// Draws a triangle if its vertices are specified in counter-clockwise order
void DrawTriangle(const VertexData &v0, const VertexData &v1, const VertexData &v2, const BinCoords &range, const RasterizerState &state);

View File

@ -116,8 +116,12 @@ void DrawSprite(const VertexData &v0, const VertexData &v1, const BinCoords &ran
DrawingCoords scissorTL = TransformUnit::ScreenToDrawing(range.x1, range.y1);
DrawingCoords scissorBR = TransformUnit::ScreenToDrawing(range.x2, range.y2);
int z = v1.screenpos.z;
int fog = 255;
const int z = v1.screenpos.z;
constexpr int fog = 255;
// Since it's flat, we can check depth range early. Matters for earlyZChecks.
if (pixelID.applyDepthRange && (z < pixelID.cached.minz || z > pixelID.cached.maxz))
return;
bool isWhite = v1.color0 == 0xFFFFFFFF;
@ -204,15 +208,31 @@ void DrawSprite(const VertexData &v0, const VertexData &v1, const BinCoords &ran
float t = tf_start;
const Vec4<int> c0 = Vec4<int>::FromRGBA(v1.color0);
for (int y = pos0.y; y < pos1.y; y++) {
float s = sf_start;
// Not really that fast but faster than triangle.
for (int x = pos0.x; x < pos1.x; x++) {
Vec4<int> prim_color = state.nearest(s, t, xoff, yoff, ToVec4IntArg(c0), &texptr, &texbufw, 0, 0, state.samplerID);
state.drawPixel(x, y, z, 255, ToVec4IntArg(prim_color), pixelID);
s += dsf;
if (pixelID.earlyZChecks) {
for (int y = pos0.y; y < pos1.y; y++) {
float s = sf_start;
// Not really that fast but faster than triangle.
for (int x = pos0.x; x < pos1.x; x++) {
if (CheckDepthTestPassed(pixelID.DepthTestFunc(), x, y, pixelID.cached.depthbufStride, z)) {
Vec4<int> prim_color = state.nearest(s, t, xoff, yoff, ToVec4IntArg(c0), &texptr, &texbufw, 0, 0, state.samplerID);
state.drawPixel(x, y, z, fog, ToVec4IntArg(prim_color), pixelID);
}
s += dsf;
}
t += dtf;
}
} else {
for (int y = pos0.y; y < pos1.y; y++) {
float s = sf_start;
// Not really that fast but faster than triangle.
for (int x = pos0.x; x < pos1.x; x++) {
Vec4<int> prim_color = state.nearest(s, t, xoff, yoff, ToVec4IntArg(c0), &texptr, &texbufw, 0, 0, state.samplerID);
state.drawPixel(x, y, z, fog, ToVec4IntArg(prim_color), pixelID);
s += dsf;
}
t += dtf;
}
t += dtf;
}
}
} else {
@ -239,6 +259,16 @@ void DrawSprite(const VertexData &v0, const VertexData &v1, const BinCoords &ran
pixel++;
}
}
} else if (pixelID.earlyZChecks) {
const Vec4<int> prim_color = Vec4<int>::FromRGBA(v1.color0);
for (int y = pos0.y; y < pos1.y; y++) {
for (int x = pos0.x; x < pos1.x; x++) {
if (!CheckDepthTestPassed(pixelID.DepthTestFunc(), x, y, pixelID.cached.depthbufStride, z))
continue;
state.drawPixel(x, y, z, fog, ToVec4IntArg(prim_color), pixelID);
}
}
} else {
const Vec4<int> prim_color = Vec4<int>::FromRGBA(v1.color0);
for (int y = pos0.y; y < pos1.y; y++) {
@ -325,15 +355,18 @@ bool RectangleFastPath(const VertexData &v0, const VertexData &v1, BinManager &b
}
static bool AreCoordsRectangleCompatible(const RasterizerState &state, const VertexData &data0, const VertexData &data1) {
if (!(data1.color0 == data0.color0))
if (data1.color0 != data0.color0)
return false;
if (!(data1.screenpos.z == data0.screenpos.z)) {
if (data1.screenpos.z != data0.screenpos.z) {
// Sometimes, we don't actually care about z.
if (state.pixelID.depthWrite || state.pixelID.DepthTestFunc() != GE_COMP_ALWAYS)
return false;
}
if (!state.throughMode) {
if (!state.throughMode && !(data1.color1 == data0.color1))
if (data1.color1 != data0.color1)
return false;
// If either vertex is outside range, the whole rect should be culled.
if (data1.OutsideRange() || data0.OutsideRange())
return false;
// Do we have to think about perspective correction or slope mip level?
if (state.enableTextures && data1.clippos.w != data0.clippos.w) {

View File

@ -361,7 +361,7 @@ const SoftwareCommandTableEntry softgpuCommandTable[] = {
{ GE_CMD_VTCT },
{ GE_CMD_VTCQ },
{ GE_CMD_VCV },
{ GE_CMD_VAP, FLAG_EXECUTE, SoftDirty::NONE, &GPUCommon::Execute_ImmVertexAlphaPrim },
{ GE_CMD_VAP, FLAG_EXECUTE, SoftDirty::NONE, &SoftGPU::Execute_ImmVertexAlphaPrim },
{ GE_CMD_VFC },
{ GE_CMD_VSCV },
@ -639,6 +639,7 @@ void SoftGPU::CopyToCurrentFboFromDisplayRam(int srcwidth, int srcheight) {
}
void SoftGPU::CopyDisplayToOutput(bool reallyDirty) {
drawEngine_->transformUnit.Flush("output");
// The display always shows 480x272.
CopyToCurrentFboFromDisplayRam(FB_WIDTH, FB_HEIGHT);
MarkDirty(displayFramebuf_, displayStride_, 272, displayFormat_, SoftGPUVRAMDirty::CLEAR);
@ -650,7 +651,7 @@ void SoftGPU::MarkDirty(uint32_t addr, uint32_t stride, uint32_t height, GEBuffe
}
void SoftGPU::MarkDirty(uint32_t addr, uint32_t bytes, SoftGPUVRAMDirty value) {
// Don't bother tracking if frameskipping.
// Only bother tracking if frameskipping.
if (g_Config.iFrameSkip == 0)
return;
if (!Memory::IsVRAMAddress(addr) || !Memory::IsVRAMAddress(addr + bytes - 1))
@ -1005,19 +1006,24 @@ void SoftGPU::Execute_LoadClut(u32 op, u32 diff) {
void SoftGPU::Execute_FramebufPtr(u32 op, u32 diff) {
// We assume fb.data won't change while we're drawing.
drawEngine_->transformUnit.Flush("framebuf");
fb.data = Memory::GetPointerWrite(gstate.getFrameBufAddress());
if (diff) {
drawEngine_->transformUnit.Flush("framebuf");
fb.data = Memory::GetPointerWrite(gstate.getFrameBufAddress());
}
}
void SoftGPU::Execute_FramebufFormat(u32 op, u32 diff) {
// We should flush, because ranges within bins may change.
drawEngine_->transformUnit.Flush("framebuf");
if (diff)
drawEngine_->transformUnit.Flush("framebuf");
}
void SoftGPU::Execute_ZbufPtr(u32 op, u32 diff) {
// We assume depthbuf.data won't change while we're drawing.
drawEngine_->transformUnit.Flush("depthbuf");
depthbuf.data = Memory::GetPointerWrite(gstate.getDepthBufAddress());
if (diff) {
drawEngine_->transformUnit.Flush("depthbuf");
depthbuf.data = Memory::GetPointerWrite(gstate.getDepthBufAddress());
}
}
void SoftGPU::Execute_VertexType(u32 op, u32 diff) {
@ -1109,6 +1115,12 @@ void SoftGPU::Execute_BoneMtxData(u32 op, u32 diff) {
gstate.boneMatrixData = GE_CMD_BONEMATRIXDATA << 24;
}
void SoftGPU::Execute_ImmVertexAlphaPrim(u32 op, u32 diff) {
GPUCommon::Execute_ImmVertexAlphaPrim(op, diff);
// We won't flush as often as hardware renderers, so we want to flush right away.
FlushImm();
}
void SoftGPU::Execute_Call(u32 op, u32 diff) {
PROFILE_THIS_SCOPE("gpu_call");
@ -1138,6 +1150,18 @@ void SoftGPU::FinishDeferred() {
drawEngine_->transformUnit.Flush("finish");
}
int SoftGPU::ListSync(int listid, int mode) {
// Take this as a cue that we need to finish drawing.
drawEngine_->transformUnit.Flush("listsync");
return GPUCommon::ListSync(listid, mode);
}
u32 SoftGPU::DrawSync(int mode) {
// Take this as a cue that we need to finish drawing.
drawEngine_->transformUnit.Flush("drawsync");
return GPUCommon::DrawSync(mode);
}
void SoftGPU::GetStats(char *buffer, size_t bufsize) {
drawEngine_->transformUnit.GetStats(buffer, bufsize);
}

View File

@ -127,10 +127,12 @@ public:
SoftGPU(GraphicsContext *gfxCtx, Draw::DrawContext *draw);
~SoftGPU();
void CheckGPUFeatures() override {}
u32 CheckGPUFeatures() const override { return 0; }
void InitClear() override {}
void ExecuteOp(u32 op, u32 diff) override;
void FinishDeferred() override;
int ListSync(int listid, int mode) override;
u32 DrawSync(int mode) override;
void SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) override;
void CopyDisplayToOutput(bool reallyDirty) override;
@ -185,6 +187,8 @@ public:
void Execute_TgenMtxData(u32 op, u32 diff);
void Execute_BoneMtxData(u32 op, u32 diff);
void Execute_ImmVertexAlphaPrim(u32 op, u32 diff);
typedef void (SoftGPU::*CmdFunc)(u32 op, u32 diff);
protected:

View File

@ -70,14 +70,66 @@ void SoftwareDrawEngine::DispatchSubmitPrim(const void *verts, const void *inds,
transformUnit.SubmitPrimitive(verts, inds, prim, vertexCount, vertTypeID, bytesRead, this);
}
void SoftwareDrawEngine::DispatchSubmitImm(const void *verts, const void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead) {
void SoftwareDrawEngine::DispatchSubmitImm(GEPrimitiveType prim, TransformedVertex *buffer, int vertexCount, int cullMode, bool continuation) {
uint32_t vertTypeID = GetVertTypeID(gstate.vertType | GE_VTYPE_POS_FLOAT, gstate.getUVGenMode());
int flipCull = cullMode != gstate.getCullMode() ? 1 : 0;
// TODO: For now, just setting all dirty.
transformUnit.SetDirty(SoftDirty(-1));
gstate.cullmode ^= flipCull;
transformUnit.SubmitPrimitive(verts, inds, prim, vertexCount, vertTypeID, bytesRead, this);
// TODO: This is a bit ugly. Should bypass when clipping...
uint32_t xScale = gstate.viewportxscale;
uint32_t xCenter = gstate.viewportxcenter;
uint32_t yScale = gstate.viewportyscale;
uint32_t yCenter = gstate.viewportycenter;
uint32_t zScale = gstate.viewportzscale;
uint32_t zCenter = gstate.viewportzcenter;
// Force scale to 1 and center to zero.
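// (GE registers store the top 24 bits of an IEEE float, i.e. float bits >> 8,
// so 0x3F8000 encodes 1.0f and 0x477FFF encodes 65535.0f.)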
gstate.viewportxscale = (GE_CMD_VIEWPORTXSCALE << 24) | 0x3F8000;
gstate.viewportxcenter = (GE_CMD_VIEWPORTXCENTER << 24) | 0x000000;
gstate.viewportyscale = (GE_CMD_VIEWPORTYSCALE << 24) | 0x3F8000;
gstate.viewportycenter = (GE_CMD_VIEWPORTYCENTER << 24) | 0x000000;
// Z we scale to 65535 for neg z clipping.
gstate.viewportzscale = (GE_CMD_VIEWPORTZSCALE << 24) | 0x477FFF;
gstate.viewportzcenter = (GE_CMD_VIEWPORTZCENTER << 24) | 0x000000;
// Before we start, submit 0 prims to reset the prev prim type.
// Following submits will always be KEEP_PREVIOUS.
if (!continuation)
transformUnit.SubmitPrimitive(nullptr, nullptr, prim, 0, vertTypeID, nullptr, this);
for (int i = 0; i < vertexCount; i++) {
VertexData vert;
vert.clippos = ClipCoords(buffer[i].pos);
vert.texturecoords.x = buffer[i].u;
vert.texturecoords.y = buffer[i].v;
if (gstate.isModeThrough()) {
vert.texturecoords.x *= gstate.getTextureWidth(0);
vert.texturecoords.y *= gstate.getTextureHeight(0);
} else {
vert.clippos.z *= 1.0f / 65535.0f;
}
vert.color0 = buffer[i].color0_32;
vert.color1 = gstate.isUsingSecondaryColor() && !gstate.isModeThrough() ? buffer[i].color1_32 : 0;
vert.fogdepth = buffer[i].fog;
vert.screenpos.x = (int)(buffer[i].x * 16.0f);
vert.screenpos.y = (int)(buffer[i].y * 16.0f);
vert.screenpos.z = (u16)(u32)buffer[i].z;
transformUnit.SubmitImmVertex(vert, this);
}
gstate.viewportxscale = xScale;
gstate.viewportxcenter = xCenter;
gstate.viewportyscale = yScale;
gstate.viewportycenter = yCenter;
gstate.viewportzscale = zScale;
gstate.viewportzcenter = zCenter;
gstate.cullmode ^= flipCull;
// TODO: Should really clear, but the vertex type is faked so things might need resetting...
// TODO: Should really clear, but a bunch of values are forced so this is safest.
transformUnit.SetDirty(SoftDirty(-1));
}
@ -273,7 +325,7 @@ void ComputeTransformState(TransformState *state, const VertexReader &vreader) {
state->roundToScreen = &ClipToScreenInternal<false, true>;
}
VertexData TransformUnit::ReadVertex(VertexReader &vreader, const TransformState &state, bool &outside_range_flag) {
VertexData TransformUnit::ReadVertex(VertexReader &vreader, const TransformState &state) {
PROFILE_THIS_SCOPE("read_vert");
VertexData vertex;
@ -362,9 +414,13 @@ VertexData TransformUnit::ReadVertex(VertexReader &vreader, const TransformState
#else
screenScaled = vertex.clippos.xyz() * state.screenScale / vertex.clippos.w + state.screenAdd;
#endif
bool outside_range_flag = false;
vertex.screenpos = state.roundToScreen(screenScaled, vertex.clippos, &outside_range_flag);
if (outside_range_flag)
if (outside_range_flag) {
// We use this, essentially, as the flag.
vertex.screenpos.x = 0x7FFFFFFF;
return vertex;
}
if (state.enableFog) {
vertex.fogdepth = (viewpos.z + state.fogEnd) * state.fogSlope;
@ -447,20 +503,19 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G
if (gstate_c.skipDrawReason & SKIPDRAW_SKIPFRAME) {
return;
}
// Throughmode never draws 8-bit primitives, maybe because they can't fully specify the screen?
if ((vertex_type & GE_VTYPE_THROUGH_MASK) != 0 && (vertex_type & GE_VTYPE_POS_MASK) == GE_VTYPE_POS_8BIT)
return;
// Vertices without position are just entirely culled.
// Note: Throughmode does draw 8-bit primitives, but positions are always zero - handled in decode.
if ((vertex_type & GE_VTYPE_POS_MASK) == 0)
return;
u16 index_lower_bound = 0;
u16 index_upper_bound = vertex_count - 1;
u16 index_upper_bound = vertex_count == 0 ? 0 : vertex_count - 1;
IndexConverter ConvertIndex(vertex_type, indices);
if (indices)
GetIndexBounds(indices, vertex_count, vertex_type, &index_lower_bound, &index_upper_bound);
vdecoder.DecodeVerts(decoded_, vertices, index_lower_bound, index_upper_bound);
if (vertex_count != 0)
vdecoder.DecodeVerts(decoded_, vertices, index_lower_bound, index_upper_bound);
VertexReader vreader(decoded_, vtxfmt, vertex_type);
@ -471,19 +526,11 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G
prim_type = prev_prim_;
}
int vtcs_per_prim;
switch (prim_type) {
case GE_PRIM_POINTS: vtcs_per_prim = 1; break;
case GE_PRIM_LINES: vtcs_per_prim = 2; break;
case GE_PRIM_TRIANGLES: vtcs_per_prim = 3; break;
case GE_PRIM_RECTANGLES: vtcs_per_prim = 2; break;
default: vtcs_per_prim = 0; break;
}
// TODO: Do this in two passes - first process the vertices (before indexing/stripping),
// then resolve the indices. This lets us avoid transforming shared vertices twice.
binner_->UpdateState(vreader.isThrough());
hasDraws_ = true;
static TransformState transformState;
if (binner_->HasDirty(SoftDirty::LIGHT_ALL | SoftDirty::TRANSFORM_ALL)) {
@ -494,9 +541,17 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G
bool skipCull = !gstate.isCullEnabled() || gstate.isModeClear();
const CullType cullType = skipCull ? CullType::OFF : (gstate.getCullMode() ? CullType::CCW : CullType::CW);
bool outside_range_flag = false;
auto readVertexAt = [&](VertexReader &vreader, const TransformState &transformState, int vtx) {
if (indices) {
vreader.Goto(ConvertIndex(vtx) - index_lower_bound);
} else {
vreader.Goto(vtx);
}
return ReadVertex(vreader, transformState);
};
if (vreader.isThrough() && cullType == CullType::OFF && prim_type == GE_PRIM_TRIANGLES && data_index_ + vertex_count >= 6 && ((data_index_ + vertex_count) % 6) == 0) {
if (vreader.isThrough() && cullType == CullType::OFF && prim_type == GE_PRIM_TRIANGLES && data_index_ == 0 && vertex_count >= 6 && ((vertex_count) % 6) == 0) {
// Some games send rectangles as a series of regular triangles.
// We look for this, but only in throughmode.
VertexData buf[6];
@ -506,20 +561,7 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G
}
for (int vtx = 0; vtx < vertex_count; ++vtx) {
if (indices) {
vreader.Goto(ConvertIndex(vtx) - index_lower_bound);
} else {
vreader.Goto(vtx);
}
buf[buf_index++] = ReadVertex(vreader, transformState, outside_range_flag);
if (buf_index >= 3 && outside_range_flag) {
// Cull, just pretend it didn't happen.
buf_index -= 3;
outside_range_flag = false;
continue;
}
buf[buf_index++] = readVertexAt(vreader, transformState, vtx);
if (buf_index < 6)
continue;
@ -552,73 +594,54 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G
return;
}
// Note: intentionally, these allow for the case of vertex_count == 0, but data_index_ > 0.
// This is used for immediate-mode primitives.
switch (prim_type) {
case GE_PRIM_POINTS:
case GE_PRIM_LINES:
case GE_PRIM_TRIANGLES:
{
for (int vtx = 0; vtx < vertex_count; ++vtx) {
if (indices) {
vreader.Goto(ConvertIndex(vtx) - index_lower_bound);
} else {
vreader.Goto(vtx);
}
data_[data_index_++] = ReadVertex(vreader, transformState, outside_range_flag);
if (data_index_ < vtcs_per_prim) {
// Keep reading. Note: an incomplete prim will stay read for GE_PRIM_KEEP_PREVIOUS.
continue;
}
// Okay, we've got enough verts. Reset the index for next time.
data_index_ = 0;
if (outside_range_flag) {
// Cull the prim if it was outside, and move to the next prim.
outside_range_flag = false;
continue;
}
switch (prim_type) {
case GE_PRIM_TRIANGLES:
SendTriangle(cullType, &data_[0]);
break;
case GE_PRIM_LINES:
Clipper::ProcessLine(data_[0], data_[1], *binner_);
break;
case GE_PRIM_POINTS:
Clipper::ProcessPoint(data_[0], *binner_);
break;
default:
_dbg_assert_msg_(false, "Unexpected prim type: %d", prim_type);
}
}
break;
for (int i = 0; i < data_index_; ++i)
Clipper::ProcessPoint(data_[i], *binner_);
data_index_ = 0;
for (int vtx = 0; vtx < vertex_count; ++vtx) {
data_[0] = readVertexAt(vreader, transformState, vtx);
Clipper::ProcessPoint(data_[0], *binner_);
}
break;
case GE_PRIM_LINES:
for (int i = 0; i < data_index_ - 1; i += 2)
Clipper::ProcessLine(data_[i + 0], data_[i + 1], *binner_);
data_index_ &= 1;
for (int vtx = 0; vtx < vertex_count; ++vtx) {
data_[data_index_++] = readVertexAt(vreader, transformState, vtx);
if (data_index_ == 2) {
Clipper::ProcessLine(data_[0], data_[1], *binner_);
data_index_ = 0;
}
}
break;
case GE_PRIM_TRIANGLES:
for (int vtx = 0; vtx < vertex_count; ++vtx) {
data_[data_index_++] = readVertexAt(vreader, transformState, vtx);
if (data_index_ < 3) {
// Keep reading. Note: an incomplete prim will stay read for GE_PRIM_KEEP_PREVIOUS.
continue;
}
// Okay, we've got enough verts. Reset the index for next time.
data_index_ = 0;
SendTriangle(cullType, &data_[0]);
}
// In case vertex_count was 0.
if (data_index_ >= 3) {
SendTriangle(cullType, &data_[0]);
data_index_ = 0;
}
break;
case GE_PRIM_RECTANGLES:
for (int vtx = 0; vtx < vertex_count; ++vtx) {
if (indices) {
vreader.Goto(ConvertIndex(vtx) - index_lower_bound);
} else {
vreader.Goto(vtx);
}
data_[data_index_++] = ReadVertex(vreader, transformState, outside_range_flag);
if (outside_range_flag) {
outside_range_flag = false;
// Note: this is the post increment index. If odd, we set the first vert.
if (data_index_ & 1) {
// Skip the next one and forget this one.
vtx++;
data_index_--;
} else {
// Forget both of the last 2.
data_index_ -= 2;
}
}
data_[data_index_++] = readVertexAt(vreader, transformState, vtx);
if (data_index_ == 4 && vreader.isThrough() && cullType == CullType::OFF) {
if (Rasterizer::DetectRectangleThroughModeSlices(binner_->State(), data_)) {
@ -646,19 +669,7 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G
// If data_index_ is 1 or 2, etc., it means we're continuing a line strip.
int skip_count = data_index_ == 0 ? 1 : 0;
for (int vtx = 0; vtx < vertex_count; ++vtx) {
if (indices) {
vreader.Goto(ConvertIndex(vtx) - index_lower_bound);
} else {
vreader.Goto(vtx);
}
data_[(data_index_++) & 1] = ReadVertex(vreader, transformState, outside_range_flag);
if (outside_range_flag) {
// Drop all primitives containing the current vertex
skip_count = 2;
outside_range_flag = false;
continue;
}
data_[(data_index_++) & 1] = readVertexAt(vreader, transformState, vtx);
if (skip_count) {
--skip_count;
@ -667,6 +678,9 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G
Clipper::ProcessLine(data_[data_index_ & 1], data_[(data_index_ & 1) ^ 1], *binner_);
}
}
// If this is from immediate-mode drawing, we always had one new vert (already in data_.)
if (isImmDraw_ && data_index_ >= 2)
Clipper::ProcessLine(data_[data_index_ & 1], data_[(data_index_ & 1) ^ 1], *binner_);
break;
}
@ -681,19 +695,15 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G
if (data_index_ == 0 && vertex_count >= 4 && (vertex_count & 1) == 0 && cullType == CullType::OFF) {
for (int base = 0; base < vertex_count - 2; base += 2) {
for (int vtx = base == 0 ? 0 : 2; vtx < 4; ++vtx) {
if (indices) {
vreader.Goto(ConvertIndex(base + vtx) - index_lower_bound);
} else {
vreader.Goto(base + vtx);
}
data_[vtx] = ReadVertex(vreader, transformState, outside_range_flag);
data_[vtx] = readVertexAt(vreader, transformState, base + vtx);
}
// If a strip is effectively a rectangle, draw it as such!
int tl = -1, br = -1;
if (!outside_range_flag && Rasterizer::DetectRectangleFromStrip(binner_->State(), data_, &tl, &br)) {
if (Rasterizer::DetectRectangleFromStrip(binner_->State(), data_, &tl, &br)) {
Clipper::ProcessRect(data_[tl], data_[br], *binner_);
start_vtx += 2;
skip_count = 0;
if (base + 4 >= vertex_count) {
start_vtx = vertex_count;
break;
@ -710,32 +720,29 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G
}
}
outside_range_flag = false;
for (int vtx = start_vtx; vtx < vertex_count; ++vtx) {
if (indices) {
vreader.Goto(ConvertIndex(vtx) - index_lower_bound);
} else {
vreader.Goto(vtx);
}
for (int vtx = start_vtx; vtx < vertex_count && skip_count > 0; ++vtx) {
int provoking_index = (data_index_++) % 3;
data_[provoking_index] = ReadVertex(vreader, transformState, outside_range_flag);
if (outside_range_flag) {
// Drop all primitives containing the current vertex
skip_count = 2;
outside_range_flag = false;
continue;
}
data_[provoking_index] = readVertexAt(vreader, transformState, vtx);
--skip_count;
++start_vtx;
}
if (skip_count) {
--skip_count;
continue;
}
for (int vtx = start_vtx; vtx < vertex_count; ++vtx) {
int provoking_index = (data_index_++) % 3;
data_[provoking_index] = readVertexAt(vreader, transformState, vtx);
int wind = (data_index_ - 1) % 2;
CullType altCullType = cullType == CullType::OFF ? cullType : CullType((int)cullType ^ wind);
SendTriangle(altCullType, &data_[0], provoking_index);
}
// If this is from immediate-mode drawing, we always had one new vert (already in data_.)
if (isImmDraw_ && data_index_ >= 3) {
int provoking_index = (data_index_ - 1) % 3;
int wind = (data_index_ - 1) % 2;
CullType altCullType = cullType == CullType::OFF ? cullType : CullType((int)cullType ^ wind);
SendTriangle(altCullType, &data_[0], provoking_index);
}
break;
}
@ -747,64 +754,47 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G
int start_vtx = 0;
// Only read the central vertex if we're not continuing.
if (data_index_ == 0) {
if (indices) {
vreader.Goto(ConvertIndex(0) - index_lower_bound);
} else {
vreader.Goto(0);
}
data_[0] = ReadVertex(vreader, transformState, outside_range_flag);
if (data_index_ == 0 && vertex_count > 0) {
data_[0] = readVertexAt(vreader, transformState, 0);
data_index_++;
start_vtx = 1;
// If the central vertex is outside range, all the points are toast.
if (outside_range_flag)
break;
}
if (data_index_ == 1 && vertex_count == 4 && cullType == CullType::OFF) {
for (int vtx = start_vtx; vtx < vertex_count; ++vtx) {
if (indices) {
vreader.Goto(ConvertIndex(vtx) - index_lower_bound);
} else {
vreader.Goto(vtx);
}
data_[vtx] = ReadVertex(vreader, transformState, outside_range_flag);
data_[vtx] = readVertexAt(vreader, transformState, vtx);
}
int tl = -1, br = -1;
if (!outside_range_flag && Rasterizer::DetectRectangleFromFan(binner_->State(), data_, vertex_count, &tl, &br)) {
if (Rasterizer::DetectRectangleFromFan(binner_->State(), data_, vertex_count, &tl, &br)) {
Clipper::ProcessRect(data_[tl], data_[br], *binner_);
break;
}
}
outside_range_flag = false;
for (int vtx = start_vtx; vtx < vertex_count; ++vtx) {
if (indices) {
vreader.Goto(ConvertIndex(vtx) - index_lower_bound);
} else {
vreader.Goto(vtx);
}
for (int vtx = start_vtx; vtx < vertex_count && skip_count > 0; ++vtx) {
int provoking_index = 2 - ((data_index_++) % 2);
data_[provoking_index] = ReadVertex(vreader, transformState, outside_range_flag);
if (outside_range_flag) {
// Drop all primitives containing the current vertex
skip_count = 2;
outside_range_flag = false;
continue;
}
data_[provoking_index] = readVertexAt(vreader, transformState, vtx);
--skip_count;
++start_vtx;
}
if (skip_count) {
--skip_count;
continue;
}
for (int vtx = start_vtx; vtx < vertex_count; ++vtx) {
int provoking_index = 2 - ((data_index_++) % 2);
data_[provoking_index] = readVertexAt(vreader, transformState, vtx);
int wind = (data_index_ - 1) % 2;
CullType altCullType = cullType == CullType::OFF ? cullType : CullType((int)cullType ^ wind);
SendTriangle(altCullType, &data_[0], provoking_index);
}
// If this is from immediate-mode drawing, we always had one new vert (already in data_.)
if (isImmDraw_ && data_index_ >= 3) {
int wind = (data_index_ - 1) % 2;
int provoking_index = 2 - wind;
CullType altCullType = cullType == CullType::OFF ? cullType : CullType((int)cullType ^ wind);
SendTriangle(altCullType, &data_[0], provoking_index);
}
break;
}
@ -814,6 +804,47 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G
}
}
void TransformUnit::SubmitImmVertex(const VertexData &vert, SoftwareDrawEngine *drawEngine) {
// Where we put it is different for STRIP/FAN types.
switch (prev_prim_) {
case GE_PRIM_POINTS:
case GE_PRIM_LINES:
case GE_PRIM_TRIANGLES:
case GE_PRIM_RECTANGLES:
// This is the easy one. SubmitPrimitive resets data_index_.
data_[data_index_++] = vert;
break;
case GE_PRIM_LINE_STRIP:
// This one alternates, and data_index_ > 0 means it draws a segment.
data_[(data_index_++) & 1] = vert;
break;
case GE_PRIM_TRIANGLE_STRIP:
data_[(data_index_++) % 3] = vert;
break;
case GE_PRIM_TRIANGLE_FAN:
if (data_index_ == 0) {
data_[data_index_++] = vert;
} else {
int provoking_index = 2 - ((data_index_++) % 2);
data_[provoking_index] = vert;
}
break;
default:
_assert_msg_(false, "Invalid prim type: %d", (int)prev_prim_);
break;
}
uint32_t vertTypeID = GetVertTypeID(gstate.vertType | GE_VTYPE_POS_FLOAT, gstate.getUVGenMode());
// This now processes the step with shared logic, given the existing data_.
isImmDraw_ = true;
SubmitPrimitive(nullptr, nullptr, GE_PRIM_KEEP_PREVIOUS, 0, vertTypeID, nullptr, drawEngine);
isImmDraw_ = false;
}
void TransformUnit::SendTriangle(CullType cullType, const VertexData *verts, int provoking) {
if (cullType == CullType::OFF) {
Clipper::ProcessTriangle(verts[0], verts[1], verts[2], verts[provoking], *binner_);
@ -826,8 +857,12 @@ void TransformUnit::SendTriangle(CullType cullType, const VertexData *verts, int
}
void TransformUnit::Flush(const char *reason) {
if (!hasDraws_)
return;
binner_->Flush(reason);
GPUDebug::NotifyDraw();
hasDraws_ = false;
}
void TransformUnit::GetStats(char *buffer, size_t bufsize) {
@ -836,6 +871,9 @@ void TransformUnit::GetStats(char *buffer, size_t bufsize) {
}
void TransformUnit::FlushIfOverlap(const char *reason, bool modifying, uint32_t addr, uint32_t stride, uint32_t w, uint32_t h) {
if (!hasDraws_)
return;
if (binner_->HasPendingWrite(addr, stride, w, h))
Flush(reason);
if (modifying && binner_->HasPendingRead(addr, stride, w, h))

View File

@ -90,6 +90,10 @@ struct VertexData {
color1 = LerpInt<Vec3<int>, 256>(Vec3<int>::FromRGB(a.color1), Vec3<int>::FromRGB(b.color1), t_int).ToRGB();
}
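// Sentinel written by TransformUnit::ReadVertex() when rounding found the
// position out of range: screenpos.x gets parked at 0x7FFFFFFF.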
bool OutsideRange() const {
return screenpos.x == 0x7FFFFFFF;
}
ClipCoords clippos;
Vec2<float> texturecoords;
uint32_t color0;
@ -125,6 +129,7 @@ public:
static ScreenCoords DrawingToScreen(const DrawingCoords &coords, u16 z);
void SubmitPrimitive(const void* vertices, const void* indices, GEPrimitiveType prim_type, int vertex_count, u32 vertex_type, int *bytesRead, SoftwareDrawEngine *drawEngine);
void SubmitImmVertex(const VertexData &vert, SoftwareDrawEngine *drawEngine);
bool GetCurrentSimpleVertices(int count, std::vector<GPUDebugVertex> &vertices, std::vector<u16> &indices);
@ -138,7 +143,7 @@ public:
SoftDirty GetDirty();
private:
VertexData ReadVertex(VertexReader &vreader, const TransformState &lstate, bool &outside_range_flag);
VertexData ReadVertex(VertexReader &vreader, const TransformState &state);
void SendTriangle(CullType cullType, const VertexData *verts, int provoking = 2);
u8 *decoded_ = nullptr;
@ -149,6 +154,8 @@ private:
// This is the index of the next vert in data (or higher, may need modulus.)
int data_index_ = 0;
GEPrimitiveType prev_prim_ = GE_PRIM_POINTS;
bool hasDraws_ = false;
bool isImmDraw_ = false;
};
class SoftwareDrawEngine : public DrawEngineCommon {
@ -158,7 +165,7 @@ public:
void DispatchFlush() override;
void DispatchSubmitPrim(const void *verts, const void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int cullMode, int *bytesRead) override;
void DispatchSubmitImm(const void *verts, const void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead) override;
void DispatchSubmitImm(GEPrimitiveType prim, TransformedVertex *buffer, int vertexCount, int cullMode, bool continuation) override;
VertexDecoder *FindVertexDecoder(u32 vtype);

View File

@ -71,6 +71,7 @@ enum {
DRAW_BINDING_TESS_STORAGE_BUF = 6,
DRAW_BINDING_TESS_STORAGE_BUF_WU = 7,
DRAW_BINDING_TESS_STORAGE_BUF_WV = 8,
DRAW_BINDING_INPUT_ATTACHMENT = 9,
};
enum {
@ -94,7 +95,10 @@ DrawEngineVulkan::DrawEngineVulkan(Draw::DrawContext *draw)
void DrawEngineVulkan::InitDeviceObjects() {
// All resources we need for PSP drawing. Usually only bindings 0 and 2-4 are populated.
VkDescriptorSetLayoutBinding bindings[9]{};
// TODO: Make things more flexible, so we at least have specialized layouts for input attachments and tess.
// Note that it becomes a support matrix..
VkDescriptorSetLayoutBinding bindings[10]{};
bindings[0].descriptorCount = 1;
bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
bindings[0].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
@ -132,6 +136,10 @@ void DrawEngineVulkan::InitDeviceObjects() {
bindings[8].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
bindings[8].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
bindings[8].binding = DRAW_BINDING_TESS_STORAGE_BUF_WV;
bindings[9].descriptorCount = 1;
bindings[9].descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT;
bindings[9].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
bindings[9].binding = DRAW_BINDING_INPUT_ATTACHMENT;
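// An input attachment lets the fragment shader read the current pixel of the
// color buffer inside the render pass. This is the direct-read path for
// framebuffer read/blend effects, used instead of a copy-and-bind where supported.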
VulkanContext *vulkan = (VulkanContext *)draw_->GetNativeObject(Draw::NativeObject::CONTEXT);
VkDevice device = vulkan->GetDevice();
@ -145,13 +153,15 @@ void DrawEngineVulkan::InitDeviceObjects() {
static constexpr int DEFAULT_DESC_POOL_SIZE = 512;
std::vector<VkDescriptorPoolSize> dpTypes;
dpTypes.resize(3);
dpTypes.resize(4);
dpTypes[0].descriptorCount = DEFAULT_DESC_POOL_SIZE * 3;
dpTypes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
dpTypes[1].descriptorCount = DEFAULT_DESC_POOL_SIZE * 3; // Don't use these for tess anymore, need max three per set.
dpTypes[1].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
dpTypes[2].descriptorCount = DEFAULT_DESC_POOL_SIZE * 3; // TODO: Use a separate layout when no spline stuff is needed to reduce the need for these.
dpTypes[2].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
dpTypes[3].descriptorCount = DEFAULT_DESC_POOL_SIZE; // TODO: Use a separate layout when no spline stuff is needed to reduce the need for these.
dpTypes[3].type = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT;
VkDescriptorPoolCreateInfo dp{ VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO };
// Don't want to mess around with individually freeing these.
@ -379,6 +389,7 @@ VkDescriptorSet DrawEngineVulkan::GetOrCreateDescriptorSet(VkImageView imageView
key.base_ = base;
key.light_ = light;
key.bone_ = bone;
key.secondaryIsInputAttachment = boundSecondaryIsInputAttachment_;
FrameData &frame = GetCurFrame();
// See if we already have this descriptor set cached.
@ -417,15 +428,15 @@ VkDescriptorSet DrawEngineVulkan::GetOrCreateDescriptorSet(VkImageView imageView
}
if (boundSecondary_) {
tex[1].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
tex[1].imageLayout = key.secondaryIsInputAttachment ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
tex[1].imageView = boundSecondary_;
tex[1].sampler = samplerSecondaryNearest_;
writes[n].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
writes[n].pNext = nullptr;
writes[n].dstBinding = DRAW_BINDING_2ND_TEXTURE;
writes[n].dstBinding = key.secondaryIsInputAttachment ? DRAW_BINDING_INPUT_ATTACHMENT : DRAW_BINDING_2ND_TEXTURE;
writes[n].pImageInfo = &tex[1];
writes[n].descriptorCount = 1;
writes[n].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
writes[n].descriptorType = key.secondaryIsInputAttachment ? VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT : VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
writes[n].dstSet = desc;
n++;
}
@ -788,7 +799,7 @@ void DrawEngineVulkan::DoFlush() {
lastRenderStepId_ = curRenderStepId;
}
renderManager->BindPipeline(pipeline->pipeline, (PipelineFlags)pipeline->flags, pipelineLayout_);
renderManager->BindPipeline(pipeline->pipeline, pipeline->pipelineFlags, pipelineLayout_);
if (pipeline != lastPipeline_) {
if (lastPipeline_ && !(lastPipeline_->UsesBlendConstant() && pipeline->UsesBlendConstant())) {
gstate_c.Dirty(DIRTY_BLEND_STATE);
@ -916,7 +927,7 @@ void DrawEngineVulkan::DoFlush() {
lastRenderStepId_ = curRenderStepId;
}
renderManager->BindPipeline(pipeline->pipeline, (PipelineFlags)pipeline->flags, pipelineLayout_);
renderManager->BindPipeline(pipeline->pipeline, pipeline->pipelineFlags, pipelineLayout_);
if (pipeline != lastPipeline_) {
if (lastPipeline_ && !lastPipeline_->UsesBlendConstant() && pipeline->UsesBlendConstant()) {
gstate_c.Dirty(DIRTY_BLEND_STATE);

View File

@ -217,6 +217,8 @@ private:
// Secondary texture for shader blending
VkImageView boundSecondary_ = VK_NULL_HANDLE;
bool boundSecondaryIsInputAttachment_ = false;
// CLUT texture for shader depal
VkImageView boundDepal_ = VK_NULL_HANDLE;
bool boundDepalSmoothed_ = false;
@ -234,6 +236,7 @@ private:
VkSampler sampler_;
VkBuffer base_, light_, bone_; // All three UBO slots will be set to this. This will usually be identical
// for all draws in a frame, except when the buffer has to grow.
bool secondaryIsInputAttachment;
};
// We alternate between these.
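Adding the bool to DescriptorSetKey matters because a cached set that bound the secondary image as a sampled texture cannot be reused when the same image must instead be bound as an input attachment (different binding slot, descriptor type, and layout). If the cache compares keys bytewise (an assumption about the container), the new field participates automatically:
// Hypothetical comparator; relies on the key struct having no uninitialized padding.
bool KeyEquals(const DescriptorSetKey &a, const DescriptorSetKey &b) {
    return memcmp(&a, &b, sizeof(DescriptorSetKey)) == 0;
}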
@ -281,7 +284,7 @@ private:
VulkanDynamicState dynState_{};
int tessOffset_ = 0;
bool fboTexNeedsBind_ = false;
FBOTexState fboTexBindState_ = FBO_TEX_NONE;
// Hardware tessellation
TessellationDataTransferVulkan *tessDataTransferVulkan;

View File

@ -33,7 +33,7 @@ class VulkanPushBuffer;
class FramebufferManagerVulkan : public FramebufferManagerCommon {
public:
FramebufferManagerVulkan(Draw::DrawContext *draw);
explicit FramebufferManagerVulkan(Draw::DrawContext *draw);
~FramebufferManagerVulkan();
// If within a render pass, this will just issue a regular clear. If beginning a new render pass,

View File

@ -52,7 +52,7 @@
GPU_Vulkan::GPU_Vulkan(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
: GPUCommon(gfxCtx, draw), drawEngine_(draw) {
CheckGPUFeatures();
gstate_c.featureFlags = CheckGPUFeatures();
VulkanContext *vulkan = (VulkanContext *)gfxCtx->GetAPIContext();
@ -182,8 +182,8 @@ GPU_Vulkan::~GPU_Vulkan() {
delete framebufferManagerVulkan_;
}
void GPU_Vulkan::CheckGPUFeatures() {
uint32_t features = 0;
u32 GPU_Vulkan::CheckGPUFeatures() const {
uint32_t features = GPUCommon::CheckGPUFeatures();
VulkanContext *vulkan = (VulkanContext *)draw_->GetNativeObject(Draw::NativeObject::CONTEXT);
switch (vulkan->GetPhysicalDeviceProperties().properties.vendorID) {
@ -222,44 +222,14 @@ void GPU_Vulkan::CheckGPUFeatures() {
// Mandatory features on Vulkan, which may be checked in "centralized" code
features |= GPU_SUPPORTS_TEXTURE_LOD_CONTROL;
features |= GPU_SUPPORTS_BLEND_MINMAX;
features |= GPU_SUPPORTS_TEXTURE_NPOT;
features |= GPU_SUPPORTS_INSTANCE_RENDERING;
features |= GPU_SUPPORTS_VERTEX_TEXTURE_FETCH;
features |= GPU_SUPPORTS_TEXTURE_FLOAT;
features |= GPU_SUPPORTS_DEPTH_TEXTURE;
auto &enabledFeatures = vulkan->GetDeviceFeatures().enabled;
if (enabledFeatures.depthClamp) {
features |= GPU_SUPPORTS_DEPTH_CLAMP;
}
if (enabledFeatures.shaderClipDistance) {
features |= GPU_SUPPORTS_CLIP_DISTANCE;
}
if (enabledFeatures.shaderCullDistance) {
// Must support at least 8 if feature supported, so we're fine.
features |= GPU_SUPPORTS_CULL_DISTANCE;
}
if (!draw_->GetBugs().Has(Draw::Bugs::BROKEN_NAN_IN_CONDITIONAL)) {
// Ignore the compat setting if clip and cull are both enabled.
// When supported, we can do the depth side of range culling more correctly.
const bool supported = draw_->GetDeviceCaps().clipDistanceSupported && draw_->GetDeviceCaps().cullDistanceSupported;
const bool disabled = PSP_CoreParameter().compat.flags().DisableRangeCulling;
if (supported || !disabled) {
features |= GPU_SUPPORTS_VS_RANGE_CULLING;
}
}
if (enabledFeatures.dualSrcBlend) {
if (!g_Config.bVendorBugChecksEnabled || !draw_->GetBugs().Has(Draw::Bugs::DUAL_SOURCE_BLENDING_BROKEN)) {
features |= GPU_SUPPORTS_DUALSOURCE_BLEND;
}
}
if (draw_->GetDeviceCaps().logicOpSupported) {
features |= GPU_SUPPORTS_LOGIC_OP;
}
if (draw_->GetDeviceCaps().anisoSupported) {
features |= GPU_SUPPORTS_ANISOTROPY;
}
// These are VULKAN_4444_FORMAT and friends.
uint32_t fmt4444 = draw_->GetDataFormatSupport(Draw::DataFormat::B4G4R4A4_UNORM_PACK16);
@ -275,10 +245,6 @@ void GPU_Vulkan::CheckGPUFeatures() {
INFO_LOG(G3D, "Deficient texture format support: 4444: %d 1555: %d 565: %d", fmt4444, fmt1555, fmt565);
}
if (PSP_CoreParameter().compat.flags().ClearToRAM) {
features |= GPU_USE_CLEAR_RAM_HACK;
}
if (!g_Config.bHighQualityDepth && (features & GPU_SUPPORTS_ACCURATE_DEPTH) != 0) {
features |= GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT;
}
@ -290,7 +256,7 @@ void GPU_Vulkan::CheckGPUFeatures() {
features |= GPU_ROUND_DEPTH_TO_16BIT;
}
gstate_c.featureFlags = features;
return features;
}
void GPU_Vulkan::BeginHostFrame() {
@ -298,7 +264,7 @@ void GPU_Vulkan::BeginHostFrame() {
UpdateCmdInfo();
if (resized_) {
CheckGPUFeatures();
gstate_c.featureFlags = CheckGPUFeatures();
// In case the GPU changed.
BuildReportingInfo();
framebufferManager_->Resized();
@ -537,7 +503,7 @@ void GPU_Vulkan::DeviceRestore() {
GPUCommon::DeviceRestore();
InitDeviceObjects();
CheckGPUFeatures();
gstate_c.featureFlags = CheckGPUFeatures();
BuildReportingInfo();
UpdateCmdInfo();

View File

@ -38,7 +38,7 @@ public:
~GPU_Vulkan();
// This gets called on startup and when we get back from settings.
void CheckGPUFeatures() override;
u32 CheckGPUFeatures() const override;
bool IsReady() override;
void CancelReady() override;

View File

@ -170,7 +170,7 @@ static std::string CutFromMain(std::string str) {
}
static VulkanPipeline *CreateVulkanPipeline(VulkanRenderManager *renderManager, VkPipelineCache pipelineCache,
VkPipelineLayout layout, const VulkanPipelineRasterStateKey &key,
VkPipelineLayout layout, PipelineFlags pipelineFlags, const VulkanPipelineRasterStateKey &key,
const DecVtxFormat *decFmt, VulkanVertexShader *vs, VulkanFragmentShader *fs, bool useHwTransform, u32 variantBitmask) {
VulkanPipeline *vulkanPipeline = new VulkanPipeline();
VKRGraphicsPipelineDesc *desc = &vulkanPipeline->desc;
@ -299,14 +299,14 @@ static VulkanPipeline *CreateVulkanPipeline(VulkanRenderManager *renderManager,
VKRGraphicsPipeline *pipeline = renderManager->CreateGraphicsPipeline(desc, variantBitmask, "game");
vulkanPipeline->pipeline = pipeline;
vulkanPipeline->flags = 0;
if (useBlendConstant)
vulkanPipeline->flags |= PIPELINE_FLAG_USES_BLEND_CONSTANT;
pipelineFlags |= PipelineFlags::USES_BLEND_CONSTANT;
if (key.topology == VK_PRIMITIVE_TOPOLOGY_LINE_LIST || key.topology == VK_PRIMITIVE_TOPOLOGY_LINE_STRIP)
vulkanPipeline->flags |= PIPELINE_FLAG_USES_LINES;
pipelineFlags |= PipelineFlags::USES_LINES;
if (dss.depthTestEnable || dss.stencilTestEnable) {
vulkanPipeline->flags |= PIPELINE_FLAG_USES_DEPTH_STENCIL;
pipelineFlags |= PipelineFlags::USES_DEPTH_STENCIL;
}
vulkanPipeline->pipelineFlags = pipelineFlags;
return vulkanPipeline;
}
@ -329,8 +329,13 @@ VulkanPipeline *PipelineManagerVulkan::GetOrCreatePipeline(VulkanRenderManager *
if (iter)
return iter;
PipelineFlags pipelineFlags = (PipelineFlags)0;
if (fs->Flags() & FragmentShaderFlags::INPUT_ATTACHMENT) {
pipelineFlags |= PipelineFlags::USES_INPUT_ATTACHMENT;
}
VulkanPipeline *pipeline = CreateVulkanPipeline(
renderManager, pipelineCache_, layout,
renderManager, pipelineCache_, layout, pipelineFlags,
rasterKey, decFmt, vs, fs, useHwTransform, variantBitmask);
pipelines_.Insert(key, pipeline);

View File

@ -55,11 +55,12 @@ struct VulkanPipelineKey {
struct VulkanPipeline {
VKRGraphicsPipeline *pipeline;
VKRGraphicsPipelineDesc desc;
int flags; // PipelineFlags enum above.
PipelineFlags pipelineFlags; // PipelineFlags enum above.
bool UsesBlendConstant() const { return (flags & PIPELINE_FLAG_USES_BLEND_CONSTANT) != 0; }
bool UsesLines() const { return (flags & PIPELINE_FLAG_USES_LINES) != 0; }
bool UsesDepthStencil() const { return (flags & PIPELINE_FLAG_USES_DEPTH_STENCIL) != 0; }
bool UsesBlendConstant() const { return (pipelineFlags & PipelineFlags::USES_BLEND_CONSTANT) != 0; }
bool UsesLines() const { return (pipelineFlags & PipelineFlags::USES_LINES) != 0; }
bool UsesDepthStencil() const { return (pipelineFlags & PipelineFlags::USES_DEPTH_STENCIL) != 0; }
bool UsesInputAttachment() const { return (pipelineFlags & PipelineFlags::USES_INPUT_ATTACHMENT) != 0; }
u32 GetVariantsBitmask() const;
};
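For the |= and & usage above to compile, PipelineFlags has presumably become a flags-style enum class with overloaded bitwise operators; a minimal sketch (the bit values are assumptions, and the repository may well generate the operators with a helper macro):
enum class PipelineFlags : u32 {
    NONE = 0,
    USES_BLEND_CONSTANT = (1 << 0),
    USES_LINES = (1 << 1),
    USES_DEPTH_STENCIL = (1 << 2),
    USES_INPUT_ATTACHMENT = (1 << 3),
};
inline PipelineFlags operator |(PipelineFlags a, PipelineFlags b) { return PipelineFlags((u32)a | (u32)b); }
inline PipelineFlags &operator |=(PipelineFlags &a, PipelineFlags b) { return a = a | b; }
inline bool operator &(PipelineFlags a, PipelineFlags b) { return ((u32)a & (u32)b) != 0; }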

View File

@ -153,7 +153,7 @@ void DrawEngineVulkan::ConvertStateToVulkanKey(FramebufferManagerVulkan &fbManag
GenericLogicState &logicState = pipelineState_.logicState;
if (pipelineState_.FramebufferRead()) {
ApplyFramebufferRead(&fboTexNeedsBind_);
ApplyFramebufferRead(&fboTexBindState_);
// The shader takes over the responsibility for blending, so recompute.
// We might still end up using blend to write something to alpha.
ApplyStencilReplaceAndLogicOpIgnoreBlend(blendState.replaceAlphaWithStencil, blendState);
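The changed call means ApplyFramebufferRead now reports how the framebuffer should be bound rather than just whether a bind is pending; its signature presumably changed along these lines (assumed):
// Before: void ApplyFramebufferRead(bool *fboTexNeedsBind);
void ApplyFramebufferRead(FBOTexState *fboTexState);
The resulting state is consumed in BindShaderBlendTex() below.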
@ -364,15 +364,23 @@ void DrawEngineVulkan::BindShaderBlendTex() {
// TODO: At this point, we know if the vertices are full alpha or not.
// Set the nearest/linear here (since we correctly know if alpha/color tests are needed)?
if (!gstate.isModeClear()) {
if (fboTexNeedsBind_) {
if (fboTexBindState_ == FBO_TEX_COPY_BIND_TEX) {
bool bindResult = framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY);
_dbg_assert_(bindResult);
boundSecondary_ = (VkImageView)draw_->GetNativeObject(Draw::NativeObject::BOUND_TEXTURE1_IMAGEVIEW);
boundSecondaryIsInputAttachment_ = false;
fboTexBound_ = true;
fboTexNeedsBind_ = false;
fboTexBindState_ = FBO_TEX_NONE;
// Must dirty blend state here so we re-copy next time. Example: Lunar's spell effects.
dirtyRequiresRecheck_ |= DIRTY_BLEND_STATE;
} else if (fboTexBindState_ == FBO_TEX_READ_FRAMEBUFFER) {
draw_->BindCurrentFramebufferForColorInput();
boundSecondary_ = (VkImageView)draw_->GetNativeObject(Draw::NativeObject::BOUND_FRAMEBUFFER_COLOR_IMAGEVIEW);
boundSecondaryIsInputAttachment_ = true;
fboTexBindState_ = FBO_TEX_NONE;
} else {
boundSecondary_ = VK_NULL_HANDLE;
}
}
}
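The three states driving this branch suggest an enum along these lines (only the constant names are certain from this diff; the comments are interpretation):
enum FBOTexState {
    FBO_TEX_NONE,              // no framebuffer self-read pending
    FBO_TEX_COPY_BIND_TEX,     // copy the render target, bind the copy as an ordinary texture
    FBO_TEX_READ_FRAMEBUFFER,  // bind the live render target as an input attachment
};
The copy path appears to remain as the fallback when the input-attachment route isn't taken.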

View File

@ -116,7 +116,7 @@ bool SDLVulkanGraphicsContext::Init(SDL_Window *&window, int x, int y, int mode,
return false;
}
draw_ = Draw::T3DCreateVulkanContext(vulkan_, false);
draw_ = Draw::T3DCreateVulkanContext(vulkan_);
SetGPUBackend(GPUBackend::VULKAN);
bool success = draw_->CreatePresets();
_assert_(success);

View File

@ -56,6 +56,8 @@ public:
parent->Add(scroll);
}
const char *tag() const override { return "ButtonShape"; }
private:
int *setting_;
};
@ -84,6 +86,8 @@ public:
parent->Add(scroll);
}
const char *tag() const override { return "ButtonIcon"; }
private:
int *setting_;
};

View File

@ -28,6 +28,8 @@ class ComboKeyScreen : public UIDialogScreenWithBackground {
public:
ComboKeyScreen(int id): id_(id) {}
const char *tag() const override { return "ComboKey"; }
void CreateViews() override;
void onFinish(DialogResult result) override;

View File

@ -313,7 +313,7 @@ UI::EventReturn ControlMappingScreen::OnVisualizeMapping(UI::EventParams &params
}
void ControlMappingScreen::dialogFinished(const Screen *dialog, DialogResult result) {
if (result == DR_OK && dialog->tag() == "listpopup") {
if (result == DR_OK && std::string(dialog->tag()) == "listpopup") {
ListPopupScreen *popup = (ListPopupScreen *)dialog;
KeyMap::AutoConfForPad(popup->GetChoiceString());
}
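Since tag() now returns const char * (see the ControlMappingScreen header change below), a bare == would compare pointers, not contents; wrapping one side in std::string forces a value comparison. An equivalent without the temporary:
if (result == DR_OK && !strcmp(dialog->tag(), "listpopup")) {  // strcmp from <cstring> compares contents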

View File

@ -35,7 +35,7 @@ class SingleControlMapper;
class ControlMappingScreen : public UIDialogScreenWithBackground {
public:
ControlMappingScreen() {}
std::string tag() const override { return "control mapping"; }
const char *tag() const override { return "ControlMapping"; }
protected:
void CreateViews() override;
@ -47,7 +47,7 @@ private:
UI::EventReturn OnAutoConfigure(UI::EventParams &params);
UI::EventReturn OnVisualizeMapping(UI::EventParams &params);
virtual void dialogFinished(const Screen *dialog, DialogResult result) override;
void dialogFinished(const Screen *dialog, DialogResult result) override;
UI::ScrollView *rightScroll_;
std::vector<SingleControlMapper *> mappers_;
@ -61,17 +61,19 @@ public:
pspBtn_ = btn;
}
virtual bool key(const KeyInput &key) override;
virtual bool axis(const AxisInput &axis) override;
const char *tag() const override { return "KeyMappingNewKey"; }
bool key(const KeyInput &key) override;
bool axis(const AxisInput &axis) override;
void SetDelay(float t);
protected:
void CreatePopupContents(UI::ViewGroup *parent) override;
virtual bool FillVertical() const override { return false; }
virtual bool ShowButtons() const override { return true; }
virtual void OnCompleted(DialogResult result) override {}
bool FillVertical() const override { return false; }
bool ShowButtons() const override { return true; }
void OnCompleted(DialogResult result) override {}
private:
int pspBtn_;
@ -87,6 +89,8 @@ public:
pspBtn_ = btn;
}
const char *tag() const override { return "KeyMappingNewMouseKey"; }
bool key(const KeyInput &key) override;
bool axis(const AxisInput &axis) override;
@ -114,6 +118,8 @@ public:
void update() override;
const char *tag() const override { return "AnalogSetup"; }
protected:
void CreateViews() override;
@ -144,6 +150,8 @@ public:
bool key(const KeyInput &key) override;
bool axis(const AxisInput &axis) override;
const char *tag() const override { return "TouchTest"; }
protected:
struct TrackedTouch {
int id;
@ -171,6 +179,8 @@ class VisualMappingScreen : public UIDialogScreenWithBackground {
public:
VisualMappingScreen() {}
const char *tag() const override { return "VisualMapping"; }
protected:
void CreateViews() override;

View File

@ -41,6 +41,8 @@ public:
void update() override;
void onFinish(DialogResult result) override;
const char *tag() const override { return "CwCheat"; }
protected:
void CreateViews() override;

View File

@ -85,7 +85,7 @@ static const char *logLevelList[] = {
"Verb."
};
void DevMenu::CreatePopupContents(UI::ViewGroup *parent) {
void DevMenuScreen::CreatePopupContents(UI::ViewGroup *parent) {
using namespace UI;
auto dev = GetI18NCategory("Developer");
auto sy = GetI18NCategory("System");
@ -94,25 +94,25 @@ void DevMenu::CreatePopupContents(UI::ViewGroup *parent) {
LinearLayout *items = new LinearLayout(ORIENT_VERTICAL);
#if !defined(MOBILE_DEVICE)
items->Add(new Choice(dev->T("Log View")))->OnClick.Handle(this, &DevMenu::OnLogView);
items->Add(new Choice(dev->T("Log View")))->OnClick.Handle(this, &DevMenuScreen::OnLogView);
#endif
items->Add(new Choice(dev->T("Logging Channels")))->OnClick.Handle(this, &DevMenu::OnLogConfig);
items->Add(new Choice(sy->T("Developer Tools")))->OnClick.Handle(this, &DevMenu::OnDeveloperTools);
items->Add(new Choice(dev->T("Jit Compare")))->OnClick.Handle(this, &DevMenu::OnJitCompare);
items->Add(new Choice(dev->T("Shader Viewer")))->OnClick.Handle(this, &DevMenu::OnShaderView);
items->Add(new Choice(dev->T("Logging Channels")))->OnClick.Handle(this, &DevMenuScreen::OnLogConfig);
items->Add(new Choice(sy->T("Developer Tools")))->OnClick.Handle(this, &DevMenuScreen::OnDeveloperTools);
items->Add(new Choice(dev->T("Jit Compare")))->OnClick.Handle(this, &DevMenuScreen::OnJitCompare);
items->Add(new Choice(dev->T("Shader Viewer")))->OnClick.Handle(this, &DevMenuScreen::OnShaderView);
if (g_Config.iGPUBackend == (int)GPUBackend::VULKAN) {
// TODO: Make a new allocator visualizer for VMA.
// items->Add(new CheckBox(&g_Config.bShowAllocatorDebug, dev->T("Allocator Viewer")));
items->Add(new CheckBox(&g_Config.bShowGpuProfile, dev->T("GPU Profile")));
}
items->Add(new Choice(dev->T("Toggle Freeze")))->OnClick.Handle(this, &DevMenu::OnFreezeFrame);
items->Add(new Choice(dev->T("Dump Frame GPU Commands")))->OnClick.Handle(this, &DevMenu::OnDumpFrame);
items->Add(new Choice(dev->T("Toggle Audio Debug")))->OnClick.Handle(this, &DevMenu::OnToggleAudioDebug);
items->Add(new Choice(dev->T("Toggle Freeze")))->OnClick.Handle(this, &DevMenuScreen::OnFreezeFrame);
items->Add(new Choice(dev->T("Dump Frame GPU Commands")))->OnClick.Handle(this, &DevMenuScreen::OnDumpFrame);
items->Add(new Choice(dev->T("Toggle Audio Debug")))->OnClick.Handle(this, &DevMenuScreen::OnToggleAudioDebug);
#ifdef USE_PROFILER
items->Add(new CheckBox(&g_Config.bShowFrameProfiler, dev->T("Frame Profiler"), ""));
#endif
items->Add(new CheckBox(&g_Config.bDrawFrameGraph, dev->T("Draw Frametimes Graph")));
items->Add(new Choice(dev->T("Reset limited logging")))->OnClick.Handle(this, &DevMenu::OnResetLimitedLogging);
items->Add(new Choice(dev->T("Reset limited logging")))->OnClick.Handle(this, &DevMenuScreen::OnResetLimitedLogging);
scroll->Add(items);
parent->Add(scroll);
@ -123,48 +123,48 @@ void DevMenu::CreatePopupContents(UI::ViewGroup *parent) {
}
}
UI::EventReturn DevMenu::OnToggleAudioDebug(UI::EventParams &e) {
UI::EventReturn DevMenuScreen::OnToggleAudioDebug(UI::EventParams &e) {
g_Config.bShowAudioDebug = !g_Config.bShowAudioDebug;
return UI::EVENT_DONE;
}
UI::EventReturn DevMenu::OnResetLimitedLogging(UI::EventParams &e) {
UI::EventReturn DevMenuScreen::OnResetLimitedLogging(UI::EventParams &e) {
Reporting::ResetCounts();
return UI::EVENT_DONE;
}
UI::EventReturn DevMenu::OnLogView(UI::EventParams &e) {
UI::EventReturn DevMenuScreen::OnLogView(UI::EventParams &e) {
UpdateUIState(UISTATE_PAUSEMENU);
screenManager()->push(new LogScreen());
return UI::EVENT_DONE;
}
UI::EventReturn DevMenu::OnLogConfig(UI::EventParams &e) {
UI::EventReturn DevMenuScreen::OnLogConfig(UI::EventParams &e) {
UpdateUIState(UISTATE_PAUSEMENU);
screenManager()->push(new LogConfigScreen());
return UI::EVENT_DONE;
}
UI::EventReturn DevMenu::OnDeveloperTools(UI::EventParams &e) {
UI::EventReturn DevMenuScreen::OnDeveloperTools(UI::EventParams &e) {
UpdateUIState(UISTATE_PAUSEMENU);
screenManager()->push(new DeveloperToolsScreen());
return UI::EVENT_DONE;
}
UI::EventReturn DevMenu::OnJitCompare(UI::EventParams &e) {
UI::EventReturn DevMenuScreen::OnJitCompare(UI::EventParams &e) {
UpdateUIState(UISTATE_PAUSEMENU);
screenManager()->push(new JitCompareScreen());
return UI::EVENT_DONE;
}
UI::EventReturn DevMenu::OnShaderView(UI::EventParams &e) {
UI::EventReturn DevMenuScreen::OnShaderView(UI::EventParams &e) {
UpdateUIState(UISTATE_PAUSEMENU);
if (gpu) // Avoid crashing if chosen while the game is being loaded.
screenManager()->push(new ShaderListScreen());
return UI::EVENT_DONE;
}
UI::EventReturn DevMenu::OnFreezeFrame(UI::EventParams &e) {
UI::EventReturn DevMenuScreen::OnFreezeFrame(UI::EventParams &e) {
if (PSP_CoreParameter().frozen) {
PSP_CoreParameter().frozen = false;
} else {
@ -173,12 +173,12 @@ UI::EventReturn DevMenu::OnFreezeFrame(UI::EventParams &e) {
return UI::EVENT_DONE;
}
UI::EventReturn DevMenu::OnDumpFrame(UI::EventParams &e) {
UI::EventReturn DevMenuScreen::OnDumpFrame(UI::EventParams &e) {
gpu->DumpNextFrame();
return UI::EVENT_DONE;
}
void DevMenu::dialogFinished(const Screen *dialog, DialogResult result) {
void DevMenuScreen::dialogFinished(const Screen *dialog, DialogResult result) {
UpdateUIState(UISTATE_INGAME);
// Close when a subscreen got closed.
// TODO: a bug in screenmanager causes this not to work here.
@ -514,7 +514,15 @@ void SystemInfoScreen::CreateViews() {
const std::string apiNameKey = draw->GetInfoString(InfoField::APINAME);
const char *apiName = gr->T(apiNameKey);
deviceSpecs->Add(new InfoItem(si->T("3D API"), apiName));
deviceSpecs->Add(new InfoItem(si->T("Vendor"), draw->GetInfoString(InfoField::VENDORSTRING)));
// TODO: Not really vendor, on most APIs it's a device name (GL calls it vendor though).
std::string vendorString;
if (draw->GetDeviceCaps().deviceID != 0) {
vendorString = StringFromFormat("%s (%08x)", draw->GetInfoString(InfoField::VENDORSTRING).c_str(), draw->GetDeviceCaps().deviceID);
} else {
vendorString = draw->GetInfoString(InfoField::VENDORSTRING);
}
deviceSpecs->Add(new InfoItem(si->T("Vendor"), vendorString));
std::string vendor = draw->GetInfoString(InfoField::VENDOR);
if (vendor.size())
deviceSpecs->Add(new InfoItem(si->T("Vendor (detected)"), vendor));

Some files were not shown because too many files have changed in this diff.