Mirror of https://github.com/hrydgard/ppsspp.git

Merge branch 'master' into compat_openxr_gta
Commit c4dbd6d045
@@ -623,6 +623,8 @@ add_library(Common STATIC
    Common/GPU/Vulkan/VulkanRenderManager.h
    Common/GPU/Vulkan/VulkanQueueRunner.cpp
    Common/GPU/Vulkan/VulkanQueueRunner.h
    Common/GPU/Vulkan/VulkanFrameData.cpp
    Common/GPU/Vulkan/VulkanFrameData.h
    Common/Input/GestureDetector.cpp
    Common/Input/GestureDetector.h
    Common/Input/KeyCodes.h
@@ -441,6 +441,7 @@
    <ClInclude Include="GPU\Vulkan\VulkanBarrier.h" />
    <ClInclude Include="GPU\Vulkan\VulkanContext.h" />
    <ClInclude Include="GPU\Vulkan\VulkanDebug.h" />
    <ClInclude Include="GPU\Vulkan\VulkanFrameData.h" />
    <ClInclude Include="GPU\Vulkan\VulkanImage.h" />
    <ClInclude Include="GPU\Vulkan\VulkanLoader.h" />
    <ClInclude Include="GPU\Vulkan\VulkanMemory.h" />
@@ -861,6 +862,7 @@
    <ClCompile Include="GPU\Vulkan\VulkanBarrier.cpp" />
    <ClCompile Include="GPU\Vulkan\VulkanContext.cpp" />
    <ClCompile Include="GPU\Vulkan\VulkanDebug.cpp" />
    <ClCompile Include="GPU\Vulkan\VulkanFrameData.cpp" />
    <ClCompile Include="GPU\Vulkan\VulkanImage.cpp" />
    <ClCompile Include="GPU\Vulkan\VulkanLoader.cpp" />
    <ClCompile Include="GPU\Vulkan\VulkanMemory.cpp" />
@@ -419,6 +419,9 @@
      <Filter>GPU\Vulkan</Filter>
    </ClInclude>
    <ClInclude Include="RiscVEmitter.h" />
    <ClInclude Include="GPU\Vulkan\VulkanFrameData.h">
      <Filter>GPU\Vulkan</Filter>
    </ClInclude>
  </ItemGroup>
  <ItemGroup>
    <ClCompile Include="ABI.cpp" />
@@ -791,6 +794,9 @@
      <Filter>GPU\Vulkan</Filter>
    </ClCompile>
    <ClCompile Include="RiscVEmitter.cpp" />
    <ClCompile Include="GPU\Vulkan\VulkanFrameData.cpp">
      <Filter>GPU\Vulkan</Filter>
    </ClCompile>
  </ItemGroup>
  <ItemGroup>
    <Filter Include="Crypto">
@@ -270,6 +270,7 @@ D3D11DrawContext::D3D11DrawContext(ID3D11Device *device, ID3D11DeviceContext *de
    caps_.anisoSupported = true;
    caps_.textureNPOTFullySupported = true;
    caps_.fragmentShaderDepthWriteSupported = true;
    caps_.blendMinMaxSupported = true;

    D3D11_FEATURE_DATA_D3D11_OPTIONS options{};
    HRESULT result = device_->CheckFeatureSupport(D3D11_FEATURE_D3D11_OPTIONS, &options, sizeof(options));
@@ -646,6 +646,63 @@ void D3D9Context::InvalidateCachedState() {
    curPipeline_ = nullptr;
}

// TODO: Move this detection elsewhere when it's needed elsewhere, not before. It's ugly.
// Source: https://envytools.readthedocs.io/en/latest/hw/pciid.html#gf100
enum NVIDIAGeneration {
    NV_PRE_KEPLER,
    NV_KEPLER,
    NV_MAXWELL,
    NV_PASCAL,
    NV_VOLTA,
    NV_TURING,  // or later
};

static NVIDIAGeneration NVIDIAGetDeviceGeneration(int deviceID) {
    if (deviceID >= 0x1180 && deviceID <= 0x11bf)
        return NV_KEPLER;  // GK104
    if (deviceID >= 0x11c0 && deviceID <= 0x11fa)
        return NV_KEPLER;  // GK106
    if (deviceID >= 0x0fc0 && deviceID <= 0x0fff)
        return NV_KEPLER;  // GK107
    if (deviceID >= 0x1003 && deviceID <= 0x1028)
        return NV_KEPLER;  // GK110(B)
    if (deviceID >= 0x1280 && deviceID <= 0x12ba)
        return NV_KEPLER;  // GK208
    if (deviceID >= 0x1381 && deviceID <= 0x13b0)
        return NV_MAXWELL;  // GM107
    if (deviceID >= 0x1340 && deviceID <= 0x134d)
        return NV_MAXWELL;  // GM108
    if (deviceID >= 0x13c0 && deviceID <= 0x13d9)
        return NV_MAXWELL;  // GM204
    if (deviceID >= 0x1401 && deviceID <= 0x1427)
        return NV_MAXWELL;  // GM206
    if (deviceID >= 0x15f7 && deviceID <= 0x15f9)
        return NV_PASCAL;  // GP100
    if (deviceID >= 0x1b00 && deviceID <= 0x1b38)
        return NV_PASCAL;  // GP102
    if (deviceID >= 0x1b80 && deviceID <= 0x1be1)
        return NV_PASCAL;  // GP104
    if (deviceID >= 0x1c02 && deviceID <= 0x1c62)
        return NV_PASCAL;  // GP106
    if (deviceID >= 0x1c81 && deviceID <= 0x1c92)
        return NV_PASCAL;  // GP107
    if (deviceID >= 0x1d01 && deviceID <= 0x1d12)
        return NV_PASCAL;  // GP108
    if (deviceID >= 0x1d81 && deviceID <= 0x1dba)
        return NV_VOLTA;  // GV100
    if (deviceID >= 0x1e02 && deviceID <= 0x1e3c)
        return NV_TURING;  // TU102
    if (deviceID >= 0x1e82 && deviceID <= 0x1ed0)
        return NV_TURING;  // TU104
    if (deviceID >= 0x1f02 && deviceID <= 0x1f51)
        return NV_TURING;  // TU106
    if (deviceID >= 0x1e02)
        return NV_TURING;  // More TU models or later, probably.
    return NV_PRE_KEPLER;
}

#define FB_DIV 1
#define FOURCC_INTZ ((D3DFORMAT)(MAKEFOURCC('I', 'N', 'T', 'Z')))
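NVIDIAGetDeviceGeneration() reduces a PCI device ID to a coarse architecture ordering so that driver workarounds can be gated on "older than X". A minimal sketch of that kind of gate (hypothetical call site, not part of this commit; the real use is in the vendor switch further down):

    // Sketch: permit NaN-based vertex range culling only on Kepler and newer.
    static bool CanUseNaNRangeCulling(int deviceID) {
        return NVIDIAGetDeviceGeneration(deviceID) >= NV_KEPLER;
    }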
@@ -665,14 +722,24 @@ D3D9Context::D3D9Context(IDirect3D9 *d3d, IDirect3D9Ex *d3dEx, int adapterId, ID
        caps_.vendor = GPUVendor::VENDOR_UNKNOWN;
    }

    if (!FAILED(device->GetDeviceCaps(&d3dCaps_))) {
    D3DCAPS9 caps;
    ZeroMemory(&caps, sizeof(caps));
    HRESULT result = 0;
    if (deviceEx_) {
        result = deviceEx_->GetDeviceCaps(&caps);
    } else {
        result = device_->GetDeviceCaps(&caps);
    }

    if (SUCCEEDED(result)) {
        sprintf(shadeLangVersion_, "PS: %04x VS: %04x", d3dCaps_.PixelShaderVersion & 0xFFFF, d3dCaps_.VertexShaderVersion & 0xFFFF);
    } else {
        WARN_LOG(G3D, "Direct3D9: Failed to get the device caps!");
        strcpy(shadeLangVersion_, "N/A");
    }

    caps_.deviceID = identifier_.DeviceId;
    caps_.multiViewport = false;
    caps_.anisoSupported = true;
    caps_.depthRangeMinusOneToOne = false;
    caps_.preferredDepthBufferFormat = DataFormat::D24_S8;
    caps_.dualSourceBlend = false;
@@ -684,8 +751,30 @@ D3D9Context::D3D9Context(IDirect3D9 *d3d, IDirect3D9Ex *d3dEx, int adapterId, ID
    caps_.framebufferDepthCopySupported = false;
    caps_.framebufferSeparateDepthCopySupported = false;
    caps_.texture3DSupported = true;
    caps_.textureNPOTFullySupported = true;
    caps_.fragmentShaderDepthWriteSupported = true;
    caps_.blendMinMaxSupported = true;

    if ((caps.RasterCaps & D3DPRASTERCAPS_ANISOTROPY) != 0 && caps.MaxAnisotropy > 1) {
        caps_.anisoSupported = true;
    }
    if ((caps.TextureCaps & (D3DPTEXTURECAPS_NONPOW2CONDITIONAL | D3DPTEXTURECAPS_POW2)) == 0) {
        caps_.textureNPOTFullySupported = true;
    }

    // VS range culling (killing triangles in the vertex shader using NaN) causes problems on Intel.
    // Also causes problems on old NVIDIA.
    switch (caps_.vendor) {
    case Draw::GPUVendor::VENDOR_INTEL:
        bugs_.Infest(Bugs::BROKEN_NAN_IN_CONDITIONAL);
        break;
    case Draw::GPUVendor::VENDOR_NVIDIA:
        // Older NVIDIAs don't seem to like NaNs in their DX9 vertex shaders.
        // No idea if KEPLER is the right cutoff, but let's go with it.
        if (NVIDIAGetDeviceGeneration(caps_.deviceID) < NV_KEPLER) {
            bugs_.Infest(Bugs::BROKEN_NAN_IN_CONDITIONAL);
        }
        break;
    }

    if (d3d) {
        D3DDISPLAYMODE displayMode;
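Once a vendor is known to mishandle NaN in conditionals, the bug flag set here can be queried wherever shaders are generated. A hedged sketch of such a check (assuming the Bugs class exposes a Has() query to match the Infest() calls above):

    // Sketch: choose a NaN-free code path when the bug flag is infested.
    if (bugs_.Has(Bugs::BROKEN_NAN_IN_CONDITIONAL)) {
        // Emit an ordinary conditional in the vertex shader instead of NaN-based culling.
    }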
@@ -814,7 +814,7 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last
    int logicOp = -1;
    bool logicEnabled = false;
#endif
    bool clipDistance0Enabled = false;
    bool clipDistanceEnabled[8]{};
    GLuint blendEqColor = (GLuint)-1;
    GLuint blendEqAlpha = (GLuint)-1;
@@ -1123,14 +1123,18 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last
        {
            if (curProgram != c.program.program) {
                glUseProgram(c.program.program->program);
                if (c.program.program->use_clip_distance0 != clipDistance0Enabled) {
                    if (c.program.program->use_clip_distance0)
                        glEnable(GL_CLIP_DISTANCE0);
                    else
                        glDisable(GL_CLIP_DISTANCE0);
                    clipDistance0Enabled = c.program.program->use_clip_distance0;
                }
                curProgram = c.program.program;

                for (size_t i = 0; i < ARRAY_SIZE(clipDistanceEnabled); ++i) {
                    if (c.program.program->use_clip_distance[i] == clipDistanceEnabled[i])
                        continue;

                    if (c.program.program->use_clip_distance[i])
                        glEnable(GL_CLIP_DISTANCE0 + (GLenum)i);
                    else
                        glDisable(GL_CLIP_DISTANCE0 + (GLenum)i);
                    clipDistanceEnabled[i] = c.program.program->use_clip_distance[i];
                }
            }
            CHECK_GL_ERROR_IF_DEBUG();
            break;
@@ -1371,8 +1375,10 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last
        glDisable(GL_COLOR_LOGIC_OP);
    }
#endif
    if (clipDistance0Enabled)
        glDisable(GL_CLIP_DISTANCE0);
    for (size_t i = 0; i < ARRAY_SIZE(clipDistanceEnabled); ++i) {
        if (clipDistanceEnabled[i])
            glDisable(GL_CLIP_DISTANCE0 + (GLenum)i);
    }
    if ((colorMask & 15) != 15)
        glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
    CHECK_GL_ERROR_IF_DEBUG();
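The pattern in both hunks is GL state shadowing: a small CPU-side array mirrors the GL enable state so redundant glEnable/glDisable calls are skipped, and everything is switched off at the end of the pass. A generic sketch of the helper this amounts to (not part of the commit):

    // Sketch: shadow one GL enable bit per clip distance.
    static bool clipDistanceEnabled[8]{};

    static void SetClipDistance(size_t i, bool enable) {
        if (clipDistanceEnabled[i] == enable)
            return;  // already in the right state, skip the GL call
        if (enable)
            glEnable(GL_CLIP_DISTANCE0 + (GLenum)i);
        else
            glDisable(GL_CLIP_DISTANCE0 + (GLenum)i);
        clipDistanceEnabled[i] = enable;
    }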
@@ -579,7 +579,6 @@ void GLRenderManager::EndSubmitFrame(int frame) {
void GLRenderManager::Run(int frame) {
    BeginSubmitFrame(frame);

    FrameData &frameData = frameData_[frame];

    auto &stepsOnThread = frameData_[frame].steps;
@@ -91,6 +91,13 @@ public:
    std::string error;
};

struct GLRProgramFlags {
    bool supportDualSource : 1;
    bool useClipDistance0 : 1;
    bool useClipDistance1 : 1;
    bool useClipDistance2 : 1;
};

class GLRProgram {
public:
    ~GLRProgram() {
@@ -119,7 +126,7 @@ public:
    std::vector<Semantic> semantics_;
    std::vector<UniformLocQuery> queries_;
    std::vector<Initializer> initialize_;
    bool use_clip_distance0 = false;
    bool use_clip_distance[8]{};

    struct UniformInfo {
        int loc_;
@@ -427,15 +434,17 @@ public:
    // not be an active render pass.
    GLRProgram *CreateProgram(
        std::vector<GLRShader *> shaders, std::vector<GLRProgram::Semantic> semantics, std::vector<GLRProgram::UniformLocQuery> queries,
        std::vector<GLRProgram::Initializer> initializers, bool supportDualSource, bool useClipDistance0) {
        std::vector<GLRProgram::Initializer> initializers, const GLRProgramFlags &flags) {
        GLRInitStep step{ GLRInitStepType::CREATE_PROGRAM };
        _assert_(shaders.size() <= ARRAY_SIZE(step.create_program.shaders));
        step.create_program.program = new GLRProgram();
        step.create_program.program->semantics_ = semantics;
        step.create_program.program->queries_ = queries;
        step.create_program.program->initialize_ = initializers;
        step.create_program.program->use_clip_distance0 = useClipDistance0;
        step.create_program.support_dual_source = supportDualSource;
        step.create_program.program->use_clip_distance[0] = flags.useClipDistance0;
        step.create_program.program->use_clip_distance[1] = flags.useClipDistance1;
        step.create_program.program->use_clip_distance[2] = flags.useClipDistance2;
        step.create_program.support_dual_source = flags.supportDualSource;
        _assert_msg_(shaders.size() > 0, "Can't create a program with zero shaders");
        for (size_t i = 0; i < shaders.size(); i++) {
            step.create_program.shaders[i] = shaders[i];
@@ -1003,6 +1012,7 @@ private:
    bool readyForFence = true;
    bool readyForRun = false;
    bool readyForSubmit = false;

    bool skipSwap = false;
    GLRRunType type = GLRRunType::END;
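With GLRProgramFlags, call sites set named bits instead of passing a growing list of positional bools. A usage sketch (illustrative values; the LinkShaders change further down does the same with an empty flags struct):

    GLRProgramFlags flags{};
    flags.supportDualSource = true;  // illustrative; would come from caps detection
    flags.useClipDistance0 = true;
    GLRProgram *program = render->CreateProgram(shaders, semantics, queries, initializers, flags);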
@@ -552,6 +552,8 @@ OpenGLContext::OpenGLContext() {
    caps_.framebufferDepthBlitSupported = caps_.framebufferBlitSupported;
    caps_.framebufferStencilBlitSupported = caps_.framebufferBlitSupported;
    caps_.depthClampSupported = gl_extensions.ARB_depth_clamp;
    caps_.blendMinMaxSupported = gl_extensions.EXT_blend_minmax;

    if (gl_extensions.IsGLES) {
        caps_.clipDistanceSupported = gl_extensions.EXT_clip_cull_distance || gl_extensions.APPLE_clip_distance;
        caps_.cullDistanceSupported = gl_extensions.EXT_clip_cull_distance;
@@ -711,8 +713,10 @@ OpenGLContext::OpenGLContext() {
        }
    }

    if (gl_extensions.IsGLES) {
    // NOTE: We only support framebuffer fetch on ES3 due to past issues.
    if (gl_extensions.IsGLES && gl_extensions.GLES3) {
        caps_.framebufferFetchSupported = (gl_extensions.EXT_shader_framebuffer_fetch || gl_extensions.ARM_shader_framebuffer_fetch);

        if (gl_extensions.EXT_shader_framebuffer_fetch) {
            shaderLanguageDesc_.framebufferFetchExtension = "#extension GL_EXT_shader_framebuffer_fetch : require";
            shaderLanguageDesc_.lastFragData = gl_extensions.GLES3 ? "fragColor0" : "gl_LastFragData[0]";
@@ -1234,7 +1238,8 @@ bool OpenGLPipeline::LinkShaders() {
        }
    }

    program_ = render_->CreateProgram(linkShaders, semantics, queries, initialize, false, false);
    GLRProgramFlags flags{};
    program_ = render_->CreateProgram(linkShaders, semantics, queries, initialize, flags);
    return true;
}
@@ -4,7 +4,7 @@

void VulkanBarrier::Flush(VkCommandBuffer cmd) {
    if (!imageBarriers_.empty()) {
        vkCmdPipelineBarrier(cmd, srcStageMask_, dstStageMask_, 0, 0, nullptr, 0, nullptr, (uint32_t)imageBarriers_.size(), imageBarriers_.data());
        vkCmdPipelineBarrier(cmd, srcStageMask_, dstStageMask_, dependencyFlags_, 0, nullptr, 0, nullptr, (uint32_t)imageBarriers_.size(), imageBarriers_.data());
    }
    imageBarriers_.clear();
    srcStageMask_ = 0;
|
@ -21,6 +21,7 @@ public:
|
||||
) {
|
||||
srcStageMask_ |= srcStageMask;
|
||||
dstStageMask_ |= dstStageMask;
|
||||
dependencyFlags_ |= VK_DEPENDENCY_BY_REGION_BIT;
|
||||
|
||||
VkImageMemoryBarrier imageBarrier;
|
||||
imageBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
|
||||
@ -112,4 +113,5 @@ private:
|
||||
VkPipelineStageFlags srcStageMask_ = 0;
|
||||
VkPipelineStageFlags dstStageMask_ = 0;
|
||||
std::vector<VkImageMemoryBarrier> imageBarriers_;
|
||||
VkDependencyFlags dependencyFlags_ = 0;
|
||||
};
|
||||
|
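VulkanBarrier batches image barriers and ORs together the stage masks, and with this change it also accumulates VK_DEPENDENCY_BY_REGION_BIT, so one vkCmdPipelineBarrier covers the whole batch. A usage sketch (argument order follows the TransitionImage call shown later in this diff; the values are illustrative):

    VulkanBarrier barrier;
    barrier.TransitionImage(image, 0, 1, VK_IMAGE_ASPECT_COLOR_BIT,
        VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
        VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT,
        VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT);
    barrier.Flush(cmd);  // emits a single vkCmdPipelineBarrier for everything batched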
@@ -667,7 +667,10 @@ VkResult VulkanContext::CreateDevice() {
        extensionsLookup_.KHR_create_renderpass2 = true;
        extensionsLookup_.KHR_depth_stencil_resolve = EnableDeviceExtension(VK_KHR_DEPTH_STENCIL_RESOLVE_EXTENSION_NAME);
    }

    extensionsLookup_.EXT_shader_stencil_export = EnableDeviceExtension(VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME);
    extensionsLookup_.EXT_fragment_shader_interlock = EnableDeviceExtension(VK_EXT_FRAGMENT_SHADER_INTERLOCK_EXTENSION_NAME);
    extensionsLookup_.ARM_rasterization_order_attachment_access = EnableDeviceExtension(VK_ARM_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_EXTENSION_NAME);

    VkDeviceCreateInfo device_info{ VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO };
    device_info.queueCreateInfoCount = 1;
@@ -86,7 +86,6 @@ VKAPI_ATTR VkBool32 VKAPI_CALL VulkanDebugUtilsCallback(
    } else {
        WARN_LOG(G3D, "VKDEBUG: %s", msg.c_str());
    }

    // false indicates that layer should not bail-out of an
    // API call that had validation failures. This may mean that the
    // app dies inside the driver due to invalid parameter(s).

@@ -94,3 +93,4 @@ VKAPI_ATTR VkBool32 VKAPI_CALL VulkanDebugUtilsCallback(
    // keep that behavior here.
    return false;
}
Common/GPU/Vulkan/VulkanFrameData.cpp (new file, 207 lines)
@@ -0,0 +1,207 @@
#include "VulkanFrameData.h"
#include "Common/Log.h"

void FrameData::Init(VulkanContext *vulkan, int index) {
    this->index = index;
    VkDevice device = vulkan->GetDevice();

    VkCommandPoolCreateInfo cmd_pool_info = { VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO };
    cmd_pool_info.queueFamilyIndex = vulkan->GetGraphicsQueueFamilyIndex();
    cmd_pool_info.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT;
    VkResult res = vkCreateCommandPool(device, &cmd_pool_info, nullptr, &cmdPoolInit);
    _dbg_assert_(res == VK_SUCCESS);
    res = vkCreateCommandPool(device, &cmd_pool_info, nullptr, &cmdPoolMain);
    _dbg_assert_(res == VK_SUCCESS);

    VkCommandBufferAllocateInfo cmd_alloc = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO };
    cmd_alloc.commandPool = cmdPoolInit;
    cmd_alloc.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
    cmd_alloc.commandBufferCount = 1;
    res = vkAllocateCommandBuffers(device, &cmd_alloc, &initCmd);
    _dbg_assert_(res == VK_SUCCESS);
    cmd_alloc.commandPool = cmdPoolMain;
    res = vkAllocateCommandBuffers(device, &cmd_alloc, &mainCmd);
    res = vkAllocateCommandBuffers(device, &cmd_alloc, &presentCmd);
    _dbg_assert_(res == VK_SUCCESS);

    // Create the frame fence pre-signaled so it can be waited on immediately on the first frame.
    fence = vulkan->CreateFence(true);

    // This fence is used for synchronizing readbacks. It does not need to be created signaled.
    readbackFence = vulkan->CreateFence(false);

    VkQueryPoolCreateInfo query_ci{ VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO };
    query_ci.queryCount = MAX_TIMESTAMP_QUERIES;
    query_ci.queryType = VK_QUERY_TYPE_TIMESTAMP;
    res = vkCreateQueryPool(device, &query_ci, nullptr, &profile.queryPool);
}

void FrameData::Destroy(VulkanContext *vulkan) {
    VkDevice device = vulkan->GetDevice();
    // TODO: I don't think freeing command buffers is necessary before destroying a pool.
    vkFreeCommandBuffers(device, cmdPoolInit, 1, &initCmd);
    vkFreeCommandBuffers(device, cmdPoolMain, 1, &mainCmd);
    vkDestroyCommandPool(device, cmdPoolInit, nullptr);
    vkDestroyCommandPool(device, cmdPoolMain, nullptr);
    vkDestroyFence(device, fence, nullptr);
    vkDestroyFence(device, readbackFence, nullptr);
    vkDestroyQueryPool(device, profile.queryPool, nullptr);
}

void FrameData::AcquireNextImage(VulkanContext *vulkan, FrameDataShared &shared) {
    _dbg_assert_(!hasAcquired);

    // Get the index of the next available swapchain image, and a semaphore to block command buffer execution on.
    VkResult res = vkAcquireNextImageKHR(vulkan->GetDevice(), vulkan->GetSwapchain(), UINT64_MAX, shared.acquireSemaphore, (VkFence)VK_NULL_HANDLE, &curSwapchainImage);
    switch (res) {
    case VK_SUCCESS:
        hasAcquired = true;
        break;
    case VK_SUBOPTIMAL_KHR:
        hasAcquired = true;
        // Hopefully the resize will happen shortly. Ignore - one frame might look bad or something.
        WARN_LOG(G3D, "VK_SUBOPTIMAL_KHR returned - ignoring");
        break;
    case VK_ERROR_OUT_OF_DATE_KHR:
        // We do not set hasAcquired here!
        WARN_LOG(G3D, "VK_ERROR_OUT_OF_DATE_KHR returned from AcquireNextImage - processing the frame, but not presenting");
        skipSwap = true;
        break;
    default:
        // Weird, shouldn't get any other values. Maybe lost device?
        _assert_msg_(false, "vkAcquireNextImageKHR failed! result=%s", VulkanResultToString(res));
        break;
    }
}

VkResult FrameData::QueuePresent(VulkanContext *vulkan, FrameDataShared &shared) {
    _dbg_assert_(hasAcquired);
    hasAcquired = false;
    _dbg_assert_(!skipSwap);

    VkSwapchainKHR swapchain = vulkan->GetSwapchain();
    VkPresentInfoKHR present = { VK_STRUCTURE_TYPE_PRESENT_INFO_KHR };
    present.swapchainCount = 1;
    present.pSwapchains = &swapchain;
    present.pImageIndices = &curSwapchainImage;
    present.pWaitSemaphores = &shared.renderingCompleteSemaphore;
    present.waitSemaphoreCount = 1;

    return vkQueuePresentKHR(vulkan->GetGraphicsQueue(), &present);
}

VkCommandBuffer FrameData::GetInitCmd(VulkanContext *vulkan) {
    if (!hasInitCommands) {
        VkCommandBufferBeginInfo begin = {
            VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
            nullptr,
            VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT
        };
        vkResetCommandPool(vulkan->GetDevice(), cmdPoolInit, 0);
        VkResult res = vkBeginCommandBuffer(initCmd, &begin);
        if (res != VK_SUCCESS) {
            return VK_NULL_HANDLE;
        }
        hasInitCommands = true;
    }
    return initCmd;
}

void FrameData::SubmitPending(VulkanContext *vulkan, FrameSubmitType type, FrameDataShared &sharedData) {
    VkCommandBuffer cmdBufs[2];
    int numCmdBufs = 0;

    VkFence fenceToTrigger = VK_NULL_HANDLE;

    if (hasInitCommands) {
        if (profilingEnabled_) {
            // Pre-allocated query ID 1 - end of init cmdbuf.
            vkCmdWriteTimestamp(initCmd, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, profile.queryPool, 1);
        }

        VkResult res = vkEndCommandBuffer(initCmd);
        cmdBufs[numCmdBufs++] = initCmd;

        _assert_msg_(res == VK_SUCCESS, "vkEndCommandBuffer failed (init)! result=%s", VulkanResultToString(res));
        hasInitCommands = false;
    }

    if ((hasMainCommands || hasPresentCommands) && type == FrameSubmitType::Sync) {
        fenceToTrigger = readbackFence;
    }

    if (hasMainCommands) {
        VkResult res = vkEndCommandBuffer(mainCmd);
        _assert_msg_(res == VK_SUCCESS, "vkEndCommandBuffer failed (main)! result=%s", VulkanResultToString(res));

        cmdBufs[numCmdBufs++] = mainCmd;
        hasMainCommands = false;
    }

    if (hasPresentCommands) {
        VkResult res = vkEndCommandBuffer(presentCmd);
        _assert_msg_(res == VK_SUCCESS, "vkEndCommandBuffer failed (present)! result=%s", VulkanResultToString(res));

        cmdBufs[numCmdBufs++] = presentCmd;
        hasPresentCommands = false;

        if (type == FrameSubmitType::Present) {
            fenceToTrigger = fence;
        }
    }

    if (!numCmdBufs && fenceToTrigger == VK_NULL_HANDLE) {
        // Nothing to do.
        return;
    }

    VkSubmitInfo submit_info{ VK_STRUCTURE_TYPE_SUBMIT_INFO };
    VkPipelineStageFlags waitStage[1]{ VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT };
    if (type == FrameSubmitType::Present && !skipSwap) {
        _dbg_assert_(hasAcquired);
        submit_info.waitSemaphoreCount = 1;
        submit_info.pWaitSemaphores = &sharedData.acquireSemaphore;
        submit_info.pWaitDstStageMask = waitStage;
    }
    submit_info.commandBufferCount = (uint32_t)numCmdBufs;
    submit_info.pCommandBuffers = cmdBufs;
    if (type == FrameSubmitType::Present && !skipSwap) {
        submit_info.signalSemaphoreCount = 1;
        submit_info.pSignalSemaphores = &sharedData.renderingCompleteSemaphore;
    }
    VkResult res = vkQueueSubmit(vulkan->GetGraphicsQueue(), 1, &submit_info, fenceToTrigger);
    if (res == VK_ERROR_DEVICE_LOST) {
        _assert_msg_(false, "Lost the Vulkan device in vkQueueSubmit! If this happens again, switch Graphics Backend away from Vulkan");
    } else {
        _assert_msg_(res == VK_SUCCESS, "vkQueueSubmit failed (main)! result=%s", VulkanResultToString(res));
    }

    if (type == FrameSubmitType::Sync) {
        // Hard stall of the GPU, not ideal, but necessary so the CPU has the contents of the readback.
        vkWaitForFences(vulkan->GetDevice(), 1, &readbackFence, true, UINT64_MAX);
        vkResetFences(vulkan->GetDevice(), 1, &readbackFence);
    }

    // When !triggerFence, we notify after syncing with Vulkan.
    if (type == FrameSubmitType::Present || type == FrameSubmitType::Sync) {
        VERBOSE_LOG(G3D, "PULL: Frame %d.readyForFence = true", index);
        std::unique_lock<std::mutex> lock(push_mutex);
        readyForFence = true;  // misnomer in sync mode!
        push_condVar.notify_all();
    }
}

void FrameDataShared::Init(VulkanContext *vulkan) {
    VkSemaphoreCreateInfo semaphoreCreateInfo = { VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO };
    semaphoreCreateInfo.flags = 0;
    VkResult res = vkCreateSemaphore(vulkan->GetDevice(), &semaphoreCreateInfo, nullptr, &acquireSemaphore);
    _dbg_assert_(res == VK_SUCCESS);
    res = vkCreateSemaphore(vulkan->GetDevice(), &semaphoreCreateInfo, nullptr, &renderingCompleteSemaphore);
    _dbg_assert_(res == VK_SUCCESS);
}

void FrameDataShared::Destroy(VulkanContext *vulkan) {
    VkDevice device = vulkan->GetDevice();
    vkDestroySemaphore(device, acquireSemaphore, nullptr);
    vkDestroySemaphore(device, renderingCompleteSemaphore, nullptr);
}
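Taken together, these functions define one frame's lifecycle. A sketch of the per-frame flow they are designed for (the real sequencing lives in VulkanRenderManager and VulkanQueueRunner; names here follow the code above):

    FrameData &frame = frameData_[curFrame];
    frame.AcquireNextImage(vulkan, frameDataShared);   // swapchain image + acquire semaphore
    // ... record init/main/present command buffers ...
    frame.SubmitPending(vulkan, FrameSubmitType::Present, frameDataShared);
    if (!frame.skipSwap)
        frame.QueuePresent(vulkan, frameDataShared);   // waits on renderingCompleteSemaphore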
Common/GPU/Vulkan/VulkanFrameData.h (new file, 101 lines)
@@ -0,0 +1,101 @@
#pragma once

#include <cstdint>

#include <mutex>
#include <condition_variable>

#include "Common/GPU/Vulkan/VulkanContext.h"

struct VKRStep;

enum {
    MAX_TIMESTAMP_QUERIES = 128,
};

enum class VKRRunType {
    END,
    SYNC,
};

struct QueueProfileContext {
    VkQueryPool queryPool;
    std::vector<std::string> timestampDescriptions;
    std::string profileSummary;
    double cpuStartTime;
    double cpuEndTime;
};

struct FrameDataShared {
    // Permanent objects
    VkSemaphore acquireSemaphore = VK_NULL_HANDLE;
    VkSemaphore renderingCompleteSemaphore = VK_NULL_HANDLE;

    void Init(VulkanContext *vulkan);
    void Destroy(VulkanContext *vulkan);
};

enum class FrameSubmitType {
    Pending,
    Sync,
    Present,
};

// Per-frame data, round-robin so we can overlap submission with execution of the previous frame.
struct FrameData {
    std::mutex push_mutex;
    std::condition_variable push_condVar;

    std::mutex pull_mutex;
    std::condition_variable pull_condVar;

    bool readyForFence = true;
    bool readyForRun = false;  // protected by pull_mutex
    bool skipSwap = false;

    VkFence fence;
    VkFence readbackFence;  // Strictly speaking we might only need one global of these.

    // These are on different threads so need separate pools.
    VkCommandPool cmdPoolInit;  // Written to from main thread
    VkCommandPool cmdPoolMain;  // Written to from render thread, which also submits

    VkCommandBuffer initCmd;
    VkCommandBuffer mainCmd;
    VkCommandBuffer presentCmd;

    bool hasInitCommands = false;
    bool hasMainCommands = false;
    bool hasPresentCommands = false;

    bool hasAcquired = false;

    std::vector<VKRStep *> steps;

    // Swapchain.
    uint32_t curSwapchainImage = -1;

    // Profiling.
    QueueProfileContext profile;
    bool profilingEnabled_;

    void Init(VulkanContext *vulkan, int index);
    void Destroy(VulkanContext *vulkan);

    void AcquireNextImage(VulkanContext *vulkan, FrameDataShared &shared);
    VkResult QueuePresent(VulkanContext *vulkan, FrameDataShared &shared);
    VkCommandBuffer GetInitCmd(VulkanContext *vulkan);

    // This will only submit if we are actually recording init commands.
    void SubmitPending(VulkanContext *vulkan, FrameSubmitType type, FrameDataShared &shared);

    VKRRunType RunType() const {
        return runType_;
    }

    VKRRunType runType_ = VKRRunType::END;

private:
    // Metadata for logging etc
    int index;
};
@@ -241,6 +241,8 @@ struct VulkanExtensions {
    bool KHR_depth_stencil_resolve;
    bool EXT_shader_stencil_export;
    bool EXT_swapchain_colorspace;
    bool ARM_rasterization_order_attachment_access;
    bool EXT_fragment_shader_interlock;
    // bool EXT_depth_range_unrestricted;  // Allows depth outside [0.0, 1.0] in 32-bit float depth buffers.
};
@@ -34,8 +34,11 @@ RenderPassType MergeRPTypes(RenderPassType a, RenderPassType b) {
    if (a == b) {
        // Trivial merging case.
        return a;
    } else if (a == RP_TYPE_COLOR_DEPTH && b == RP_TYPE_COLOR_DEPTH_INPUT) {
        return RP_TYPE_COLOR_DEPTH_INPUT;
    } else if (a == RP_TYPE_COLOR_DEPTH_INPUT && b == RP_TYPE_COLOR_DEPTH) {
        return RP_TYPE_COLOR_DEPTH_INPUT;
    }
    // More cases to be added later.
    return a;
}
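The rule is symmetric: merging a plain color+depth pass with one that reads color as an input attachment upgrades the result to the input-attachment variant. Illustrative expectations (sketch, not test code from the commit):

    assert(MergeRPTypes(RP_TYPE_COLOR_DEPTH, RP_TYPE_COLOR_DEPTH) == RP_TYPE_COLOR_DEPTH);
    assert(MergeRPTypes(RP_TYPE_COLOR_DEPTH, RP_TYPE_COLOR_DEPTH_INPUT) == RP_TYPE_COLOR_DEPTH_INPUT);
    assert(MergeRPTypes(RP_TYPE_COLOR_DEPTH_INPUT, RP_TYPE_COLOR_DEPTH) == RP_TYPE_COLOR_DEPTH_INPUT);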
@@ -138,6 +141,171 @@ void VulkanQueueRunner::DestroyDeviceObjects() {
    renderPasses_.Clear();
}

bool VulkanQueueRunner::CreateSwapchain(VkCommandBuffer cmdInit) {
    VkResult res = vkGetSwapchainImagesKHR(vulkan_->GetDevice(), vulkan_->GetSwapchain(), &swapchainImageCount_, nullptr);
    _dbg_assert_(res == VK_SUCCESS);

    VkImage *swapchainImages = new VkImage[swapchainImageCount_];
    res = vkGetSwapchainImagesKHR(vulkan_->GetDevice(), vulkan_->GetSwapchain(), &swapchainImageCount_, swapchainImages);
    if (res != VK_SUCCESS) {
        ERROR_LOG(G3D, "vkGetSwapchainImagesKHR failed");
        delete[] swapchainImages;
        return false;
    }

    for (uint32_t i = 0; i < swapchainImageCount_; i++) {
        SwapchainImageData sc_buffer{};
        sc_buffer.image = swapchainImages[i];

        VkImageViewCreateInfo color_image_view = { VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO };
        color_image_view.format = vulkan_->GetSwapchainFormat();
        color_image_view.components.r = VK_COMPONENT_SWIZZLE_IDENTITY;
        color_image_view.components.g = VK_COMPONENT_SWIZZLE_IDENTITY;
        color_image_view.components.b = VK_COMPONENT_SWIZZLE_IDENTITY;
        color_image_view.components.a = VK_COMPONENT_SWIZZLE_IDENTITY;
        color_image_view.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
        color_image_view.subresourceRange.baseMipLevel = 0;
        color_image_view.subresourceRange.levelCount = 1;
        color_image_view.subresourceRange.baseArrayLayer = 0;
        color_image_view.subresourceRange.layerCount = 1;
        color_image_view.viewType = VK_IMAGE_VIEW_TYPE_2D;
        color_image_view.flags = 0;
        color_image_view.image = sc_buffer.image;

        // We leave the images as UNDEFINED, there's no need to pre-transition them as
        // the backbuffer renderpass starts out with them being auto-transitioned from UNDEFINED anyway.
        // Also, turns out it's illegal to transition un-acquired images, thanks Hans-Kristian. See #11417.

        res = vkCreateImageView(vulkan_->GetDevice(), &color_image_view, nullptr, &sc_buffer.view);
        swapchainImages_.push_back(sc_buffer);
        _dbg_assert_(res == VK_SUCCESS);
    }
    delete[] swapchainImages;

    // Must be before InitBackbufferRenderPass.
    if (InitDepthStencilBuffer(cmdInit)) {
        InitBackbufferFramebuffers(vulkan_->GetBackbufferWidth(), vulkan_->GetBackbufferHeight());
    }
    return true;
}

bool VulkanQueueRunner::InitBackbufferFramebuffers(int width, int height) {
    VkResult res;
    // We share the same depth buffer but have multiple color buffers, see the loop below.
    VkImageView attachments[2] = { VK_NULL_HANDLE, depth_.view };

    VkFramebufferCreateInfo fb_info = { VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO };
    fb_info.renderPass = GetCompatibleRenderPass()->Get(vulkan_, RP_TYPE_BACKBUFFER);
    fb_info.attachmentCount = 2;
    fb_info.pAttachments = attachments;
    fb_info.width = width;
    fb_info.height = height;
    fb_info.layers = 1;

    framebuffers_.resize(swapchainImageCount_);

    for (uint32_t i = 0; i < swapchainImageCount_; i++) {
        attachments[0] = swapchainImages_[i].view;
        res = vkCreateFramebuffer(vulkan_->GetDevice(), &fb_info, nullptr, &framebuffers_[i]);
        _dbg_assert_(res == VK_SUCCESS);
        if (res != VK_SUCCESS) {
            framebuffers_.clear();
            return false;
        }
    }

    return true;
}

bool VulkanQueueRunner::InitDepthStencilBuffer(VkCommandBuffer cmd) {
    const VkFormat depth_format = vulkan_->GetDeviceInfo().preferredDepthStencilFormat;
    int aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
    VkImageCreateInfo image_info = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
    image_info.imageType = VK_IMAGE_TYPE_2D;
    image_info.format = depth_format;
    image_info.extent.width = vulkan_->GetBackbufferWidth();
    image_info.extent.height = vulkan_->GetBackbufferHeight();
    image_info.extent.depth = 1;
    image_info.mipLevels = 1;
    image_info.arrayLayers = 1;
    image_info.samples = VK_SAMPLE_COUNT_1_BIT;
    image_info.queueFamilyIndexCount = 0;
    image_info.pQueueFamilyIndices = nullptr;
    image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
    image_info.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
    image_info.flags = 0;

    depth_.format = depth_format;

    VmaAllocationCreateInfo allocCreateInfo{};
    VmaAllocationInfo allocInfo{};

    allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;

    VkResult res = vmaCreateImage(vulkan_->Allocator(), &image_info, &allocCreateInfo, &depth_.image, &depth_.alloc, &allocInfo);
    _dbg_assert_(res == VK_SUCCESS);
    if (res != VK_SUCCESS)
        return false;

    vulkan_->SetDebugName(depth_.image, VK_OBJECT_TYPE_IMAGE, "BackbufferDepth");

    TransitionImageLayout2(cmd, depth_.image, 0, 1,
        aspectMask,
        VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
        VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
        VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
        0, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT);

    VkImageViewCreateInfo depth_view_info = { VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO };
    depth_view_info.image = depth_.image;
    depth_view_info.format = depth_format;
    depth_view_info.components.r = VK_COMPONENT_SWIZZLE_IDENTITY;
    depth_view_info.components.g = VK_COMPONENT_SWIZZLE_IDENTITY;
    depth_view_info.components.b = VK_COMPONENT_SWIZZLE_IDENTITY;
    depth_view_info.components.a = VK_COMPONENT_SWIZZLE_IDENTITY;
    depth_view_info.subresourceRange.aspectMask = aspectMask;
    depth_view_info.subresourceRange.baseMipLevel = 0;
    depth_view_info.subresourceRange.levelCount = 1;
    depth_view_info.subresourceRange.baseArrayLayer = 0;
    depth_view_info.subresourceRange.layerCount = 1;
    depth_view_info.viewType = VK_IMAGE_VIEW_TYPE_2D;
    depth_view_info.flags = 0;

    VkDevice device = vulkan_->GetDevice();

    res = vkCreateImageView(device, &depth_view_info, NULL, &depth_.view);
    _dbg_assert_(res == VK_SUCCESS);
    if (res != VK_SUCCESS)
        return false;

    return true;
}

void VulkanQueueRunner::DestroyBackBuffers() {
    for (auto &image : swapchainImages_) {
        vulkan_->Delete().QueueDeleteImageView(image.view);
    }
    swapchainImages_.clear();

    if (depth_.view) {
        vulkan_->Delete().QueueDeleteImageView(depth_.view);
    }
    if (depth_.image) {
        _dbg_assert_(depth_.alloc);
        vulkan_->Delete().QueueDeleteImageAllocation(depth_.image, depth_.alloc);
    }
    depth_ = {};
    for (uint32_t i = 0; i < framebuffers_.size(); i++) {
        _dbg_assert_(framebuffers_[i] != VK_NULL_HANDLE);
        vulkan_->Delete().QueueDeleteFramebuffer(framebuffers_[i]);
    }
    framebuffers_.clear();

    INFO_LOG(G3D, "Backbuffers destroyed");
}
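CreateSwapchain() and DestroyBackBuffers() pair up over the swapchain's lifetime: on a resize or VK_ERROR_OUT_OF_DATE_KHR, the old views, framebuffers and depth buffer go away and everything is rebuilt against the new swapchain. A rough sketch of that recreation flow (the swapchain-recreation call on the context side is an assumption, not shown in this diff):

    // Sketch only: handle an out-of-date swapchain.
    queueRunner_.DestroyBackBuffers();
    // ... recreate the VkSwapchainKHR via the VulkanContext ...
    queueRunner_.CreateSwapchain(cmdInit);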
static VkAttachmentLoadOp ConvertLoadAction(VKRRenderPassLoadAction action) {
    switch (action) {
    case VKRRenderPassLoadAction::CLEAR: return VK_ATTACHMENT_LOAD_OP_CLEAR;
@@ -155,7 +323,12 @@ static VkAttachmentStoreOp ConvertStoreAction(VKRRenderPassStoreAction action) {
    return VK_ATTACHMENT_STORE_OP_DONT_CARE;  // avoid compiler warning
}

// Self-dependency: https://github.com/gpuweb/gpuweb/issues/442#issuecomment-547604827
// Also see https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-pipeline-barriers-subpass-self-dependencies

VkRenderPass CreateRP(VulkanContext *vulkan, const RPKey &key, RenderPassType rpType) {
    bool selfDependency = rpType == RP_TYPE_COLOR_DEPTH_INPUT;

    VkAttachmentDescription attachments[2] = {};
    attachments[0].format = rpType == RP_TYPE_BACKBUFFER ? vulkan->GetSwapchainFormat() : VK_FORMAT_R8G8B8A8_UNORM;
    attachments[0].samples = VK_SAMPLE_COUNT_1_BIT;
@@ -179,7 +352,7 @@ VkRenderPass CreateRP(VulkanContext *vulkan, const RPKey &key, RenderPassType rp

    VkAttachmentReference color_reference{};
    color_reference.attachment = 0;
    color_reference.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
    color_reference.layout = selfDependency ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;

    VkAttachmentReference depth_reference{};
    depth_reference.attachment = 1;
@@ -188,8 +361,13 @@ VkRenderPass CreateRP(VulkanContext *vulkan, const RPKey &key, RenderPassType rp
    VkSubpassDescription subpass{};
    subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
    subpass.flags = 0;
    subpass.inputAttachmentCount = 0;
    subpass.pInputAttachments = nullptr;
    if (selfDependency) {
        subpass.inputAttachmentCount = 1;
        subpass.pInputAttachments = &color_reference;
    } else {
        subpass.inputAttachmentCount = 0;
        subpass.pInputAttachments = nullptr;
    }
    subpass.colorAttachmentCount = 1;
    subpass.pColorAttachments = &color_reference;
    subpass.pResolveAttachments = nullptr;
@@ -198,22 +376,40 @@ VkRenderPass CreateRP(VulkanContext *vulkan, const RPKey &key, RenderPassType rp
    subpass.pPreserveAttachments = nullptr;

    // Not sure if this is really necessary.
    VkSubpassDependency dep{};
    dep.srcSubpass = VK_SUBPASS_EXTERNAL;
    dep.dstSubpass = 0;
    dep.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
    dep.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
    dep.srcAccessMask = 0;
    dep.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
    VkSubpassDependency deps[2]{};
    size_t numDeps = 0;

    VkRenderPassCreateInfo rp{ VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO };
    rp.attachmentCount = 2;
    rp.pAttachments = attachments;
    rp.subpassCount = 1;
    rp.pSubpasses = &subpass;

    if (rpType == RP_TYPE_BACKBUFFER) {
        deps[numDeps].srcSubpass = VK_SUBPASS_EXTERNAL;
        deps[numDeps].dstSubpass = 0;
        deps[numDeps].srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
        deps[numDeps].dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
        deps[numDeps].srcAccessMask = 0;
        deps[numDeps].dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
        numDeps++;
        rp.dependencyCount = 1;
        rp.pDependencies = &dep;
    }

    if (selfDependency) {
        deps[numDeps].dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT;
        deps[numDeps].srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
        deps[numDeps].dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
        deps[numDeps].srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
        deps[numDeps].dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
        deps[numDeps].srcSubpass = 0;
        deps[numDeps].dstSubpass = 0;
        numDeps++;
    }

    if (numDeps > 0) {
        rp.dependencyCount = (u32)numDeps;
        rp.pDependencies = deps;
    }

    VkRenderPass pass;
@@ -246,6 +442,30 @@ VKRRenderPass *VulkanQueueRunner::GetRenderPass(const RPKey &key) {
    return pass;
}

// Must match the subpass self-dependency declared above.
void VulkanQueueRunner::SelfDependencyBarrier(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrier *recordBarrier) {
    if (aspect & VK_IMAGE_ASPECT_COLOR_BIT) {
        VkAccessFlags srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
        VkAccessFlags dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
        VkPipelineStageFlags srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
        VkPipelineStageFlags dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
        recordBarrier->TransitionImage(
            img.image,
            0,
            1,
            aspect,
            VK_IMAGE_LAYOUT_GENERAL,
            VK_IMAGE_LAYOUT_GENERAL,
            srcAccessMask,
            dstAccessMask,
            srcStageMask,
            dstStageMask
        );
    } else {
        _assert_msg_(false, "Depth self-dependencies not yet supported");
    }
}
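A subpass self-dependency only grants permission; the actual synchronization still has to be recorded inside the render pass, which is what SelfDependencyBarrier() feeds into. Expressed directly as raw Vulkan, an equivalent sketch of what the batched barrier ends up flushing:

    // Pipeline barrier inside subpass 0 -> 0, matching the declared self-dependency.
    VkImageMemoryBarrier b{ VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER };
    b.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
    b.dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
    b.oldLayout = VK_IMAGE_LAYOUT_GENERAL;  // layout must not change in a self-dependency
    b.newLayout = VK_IMAGE_LAYOUT_GENERAL;
    b.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    b.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    b.image = image;
    b.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 };
    vkCmdPipelineBarrier(cmd,
        VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
        VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, 0, nullptr, 1, &b);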
void VulkanQueueRunner::PreprocessSteps(std::vector<VKRStep *> &steps) {
    // Optimizes renderpasses, then sequences them.
    // Planned optimizations:
@@ -321,23 +541,47 @@ void VulkanQueueRunner::PreprocessSteps(std::vector<VKRStep *> &steps) {
    }
}

void VulkanQueueRunner::RunSteps(VkCommandBuffer cmd, std::vector<VKRStep *> &steps, QueueProfileContext *profile) {
void VulkanQueueRunner::RunSteps(FrameData &frameData, FrameDataShared &frameDataShared) {
    QueueProfileContext *profile = frameData.profilingEnabled_ ? &frameData.profile : nullptr;

    if (profile)
        profile->cpuStartTime = time_now_d();

    bool emitLabels = vulkan_->Extensions().EXT_debug_utils;

    for (size_t i = 0; i < steps.size(); i++) {
        const VKRStep &step = *steps[i];
    VkCommandBuffer cmd = frameData.hasPresentCommands ? frameData.presentCmd : frameData.mainCmd;

    for (size_t i = 0; i < frameData.steps.size(); i++) {
        const VKRStep &step = *frameData.steps[i];

        if (emitLabels) {
            VkDebugUtilsLabelEXT labelInfo{ VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT };
            labelInfo.pLabelName = step.tag;
            vkCmdBeginDebugUtilsLabelEXT(cmd, &labelInfo);
            vkCmdBeginDebugUtilsLabelEXT(frameData.mainCmd, &labelInfo);
        }

        switch (step.stepType) {
        case VKRStepType::RENDER:
            if (!step.render.framebuffer) {
                frameData.SubmitPending(vulkan_, FrameSubmitType::Pending, frameDataShared);

                // When stepping in the GE debugger, we can end up here multiple times in a "frame".
                // So only acquire once.
                if (!frameData.hasAcquired) {
                    frameData.AcquireNextImage(vulkan_, frameDataShared);
                    SetBackbuffer(framebuffers_[frameData.curSwapchainImage], swapchainImages_[frameData.curSwapchainImage].image);
                }

                _dbg_assert_(!frameData.hasPresentCommands);
                // A RENDER step rendering to the backbuffer is normally the last step that happens in a frame,
                // unless taking a screenshot, in which case there might be a READBACK_IMAGE after it.
                // This is why we have to switch cmd to presentCmd, in this case.
                VkCommandBufferBeginInfo begin{ VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };
                begin.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
                vkBeginCommandBuffer(frameData.presentCmd, &begin);
                frameData.hasPresentCommands = true;
                cmd = frameData.presentCmd;
            }
            PerformRenderPass(step, cmd);
            break;
        case VKRStepType::COPY:
|
||||
|
||||
// Deleting all in one go should be easier on the instruction cache than deleting
|
||||
// them as we go - and easier to debug because we can look backwards in the frame.
|
||||
for (size_t i = 0; i < steps.size(); i++) {
|
||||
delete steps[i];
|
||||
for (auto step : frameData.steps) {
|
||||
delete step;
|
||||
}
|
||||
|
||||
frameData.steps.clear();
|
||||
|
||||
if (profile)
|
||||
profile->cpuEndTime = time_now_d();
|
||||
}
|
||||
@@ -628,6 +874,7 @@ std::string VulkanQueueRunner::StepToString(const VKRStep &step) const {
        switch (step.render.renderPassType) {
        case RP_TYPE_BACKBUFFER: renderCmd = "BACKBUF"; break;
        case RP_TYPE_COLOR_DEPTH: renderCmd = "RENDER"; break;
        case RP_TYPE_COLOR_DEPTH_INPUT: renderCmd = "RENDER_INPUT"; break;
        default: renderCmd = "N/A";
        }
        snprintf(buffer, sizeof(buffer), "%s %s (draws: %d, %dx%d/%dx%d, fb: %p, )", renderCmd, step.tag, step.render.numDraws, actual_w, actual_h, w, h, step.render.framebuffer);
@@ -817,6 +1064,9 @@ void VulkanQueueRunner::LogRenderPass(const VKRStep &pass, bool verbose) {
        case VKRRenderCommand::REMOVED:
            INFO_LOG(G3D, "  (Removed)");
            break;
        case VKRRenderCommand::SELF_DEPENDENCY_BARRIER:
            INFO_LOG(G3D, "  SelfBarrier()");
            break;
        case VKRRenderCommand::BIND_GRAPHICS_PIPELINE:
            INFO_LOG(G3D, "  BindGraphicsPipeline(%x)", (int)(intptr_t)cmd.graphics_pipeline.pipeline);
            break;
@@ -1070,7 +1320,6 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c
        }
    }

    // Don't execute empty renderpasses that keep the contents.
    if (step.commands.empty() && step.render.colorLoad == VKRRenderPassLoadAction::KEEP && step.render.depthLoad == VKRRenderPassLoadAction::KEEP && step.render.stencilLoad == VKRRenderPassLoadAction::KEEP) {
        // Flush the pending barrier
@@ -1120,6 +1369,7 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c

    // This reads the layout of the color and depth images, and chooses a render pass using them that
    // will transition to the desired final layout.
    //
    // NOTE: Flushes recordBarrier_.
    VKRRenderPass *renderPass = PerformBindFramebufferAsRenderTarget(step, cmd);
@@ -1235,6 +1485,15 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c
            break;
        }

        case VKRRenderCommand::SELF_DEPENDENCY_BARRIER:
        {
            _assert_(step.render.pipelineFlags & PipelineFlags::USES_INPUT_ATTACHMENT);
            VulkanBarrier barrier;
            SelfDependencyBarrier(step.render.framebuffer->color, VK_IMAGE_ASPECT_COLOR_BIT, &barrier);
            barrier.Flush(cmd);
            break;
        }

        case VKRRenderCommand::PUSH_CONSTANTS:
            vkCmdPushConstants(cmd, pipelineLayout, c.push.stages, c.push.offset, c.push.size, c.push.data);
            break;
@@ -8,6 +8,7 @@
#include "Common/Data/Collections/Hashmaps.h"
#include "Common/GPU/Vulkan/VulkanContext.h"
#include "Common/GPU/Vulkan/VulkanBarrier.h"
#include "Common/GPU/Vulkan/VulkanFrameData.h"
#include "Common/Data/Convert/SmallDataConvert.h"
#include "Common/Data/Collections/TinySet.h"
#include "Common/GPU/DataFormat.h"

@@ -16,11 +17,11 @@ class VKRFramebuffer;
struct VKRGraphicsPipeline;
struct VKRComputePipeline;
struct VKRImage;
struct FrameData;

enum {
    QUEUE_HACK_MGS2_ACID = 1,
    QUEUE_HACK_SONIC = 2,
    // Killzone PR = 4.
    QUEUE_HACK_RENDERPASS_MERGE = 8,
};
@@ -36,20 +37,24 @@ enum class VKRRenderCommand : uint8_t {
    DRAW,
    DRAW_INDEXED,
    PUSH_CONSTANTS,
    SELF_DEPENDENCY_BARRIER,
    NUM_RENDER_COMMANDS,
};

enum PipelineFlags {
    PIPELINE_FLAG_NONE = 0,
    PIPELINE_FLAG_USES_LINES = (1 << 2),
    PIPELINE_FLAG_USES_BLEND_CONSTANT = (1 << 3),
    PIPELINE_FLAG_USES_DEPTH_STENCIL = (1 << 4),  // Reads or writes the depth buffer.
enum class PipelineFlags {
    NONE = 0,
    USES_LINES = (1 << 2),
    USES_BLEND_CONSTANT = (1 << 3),
    USES_DEPTH_STENCIL = (1 << 4),  // Reads or writes the depth buffer.
    USES_INPUT_ATTACHMENT = (1 << 5),
};
ENUM_CLASS_BITOPS(PipelineFlags);

// Pipelines need to be created for the right type of render pass.
enum RenderPassType {
    RP_TYPE_BACKBUFFER,
    RP_TYPE_COLOR_DEPTH,
    RP_TYPE_COLOR_DEPTH_INPUT,
    // Later will add pure-color render passes.
    RP_TYPE_COUNT,
};
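Moving PipelineFlags to an enum class keeps the flag values type-safe, while ENUM_CLASS_BITOPS (assumed to define the usual bitwise operators) keeps them usable as a bitmask, exactly as the assert in PerformRenderPass does:

    // Sketch: combine and test flags with the generated operators.
    PipelineFlags flags = PipelineFlags::USES_BLEND_CONSTANT | PipelineFlags::USES_INPUT_ATTACHMENT;
    if (flags & PipelineFlags::USES_INPUT_ATTACHMENT) {
        // needs RP_TYPE_COLOR_DEPTH_INPUT and a self-dependency barrier
    }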
@@ -146,14 +151,6 @@ struct TransitionRequest {
    VkImageLayout targetLayout;
};

struct QueueProfileContext {
    VkQueryPool queryPool;
    std::vector<std::string> timestampDescriptions;
    std::string profileSummary;
    double cpuStartTime;
    double cpuEndTime;
};

class VKRRenderPass;

struct VKRStep {
@@ -168,7 +165,6 @@ struct VKRStep {
    union {
        struct {
            VKRFramebuffer *framebuffer;
            // TODO: Look these up through renderPass?
            VKRRenderPassLoadAction colorLoad;
            VKRRenderPassLoadAction depthLoad;
            VKRRenderPassLoadAction stencilLoad;

@@ -183,7 +179,7 @@ struct VKRStep {
            int numReads;
            VkImageLayout finalColorLayout;
            VkImageLayout finalDepthStencilLayout;
            u32 pipelineFlags;
            PipelineFlags pipelineFlags;  // contains the self dependency flag, in the form of USES_INPUT_ATTACHMENT
            VkRect2D renderArea;
            // Render pass type. Deduced after finishing recording the pass, from the used pipelines.
            // NOTE: Storing the render pass here doesn't do much good, we change the compatible parameters (load/store ops) during step optimization.
@@ -255,7 +251,7 @@ public:
    }

    void PreprocessSteps(std::vector<VKRStep *> &steps);
    void RunSteps(VkCommandBuffer cmd, std::vector<VKRStep *> &steps, QueueProfileContext *profile);
    void RunSteps(FrameData &frameData, FrameDataShared &frameDataShared);
    void LogSteps(const std::vector<VKRStep *> &steps, bool verbose);

    std::string StepToString(const VKRStep &step) const;

@@ -263,6 +259,14 @@ public:
    void CreateDeviceObjects();
    void DestroyDeviceObjects();

    // Swapchain
    void DestroyBackBuffers();
    bool CreateSwapchain(VkCommandBuffer cmdInit);

    bool HasBackbuffers() const {
        return !framebuffers_.empty();
    }

    // Get a render pass that's compatible with all our framebuffers.
    // Note that it's precached, cannot look up in the map as this might be on another thread.
    VKRRenderPass *GetCompatibleRenderPass() const {
@@ -302,6 +306,9 @@ public:
    }

private:
    bool InitBackbufferFramebuffers(int width, int height);
    bool InitDepthStencilBuffer(VkCommandBuffer cmd);  // Used for non-buffered rendering.

    VKRRenderPass *PerformBindFramebufferAsRenderTarget(const VKRStep &pass, VkCommandBuffer cmd);
    void PerformRenderPass(const VKRStep &pass, VkCommandBuffer cmd);
    void PerformCopy(const VKRStep &pass, VkCommandBuffer cmd);

@@ -324,6 +331,8 @@ private:
    static void SetupTransitionToTransferSrc(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrier *recordBarrier);
    static void SetupTransitionToTransferDst(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrier *recordBarrier);

    static void SelfDependencyBarrier(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrier *recordBarrier);

    VulkanContext *vulkan_;

    VkFramebuffer backbuffer_ = VK_NULL_HANDLE;
@@ -354,4 +363,20 @@ private:
    // Stored here to help reuse the allocation.

    VulkanBarrier recordBarrier_;

    // Swap chain management
    struct SwapchainImageData {
        VkImage image;
        VkImageView view;
    };
    std::vector<VkFramebuffer> framebuffers_;
    std::vector<SwapchainImageData> swapchainImages_;
    uint32_t swapchainImageCount_ = 0;
    struct DepthBufferInfo {
        VkFormat format = VK_FORMAT_UNDEFINED;
        VkImage image = VK_NULL_HANDLE;
        VmaAllocation alloc = VK_NULL_HANDLE;
        VkImageView view = VK_NULL_HANDLE;
    };
    DepthBufferInfo depth_;
};
@@ -223,7 +223,7 @@ void CreateImage(VulkanContext *vulkan, VkCommandBuffer cmd, VKRImage &img, int
    // Strictly speaking we don't yet need VK_IMAGE_USAGE_SAMPLED_BIT for depth buffers since we do not yet sample depth buffers.
    ici.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
    if (color) {
        ici.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
        ici.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT;
    } else {
        ici.usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
    }

@@ -288,53 +288,15 @@ void CreateImage(VulkanContext *vulkan, VkCommandBuffer cmd, VKRImage &img, int
}
VulkanRenderManager::VulkanRenderManager(VulkanContext *vulkan) : vulkan_(vulkan), queueRunner_(vulkan) {
|
||||
VkSemaphoreCreateInfo semaphoreCreateInfo = { VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO };
|
||||
semaphoreCreateInfo.flags = 0;
|
||||
VkResult res = vkCreateSemaphore(vulkan_->GetDevice(), &semaphoreCreateInfo, nullptr, &acquireSemaphore_);
|
||||
_dbg_assert_(res == VK_SUCCESS);
|
||||
res = vkCreateSemaphore(vulkan_->GetDevice(), &semaphoreCreateInfo, nullptr, &renderingCompleteSemaphore_);
|
||||
_dbg_assert_(res == VK_SUCCESS);
|
||||
|
||||
inflightFramesAtStart_ = vulkan_->GetInflightFrames();
|
||||
|
||||
frameDataShared_.Init(vulkan);
|
||||
|
||||
for (int i = 0; i < inflightFramesAtStart_; i++) {
|
||||
VkCommandPoolCreateInfo cmd_pool_info = { VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO };
|
||||
cmd_pool_info.queueFamilyIndex = vulkan_->GetGraphicsQueueFamilyIndex();
|
||||
cmd_pool_info.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT;
|
||||
VkResult res = vkCreateCommandPool(vulkan_->GetDevice(), &cmd_pool_info, nullptr, &frameData_[i].cmdPoolInit);
|
||||
_dbg_assert_(res == VK_SUCCESS);
|
||||
res = vkCreateCommandPool(vulkan_->GetDevice(), &cmd_pool_info, nullptr, &frameData_[i].cmdPoolMain);
|
||||
_dbg_assert_(res == VK_SUCCESS);
|
||||
|
||||
VkCommandBufferAllocateInfo cmd_alloc = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO };
|
||||
cmd_alloc.commandPool = frameData_[i].cmdPoolInit;
|
||||
cmd_alloc.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
|
||||
cmd_alloc.commandBufferCount = 1;
|
||||
|
||||
res = vkAllocateCommandBuffers(vulkan_->GetDevice(), &cmd_alloc, &frameData_[i].initCmd);
|
||||
_dbg_assert_(res == VK_SUCCESS);
|
||||
cmd_alloc.commandPool = frameData_[i].cmdPoolMain;
|
||||
res = vkAllocateCommandBuffers(vulkan_->GetDevice(), &cmd_alloc, &frameData_[i].mainCmd);
|
||||
_dbg_assert_(res == VK_SUCCESS);
|
||||
|
||||
// Creating the frame fence with true so they can be instantly waited on the first frame
|
||||
frameData_[i].fence = vulkan_->CreateFence(true);
|
||||
|
||||
// This fence one is used for synchronizing readbacks. Does not need preinitialization.
|
||||
frameData_[i].readbackFence = vulkan_->CreateFence(false);
|
||||
|
||||
VkQueryPoolCreateInfo query_ci{ VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO };
|
||||
query_ci.queryCount = MAX_TIMESTAMP_QUERIES;
|
||||
query_ci.queryType = VK_QUERY_TYPE_TIMESTAMP;
|
||||
res = vkCreateQueryPool(vulkan_->GetDevice(), &query_ci, nullptr, &frameData_[i].profile.queryPool);
|
||||
frameData_[i].Init(vulkan, i);
|
||||
}
|
||||
|
||||
queueRunner_.CreateDeviceObjects();
|
||||
|
||||
// AMD hack for issue #10097 (older drivers only.)
|
||||
const auto &props = vulkan_->GetPhysicalDeviceProperties().properties;
|
||||
if (props.vendorID == VULKAN_VENDOR_AMD && props.apiVersion < VK_API_VERSION_1_1) {
|
||||
useThread_ = false;
|
||||
}
|
||||
}
|
||||
|
||||
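For reference, a minimal sketch of the pre-signaled fence pattern behind CreateFence(true) above (plain Vulkan; the `device` handle is assumed valid): creating the fence already signaled lets the very first per-frame vkWaitForFences() return immediately instead of deadlocking.

	// Create a fence that starts out signaled, so frame 0 can "wait" on it right away.
	VkFenceCreateInfo fenceInfo{ VK_STRUCTURE_TYPE_FENCE_CREATE_INFO };
	fenceInfo.flags = VK_FENCE_CREATE_SIGNALED_BIT;
	VkFence frameFence = VK_NULL_HANDLE;
	VkResult res = vkCreateFence(device, &fenceInfo, nullptr, &frameFence);
	_dbg_assert_(res == VK_SUCCESS);
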
bool VulkanRenderManager::CreateBackbuffers() {
@ -342,52 +304,14 @@ bool VulkanRenderManager::CreateBackbuffers() {
		ERROR_LOG(G3D, "No swapchain - can't create backbuffers");
		return false;
	}
	VkResult res = vkGetSwapchainImagesKHR(vulkan_->GetDevice(), vulkan_->GetSwapchain(), &swapchainImageCount_, nullptr);
	_dbg_assert_(res == VK_SUCCESS);

	VkImage *swapchainImages = new VkImage[swapchainImageCount_];
	res = vkGetSwapchainImagesKHR(vulkan_->GetDevice(), vulkan_->GetSwapchain(), &swapchainImageCount_, swapchainImages);
	if (res != VK_SUCCESS) {
		ERROR_LOG(G3D, "vkGetSwapchainImagesKHR failed");
		delete[] swapchainImages;
		return false;
	}

	VkCommandBuffer cmdInit = GetInitCmd();

	for (uint32_t i = 0; i < swapchainImageCount_; i++) {
		SwapchainImageData sc_buffer{};
		sc_buffer.image = swapchainImages[i];

		VkImageViewCreateInfo color_image_view = { VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO };
		color_image_view.format = vulkan_->GetSwapchainFormat();
		color_image_view.components.r = VK_COMPONENT_SWIZZLE_IDENTITY;
		color_image_view.components.g = VK_COMPONENT_SWIZZLE_IDENTITY;
		color_image_view.components.b = VK_COMPONENT_SWIZZLE_IDENTITY;
		color_image_view.components.a = VK_COMPONENT_SWIZZLE_IDENTITY;
		color_image_view.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
		color_image_view.subresourceRange.baseMipLevel = 0;
		color_image_view.subresourceRange.levelCount = 1;
		color_image_view.subresourceRange.baseArrayLayer = 0;
		color_image_view.subresourceRange.layerCount = 1;
		color_image_view.viewType = VK_IMAGE_VIEW_TYPE_2D;
		color_image_view.flags = 0;
		color_image_view.image = sc_buffer.image;

		// We leave the images as UNDEFINED, there's no need to pre-transition them as
		// the backbuffer renderpass starts out with them being auto-transitioned from UNDEFINED anyway.
		// Also, turns out it's illegal to transition un-acquired images, thanks Hans-Kristian. See #11417.

		res = vkCreateImageView(vulkan_->GetDevice(), &color_image_view, nullptr, &sc_buffer.view);
		swapchainImages_.push_back(sc_buffer);
		_dbg_assert_(res == VK_SUCCESS);
	if (!queueRunner_.CreateSwapchain(cmdInit)) {
		return false;
	}
	delete[] swapchainImages;

	// Must be before InitBackbufferRenderPass.
	if (InitDepthStencilBuffer(cmdInit)) {
		InitBackbufferFramebuffers(vulkan_->GetBackbufferWidth(), vulkan_->GetBackbufferHeight());
	}
	curWidthRaw_ = -1;
	curHeightRaw_ = -1;

@ -404,7 +328,7 @@ bool VulkanRenderManager::CreateBackbuffers() {
	outOfDateFrames_ = 0;

	// Start the thread.
	if (useThread_ && HasBackbuffers()) {
	if (HasBackbuffers()) {
		run_ = true;
		// Won't necessarily be 0.
		threadInitFrame_ = vulkan_->GetCurFrame();
@ -417,57 +341,58 @@ bool VulkanRenderManager::CreateBackbuffers() {
}

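The two vkGetSwapchainImagesKHR calls above follow the standard Vulkan two-call enumeration pattern; shown standalone (handles assumed valid), the first call queries only the count, the second fills the array:

	uint32_t imageCount = 0;
	vkGetSwapchainImagesKHR(device, swapchain, &imageCount, nullptr);        // query count only
	std::vector<VkImage> images(imageCount);
	vkGetSwapchainImagesKHR(device, swapchain, &imageCount, images.data());  // fetch the handles
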
void VulkanRenderManager::StopThread() {
	if (useThread_ && run_) {
		run_ = false;
		// Stop the thread.
		for (int i = 0; i < vulkan_->GetInflightFrames(); i++) {
			auto &frameData = frameData_[i];
			{
				std::unique_lock<std::mutex> lock(frameData.push_mutex);
				frameData.push_condVar.notify_all();
			}
			{
				std::unique_lock<std::mutex> lock(frameData.pull_mutex);
				frameData.pull_condVar.notify_all();
			}
			// Zero the queries so we don't try to pull them later.
			frameData.profile.timestampDescriptions.clear();
		}
		thread_.join();
		INFO_LOG(G3D, "Vulkan submission thread joined. Frame=%d", vulkan_->GetCurFrame());
		compileCond_.notify_all();
		compileThread_.join();
		INFO_LOG(G3D, "Vulkan compiler thread joined.");

		// Eat whatever has been queued up for this frame if anything.
		Wipe();

		// Wait for any fences to finish and be resignaled, so we don't have sync issues.
		// Also clean out any queued data, which might refer to things that might not be valid
		// when we restart...
		for (int i = 0; i < vulkan_->GetInflightFrames(); i++) {
			auto &frameData = frameData_[i];
			_assert_(!frameData.readyForRun);
			_assert_(frameData.steps.empty());
			if (frameData.hasInitCommands) {
				// Clear 'em out. This can happen on restart sometimes.
				vkEndCommandBuffer(frameData.initCmd);
				frameData.hasInitCommands = false;
			}
			frameData.readyForRun = false;
			for (size_t i = 0; i < frameData.steps.size(); i++) {
				delete frameData.steps[i];
			}
			frameData.steps.clear();

			std::unique_lock<std::mutex> lock(frameData.push_mutex);
			while (!frameData.readyForFence) {
				VLOG("PUSH: Waiting for frame[%d].readyForFence = 1 (stop)", i);
				frameData.push_condVar.wait(lock);
			}
		}
	} else {
	if (!run_) {
		INFO_LOG(G3D, "Vulkan submission thread was already stopped.");
		return;
	}

	run_ = false;
	// Stop the thread.
	for (int i = 0; i < vulkan_->GetInflightFrames(); i++) {
		auto &frameData = frameData_[i];
		{
			std::unique_lock<std::mutex> lock(frameData.push_mutex);
			frameData.push_condVar.notify_all();
		}
		{
			std::unique_lock<std::mutex> lock(frameData.pull_mutex);
			frameData.pull_condVar.notify_all();
		}
		// Zero the queries so we don't try to pull them later.
		frameData.profile.timestampDescriptions.clear();
	}
	thread_.join();
	INFO_LOG(G3D, "Vulkan submission thread joined. Frame=%d", vulkan_->GetCurFrame());
	compileCond_.notify_all();
	compileThread_.join();
	INFO_LOG(G3D, "Vulkan compiler thread joined.");

	// Eat whatever has been queued up for this frame if anything.
	Wipe();

	// Wait for any fences to finish and be resignaled, so we don't have sync issues.
	// Also clean out any queued data, which might refer to things that might not be valid
	// when we restart...
	for (int i = 0; i < vulkan_->GetInflightFrames(); i++) {
		auto &frameData = frameData_[i];
		_assert_(!frameData.readyForRun);
		_assert_(frameData.steps.empty());
		if (frameData.hasInitCommands) {
			// Clear 'em out. This can happen on restart sometimes.
			vkEndCommandBuffer(frameData.initCmd);
			frameData.hasInitCommands = false;
		}
		frameData.readyForRun = false;
		for (size_t i = 0; i < frameData.steps.size(); i++) {
			delete frameData.steps[i];
		}
		frameData.steps.clear();

		std::unique_lock<std::mutex> lock(frameData.push_mutex);
		while (!frameData.readyForFence) {
			VLOG("PUSH: Waiting for frame[%d].readyForFence = 1 (stop)", i);
			frameData.push_condVar.wait(lock);
		}
	}
}

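Both branches above use the same condition-variable shutdown handshake; condensed (member names as in this diff), the flag is flipped, every per-frame condvar is notified so a blocked worker re-checks its predicate and exits, and only then is the thread joined:

	run_ = false;
	for (int i = 0; i < vulkan_->GetInflightFrames(); i++) {
		// Take the lock briefly so the notify can't race a worker between its
		// predicate check and its wait.
		std::unique_lock<std::mutex> lock(frameData_[i].pull_mutex);
		frameData_[i].pull_condVar.notify_all();
	}
	thread_.join();
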
@ -475,26 +400,7 @@ void VulkanRenderManager::DestroyBackbuffers() {
	StopThread();
	vulkan_->WaitUntilQueueIdle();

	for (auto &image : swapchainImages_) {
		vulkan_->Delete().QueueDeleteImageView(image.view);
	}
	swapchainImages_.clear();

	if (depth_.view) {
		vulkan_->Delete().QueueDeleteImageView(depth_.view);
	}
	if (depth_.image) {
		_dbg_assert_(depth_.alloc);
		vulkan_->Delete().QueueDeleteImageAllocation(depth_.image, depth_.alloc);
	}
	depth_ = {};
	for (uint32_t i = 0; i < framebuffers_.size(); i++) {
		_dbg_assert_(framebuffers_[i] != VK_NULL_HANDLE);
		vulkan_->Delete().QueueDeleteFramebuffer(framebuffers_[i]);
	}
	framebuffers_.clear();

	INFO_LOG(G3D, "Backbuffers destroyed");
	queueRunner_.DestroyBackBuffers();
}

VulkanRenderManager::~VulkanRenderManager() {
@ -504,16 +410,9 @@ VulkanRenderManager::~VulkanRenderManager() {

	DrainCompileQueue();
	VkDevice device = vulkan_->GetDevice();
	vkDestroySemaphore(device, acquireSemaphore_, nullptr);
	vkDestroySemaphore(device, renderingCompleteSemaphore_, nullptr);
	frameDataShared_.Destroy(vulkan_);
	for (int i = 0; i < inflightFramesAtStart_; i++) {
		vkFreeCommandBuffers(device, frameData_[i].cmdPoolInit, 1, &frameData_[i].initCmd);
		vkFreeCommandBuffers(device, frameData_[i].cmdPoolMain, 1, &frameData_[i].mainCmd);
		vkDestroyCommandPool(device, frameData_[i].cmdPoolInit, nullptr);
		vkDestroyCommandPool(device, frameData_[i].cmdPoolMain, nullptr);
		vkDestroyFence(device, frameData_[i].fence, nullptr);
		vkDestroyFence(device, frameData_[i].readbackFence, nullptr);
		vkDestroyQueryPool(device, frameData_[i].profile.queryPool, nullptr);
		frameData_[i].Destroy(vulkan_);
	}
	queueRunner_.DestroyDeviceObjects();
}
@ -534,7 +433,9 @@ void VulkanRenderManager::CompileThreadFunc() {
			break;
		}

		INFO_LOG(G3D, "Compilation thread has %d pipelines to create", (int)toCompile.size());
		if (!toCompile.empty()) {
			INFO_LOG(G3D, "Compilation thread has %d pipelines to create", (int)toCompile.size());
		}

		// TODO: Here we can sort the pending pipelines by vertex and fragment shaders,
		// and split up further.
@ -574,6 +475,7 @@ void VulkanRenderManager::ThreadFunc() {
			threadFrame = 0;
		}
		FrameData &frameData = frameData_[threadFrame];

		std::unique_lock<std::mutex> lock(frameData.pull_mutex);
		while (!frameData.readyForRun && run_) {
			VLOG("PULL: Waiting for frame[%d].readyForRun", threadFrame);
@ -589,8 +491,7 @@ void VulkanRenderManager::ThreadFunc() {
		// but that created a race condition where frames could end up not finished properly on resize etc.

		// Only increment next time if we're done.
		nextFrame = frameData.type == VKRRunType::END;
		_dbg_assert_(frameData.type == VKRRunType::END || frameData.type == VKRRunType::SYNC);
		nextFrame = frameData.RunType() == VKRRunType::END;
	}
	VLOG("PULL: Running frame %d", threadFrame);
	if (firstFrame) {
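A condensed sketch of the worker loop this hunk modifies (not the exact body; names as in the surrounding code): the inflight-frame ring index only advances after an END-type run, so a SYNC run replays the same frame slot:

	while (run_) {
		if (nextFrame) {
			threadFrame++;
			if (threadFrame >= vulkan_->GetInflightFrames())
				threadFrame = 0;
		}
		FrameData &frameData = frameData_[threadFrame];
		// ... block on pull_condVar until frameData.readyForRun ...
		Run(threadFrame);
		nextFrame = frameData.RunType() == VKRRunType::END;
	}
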
@ -615,7 +516,7 @@ void VulkanRenderManager::BeginFrame(bool enableProfiling, bool enableLogProfile
	FrameData &frameData = frameData_[curFrame];

	// Make sure the very last command buffer from the frame before the previous has been fully executed.
	if (useThread_) {
	{
		std::unique_lock<std::mutex> lock(frameData.push_mutex);
		while (!frameData.readyForFence) {
			VLOG("PUSH: Waiting for frame[%d].readyForFence = 1", curFrame);
@ -633,7 +534,6 @@ void VulkanRenderManager::BeginFrame(bool enableProfiling, bool enableLogProfile

	// Can't set this until after the fence.
	frameData.profilingEnabled_ = enableProfiling;
	frameData.readbackFenceUsed = false;

	uint64_t queryResults[MAX_TIMESTAMP_QUERIES];

@ -698,21 +598,7 @@

VkCommandBuffer VulkanRenderManager::GetInitCmd() {
	int curFrame = vulkan_->GetCurFrame();
	FrameData &frameData = frameData_[curFrame];
	if (!frameData.hasInitCommands) {
		VkCommandBufferBeginInfo begin = {
			VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
			nullptr,
			VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT
		};
		vkResetCommandPool(vulkan_->GetDevice(), frameData.cmdPoolInit, 0);
		VkResult res = vkBeginCommandBuffer(frameData.initCmd, &begin);
		if (res != VK_SUCCESS) {
			return VK_NULL_HANDLE;
		}
		frameData.hasInitCommands = true;
	}
	return frameData_[curFrame].initCmd;
	return frameData_[curFrame].GetInitCmd(vulkan_);
}

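The body removed above presumably moved into FrameData::GetInitCmd() in the new VulkanFrameData files; under that assumption, the lazy one-time-submit begin pattern looks roughly like this:

	VkCommandBuffer FrameData::GetInitCmd(VulkanContext *vulkan) {
		if (!hasInitCommands) {
			// Reset the transient pool before reusing its buffer, then begin it once.
			vkResetCommandPool(vulkan->GetDevice(), cmdPoolInit, 0);
			VkCommandBufferBeginInfo begin{ VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };
			begin.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
			if (vkBeginCommandBuffer(initCmd, &begin) != VK_SUCCESS)
				return VK_NULL_HANDLE;
			hasInitCommands = true;
		}
		return initCmd;
	}
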
VKRGraphicsPipeline *VulkanRenderManager::CreateGraphicsPipeline(VKRGraphicsPipelineDesc *desc, uint32_t variantBitmask, const char *tag) {
@ -771,16 +657,20 @@ void VulkanRenderManager::EndCurRenderStep() {
		curRenderStep_->render.colorStore, curRenderStep_->render.depthStore, curRenderStep_->render.stencilStore,
	};
	RenderPassType rpType = RP_TYPE_COLOR_DEPTH;
	// Save the accumulated pipeline flags so we can use that to configure the render pass.
	// We'll often be able to avoid loading/saving the depth/stencil buffer.
	curRenderStep_->render.pipelineFlags = curPipelineFlags_;
	if (!curRenderStep_->render.framebuffer) {
		rpType = RP_TYPE_BACKBUFFER;
	} else if (curPipelineFlags_ & PipelineFlags::USES_INPUT_ATTACHMENT) {
		// Not allowed on backbuffers.
		rpType = RP_TYPE_COLOR_DEPTH_INPUT;
	}
	// TODO: Also add render pass types for depth/stencil-less.

	VKRRenderPass *renderPass = queueRunner_.GetRenderPass(key);
	curRenderStep_->render.renderPassType = rpType;

	// Save the accumulated pipeline flags so we can use that to configure the render pass.
	// We'll often be able to avoid loading/saving the depth/stencil buffer.
	compileMutex_.lock();
	bool needsCompile = false;
	for (VKRGraphicsPipeline *pipeline : pipelinesToCheck_) {
@ -806,7 +696,12 @@ void VulkanRenderManager::EndCurRenderStep() {

	// We no longer have a current render step.
	curRenderStep_ = nullptr;
	curPipelineFlags_ = 0;
	curPipelineFlags_ = (PipelineFlags)0;
}

void VulkanRenderManager::BindCurrentFramebufferAsInputAttachment0(VkImageAspectFlags aspectBits) {
	_dbg_assert_(curRenderStep_);
	curRenderStep_->commands.push_back(VkRenderData{ VKRRenderCommand::SELF_DEPENDENCY_BARRIER });
}

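SELF_DEPENDENCY_BARRIER is recorded later by the queue runner; in plain Vulkan terms (a sketch, not necessarily PPSSPP's exact barrier), a framebuffer-fetch style self-dependency is a by-region barrier issued inside the render pass, legal only if the pass was created with a matching subpass self-dependency:

	// Make prior color writes visible to input-attachment reads, per-region.
	VkMemoryBarrier mb{ VK_STRUCTURE_TYPE_MEMORY_BARRIER };
	mb.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
	mb.dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
	vkCmdPipelineBarrier(cmd,
		VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
		VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
		VK_DEPENDENCY_BY_REGION_BIT,
		1, &mb, 0, nullptr, 0, nullptr);
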
void VulkanRenderManager::BindFramebufferAsRenderTarget(VKRFramebuffer *fb, VKRRenderPassLoadAction color, VKRRenderPassLoadAction depth, VKRRenderPassLoadAction stencil, uint32_t clearColor, float clearDepth, uint8_t clearStencil, const char *tag) {
@ -1028,98 +923,6 @@ void VulkanRenderManager::CopyImageToMemorySync(VkImage image, int mipLevel, int
	queueRunner_.CopyReadbackBuffer(w, h, destFormat, destFormat, pixelStride, pixels);
}

bool VulkanRenderManager::InitBackbufferFramebuffers(int width, int height) {
	VkResult res;
	// We share the same depth buffer but have multiple color buffers, see the loop below.
	VkImageView attachments[2] = { VK_NULL_HANDLE, depth_.view };

	VkFramebufferCreateInfo fb_info = { VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO };
	fb_info.renderPass = queueRunner_.GetCompatibleRenderPass()->Get(vulkan_, RP_TYPE_BACKBUFFER);
	fb_info.attachmentCount = 2;
	fb_info.pAttachments = attachments;
	fb_info.width = width;
	fb_info.height = height;
	fb_info.layers = 1;

	framebuffers_.resize(swapchainImageCount_);

	for (uint32_t i = 0; i < swapchainImageCount_; i++) {
		attachments[0] = swapchainImages_[i].view;
		res = vkCreateFramebuffer(vulkan_->GetDevice(), &fb_info, nullptr, &framebuffers_[i]);
		_dbg_assert_(res == VK_SUCCESS);
		if (res != VK_SUCCESS) {
			framebuffers_.clear();
			return false;
		}
	}

	return true;
}

bool VulkanRenderManager::InitDepthStencilBuffer(VkCommandBuffer cmd) {
	const VkFormat depth_format = vulkan_->GetDeviceInfo().preferredDepthStencilFormat;
	int aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
	VkImageCreateInfo image_info = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
	image_info.imageType = VK_IMAGE_TYPE_2D;
	image_info.format = depth_format;
	image_info.extent.width = vulkan_->GetBackbufferWidth();
	image_info.extent.height = vulkan_->GetBackbufferHeight();
	image_info.extent.depth = 1;
	image_info.mipLevels = 1;
	image_info.arrayLayers = 1;
	image_info.samples = VK_SAMPLE_COUNT_1_BIT;
	image_info.queueFamilyIndexCount = 0;
	image_info.pQueueFamilyIndices = nullptr;
	image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
	image_info.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
	image_info.flags = 0;

	depth_.format = depth_format;

	VmaAllocationCreateInfo allocCreateInfo{};
	VmaAllocationInfo allocInfo{};

	allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;

	VkResult res = vmaCreateImage(vulkan_->Allocator(), &image_info, &allocCreateInfo, &depth_.image, &depth_.alloc, &allocInfo);
	_dbg_assert_(res == VK_SUCCESS);
	if (res != VK_SUCCESS)
		return false;

	vulkan_->SetDebugName(depth_.image, VK_OBJECT_TYPE_IMAGE, "BackbufferDepth");

	TransitionImageLayout2(cmd, depth_.image, 0, 1,
		aspectMask,
		VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
		VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
		VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
		0, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT);

	VkImageViewCreateInfo depth_view_info = { VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO };
	depth_view_info.image = depth_.image;
	depth_view_info.format = depth_format;
	depth_view_info.components.r = VK_COMPONENT_SWIZZLE_IDENTITY;
	depth_view_info.components.g = VK_COMPONENT_SWIZZLE_IDENTITY;
	depth_view_info.components.b = VK_COMPONENT_SWIZZLE_IDENTITY;
	depth_view_info.components.a = VK_COMPONENT_SWIZZLE_IDENTITY;
	depth_view_info.subresourceRange.aspectMask = aspectMask;
	depth_view_info.subresourceRange.baseMipLevel = 0;
	depth_view_info.subresourceRange.levelCount = 1;
	depth_view_info.subresourceRange.baseArrayLayer = 0;
	depth_view_info.subresourceRange.layerCount = 1;
	depth_view_info.viewType = VK_IMAGE_VIEW_TYPE_2D;
	depth_view_info.flags = 0;

	VkDevice device = vulkan_->GetDevice();

	res = vkCreateImageView(device, &depth_view_info, NULL, &depth_.view);
	_dbg_assert_(res == VK_SUCCESS);
	if (res != VK_SUCCESS)
		return false;

	return true;
}

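TransitionImageLayout2 above is a PPSSPP helper; expanded to raw Vulkan (a hedged sketch, not the helper's exact body), the depth transition is a single image memory barrier:

	VkImageMemoryBarrier barrier{ VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER };
	barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
	barrier.newLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
	barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
	barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
	barrier.image = depthImage;  // assumed handle, i.e. depth_.image above
	barrier.subresourceRange = { VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, 0, 1, 0, 1 };
	barrier.srcAccessMask = 0;
	barrier.dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
	vkCmdPipelineBarrier(cmd,
		VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
		VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
		0, 0, nullptr, 0, nullptr, 1, &barrier);
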
static void RemoveDrawCommands(std::vector<VkRenderData> *cmds) {
	// Here we remove any DRAW type commands when we hit a CLEAR.
	for (auto &c : *cmds) {
@ -1359,6 +1162,9 @@ VkImageView VulkanRenderManager::BindFramebufferAsTexture(VKRFramebuffer *fb, in
	}
}

// Called on main thread.
// Sends the collected commands to the render thread. Submit-latency should be
// measured from here, probably.
void VulkanRenderManager::Finish() {
	EndCurRenderStep();

@ -1371,18 +1177,14 @@ void VulkanRenderManager::Finish() {

	int curFrame = vulkan_->GetCurFrame();
	FrameData &frameData = frameData_[curFrame];
	if (!useThread_) {
		frameData.steps = std::move(steps_);
		steps_.clear();
		frameData.type = VKRRunType::END;
		Run(curFrame);
	} else {

	{
		std::unique_lock<std::mutex> lock(frameData.pull_mutex);
		VLOG("PUSH: Frame[%d].readyForRun = true", curFrame);
		frameData.steps = std::move(steps_);
		steps_.clear();
		frameData.readyForRun = true;
		frameData.type = VKRRunType::END;
		frameData.runType_ = VKRRunType::END;
		frameData.pull_condVar.notify_all();
	}
	vulkan_->EndFrame();
@ -1397,118 +1199,39 @@ void VulkanRenderManager::Wipe() {
	steps_.clear();
}

// Called on the render thread.
//
// Can be called multiple times with no bad side effects. This is so that we can either begin a frame the normal way,
// or stop it in the middle for a synchronous readback, then start over again mostly normally but without repeating
// the backbuffer image acquisition.
void VulkanRenderManager::BeginSubmitFrame(int frame) {
	FrameData &frameData = frameData_[frame];
	if (!frameData.hasBegun) {
		// Get the index of the next available swapchain image, and a semaphore to block command buffer execution on.
		VkResult res = vkAcquireNextImageKHR(vulkan_->GetDevice(), vulkan_->GetSwapchain(), UINT64_MAX, acquireSemaphore_, (VkFence)VK_NULL_HANDLE, &frameData.curSwapchainImage);
		if (res == VK_SUBOPTIMAL_KHR) {
			// Hopefully the resize will happen shortly. Ignore - one frame might look bad or something.
			WARN_LOG(G3D, "VK_SUBOPTIMAL_KHR returned - ignoring");
		} else if (res == VK_ERROR_OUT_OF_DATE_KHR) {
			WARN_LOG(G3D, "VK_ERROR_OUT_OF_DATE_KHR returned - processing the frame, but not presenting");
			frameData.skipSwap = true;
		} else {
			_assert_msg_(res == VK_SUCCESS, "vkAcquireNextImageKHR failed! result=%s", VulkanResultToString(res));
		}

	// Should only have at most the init command buffer pending here (that one came from the other thread).
	_dbg_assert_(!frameData.hasPresentCommands);
	frameData.SubmitPending(vulkan_, FrameSubmitType::Pending, frameDataShared_);

	if (!frameData.hasMainCommands) {
		// Effectively resets both main and present command buffers, since they both live in this pool.
		// We always record main commands first, so we don't need to reset the present command buffer separately.
		vkResetCommandPool(vulkan_->GetDevice(), frameData.cmdPoolMain, 0);

		VkCommandBufferBeginInfo begin{ VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };
		begin.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
		res = vkBeginCommandBuffer(frameData.mainCmd, &begin);

		VkResult res = vkBeginCommandBuffer(frameData.mainCmd, &begin);
		frameData.hasMainCommands = true;
		_assert_msg_(res == VK_SUCCESS, "vkBeginCommandBuffer failed! result=%s", VulkanResultToString(res));

		queueRunner_.SetBackbuffer(framebuffers_[frameData.curSwapchainImage], swapchainImages_[frameData.curSwapchainImage].image);

		frameData.hasBegun = true;
	}
}

void VulkanRenderManager::Submit(int frame, bool triggerFrameFence) {
	FrameData &frameData = frameData_[frame];
	if (frameData.hasInitCommands) {
		if (frameData.profilingEnabled_ && triggerFrameFence) {
			// Pre-allocated query ID 1.
			vkCmdWriteTimestamp(frameData.initCmd, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, frameData.profile.queryPool, 1);
		}
		VkResult res = vkEndCommandBuffer(frameData.initCmd);
		_assert_msg_(res == VK_SUCCESS, "vkEndCommandBuffer failed (init)! result=%s", VulkanResultToString(res));
	}

	VkResult res = vkEndCommandBuffer(frameData.mainCmd);
	_assert_msg_(res == VK_SUCCESS, "vkEndCommandBuffer failed (main)! result=%s", VulkanResultToString(res));

	VkCommandBuffer cmdBufs[2];
	int numCmdBufs = 0;
	if (frameData.hasInitCommands) {
		cmdBufs[numCmdBufs++] = frameData.initCmd;
		if (splitSubmit_) {
			// Send the init commands off separately. Used this once to confirm that the cause of a device loss was in the init cmdbuf.
			VkSubmitInfo submit_info{ VK_STRUCTURE_TYPE_SUBMIT_INFO };
			submit_info.commandBufferCount = (uint32_t)numCmdBufs;
			submit_info.pCommandBuffers = cmdBufs;
			res = vkQueueSubmit(vulkan_->GetGraphicsQueue(), 1, &submit_info, VK_NULL_HANDLE);
			if (res == VK_ERROR_DEVICE_LOST) {
				_assert_msg_(false, "Lost the Vulkan device in split submit! If this happens again, switch Graphics Backend away from Vulkan");
			} else {
				_assert_msg_(res == VK_SUCCESS, "vkQueueSubmit failed (init)! result=%s", VulkanResultToString(res));
			}
			numCmdBufs = 0;
		}
	}
	cmdBufs[numCmdBufs++] = frameData.mainCmd;

	VkSubmitInfo submit_info{ VK_STRUCTURE_TYPE_SUBMIT_INFO };
	VkPipelineStageFlags waitStage[1]{ VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT };
	if (triggerFrameFence && !frameData.skipSwap) {
		submit_info.waitSemaphoreCount = 1;
		submit_info.pWaitSemaphores = &acquireSemaphore_;
		submit_info.pWaitDstStageMask = waitStage;
	}
	submit_info.commandBufferCount = (uint32_t)numCmdBufs;
	submit_info.pCommandBuffers = cmdBufs;
	if (triggerFrameFence && !frameData.skipSwap) {
		submit_info.signalSemaphoreCount = 1;
		submit_info.pSignalSemaphores = &renderingCompleteSemaphore_;
	}
	res = vkQueueSubmit(vulkan_->GetGraphicsQueue(), 1, &submit_info, triggerFrameFence ? frameData.fence : frameData.readbackFence);
	if (res == VK_ERROR_DEVICE_LOST) {
		_assert_msg_(false, "Lost the Vulkan device in vkQueueSubmit! If this happens again, switch Graphics Backend away from Vulkan");
	} else {
		_assert_msg_(res == VK_SUCCESS, "vkQueueSubmit failed (main, split=%d)! result=%s", (int)splitSubmit_, VulkanResultToString(res));
	}

	// When !triggerFence, we notify after syncing with Vulkan.
	if (useThread_ && triggerFrameFence) {
		VLOG("PULL: Frame %d.readyForFence = true", frame);
		std::unique_lock<std::mutex> lock(frameData.push_mutex);
		frameData.readyForFence = true;
		frameData.push_condVar.notify_all();
	}

	frameData.hasInitCommands = false;
}

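Condensed, the semaphore chaining in Submit() is the classic Vulkan frame skeleton (a sketch using the names above): wait for the acquire semaphore at the color-output stage, signal the rendering-complete semaphore for the present, and fence the whole submission.

	VkPipelineStageFlags waitStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
	VkSubmitInfo si{ VK_STRUCTURE_TYPE_SUBMIT_INFO };
	si.waitSemaphoreCount = 1;
	si.pWaitSemaphores = &acquireSemaphore_;              // signaled by vkAcquireNextImageKHR
	si.pWaitDstStageMask = &waitStage;
	si.commandBufferCount = 1;
	si.pCommandBuffers = &frameData.mainCmd;
	si.signalSemaphoreCount = 1;
	si.pSignalSemaphores = &renderingCompleteSemaphore_;  // waited on by the present
	vkQueueSubmit(vulkan_->GetGraphicsQueue(), 1, &si, frameData.fence);
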
// Called on the render thread.
void VulkanRenderManager::EndSubmitFrame(int frame) {
	FrameData &frameData = frameData_[frame];
	frameData.hasBegun = false;

	Submit(frame, true);
	frameData.SubmitPending(vulkan_, FrameSubmitType::Present, frameDataShared_);

	if (!frameData.skipSwap) {
		VkSwapchainKHR swapchain = vulkan_->GetSwapchain();
		VkPresentInfoKHR present = { VK_STRUCTURE_TYPE_PRESENT_INFO_KHR };
		present.swapchainCount = 1;
		present.pSwapchains = &swapchain;
		present.pImageIndices = &frameData.curSwapchainImage;
		present.pWaitSemaphores = &renderingCompleteSemaphore_;
		present.waitSemaphoreCount = 1;

		VkResult res = vkQueuePresentKHR(vulkan_->GetGraphicsQueue(), &present);
		VkResult res = frameData.QueuePresent(vulkan_, frameDataShared_);
		if (res == VK_ERROR_OUT_OF_DATE_KHR) {
			// We clearly didn't get this in vkAcquireNextImageKHR because of the skipSwap check above.
			// Do the increment.
@ -1528,18 +1251,29 @@ void VulkanRenderManager::EndSubmitFrame(int frame) {
	}
}

void VulkanRenderManager::EndSyncFrame(int frame) {
	FrameData &frameData = frameData_[frame];

	// The submit will trigger the readbackFence, and also do the wait for it.
	frameData.SubmitPending(vulkan_, FrameSubmitType::Sync, frameDataShared_);

	// At this point we can resume filling the command buffers for the current frame since
	// we know the device is idle - and thus all previously enqueued command buffers have been processed.
	// No need to switch to the next frame number, would just be confusing.
	std::unique_lock<std::mutex> lock(frameData.push_mutex);
	frameData.readyForFence = true;
	frameData.push_condVar.notify_all();
}

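The hard GPU stall that used to live here (see the removed EndSyncFrame further down) has moved into FrameData::SubmitPending() under FrameSubmitType::Sync; under that assumption, the readback synchronization reduces to the usual wait-then-reset pair:

	// Block until the readback submission completes, then re-arm the fence for reuse.
	vkWaitForFences(vulkan_->GetDevice(), 1, &readbackFence, VK_TRUE, UINT64_MAX);
	vkResetFences(vulkan_->GetDevice(), 1, &readbackFence);
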
void VulkanRenderManager::Run(int frame) {
	BeginSubmitFrame(frame);

	FrameData &frameData = frameData_[frame];
	auto &stepsOnThread = frameData_[frame].steps;
	VkCommandBuffer cmd = frameData.mainCmd;
	queueRunner_.PreprocessSteps(stepsOnThread);
	queueRunner_.PreprocessSteps(frameData_[frame].steps);
	//queueRunner_.LogSteps(stepsOnThread, false);
	queueRunner_.RunSteps(cmd, stepsOnThread, frameData.profilingEnabled_ ? &frameData.profile : nullptr);
	stepsOnThread.clear();
	queueRunner_.RunSteps(frameData, frameDataShared_);

	switch (frameData.type) {
	switch (frameData.runType_) {
	case VKRRunType::END:
		EndSubmitFrame(frame);
		break;
@ -1555,59 +1289,24 @@ void VulkanRenderManager::Run(int frame) {
	VLOG("PULL: Finished running frame %d", frame);
}

void VulkanRenderManager::EndSyncFrame(int frame) {
	FrameData &frameData = frameData_[frame];

	frameData.readbackFenceUsed = true;

	// The submit will trigger the readbackFence.
	Submit(frame, false);

	// Hard stall of the GPU, not ideal, but necessary so the CPU has the contents of the readback.
	vkWaitForFences(vulkan_->GetDevice(), 1, &frameData.readbackFence, true, UINT64_MAX);
	vkResetFences(vulkan_->GetDevice(), 1, &frameData.readbackFence);

	// At this point we can resume filling the command buffers for the current frame since
	// we know the device is idle - and thus all previously enqueued command buffers have been processed.
	// No need to switch to the next frame number.
	VkCommandBufferBeginInfo begin{
		VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
		nullptr,
		VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT
	};
	vkResetCommandPool(vulkan_->GetDevice(), frameData.cmdPoolMain, 0);
	VkResult res = vkBeginCommandBuffer(frameData.mainCmd, &begin);
	_assert_(res == VK_SUCCESS);

	if (useThread_) {
		std::unique_lock<std::mutex> lock(frameData.push_mutex);
		frameData.readyForFence = true;
		frameData.push_condVar.notify_all();
	}
}

void VulkanRenderManager::FlushSync() {
	renderStepOffset_ += (int)steps_.size();

	int curFrame = vulkan_->GetCurFrame();
	FrameData &frameData = frameData_[curFrame];
	if (!useThread_) {
		frameData.steps = std::move(steps_);
		steps_.clear();
		frameData.type = VKRRunType::SYNC;
		Run(curFrame);
	} else {

	{
		std::unique_lock<std::mutex> lock(frameData.pull_mutex);
		VLOG("PUSH: Frame[%d].readyForRun = true (sync)", curFrame);
		frameData.steps = std::move(steps_);
		steps_.clear();
		frameData.readyForRun = true;
		_dbg_assert_(!frameData.readyForFence);
		frameData.type = VKRRunType::SYNC;
		frameData.runType_ = VKRRunType::SYNC;
		frameData.pull_condVar.notify_all();
	}

	if (useThread_) {
		{
			std::unique_lock<std::mutex> lock(frameData.push_mutex);
			// Wait for the flush to be hit, since we're syncing.
			while (!frameData.readyForFence) {

@ -65,15 +65,6 @@ private:
	std::string tag_;
};

enum class VKRRunType {
	END,
	SYNC,
};

enum {
	MAX_TIMESTAMP_QUERIES = 128,
};

struct BoundingRect {
	int x1;
	int y1;
@ -236,6 +227,8 @@ public:
	// as the other backends, even though there's no actual binding happening here.
	VkImageView BindFramebufferAsTexture(VKRFramebuffer *fb, int binding, VkImageAspectFlags aspectBits, int attachment);

	void BindCurrentFramebufferAsInputAttachment0(VkImageAspectFlags aspectBits);

	bool CopyFramebufferToMemorySync(VKRFramebuffer *src, VkImageAspectFlags aspectBits, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, const char *tag);
	void CopyImageToMemorySync(VkImage image, int mipLevel, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, const char *tag);

@ -440,11 +433,7 @@ public:
	void DestroyBackbuffers();

	bool HasBackbuffers() {
		return !framebuffers_.empty();
	}

	void SetSplitSubmit(bool split) {
		splitSubmit_ = split;
		return queueRunner_.HasBackbuffers();
	}

	void SetInflightFrames(int f) {
@ -470,13 +459,10 @@ public:
	}

private:
	bool InitBackbufferFramebuffers(int width, int height);
	bool InitDepthStencilBuffer(VkCommandBuffer cmd); // Used for non-buffered rendering.
	void EndCurRenderStep();

	void BeginSubmitFrame(int frame);
	void EndSubmitFrame(int frame);
	void Submit(int frame, bool triggerFence);

	// Bad for performance but sometimes necessary for synchronous CPU readbacks (screenshots and whatnot).
	void FlushSync();
@ -484,43 +470,7 @@ private:

	void StopThread();

	// Permanent objects
	VkSemaphore acquireSemaphore_;
	VkSemaphore renderingCompleteSemaphore_;

	// Per-frame data, round-robin so we can overlap submission with execution of the previous frame.
	struct FrameData {
		std::mutex push_mutex;
		std::condition_variable push_condVar;

		std::mutex pull_mutex;
		std::condition_variable pull_condVar;

		bool readyForFence = true;
		bool readyForRun = false;
		bool skipSwap = false;
		VKRRunType type = VKRRunType::END;

		VkFence fence;
		VkFence readbackFence; // Strictly speaking we might only need one of these.
		bool readbackFenceUsed = false;

		// These are on different threads so need separate pools.
		VkCommandPool cmdPoolInit;
		VkCommandPool cmdPoolMain;
		VkCommandBuffer initCmd;
		VkCommandBuffer mainCmd;
		bool hasInitCommands = false;
		std::vector<VKRStep *> steps;

		// Swapchain.
		bool hasBegun = false;
		uint32_t curSwapchainImage = -1;

		// Profiling.
		QueueProfileContext profile;
		bool profilingEnabled_;
	};
	FrameDataShared frameDataShared_;

	FrameData frameData_[VulkanContext::MAX_INFLIGHT_FRAMES];
	int newInflightFrames_ = -1;
@ -544,11 +494,10 @@ private:
	VKRStep *curRenderStep_ = nullptr;
	bool curStepHasViewport_ = false;
	bool curStepHasScissor_ = false;
	u32 curPipelineFlags_ = 0;
	PipelineFlags curPipelineFlags_{};
	BoundingRect curRenderArea_;

	std::vector<VKRStep *> steps_;
	bool splitSubmit_ = false;

	// Execution time state
	bool run_ = true;
@ -568,23 +517,4 @@ private:

	// pipelines to check and possibly create at the end of the current render pass.
	std::vector<VKRGraphicsPipeline *> pipelinesToCheck_;

	// Swap chain management
	struct SwapchainImageData {
		VkImage image;
		VkImageView view;
	};
	std::vector<VkFramebuffer> framebuffers_;
	std::vector<SwapchainImageData> swapchainImages_;
	uint32_t swapchainImageCount_ = 0;
	struct DepthBufferInfo {
		VkFormat format = VK_FORMAT_UNDEFINED;
		VkImage image = VK_NULL_HANDLE;
		VmaAllocation alloc = VK_NULL_HANDLE;
		VkImageView view = VK_NULL_HANDLE;
	};
	DepthBufferInfo depth_;

	// This works great - except see issue #10097. WTF?
	bool useThread_ = true;
};

@ -361,7 +361,7 @@ class VKFramebuffer;

class VKContext : public DrawContext {
public:
	VKContext(VulkanContext *vulkan, bool splitSubmit);
	VKContext(VulkanContext *vulkan);
	virtual ~VKContext();

	const DeviceCaps &GetDeviceCaps() const override {
@ -401,9 +401,10 @@ public:
	// These functions should be self explanatory.
	void BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPassInfo &rp, const char *tag) override;
	Framebuffer *GetCurrentRenderTarget() override {
		return curFramebuffer_;
		return (Framebuffer *)curFramebuffer_.ptr;
	}
	void BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int attachment) override;
	void BindCurrentFramebufferForColorInput() override;

	void GetFramebufferDimensions(Framebuffer *fbo, int *w, int *h) override;

@ -473,27 +474,7 @@ public:
	std::vector<std::string> GetFeatureList() const override;
	std::vector<std::string> GetExtensionList() const override;

	uint64_t GetNativeObject(NativeObject obj, void *srcObject) override {
		switch (obj) {
		case NativeObject::CONTEXT:
			return (uint64_t)vulkan_;
		case NativeObject::INIT_COMMANDBUFFER:
			return (uint64_t)renderManager_.GetInitCmd();
		case NativeObject::BOUND_TEXTURE0_IMAGEVIEW:
			return (uint64_t)boundImageView_[0];
		case NativeObject::BOUND_TEXTURE1_IMAGEVIEW:
			return (uint64_t)boundImageView_[1];
		case NativeObject::RENDER_MANAGER:
			return (uint64_t)(uintptr_t)&renderManager_;
		case NativeObject::NULL_IMAGEVIEW:
			return (uint64_t)GetNullTexture()->GetImageView();
		case NativeObject::TEXTURE_VIEW:
			return (uint64_t)(((VKTexture *)srcObject)->GetImageView());
		default:
			Crash();
			return 0;
		}
	}
	uint64_t GetNativeObject(NativeObject obj, void *srcObject) override;

	void HandleEvent(Event ev, int width, int height, void *param1, void *param2) override;

@ -522,7 +503,7 @@ private:
	VkDescriptorSetLayout descriptorSetLayout_ = VK_NULL_HANDLE;
	VkPipelineLayout pipelineLayout_ = VK_NULL_HANDLE;
	VkPipelineCache pipelineCache_ = VK_NULL_HANDLE;
	AutoRef<Framebuffer> curFramebuffer_;
	AutoRef<VKFramebuffer> curFramebuffer_;

	VkDevice device_;
	VkQueue queue_;
@ -781,7 +762,7 @@ bool VKTexture::Create(VkCommandBuffer cmd, VulkanPushBuffer *push, const Textur
	return true;
}

VKContext::VKContext(VulkanContext *vulkan, bool splitSubmit)
VKContext::VKContext(VulkanContext *vulkan)
	: vulkan_(vulkan), renderManager_(vulkan) {
	shaderLanguageDesc_.Init(GLSL_VULKAN);

@ -807,9 +788,11 @@ VKContext::VKContext(VulkanContext *vulkan, bool splitSubmit)
	caps_.fragmentShaderInt32Supported = true;
	caps_.textureNPOTFullySupported = true;
	caps_.fragmentShaderDepthWriteSupported = true;
	caps_.blendMinMaxSupported = true;
	caps_.logicOpSupported = vulkan->GetDeviceFeatures().enabled.logicOp != 0;

	auto deviceProps = vulkan->GetPhysicalDeviceProperties(vulkan_->GetCurrentPhysicalDeviceIndex()).properties;

	switch (deviceProps.vendorID) {
	case VULKAN_VENDOR_AMD: caps_.vendor = GPUVendor::VENDOR_AMD; break;
	case VULKAN_VENDOR_ARM: caps_.vendor = GPUVendor::VENDOR_ARM; break;
@ -831,6 +814,11 @@ VKContext::VKContext(VulkanContext *vulkan, bool splitSubmit)
		// Color write mask not masking write in certain scenarios with a depth test, see #10421.
		// Known still present on driver 0x80180000 and Adreno 5xx (possibly more.)
		bugs_.Infest(Bugs::COLORWRITEMASK_BROKEN_WITH_DEPTHTEST);

		// Trying to follow all the rules in https://registry.khronos.org/vulkan/specs/1.3/html/vkspec.html#synchronization-pipeline-barriers-subpass-self-dependencies
		// and https://registry.khronos.org/vulkan/specs/1.3/html/vkspec.html#renderpass-feedbackloop, but still it doesn't
		// quite work - artifacts on triangle boundaries on Adreno.
		bugs_.Infest(Bugs::SUBPASS_FEEDBACK_BROKEN);
	} else if (caps_.vendor == GPUVendor::VENDOR_AMD) {
		// See issue #10074, and also #10065 (AMD) and #10109 for the choice of the driver version to check for.
		if (deviceProps.driverVersion < 0x00407000) {
@ -840,19 +828,27 @@ VKContext::VKContext(VulkanContext *vulkan, bool splitSubmit)
		// Workaround for Intel driver bug. TODO: Re-enable after some driver version
		bugs_.Infest(Bugs::DUAL_SOURCE_BLENDING_BROKEN);
	} else if (caps_.vendor == GPUVendor::VENDOR_ARM) {
		int majorVersion = VK_API_VERSION_MAJOR(deviceProps.driverVersion);

		// These GPUs (up to some certain hardware version?) have a bug where draws where gl_Position.w == .z
		// corrupt the depth buffer. This is easily worked around by simply scaling Z down a tiny bit when this case
		// is detected. See: https://github.com/hrydgard/ppsspp/issues/11937
		bugs_.Infest(Bugs::EQUAL_WZ_CORRUPTS_DEPTH);
		// At least one driver at the upper end of the range is known to be likely to suffer from the bug causing issue #13833 (Midnight Club map broken).
		bugs_.Infest(Bugs::MALI_STENCIL_DISCARD_BUG);

		// This started in driver 31 or 32.
		if (VK_API_VERSION_MAJOR(deviceProps.driverVersion) >= 32) {
			// Nearly identical to the Adreno bug, see #13833 (Midnight Club map broken) and other issues.
			// Reported fixed in major version 40 - let's add a check once confirmed.
			bugs_.Infest(Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL);

		// This started in driver 31 or 32, fixed in 40 - let's add a check once confirmed.
		if (majorVersion >= 32) {
			bugs_.Infest(Bugs::MALI_CONSTANT_LOAD_BUG); // See issue #15661
		}
	}

	// Limited, through input attachments and self-dependencies.
	// We turn it off here already if buggy.
	caps_.framebufferFetchSupported = !bugs_.Has(Bugs::SUBPASS_FEEDBACK_BROKEN);

	caps_.deviceID = deviceProps.deviceID;
	device_ = vulkan->GetDevice();

@ -920,8 +916,6 @@ VKContext::VKContext(VulkanContext *vulkan, bool splitSubmit)
	VkPipelineCacheCreateInfo pc{ VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO };
	res = vkCreatePipelineCache(vulkan_->GetDevice(), &pc, nullptr, &pipelineCache_);
	_assert_(VK_SUCCESS == res);

	renderManager_.SetSplitSubmit(splitSubmit);
}

VKContext::~VKContext() {
@ -1058,12 +1052,12 @@ Pipeline *VKContext::CreateGraphicsPipeline(const PipelineDesc &desc, const char
	VKDepthStencilState *depth = (VKDepthStencilState *)desc.depthStencil;
	VKRasterState *raster = (VKRasterState *)desc.raster;

	u32 pipelineFlags = 0;
	PipelineFlags pipelineFlags = (PipelineFlags)0;
	if (depth->info.depthTestEnable || depth->info.stencilTestEnable) {
		pipelineFlags |= PIPELINE_FLAG_USES_DEPTH_STENCIL;
		pipelineFlags |= PipelineFlags::USES_DEPTH_STENCIL;
	}

	VKPipeline *pipeline = new VKPipeline(vulkan_, desc.uniformDesc ? desc.uniformDesc->uniformBufferSize : 16 * sizeof(float), (PipelineFlags)pipelineFlags, tag);
	VKPipeline *pipeline = new VKPipeline(vulkan_, desc.uniformDesc ? desc.uniformDesc->uniformBufferSize : 16 * sizeof(float), pipelineFlags, tag);

	VKRGraphicsPipelineDesc &gDesc = pipeline->vkrDesc;

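The explicit (PipelineFlags)0 casts are the usual cost of switching a bitmask from u32 to an enum class; for |= and & to compile at all, operator overloads along these lines must exist somewhere (a sketch, not necessarily PPSSPP's exact definitions):

	inline PipelineFlags operator|(PipelineFlags a, PipelineFlags b) {
		return (PipelineFlags)((uint32_t)a | (uint32_t)b);
	}
	inline PipelineFlags &operator|=(PipelineFlags &a, PipelineFlags b) {
		a = a | b;
		return a;
	}
	inline bool operator&(PipelineFlags a, PipelineFlags b) {
		return ((uint32_t)a & (uint32_t)b) != 0;
	}
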
@ -1401,8 +1395,8 @@ void VKContext::Clear(int clearMask, uint32_t colorval, float depthVal, int sten
	renderManager_.Clear(colorval, depthVal, stencilVal, mask);
}

DrawContext *T3DCreateVulkanContext(VulkanContext *vulkan, bool split) {
	return new VKContext(vulkan, split);
DrawContext *T3DCreateVulkanContext(VulkanContext *vulkan) {
	return new VKContext(vulkan);
}

void AddFeature(std::vector<std::string> &features, const char *name, VkBool32 available, VkBool32 enabled) {
@ -1584,6 +1578,10 @@ void VKContext::BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChanne
	boundImageView_[binding] = renderManager_.BindFramebufferAsTexture(fb->GetFB(), binding, aspect, attachment);
}

void VKContext::BindCurrentFramebufferForColorInput() {
	renderManager_.BindCurrentFramebufferAsInputAttachment0(VK_IMAGE_ASPECT_COLOR_BIT);
}

void VKContext::GetFramebufferDimensions(Framebuffer *fbo, int *w, int *h) {
	VKFramebuffer *fb = (VKFramebuffer *)fbo;
	if (fb) {
@ -1624,4 +1622,28 @@ void VKContext::InvalidateFramebuffer(FBInvalidationStage stage, uint32_t channe
	}
}

uint64_t VKContext::GetNativeObject(NativeObject obj, void *srcObject) {
	switch (obj) {
	case NativeObject::CONTEXT:
		return (uint64_t)vulkan_;
	case NativeObject::INIT_COMMANDBUFFER:
		return (uint64_t)renderManager_.GetInitCmd();
	case NativeObject::BOUND_TEXTURE0_IMAGEVIEW:
		return (uint64_t)boundImageView_[0];
	case NativeObject::BOUND_TEXTURE1_IMAGEVIEW:
		return (uint64_t)boundImageView_[1];
	case NativeObject::RENDER_MANAGER:
		return (uint64_t)(uintptr_t)&renderManager_;
	case NativeObject::NULL_IMAGEVIEW:
		return (uint64_t)GetNullTexture()->GetImageView();
	case NativeObject::TEXTURE_VIEW:
		return (uint64_t)(((VKTexture *)srcObject)->GetImageView());
	case NativeObject::BOUND_FRAMEBUFFER_COLOR_IMAGEVIEW:
		return (uint64_t)curFramebuffer_->GetFB()->color.imageView;
	default:
		Crash();
		return 0;
	}
}

} // namespace Draw

@ -678,9 +678,9 @@ const char *Bugs::GetBugName(uint32_t bug) {
	case COLORWRITEMASK_BROKEN_WITH_DEPTHTEST: return "COLORWRITEMASK_BROKEN_WITH_DEPTHTEST";
	case BROKEN_FLAT_IN_SHADER: return "BROKEN_FLAT_IN_SHADER";
	case EQUAL_WZ_CORRUPTS_DEPTH: return "EQUAL_WZ_CORRUPTS_DEPTH";
	case MALI_STENCIL_DISCARD_BUG: return "MALI_STENCIL_DISCARD_BUG";
	case RASPBERRY_SHADER_COMP_HANG: return "RASPBERRY_SHADER_COMP_HANG";
	case MALI_CONSTANT_LOAD_BUG: return "MALI_CONSTANT_LOAD_BUG";
	case SUBPASS_FEEDBACK_BROKEN: return "SUBPASS_FEEDBACK_BROKEN";
	default: return "(N/A)";
	}
}

@ -242,6 +242,7 @@ enum class NativeObject {
	INIT_COMMANDBUFFER,
	BOUND_TEXTURE0_IMAGEVIEW,
	BOUND_TEXTURE1_IMAGEVIEW,
	BOUND_FRAMEBUFFER_COLOR_IMAGEVIEW,
	RENDER_MANAGER,
	TEXTURE_VIEW,
	NULL_IMAGEVIEW,
@ -328,9 +329,9 @@ public:
	COLORWRITEMASK_BROKEN_WITH_DEPTHTEST = 5,
	BROKEN_FLAT_IN_SHADER = 6,
	EQUAL_WZ_CORRUPTS_DEPTH = 7,
	MALI_STENCIL_DISCARD_BUG = 8,
	RASPBERRY_SHADER_COMP_HANG = 9,
	MALI_CONSTANT_LOAD_BUG = 10,
	RASPBERRY_SHADER_COMP_HANG = 8,
	MALI_CONSTANT_LOAD_BUG = 9,
	SUBPASS_FEEDBACK_BROKEN = 10,
	MAX_BUG,
};

@ -546,6 +547,7 @@ struct DeviceCaps {
	bool textureNPOTFullySupported;
	bool fragmentShaderDepthWriteSupported;
	bool textureDepthSupported;
	bool blendMinMaxSupported;

	std::string deviceName; // The device name to use when creating the thin3d context, to get the same one.
};
@ -651,6 +653,9 @@ public:
	// binding must be < MAX_TEXTURE_SLOTS (0, 1 are okay if it's 2).
	virtual void BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int attachment) = 0;

	// Framebuffer fetch / input attachment support, needs to be explicit in Vulkan.
	virtual void BindCurrentFramebufferForColorInput() {}

	// deprecated, only used by D3D9
	virtual uintptr_t GetFramebufferAPITexture(Framebuffer *fbo, int channelBits, int attachment) {
		return 0;

@ -31,6 +31,6 @@ DrawContext *T3DCreateDX9Context(IDirect3D9 *d3d, IDirect3D9Ex *d3dEx, int adapt
DrawContext *T3DCreateD3D11Context(ID3D11Device *device, ID3D11DeviceContext *context, ID3D11Device1 *device1, ID3D11DeviceContext1 *context1, D3D_FEATURE_LEVEL featureLevel, HWND hWnd, std::vector<std::string> adapterNames);
#endif

DrawContext *T3DCreateVulkanContext(VulkanContext *context, bool splitSubmit);
DrawContext *T3DCreateVulkanContext(VulkanContext *context);

} // namespace Draw

@ -170,7 +170,7 @@ void UIContext::ActivateTopScissor() {
	int h = std::max(0.0f, ceilf(scale_y * bounds.h));
	if (x < 0 || y < 0 || x + w > pixel_xres || y + h > pixel_yres) {
		// This won't actually report outside a game, but we can try.
		ERROR_LOG_REPORT(G3D, "UI scissor out of bounds: %d,%d-%d,%d / %d,%d", x, y, w, h, pixel_xres, pixel_yres);
		ERROR_LOG_REPORT(G3D, "UI scissor out of bounds in %sScreen: %d,%d-%d,%d / %d,%d", screenTag_ ? screenTag_ : "N/A", x, y, w, h, pixel_xres, pixel_yres);
		x = std::max(0, x);
		y = std::max(0, y);
		w = std::min(w, pixel_xres - x);

@ -74,7 +74,6 @@ public:
	const UI::Theme *theme;

	// Utility methods

	TextDrawer *Text() const { return textDrawer_; }

	void SetFontStyle(const UI::FontStyle &style);
@ -103,6 +102,10 @@ public:

	void setUIAtlas(const std::string &name);

	void SetScreenTag(const char *tag) {
		screenTag_ = tag;
	}

private:
	Draw::DrawContext *draw_ = nullptr;
	Bounds bounds_;
@ -126,4 +129,6 @@ private:

	std::string lastUIAtlas_;
	std::string UIAtlas_ = "ui_atlas.zim";

	const char *screenTag_ = nullptr;
};

@ -71,7 +71,7 @@ public:
	// what screen it is.
	virtual void *dialogData() { return 0; }

	virtual std::string tag() const { return std::string(""); }
	virtual const char *tag() const = 0;

	virtual bool isTransparent() const { return false; }
	virtual bool isTopLevel() const { return false; }

@ -117,6 +117,9 @@ void UIScreen::render() {

	if (root_) {
		UIContext *uiContext = screenManager()->getUIContext();

		uiContext->SetScreenTag(tag());

		UI::LayoutViewHierarchy(*uiContext, root_, ignoreInsets_);

		uiContext->PushTransform({translation_, scale_, alpha_});

@ -136,7 +136,7 @@ public:
	void SetHiddenChoices(std::set<int> hidden) {
		hidden_ = hidden;
	}
	virtual std::string tag() const override { return std::string("listpopup"); }
	const char *tag() const override { return "listpopup"; }

	UI::Event OnChoice;

@ -187,6 +187,8 @@ public:
		disabled_ = *value_ < 0;
	}

	const char *tag() const override { return "SliderPopup"; }

	Event OnChange;

private:
@ -214,6 +216,8 @@ public:
		: PopupScreen(title, "OK", "Cancel"), units_(units), value_(value), originalValue_(*value), minValue_(minValue), maxValue_(maxValue), step_(step), changing_(false), liveUpdate_(liveUpdate) {}
	void CreatePopupContents(UI::ViewGroup *parent) override;

	const char *tag() const override { return "SliderFloatPopup"; }

	Event OnChange;

private:
@ -241,6 +245,8 @@ public:
		: PopupScreen(title, "OK", "Cancel"), value_(value), placeholder_(placeholder), maxLen_(maxLen) {}
	virtual void CreatePopupContents(ViewGroup *parent) override;

	const char *tag() const override { return "TextEditPopup"; }

	Event OnChange;

private:

@ -9,6 +9,7 @@
#include <GLES3/gl3.h>
#include <GLES3/gl3ext.h>

XrFovf fov;
XrView* projections;
XrPosef invViewTransform[2];
XrFrameState frameState = {};
@ -293,7 +294,12 @@ bool VR_InitFrame( engine_t* engine ) {
			projections));
	//

	fov = {};
	for (int eye = 0; eye < ovrMaxNumEyes; eye++) {
		fov.angleLeft += projections[eye].fov.angleLeft / 2.0f;
		fov.angleRight += projections[eye].fov.angleRight / 2.0f;
		fov.angleUp += projections[eye].fov.angleUp / 2.0f;
		fov.angleDown += projections[eye].fov.angleDown / 2.0f;
		invViewTransform[eye] = projections[eye].pose;
	}

@ -353,10 +359,7 @@ void VR_FinishFrame( engine_t* engine ) {
	for (int eye = 0; eye < ovrMaxNumEyes; eye++) {
		int imageLayer = engine->appState.Renderer.Multiview ? eye : 0;
		ovrFramebuffer* frameBuffer = &engine->appState.Renderer.FrameBuffer[0];
		XrFovf fov = projections[eye].fov;
		if (vrMode == VR_MODE_MONO_6DOF) {
			fov = projections[0].fov;
		} else if (!engine->appState.Renderer.Multiview) {
		if ((vrMode != VR_MODE_MONO_6DOF) && !engine->appState.Renderer.Multiview) {
			frameBuffer = &engine->appState.Renderer.FrameBuffer[eye];
		}

@ -463,7 +466,6 @@ void VR_BindFramebuffer(engine_t *engine) {
ovrMatrix4f VR_GetMatrix( VRMatrix matrix ) {
	ovrMatrix4f output;
	if ((matrix == VR_PROJECTION_MATRIX_LEFT_EYE) || (matrix == VR_PROJECTION_MATRIX_RIGHT_EYE)) {
		XrFovf fov = matrix == VR_PROJECTION_MATRIX_LEFT_EYE ? projections[0].fov : projections[1].fov;
		float near = (float)vrConfig[VR_CONFIG_FOV_SCALE] / 200.0f;
		output = ovrMatrix4f_CreateProjectionFov(fov.angleLeft, fov.angleRight, fov.angleUp, fov.angleDown, near, 0.0f );
	} else if ((matrix == VR_VIEW_MATRIX_LEFT_EYE) || (matrix == VR_VIEW_MATRIX_RIGHT_EYE)) {

@ -938,7 +938,6 @@ static ConfigSetting graphicsSettings[] = {
	ReportedConfigSetting("FragmentTestCache", &g_Config.bFragmentTestCache, true, true, true),

	ConfigSetting("GfxDebugOutput", &g_Config.bGfxDebugOutput, false, false, false),
	ConfigSetting("GfxDebugSplitSubmit", &g_Config.bGfxDebugSplitSubmit, false, false, false),
	ConfigSetting("LogFrameDrops", &g_Config.bLogFrameDrops, false, true, false),

	ConfigSetting("InflightFrames", &g_Config.iInflightFrames, 3, true, false),

@ -176,6 +176,7 @@ public:
	bool bSustainedPerformanceMode; // Android: Slows clocks down to avoid overheating/speed fluctuations.
	bool bIgnoreScreenInsets; // Android: Center screen disregarding insets if this is enabled.
	bool bVSync;

	int iFrameSkip;
	int iFrameSkipType;
	int iFastForwardMode; // See FastForwardMode in ConfigValues.h.
@ -242,7 +243,6 @@ public:
	bool bShaderChainRequires60FPS;
	std::string sTextureShaderName;
	bool bGfxDebugOutput;
	bool bGfxDebugSplitSubmit;
	int iInflightFrames;
	bool bRenderDuplicateFrames;

@ -36,8 +36,9 @@ const static u32 GAMEDATA_BYTES_PER_READ = 32768;
// If this is too high, some games (e.g. Senjou no Valkyria 3) will lag.
const static u32 GAMEDATA_READS_PER_UPDATE = 20;

const u32 ERROR_UTILITY_GAMEDATA_MEMSTRICK_WRITE_PROTECTED = 0x80111903;
const u32 ERROR_UTILITY_GAMEDATA_MEMSTRICK_REMOVED = 0x80111901;
const u32 ERROR_UTILITY_GAMEDATA_MEMSTRICK_WRITE_PROTECTED = 0x80111903;
const u32 ERROR_UTILITY_GAMEDATA_INVALID_MODE = 0x80111908;

static const std::string SFO_FILENAME = "PARAM.SFO";

@ -88,9 +89,14 @@ int PSPGamedataInstallDialog::Init(u32 paramAddr) {
	}

	int size = Memory::Read_U32(paramAddr);
	if (size != 1424 && size != 1432) {
		ERROR_LOG_REPORT(SCEUTILITY, "sceGamedataInstallInitStart: invalid param size %d", size);
		return SCE_ERROR_UTILITY_INVALID_PARAM_SIZE;
	}

	memset(&request, 0, sizeof(request));
	// Only copy the right size to support different request formats.
	Memory::Memcpy(&request, paramAddr, size);
	Memory::Memcpy(&request, paramAddr, size, "sceGamedataInstallInitStart");

	ChangeStatusInit(GAMEDATA_INIT_DELAY_US);
	return 0;
@ -100,6 +106,17 @@ int PSPGamedataInstallDialog::Update(int animSpeed) {
	if (GetStatus() != SCE_UTILITY_STATUS_RUNNING)
		return SCE_ERROR_UTILITY_INVALID_STATUS;

	if (param->mode >= 2) {
		param->common.result = ERROR_UTILITY_GAMEDATA_INVALID_MODE;
		param.NotifyWrite("DialogResult");
		ChangeStatus(SCE_UTILITY_STATUS_FINISHED, 0);
		WARN_LOG_REPORT(SCEUTILITY, "sceUtilityGamedataInstallUpdate: invalid mode %d", param->mode);
		return 0;
	}

	// TODO: param->mode == 1 should show a prompt to confirm, then a progress bar.
	// Any other mode (i.e. 0 or negative) should proceed and show no UI.

	// TODO: This should return error codes in some cases, like write failure.
	// request.common.result must be updated for errors as well.

@ -222,6 +239,9 @@ void PSPGamedataInstallDialog::WriteSfoFile() {
}

int PSPGamedataInstallDialog::Abort() {
	param->common.result = 1;
	param.NotifyWrite("DialogResult");

	// TODO: Delete the files or anything?
	return PSPDialog::Shutdown();
}

@ -22,7 +22,7 @@

struct SceUtilityGamedataInstallParam {
pspUtilityDialogCommon common;
u32_le unknown1;
s32_le mode;
char gameName[13];
char ignore1[3];
char dataName[20];
@ -820,7 +820,7 @@ const HLEFunction ThreadManForUser[] =
{0X87D4DD36, &WrapI_IU<sceKernelCancelReceiveMbx>, "sceKernelCancelReceiveMbx", 'i', "ix" },
{0XA8E8C846, &WrapI_IU<sceKernelReferMbxStatus>, "sceKernelReferMbxStatus", 'i', "ip" },

{0X7C0DC2A0, &WrapI_CIUUU<sceKernelCreateMsgPipe>, "sceKernelCreateMsgPipe", 'i', "sixxx" },
{0X7C0DC2A0, &WrapI_CIUUU<sceKernelCreateMsgPipe>, "sceKernelCreateMsgPipe", 'i', "sixxp" },
{0XF0B7DA1C, &WrapI_I<sceKernelDeleteMsgPipe>, "sceKernelDeleteMsgPipe", 'i', "i" },
{0X876DBFAD, &WrapI_IUUUUU<sceKernelSendMsgPipe>, "sceKernelSendMsgPipe", 'i', "ixxxxx" },
{0X7C41F2C2, &WrapI_IUUUUU<sceKernelSendMsgPipeCB>, "sceKernelSendMsgPipeCB", 'i', "ixxxxx" },

@ -831,7 +831,7 @@ const HLEFunction ThreadManForUser[] =
{0X349B864D, &WrapI_IUU<sceKernelCancelMsgPipe>, "sceKernelCancelMsgPipe", 'i', "ixx" },
{0X33BE4024, &WrapI_IU<sceKernelReferMsgPipeStatus>, "sceKernelReferMsgPipeStatus", 'i', "ip" },

{0X56C039B5, &WrapI_CIUUU<sceKernelCreateVpl>, "sceKernelCreateVpl", 'i', "sixxx" },
{0X56C039B5, &WrapI_CIUUU<sceKernelCreateVpl>, "sceKernelCreateVpl", 'i', "sixxp" },
{0X89B3D48C, &WrapI_I<sceKernelDeleteVpl>, "sceKernelDeleteVpl", 'i', "i" },
{0XBED27435, &WrapI_IUUU<sceKernelAllocateVpl>, "sceKernelAllocateVpl", 'i', "ixxx", HLE_NOT_IN_INTERRUPT | HLE_NOT_DISPATCH_SUSPENDED },
{0XEC0A693F, &WrapI_IUUU<sceKernelAllocateVplCB>, "sceKernelAllocateVplCB", 'i', "ixxx", HLE_NOT_IN_INTERRUPT | HLE_NOT_DISPATCH_SUSPENDED },

@ -840,7 +840,7 @@ const HLEFunction ThreadManForUser[] =
{0X1D371B8A, &WrapI_IU<sceKernelCancelVpl>, "sceKernelCancelVpl", 'i', "ix" },
{0X39810265, &WrapI_IU<sceKernelReferVplStatus>, "sceKernelReferVplStatus", 'i', "ip" },

{0XC07BB470, &WrapI_CUUUUU<sceKernelCreateFpl>, "sceKernelCreateFpl", 'i', "sxxxxx" },
{0XC07BB470, &WrapI_CUUUUU<sceKernelCreateFpl>, "sceKernelCreateFpl", 'i', "sixxxp" },
{0XED1410E0, &WrapI_I<sceKernelDeleteFpl>, "sceKernelDeleteFpl", 'i', "i" },
{0XD979E9BF, &WrapI_IUU<sceKernelAllocateFpl>, "sceKernelAllocateFpl", 'i', "ixx", HLE_NOT_IN_INTERRUPT | HLE_NOT_DISPATCH_SUSPENDED },
{0XE7282CB6, &WrapI_IUU<sceKernelAllocateFplCB>, "sceKernelAllocateFplCB", 'i', "ixx", HLE_NOT_IN_INTERRUPT | HLE_NOT_DISPATCH_SUSPENDED },

@ -864,7 +864,7 @@ const HLEFunction ThreadManForUser[] =
{0XD8B299AE, &WrapU_IUUU<sceKernelSetVTimerHandler>, "sceKernelSetVTimerHandler", 'x', "ixxx" },
{0X53B00E9A, &WrapU_IU64UU<sceKernelSetVTimerHandlerWide>, "sceKernelSetVTimerHandlerWide", 'x', "iXxx" },

{0X8DAFF657, &WrapI_CUUUUU<sceKernelCreateTlspl>, "sceKernelCreateTlspl", 'i', "sxxxxx" },
{0X8DAFF657, &WrapI_CUUUUU<sceKernelCreateTlspl>, "sceKernelCreateTlspl", 'i', "sixxxp" },
{0X32BF938E, &WrapI_I<sceKernelDeleteTlspl>, "sceKernelDeleteTlspl", 'i', "i" },
{0X721067F3, &WrapI_IU<sceKernelReferTlsplStatus>, "sceKernelReferTlsplStatus", 'i', "xp" },
// Not completely certain about args.

@ -908,7 +908,7 @@ const HLEFunction ThreadManForKernel[] =
{0x1fb15a32, &WrapU_IU<sceKernelSetEventFlag>, "sceKernelSetEventFlag", 'x', "ix", HLE_KERNEL_SYSCALL },
{0x812346e4, &WrapU_IU<sceKernelClearEventFlag>, "sceKernelClearEventFlag", 'x', "ix", HLE_KERNEL_SYSCALL },
{0x402fcf22, &WrapI_IUUUU<sceKernelWaitEventFlag>, "sceKernelWaitEventFlag", 'i', "ixxpp", HLE_NOT_IN_INTERRUPT | HLE_KERNEL_SYSCALL},
{0xc07bb470, &WrapI_CUUUUU<sceKernelCreateFpl>, "sceKernelCreateFpl", 'i', "sxxxxx" ,HLE_KERNEL_SYSCALL },
{0xc07bb470, &WrapI_CUUUUU<sceKernelCreateFpl>, "sceKernelCreateFpl", 'i', "sixxxp" ,HLE_KERNEL_SYSCALL },
{0xed1410e0, &WrapI_I<sceKernelDeleteFpl>, "sceKernelDeleteFpl", 'i', "i" ,HLE_KERNEL_SYSCALL },
{0x623ae665, &WrapI_IU<sceKernelTryAllocateFpl>, "sceKernelTryAllocateFpl", 'i', "ix" ,HLE_KERNEL_SYSCALL },
{0x616403ba, &WrapI_I<sceKernelTerminateThread>, "sceKernelTerminateThread", 'i', "i" ,HLE_KERNEL_SYSCALL },

@ -932,7 +932,7 @@ const HLEFunction ThreadManForKernel[] =
{0x0D81716A, &WrapI_IU<sceKernelPollMbx>, "sceKernelPollMbx", 'i', "ix", HLE_KERNEL_SYSCALL },
{0x87D4DD36, &WrapI_IU<sceKernelCancelReceiveMbx>, "sceKernelCancelReceiveMbx", 'i', "ix", HLE_KERNEL_SYSCALL },
{0xA8E8C846, &WrapI_IU<sceKernelReferMbxStatus>, "sceKernelReferMbxStatus", 'i', "ip", HLE_KERNEL_SYSCALL },
{0x56C039B5, &WrapI_CIUUU<sceKernelCreateVpl>, "sceKernelCreateVpl", 'i', "sixxx", HLE_KERNEL_SYSCALL },
{0x56C039B5, &WrapI_CIUUU<sceKernelCreateVpl>, "sceKernelCreateVpl", 'i', "sixxp", HLE_KERNEL_SYSCALL },
{0x89B3D48C, &WrapI_I<sceKernelDeleteVpl>, "sceKernelDeleteVpl", 'i', "i", HLE_KERNEL_SYSCALL },
{0xBED27435, &WrapI_IUUU<sceKernelAllocateVpl>, "sceKernelAllocateVpl", 'i', "ixxx", HLE_KERNEL_SYSCALL | HLE_NOT_IN_INTERRUPT | HLE_NOT_DISPATCH_SUSPENDED },
{0xEC0A693F, &WrapI_IUUU<sceKernelAllocateVplCB>, "sceKernelAllocateVplCB", 'i', "ixxx", HLE_KERNEL_SYSCALL | HLE_NOT_IN_INTERRUPT | HLE_NOT_DISPATCH_SUSPENDED },
@ -46,6 +46,7 @@ const int TLSPL_NUM_INDEXES = 16;
// STATE BEGIN
BlockAllocator userMemory(256);
BlockAllocator kernelMemory(256);
BlockAllocator volatileMemory(256);

static int vplWaitTimer = -1;
static int fplWaitTimer = -1;

@ -432,6 +433,7 @@ void __KernelMemoryInit()
MemBlockInfoInit();
kernelMemory.Init(PSP_GetKernelMemoryBase(), PSP_GetKernelMemoryEnd() - PSP_GetKernelMemoryBase(), false);
userMemory.Init(PSP_GetUserMemoryBase(), PSP_GetUserMemoryEnd() - PSP_GetUserMemoryBase(), false);
volatileMemory.Init(PSP_GetVolatileMemoryStart(), PSP_GetVolatileMemoryEnd() - PSP_GetVolatileMemoryStart(), false);
ParallelMemset(&g_threadManager, Memory::GetPointerWrite(PSP_GetKernelMemoryBase()), 0, PSP_GetUserMemoryEnd() - PSP_GetKernelMemoryBase());
NotifyMemInfo(MemBlockFlags::WRITE, PSP_GetKernelMemoryBase(), PSP_GetUserMemoryEnd() - PSP_GetKernelMemoryBase(), "MemInit");
INFO_LOG(SCEKERNEL, "Kernel and user memory pools initialized");

@ -457,12 +459,14 @@ void __KernelMemoryInit()

void __KernelMemoryDoState(PointerWrap &p)
{
auto s = p.Section("sceKernelMemory", 1, 2);
auto s = p.Section("sceKernelMemory", 1, 3);
if (!s)
return;

kernelMemory.DoState(p);
userMemory.DoState(p);
if (s >= 3)
volatileMemory.DoState(p);

Do(p, vplWaitTimer);
CoreTiming::RestoreRegisterEvent(vplWaitTimer, "VplTimeout", __KernelVplTimeout);

@ -481,6 +485,11 @@ void __KernelMemoryDoState(PointerWrap &p)

void __KernelMemoryShutdown()
{
#ifdef _DEBUG
INFO_LOG(SCEKERNEL, "Shutting down volatile memory pool: ");
volatileMemory.ListBlocks();
#endif
volatileMemory.Shutdown();
#ifdef _DEBUG
INFO_LOG(SCEKERNEL,"Shutting down user memory pool: ");
userMemory.ListBlocks();

@ -495,6 +504,56 @@ void __KernelMemoryShutdown()
MemBlockInfoShutdown();
}

BlockAllocator *BlockAllocatorFromID(int id) {
switch (id) {
case 1:
case 3:
case 4:
if (hleIsKernelMode())
return &kernelMemory;
return nullptr;

case 2:
case 6:
return &userMemory;

case 8:
case 10:
if (hleIsKernelMode())
return &userMemory;
return nullptr;

case 5:
return &volatileMemory;

default:
break;
}

return nullptr;
}

int BlockAllocatorToID(const BlockAllocator *alloc) {
if (alloc == &kernelMemory)
return 1;
if (alloc == &userMemory)
return 2;
if (alloc == &volatileMemory)
return 5;
return 0;
}

BlockAllocator *BlockAllocatorFromAddr(u32 addr) {
addr &= 0x3FFFFFFF;
if (Memory::IsKernelAndNotVolatileAddress(addr))
return &kernelMemory;
if (Memory::IsKernelAddress(addr))
return &volatileMemory;
if (Memory::IsRAMAddress(addr))
return &userMemory;
return nullptr;
}
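
Note: the three helpers above form a bidirectional mapping between PSP partition IDs, guest addresses, and the three BlockAllocator pools: IDs 1/3/4 resolve to kernel memory (and only in kernel mode), 2/6 and the kernel-only aliases 8/10 to user memory, and 5 to the volatile pool. A hedged usage sketch; the caller names are hypothetical:

    // Allocation goes through the partition ID; freeing recovers the pool from the address.
    u32 AllocInPartition(int partitionId, u32 size, const char *tag) {
        BlockAllocator *alloc = BlockAllocatorFromID(partitionId);
        if (!alloc)
            return (u32)-1;                     // invalid or not-permitted partition
        return alloc->Alloc(size, false, tag);  // false = allocate from the low end
    }

    void FreeByAddress(u32 addr) {
        BlockAllocator *alloc = BlockAllocatorFromAddr(addr);
        if (alloc)
            alloc->Free(addr);
    }
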
enum SceKernelFplAttr
{
PSP_FPL_ATTR_FIFO = 0x0000,

@ -580,29 +639,18 @@ static void __KernelSortFplThreads(FPL *fpl)
std::stable_sort(fpl->waitingThreads.begin(), fpl->waitingThreads.end(), __FplThreadSortPriority);
}

int sceKernelCreateFpl(const char *name, u32 mpid, u32 attr, u32 blockSize, u32 numBlocks, u32 optPtr)
{
int sceKernelCreateFpl(const char *name, u32 mpid, u32 attr, u32 blockSize, u32 numBlocks, u32 optPtr) {
if (!name)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateFpl(): invalid name", SCE_KERNEL_ERROR_NO_MEMORY);
return SCE_KERNEL_ERROR_NO_MEMORY;
}
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_NO_MEMORY, "invalid name");
if (mpid < 1 || mpid > 9 || mpid == 7)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateFpl(): invalid partition %d", SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, mpid);
return SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT;
}
// We only support user right now.
if (mpid != 2 && mpid != 6)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateFpl(): invalid partition %d", SCE_KERNEL_ERROR_ILLEGAL_PERM, mpid);
return SCE_KERNEL_ERROR_ILLEGAL_PERM;
}
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, "invalid partition %d", mpid);

BlockAllocator *allocator = BlockAllocatorFromID(mpid);
if (allocator == nullptr)
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_PERM, "invalid partition %d", mpid);
if (((attr & ~PSP_FPL_ATTR_KNOWN) & ~0xFF) != 0)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateFpl(): invalid attr parameter: %08x", SCE_KERNEL_ERROR_ILLEGAL_ATTR, attr);
return SCE_KERNEL_ERROR_ILLEGAL_ATTR;
}
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ATTR, "invalid attr parameter: %08x", attr);

// There's probably a simpler way to get this same basic formula...
// This is based on results from a PSP.
bool illegalMemSize = blockSize == 0 || numBlocks == 0;

@ -611,25 +659,16 @@ int sceKernelCreateFpl(const char *name, u32 mpid, u32 attr, u32 blockSize, u32
if (!illegalMemSize && (u64) numBlocks >= 0x100000000ULL / (((u64) blockSize + 3ULL) & ~3ULL))
illegalMemSize = true;
if (illegalMemSize)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateFpl(): invalid blockSize/count", SCE_KERNEL_ERROR_ILLEGAL_MEMSIZE);
return SCE_KERNEL_ERROR_ILLEGAL_MEMSIZE;
}
return hleReportWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_MEMSIZE, "invalid blockSize/count");

int alignment = 4;
if (optPtr != 0)
{
u32 size = Memory::Read_U32(optPtr);
if (size > 8)
WARN_LOG_REPORT(SCEKERNEL, "sceKernelCreateFpl(): unsupported extra options, size = %d", size);
if (Memory::IsValidRange(optPtr, 4)) {
u32 size = Memory::ReadUnchecked_U32(optPtr);
if (size >= 4)
alignment = Memory::Read_U32(optPtr + 4);
// Must be a power of 2 to be valid.
if ((alignment & (alignment - 1)) != 0)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateFpl(): invalid alignment %d", SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, alignment);
return SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT;
}
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, "invalid alignment %d", alignment);
}

if (alignment < 4)
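
Note: this validation, like the TLS pool and partition-memory paths later in this diff, leans on the classic power-of-two test (x & (x - 1)) == 0: subtracting one flips the lowest set bit and every bit below it, so the AND is zero exactly when at most one bit is set. A quick worked example:

    // 8  = 0b1000, 7  = 0b0111 -> 8 & 7   == 0       (power of two, accepted)
    // 12 = 0b1100, 11 = 0b1011 -> 12 & 11 == 0b1000  (not a power of two, rejected)
    static inline bool IsPow2OrZero(u32 x) {
        return (x & (x - 1)) == 0;  // also true for 0, which callers handle separately
    }
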
@ -638,9 +677,8 @@ int sceKernelCreateFpl(const char *name, u32 mpid, u32 attr, u32 blockSize, u32
int alignedSize = ((int)blockSize + alignment - 1) & ~(alignment - 1);
u32 totalSize = alignedSize * numBlocks;
bool atEnd = (attr & PSP_FPL_ATTR_HIGHMEM) != 0;
u32 address = userMemory.Alloc(totalSize, atEnd, "FPL");
if (address == (u32)-1)
{
u32 address = allocator->Alloc(totalSize, atEnd, "FPL");
if (address == (u32)-1) {
DEBUG_LOG(SCEKERNEL, "sceKernelCreateFpl(\"%s\", partition=%i, attr=%08x, bsize=%i, nb=%i) FAILED - out of ram",
name, mpid, attr, blockSize, numBlocks);
return SCE_KERNEL_ERROR_NO_MEMORY;

@ -682,7 +720,10 @@ int sceKernelDeleteFpl(SceUID uid)
if (wokeThreads)
hleReSchedule("fpl deleted");

userMemory.Free(fpl->address);
BlockAllocator *alloc = BlockAllocatorFromAddr(fpl->address);
_assert_msg_(alloc != nullptr, "Should always have a valid allocator/address");
if (alloc)
alloc->Free(fpl->address);
return kernelObjects.Destroy<FPL>(uid);
}
else

@ -955,18 +996,23 @@ public:
alloc->Free(address);
}
bool IsValid() {return address != (u32)-1;}
BlockAllocator *alloc;

void DoState(PointerWrap &p) override
{
auto s = p.Section("PMB", 1);
auto s = p.Section("PMB", 1, 2);
if (!s)
return;

Do(p, address);
DoArray(p, name, sizeof(name));
if (s >= 2) {
int allocType = BlockAllocatorToID(alloc);
Do(p, allocType);
alloc = BlockAllocatorFromID(allocType);
}
}

BlockAllocator *alloc;
u32 address;
char name[32];
};
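
Note: the DoState() change above bumps the section version because the new allocator pointer cannot be serialized directly; it is reduced to a small stable ID on save and resolved back to one of the pool singletons on load. The round-trip, schematically (assuming Do() writes or reads depending on the PointerWrap mode, as elsewhere in this diff):

    void DoAllocatorPointer(PointerWrap &p, BlockAllocator *&alloc) {
        int id = BlockAllocatorToID(alloc);  // pointer -> ID (0 if unknown)
        Do(p, id);                           // on load, id is replaced from the stream
        alloc = BlockAllocatorFromID(id);    // ID -> pointer (nullptr if invalid)
    }
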
@ -986,44 +1032,28 @@ static u32 sceKernelTotalFreeMemSize()
return retVal;
}

int sceKernelAllocPartitionMemory(int partition, const char *name, int type, u32 size, u32 addr)
{
if (name == NULL)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelAllocPartitionMemory(): invalid name", SCE_KERNEL_ERROR_ERROR);
return SCE_KERNEL_ERROR_ERROR;
}
if (size == 0)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelAllocPartitionMemory(): invalid size %x", SCE_KERNEL_ERROR_MEMBLOCK_ALLOC_FAILED, size);
return SCE_KERNEL_ERROR_MEMBLOCK_ALLOC_FAILED;
int sceKernelAllocPartitionMemory(int partition, const char *name, int type, u32 size, u32 addr) {
if (type < PSP_SMEM_Low || type > PSP_SMEM_HighAligned)
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_MEMBLOCKTYPE, "invalid type %x", type);
// Alignment is only allowed for powers of 2.
if (type == PSP_SMEM_LowAligned || type == PSP_SMEM_HighAligned) {
if ((addr & (addr - 1)) != 0 || addr == 0)
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ALIGNMENT_SIZE, "invalid alignment %x", addr);
}
if (partition < 1 || partition > 9 || partition == 7)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelAllocPartitionMemory(): invalid partition %x", SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, partition);
return SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT;
}
// We only support user right now.
if (partition != 2 && partition != 5 && partition != 6)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelAllocPartitionMemory(): invalid partition %x", SCE_KERNEL_ERROR_ILLEGAL_PARTITION, partition);
return SCE_KERNEL_ERROR_ILLEGAL_PARTITION;
}
if (type < PSP_SMEM_Low || type > PSP_SMEM_HighAligned)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelAllocPartitionMemory(): invalid type %x", SCE_KERNEL_ERROR_ILLEGAL_MEMBLOCKTYPE, type);
return SCE_KERNEL_ERROR_ILLEGAL_MEMBLOCKTYPE;
}
// Alignment is only allowed for powers of 2.
if ((type == PSP_SMEM_LowAligned || type == PSP_SMEM_HighAligned) && ((addr & (addr - 1)) != 0 || addr == 0))
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelAllocPartitionMemory(): invalid alignment %x", SCE_KERNEL_ERROR_ILLEGAL_ALIGNMENT_SIZE, addr);
return SCE_KERNEL_ERROR_ILLEGAL_ALIGNMENT_SIZE;
}
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, "invalid partition %x", partition);

PartitionMemoryBlock *block = new PartitionMemoryBlock(&userMemory, name, size, (MemblockType)type, addr);
if (!block->IsValid())
{
BlockAllocator *allocator = BlockAllocatorFromID(partition);
if (allocator == nullptr)
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_PARTITION, "invalid partition %x", partition);

if (name == nullptr)
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ERROR, "invalid name");
if (size == 0)
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_MEMBLOCK_ALLOC_FAILED, "invalid size %x", size);

PartitionMemoryBlock *block = new PartitionMemoryBlock(allocator, name, size, (MemblockType)type, addr);
if (!block->IsValid()) {
delete block;
ERROR_LOG(SCEKERNEL, "sceKernelAllocPartitionMemory(partition = %i, %s, type= %i, size= %i, addr= %08x): allocation failed", partition, name, type, size, addr);
return SCE_KERNEL_ERROR_MEMBLOCK_ALLOC_FAILED;

@ -1451,40 +1481,23 @@ static void __KernelSortVplThreads(VPL *vpl)
std::stable_sort(vpl->waitingThreads.begin(), vpl->waitingThreads.end(), __VplThreadSortPriority);
}

SceUID sceKernelCreateVpl(const char *name, int partition, u32 attr, u32 vplSize, u32 optPtr)
{
SceUID sceKernelCreateVpl(const char *name, int partition, u32 attr, u32 vplSize, u32 optPtr) {
if (!name)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateVpl(): invalid name", SCE_KERNEL_ERROR_ERROR);
return SCE_KERNEL_ERROR_ERROR;
}
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ERROR, "invalid name");
if (partition < 1 || partition > 9 || partition == 7)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateVpl(): invalid partition %d", SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, partition);
return SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT;
}
// We only support user right now.
if (partition != 2 && partition != 6)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateVpl(): invalid partition %d", SCE_KERNEL_ERROR_ILLEGAL_PERM, partition);
return SCE_KERNEL_ERROR_ILLEGAL_PERM;
}
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, "invalid partition %d", partition);

BlockAllocator *allocator = BlockAllocatorFromID(partition);
if (allocator == nullptr)
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_PERM, "invalid partition %d", partition);

if (((attr & ~PSP_VPL_ATTR_KNOWN) & ~0xFF) != 0)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateVpl(): invalid attr parameter: %08x", SCE_KERNEL_ERROR_ILLEGAL_ATTR, attr);
return SCE_KERNEL_ERROR_ILLEGAL_ATTR;
}
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ATTR, "invalid attr parameter: %08x", attr);
if (vplSize == 0)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateVpl(): invalid size", SCE_KERNEL_ERROR_ILLEGAL_MEMSIZE);
return SCE_KERNEL_ERROR_ILLEGAL_MEMSIZE;
}
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_MEMSIZE, "invalid size");
// Block Allocator seems to A-OK this, let's stop it here.
if (vplSize >= 0x80000000)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateVpl(): way too big size", SCE_KERNEL_ERROR_NO_MEMORY);
return SCE_KERNEL_ERROR_NO_MEMORY;
}
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_NO_MEMORY, "way too big size");

// Can't have that little space in a Vpl, sorry.
if (vplSize <= 0x30)

@ -1493,12 +1506,9 @@ SceUID sceKernelCreateVpl(const char *name, int partition, u32 attr, u32 vplSize

// We ignore the upalign to 256 and do it ourselves by 8.
u32 allocSize = vplSize;
u32 memBlockPtr = userMemory.Alloc(allocSize, (attr & PSP_VPL_ATTR_HIGHMEM) != 0, "VPL");
u32 memBlockPtr = allocator->Alloc(allocSize, (attr & PSP_VPL_ATTR_HIGHMEM) != 0, "VPL");
if (memBlockPtr == (u32)-1)
{
ERROR_LOG(SCEKERNEL, "sceKernelCreateVpl(): Failed to allocate %i bytes of pool data", vplSize);
return SCE_KERNEL_ERROR_NO_MEMORY;
}
return hleLogError(SCEKERNEL, SCE_KERNEL_ERROR_NO_MEMORY, "failed to allocate %i bytes of pool data", vplSize);

VPL *vpl = new VPL;
SceUID id = kernelObjects.Create(vpl);

@ -1542,7 +1552,10 @@ int sceKernelDeleteVpl(SceUID uid)
if (wokeThreads)
hleReSchedule("vpl deleted");

userMemory.Free(vpl->address);
BlockAllocator *alloc = BlockAllocatorFromAddr(vpl->address);
_assert_msg_(alloc != nullptr, "Should always have a valid allocator/address");
if (alloc)
alloc->Free(vpl->address);
kernelObjects.Destroy<VPL>(uid);
return 0;
}

@ -2044,29 +2057,17 @@ void __KernelTlsplThreadEnd(SceUID threadID)
tlsplThreadEndChecks.erase(locked.first, locked.second);
}

SceUID sceKernelCreateTlspl(const char *name, u32 partition, u32 attr, u32 blockSize, u32 count, u32 optionsPtr)
{
SceUID sceKernelCreateTlspl(const char *name, u32 partition, u32 attr, u32 blockSize, u32 count, u32 optionsPtr) {
if (!name)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateTlspl(): invalid name", SCE_KERNEL_ERROR_NO_MEMORY);
return SCE_KERNEL_ERROR_NO_MEMORY;
}
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_NO_MEMORY, "invalid name");
if ((attr & ~PSP_TLSPL_ATTR_KNOWN) >= 0x100)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateTlspl(): invalid attr parameter: %08x", SCE_KERNEL_ERROR_ILLEGAL_ATTR, attr);
return SCE_KERNEL_ERROR_ILLEGAL_ATTR;
}
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ATTR, "invalid attr parameter: %08x", attr);
if (partition < 1 || partition > 9 || partition == 7)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateTlspl(): invalid partition %d", SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, partition);
return SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT;
}
// We only support user right now.
if (partition != 2 && partition != 6)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateTlspl(): invalid partition %d", SCE_KERNEL_ERROR_ILLEGAL_PERM, partition);
return SCE_KERNEL_ERROR_ILLEGAL_PERM;
}
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, "invalid partition %d", partition);

BlockAllocator *allocator = BlockAllocatorFromID(partition);
if (allocator == nullptr)
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_PERM, "invalid partition %x", partition);

// There's probably a simpler way to get this same basic formula...
// This is based on results from a PSP.

@ -2076,41 +2077,29 @@ SceUID sceKernelCreateTlspl(const char *name, u32 partition, u32 attr, u32 block
if (!illegalMemSize && (u64) count >= 0x100000000ULL / (((u64) blockSize + 3ULL) & ~3ULL))
illegalMemSize = true;
if (illegalMemSize)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateTlspl(): invalid blockSize/count", SCE_KERNEL_ERROR_ILLEGAL_MEMSIZE);
return SCE_KERNEL_ERROR_ILLEGAL_MEMSIZE;
}
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_MEMSIZE, "invalid blockSize/count");

int index = -1;
for (int i = 0; i < TLSPL_NUM_INDEXES; ++i)
if (tlsplUsedIndexes[i] == false)
{
for (int i = 0; i < TLSPL_NUM_INDEXES; ++i) {
if (tlsplUsedIndexes[i] == false) {
index = i;
break;
}
}

if (index == -1)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateTlspl(): ran out of indexes for TLS pools", PSP_ERROR_TOO_MANY_TLSPL);
return PSP_ERROR_TOO_MANY_TLSPL;
}
return hleLogWarning(SCEKERNEL, PSP_ERROR_TOO_MANY_TLSPL, "ran out of indexes for TLS pools");

// Unless otherwise specified, we align to 4 bytes (a mips word.)
u32 alignment = 4;
if (optionsPtr != 0)
{
u32 size = Memory::Read_U32(optionsPtr);
if (size > 8)
WARN_LOG_REPORT(SCEKERNEL, "sceKernelCreateTlspl(%s) unsupported options parameter, size = %d", name, size);
if (Memory::IsValidRange(optionsPtr, 4)) {
u32 size = Memory::ReadUnchecked_U32(optionsPtr);
if (size >= 8)
alignment = Memory::Read_U32(optionsPtr + 4);

// Note that 0 intentionally is allowed.
if ((alignment & (alignment - 1)) != 0)
{
ERROR_LOG_REPORT(SCEKERNEL, "sceKernelCreateTlspl(%s): alignment is not a power of 2: %d", name, alignment);
return SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT;
}
return hleLogError(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, "alignment is not a power of 2: %d", alignment);
// This goes for 0, 1, and 2. Can't have less than 4 byte alignment.
if (alignment < 4)
alignment = 4;

@ -2120,16 +2109,13 @@ SceUID sceKernelCreateTlspl(const char *name, u32 partition, u32 attr, u32 block
u32 alignedSize = (blockSize + alignment - 1) & ~(alignment - 1);

u32 totalSize = alignedSize * count;
u32 blockPtr = userMemory.Alloc(totalSize, (attr & PSP_TLSPL_ATTR_HIGHMEM) != 0, name);
u32 blockPtr = allocator->Alloc(totalSize, (attr & PSP_TLSPL_ATTR_HIGHMEM) != 0, name);
#ifdef _DEBUG
userMemory.ListBlocks();
allocator->ListBlocks();
#endif

if (blockPtr == (u32) -1)
{
ERROR_LOG(SCEKERNEL, "%08x=sceKernelCreateTlspl(%s, %d, %08x, %d, %d, %08x): failed to allocate memory", SCE_KERNEL_ERROR_NO_MEMORY, name, partition, attr, blockSize, count, optionsPtr);
return SCE_KERNEL_ERROR_NO_MEMORY;
}
if (blockPtr == (u32)-1)
return hleLogError(SCEKERNEL, SCE_KERNEL_ERROR_NO_MEMORY, "failed to allocate memory");

TLSPL *tls = new TLSPL();
SceUID id = kernelObjects.Create(tls);

@ -2148,9 +2134,7 @@ SceUID sceKernelCreateTlspl(const char *name, u32 partition, u32 attr, u32 block
tls->alignment = alignment;
tls->usage.resize(count, 0);

WARN_LOG(SCEKERNEL, "%08x=sceKernelCreateTlspl(%s, %d, %08x, %d, %d, %08x)", id, name, partition, attr, blockSize, count, optionsPtr);

return id;
return hleLogSuccessInfoI(SCEKERNEL, id);
}

int sceKernelDeleteTlspl(SceUID uid)

@ -2178,7 +2162,10 @@ int sceKernelDeleteTlspl(SceUID uid)
HLEKernel::ResumeFromWait(threadID, WAITTYPE_TLSPL, uid, 0);
hleReSchedule("deleted tlspl");

userMemory.Free(tls->address);
BlockAllocator *allocator = BlockAllocatorFromAddr(tls->address);
_assert_msg_(allocator != nullptr, "Should always have a valid allocator/address");
if (allocator)
allocator->Free(tls->address);
tlsplUsedIndexes[tls->ntls.index] = false;
kernelObjects.Destroy<TLSPL>(uid);
}
@ -40,6 +40,10 @@ KernelObject *__KernelMemoryVPLObject();
KernelObject *__KernelMemoryPMBObject();
KernelObject *__KernelTlsplObject();

BlockAllocator *BlockAllocatorFromID(int id);
int BlockAllocatorToID(const BlockAllocator *alloc);
BlockAllocator *BlockAllocatorFromAddr(u32 addr);

SceUID sceKernelCreateVpl(const char *name, int partition, u32 attr, u32 vplSize, u32 optPtr);
int sceKernelDeleteVpl(SceUID uid);
int sceKernelAllocateVpl(SceUID uid, u32 size, u32 addrPtr, u32 timeoutPtr);
@ -140,10 +140,13 @@ struct MsgPipe : public KernelObject
int GetIDType() const override { return SCE_KERNEL_TMID_Mpipe; }

MsgPipe() : buffer(0) {}
~MsgPipe()
{
if (buffer != 0)
userMemory.Free(buffer);
~MsgPipe() {
if (buffer != 0) {
BlockAllocator *alloc = BlockAllocatorFromAddr(buffer);
_assert_msg_(alloc != nullptr, "Should always have a valid allocator/address");
if (alloc)
alloc->Free(buffer);
}
}

u32 GetUsedSize()

@ -667,41 +670,26 @@ void __KernelMsgPipeDoState(PointerWrap &p)
CoreTiming::RestoreRegisterEvent(waitTimer, "MsgPipeTimeout", __KernelMsgPipeTimeout);
}

int sceKernelCreateMsgPipe(const char *name, int partition, u32 attr, u32 size, u32 optionsPtr)
{
int sceKernelCreateMsgPipe(const char *name, int partition, u32 attr, u32 size, u32 optionsPtr) {
if (!name)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateMsgPipe(): invalid name", SCE_KERNEL_ERROR_NO_MEMORY);
return SCE_KERNEL_ERROR_NO_MEMORY;
}
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_NO_MEMORY, "invalid name");
if (partition < 1 || partition > 9 || partition == 7)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateMsgPipe(): invalid partition %d", SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, partition);
return SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT;
}
// We only support user right now.
if (partition != 2 && partition != 6)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateMsgPipe(): invalid partition %d", SCE_KERNEL_ERROR_ILLEGAL_PERM, partition);
return SCE_KERNEL_ERROR_ILLEGAL_PERM;
}
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT, "invalid partition %d", partition);

BlockAllocator *allocator = BlockAllocatorFromID(partition);
if (allocator == nullptr)
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_PERM, "invalid partition %d", partition);

if ((attr & ~SCE_KERNEL_MPA_KNOWN) >= 0x100)
{
WARN_LOG_REPORT(SCEKERNEL, "%08x=sceKernelCreateEventFlag(%s): invalid attr parameter: %08x", SCE_KERNEL_ERROR_ILLEGAL_ATTR, name, attr);
return SCE_KERNEL_ERROR_ILLEGAL_ATTR;
}
return hleLogWarning(SCEKERNEL, SCE_KERNEL_ERROR_ILLEGAL_ATTR, "invalid attr parameter: %08x", attr);

u32 memBlockPtr = 0;
if (size != 0)
{
if (size != 0) {
// We ignore the upalign to 256.
u32 allocSize = size;
memBlockPtr = userMemory.Alloc(allocSize, (attr & SCE_KERNEL_MPA_HIGHMEM) != 0, "MsgPipe");
memBlockPtr = allocator->Alloc(allocSize, (attr & SCE_KERNEL_MPA_HIGHMEM) != 0, "MsgPipe");
if (memBlockPtr == (u32)-1)
{
ERROR_LOG(SCEKERNEL, "%08x=sceKernelCreateEventFlag(%s): Failed to allocate %i bytes for buffer", SCE_KERNEL_ERROR_NO_MEMORY, name, size);
return SCE_KERNEL_ERROR_NO_MEMORY;
}
return hleLogError(SCEKERNEL, SCE_KERNEL_ERROR_NO_MEMORY, "failed to allocate %i bytes for buffer", size);
}

MsgPipe *m = new MsgPipe();
@ -744,11 +744,14 @@ static int sceUtilityGamedataInstallInitStart(u32 paramsAddr) {
}

ActivateDialog(UtilityDialogType::GAMEDATAINSTALL);
return hleLogSuccessInfoX(SCEUTILITY, gamedataInstallDialog->Init(paramsAddr));
int result = gamedataInstallDialog->Init(paramsAddr);
if (result < 0)
DeactivateDialog();
return hleLogSuccessInfoX(SCEUTILITY, result);
}

static int sceUtilityGamedataInstallShutdownStart() {
if (currentDialogType != UtilityDialogType::GAMEDATAINSTALL) {
if (!currentDialogActive || currentDialogType != UtilityDialogType::GAMEDATAINSTALL) {
return hleLogWarning(SCEUTILITY, SCE_ERROR_UTILITY_WRONG_TYPE, "wrong dialog type");
}

@ -757,7 +760,7 @@ static int sceUtilityGamedataInstallShutdownStart() {
}

static int sceUtilityGamedataInstallUpdate(int animSpeed) {
if (currentDialogType != UtilityDialogType::GAMEDATAINSTALL) {
if (!currentDialogActive || currentDialogType != UtilityDialogType::GAMEDATAINSTALL) {
return hleLogWarning(SCEUTILITY, SCE_ERROR_UTILITY_WRONG_TYPE, "wrong dialog type");
}

@ -765,8 +768,9 @@ static int sceUtilityGamedataInstallUpdate(int animSpeed) {
}

static int sceUtilityGamedataInstallGetStatus() {
if (currentDialogType != UtilityDialogType::GAMEDATAINSTALL) {
if (!currentDialogActive || currentDialogType != UtilityDialogType::GAMEDATAINSTALL) {
// This is called incorrectly all the time by some games. So let's not bother warning.
hleEatCycles(200);
return hleLogDebug(SCEUTILITY, SCE_ERROR_UTILITY_WRONG_TYPE, "wrong dialog type");
}

@ -776,7 +780,7 @@ static int sceUtilityGamedataInstallGetStatus() {
}

static int sceUtilityGamedataInstallAbort() {
if (currentDialogType != UtilityDialogType::GAMEDATAINSTALL) {
if (!currentDialogActive || currentDialogType != UtilityDialogType::GAMEDATAINSTALL) {
return hleLogWarning(SCEUTILITY, SCE_ERROR_UTILITY_WRONG_TYPE, "wrong dialog type");
}
@ -413,11 +413,11 @@ bool PSP_InitStart(const CoreParameter &coreParam, std::string *error_string) {
}

#if defined(_WIN32) && PPSSPP_ARCH(AMD64)
INFO_LOG(BOOT, "PPSSPP %s Windows 64 bit", PPSSPP_GIT_VERSION);
NOTICE_LOG(BOOT, "PPSSPP %s Windows 64 bit", PPSSPP_GIT_VERSION);
#elif defined(_WIN32) && !PPSSPP_ARCH(AMD64)
INFO_LOG(BOOT, "PPSSPP %s Windows 32 bit", PPSSPP_GIT_VERSION);
NOTICE_LOG(BOOT, "PPSSPP %s Windows 32 bit", PPSSPP_GIT_VERSION);
#else
INFO_LOG(BOOT, "PPSSPP %s", PPSSPP_GIT_VERSION);
NOTICE_LOG(BOOT, "PPSSPP %s", PPSSPP_GIT_VERSION);
#endif

Core_NotifyLifecycle(CoreLifecycle::STARTING);
@ -19,6 +19,7 @@

#include "Common/Data/Convert/ColorConv.h"
#include "Common/Profiler/Profiler.h"
#include "Common/LogReporting.h"
#include "Core/Config.h"
#include "GPU/Common/DrawEngineCommon.h"
#include "GPU/Common/SplineCommon.h"

@ -188,6 +189,57 @@ u32 DrawEngineCommon::NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr,
return DrawEngineCommon::NormalizeVertices(outPtr, bufPtr, inPtr, dec, lowerBound, upperBound, vertType);
}

void DrawEngineCommon::DispatchSubmitImm(GEPrimitiveType prim, TransformedVertex *buffer, int vertexCount, int cullMode, bool continuation) {
// Instead of plumbing through properly (we'd need to inject these pretransformed vertices in the middle
// of SoftwareTransform(), which would take a lot of refactoring), we'll cheat and just turn these into
// through vertices.
// Since the only known use is Thrillville and it only uses it to clear, we just use color and pos.
struct ImmVertex {
float uv[2];
uint32_t color;
float xyz[3];
};
std::vector<ImmVertex> temp;
temp.resize(vertexCount);
uint32_t color1Used = 0;
for (int i = 0; i < vertexCount; i++) {
// Since we're sending through, scale back up to w/h.
temp[i].uv[0] = buffer[i].u * gstate.getTextureWidth(0);
temp[i].uv[1] = buffer[i].v * gstate.getTextureHeight(0);
temp[i].color = buffer[i].color0_32;
temp[i].xyz[0] = buffer[i].pos[0];
temp[i].xyz[1] = buffer[i].pos[1];
temp[i].xyz[2] = buffer[i].pos[2];
color1Used |= buffer[i].color1_32;
}
int vtype = GE_VTYPE_TC_FLOAT | GE_VTYPE_POS_FLOAT | GE_VTYPE_COL_8888 | GE_VTYPE_THROUGH;
// TODO: Handle fog and secondary color somehow?

if (gstate.isFogEnabled() && !gstate.isModeThrough()) {
WARN_LOG_REPORT_ONCE(geimmfog, G3D, "Imm vertex used fog");
}
if (color1Used != 0 && gstate.isUsingSecondaryColor() && !gstate.isModeThrough()) {
WARN_LOG_REPORT_ONCE(geimmcolor1, G3D, "Imm vertex used secondary color");
}

bool prevThrough = gstate.isModeThrough();
// Code checks this reg directly, not just the vtype ID.
if (!prevThrough) {
gstate.vertType |= GE_VTYPE_THROUGH;
gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_UVSCALEOFFSET | DIRTY_CULLRANGE);
}

int bytesRead;
uint32_t vertTypeID = GetVertTypeID(vtype, 0);
SubmitPrim(&temp[0], nullptr, prim, vertexCount, vertTypeID, cullMode, &bytesRead);
DispatchFlush();

if (!prevThrough) {
gstate.vertType &= ~GE_VTYPE_THROUGH;
gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_UVSCALEOFFSET | DIRTY_CULLRANGE);
}
}
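
Note: DispatchSubmitImm() above temporarily forces GE_VTYPE_THROUGH in the live register state and must dirty the same set of state bits both on entry and on exit. The same save/restore could be expressed as a scoped guard; this is purely an illustration of the pattern, not how the code is actually written:

    struct ScopedThroughMode {  // hypothetical RAII wrapper around the toggle above
        bool wasThrough_ = gstate.isModeThrough();
        ScopedThroughMode() {
            if (!wasThrough_) {
                gstate.vertType |= GE_VTYPE_THROUGH;
                gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_UVSCALEOFFSET | DIRTY_CULLRANGE);
            }
        }
        ~ScopedThroughMode() {  // restore and re-dirty on scope exit
            if (!wasThrough_) {
                gstate.vertType &= ~GE_VTYPE_THROUGH;
                gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_UVSCALEOFFSET | DIRTY_CULLRANGE);
            }
        }
    };
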
// This code has plenty of potential for optimization.
//
// It does the simplest and safest test possible: If all points of a bbox is outside a single of

@ -484,12 +536,12 @@ u32 DrawEngineCommon::NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr,
return GE_VTYPE_TC_FLOAT | GE_VTYPE_COL_8888 | GE_VTYPE_NRM_FLOAT | GE_VTYPE_POS_FLOAT | (vertType & (GE_VTYPE_IDX_MASK | GE_VTYPE_THROUGH));
}

void DrawEngineCommon::ApplyFramebufferRead(bool *fboTexNeedsBind) {
void DrawEngineCommon::ApplyFramebufferRead(FBOTexState *fboTexState) {
if (gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH)) {
*fboTexNeedsBind = false;
*fboTexState = FBO_TEX_READ_FRAMEBUFFER;
} else {
gpuStats.numCopiesForShaderBlend++;
*fboTexNeedsBind = true;
*fboTexState = FBO_TEX_COPY_BIND_TEX;
}

gstate_c.Dirty(DIRTY_SHADERBLEND);
@ -46,6 +46,12 @@ enum {
TEX_SLOT_SPLINE_WEIGHTS_V = 6,
};

enum FBOTexState {
FBO_TEX_NONE,
FBO_TEX_COPY_BIND_TEX,
FBO_TEX_READ_FRAMEBUFFER,
};
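
Note: FBOTexState above replaces the old bool out-parameter: backends now distinguish "no framebuffer read this draw", "blit a copy and bind it as a texture", and "read the current framebuffer directly via framebuffer fetch or a Vulkan input attachment". A sketch of how a backend draw path might consume it; the predicate and blit helper are hypothetical:

    FBOTexState fboTexState = FBO_TEX_NONE;
    if (NeedsFramebufferRead())              // e.g. shader blending or color masking
        ApplyFramebufferRead(&fboTexState);  // sets COPY_BIND_TEX or READ_FRAMEBUFFER
    switch (fboTexState) {
    case FBO_TEX_NONE:
        break;                               // nothing to do
    case FBO_TEX_COPY_BIND_TEX:
        CopyCurrentFramebufferToTempTex();   // blit, then bind the copy as fbotex
        break;
    case FBO_TEX_READ_FRAMEBUFFER:
        break;                               // shader reads it directly, nothing to bind
    }
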
inline uint32_t GetVertTypeID(uint32_t vertType, int uvGenMode) {
// As the decoder depends on the UVGenMode when we use UV prescale, we simply mash it
// into the top of the verttype where there are unused bits.

@ -84,10 +90,7 @@ public:
SubmitPrim(verts, inds, prim, vertexCount, vertTypeID, cullMode, bytesRead);
}

virtual void DispatchSubmitImm(const void *verts, const void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead) {
SubmitPrim(verts, inds, prim, vertexCount, vertTypeID, cullMode, bytesRead);
DispatchFlush();
}
virtual void DispatchSubmitImm(GEPrimitiveType prim, TransformedVertex *buffer, int vertexCount, int cullMode, bool continuation);

bool TestBoundingBox(const void* control_points, int vertexCount, u32 vertType, int *bytesRead);

@ -130,7 +133,7 @@ protected:
// Vertex decoding
void DecodeVertsStep(u8 *dest, int &i, int &decodedVerts);

void ApplyFramebufferRead(bool *fboTexNeedsBind);
void ApplyFramebufferRead(FBOTexState *fboTexState);

inline int IndexSize(u32 vtype) const {
const u32 indexType = (vtype & GE_VTYPE_IDX_MASK);
@ -134,10 +134,12 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
GELogicOp replaceLogicOpType = isModeClear ? GE_LOGIC_COPY : (GELogicOp)id.Bits(FS_BIT_REPLACE_LOGIC_OP, 4);
bool replaceLogicOp = replaceLogicOpType != GE_LOGIC_COPY && compat.bitwiseOps;

bool readFramebuffer = replaceBlend == REPLACE_BLEND_READ_FRAMEBUFFER || colorWriteMask || replaceLogicOp;
bool readFramebufferTex = readFramebuffer && !gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH);
bool needFramebufferRead = replaceBlend == REPLACE_BLEND_READ_FRAMEBUFFER || colorWriteMask || replaceLogicOp;

bool needFragCoord = readFramebuffer || gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT);
bool fetchFramebuffer = needFramebufferRead && gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH);
bool readFramebufferTex = needFramebufferRead && !gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH);

bool needFragCoord = readFramebufferTex || gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT);
bool writeDepth = gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT);
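
Note: the rewritten flag block above splits the old readFramebuffer flag into two mutually exclusive paths keyed on framebuffer-fetch support, and gl_FragCoord is now only required on the texture path. Schematically:

    bool hasFetch = gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH);
    bool fetchFramebuffer   = needFramebufferRead && hasFetch;   // fetch ext / subpass input
    bool readFramebufferTex = needFramebufferRead && !hasFetch;  // blit to a texture, sample it
    // Invariant: at most one of the two is set; both are false when no read is needed.
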
if (shaderDepalMode != ShaderDepalMode::OFF && !doTexture) {

@ -157,6 +159,11 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu

if (readFramebufferTex) {
WRITE(p, "layout (binding = 1) uniform sampler2D fbotex;\n");
} else if (fetchFramebuffer) {
WRITE(p, "layout (input_attachment_index = 0, binding = 9) uniform subpassInput inputColor;\n");
if (fragmentShaderFlags) {
*fragmentShaderFlags |= FragmentShaderFlags::INPUT_ATTACHMENT;
}
}

if (shaderDepalMode != ShaderDepalMode::OFF) {

@ -416,7 +423,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu

if (!strcmp(compat.fragColor0, "fragColor0")) {
const char *qualifierColor0 = "out";
if (readFramebuffer && compat.lastFragData && !strcmp(compat.lastFragData, compat.fragColor0)) {
if (fetchFramebuffer && compat.lastFragData && !strcmp(compat.lastFragData, compat.fragColor0)) {
qualifierColor0 = "inout";
}
// Output the output color definitions.

@ -492,20 +499,26 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
}

// Two things read from the old framebuffer - shader replacement blending and bit-level masking.
if (readFramebuffer) {
if (readFramebufferTex) {
if (compat.shaderLanguage == HLSL_D3D11) {
WRITE(p, " vec4 destColor = fbotex.Load(int3((int)gl_FragCoord.x, (int)gl_FragCoord.y, 0));\n");
} else if (compat.shaderLanguage == HLSL_D3D9) {
WRITE(p, " vec4 destColor = tex2D(fbotex, gl_FragCoord.xy * u_fbotexSize.xy);\n", compat.texture);
} else if (gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH)) {
// If we have EXT_shader_framebuffer_fetch / ARM_shader_framebuffer_fetch, we skip the blit.
// We can just read the prev value more directly.
WRITE(p, " lowp vec4 destColor = %s;\n", compat.lastFragData);
} else if (!compat.texelFetch) {
WRITE(p, " lowp vec4 destColor = %s(fbotex, gl_FragCoord.xy * u_fbotexSize.xy);\n", compat.texture);
} else {
WRITE(p, " lowp vec4 destColor = %s(fbotex, ivec2(gl_FragCoord.x, gl_FragCoord.y), 0);\n", compat.texelFetch);
}
} else if (fetchFramebuffer) {
// If we have EXT_shader_framebuffer_fetch / ARM_shader_framebuffer_fetch, we skip the blit.
// We can just read the prev value more directly.
if (compat.shaderLanguage == GLSL_3xx) {
WRITE(p, " lowp vec4 destColor = %s;\n", compat.lastFragData);
} else if (compat.shaderLanguage == GLSL_VULKAN) {
WRITE(p, " lowp vec4 destColor = subpassLoad(inputColor);\n", compat.lastFragData);
} else {
_assert_msg_(false, "Need fetch destColor, but not a compatible language");
}
}

if (isModeClear) {
@ -42,7 +42,7 @@ struct FShaderID;

// Can technically be deduced from the fragment shader ID, but this is safer.
enum class FragmentShaderFlags : u32 {
FS_FLAG_INPUT_ATTACHMENT = 1,
INPUT_ATTACHMENT = 1,
};
ENUM_CLASS_BITOPS(FragmentShaderFlags);
@ -231,6 +231,7 @@ StencilValueType ReplaceAlphaWithStencilType() {
case GE_FORMAT_8888:
case GE_FORMAT_INVALID:
case GE_FORMAT_DEPTH16:
case GE_FORMAT_CLUT8:
switch (gstate.getStencilOpZPass()) {
case GE_STENCILOP_REPLACE:
// TODO: Could detect zero here and force ZERO - less uniform updates?

@ -859,66 +860,63 @@ static inline bool blendColorSimilar(uint32_t a, uint32_t b, int margin = 25) {
static bool SimulateLogicOpIfNeeded(BlendFactor &srcBlend, BlendFactor &dstBlend, BlendEq &blendEq) {
// Note: our shader solution applies logic ops BEFORE blending, not correctly after.
// This is however fine for the most common ones, like CLEAR/NOOP/SET, etc.
if (!gstate_c.Supports(GPU_SUPPORTS_LOGIC_OP)) {
if (gstate.isLogicOpEnabled()) {
switch (gstate.getLogicOp()) {
case GE_LOGIC_CLEAR:
srcBlend = BlendFactor::ZERO;
dstBlend = BlendFactor::ZERO;
blendEq = BlendEq::ADD;
return true;
case GE_LOGIC_AND:
case GE_LOGIC_AND_REVERSE:
WARN_LOG_REPORT_ONCE(d3dLogicOpAnd, G3D, "Unsupported AND logic op: %x", gstate.getLogicOp());
break;
case GE_LOGIC_COPY:
// This is the same as off.
break;
case GE_LOGIC_COPY_INVERTED:
// Handled in the shader.
break;
case GE_LOGIC_AND_INVERTED:
case GE_LOGIC_NOR:
case GE_LOGIC_NAND:
case GE_LOGIC_EQUIV:
// Handled in the shader.
WARN_LOG_REPORT_ONCE(d3dLogicOpAndInverted, G3D, "Attempted invert for logic op: %x", gstate.getLogicOp());
break;
case GE_LOGIC_INVERTED:
srcBlend = BlendFactor::ONE;
dstBlend = BlendFactor::ONE;
blendEq = BlendEq::SUBTRACT;
WARN_LOG_REPORT_ONCE(d3dLogicOpInverted, G3D, "Attempted inverse for logic op: %x", gstate.getLogicOp());
return true;
case GE_LOGIC_NOOP:
srcBlend = BlendFactor::ZERO;
dstBlend = BlendFactor::ONE;
blendEq = BlendEq::ADD;
return true;
case GE_LOGIC_XOR:
WARN_LOG_REPORT_ONCE(d3dLogicOpOrXor, G3D, "Unsupported XOR logic op: %x", gstate.getLogicOp());
break;
case GE_LOGIC_OR:
case GE_LOGIC_OR_INVERTED:
// Inverted in shader.
srcBlend = BlendFactor::ONE;
dstBlend = BlendFactor::ONE;
blendEq = BlendEq::ADD;
WARN_LOG_REPORT_ONCE(d3dLogicOpOr, G3D, "Attempted or for logic op: %x", gstate.getLogicOp());
return true;
case GE_LOGIC_OR_REVERSE:
WARN_LOG_REPORT_ONCE(d3dLogicOpOrReverse, G3D, "Unsupported OR REVERSE logic op: %x", gstate.getLogicOp());
break;
case GE_LOGIC_SET:
srcBlend = BlendFactor::ONE;
dstBlend = BlendFactor::ONE;
blendEq = BlendEq::ADD;
WARN_LOG_REPORT_ONCE(d3dLogicOpSet, G3D, "Attempted set for logic op: %x", gstate.getLogicOp());
return true;
}
if (!gstate_c.Supports(GPU_SUPPORTS_LOGIC_OP) && gstate.isLogicOpEnabled()) {
switch (gstate.getLogicOp()) {
case GE_LOGIC_CLEAR:
srcBlend = BlendFactor::ZERO;
dstBlend = BlendFactor::ZERO;
blendEq = BlendEq::ADD;
return true;
case GE_LOGIC_AND:
case GE_LOGIC_AND_REVERSE:
WARN_LOG_REPORT_ONCE(d3dLogicOpAnd, G3D, "Unsupported AND logic op: %x", gstate.getLogicOp());
break;
case GE_LOGIC_COPY:
// This is the same as off.
break;
case GE_LOGIC_COPY_INVERTED:
// Handled in the shader.
break;
case GE_LOGIC_AND_INVERTED:
case GE_LOGIC_NOR:
case GE_LOGIC_NAND:
case GE_LOGIC_EQUIV:
// Handled in the shader.
WARN_LOG_REPORT_ONCE(d3dLogicOpAndInverted, G3D, "Attempted invert for logic op: %x", gstate.getLogicOp());
break;
case GE_LOGIC_INVERTED:
srcBlend = BlendFactor::ONE;
dstBlend = BlendFactor::ONE;
blendEq = BlendEq::SUBTRACT;
WARN_LOG_REPORT_ONCE(d3dLogicOpInverted, G3D, "Attempted inverse for logic op: %x", gstate.getLogicOp());
return true;
case GE_LOGIC_NOOP:
srcBlend = BlendFactor::ZERO;
dstBlend = BlendFactor::ONE;
blendEq = BlendEq::ADD;
return true;
case GE_LOGIC_XOR:
WARN_LOG_REPORT_ONCE(d3dLogicOpOrXor, G3D, "Unsupported XOR logic op: %x", gstate.getLogicOp());
break;
case GE_LOGIC_OR:
case GE_LOGIC_OR_INVERTED:
// Inverted in shader.
srcBlend = BlendFactor::ONE;
dstBlend = BlendFactor::ONE;
blendEq = BlendEq::ADD;
WARN_LOG_REPORT_ONCE(d3dLogicOpOr, G3D, "Attempted or for logic op: %x", gstate.getLogicOp());
return true;
case GE_LOGIC_OR_REVERSE:
WARN_LOG_REPORT_ONCE(d3dLogicOpOrReverse, G3D, "Unsupported OR REVERSE logic op: %x", gstate.getLogicOp());
break;
case GE_LOGIC_SET:
srcBlend = BlendFactor::ONE;
dstBlend = BlendFactor::ONE;
blendEq = BlendEq::ADD;
WARN_LOG_REPORT_ONCE(d3dLogicOpSet, G3D, "Attempted set for logic op: %x", gstate.getLogicOp());
return true;
}
}
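
Note: the flattened SimulateLogicOpIfNeeded() above approximates a few logic ops with blend state when the GPU lacks real logic-op support. The mappings follow directly from the blend equation result = src * srcFactor OP dst * dstFactor:

    // CLEAR -> (ZERO, ZERO, ADD): result = 0 regardless of src and dst.
    // NOOP  -> (ZERO, ONE,  ADD): result = dst, so the framebuffer is left untouched.
    // SET   -> (ONE,  ONE,  ADD): a saturating add pushes every channel toward 0xFF.
    // The remaining ops cannot be expressed as plain blend factors and are either
    // approximated or only reported via WARN_LOG_REPORT_ONCE.
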
return false;
}

@ -1080,6 +1078,12 @@ static void ConvertBlendState(GenericBlendState &blendState, bool forceReplaceBl
case REPLACE_BLEND_NO:
// We may still want to do something about stencil -> alpha.
ApplyStencilReplaceAndLogicOpIgnoreBlend(replaceAlphaWithStencil, blendState);

if (forceReplaceBlend) {
// If this is true, the logic and mask replacements will be applied, at least. In that case,
// we should not apply any logic op simulation.
blendState.simulateLogicOpType = LOGICOPTYPE_NORMAL;
}
return;

case REPLACE_BLEND_BLUE_TO_ALPHA:
@ -349,13 +349,10 @@ void ComputeFragmentShaderID(FShaderID *id_out, const ComputedPipelineState &pip

id.SetBit(FS_BIT_COLOR_WRITEMASK, colorWriteMask);

if (g_Config.bVendorBugChecksEnabled) {
if (bugs.Has(Draw::Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL)) {
id.SetBit(FS_BIT_NO_DEPTH_CANNOT_DISCARD_STENCIL, !IsStencilTestOutputDisabled() && !gstate.isDepthWriteEnabled());
} else if (bugs.Has(Draw::Bugs::MALI_STENCIL_DISCARD_BUG) && PSP_CoreParameter().compat.flags().MaliDepthStencilBugWorkaround) {
// Very similar driver bug to the Adreno one, with the same workaround (though might look into if there are cheaper ones!)
// Keeping the conditions separate since it can probably be made tighter.
id.SetBit(FS_BIT_NO_DEPTH_CANNOT_DISCARD_STENCIL, !IsStencilTestOutputDisabled() && (!gstate.isDepthTestEnabled() || !gstate.isDepthWriteEnabled()));
if (g_Config.bVendorBugChecksEnabled && bugs.Has(Draw::Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL)) {
bool stencilWithoutDepth = !IsStencilTestOutputDisabled() && (!gstate.isDepthTestEnabled() || !gstate.isDepthWriteEnabled());
if (stencilWithoutDepth) {
id.SetBit(FS_BIT_NO_DEPTH_CANNOT_DISCARD_STENCIL, stencilWithoutDepth);
}
}
}
@ -171,6 +171,7 @@ bool FramebufferManagerCommon::PerformStencilUpload(u32 addr, int size, StencilU
break;
case GE_FORMAT_INVALID:
case GE_FORMAT_DEPTH16:
case GE_FORMAT_CLUT8:
// Inconceivable.
_assert_(false);
break;
@ -37,6 +37,7 @@
#include "GPU/Common/ShaderId.h"
#include "GPU/Common/GPUStateUtils.h"
#include "GPU/Debugger/Debugger.h"
#include "GPU/Debugger/Record.h"
#include "GPU/GPUCommon.h"
#include "GPU/GPUInterface.h"
#include "GPU/GPUState.h"

@ -292,11 +293,18 @@ SamplerCacheKey TextureCacheCommon::GetSamplingParams(int maxLevel, const TexCac
SamplerCacheKey TextureCacheCommon::GetFramebufferSamplingParams(u16 bufferWidth, u16 bufferHeight) {
SamplerCacheKey key = GetSamplingParams(0, nullptr);

// In case auto max quality was on, restore min filt. Another fix for water in Outrun.
if (g_Config.iTexFiltering == TEX_FILTER_AUTO_MAX_QUALITY) {
int minFilt = gstate.texfilter & 0x7;
key.minFilt = minFilt & 1;
}

// Kill any mipmapping settings.
key.mipEnable = false;
key.mipFilt = false;
key.aniso = 0.0;
key.maxLevel = 0.0f;
key.lodBias = 0.0f;

// Often the framebuffer will not match the texture size. We'll wrap/clamp in the shader in that case.
int w = gstate.getTextureWidth(0);

@ -1260,14 +1268,17 @@ void TextureCacheCommon::LoadClut(u32 clutAddr, u32 loadBytes) {

// It's possible for a game to load CLUT outside valid memory without crashing, should result in zeroes.
u32 bytes = Memory::ValidSize(clutAddr, loadBytes);
if (clutRenderAddress_ != 0xFFFFFFFF && PSP_CoreParameter().compat.flags().AllowDownloadCLUT) {
bool performDownload = PSP_CoreParameter().compat.flags().AllowDownloadCLUT;
if (GPURecord::IsActive())
performDownload = true;
if (clutRenderAddress_ != 0xFFFFFFFF && performDownload) {
framebufferManager_->DownloadFramebufferForClut(clutRenderAddress_, clutRenderOffset_ + bytes);
Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
if (bytes < loadBytes) {
memset((u8 *)clutBufRaw_ + bytes, 0x00, loadBytes - bytes);
}
} else {
// Here we could check for clutRenderAddres_ != 0xFFFFFFFF and zero the CLUT or something,
// Here we could check for clutRenderAddress_ != 0xFFFFFFFF and zero the CLUT or something,
// but choosing not to for now. Though the results of loading the CLUT from RAM here is
// almost certainly going to be bogus.
#ifdef _M_SSE

@ -1986,6 +1997,9 @@ static bool CanDepalettize(GETextureFormat texFormat, GEBufferFormat bufferForma
return true;
}
break;
case GE_FORMAT_CLUT8:
// Shouldn't happen here.
return false;
}
WARN_LOG(G3D, "Invalid CLUT/framebuffer combination: %s vs %s", GeTextureFormatToString(texFormat), GeBufferFormatToString(bufferFormat));
return false;
@ -872,22 +872,14 @@ void VertexDecoderJitCache::Jit_NormalFloat() {
STMIA(scratchReg, false, 3, tempReg1, tempReg2, tempReg3);
}

// Through expands into floats, always. Might want to look at changing this.
void VertexDecoderJitCache::Jit_PosS8Through() {
DEBUG_LOG_REPORT_ONCE(vertexS8Through, G3D, "Using S8 positions in throughmode");
_dbg_assert_msg_(fpScratchReg + 1 == fpScratchReg2, "VertexDecoder fpScratchRegs must be in order.");
_dbg_assert_msg_(fpScratchReg2 + 1 == fpScratchReg3, "VertexDecoder fpScratchRegs must be in order.");

// TODO: SIMD
LDRSB(tempReg1, srcReg, dec_->posoff);
LDRSB(tempReg2, srcReg, dec_->posoff + 1);
LDRB(tempReg3, srcReg, dec_->posoff + 2);
static const ARMReg tr[3] = { tempReg1, tempReg2, tempReg3 };
static const ARMReg fr[3] = { fpScratchReg, fpScratchReg2, fpScratchReg3 };
// 8-bit positions in throughmode always decode to 0, depth included.
VEOR(neonScratchReg, neonScratchReg, neonScratchReg);
VEOR(neonScratchReg2, neonScratchReg, neonScratchReg);
ADD(scratchReg, dstReg, dec_->decFmt.posoff);
VMOV(neonScratchReg, tempReg1, tempReg2);
VMOV(neonScratchReg2, tempReg3, tempReg3);
VCVT(F_32 | I_SIGNED, neonScratchRegQ, neonScratchRegQ);
VST1(F_32, neonScratchReg, scratchReg, 2, ALIGN_NONE);
}
@ -668,15 +668,11 @@ void VertexDecoderJitCache::Jit_PosFloat() {
}

void VertexDecoderJitCache::Jit_PosS8Through() {
LDRSB(INDEX_UNSIGNED, tempReg1, srcReg, dec_->posoff);
LDRSB(INDEX_UNSIGNED, tempReg2, srcReg, dec_->posoff + 1);
LDRB(INDEX_UNSIGNED, tempReg3, srcReg, dec_->posoff + 2);
fp.SCVTF(fpScratchReg, tempReg1);
fp.SCVTF(fpScratchReg2, tempReg2);
fp.SCVTF(fpScratchReg3, tempReg3);
// 8-bit positions in throughmode always decode to 0, depth included.
fp.EOR(fpScratchReg, fpScratchReg, fpScratchReg);
STR(INDEX_UNSIGNED, fpScratchReg, dstReg, dec_->decFmt.posoff);
STR(INDEX_UNSIGNED, fpScratchReg2, dstReg, dec_->decFmt.posoff + 4);
STR(INDEX_UNSIGNED, fpScratchReg3, dstReg, dec_->decFmt.posoff + 8);
STR(INDEX_UNSIGNED, fpScratchReg, dstReg, dec_->decFmt.posoff + 4);
STR(INDEX_UNSIGNED, fpScratchReg, dstReg, dec_->decFmt.posoff + 8);
}

void VertexDecoderJitCache::Jit_PosS16Through() {
@ -773,14 +773,20 @@ void VertexDecoder::Step_PosFloatSkin() const
Vec3ByMatrix43(pos, fn, skinMatrix);
}

void VertexDecoder::Step_PosS8Through() const
{
void VertexDecoder::Step_PosInvalid() const {
// Invalid positions are just culled. Simulate by forcing invalid values.
float *v = (float *)(decoded_ + decFmt.posoff);
const s8 *sv = (const s8 *)(ptr_ + posoff);
const u8 *uv = (const u8 *)(ptr_ + posoff);
v[0] = sv[0];
v[1] = sv[1];
v[2] = uv[2];
v[0] = std::numeric_limits<float>::infinity();
v[1] = std::numeric_limits<float>::infinity();
v[2] = std::numeric_limits<float>::infinity();
}

void VertexDecoder::Step_PosS8Through() const {
// 8-bit positions in throughmode always decode to 0, depth included.
float *v = (float *)(decoded_ + decFmt.posoff);
v[0] = 0;
v[1] = 0;
v[2] = 0;
}

void VertexDecoder::Step_PosS16Through() const

@ -1023,35 +1029,35 @@ static const StepFunction nrmstep_morphskin[4] = {
};

static const StepFunction posstep[4] = {
&VertexDecoder::Step_PosS8,
&VertexDecoder::Step_PosInvalid,
&VertexDecoder::Step_PosS8,
&VertexDecoder::Step_PosS16,
&VertexDecoder::Step_PosFloat,
};

static const StepFunction posstep_skin[4] = {
&VertexDecoder::Step_PosS8Skin,
&VertexDecoder::Step_PosInvalid,
&VertexDecoder::Step_PosS8Skin,
&VertexDecoder::Step_PosS16Skin,
&VertexDecoder::Step_PosFloatSkin,
};

static const StepFunction posstep_morph[4] = {
&VertexDecoder::Step_PosS8Morph,
&VertexDecoder::Step_PosInvalid,
&VertexDecoder::Step_PosS8Morph,
&VertexDecoder::Step_PosS16Morph,
&VertexDecoder::Step_PosFloatMorph,
};

static const StepFunction posstep_morph_skin[4] = {
&VertexDecoder::Step_PosS8MorphSkin,
&VertexDecoder::Step_PosInvalid,
&VertexDecoder::Step_PosS8MorphSkin,
&VertexDecoder::Step_PosS16MorphSkin,
&VertexDecoder::Step_PosFloatMorphSkin,
};

static const StepFunction posstep_through[4] = {
&VertexDecoder::Step_PosS8Through,
&VertexDecoder::Step_PosInvalid,
&VertexDecoder::Step_PosS8Through,
&VertexDecoder::Step_PosS16Through,
&VertexDecoder::Step_PosFloatThrough,

@ -1224,9 +1230,8 @@ void VertexDecoder::SetVertexType(u32 fmt, const VertexDecoderOptions &options,
bool reportNoPos = false;
if (!pos) {
reportNoPos = true;
pos = 1;
}
if (pos) { // there's always a position
if (pos >= 0) { // there's always a position
size = align(size, posalign[pos]);
posoff = size;
size += possize[pos];

@ -433,6 +433,7 @@ public:
void Step_PosS16MorphSkin() const;
void Step_PosFloatMorphSkin() const;

void Step_PosInvalid() const;
void Step_PosS8Through() const;
void Step_PosS16Through() const;
void Step_PosFloatThrough() const;

@ -1345,14 +1345,9 @@ void VertexDecoderJitCache::Jit_NormalFloatSkin() {

// Through expands into floats, always. Might want to look at changing this.
void VertexDecoderJitCache::Jit_PosS8Through() {
DEBUG_LOG_REPORT_ONCE(vertexS8Through, G3D, "Using S8 positions in throughmode");
// SIMD doesn't really matter since this isn't useful on hardware.
XORPS(fpScratchReg, R(fpScratchReg));
for (int i = 0; i < 3; i++) {
if (i == 2)
MOVZX(32, 8, tempReg1, MDisp(srcReg, dec_->posoff + i));
else
MOVSX(32, 8, tempReg1, MDisp(srcReg, dec_->posoff + i));
CVTSI2SS(fpScratchReg, R(tempReg1));
MOVSS(MDisp(dstReg, dec_->decFmt.posoff + i * 4), fpScratchReg);
}
}
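A quick sketch of what the table change above means in practice. Position format 0 ("no position") used to alias to the S8 decoder; every posstep table now routes it to Step_PosInvalid, which writes +inf so that later range checks cull the primitive instead of decoding garbage. Roughly, under the assumption that dispatch looks like this (the throughMode variable here is illustrative, not taken from these hunks):

    // Sketch: how the pos bits of the vertex type pick a decoder from the
    // tables above. Index 0 is now Step_PosInvalid rather than an S8 fallback.
    int pos = (vertType & GE_VTYPE_POS_MASK) >> GE_VTYPE_POS_SHIFT;  // 0..3
    StepFunction step = throughMode ? posstep_through[pos] : posstep[pos];
    (this->*step)();  // index 0 -> Step_PosInvalid -> +inf positions
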
@ -142,10 +142,11 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
if (gl_extensions.EXT_gpu_shader4) {
gl_exts.push_back("#extension GL_EXT_gpu_shader4 : enable");
}
if (gl_extensions.EXT_clip_cull_distance && id.Bit(VS_BIT_VERTEX_RANGE_CULLING)) {
bool useClamp = gstate_c.Supports(GPU_SUPPORTS_DEPTH_CLAMP) && !id.Bit(VS_BIT_IS_THROUGH);
if (gl_extensions.EXT_clip_cull_distance && (id.Bit(VS_BIT_VERTEX_RANGE_CULLING) || useClamp)) {
gl_exts.push_back("#extension GL_EXT_clip_cull_distance : enable");
}
if (gl_extensions.APPLE_clip_distance && id.Bit(VS_BIT_VERTEX_RANGE_CULLING)) {
if (gl_extensions.APPLE_clip_distance && (id.Bit(VS_BIT_VERTEX_RANGE_CULLING) || useClamp)) {
gl_exts.push_back("#extension GL_APPLE_clip_distance : enable");
}
if (gl_extensions.ARB_cull_distance && id.Bit(VS_BIT_VERTEX_RANGE_CULLING)) {

@ -227,6 +228,10 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
bool texCoordInVec3 = false;

bool vertexRangeCulling = id.Bit(VS_BIT_VERTEX_RANGE_CULLING) && !isModeThrough;
bool clipClampedDepth = !isModeThrough && gstate_c.Supports(GPU_SUPPORTS_DEPTH_CLAMP) && gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE);
const char *vertexRangeClipSuffix = "[0]";
if (vertexRangeCulling && clipClampedDepth)
vertexRangeClipSuffix = "[2]";

if (compat.shaderLanguage == GLSL_VULKAN) {
WRITE(p, "\n");

@ -419,8 +424,15 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
WRITE(p, " vec4 gl_Position : POSITION;\n");
} else {
WRITE(p, " vec4 gl_Position : SV_Position;\n");
if (vertexRangeCulling && gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE)) {
WRITE(p, " float gl_ClipDistance : SV_ClipDistance0;\n");
bool clipRange = vertexRangeCulling && gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE);
if (clipClampedDepth && clipRange) {
WRITE(p, " float3 gl_ClipDistance : SV_ClipDistance;\n");
vertexRangeClipSuffix = ".z";
} else if (clipClampedDepth) {
WRITE(p, " float2 gl_ClipDistance : SV_ClipDistance;\n");
} else if (clipRange) {
WRITE(p, " float gl_ClipDistance : SV_ClipDistance;\n");
vertexRangeClipSuffix = "";
}
if (vertexRangeCulling && gstate_c.Supports(GPU_SUPPORTS_CULL_DISTANCE)) {
WRITE(p, " float2 gl_CullDistance : SV_CullDistance0;\n");

@ -1177,8 +1189,37 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
WRITE(p, " %sv_fogdepth = (viewPos.z + u_fogcoef.x) * u_fogcoef.y;\n", compat.vsOutPrefix);
}

if (vertexRangeCulling && !IsVRBuild()) {
if (clipClampedDepth || (vertexRangeCulling && !IsVRBuild())) {
WRITE(p, " vec3 projPos = outPos.xyz / outPos.w;\n");
}

if (clipClampedDepth) {
const char *clip0 = compat.shaderLanguage == HLSL_D3D11 ? ".x" : "[0]";
const char *clip1 = compat.shaderLanguage == HLSL_D3D11 ? ".y" : "[1]";
WRITE(p, " mediump float integerZ = projPos.z * u_depthRange.x + u_depthRange.y;\n");

// This should clip against minz, but only when it's above zero.
if (ShaderLanguageIsOpenGL(compat.shaderLanguage)) {
// On OpenGL/GLES, these values account for the -1 -> 1 range.
WRITE(p, " if (u_depthRange.y - u_depthRange.x >= 1.0) {\n");
} else {
// Everywhere else, it's 0 -> 1, simpler.
WRITE(p, " if (u_depthRange.y >= 1.0) {\n");
}
WRITE(p, " %sgl_ClipDistance%s = integerZ;\n", compat.vsOutPrefix, clip0);
WRITE(p, " } else {\n");
WRITE(p, " %sgl_ClipDistance%s = 0.0;\n", compat.vsOutPrefix, clip0);
WRITE(p, " }\n");

// This is similar, but for maxz when it's below 65535.0. -1/0 don't matter here.
WRITE(p, " if (u_depthRange.x + u_depthRange.y <= 65534.0) {\n");
WRITE(p, " %sgl_ClipDistance%s = 65535.0 - integerZ;\n", compat.vsOutPrefix, clip1);
WRITE(p, " } else {\n");
WRITE(p, " %sgl_ClipDistance%s = 0.0;\n", compat.vsOutPrefix, clip1);
WRITE(p, " }\n");
}

if (vertexRangeCulling && !IsVRBuild()) {
WRITE(p, " float projZ = (projPos.z - u_depthRange.z) * u_depthRange.w;\n");
// Vertex range culling doesn't happen when Z clips, note sign of w is important.
WRITE(p, " if (u_cullRangeMin.w <= 0.0 || projZ * outPos.w > -outPos.w) {\n");

@ -1194,12 +1235,11 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
WRITE(p, " }\n");
WRITE(p, " }\n");

const char *clip0 = compat.shaderLanguage == HLSL_D3D11 ? "" : "[0]";
const char *cull0 = compat.shaderLanguage == HLSL_D3D11 ? ".x" : "[0]";
const char *cull1 = compat.shaderLanguage == HLSL_D3D11 ? ".y" : "[1]";
if (gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE)) {
// TODO: Not rectangles...
WRITE(p, " %sgl_ClipDistance%s = projZ * outPos.w + outPos.w;\n", compat.vsOutPrefix, clip0);
WRITE(p, " %sgl_ClipDistance%s = projZ * outPos.w + outPos.w;\n", compat.vsOutPrefix, vertexRangeClipSuffix);
}
if (gstate_c.Supports(GPU_SUPPORTS_CULL_DISTANCE)) {
// Cull any triangle fully outside in the same direction when depth clamp enabled.
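For orientation, the depth-clamp path above emits shader code along these lines (a GLSL-flavored reconstruction assembled from the WRITE() format strings, not output captured from an actual generated shader):

    vec3 projPos = outPos.xyz / outPos.w;
    mediump float integerZ = projPos.z * u_depthRange.x + u_depthRange.y;
    // Clip against minz, but only when it's above zero (OpenGL form shown).
    if (u_depthRange.y - u_depthRange.x >= 1.0) {
        gl_ClipDistance[0] = integerZ;
    } else {
        gl_ClipDistance[0] = 0.0;
    }
    // And against maxz when it's below 65535.
    if (u_depthRange.x + u_depthRange.y <= 65534.0) {
        gl_ClipDistance[1] = 65535.0 - integerZ;
    } else {
        gl_ClipDistance[1] = 0.0;
    }
    // gl_ClipDistance[2] (the vertexRangeClipSuffix slot) then carries the
    // existing vertex-range-culling distance when both features are active.
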
@ -82,7 +82,7 @@ GPU_D3D11::GPU_D3D11(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
// No need to flush before the tex scale/offset commands if we are baking
// the tex scale/offset into the vertices anyway.
UpdateCmdInfo();
CheckGPUFeatures();
gstate_c.featureFlags = CheckGPUFeatures();

BuildReportingInfo();

@ -100,40 +100,16 @@ GPU_D3D11::~GPU_D3D11() {
stockD3D11.Destroy();
}

void GPU_D3D11::CheckGPUFeatures() {
u32 features = 0;

features |= GPU_SUPPORTS_BLEND_MINMAX;
u32 GPU_D3D11::CheckGPUFeatures() const {
u32 features = GPUCommon::CheckGPUFeatures();

// Accurate depth is required because the Direct3D API does not support inverse Z.
// So we cannot incorrectly use the viewport transform as the depth range on Direct3D.
// TODO: Breaks text in PaRappa for some reason?
features |= GPU_SUPPORTS_ACCURATE_DEPTH;

#ifndef _M_ARM
// TODO: Do proper feature detection
features |= GPU_SUPPORTS_ANISOTROPY;
#endif

features |= GPU_SUPPORTS_DEPTH_TEXTURE;
features |= GPU_SUPPORTS_TEXTURE_NPOT;
if (draw_->GetDeviceCaps().dualSourceBlend)
features |= GPU_SUPPORTS_DUALSOURCE_BLEND;
if (draw_->GetDeviceCaps().depthClampSupported)
features |= GPU_SUPPORTS_DEPTH_CLAMP;
if (draw_->GetDeviceCaps().clipDistanceSupported)
features |= GPU_SUPPORTS_CLIP_DISTANCE;
if (draw_->GetDeviceCaps().cullDistanceSupported)
features |= GPU_SUPPORTS_CULL_DISTANCE;
if (!draw_->GetBugs().Has(Draw::Bugs::BROKEN_NAN_IN_CONDITIONAL)) {
// Ignore the compat setting if clip and cull are both enabled.
// When supported, we can do the depth side of range culling more correctly.
const bool supported = draw_->GetDeviceCaps().clipDistanceSupported && draw_->GetDeviceCaps().cullDistanceSupported;
const bool disabled = PSP_CoreParameter().compat.flags().DisableRangeCulling;
if (supported || !disabled) {
features |= GPU_SUPPORTS_VS_RANGE_CULLING;
}
}

features |= GPU_SUPPORTS_TEXTURE_FLOAT;
features |= GPU_SUPPORTS_INSTANCE_RENDERING;

@ -146,10 +122,6 @@ void GPU_D3D11::CheckGPUFeatures() {
features |= GPU_SUPPORTS_16BIT_FORMATS;
}

if (draw_->GetDeviceCaps().logicOpSupported) {
features |= GPU_SUPPORTS_LOGIC_OP;
}

if (!g_Config.bHighQualityDepth && (features & GPU_SUPPORTS_ACCURATE_DEPTH) != 0) {
features |= GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT;
} else if (PSP_CoreParameter().compat.flags().PixelDepthRounding) {

@ -164,11 +136,7 @@ void GPU_D3D11::CheckGPUFeatures() {
features |= GPU_USE_DEPTH_RANGE_HACK;
}

if (PSP_CoreParameter().compat.flags().ClearToRAM) {
features |= GPU_USE_CLEAR_RAM_HACK;
}

gstate_c.featureFlags = features;
return features;
}

// Needs to be called on GPU thread, not reporting thread.

@ -206,7 +174,7 @@ void GPU_D3D11::BeginHostFrame() {
GPUCommon::BeginHostFrame();
UpdateCmdInfo();
if (resized_) {
CheckGPUFeatures();
gstate_c.featureFlags = CheckGPUFeatures();
framebufferManager_->Resized();
drawEngine_.Resized();
textureCache_->NotifyConfigChanged();

@ -36,7 +36,7 @@ public:
GPU_D3D11(GraphicsContext *gfxCtx, Draw::DrawContext *draw);
~GPU_D3D11();

void CheckGPUFeatures() override;
u32 CheckGPUFeatures() const override;
void PreExecuteOp(u32 op, u32 diff) override;
void ExecuteOp(u32 op, u32 diff) override;
@ -153,15 +153,16 @@ void DrawEngineD3D11::ApplyDrawState(int prim) {
// We ignore the logicState on D3D since there's no support, the emulation of it is blend-and-shader only.

if (pipelineState_.FramebufferRead()) {
bool fboTexNeedsBind = false;
ApplyFramebufferRead(&fboTexNeedsBind);
FBOTexState fboTexBindState = FBO_TEX_NONE;
ApplyFramebufferRead(&fboTexBindState);
// The shader takes over the responsibility for blending, so recompute.
ApplyStencilReplaceAndLogicOpIgnoreBlend(blendState.replaceAlphaWithStencil, blendState);

if (fboTexNeedsBind) {
if (fboTexBindState == FBO_TEX_COPY_BIND_TEX) {
framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY);
// No sampler required, we do a plain Load in the pixel shader.
fboTexBound_ = true;
fboTexBindState = FBO_TEX_NONE;

framebufferManager_->RebindFramebuffer("RebindFramebuffer - ApplyDrawState");
// Must dirty blend state here so we re-copy next time. Example: Lunar's spell effects.
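The FBOTexState value used here replaces the old fboTexNeedsBind bool with a tri-state; its declaration is not part of the hunks shown, so this is a sketch reconstructed from the three values the code uses, with the meanings inferred from the surrounding branches:

    enum FBOTexState {
        FBO_TEX_NONE,              // nothing to bind
        FBO_TEX_COPY_BIND_TEX,     // copy the framebuffer, bind the copy as a texture
        FBO_TEX_READ_FRAMEBUFFER,  // read the framebuffer directly (not supported on D3D)
    };
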
@ -18,6 +18,7 @@
#include <vector>
#include "Common/Log.h"
#include "Common/StringUtils.h"
#include "Common/TimeUtil.h"
#include "GPU/GPU.h"
#include "GPU/Debugger/Breakpoints.h"
#include "GPU/Debugger/Debugger.h"

@ -35,6 +36,8 @@ static int primsLastFrame = 0;
static int primsThisFrame = 0;
static int thisFlipNum = 0;

static double lastStepTime = -1.0;

static std::vector<std::pair<int, int>> restrictPrimRanges;
static std::string restrictPrimRule;

@ -56,6 +59,7 @@ void SetActive(bool flag) {
breakNext = BreakNext::NONE;
breakAtCount = -1;
GPUStepping::ResumeFromStepping();
lastStepTime = -1.0;
}
}

@ -79,6 +83,7 @@ void SetBreakNext(BreakNext next) {
GPUBreakpoints::AddCmdBreakpoint(GE_CMD_SPLINE, true);
}
GPUStepping::ResumeFromStepping();
lastStepTime = next == BreakNext::NONE ? -1.0 : time_now_d();
}

void SetBreakCount(int c, bool relative) {

@ -130,7 +135,12 @@ bool NotifyCommand(u32 pc) {
GPUBreakpoints::ClearTempBreakpoints();

auto info = gpuDebug->DissassembleOp(pc);
NOTICE_LOG(G3D, "Waiting at %08x, %s", pc, info.desc.c_str());
if (lastStepTime >= 0.0) {
NOTICE_LOG(G3D, "Waiting at %08x, %s (%fms)", pc, info.desc.c_str(), (time_now_d() - lastStepTime) * 1000.0);
lastStepTime = -1.0;
} else {
NOTICE_LOG(G3D, "Waiting at %08x, %s", pc, info.desc.c_str());
}
GPUStepping::EnterStepping();
}

@ -141,7 +151,12 @@ void NotifyDraw() {
if (!active)
return;
if (breakNext == BreakNext::DRAW && !GPUStepping::IsStepping()) {
NOTICE_LOG(G3D, "Waiting at a draw");
if (lastStepTime >= 0.0) {
NOTICE_LOG(G3D, "Waiting at a draw (%fms)", (time_now_d() - lastStepTime) * 1000.0);
lastStepTime = -1.0;
} else {
NOTICE_LOG(G3D, "Waiting at a draw");
}
GPUStepping::EnterStepping();
}
}
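The step-timing additions above follow a small one-shot pattern: stamp the time when stepping resumes, report the elapsed time once at the next break, then disarm. In isolation (a sketch using the same time_now_d() helper from Common/TimeUtil.h):

    lastStepTime = time_now_d();      // armed when ResumeFromStepping() runs

    // ...later, when the next break is hit:
    if (lastStepTime >= 0.0) {
        NOTICE_LOG(G3D, "Step took %fms", (time_now_d() - lastStepTime) * 1000.0);
        lastStepTime = -1.0;          // disarm so idle waits aren't reported
    }
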
@ -298,6 +298,7 @@ private:
void Registers(u32 ptr, u32 sz);
void Vertices(u32 ptr, u32 sz);
void Indices(u32 ptr, u32 sz);
void ClutAddr(u32 ptr, u32 sz);
void Clut(u32 ptr, u32 sz);
void TransferSrc(u32 ptr, u32 sz);
void Memset(u32 ptr, u32 sz);

@ -308,6 +309,8 @@ private:
void Display(u32 ptr, u32 sz);

u32 execMemcpyDest = 0;
u32 execClutAddr = 0;
u32 execClutFlags = 0;
u32 execListBuf = 0;
u32 execListPos = 0;
u32 execListID = 0;

@ -472,15 +475,40 @@ void DumpExecute::Indices(u32 ptr, u32 sz) {
execListQueue.push_back((GE_CMD_IADDR << 24) | (psp & 0x00FFFFFF));
}

void DumpExecute::Clut(u32 ptr, u32 sz) {
u32 psp = mapping_.Map(ptr, sz, std::bind(&DumpExecute::SyncStall, this));
if (psp == 0) {
ERROR_LOG(SYSTEM, "Unable to allocate for clut");
return;
}
void DumpExecute::ClutAddr(u32 ptr, u32 sz) {
struct ClutAddrData {
u32 addr;
u32 flags;
};
const ClutAddrData *data = (const ClutAddrData *)(pushbuf_.data() + ptr);
execClutAddr = data->addr;
execClutFlags = data->flags;
}

execListQueue.push_back((GE_CMD_CLUTADDRUPPER << 24) | ((psp >> 8) & 0x00FF0000));
execListQueue.push_back((GE_CMD_CLUTADDR << 24) | (psp & 0x00FFFFFF));
void DumpExecute::Clut(u32 ptr, u32 sz) {
// This is always run when we have the actual address set.
if (execClutAddr != 0) {
const bool isTarget = (execClutFlags & 1) != 0;
const bool unchangedVRAM = (execClutFlags & 2) != 0;

// TODO: Could use drawnVRAM flag, but it can be wrong.
// Could potentially always skip if !isTarget, but playing it safe for offset texture behavior.
if (Memory::IsValidRange(execClutAddr, sz) && !unchangedVRAM && (!isTarget || !g_Config.bSoftwareRendering)) {
// Intentionally don't trigger an upload here.
Memory::MemcpyUnchecked(execClutAddr, pushbuf_.data() + ptr, sz);
}

execClutAddr = 0;
} else {
u32 psp = mapping_.Map(ptr, sz, std::bind(&DumpExecute::SyncStall, this));
if (psp == 0) {
ERROR_LOG(SYSTEM, "Unable to allocate for clut");
return;
}

execListQueue.push_back((GE_CMD_CLUTADDRUPPER << 24) | ((psp >> 8) & 0x00FF0000));
execListQueue.push_back((GE_CMD_CLUTADDR << 24) | (psp & 0x00FFFFFF));
}
}

void DumpExecute::TransferSrc(u32 ptr, u32 sz) {

@ -619,6 +647,10 @@ bool DumpExecute::Run() {
Indices(cmd.ptr, cmd.sz);
break;

case CommandType::CLUTADDR:
ClutAddr(cmd.ptr, cmd.sz);
break;

case CommandType::CLUT:
Clut(cmd.ptr, cmd.sz);
break;
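For reference, the flags word that ClutAddr() stashes in execClutFlags is the small bitfield produced by GetTargetFlags() in Record.cpp (later in this diff). A sketch of the full decode, of which the replay path above only consults the first two bits:

    const bool isTarget      = (execClutFlags & 1) != 0;  // address was a render target
    const bool unchangedVRAM = (execClutFlags & 2) != 0;  // VRAM not dirtied since capture
    const bool drawnVRAM     = (execClutFlags & 4) != 0;  // potentially drawn to (unused here)
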
@ -37,6 +37,7 @@
#include "Core/MemMap.h"
#include "Core/System.h"
#include "Core/ThreadPools.h"
#include "GPU/Common/GPUDebugInterface.h"
#include "GPU/GPUInterface.h"
#include "GPU/GPUState.h"
#include "GPU/ge_constants.h"

@ -152,8 +153,19 @@ static void BeginRecording() {
u32 sz = 512 * 4;
pushbuf.resize(pushbuf.size() + sz);
gstate.Save((u32_le *)(pushbuf.data() + ptr));

commands.push_back({CommandType::INIT, sz, ptr});

// Also save the initial CLUT.
GPUDebugBuffer clut;
if (gpuDebug->GetCurrentClut(clut)) {
sz = clut.GetStride() * clut.PixelSize();
_assert_msg_(sz == 1024, "CLUT should be 1024 bytes");
ptr = (u32)pushbuf.size();
pushbuf.resize(pushbuf.size() + sz);
memcpy(pushbuf.data() + ptr, clut.GetData(), sz);
commands.push_back({ CommandType::CLUT, sz, ptr });
}

DirtyAllVRAM(DirtyVRAMFlag::DIRTY);
}

@ -308,6 +320,34 @@ static Command EmitCommandWithRAM(CommandType t, const void *p, u32 sz, u32 alig
return cmd;
}

static u32 GetTargetFlags(u32 addr, u32 sizeInRAM) {
const bool isTarget = lastRenderTargets.find(addr) != lastRenderTargets.end();

bool isDirtyVRAM = false;
bool isDrawnVRAM = false;
uint32_t start = (addr >> DIRTY_VRAM_SHIFT) & DIRTY_VRAM_MASK;
for (uint32_t i = 0; i < (sizeInRAM + DIRTY_VRAM_ROUND) >> DIRTY_VRAM_SHIFT; ++i) {
DirtyVRAMFlag flag = dirtyVRAM[start + i];
isDirtyVRAM = isDirtyVRAM || flag != DirtyVRAMFlag::CLEAN;
isDrawnVRAM = isDrawnVRAM || flag == DirtyVRAMFlag::DRAWN;

// Mark the VRAM clean now that it's been copied to VRAM.
if (flag == DirtyVRAMFlag::DIRTY)
dirtyVRAM[start + i] = DirtyVRAMFlag::CLEAN;
}

// The isTarget flag is mostly used for replay of dumps on a PSP.
u32 flags = isTarget ? 1 : 0;
// The unchangedVRAM flag tells us we can skip recopying.
if (!isDirtyVRAM)
flags |= 2;
// And the drawn flag tells us this data was potentially drawn to.
if (isDrawnVRAM)
flags |= 4;

return flags;
}

static void EmitTextureData(int level, u32 texaddr) {
GETextureFormat format = gstate.getTextureFormat();
int w = gstate.getTextureWidth(level);

@ -315,7 +355,6 @@ static void EmitTextureData(int level, u32 texaddr) {
int bufw = GetTextureBufw(level, texaddr, format);
int extraw = w > bufw ? w - bufw : 0;
u32 sizeInRAM = (textureBitsPerPixel[format] * (bufw * h + extraw)) / 8;
const bool isTarget = lastRenderTargets.find(texaddr) != lastRenderTargets.end();

CommandType type = CommandType((int)CommandType::TEXTURE0 + level);
const u8 *p = Memory::GetPointerUnchecked(texaddr);

@ -330,27 +369,7 @@ static void EmitTextureData(int level, u32 texaddr) {
u32 pad;
};

bool isDirtyVRAM = false;
bool isDrawnVRAM = false;
uint32_t start = (texaddr >> DIRTY_VRAM_SHIFT) & DIRTY_VRAM_MASK;
for (uint32_t i = 0; i < (sizeInRAM + DIRTY_VRAM_ROUND) >> DIRTY_VRAM_SHIFT; ++i) {
DirtyVRAMFlag flag = dirtyVRAM[start + i];
isDirtyVRAM = isDirtyVRAM || flag != DirtyVRAMFlag::CLEAN;
isDrawnVRAM = isDrawnVRAM || flag == DirtyVRAMFlag::DRAWN;

// Mark the VRAM clean now that it's been copied to VRAM.
if (flag == DirtyVRAMFlag::DIRTY)
dirtyVRAM[start + i] = DirtyVRAMFlag::CLEAN;
}

// The isTarget flag is mostly used for replay of dumps on a PSP.
u32 flags = isTarget ? 1 : 0;
// The unchangedVRAM flag tells us we can skip recopying.
if (!isDirtyVRAM)
flags |= 2;
// And the drawn flag tells us this data was potentially drawn to.
if (isDrawnVRAM)
flags |= 4;
u32 flags = GetTargetFlags(texaddr, sizeInRAM);
FramebufData framebuf{ texaddr, bufw, flags };
framebufData.resize(sizeof(framebuf) + bytes);
memcpy(&framebufData[0], &framebuf, sizeof(framebuf));

@ -456,12 +475,33 @@ static void EmitTransfer(u32 op) {

static void EmitClut(u32 op) {
u32 addr = gstate.getClutAddress();

// Hardware rendering may be using a framebuffer as CLUT.
// To get at this, we first run the command (normally we're called right before it has run.)
if (Memory::IsVRAMAddress(addr))
gpuDebug->SetCmdValue(op);

// Actually should only be 0x3F, but we allow enhanced CLUTs. See #15727.
u32 blocks = (op & 0x7F) == 0x40 ? 0x40 : (op & 0x3F);
u32 bytes = blocks * 32;
bytes = Memory::ValidSize(addr, bytes);

if (bytes != 0) {
// Send the original address so VRAM can be reasoned about.
if (Memory::IsVRAMAddress(addr)) {
struct ClutAddrData {
u32 addr;
u32 flags;
};
u32 flags = GetTargetFlags(addr, bytes);
ClutAddrData data{ addr, flags };

FlushRegisters();
Command cmd{CommandType::CLUTADDR, sizeof(data), (u32)pushbuf.size()};
pushbuf.resize(pushbuf.size() + sizeof(data));
memcpy(pushbuf.data() + cmd.ptr, &data, sizeof(data));
commands.push_back(cmd);
}
EmitCommandWithRAM(CommandType::CLUT, Memory::GetPointerUnchecked(addr), bytes, 16);
}
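A worked example of the size math in EmitClut() above: the low bits of the CLUT-load command give a block count at 32 bytes per block, so:

    // op & 0x3F == 0x20  ->  0x20 * 32 = 1024 bytes (a full 256-entry 32-bit CLUT)
    // op & 0x7F == 0x40  ->  0x40 * 32 = 2048 bytes (enhanced CLUT, see #15727)
    u32 blocks = (op & 0x7F) == 0x40 ? 0x40 : (op & 0x3F);
    u32 bytes = blocks * 32;
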
@ -49,6 +49,7 @@ enum class CommandType : u8 {
MEMCPYDEST = 7,
MEMCPYDATA = 8,
DISPLAY = 9,
CLUTADDR = 10,

TEXTURE0 = 0x10,
TEXTURE1 = 0x11,
@ -170,6 +170,8 @@
// Hardware tessellation
TessellationDataTransferDX9 *tessDataTransferDX9;

FBOTexState fboTexBindState_ = FBO_TEX_NONE;

int lastRenderStepId_ = -1;

bool fboTexNeedsBind_ = false;

@ -81,7 +81,7 @@ GPU_DX9::GPU_DX9(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
// No need to flush before the tex scale/offset commands if we are baking
// the tex scale/offset into the vertices anyway.
UpdateCmdInfo();
CheckGPUFeatures();
gstate_c.featureFlags = CheckGPUFeatures();

BuildReportingInfo();

@ -98,68 +98,9 @@ GPU_DX9::GPU_DX9(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
}
}

// TODO: Move this detection elsewhere when it's needed elsewhere, not before. It's ugly.
// Source: https://envytools.readthedocs.io/en/latest/hw/pciid.html#gf100
enum NVIDIAGeneration {
NV_PRE_KEPLER,
NV_KEPLER,
NV_MAXWELL,
NV_PASCAL,
NV_VOLTA,
NV_TURING, // or later
};

static NVIDIAGeneration NVIDIAGetDeviceGeneration(int deviceID) {
if (deviceID >= 0x1180 && deviceID <= 0x11bf)
return NV_KEPLER; // GK104
if (deviceID >= 0x11c0 && deviceID <= 0x11fa)
return NV_KEPLER; // GK106
if (deviceID >= 0x0fc0 && deviceID <= 0x0fff)
return NV_KEPLER; // GK107
if (deviceID >= 0x1003 && deviceID <= 0x1028)
return NV_KEPLER; // GK110(B)
if (deviceID >= 0x1280 && deviceID <= 0x12ba)
return NV_KEPLER; // GK208
if (deviceID >= 0x1381 && deviceID <= 0x13b0)
return NV_MAXWELL; // GM107
if (deviceID >= 0x1340 && deviceID <= 0x134d)
return NV_MAXWELL; // GM108
if (deviceID >= 0x13c0 && deviceID <= 0x13d9)
return NV_MAXWELL; // GM204
if (deviceID >= 0x1401 && deviceID <= 0x1427)
return NV_MAXWELL; // GM206
if (deviceID >= 0x15f7 && deviceID <= 0x15f9)
return NV_PASCAL; // GP100
if (deviceID >= 0x15f7 && deviceID <= 0x15f9)
return NV_PASCAL; // GP100
if (deviceID >= 0x1b00 && deviceID <= 0x1b38)
return NV_PASCAL; // GP102
if (deviceID >= 0x1b80 && deviceID <= 0x1be1)
return NV_PASCAL; // GP104
if (deviceID >= 0x1c02 && deviceID <= 0x1c62)
return NV_PASCAL; // GP106
if (deviceID >= 0x1c81 && deviceID <= 0x1c92)
return NV_PASCAL; // GP107
if (deviceID >= 0x1d01 && deviceID <= 0x1d12)
return NV_PASCAL; // GP108
if (deviceID >= 0x1d81 && deviceID <= 0x1dba)
return NV_VOLTA; // GV100
if (deviceID >= 0x1e02 && deviceID <= 0x1e3c)
return NV_TURING; // TU102
if (deviceID >= 0x1e82 && deviceID <= 0x1ed0)
return NV_TURING; // TU104
if (deviceID >= 0x1f02 && deviceID <= 0x1f51)
return NV_TURING; // TU104
if (deviceID >= 0x1e02)
return NV_TURING; // More TU models or later, probably.
return NV_PRE_KEPLER;
}

void GPU_DX9::CheckGPUFeatures() {
u32 features = 0;
u32 GPU_DX9::CheckGPUFeatures() const {
u32 features = GPUCommon::CheckGPUFeatures();
features |= GPU_SUPPORTS_16BIT_FORMATS;
features |= GPU_SUPPORTS_BLEND_MINMAX;
features |= GPU_SUPPORTS_DEPTH_TEXTURE;
features |= GPU_SUPPORTS_TEXTURE_LOD_CONTROL;

// Accurate depth is required because the Direct3D API does not support inverse Z.

@ -168,41 +109,6 @@ void GPU_DX9::CheckGPUFeatures() {
features |= GPU_SUPPORTS_ACCURATE_DEPTH;

auto vendor = draw_->GetDeviceCaps().vendor;
if (!PSP_CoreParameter().compat.flags().DisableRangeCulling) {
// VS range culling (killing triangles in the vertex shader using NaN) causes problems on Intel.
// Also causes problems on old NVIDIA.
switch (vendor) {
case Draw::GPUVendor::VENDOR_INTEL:
break;
case Draw::GPUVendor::VENDOR_NVIDIA:
// Older NVIDIAs don't seem to like NaNs in their DX9 vertex shaders.
// No idea if KEPLER is the right cutoff, but let's go with it.
if (NVIDIAGetDeviceGeneration(draw_->GetDeviceCaps().deviceID) >= NV_KEPLER) {
features |= GPU_SUPPORTS_VS_RANGE_CULLING;
}
break;
default:
features |= GPU_SUPPORTS_VS_RANGE_CULLING;
break;
}
}

D3DCAPS9 caps;
ZeroMemory(&caps, sizeof(caps));
HRESULT result = 0;
if (deviceEx_) {
result = deviceEx_->GetDeviceCaps(&caps);
} else {
result = device_->GetDeviceCaps(&caps);
}
if (FAILED(result)) {
WARN_LOG_REPORT(G3D, "Direct3D9: Failed to get the device caps!");
} else {
if ((caps.RasterCaps & D3DPRASTERCAPS_ANISOTROPY) != 0 && caps.MaxAnisotropy > 1)
features |= GPU_SUPPORTS_ANISOTROPY;
if ((caps.TextureCaps & (D3DPTEXTURECAPS_NONPOW2CONDITIONAL | D3DPTEXTURECAPS_POW2)) == 0)
features |= GPU_SUPPORTS_TEXTURE_NPOT;
}

if (!g_Config.bHighQualityDepth) {
features |= GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT;

@ -213,11 +119,7 @@ void GPU_DX9::CheckGPUFeatures() {
features |= GPU_ROUND_DEPTH_TO_16BIT;
}

if (PSP_CoreParameter().compat.flags().ClearToRAM) {
features |= GPU_USE_CLEAR_RAM_HACK;
}

gstate_c.featureFlags = features;
return features;
}

GPU_DX9::~GPU_DX9() {

@ -261,7 +163,7 @@ void GPU_DX9::BeginHostFrame() {
GPUCommon::BeginHostFrame();
UpdateCmdInfo();
if (resized_) {
CheckGPUFeatures();
gstate_c.featureFlags = CheckGPUFeatures();
framebufferManager_->Resized();
drawEngine_.Resized();
shaderManagerDX9_->DirtyShader();

@ -35,7 +35,7 @@ public:
GPU_DX9(GraphicsContext *gfxCtx, Draw::DrawContext *draw);
~GPU_DX9();

void CheckGPUFeatures() override;
u32 CheckGPUFeatures() const override;
void PreExecuteOp(u32 op, u32 diff) override;
void ExecuteOp(u32 op, u32 diff) override;
@ -99,14 +99,14 @@ void DrawEngineDX9::ApplyDrawState(int prim) {
if (!gstate.isModeClear()) {
textureCache_->ApplyTexture();

if (fboTexNeedsBind_) {
if (fboTexBindState_ == FBO_TEX_COPY_BIND_TEX) {
// Note that this is positions, not UVs, that we need the copy from.
framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY);
// If we are rendering at a higher resolution, linear is probably best for the dest color.
device_->SetSamplerState(1, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR);
device_->SetSamplerState(1, D3DSAMP_MINFILTER, D3DTEXF_LINEAR);
fboTexBound_ = true;
fboTexNeedsBind_ = false;
fboTexBindState_ = FBO_TEX_NONE;
}

// TODO: Test texture?

@ -133,20 +133,23 @@ void DrawEngineDX9::ApplyDrawState(int prim) {
// We ignore the logicState on D3D since there's no support, the emulation of it is blend-and-shader only.

if (pipelineState_.FramebufferRead()) {
bool fboTexNeedsBind = false;
ApplyFramebufferRead(&fboTexNeedsBind);
ApplyFramebufferRead(&fboTexBindState_);
// The shader takes over the responsibility for blending, so recompute.
ApplyStencilReplaceAndLogicOpIgnoreBlend(blendState.replaceAlphaWithStencil, blendState);

if (fboTexNeedsBind) {
if (fboTexBindState_ == FBO_TEX_COPY_BIND_TEX) {
// Note that this is positions, not UVs, that we need the copy from.
framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY);
// If we are rendering at a higher resolution, linear is probably best for the dest color.
device_->SetSamplerState(1, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR);
device_->SetSamplerState(1, D3DSAMP_MINFILTER, D3DTEXF_LINEAR);
fboTexBound_ = true;
fboTexBindState_ = FBO_TEX_NONE;
dirtyRequiresRecheck_ |= DIRTY_BLEND_STATE;
gstate_c.Dirty(DIRTY_BLEND_STATE);
} else if (fboTexBindState_ == FBO_TEX_READ_FRAMEBUFFER) {
// Not supported.
fboTexBindState_ = FBO_TEX_NONE;
}

dirtyRequiresRecheck_ |= DIRTY_FRAGMENTSHADER_STATE;
@ -118,7 +118,8 @@ void FramebufferManagerGLES::PackDepthbuffer(VirtualFramebuffer *vfb, int x, int
queries.push_back({ &u_depthDownloadTo8, "u_depthTo8" });
std::vector<GLRProgram::Initializer> inits;
inits.push_back({ &u_depthDownloadTex, 0, TEX_SLOT_PSP_TEXTURE });
depthDownloadProgram_ = render->CreateProgram(shaders, semantics, queries, inits, false, false);
GLRProgramFlags flags{};
depthDownloadProgram_ = render->CreateProgram(shaders, semantics, queries, inits, flags);
for (auto iter : shaders) {
render->DeleteShader(iter);
}

@ -54,7 +54,7 @@
GPU_GLES::GPU_GLES(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
: GPUCommon(gfxCtx, draw), drawEngine_(draw), fragmentTestCache_(draw) {
UpdateVsyncInterval(true);
CheckGPUFeatures();
gstate_c.featureFlags = CheckGPUFeatures();

shaderManagerGL_ = new ShaderManagerGLES(draw);
framebufferManagerGL_ = new FramebufferManagerGLES(draw);

@ -148,42 +148,17 @@ GPU_GLES::~GPU_GLES() {
// Take the raw GL extension and versioning data and turn into feature flags.
// TODO: This should use DrawContext::GetDeviceCaps() more and more, and eventually
// this can be shared between all the backends.
void GPU_GLES::CheckGPUFeatures() {
u32 features = 0;
u32 GPU_GLES::CheckGPUFeatures() const {
u32 features = GPUCommon::CheckGPUFeatures();

features |= GPU_SUPPORTS_16BIT_FORMATS;

if (draw_->GetDeviceCaps().dualSourceBlend) {
if (!g_Config.bVendorBugChecksEnabled || !draw_->GetBugs().Has(Draw::Bugs::DUAL_SOURCE_BLENDING_BROKEN)) {
features |= GPU_SUPPORTS_DUALSOURCE_BLEND;
}
}

if (gl_extensions.EXT_shader_framebuffer_fetch || gl_extensions.ARM_shader_framebuffer_fetch) {
// This has caused problems in the past. Let's only enable on GLES3.
if (gl_extensions.GLES3) {
features |= GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH;
}
}

if ((gl_extensions.gpuVendor == GPU_VENDOR_NVIDIA) || (gl_extensions.gpuVendor == GPU_VENDOR_AMD))
features |= GPU_PREFER_REVERSE_COLOR_ORDER;

if (draw_->GetDeviceCaps().textureNPOTFullySupported)
features |= GPU_SUPPORTS_TEXTURE_NPOT;

if (gl_extensions.EXT_blend_minmax)
features |= GPU_SUPPORTS_BLEND_MINMAX;

if (draw_->GetDeviceCaps().logicOpSupported)
features |= GPU_SUPPORTS_LOGIC_OP;

if (gl_extensions.GLES3 || !gl_extensions.IsGLES)
features |= GPU_SUPPORTS_TEXTURE_LOD_CONTROL;

if (draw_->GetDeviceCaps().anisoSupported)
features |= GPU_SUPPORTS_ANISOTROPY;

bool canUseInstanceID = gl_extensions.EXT_draw_instanced || gl_extensions.ARB_draw_instanced;
bool canDefInstanceID = gl_extensions.IsGLES || gl_extensions.EXT_gpu_shader4 || gl_extensions.VersionGEThan(3, 1);
bool instanceRendering = gl_extensions.GLES3 || (canUseInstanceID && canDefInstanceID);

@ -202,21 +177,6 @@ void GPU_GLES::CheckGPUFeatures() {
// Our implementation of depth texturing needs simple Z range, so can't
// use the extension hacks (yet).
}
if (draw_->GetDeviceCaps().textureDepthSupported)
features |= GPU_SUPPORTS_DEPTH_TEXTURE;
if (draw_->GetDeviceCaps().clipDistanceSupported)
features |= GPU_SUPPORTS_CLIP_DISTANCE;
if (draw_->GetDeviceCaps().cullDistanceSupported)
features |= GPU_SUPPORTS_CULL_DISTANCE;
if (!draw_->GetBugs().Has(Draw::Bugs::BROKEN_NAN_IN_CONDITIONAL)) {
// Ignore the compat setting if clip and cull are both enabled.
// When supported, we can do the depth side of range culling more correctly.
const bool supported = draw_->GetDeviceCaps().clipDistanceSupported && draw_->GetDeviceCaps().cullDistanceSupported;
const bool disabled = PSP_CoreParameter().compat.flags().DisableRangeCulling;
if (supported || !disabled) {
features |= GPU_SUPPORTS_VS_RANGE_CULLING;
}
}

// If we already have a 16-bit depth buffer, we don't need to round.
bool prefer24 = draw_->GetDeviceCaps().preferredDepthBufferFormat == Draw::DataFormat::D24_S8;

@ -245,11 +205,7 @@ void GPU_GLES::CheckGPUFeatures() {
features |= GPU_USE_DEPTH_RANGE_HACK;
}

if (PSP_CoreParameter().compat.flags().ClearToRAM) {
features |= GPU_USE_CLEAR_RAM_HACK;
}

gstate_c.featureFlags = features;
return features;
}

bool GPU_GLES::IsReady() {

@ -321,7 +277,7 @@ void GPU_GLES::BeginHostFrame() {
GPUCommon::BeginHostFrame();
UpdateCmdInfo();
if (resized_) {
CheckGPUFeatures();
gstate_c.featureFlags = CheckGPUFeatures();
framebufferManager_->Resized();
drawEngine_.Resized();
shaderManagerGL_->DirtyShader();

@ -38,7 +38,7 @@ public:
~GPU_GLES();

// This gets called on startup and when we get back from settings.
void CheckGPUFeatures() override;
u32 CheckGPUFeatures() const override;

bool IsReady() override;
void CancelReady() override;

@ -192,9 +192,18 @@ LinkedShader::LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs,
initialize.push_back({ &u_tess_weights_u, 0, TEX_SLOT_SPLINE_WEIGHTS_U });
initialize.push_back({ &u_tess_weights_v, 0, TEX_SLOT_SPLINE_WEIGHTS_V });

bool useDualSource = (gstate_c.featureFlags & GPU_SUPPORTS_DUALSOURCE_BLEND) != 0;
bool useClip0 = VSID.Bit(VS_BIT_VERTEX_RANGE_CULLING) && gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE);
program = render->CreateProgram(shaders, semantics, queries, initialize, useDualSource, useClip0);
GLRProgramFlags flags{};
flags.supportDualSource = (gstate_c.featureFlags & GPU_SUPPORTS_DUALSOURCE_BLEND) != 0;
if (!VSID.Bit(VS_BIT_IS_THROUGH) && gstate_c.Supports(GPU_SUPPORTS_DEPTH_CLAMP)) {
flags.useClipDistance0 = true;
flags.useClipDistance1 = true;
if (VSID.Bit(VS_BIT_VERTEX_RANGE_CULLING) && gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE))
flags.useClipDistance2 = true;
} else if (VSID.Bit(VS_BIT_VERTEX_RANGE_CULLING) && gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE)) {
flags.useClipDistance0 = true;
}

program = render->CreateProgram(shaders, semantics, queries, initialize, flags);

// The rest, use the "dirty" mechanism.
dirtyUniforms = DIRTY_ALL_UNIFORMS;

@ -149,13 +149,14 @@ void DrawEngineGLES::ApplyDrawState(int prim) {
GenericLogicState &logicState = pipelineState_.logicState;

if (pipelineState_.FramebufferRead()) {
bool fboTexNeedsBind = false;
ApplyFramebufferRead(&fboTexNeedsBind);
FBOTexState fboTexBindState = FBO_TEX_NONE;
ApplyFramebufferRead(&fboTexBindState);
// The shader takes over the responsibility for blending, so recompute.
ApplyStencilReplaceAndLogicOpIgnoreBlend(blendState.replaceAlphaWithStencil, blendState);

// We copy the framebuffer here, as doing so will wipe any blend state if we do it later.
if (fboTexNeedsBind) {
// fboTexNeedsBind_ won't be set if we can read directly from the target.
if (fboTexBindState == FBO_TEX_COPY_BIND_TEX) {
// Note that this is positions, not UVs, that we need the copy from.
framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY);
// If we are rendering at a higher resolution, linear is probably best for the dest color.

@ -166,6 +167,9 @@ void DrawEngineGLES::ApplyDrawState(int prim) {
// Must dirty blend state here so we re-copy next time. Example: Lunar's spell effects.
dirtyRequiresRecheck_ |= DIRTY_BLEND_STATE;
gstate_c.Dirty(DIRTY_BLEND_STATE);
} else if (fboTexBindState == FBO_TEX_READ_FRAMEBUFFER) {
// No action needed here.
fboTexBindState = FBO_TEX_NONE;
}
dirtyRequiresRecheck_ |= DIRTY_FRAGMENTSHADER_STATE;
gstate_c.Dirty(DIRTY_FRAGMENTSHADER_STATE);
@ -89,7 +89,7 @@ const CommonCommandTableEntry commonCommandTable[] = {
// These affect the fragment shader so need flushing.
{ GE_CMD_CLEARMODE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE },
{ GE_CMD_TEXTUREMAPENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE },
{ GE_CMD_FOGENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE},
{ GE_CMD_FOGENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE },
{ GE_CMD_TEXMODE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS | DIRTY_FRAGMENTSHADER_STATE },
{ GE_CMD_TEXSHADELS, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE },
// Raster state for Direct3D 9, uncommon.

@ -2414,10 +2414,10 @@ void GPUCommon::Execute_ImmVertexAlphaPrim(u32 op, u32 diff) {
immPrim_ = (GEPrimitiveType)prim;
// Flags seem to only be respected from the first prim.
immFlags_ = op & 0x00FFF800;
immFirstSent_ = false;
} else if (prim == GE_PRIM_KEEP_PREVIOUS && immPrim_ != GE_PRIM_INVALID) {
static constexpr int flushPrimCount[] = { 1, 2, 0, 3, 0, 0, 2, 0 };
// Instead of finding a proper point to flush, we just emit a full rectangle every time one
// is finished.
// Instead of finding a proper point to flush, we just emit prims when we can.
if (immCount_ == flushPrimCount[immPrim_ & 7])
FlushImm();
} else {

@ -2439,31 +2439,6 @@ void GPUCommon::FlushImm() {
}
UpdateUVScaleOffset();

// Instead of plumbing through properly (we'd need to inject these pretransformed vertices in the middle
// of SoftwareTransform(), which would take a lot of refactoring), we'll cheat and just turn these into
// through vertices.
// Since the only known use is Thrillville and it only uses it to clear, we just use color and pos.
struct ImmVertex {
float uv[2];
uint32_t color;
float xyz[3];
};
ImmVertex temp[MAX_IMMBUFFER_SIZE];
uint32_t color1Used = 0;
for (int i = 0; i < immCount_; i++) {
// Since we're sending through, scale back up to w/h.
temp[i].uv[0] = immBuffer_[i].u * gstate.getTextureWidth(0);
temp[i].uv[1] = immBuffer_[i].v * gstate.getTextureHeight(0);
temp[i].color = immBuffer_[i].color0_32;
temp[i].xyz[0] = immBuffer_[i].pos[0];
temp[i].xyz[1] = immBuffer_[i].pos[1];
temp[i].xyz[2] = immBuffer_[i].pos[2];
color1Used |= immBuffer_[i].color1_32;
}
int vtype = GE_VTYPE_TC_FLOAT | GE_VTYPE_POS_FLOAT | GE_VTYPE_COL_8888 | GE_VTYPE_THROUGH;

// TODO: Handle fog and secondary color somehow?

bool antialias = (immFlags_ & GE_IMM_ANTIALIAS) != 0;
bool prevAntialias = gstate.isAntiAliasEnabled();
bool shading = (immFlags_ & GE_IMM_SHADING) != 0;

@ -2473,40 +2448,42 @@ void GPUCommon::FlushImm() {
int cullMode = (immFlags_ & GE_IMM_CULLFACE) != 0 ? 1 : 0;
bool texturing = (immFlags_ & GE_IMM_TEXTURE) != 0;
bool prevTexturing = gstate.isTextureMapEnabled();
bool fog = (immFlags_ & GE_IMM_FOG) != 0;
bool prevFog = gstate.isFogEnabled();
bool dither = (immFlags_ & GE_IMM_DITHER) != 0;
bool prevDither = gstate.isDitherEnabled();

if ((immFlags_ & GE_IMM_CLIPMASK) != 0) {
WARN_LOG_REPORT_ONCE(geimmclipvalue, G3D, "Imm vertex used clip value, flags=%06x", immFlags_);
} else if ((immFlags_ & GE_IMM_FOG) != 0) {
WARN_LOG_REPORT_ONCE(geimmfog, G3D, "Imm vertex used fog, flags=%06x", immFlags_);
} else if (color1Used != 0 && gstate.isUsingSecondaryColor()) {
WARN_LOG_REPORT_ONCE(geimmcolor1, G3D, "Imm vertex used secondary color, flags=%06x", immFlags_);
}

if (texturing != prevTexturing || cullEnable != prevCullEnable || dither != prevDither || prevShading != shading) {
bool changed = texturing != prevTexturing || cullEnable != prevCullEnable || dither != prevDither;
changed = changed || prevShading != shading || prevFog != fog;
if (changed) {
DispatchFlush();
gstate.antiAliasEnable = (GE_CMD_ANTIALIASENABLE << 24) | (int)antialias;
gstate.shademodel = (GE_CMD_SHADEMODE << 24) | (int)shading;
gstate.cullfaceEnable = (GE_CMD_CULLFACEENABLE << 24) | (int)cullEnable;
gstate.textureMapEnable = (GE_CMD_TEXTUREMAPENABLE << 24) | (int)texturing;
gstate.fogEnable = (GE_CMD_FOGENABLE << 24) | (int)fog;
gstate.ditherEnable = (GE_CMD_DITHERENABLE << 24) | (int)dither;
gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE);
gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_UVSCALEOFFSET | DIRTY_CULLRANGE);
}

int bytesRead;
uint32_t vertTypeID = GetVertTypeID(vtype, 0);
drawEngineCommon_->DispatchSubmitImm(temp, nullptr, immPrim_, immCount_, vertTypeID, cullMode, &bytesRead);
// TODO: In the future, make a special path for these.
// drawEngineCommon_->DispatchSubmitImm(immBuffer_, immCount_);
drawEngineCommon_->DispatchSubmitImm(immPrim_, immBuffer_, immCount_, cullMode, immFirstSent_);
immCount_ = 0;
immFirstSent_ = true;

gstate.antiAliasEnable = (GE_CMD_ANTIALIASENABLE << 24) | (int)prevAntialias;
gstate.shademodel = (GE_CMD_SHADEMODE << 24) | (int)prevShading;
gstate.cullfaceEnable = (GE_CMD_CULLFACEENABLE << 24) | (int)prevCullEnable;
gstate.textureMapEnable = (GE_CMD_TEXTUREMAPENABLE << 24) | (int)prevTexturing;
gstate.ditherEnable = (GE_CMD_DITHERENABLE << 24) | (int)prevDither;
gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE);
if (changed) {
DispatchFlush();
gstate.antiAliasEnable = (GE_CMD_ANTIALIASENABLE << 24) | (int)prevAntialias;
gstate.shademodel = (GE_CMD_SHADEMODE << 24) | (int)prevShading;
gstate.cullfaceEnable = (GE_CMD_CULLFACEENABLE << 24) | (int)prevCullEnable;
gstate.textureMapEnable = (GE_CMD_TEXTUREMAPENABLE << 24) | (int)prevTexturing;
gstate.fogEnable = (GE_CMD_FOGENABLE << 24) | (int)prevFog;
gstate.ditherEnable = (GE_CMD_DITHERENABLE << 24) | (int)prevDither;
gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_UVSCALEOFFSET | DIRTY_CULLRANGE);
}
}

void GPUCommon::ExecuteOp(u32 op, u32 diff) {
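In outline, the reworked FlushImm() above wraps the submit in a symmetric save/restore that only runs when the imm flags actually differ from the current state (a sketch of the shape, not the verbatim function):

    bool changed = texturing != prevTexturing || cullEnable != prevCullEnable ||
                   dither != prevDither || prevShading != shading || prevFog != fog;
    if (changed) {             // flush, then swap the imm state in
        DispatchFlush();
        // ...write imm values into gstate, dirty shader/raster/cullrange bits...
    }
    drawEngineCommon_->DispatchSubmitImm(immPrim_, immBuffer_, immCount_, cullMode, immFirstSent_);
    if (changed) {             // flush again, then restore the game's state
        DispatchFlush();
        // ...write the prev* values back, dirty the same bits...
    }
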
@ -3176,3 +3153,56 @@ size_t GPUCommon::FormatGPUStatsCommon(char *buffer, size_t size) {
vertexAverageCycles
);
}

u32 GPUCommon::CheckGPUFeatures() const {
u32 features = 0;
if (draw_->GetDeviceCaps().logicOpSupported) {
features |= GPU_SUPPORTS_LOGIC_OP;
}
if (draw_->GetDeviceCaps().anisoSupported) {
features |= GPU_SUPPORTS_ANISOTROPY;
}
if (draw_->GetDeviceCaps().textureNPOTFullySupported) {
features |= GPU_SUPPORTS_TEXTURE_NPOT;
}
if (draw_->GetDeviceCaps().dualSourceBlend) {
if (!g_Config.bVendorBugChecksEnabled || !draw_->GetBugs().Has(Draw::Bugs::DUAL_SOURCE_BLENDING_BROKEN)) {
features |= GPU_SUPPORTS_DUALSOURCE_BLEND;
}
}
if (draw_->GetDeviceCaps().blendMinMaxSupported) {
features |= GPU_SUPPORTS_BLEND_MINMAX;
}

if (draw_->GetDeviceCaps().clipDistanceSupported) {
features |= GPU_SUPPORTS_CLIP_DISTANCE;
}

if (draw_->GetDeviceCaps().cullDistanceSupported) {
features |= GPU_SUPPORTS_CULL_DISTANCE;
}

if (draw_->GetDeviceCaps().textureDepthSupported) {
features |= GPU_SUPPORTS_DEPTH_TEXTURE;
}

if (!draw_->GetBugs().Has(Draw::Bugs::BROKEN_NAN_IN_CONDITIONAL)) {
// Ignore the compat setting if clip and cull are both enabled.
// When supported, we can do the depth side of range culling more correctly.
const bool supported = draw_->GetDeviceCaps().clipDistanceSupported && draw_->GetDeviceCaps().cullDistanceSupported;
const bool disabled = PSP_CoreParameter().compat.flags().DisableRangeCulling;
if (supported || !disabled) {
features |= GPU_SUPPORTS_VS_RANGE_CULLING;
}
}

if (draw_->GetDeviceCaps().framebufferFetchSupported) {
features |= GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH;
}

if (PSP_CoreParameter().compat.flags().ClearToRAM) {
features |= GPU_USE_CLEAR_RAM_HACK;
}

return features;
}
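The contract change, summarized: CheckGPUFeatures() is now a const query with no side effects. GPUCommon turns the shared DrawContext caps into feature flags, each backend ORs in its own bits, and every caller stores the result. A sketch with a hypothetical backend (the class name is illustrative, not from this diff):

    u32 GPU_SomeBackend::CheckGPUFeatures() const {
        u32 features = GPUCommon::CheckGPUFeatures();  // shared caps-derived flags
        features |= GPU_SUPPORTS_ACCURATE_DEPTH;       // backend-specific additions
        return features;                               // no write to gstate_c here
    }

    // Callers assign explicitly, e.g. in the constructor or on resize:
    gstate_c.featureFlags = CheckGPUFeatures();
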
@ -76,7 +76,7 @@ public:
Draw::DrawContext *GetDrawContext() override {
return draw_;
}
virtual void CheckGPUFeatures() = 0;
virtual u32 CheckGPUFeatures() const;

void UpdateCmdInfo();

@ -103,7 +103,7 @@ public:
void ExecuteOp(u32 op, u32 diff) override;
void PreExecuteOp(u32 op, u32 diff) override;

bool InterpretList(DisplayList &list) override;
bool InterpretList(DisplayList &list);
void ProcessDLQueue();
u32 UpdateStall(int listid, u32 newstall) override;
u32 EnqueueList(u32 listpc, u32 stall, int subIntrBase, PSPPointer<PspGeListArgs> args, bool head) override;

@ -355,6 +355,7 @@ protected:
int immCount_ = 0;
GEPrimitiveType immPrim_ = GE_PRIM_INVALID;
uint32_t immFlags_ = 0;
bool immFirstSent_ = false;

std::string reportingPrimaryInfo_;
std::string reportingFullInfo_;

@ -205,7 +205,6 @@ public:

virtual void PreExecuteOp(u32 op, u32 diff) = 0;
virtual void ExecuteOp(u32 op, u32 diff) = 0;
virtual bool InterpretList(DisplayList& list) = 0;

// Framebuffer management
virtual void SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) = 0;

@ -485,7 +485,8 @@ enum {
// Free bit: 15
GPU_SUPPORTS_DEPTH_TEXTURE = FLAG_BIT(16),
GPU_SUPPORTS_ACCURATE_DEPTH = FLAG_BIT(17),
// Free bits: 18-19
GPU_SUPPORTS_FRAGMENT_SHADER_INTERLOCK = FLAG_BIT(18),
// Free bits: 19
GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH = FLAG_BIT(20),
GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT = FLAG_BIT(21),
GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT = FLAG_BIT(22),
@ -167,7 +167,7 @@ void BinManager::UpdateState(bool throughMode) {
if (states_.Full())
Flush("states");
stateIndex_ = (uint16_t)states_.Push(RasterizerState());
ComputeRasterizerState(&states_[stateIndex_], throughMode);
ComputeRasterizerState(&states_[stateIndex_]);
states_[stateIndex_].samplerID.cached.clut = cluts_[clutIndex_].readable;

ClearDirty(SoftDirty::PIXEL_ALL | SoftDirty::SAMPLER_ALL | SoftDirty::RAST_ALL);

@ -326,7 +326,7 @@ void BinManager::AddTriangle(const VertexData &v0, const VertexData &v1, const V
if (d01.x * d02.y - d01.y * d02.x < 0)
return;
// If all points have identical coords, we'll have 0 weights and not skip properly, so skip here.
if (d01.x == 0 && d01.y == 0 && d02.x == 0 && d02.y == 0)
if ((d01.x == 0 && d02.x == 0) || (d01.y == 0 && d02.y == 0))
return;

// Was it fully outside the scissor?

@ -474,6 +474,9 @@ void BinManager::Drain() {
}

void BinManager::Flush(const char *reason) {
if (queueRange_.x1 == 0x7FFFFFFF)
return;

double st;
if (coreCollectDebugStats)
st = time_now_d();

@ -133,6 +133,10 @@ static inline bool CheckOutsideZ(ClipCoords p, int &pos, int &neg) {

void ProcessRect(const VertexData &v0, const VertexData &v1, BinManager &binner) {
if (!binner.State().throughMode) {
// If any verts were outside range, throw the entire prim away.
if (v0.OutsideRange() || v1.OutsideRange())
return;

// We may discard the entire rect based on depth values.
int outsidePos = 0, outsideNeg = 0;
CheckOutsideZ(v0.clippos, outsidePos, outsideNeg);

@ -176,6 +180,12 @@ void ProcessRect(const VertexData &v0, const VertexData &v1, BinManager &binner)
}

void ProcessPoint(const VertexData &v0, BinManager &binner) {
// If any verts were outside range, throw the entire prim away.
if (!binner.State().throughMode) {
if (v0.OutsideRange())
return;
}

// Points need no clipping. Will be bounds checked in the rasterizer (which seems backwards?)
binner.AddPoint(v0);
}

@ -187,6 +197,10 @@ void ProcessLine(const VertexData &v0, const VertexData &v1, BinManager &binner)
return;
}

// If any verts were outside range, throw the entire prim away.
if (v0.OutsideRange() || v1.OutsideRange())
return;

int outsidePos = 0, outsideNeg = 0;
CheckOutsideZ(v0.clippos, outsidePos, outsideNeg);
CheckOutsideZ(v1.clippos, outsidePos, outsideNeg);

@ -222,6 +236,10 @@ void ProcessLine(const VertexData &v0, const VertexData &v1, BinManager &binner)
void ProcessTriangle(const VertexData &v0, const VertexData &v1, const VertexData &v2, const VertexData &provoking, BinManager &binner) {
int mask = 0;
if (!binner.State().throughMode) {
// If any verts were outside range, throw the entire prim away.
if (v0.OutsideRange() || v1.OutsideRange() || v2.OutsideRange())
return;

mask |= CalcClipMask(v0.clippos);
mask |= CalcClipMask(v1.clippos);
mask |= CalcClipMask(v2.clippos);
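These OutsideRange() checks are what make the Step_PosInvalid decoder change earlier in this diff safe: a vertex whose position decoded to +inf fails the range test, and the whole primitive is discarded before clipping. The predicate itself is not shown in these hunks; one plausible shape, assuming it simply tests the sentinel value:

    // Hypothetical sketch - the real definition lives outside these hunks.
    bool VertexData::OutsideRange() const {
        return pos.x == std::numeric_limits<float>::infinity();
    }
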
@ -48,11 +48,11 @@ static inline PixelBlendFactor OptimizeAlphaFactor(uint32_t color) {
return PixelBlendFactor::FIX;
}

void ComputePixelFuncID(PixelFuncID *id, bool throughMode) {
void ComputePixelFuncID(PixelFuncID *id) {
id->fullKey = 0;

// TODO: Could this be minz > 0x0000 || maxz < 0xFFFF? Maybe unsafe, depending on verts...
id->applyDepthRange = !throughMode;
id->applyDepthRange = !gstate.isModeThrough();
// Dither happens even in clear mode.
id->dithering = gstate.isDitherEnabled();
id->fbFormat = gstate.FrameBufFormat();

@ -169,7 +169,7 @@ void ComputePixelFuncID(PixelFuncID *id, bool throughMode) {
}

id->applyLogicOp = gstate.isLogicOpEnabled() && gstate.getLogicOp() != GE_LOGIC_COPY;
id->applyFog = gstate.isFogEnabled() && !throughMode;
id->applyFog = gstate.isFogEnabled() && !gstate.isModeThrough();

id->earlyZChecks = id->DepthTestFunc() != GE_COMP_ALWAYS;
if (id->stencilTest && id->earlyZChecks) {

@ -244,7 +244,7 @@ struct hash<SamplerID> {

};

void ComputePixelFuncID(PixelFuncID *id, bool throughMode);
void ComputePixelFuncID(PixelFuncID *id);
std::string DescribePixelFuncID(const PixelFuncID &id);

void ComputeSamplerID(SamplerID *id);
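The signature change above removes the caller-supplied throughMode and derives it from the GE state instead, so the pixel func ID is always computed against the same source of truth. A one-line sketch of the new call pattern, as reflected throughout the diff:

	PixelFuncID id;
	ComputePixelFuncID(&id);  // through mode is now read via gstate.isModeThrough() internally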
@ -93,8 +93,8 @@ static inline Vec4<float> Interpolate(const float &c0, const float &c1, const fl
return Interpolate(c0, c1, c2, w0.Cast<float>(), w1.Cast<float>(), w2.Cast<float>(), wsum_recip);
}

void ComputeRasterizerState(RasterizerState *state, bool throughMode) {
ComputePixelFuncID(&state->pixelID, throughMode);
void ComputeRasterizerState(RasterizerState *state) {
ComputePixelFuncID(&state->pixelID);
state->drawPixel = Rasterizer::GetSingleFunc(state->pixelID);

state->enableTextures = gstate.isTextureMapEnabled() && !state->pixelID.clearMode;

@ -132,7 +132,7 @@ void ComputeRasterizerState(RasterizerState *state, bool throughMode) {
}

state->shadeGouraud = gstate.getShadeMode() == GE_SHADE_GOURAUD;
state->throughMode = throughMode;
state->throughMode = gstate.isModeThrough();
state->antialiasLines = gstate.isAntiAliasEnabled();

#if defined(SOFTGPU_MEMORY_TAGGING_DETAILED) || defined(SOFTGPU_MEMORY_TAGGING_BASIC)

@ -1235,6 +1235,7 @@ void ClearRectangle(const VertexData &v0, const VertexData &v1, const BinCoords

case GE_FORMAT_INVALID:
case GE_FORMAT_DEPTH16:
case GE_FORMAT_CLUT8:
_dbg_assert_msg_(false, "Software: invalid framebuf format.");
break;
}

@ -1365,7 +1366,7 @@ void DrawLine(const VertexData &v0, const VertexData &v1, const BinCoords &range
maskOK = false;
}

if (!CheckDepthTestPassed(pixelID.DepthTestFunc(), x, y, pixelID.cached.depthbufStride, z)) {
if (!CheckDepthTestPassed(pixelID.DepthTestFunc(), p.x, p.y, pixelID.cached.depthbufStride, z)) {
maskOK = false;
}
}
@ -65,7 +65,7 @@ struct RasterizerState {
}
};

void ComputeRasterizerState(RasterizerState *state, bool throughMode);
void ComputeRasterizerState(RasterizerState *state);

// Draws a triangle if its vertices are specified in counter-clockwise order
void DrawTriangle(const VertexData &v0, const VertexData &v1, const VertexData &v2, const BinCoords &range, const RasterizerState &state);

@ -116,8 +116,12 @@ void DrawSprite(const VertexData &v0, const VertexData &v1, const BinCoords &ran
DrawingCoords scissorTL = TransformUnit::ScreenToDrawing(range.x1, range.y1);
DrawingCoords scissorBR = TransformUnit::ScreenToDrawing(range.x2, range.y2);

int z = v1.screenpos.z;
int fog = 255;
const int z = v1.screenpos.z;
constexpr int fog = 255;

// Since it's flat, we can check depth range early. Matters for earlyZChecks.
if (pixelID.applyDepthRange && (z < pixelID.cached.minz || z > pixelID.cached.maxz))
return;

bool isWhite = v1.color0 == 0xFFFFFFFF;

@ -204,15 +208,31 @@ void DrawSprite(const VertexData &v0, const VertexData &v1, const BinCoords &ran

float t = tf_start;
const Vec4<int> c0 = Vec4<int>::FromRGBA(v1.color0);
for (int y = pos0.y; y < pos1.y; y++) {
float s = sf_start;
// Not really that fast but faster than triangle.
for (int x = pos0.x; x < pos1.x; x++) {
Vec4<int> prim_color = state.nearest(s, t, xoff, yoff, ToVec4IntArg(c0), &texptr, &texbufw, 0, 0, state.samplerID);
state.drawPixel(x, y, z, 255, ToVec4IntArg(prim_color), pixelID);
s += dsf;
if (pixelID.earlyZChecks) {
for (int y = pos0.y; y < pos1.y; y++) {
float s = sf_start;
// Not really that fast but faster than triangle.
for (int x = pos0.x; x < pos1.x; x++) {
if (CheckDepthTestPassed(pixelID.DepthTestFunc(), x, y, pixelID.cached.depthbufStride, z)) {
Vec4<int> prim_color = state.nearest(s, t, xoff, yoff, ToVec4IntArg(c0), &texptr, &texbufw, 0, 0, state.samplerID);
state.drawPixel(x, y, z, fog, ToVec4IntArg(prim_color), pixelID);
}

s += dsf;
}
t += dtf;
}
} else {
for (int y = pos0.y; y < pos1.y; y++) {
float s = sf_start;
// Not really that fast but faster than triangle.
for (int x = pos0.x; x < pos1.x; x++) {
Vec4<int> prim_color = state.nearest(s, t, xoff, yoff, ToVec4IntArg(c0), &texptr, &texbufw, 0, 0, state.samplerID);
state.drawPixel(x, y, z, fog, ToVec4IntArg(prim_color), pixelID);
s += dsf;
}
t += dtf;
}
t += dtf;
}
}
} else {
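The DrawSprite change is worth unpacking: the textured loop is duplicated on pixelID.earlyZChecks so that, when early Z is on, the depth test runs before the texture sample and occluded pixels skip the fetch entirely, while the common path keeps a branch-free inner loop; it also replaces the hard-coded alpha of 255 with the fog constant. A condensed sketch of the shape (Sample() is a hypothetical stand-in for the state.nearest call):

	if (pixelID.earlyZChecks) {
		// Depth-test first: occluded pixels never pay for the texture sample.
		for (int x = pos0.x; x < pos1.x; x++, s += dsf) {
			if (CheckDepthTestPassed(pixelID.DepthTestFunc(), x, y, pixelID.cached.depthbufStride, z))
				state.drawPixel(x, y, z, fog, Sample(s, t), pixelID);
		}
	} else {
		// No early Z: keep the inner loop branch-free.
		for (int x = pos0.x; x < pos1.x; x++, s += dsf)
			state.drawPixel(x, y, z, fog, Sample(s, t), pixelID);
	}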
@ -239,6 +259,16 @@ void DrawSprite(const VertexData &v0, const VertexData &v1, const BinCoords &ran
pixel++;
}
}
} else if (pixelID.earlyZChecks) {
const Vec4<int> prim_color = Vec4<int>::FromRGBA(v1.color0);
for (int y = pos0.y; y < pos1.y; y++) {
for (int x = pos0.x; x < pos1.x; x++) {
if (!CheckDepthTestPassed(pixelID.DepthTestFunc(), x, y, pixelID.cached.depthbufStride, z))
continue;

state.drawPixel(x, y, z, fog, ToVec4IntArg(prim_color), pixelID);
}
}
} else {
const Vec4<int> prim_color = Vec4<int>::FromRGBA(v1.color0);
for (int y = pos0.y; y < pos1.y; y++) {

@ -325,15 +355,18 @@ bool RectangleFastPath(const VertexData &v0, const VertexData &v1, BinManager &b
}

static bool AreCoordsRectangleCompatible(const RasterizerState &state, const VertexData &data0, const VertexData &data1) {
if (!(data1.color0 == data0.color0))
if (data1.color0 != data0.color0)
return false;
if (!(data1.screenpos.z == data0.screenpos.z)) {
if (data1.screenpos.z != data0.screenpos.z) {
// Sometimes, we don't actually care about z.
if (state.pixelID.depthWrite || state.pixelID.DepthTestFunc() != GE_COMP_ALWAYS)
return false;
}
if (!state.throughMode) {
if (!state.throughMode && !(data1.color1 == data0.color1))
if (data1.color1 != data0.color1)
return false;
// This means it should be culled, outside range.
if (data1.OutsideRange() || data0.OutsideRange())
return false;
// Do we have to think about perspective correction or slope mip level?
if (state.enableTextures && data1.clippos.w != data0.clippos.w) {
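After the rewrite, the rectangle-compatibility predicate reads as plain early-outs, and the range check piggybacks on the new OutsideRange() sentinel. A compressed sketch of the resulting logic (trailing texture/perspective checks elided; the helper name is hypothetical):

	static bool Compatible(const RasterizerState &state, const VertexData &a, const VertexData &b) {
		if (b.color0 != a.color0)
			return false;
		// z may differ only if depth is neither written nor tested.
		if (b.screenpos.z != a.screenpos.z &&
		    (state.pixelID.depthWrite || state.pixelID.DepthTestFunc() != GE_COMP_ALWAYS))
			return false;
		if (!state.throughMode) {
			if (b.color1 != a.color1 || a.OutsideRange() || b.OutsideRange())
				return false;
		}
		return true;  // modulo the perspective/mip checks that follow in the real code
	}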
@ -361,7 +361,7 @@ const SoftwareCommandTableEntry softgpuCommandTable[] = {
{ GE_CMD_VTCT },
{ GE_CMD_VTCQ },
{ GE_CMD_VCV },
{ GE_CMD_VAP, FLAG_EXECUTE, SoftDirty::NONE, &GPUCommon::Execute_ImmVertexAlphaPrim },
{ GE_CMD_VAP, FLAG_EXECUTE, SoftDirty::NONE, &SoftGPU::Execute_ImmVertexAlphaPrim },
{ GE_CMD_VFC },
{ GE_CMD_VSCV },

@ -639,6 +639,7 @@ void SoftGPU::CopyToCurrentFboFromDisplayRam(int srcwidth, int srcheight) {
}

void SoftGPU::CopyDisplayToOutput(bool reallyDirty) {
drawEngine_->transformUnit.Flush("output");
// The display always shows 480x272.
CopyToCurrentFboFromDisplayRam(FB_WIDTH, FB_HEIGHT);
MarkDirty(displayFramebuf_, displayStride_, 272, displayFormat_, SoftGPUVRAMDirty::CLEAR);

@ -650,7 +651,7 @@ void SoftGPU::MarkDirty(uint32_t addr, uint32_t stride, uint32_t height, GEBuffe
}

void SoftGPU::MarkDirty(uint32_t addr, uint32_t bytes, SoftGPUVRAMDirty value) {
// Don't bother tracking if frameskipping.
// Only bother tracking if frameskipping.
if (g_Config.iFrameSkip == 0)
return;
if (!Memory::IsVRAMAddress(addr) || !Memory::IsVRAMAddress(addr + bytes - 1))
@ -1005,19 +1006,24 @@ void SoftGPU::Execute_LoadClut(u32 op, u32 diff) {

void SoftGPU::Execute_FramebufPtr(u32 op, u32 diff) {
// We assume fb.data won't change while we're drawing.
drawEngine_->transformUnit.Flush("framebuf");
fb.data = Memory::GetPointerWrite(gstate.getFrameBufAddress());
if (diff) {
drawEngine_->transformUnit.Flush("framebuf");
fb.data = Memory::GetPointerWrite(gstate.getFrameBufAddress());
}
}

void SoftGPU::Execute_FramebufFormat(u32 op, u32 diff) {
// We should flush, because ranges within bins may change.
drawEngine_->transformUnit.Flush("framebuf");
if (diff)
drawEngine_->transformUnit.Flush("framebuf");
}

void SoftGPU::Execute_ZbufPtr(u32 op, u32 diff) {
// We assume depthbuf.data won't change while we're drawing.
drawEngine_->transformUnit.Flush("depthbuf");
depthbuf.data = Memory::GetPointerWrite(gstate.getDepthBufAddress());
if (diff) {
drawEngine_->transformUnit.Flush("depthbuf");
depthbuf.data = Memory::GetPointerWrite(gstate.getDepthBufAddress());
}
}
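These handlers now gate the flush on diff, which (per the command dispatch) is the XOR of the old and new register words: games frequently rewrite the same framebuffer or depth pointer, and an unconditional flush would drain the binner for nothing. The resulting pattern, as it appears above:

	void SoftGPU::Execute_ZbufPtr(u32 op, u32 diff) {
		// diff == 0 means the register value is unchanged: keep binning.
		if (diff) {
			drawEngine_->transformUnit.Flush("depthbuf");
			depthbuf.data = Memory::GetPointerWrite(gstate.getDepthBufAddress());
		}
	}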
void SoftGPU::Execute_VertexType(u32 op, u32 diff) {

@ -1109,6 +1115,12 @@ void SoftGPU::Execute_BoneMtxData(u32 op, u32 diff) {
gstate.boneMatrixData = GE_CMD_BONEMATRIXDATA << 24;
}

void SoftGPU::Execute_ImmVertexAlphaPrim(u32 op, u32 diff) {
GPUCommon::Execute_ImmVertexAlphaPrim(op, diff);
// We won't flush as often as hardware renderers, so we want to flush right away.
FlushImm();
}

void SoftGPU::Execute_Call(u32 op, u32 diff) {
PROFILE_THIS_SCOPE("gpu_call");
@ -1138,6 +1150,18 @@ void SoftGPU::FinishDeferred() {
drawEngine_->transformUnit.Flush("finish");
}

int SoftGPU::ListSync(int listid, int mode) {
// Take this as a cue that we need to finish drawing.
drawEngine_->transformUnit.Flush("listsync");
return GPUCommon::ListSync(listid, mode);
}

u32 SoftGPU::DrawSync(int mode) {
// Take this as a cue that we need to finish drawing.
drawEngine_->transformUnit.Flush("drawsync");
return GPUCommon::DrawSync(mode);
}

void SoftGPU::GetStats(char *buffer, size_t bufsize) {
drawEngine_->transformUnit.GetStats(buffer, bufsize);
}

@ -127,10 +127,12 @@ public:
SoftGPU(GraphicsContext *gfxCtx, Draw::DrawContext *draw);
~SoftGPU();

void CheckGPUFeatures() override {}
u32 CheckGPUFeatures() const override { return 0; }
void InitClear() override {}
void ExecuteOp(u32 op, u32 diff) override;
void FinishDeferred() override;
int ListSync(int listid, int mode) override;
u32 DrawSync(int mode) override;

void SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) override;
void CopyDisplayToOutput(bool reallyDirty) override;

@ -185,6 +187,8 @@ public:
void Execute_TgenMtxData(u32 op, u32 diff);
void Execute_BoneMtxData(u32 op, u32 diff);

void Execute_ImmVertexAlphaPrim(u32 op, u32 diff);

typedef void (SoftGPU::*CmdFunc)(u32 op, u32 diff);

protected:
@ -70,14 +70,66 @@ void SoftwareDrawEngine::DispatchSubmitPrim(const void *verts, const void *inds,
transformUnit.SubmitPrimitive(verts, inds, prim, vertexCount, vertTypeID, bytesRead, this);
}

void SoftwareDrawEngine::DispatchSubmitImm(const void *verts, const void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead) {
void SoftwareDrawEngine::DispatchSubmitImm(GEPrimitiveType prim, TransformedVertex *buffer, int vertexCount, int cullMode, bool continuation) {
uint32_t vertTypeID = GetVertTypeID(gstate.vertType | GE_VTYPE_POS_FLOAT, gstate.getUVGenMode());

int flipCull = cullMode != gstate.getCullMode() ? 1 : 0;
// TODO: For now, just setting all dirty.
transformUnit.SetDirty(SoftDirty(-1));
gstate.cullmode ^= flipCull;
transformUnit.SubmitPrimitive(verts, inds, prim, vertexCount, vertTypeID, bytesRead, this);

// TODO: This is a bit ugly. Should bypass when clipping...
uint32_t xScale = gstate.viewportxscale;
uint32_t xCenter = gstate.viewportxcenter;
uint32_t yScale = gstate.viewportyscale;
uint32_t yCenter = gstate.viewportycenter;
uint32_t zScale = gstate.viewportzscale;
uint32_t zCenter = gstate.viewportzcenter;

// Force scale to 1 and center to zero.
gstate.viewportxscale = (GE_CMD_VIEWPORTXSCALE << 24) | 0x3F8000;
gstate.viewportxcenter = (GE_CMD_VIEWPORTXCENTER << 24) | 0x000000;
gstate.viewportyscale = (GE_CMD_VIEWPORTYSCALE << 24) | 0x3F8000;
gstate.viewportycenter = (GE_CMD_VIEWPORTYCENTER << 24) | 0x000000;
// Z we scale to 65535 for neg z clipping.
gstate.viewportzscale = (GE_CMD_VIEWPORTZSCALE << 24) | 0x477FFF;
gstate.viewportzcenter = (GE_CMD_VIEWPORTZCENTER << 24) | 0x000000;

// Before we start, submit 0 prims to reset the prev prim type.
// Following submits will always be KEEP_PREVIOUS.
if (!continuation)
transformUnit.SubmitPrimitive(nullptr, nullptr, prim, 0, vertTypeID, nullptr, this);

for (int i = 0; i < vertexCount; i++) {
VertexData vert;
vert.clippos = ClipCoords(buffer[i].pos);
vert.texturecoords.x = buffer[i].u;
vert.texturecoords.y = buffer[i].v;
if (gstate.isModeThrough()) {
vert.texturecoords.x *= gstate.getTextureWidth(0);
vert.texturecoords.y *= gstate.getTextureHeight(0);
} else {
vert.clippos.z *= 1.0f / 65535.0f;
}
vert.color0 = buffer[i].color0_32;
vert.color1 = gstate.isUsingSecondaryColor() && !gstate.isModeThrough() ? buffer[i].color1_32 : 0;
vert.fogdepth = buffer[i].fog;
vert.screenpos.x = (int)(buffer[i].x * 16.0f);
vert.screenpos.y = (int)(buffer[i].y * 16.0f);
vert.screenpos.z = (u16)(u32)buffer[i].z;

transformUnit.SubmitImmVertex(vert, this);
}

gstate.viewportxscale = xScale;
gstate.viewportxcenter = xCenter;
gstate.viewportyscale = yScale;
gstate.viewportycenter = yCenter;
gstate.viewportzscale = zScale;
gstate.viewportzcenter = zCenter;

gstate.cullmode ^= flipCull;
// TODO: Should really clear, but the vertex type is faked so things might need resetting...
// TODO: Should really clear, but a bunch of values are forced so this is safest.
transformUnit.SetDirty(SoftDirty(-1));
}
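A note on the viewport juggling above: the immediate vertices arrive already transformed, so before replaying them through SubmitPrimitive the viewport registers are forced to an identity-like mapping and restored afterwards. The magic constants are the top 24 bits of IEEE floats: 0x3F8000 encodes 1.0f and 0x477FFF encodes 65535.0f (used on z so negative-z clipping still works). The same save/restore, sketched as a hypothetical RAII guard (the diff does this inline with six locals):

	struct ViewportSaver {
		u32 xs = gstate.viewportxscale, xc = gstate.viewportxcenter;
		u32 ys = gstate.viewportyscale, yc = gstate.viewportycenter;
		u32 zs = gstate.viewportzscale, zc = gstate.viewportzcenter;
		~ViewportSaver() {
			// Restore all six registers on scope exit.
			gstate.viewportxscale = xs; gstate.viewportxcenter = xc;
			gstate.viewportyscale = ys; gstate.viewportycenter = yc;
			gstate.viewportzscale = zs; gstate.viewportzcenter = zc;
		}
	};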
@ -273,7 +325,7 @@ void ComputeTransformState(TransformState *state, const VertexReader &vreader) {
state->roundToScreen = &ClipToScreenInternal<false, true>;
}

VertexData TransformUnit::ReadVertex(VertexReader &vreader, const TransformState &state, bool &outside_range_flag) {
VertexData TransformUnit::ReadVertex(VertexReader &vreader, const TransformState &state) {
PROFILE_THIS_SCOPE("read_vert");
VertexData vertex;

@ -362,9 +414,13 @@ VertexData TransformUnit::ReadVertex(VertexReader &vreader, const TransformState
#else
screenScaled = vertex.clippos.xyz() * state.screenScale / vertex.clippos.w + state.screenAdd;
#endif
bool outside_range_flag = false;
vertex.screenpos = state.roundToScreen(screenScaled, vertex.clippos, &outside_range_flag);
if (outside_range_flag)
if (outside_range_flag) {
// We use this, essentially, as the flag.
vertex.screenpos.x = 0x7FFFFFFF;
return vertex;
}

if (state.enableFog) {
vertex.fogdepth = (viewpos.z + state.fogEnd) * state.fogSlope;
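Instead of threading an outside_range_flag out-parameter through every caller, ReadVertex now marks the vertex itself: screenpos.x is set to 0x7FFFFFFF, a value no real screen coordinate can reach, and callers ask the vertex via the new VertexData::OutsideRange(). The two halves of the sentinel, taken from the diff:

	// In ReadVertex: flag the vertex in-band and bail out early.
	if (outside_range_flag) {
		vertex.screenpos.x = 0x7FFFFFFF;  // sentinel: never a valid screen x
		return vertex;
	}

	// In VertexData: the query callers use (Clipper, rectangle detection, ...).
	bool OutsideRange() const {
		return screenpos.x == 0x7FFFFFFF;
	}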
@ -447,20 +503,19 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G
if (gstate_c.skipDrawReason & SKIPDRAW_SKIPFRAME) {
return;
}
// Throughmode never draws 8-bit primitives, maybe because they can't fully specify the screen?
if ((vertex_type & GE_VTYPE_THROUGH_MASK) != 0 && (vertex_type & GE_VTYPE_POS_MASK) == GE_VTYPE_POS_8BIT)
return;
// Vertices without position are just entirely culled.
// Note: Throughmode does draw 8-bit primitives, but positions are always zero - handled in decode.
if ((vertex_type & GE_VTYPE_POS_MASK) == 0)
return;

u16 index_lower_bound = 0;
u16 index_upper_bound = vertex_count - 1;
u16 index_upper_bound = vertex_count == 0 ? 0 : vertex_count - 1;
IndexConverter ConvertIndex(vertex_type, indices);

if (indices)
GetIndexBounds(indices, vertex_count, vertex_type, &index_lower_bound, &index_upper_bound);
vdecoder.DecodeVerts(decoded_, vertices, index_lower_bound, index_upper_bound);
if (vertex_count != 0)
vdecoder.DecodeVerts(decoded_, vertices, index_lower_bound, index_upper_bound);

VertexReader vreader(decoded_, vtxfmt, vertex_type);

@ -471,19 +526,11 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G
prim_type = prev_prim_;
}

int vtcs_per_prim;
switch (prim_type) {
case GE_PRIM_POINTS: vtcs_per_prim = 1; break;
case GE_PRIM_LINES: vtcs_per_prim = 2; break;
case GE_PRIM_TRIANGLES: vtcs_per_prim = 3; break;
case GE_PRIM_RECTANGLES: vtcs_per_prim = 2; break;
default: vtcs_per_prim = 0; break;
}

// TODO: Do this in two passes - first process the vertices (before indexing/stripping),
// then resolve the indices. This lets us avoid transforming shared vertices twice.

binner_->UpdateState(vreader.isThrough());
hasDraws_ = true;

static TransformState transformState;
if (binner_->HasDirty(SoftDirty::LIGHT_ALL | SoftDirty::TRANSFORM_ALL)) {
@ -494,9 +541,17 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G
bool skipCull = !gstate.isCullEnabled() || gstate.isModeClear();
const CullType cullType = skipCull ? CullType::OFF : (gstate.getCullMode() ? CullType::CCW : CullType::CW);

bool outside_range_flag = false;
auto readVertexAt = [&](VertexReader &vreader, const TransformState &transformState, int vtx) {
if (indices) {
vreader.Goto(ConvertIndex(vtx) - index_lower_bound);
} else {
vreader.Goto(vtx);
}

if (vreader.isThrough() && cullType == CullType::OFF && prim_type == GE_PRIM_TRIANGLES && data_index_ + vertex_count >= 6 && ((data_index_ + vertex_count) % 6) == 0) {
return ReadVertex(vreader, transformState);
};

if (vreader.isThrough() && cullType == CullType::OFF && prim_type == GE_PRIM_TRIANGLES && data_index_ == 0 && vertex_count >= 6 && ((vertex_count) % 6) == 0) {
// Some games send rectangles as a series of regular triangles.
// We look for this, but only in throughmode.
VertexData buf[6];

@ -506,20 +561,7 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G
}

for (int vtx = 0; vtx < vertex_count; ++vtx) {
if (indices) {
vreader.Goto(ConvertIndex(vtx) - index_lower_bound);
} else {
vreader.Goto(vtx);
}

buf[buf_index++] = ReadVertex(vreader, transformState, outside_range_flag);
if (buf_index >= 3 && outside_range_flag) {
// Cull, just pretend it didn't happen.
buf_index -= 3;
outside_range_flag = false;
continue;
}

buf[buf_index++] = readVertexAt(vreader, transformState, vtx);
if (buf_index < 6)
continue;
@ -552,73 +594,54 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G
return;
}

// Note: intentionally, these allow for the case of vertex_count == 0, but data_index_ > 0.
// This is used for immediate-mode primitives.
switch (prim_type) {
case GE_PRIM_POINTS:
case GE_PRIM_LINES:
case GE_PRIM_TRIANGLES:
{
for (int vtx = 0; vtx < vertex_count; ++vtx) {
if (indices) {
vreader.Goto(ConvertIndex(vtx) - index_lower_bound);
} else {
vreader.Goto(vtx);
}

data_[data_index_++] = ReadVertex(vreader, transformState, outside_range_flag);
if (data_index_ < vtcs_per_prim) {
// Keep reading. Note: an incomplete prim will stay read for GE_PRIM_KEEP_PREVIOUS.
continue;
}

// Okay, we've got enough verts. Reset the index for next time.
data_index_ = 0;
if (outside_range_flag) {
// Cull the prim if it was outside, and move to the next prim.
outside_range_flag = false;
continue;
}

switch (prim_type) {
case GE_PRIM_TRIANGLES:
SendTriangle(cullType, &data_[0]);
break;

case GE_PRIM_LINES:
Clipper::ProcessLine(data_[0], data_[1], *binner_);
break;

case GE_PRIM_POINTS:
Clipper::ProcessPoint(data_[0], *binner_);
break;

default:
_dbg_assert_msg_(false, "Unexpected prim type: %d", prim_type);
}
}
break;
for (int i = 0; i < data_index_; ++i)
Clipper::ProcessPoint(data_[i], *binner_);
data_index_ = 0;
for (int vtx = 0; vtx < vertex_count; ++vtx) {
data_[0] = readVertexAt(vreader, transformState, vtx);
Clipper::ProcessPoint(data_[0], *binner_);
}
break;

case GE_PRIM_LINES:
for (int i = 0; i < data_index_ - 1; i += 2)
Clipper::ProcessLine(data_[i + 0], data_[i + 1], *binner_);
data_index_ &= 1;
for (int vtx = 0; vtx < vertex_count; ++vtx) {
data_[data_index_++] = readVertexAt(vreader, transformState, vtx);
if (data_index_ == 2) {
Clipper::ProcessLine(data_[0], data_[1], *binner_);
data_index_ = 0;
}
}
break;

case GE_PRIM_TRIANGLES:
for (int vtx = 0; vtx < vertex_count; ++vtx) {
data_[data_index_++] = readVertexAt(vreader, transformState, vtx);
if (data_index_ < 3) {
// Keep reading. Note: an incomplete prim will stay read for GE_PRIM_KEEP_PREVIOUS.
continue;
}
// Okay, we've got enough verts. Reset the index for next time.
data_index_ = 0;

SendTriangle(cullType, &data_[0]);
}
// In case vertex_count was 0.
if (data_index_ >= 3) {
SendTriangle(cullType, &data_[0]);
data_index_ = 0;
}
break;

case GE_PRIM_RECTANGLES:
for (int vtx = 0; vtx < vertex_count; ++vtx) {
if (indices) {
vreader.Goto(ConvertIndex(vtx) - index_lower_bound);
} else {
vreader.Goto(vtx);
}

data_[data_index_++] = ReadVertex(vreader, transformState, outside_range_flag);
if (outside_range_flag) {
outside_range_flag = false;
// Note: this is the post increment index. If odd, we set the first vert.
if (data_index_ & 1) {
// Skip the next one and forget this one.
vtx++;
data_index_--;
} else {
// Forget both of the last 2.
data_index_ -= 2;
}
}
data_[data_index_++] = readVertexAt(vreader, transformState, vtx);

if (data_index_ == 4 && vreader.isThrough() && cullType == CullType::OFF) {
if (Rasterizer::DetectRectangleThroughModeSlices(binner_->State(), data_)) {

@ -646,19 +669,7 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G
// If data_index_ is 1 or 2, etc., it means we're continuing a line strip.
int skip_count = data_index_ == 0 ? 1 : 0;
for (int vtx = 0; vtx < vertex_count; ++vtx) {
if (indices) {
vreader.Goto(ConvertIndex(vtx) - index_lower_bound);
} else {
vreader.Goto(vtx);
}

data_[(data_index_++) & 1] = ReadVertex(vreader, transformState, outside_range_flag);
if (outside_range_flag) {
// Drop all primitives containing the current vertex
skip_count = 2;
outside_range_flag = false;
continue;
}
data_[(data_index_++) & 1] = readVertexAt(vreader, transformState, vtx);

if (skip_count) {
--skip_count;

@ -667,6 +678,9 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G
Clipper::ProcessLine(data_[data_index_ & 1], data_[(data_index_ & 1) ^ 1], *binner_);
}
}
// If this is from immediate-mode drawing, we always had one new vert (already in data_.)
if (isImmDraw_ && data_index_ >= 2)
Clipper::ProcessLine(data_[data_index_ & 1], data_[(data_index_ & 1) ^ 1], *binner_);
break;
}
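The monolithic vtcs_per_prim loop above is split into one loop per primitive type, so each case can keep its own carry-over rule for GE_PRIM_KEEP_PREVIOUS and immediate-mode continuation: points flush every vertex, lines every second, triangles every third. The line case, condensed from the diff:

	case GE_PRIM_LINES:
		for (int vtx = 0; vtx < vertex_count; ++vtx) {
			data_[data_index_++] = readVertexAt(vreader, transformState, vtx);
			if (data_index_ == 2) {
				// A full segment: emit it and start collecting the next pair.
				Clipper::ProcessLine(data_[0], data_[1], *binner_);
				data_index_ = 0;
			}
		}
		break;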
@ -681,19 +695,15 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G
if (data_index_ == 0 && vertex_count >= 4 && (vertex_count & 1) == 0 && cullType == CullType::OFF) {
for (int base = 0; base < vertex_count - 2; base += 2) {
for (int vtx = base == 0 ? 0 : 2; vtx < 4; ++vtx) {
if (indices) {
vreader.Goto(ConvertIndex(base + vtx) - index_lower_bound);
} else {
vreader.Goto(base + vtx);
}
data_[vtx] = ReadVertex(vreader, transformState, outside_range_flag);
data_[vtx] = readVertexAt(vreader, transformState, base + vtx);
}

// If a strip is effectively a rectangle, draw it as such!
int tl = -1, br = -1;
if (!outside_range_flag && Rasterizer::DetectRectangleFromStrip(binner_->State(), data_, &tl, &br)) {
if (Rasterizer::DetectRectangleFromStrip(binner_->State(), data_, &tl, &br)) {
Clipper::ProcessRect(data_[tl], data_[br], *binner_);
start_vtx += 2;
skip_count = 0;
if (base + 4 >= vertex_count) {
start_vtx = vertex_count;
break;

@ -710,32 +720,29 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G
}
}

outside_range_flag = false;
for (int vtx = start_vtx; vtx < vertex_count; ++vtx) {
if (indices) {
vreader.Goto(ConvertIndex(vtx) - index_lower_bound);
} else {
vreader.Goto(vtx);
}

for (int vtx = start_vtx; vtx < vertex_count && skip_count > 0; ++vtx) {
int provoking_index = (data_index_++) % 3;
data_[provoking_index] = ReadVertex(vreader, transformState, outside_range_flag);
if (outside_range_flag) {
// Drop all primitives containing the current vertex
skip_count = 2;
outside_range_flag = false;
continue;
}
data_[provoking_index] = readVertexAt(vreader, transformState, vtx);
--skip_count;
++start_vtx;
}

if (skip_count) {
--skip_count;
continue;
}
for (int vtx = start_vtx; vtx < vertex_count; ++vtx) {
int provoking_index = (data_index_++) % 3;
data_[provoking_index] = readVertexAt(vreader, transformState, vtx);

int wind = (data_index_ - 1) % 2;
CullType altCullType = cullType == CullType::OFF ? cullType : CullType((int)cullType ^ wind);
SendTriangle(altCullType, &data_[0], provoking_index);
}

// If this is from immediate-mode drawing, we always had one new vert (already in data_.)
if (isImmDraw_ && data_index_ >= 3) {
int provoking_index = (data_index_ - 1) % 3;
int wind = (data_index_ - 1) % 2;
CullType altCullType = cullType == CullType::OFF ? cullType : CullType((int)cullType ^ wind);
SendTriangle(altCullType, &data_[0], provoking_index);
}
break;
}
@ -747,64 +754,47 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G
int start_vtx = 0;

// Only read the central vertex if we're not continuing.
if (data_index_ == 0) {
if (indices) {
vreader.Goto(ConvertIndex(0) - index_lower_bound);
} else {
vreader.Goto(0);
}
data_[0] = ReadVertex(vreader, transformState, outside_range_flag);
if (data_index_ == 0 && vertex_count > 0) {
data_[0] = readVertexAt(vreader, transformState, 0);
data_index_++;
start_vtx = 1;

// If the central vertex is outside range, all the points are toast.
if (outside_range_flag)
break;
}

if (data_index_ == 1 && vertex_count == 4 && cullType == CullType::OFF) {
for (int vtx = start_vtx; vtx < vertex_count; ++vtx) {
if (indices) {
vreader.Goto(ConvertIndex(vtx) - index_lower_bound);
} else {
vreader.Goto(vtx);
}
data_[vtx] = ReadVertex(vreader, transformState, outside_range_flag);
data_[vtx] = readVertexAt(vreader, transformState, vtx);
}

int tl = -1, br = -1;
if (!outside_range_flag && Rasterizer::DetectRectangleFromFan(binner_->State(), data_, vertex_count, &tl, &br)) {
if (Rasterizer::DetectRectangleFromFan(binner_->State(), data_, vertex_count, &tl, &br)) {
Clipper::ProcessRect(data_[tl], data_[br], *binner_);
break;
}
}

outside_range_flag = false;
for (int vtx = start_vtx; vtx < vertex_count; ++vtx) {
if (indices) {
vreader.Goto(ConvertIndex(vtx) - index_lower_bound);
} else {
vreader.Goto(vtx);
}

for (int vtx = start_vtx; vtx < vertex_count && skip_count > 0; ++vtx) {
int provoking_index = 2 - ((data_index_++) % 2);
data_[provoking_index] = ReadVertex(vreader, transformState, outside_range_flag);
if (outside_range_flag) {
// Drop all primitives containing the current vertex
skip_count = 2;
outside_range_flag = false;
continue;
}
data_[provoking_index] = readVertexAt(vreader, transformState, vtx);
--skip_count;
++start_vtx;
}

if (skip_count) {
--skip_count;
continue;
}
for (int vtx = start_vtx; vtx < vertex_count; ++vtx) {
int provoking_index = 2 - ((data_index_++) % 2);
data_[provoking_index] = readVertexAt(vreader, transformState, vtx);

int wind = (data_index_ - 1) % 2;
CullType altCullType = cullType == CullType::OFF ? cullType : CullType((int)cullType ^ wind);
SendTriangle(altCullType, &data_[0], provoking_index);
}

// If this is from immediate-mode drawing, we always had one new vert (already in data_.)
if (isImmDraw_ && data_index_ >= 3) {
int wind = (data_index_ - 1) % 2;
int provoking_index = 2 - wind;
CullType altCullType = cullType == CullType::OFF ? cullType : CullType((int)cullType ^ wind);
SendTriangle(altCullType, &data_[0], provoking_index);
}
break;
}
@ -814,6 +804,47 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G
}
}

void TransformUnit::SubmitImmVertex(const VertexData &vert, SoftwareDrawEngine *drawEngine) {
// Where we put it is different for STRIP/FAN types.
switch (prev_prim_) {
case GE_PRIM_POINTS:
case GE_PRIM_LINES:
case GE_PRIM_TRIANGLES:
case GE_PRIM_RECTANGLES:
// This is the easy one. SubmitPrimitive resets data_index_.
data_[data_index_++] = vert;
break;

case GE_PRIM_LINE_STRIP:
// This one alternates, and data_index_ > 0 means it draws a segment.
data_[(data_index_++) & 1] = vert;
break;

case GE_PRIM_TRIANGLE_STRIP:
data_[(data_index_++) % 3] = vert;
break;

case GE_PRIM_TRIANGLE_FAN:
if (data_index_ == 0) {
data_[data_index_++] = vert;
} else {
int provoking_index = 2 - ((data_index_++) % 2);
data_[provoking_index] = vert;
}
break;

default:
_assert_msg_(false, "Invalid prim type: %d", (int)prev_prim_);
break;
}

uint32_t vertTypeID = GetVertTypeID(gstate.vertType | GE_VTYPE_POS_FLOAT, gstate.getUVGenMode());
// This now processes the step with shared logic, given the existing data_.
isImmDraw_ = true;
SubmitPrimitive(nullptr, nullptr, GE_PRIM_KEEP_PREVIOUS, 0, vertTypeID, nullptr, drawEngine);
isImmDraw_ = false;
}

void TransformUnit::SendTriangle(CullType cullType, const VertexData *verts, int provoking) {
if (cullType == CullType::OFF) {
Clipper::ProcessTriangle(verts[0], verts[1], verts[2], verts[provoking], *binner_);

@ -826,8 +857,12 @@ void TransformUnit::SendTriangle(CullType cullType, const VertexData *verts, int
}

void TransformUnit::Flush(const char *reason) {
if (!hasDraws_)
return;

binner_->Flush(reason);
GPUDebug::NotifyDraw();
hasDraws_ = false;
}

void TransformUnit::GetStats(char *buffer, size_t bufsize) {

@ -836,6 +871,9 @@ void TransformUnit::GetStats(char *buffer, size_t bufsize) {
}

void TransformUnit::FlushIfOverlap(const char *reason, bool modifying, uint32_t addr, uint32_t stride, uint32_t w, uint32_t h) {
if (!hasDraws_)
return;

if (binner_->HasPendingWrite(addr, stride, w, h))
Flush(reason);
if (modifying && binner_->HasPendingRead(addr, stride, w, h))

@ -90,6 +90,10 @@ struct VertexData {
color1 = LerpInt<Vec3<int>, 256>(Vec3<int>::FromRGB(a.color1), Vec3<int>::FromRGB(b.color1), t_int).ToRGB();
}

bool OutsideRange() const {
return screenpos.x == 0x7FFFFFFF;
}

ClipCoords clippos;
Vec2<float> texturecoords;
uint32_t color0;

@ -125,6 +129,7 @@ public:
static ScreenCoords DrawingToScreen(const DrawingCoords &coords, u16 z);

void SubmitPrimitive(const void* vertices, const void* indices, GEPrimitiveType prim_type, int vertex_count, u32 vertex_type, int *bytesRead, SoftwareDrawEngine *drawEngine);
void SubmitImmVertex(const VertexData &vert, SoftwareDrawEngine *drawEngine);

bool GetCurrentSimpleVertices(int count, std::vector<GPUDebugVertex> &vertices, std::vector<u16> &indices);

@ -138,7 +143,7 @@ public:
SoftDirty GetDirty();

private:
VertexData ReadVertex(VertexReader &vreader, const TransformState &lstate, bool &outside_range_flag);
VertexData ReadVertex(VertexReader &vreader, const TransformState &state);
void SendTriangle(CullType cullType, const VertexData *verts, int provoking = 2);

u8 *decoded_ = nullptr;

@ -149,6 +154,8 @@ private:
// This is the index of the next vert in data (or higher, may need modulus.)
int data_index_ = 0;
GEPrimitiveType prev_prim_ = GE_PRIM_POINTS;
bool hasDraws_ = false;
bool isImmDraw_ = false;
};

class SoftwareDrawEngine : public DrawEngineCommon {

@ -158,7 +165,7 @@ public:

void DispatchFlush() override;
void DispatchSubmitPrim(const void *verts, const void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int cullMode, int *bytesRead) override;
void DispatchSubmitImm(const void *verts, const void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead) override;
void DispatchSubmitImm(GEPrimitiveType prim, TransformedVertex *buffer, int vertexCount, int cullMode, bool continuation) override;

VertexDecoder *FindVertexDecoder(u32 vtype);
@ -71,6 +71,7 @@ enum {
DRAW_BINDING_TESS_STORAGE_BUF = 6,
DRAW_BINDING_TESS_STORAGE_BUF_WU = 7,
DRAW_BINDING_TESS_STORAGE_BUF_WV = 8,
DRAW_BINDING_INPUT_ATTACHMENT = 9,
};

enum {

@ -94,7 +95,10 @@ DrawEngineVulkan::DrawEngineVulkan(Draw::DrawContext *draw)

void DrawEngineVulkan::InitDeviceObjects() {
// All resources we need for PSP drawing. Usually only bindings 0 and 2-4 are populated.
VkDescriptorSetLayoutBinding bindings[9]{};

// TODO: Make things more flexible, so we at least have specialized layouts for input attachments and tess.
// Note that it becomes a support matrix..
VkDescriptorSetLayoutBinding bindings[10]{};
bindings[0].descriptorCount = 1;
bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
bindings[0].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;

@ -132,6 +136,10 @@ void DrawEngineVulkan::InitDeviceObjects() {
bindings[8].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
bindings[8].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
bindings[8].binding = DRAW_BINDING_TESS_STORAGE_BUF_WV;
bindings[9].descriptorCount = 1;
bindings[9].descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT;
bindings[9].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
bindings[9].binding = DRAW_BINDING_INPUT_ATTACHMENT;

VulkanContext *vulkan = (VulkanContext *)draw_->GetNativeObject(Draw::NativeObject::CONTEXT);
VkDevice device = vulkan->GetDevice();

@ -145,13 +153,15 @@ void DrawEngineVulkan::InitDeviceObjects() {

static constexpr int DEFAULT_DESC_POOL_SIZE = 512;
std::vector<VkDescriptorPoolSize> dpTypes;
dpTypes.resize(3);
dpTypes.resize(4);
dpTypes[0].descriptorCount = DEFAULT_DESC_POOL_SIZE * 3;
dpTypes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
dpTypes[1].descriptorCount = DEFAULT_DESC_POOL_SIZE * 3; // Don't use these for tess anymore, need max three per set.
dpTypes[1].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
dpTypes[2].descriptorCount = DEFAULT_DESC_POOL_SIZE * 3; // TODO: Use a separate layout when no spline stuff is needed to reduce the need for these.
dpTypes[2].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
dpTypes[3].descriptorCount = DEFAULT_DESC_POOL_SIZE; // TODO: Use a separate layout when no spline stuff is needed to reduce the need for these.
dpTypes[3].type = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT;

VkDescriptorPoolCreateInfo dp{ VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO };
// Don't want to mess around with individually freeing these.
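Binding 9 lets the fragment shader read the current render target as a Vulkan input attachment (for framebuffer-read blending) instead of going through a copied texture, and the descriptor pool gains a matching type entry. Declaring such a binding is standard Vulkan, mirroring the diff:

	VkDescriptorSetLayoutBinding inputAttachment{};
	inputAttachment.binding = DRAW_BINDING_INPUT_ATTACHMENT;  // 9 above
	inputAttachment.descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT;
	inputAttachment.descriptorCount = 1;
	// Input attachments can only be read from fragment shaders.
	inputAttachment.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;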
@ -379,6 +389,7 @@ VkDescriptorSet DrawEngineVulkan::GetOrCreateDescriptorSet(VkImageView imageView
key.base_ = base;
key.light_ = light;
key.bone_ = bone;
key.secondaryIsInputAttachment = boundSecondaryIsInputAttachment_;

FrameData &frame = GetCurFrame();
// See if we already have this descriptor set cached.

@ -417,15 +428,15 @@ VkDescriptorSet DrawEngineVulkan::GetOrCreateDescriptorSet(VkImageView imageView
}

if (boundSecondary_) {
tex[1].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
tex[1].imageLayout = key.secondaryIsInputAttachment ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
tex[1].imageView = boundSecondary_;
tex[1].sampler = samplerSecondaryNearest_;
writes[n].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
writes[n].pNext = nullptr;
writes[n].dstBinding = DRAW_BINDING_2ND_TEXTURE;
writes[n].dstBinding = key.secondaryIsInputAttachment ? DRAW_BINDING_INPUT_ATTACHMENT : DRAW_BINDING_2ND_TEXTURE;
writes[n].pImageInfo = &tex[1];
writes[n].descriptorCount = 1;
writes[n].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
writes[n].descriptorType = key.secondaryIsInputAttachment ? VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT : VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
writes[n].dstSet = desc;
n++;
}
@ -788,7 +799,7 @@ void DrawEngineVulkan::DoFlush() {
lastRenderStepId_ = curRenderStepId;
}

renderManager->BindPipeline(pipeline->pipeline, (PipelineFlags)pipeline->flags, pipelineLayout_);
renderManager->BindPipeline(pipeline->pipeline, pipeline->pipelineFlags, pipelineLayout_);
if (pipeline != lastPipeline_) {
if (lastPipeline_ && !(lastPipeline_->UsesBlendConstant() && pipeline->UsesBlendConstant())) {
gstate_c.Dirty(DIRTY_BLEND_STATE);

@ -916,7 +927,7 @@ void DrawEngineVulkan::DoFlush() {
lastRenderStepId_ = curRenderStepId;
}

renderManager->BindPipeline(pipeline->pipeline, (PipelineFlags)pipeline->flags, pipelineLayout_);
renderManager->BindPipeline(pipeline->pipeline, pipeline->pipelineFlags, pipelineLayout_);
if (pipeline != lastPipeline_) {
if (lastPipeline_ && !lastPipeline_->UsesBlendConstant() && pipeline->UsesBlendConstant()) {
gstate_c.Dirty(DIRTY_BLEND_STATE);

@ -217,6 +217,8 @@ private:

// Secondary texture for shader blending
VkImageView boundSecondary_ = VK_NULL_HANDLE;
bool boundSecondaryIsInputAttachment_ = false;

// CLUT texture for shader depal
VkImageView boundDepal_ = VK_NULL_HANDLE;
bool boundDepalSmoothed_ = false;

@ -234,6 +236,7 @@ private:
VkSampler sampler_;
VkBuffer base_, light_, bone_; // All three UBO slots will be set to this. This will usually be identical
// for all draws in a frame, except when the buffer has to grow.
bool secondaryIsInputAttachment;
};

// We alternate between these.

@ -281,7 +284,7 @@ private:
VulkanDynamicState dynState_{};

int tessOffset_ = 0;
bool fboTexNeedsBind_ = false;
FBOTexState fboTexBindState_ = FBO_TEX_NONE;

// Hardware tessellation
TessellationDataTransferVulkan *tessDataTransferVulkan;
@ -33,7 +33,7 @@ class VulkanPushBuffer;

class FramebufferManagerVulkan : public FramebufferManagerCommon {
public:
FramebufferManagerVulkan(Draw::DrawContext *draw);
explicit FramebufferManagerVulkan(Draw::DrawContext *draw);
~FramebufferManagerVulkan();

// If within a render pass, this will just issue a regular clear. If beginning a new render pass,

@ -52,7 +52,7 @@

GPU_Vulkan::GPU_Vulkan(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
: GPUCommon(gfxCtx, draw), drawEngine_(draw) {
CheckGPUFeatures();
gstate_c.featureFlags = CheckGPUFeatures();

VulkanContext *vulkan = (VulkanContext *)gfxCtx->GetAPIContext();

@ -182,8 +182,8 @@ GPU_Vulkan::~GPU_Vulkan() {
delete framebufferManagerVulkan_;
}

void GPU_Vulkan::CheckGPUFeatures() {
uint32_t features = 0;
u32 GPU_Vulkan::CheckGPUFeatures() const {
uint32_t features = GPUCommon::CheckGPUFeatures();

VulkanContext *vulkan = (VulkanContext *)draw_->GetNativeObject(Draw::NativeObject::CONTEXT);
switch (vulkan->GetPhysicalDeviceProperties().properties.vendorID) {

@ -222,44 +222,14 @@ void GPU_Vulkan::CheckGPUFeatures() {

// Mandatory features on Vulkan, which may be checked in "centralized" code
features |= GPU_SUPPORTS_TEXTURE_LOD_CONTROL;
features |= GPU_SUPPORTS_BLEND_MINMAX;
features |= GPU_SUPPORTS_TEXTURE_NPOT;
features |= GPU_SUPPORTS_INSTANCE_RENDERING;
features |= GPU_SUPPORTS_VERTEX_TEXTURE_FETCH;
features |= GPU_SUPPORTS_TEXTURE_FLOAT;
features |= GPU_SUPPORTS_DEPTH_TEXTURE;

auto &enabledFeatures = vulkan->GetDeviceFeatures().enabled;
if (enabledFeatures.depthClamp) {
features |= GPU_SUPPORTS_DEPTH_CLAMP;
}
if (enabledFeatures.shaderClipDistance) {
features |= GPU_SUPPORTS_CLIP_DISTANCE;
}
if (enabledFeatures.shaderCullDistance) {
// Must support at least 8 if feature supported, so we're fine.
features |= GPU_SUPPORTS_CULL_DISTANCE;
}
if (!draw_->GetBugs().Has(Draw::Bugs::BROKEN_NAN_IN_CONDITIONAL)) {
// Ignore the compat setting if clip and cull are both enabled.
// When supported, we can do the depth side of range culling more correctly.
const bool supported = draw_->GetDeviceCaps().clipDistanceSupported && draw_->GetDeviceCaps().cullDistanceSupported;
const bool disabled = PSP_CoreParameter().compat.flags().DisableRangeCulling;
if (supported || !disabled) {
features |= GPU_SUPPORTS_VS_RANGE_CULLING;
}
}
if (enabledFeatures.dualSrcBlend) {
if (!g_Config.bVendorBugChecksEnabled || !draw_->GetBugs().Has(Draw::Bugs::DUAL_SOURCE_BLENDING_BROKEN)) {
features |= GPU_SUPPORTS_DUALSOURCE_BLEND;
}
}
if (draw_->GetDeviceCaps().logicOpSupported) {
features |= GPU_SUPPORTS_LOGIC_OP;
}
if (draw_->GetDeviceCaps().anisoSupported) {
features |= GPU_SUPPORTS_ANISOTROPY;
}

// These are VULKAN_4444_FORMAT and friends.
uint32_t fmt4444 = draw_->GetDataFormatSupport(Draw::DataFormat::B4G4R4A4_UNORM_PACK16);

@ -275,10 +245,6 @@ void GPU_Vulkan::CheckGPUFeatures() {
INFO_LOG(G3D, "Deficient texture format support: 4444: %d 1555: %d 565: %d", fmt4444, fmt1555, fmt565);
}

if (PSP_CoreParameter().compat.flags().ClearToRAM) {
features |= GPU_USE_CLEAR_RAM_HACK;
}

if (!g_Config.bHighQualityDepth && (features & GPU_SUPPORTS_ACCURATE_DEPTH) != 0) {
features |= GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT;
}

@ -290,7 +256,7 @@ void GPU_Vulkan::CheckGPUFeatures() {
features |= GPU_ROUND_DEPTH_TO_16BIT;
}

gstate_c.featureFlags = features;
return features;
}
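CheckGPUFeatures thus becomes a const query returning the feature mask rather than writing gstate_c as a side effect; the shared bits move into GPUCommon::CheckGPUFeatures() and backends only OR in what they add. The pattern, as it now appears at every call site in the diff:

	// Backend composes with the shared baseline and returns the mask...
	u32 GPU_Vulkan::CheckGPUFeatures() const {
		uint32_t features = GPUCommon::CheckGPUFeatures();
		// ... Vulkan-specific bits OR'd in ...
		return features;
	}

	// ...and callers make the state update explicit:
	gstate_c.featureFlags = CheckGPUFeatures();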
void GPU_Vulkan::BeginHostFrame() {

@ -298,7 +264,7 @@ void GPU_Vulkan::BeginHostFrame() {
UpdateCmdInfo();

if (resized_) {
CheckGPUFeatures();
gstate_c.featureFlags = CheckGPUFeatures();
// In case the GPU changed.
BuildReportingInfo();
framebufferManager_->Resized();

@ -537,7 +503,7 @@ void GPU_Vulkan::DeviceRestore() {
GPUCommon::DeviceRestore();
InitDeviceObjects();

CheckGPUFeatures();
gstate_c.featureFlags = CheckGPUFeatures();
BuildReportingInfo();
UpdateCmdInfo();
@ -38,7 +38,7 @@ public:
~GPU_Vulkan();

// This gets called on startup and when we get back from settings.
void CheckGPUFeatures() override;
u32 CheckGPUFeatures() const override;

bool IsReady() override;
void CancelReady() override;

@ -170,7 +170,7 @@ static std::string CutFromMain(std::string str) {
}

static VulkanPipeline *CreateVulkanPipeline(VulkanRenderManager *renderManager, VkPipelineCache pipelineCache,
VkPipelineLayout layout, const VulkanPipelineRasterStateKey &key,
VkPipelineLayout layout, PipelineFlags pipelineFlags, const VulkanPipelineRasterStateKey &key,
const DecVtxFormat *decFmt, VulkanVertexShader *vs, VulkanFragmentShader *fs, bool useHwTransform, u32 variantBitmask) {
VulkanPipeline *vulkanPipeline = new VulkanPipeline();
VKRGraphicsPipelineDesc *desc = &vulkanPipeline->desc;

@ -299,14 +299,14 @@ static VulkanPipeline *CreateVulkanPipeline(VulkanRenderManager *renderManager,
VKRGraphicsPipeline *pipeline = renderManager->CreateGraphicsPipeline(desc, variantBitmask, "game");

vulkanPipeline->pipeline = pipeline;
vulkanPipeline->flags = 0;
if (useBlendConstant)
vulkanPipeline->flags |= PIPELINE_FLAG_USES_BLEND_CONSTANT;
pipelineFlags |= PipelineFlags::USES_BLEND_CONSTANT;
if (key.topology == VK_PRIMITIVE_TOPOLOGY_LINE_LIST || key.topology == VK_PRIMITIVE_TOPOLOGY_LINE_STRIP)
vulkanPipeline->flags |= PIPELINE_FLAG_USES_LINES;
pipelineFlags |= PipelineFlags::USES_LINES;
if (dss.depthTestEnable || dss.stencilTestEnable) {
vulkanPipeline->flags |= PIPELINE_FLAG_USES_DEPTH_STENCIL;
pipelineFlags |= PipelineFlags::USES_DEPTH_STENCIL;
}
vulkanPipeline->pipelineFlags = pipelineFlags;
return vulkanPipeline;
}

@ -329,8 +329,13 @@ VulkanPipeline *PipelineManagerVulkan::GetOrCreatePipeline(VulkanRenderManager *
if (iter)
return iter;

PipelineFlags pipelineFlags = (PipelineFlags)0;
if (fs->Flags() & FragmentShaderFlags::INPUT_ATTACHMENT) {
pipelineFlags |= PipelineFlags::USES_INPUT_ATTACHMENT;
}

VulkanPipeline *pipeline = CreateVulkanPipeline(
renderManager, pipelineCache_, layout,
renderManager, pipelineCache_, layout, pipelineFlags,
rasterKey, decFmt, vs, fs, useHwTransform, variantBitmask);
pipelines_.Insert(key, pipeline);

@ -55,11 +55,12 @@ struct VulkanPipelineKey {
struct VulkanPipeline {
VKRGraphicsPipeline *pipeline;
VKRGraphicsPipelineDesc desc;
int flags; // PipelineFlags enum above.
PipelineFlags pipelineFlags; // PipelineFlags enum above.

bool UsesBlendConstant() const { return (flags & PIPELINE_FLAG_USES_BLEND_CONSTANT) != 0; }
bool UsesLines() const { return (flags & PIPELINE_FLAG_USES_LINES) != 0; }
bool UsesDepthStencil() const { return (flags & PIPELINE_FLAG_USES_DEPTH_STENCIL) != 0; }
bool UsesBlendConstant() const { return (pipelineFlags & PipelineFlags::USES_BLEND_CONSTANT) != 0; }
bool UsesLines() const { return (pipelineFlags & PipelineFlags::USES_LINES) != 0; }
bool UsesDepthStencil() const { return (pipelineFlags & PipelineFlags::USES_DEPTH_STENCIL) != 0; }
bool UsesInputAttachment() const { return (pipelineFlags & PipelineFlags::USES_INPUT_ATTACHMENT) != 0; }

u32 GetVariantsBitmask() const;
};
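The untyped int flags field is replaced with the PipelineFlags enum throughout, so flag arithmetic is type-checked. For an enum class to support |= and & it needs explicit operators; a sketch of the usual pattern (the bit values here are illustrative, the real ones live in the render manager headers):

	enum class PipelineFlags : u32 {
		NONE = 0,
		USES_BLEND_CONSTANT = 1 << 0,   // illustrative values
		USES_LINES = 1 << 1,
		USES_DEPTH_STENCIL = 1 << 2,
		USES_INPUT_ATTACHMENT = 1 << 3,
	};
	inline PipelineFlags operator|(PipelineFlags a, PipelineFlags b) {
		return PipelineFlags((u32)a | (u32)b);
	}
	inline PipelineFlags &operator|=(PipelineFlags &a, PipelineFlags b) { return a = a | b; }
	inline u32 operator&(PipelineFlags a, PipelineFlags b) { return (u32)a & (u32)b; }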
@ -153,7 +153,7 @@ void DrawEngineVulkan::ConvertStateToVulkanKey(FramebufferManagerVulkan &fbManag
GenericLogicState &logicState = pipelineState_.logicState;

if (pipelineState_.FramebufferRead()) {
ApplyFramebufferRead(&fboTexNeedsBind_);
ApplyFramebufferRead(&fboTexBindState_);
// The shader takes over the responsibility for blending, so recompute.
// We might still end up using blend to write something to alpha.
ApplyStencilReplaceAndLogicOpIgnoreBlend(blendState.replaceAlphaWithStencil, blendState);

@ -364,15 +364,23 @@ void DrawEngineVulkan::BindShaderBlendTex() {
// TODO: At this point, we know if the vertices are full alpha or not.
// Set the nearest/linear here (since we correctly know if alpha/color tests are needed)?
if (!gstate.isModeClear()) {
if (fboTexNeedsBind_) {
if (fboTexBindState_ == FBO_TEX_COPY_BIND_TEX) {
bool bindResult = framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY);
_dbg_assert_(bindResult);
boundSecondary_ = (VkImageView)draw_->GetNativeObject(Draw::NativeObject::BOUND_TEXTURE1_IMAGEVIEW);
boundSecondaryIsInputAttachment_ = false;
fboTexBound_ = true;
fboTexNeedsBind_ = false;
fboTexBindState_ = FBO_TEX_NONE;

// Must dirty blend state here so we re-copy next time. Example: Lunar's spell effects.
dirtyRequiresRecheck_ |= DIRTY_BLEND_STATE;
} else if (fboTexBindState_ == FBO_TEX_READ_FRAMEBUFFER) {
draw_->BindCurrentFramebufferForColorInput();
boundSecondary_ = (VkImageView)draw_->GetNativeObject(Draw::NativeObject::BOUND_FRAMEBUFFER_COLOR_IMAGEVIEW);
boundSecondaryIsInputAttachment_ = true;
fboTexBindState_ = FBO_TEX_NONE;
} else {
boundSecondary_ = VK_NULL_HANDLE;
}
}
}
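The single fboTexNeedsBind_ boolean grows into a three-way FBOTexState, distinguishing the legacy copy-and-bind path from binding the live framebuffer as an input attachment. The states implied by the branches above (the actual enum definition lives in the header; the comments are inferred):

	enum FBOTexState {
		FBO_TEX_NONE,              // nothing pending
		FBO_TEX_COPY_BIND_TEX,     // copy the framebuffer, bind the copy as the 2nd texture
		FBO_TEX_READ_FRAMEBUFFER,  // bind the current framebuffer as an input attachment
	};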
@ -116,7 +116,7 @@ bool SDLVulkanGraphicsContext::Init(SDL_Window *&window, int x, int y, int mode,
|
||||
return false;
|
||||
}
|
||||
|
||||
draw_ = Draw::T3DCreateVulkanContext(vulkan_, false);
|
||||
draw_ = Draw::T3DCreateVulkanContext(vulkan_);
|
||||
SetGPUBackend(GPUBackend::VULKAN);
|
||||
bool success = draw_->CreatePresets();
|
||||
_assert_(success);
|
||||
|
@ -56,6 +56,8 @@ public:
|
||||
parent->Add(scroll);
|
||||
}
|
||||
|
||||
const char *tag() const override { return "ButtonShape"; }
|
||||
|
||||
private:
|
||||
int *setting_;
|
||||
};
|
||||
@ -84,6 +86,8 @@ public:
|
||||
parent->Add(scroll);
|
||||
}
|
||||
|
||||
const char *tag() const override { return "ButtonIcon"; }
|
||||
|
||||
private:
|
||||
int *setting_;
|
||||
};
|
||||
|
@ -28,6 +28,8 @@ class ComboKeyScreen : public UIDialogScreenWithBackground {
|
||||
public:
|
||||
ComboKeyScreen(int id): id_(id) {}
|
||||
|
||||
const char *tag() const override { return "ComboKey"; }
|
||||
|
||||
void CreateViews() override;
|
||||
void onFinish(DialogResult result) override;
|
||||
|
||||
|
@ -313,7 +313,7 @@ UI::EventReturn ControlMappingScreen::OnVisualizeMapping(UI::EventParams ¶ms
|
||||
}
|
||||
|
||||
void ControlMappingScreen::dialogFinished(const Screen *dialog, DialogResult result) {
|
||||
if (result == DR_OK && dialog->tag() == "listpopup") {
|
||||
if (result == DR_OK && std::string(dialog->tag()) == "listpopup") {
|
||||
ListPopupScreen *popup = (ListPopupScreen *)dialog;
|
||||
KeyMap::AutoConfForPad(popup->GetChoiceString());
|
||||
}
|
||||
|
@ -35,7 +35,7 @@ class SingleControlMapper;
|
||||
class ControlMappingScreen : public UIDialogScreenWithBackground {
|
||||
public:
|
||||
ControlMappingScreen() {}
|
||||
std::string tag() const override { return "control mapping"; }
|
||||
const char *tag() const override { return "ControlMapping"; }
|
||||
|
||||
protected:
|
||||
void CreateViews() override;
|
||||
@ -47,7 +47,7 @@ private:
|
||||
UI::EventReturn OnAutoConfigure(UI::EventParams ¶ms);
|
||||
UI::EventReturn OnVisualizeMapping(UI::EventParams ¶ms);
|
||||
|
||||
virtual void dialogFinished(const Screen *dialog, DialogResult result) override;
|
||||
void dialogFinished(const Screen *dialog, DialogResult result) override;
|
||||
|
||||
UI::ScrollView *rightScroll_;
|
||||
std::vector<SingleControlMapper *> mappers_;
|
||||
@ -61,17 +61,19 @@ public:
|
||||
pspBtn_ = btn;
|
||||
}
|
||||
|
||||
virtual bool key(const KeyInput &key) override;
|
||||
virtual bool axis(const AxisInput &axis) override;
|
||||
const char *tag() const override { return "KeyMappingNewKey"; }
|
||||
|
||||
bool key(const KeyInput &key) override;
|
||||
bool axis(const AxisInput &axis) override;
|
||||
|
||||
void SetDelay(float t);
|
||||
|
||||
protected:
|
||||
void CreatePopupContents(UI::ViewGroup *parent) override;
|
||||
|
||||
virtual bool FillVertical() const override { return false; }
|
||||
virtual bool ShowButtons() const override { return true; }
|
||||
virtual void OnCompleted(DialogResult result) override {}
|
||||
bool FillVertical() const override { return false; }
|
||||
bool ShowButtons() const override { return true; }
|
||||
void OnCompleted(DialogResult result) override {}
|
||||
|
||||
private:
|
||||
int pspBtn_;
|
||||
@@ -87,6 +89,8 @@ public:
 		pspBtn_ = btn;
 	}
 
+	const char *tag() const override { return "KeyMappingNewMouseKey"; }
+
 	bool key(const KeyInput &key) override;
 	bool axis(const AxisInput &axis) override;
 
@@ -114,6 +118,8 @@ public:
 
 	void update() override;
 
+	const char *tag() const override { return "AnalogSetup"; }
+
 protected:
 	void CreateViews() override;
 
@@ -144,6 +150,8 @@ public:
 	bool key(const KeyInput &key) override;
 	bool axis(const AxisInput &axis) override;
 
+	const char *tag() const override { return "TouchTest"; }
+
 protected:
 	struct TrackedTouch {
 		int id;
@@ -171,6 +179,8 @@ class VisualMappingScreen : public UIDialogScreenWithBackground {
 public:
 	VisualMappingScreen() {}
 
+	const char *tag() const override { return "VisualMapping"; }
+
 protected:
 	void CreateViews() override;
 

@@ -41,6 +41,8 @@ public:
 	void update() override;
 	void onFinish(DialogResult result) override;
 
+	const char *tag() const override { return "CwCheat"; }
+
 protected:
 	void CreateViews() override;
 
@@ -85,7 +85,7 @@ static const char *logLevelList[] = {
 	"Verb."
 };
 
-void DevMenu::CreatePopupContents(UI::ViewGroup *parent) {
+void DevMenuScreen::CreatePopupContents(UI::ViewGroup *parent) {
 	using namespace UI;
 	auto dev = GetI18NCategory("Developer");
 	auto sy = GetI18NCategory("System");
@@ -94,25 +94,25 @@ void DevMenu::CreatePopupContents(UI::ViewGroup *parent) {
 	LinearLayout *items = new LinearLayout(ORIENT_VERTICAL);
 
 #if !defined(MOBILE_DEVICE)
-	items->Add(new Choice(dev->T("Log View")))->OnClick.Handle(this, &DevMenu::OnLogView);
+	items->Add(new Choice(dev->T("Log View")))->OnClick.Handle(this, &DevMenuScreen::OnLogView);
 #endif
-	items->Add(new Choice(dev->T("Logging Channels")))->OnClick.Handle(this, &DevMenu::OnLogConfig);
-	items->Add(new Choice(sy->T("Developer Tools")))->OnClick.Handle(this, &DevMenu::OnDeveloperTools);
-	items->Add(new Choice(dev->T("Jit Compare")))->OnClick.Handle(this, &DevMenu::OnJitCompare);
-	items->Add(new Choice(dev->T("Shader Viewer")))->OnClick.Handle(this, &DevMenu::OnShaderView);
+	items->Add(new Choice(dev->T("Logging Channels")))->OnClick.Handle(this, &DevMenuScreen::OnLogConfig);
+	items->Add(new Choice(sy->T("Developer Tools")))->OnClick.Handle(this, &DevMenuScreen::OnDeveloperTools);
+	items->Add(new Choice(dev->T("Jit Compare")))->OnClick.Handle(this, &DevMenuScreen::OnJitCompare);
+	items->Add(new Choice(dev->T("Shader Viewer")))->OnClick.Handle(this, &DevMenuScreen::OnShaderView);
 	if (g_Config.iGPUBackend == (int)GPUBackend::VULKAN) {
 		// TODO: Make a new allocator visualizer for VMA.
 		// items->Add(new CheckBox(&g_Config.bShowAllocatorDebug, dev->T("Allocator Viewer")));
 		items->Add(new CheckBox(&g_Config.bShowGpuProfile, dev->T("GPU Profile")));
 	}
-	items->Add(new Choice(dev->T("Toggle Freeze")))->OnClick.Handle(this, &DevMenu::OnFreezeFrame);
-	items->Add(new Choice(dev->T("Dump Frame GPU Commands")))->OnClick.Handle(this, &DevMenu::OnDumpFrame);
-	items->Add(new Choice(dev->T("Toggle Audio Debug")))->OnClick.Handle(this, &DevMenu::OnToggleAudioDebug);
+	items->Add(new Choice(dev->T("Toggle Freeze")))->OnClick.Handle(this, &DevMenuScreen::OnFreezeFrame);
+	items->Add(new Choice(dev->T("Dump Frame GPU Commands")))->OnClick.Handle(this, &DevMenuScreen::OnDumpFrame);
+	items->Add(new Choice(dev->T("Toggle Audio Debug")))->OnClick.Handle(this, &DevMenuScreen::OnToggleAudioDebug);
 #ifdef USE_PROFILER
 	items->Add(new CheckBox(&g_Config.bShowFrameProfiler, dev->T("Frame Profiler"), ""));
 #endif
 	items->Add(new CheckBox(&g_Config.bDrawFrameGraph, dev->T("Draw Frametimes Graph")));
-	items->Add(new Choice(dev->T("Reset limited logging")))->OnClick.Handle(this, &DevMenu::OnResetLimitedLogging);
+	items->Add(new Choice(dev->T("Reset limited logging")))->OnClick.Handle(this, &DevMenuScreen::OnResetLimitedLogging);
 
 	scroll->Add(items);
 	parent->Add(scroll);
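Note why the DevMenu to DevMenuScreen rename has to touch every OnClick.Handle(this, &DevMenu::...) call site: Handle() takes a pointer-to-member-function, and the class name is part of a member pointer's type, so &DevMenu::OnLogView stops naming anything once the class is renamed. A rough sketch of this binding style, using simplified stand-ins for UI::Event and UI::EventParams (the real PPSSPP types differ):

#include <functional>

struct EventParams {};
enum EventReturn { EVENT_DONE };

// Sketch only: a minimal event that binds object + member function, roughly as Handle() does.
struct Event {
	std::function<EventReturn(EventParams &)> handler;

	template <class T>
	void Handle(T *thiz, EventReturn (T::*func)(EventParams &)) {
		// Capture the object and the member pointer; invoke via ->*.
		handler = [thiz, func](EventParams &e) { return (thiz->*func)(e); };
	}
};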
@@ -123,48 +123,48 @@ void DevMenu::CreatePopupContents(UI::ViewGroup *parent) {
 	}
 }
 
-UI::EventReturn DevMenu::OnToggleAudioDebug(UI::EventParams &e) {
+UI::EventReturn DevMenuScreen::OnToggleAudioDebug(UI::EventParams &e) {
 	g_Config.bShowAudioDebug = !g_Config.bShowAudioDebug;
 	return UI::EVENT_DONE;
 }
 
-UI::EventReturn DevMenu::OnResetLimitedLogging(UI::EventParams &e) {
+UI::EventReturn DevMenuScreen::OnResetLimitedLogging(UI::EventParams &e) {
 	Reporting::ResetCounts();
 	return UI::EVENT_DONE;
 }
 
-UI::EventReturn DevMenu::OnLogView(UI::EventParams &e) {
+UI::EventReturn DevMenuScreen::OnLogView(UI::EventParams &e) {
 	UpdateUIState(UISTATE_PAUSEMENU);
 	screenManager()->push(new LogScreen());
 	return UI::EVENT_DONE;
 }
 
-UI::EventReturn DevMenu::OnLogConfig(UI::EventParams &e) {
+UI::EventReturn DevMenuScreen::OnLogConfig(UI::EventParams &e) {
 	UpdateUIState(UISTATE_PAUSEMENU);
 	screenManager()->push(new LogConfigScreen());
 	return UI::EVENT_DONE;
 }
 
-UI::EventReturn DevMenu::OnDeveloperTools(UI::EventParams &e) {
+UI::EventReturn DevMenuScreen::OnDeveloperTools(UI::EventParams &e) {
 	UpdateUIState(UISTATE_PAUSEMENU);
 	screenManager()->push(new DeveloperToolsScreen());
 	return UI::EVENT_DONE;
 }
 
-UI::EventReturn DevMenu::OnJitCompare(UI::EventParams &e) {
+UI::EventReturn DevMenuScreen::OnJitCompare(UI::EventParams &e) {
 	UpdateUIState(UISTATE_PAUSEMENU);
 	screenManager()->push(new JitCompareScreen());
 	return UI::EVENT_DONE;
 }
 
-UI::EventReturn DevMenu::OnShaderView(UI::EventParams &e) {
+UI::EventReturn DevMenuScreen::OnShaderView(UI::EventParams &e) {
 	UpdateUIState(UISTATE_PAUSEMENU);
 	if (gpu) // Avoid crashing if chosen while the game is being loaded.
 		screenManager()->push(new ShaderListScreen());
 	return UI::EVENT_DONE;
 }
 
-UI::EventReturn DevMenu::OnFreezeFrame(UI::EventParams &e) {
+UI::EventReturn DevMenuScreen::OnFreezeFrame(UI::EventParams &e) {
 	if (PSP_CoreParameter().frozen) {
 		PSP_CoreParameter().frozen = false;
 	} else {
@@ -173,12 +173,12 @@ UI::EventReturn DevMenu::OnFreezeFrame(UI::EventParams &e) {
 	return UI::EVENT_DONE;
 }
 
-UI::EventReturn DevMenu::OnDumpFrame(UI::EventParams &e) {
+UI::EventReturn DevMenuScreen::OnDumpFrame(UI::EventParams &e) {
 	gpu->DumpNextFrame();
 	return UI::EVENT_DONE;
 }
 
-void DevMenu::dialogFinished(const Screen *dialog, DialogResult result) {
+void DevMenuScreen::dialogFinished(const Screen *dialog, DialogResult result) {
 	UpdateUIState(UISTATE_INGAME);
 	// Close when a subscreen got closed.
 	// TODO: a bug in screenmanager causes this not to work here.
@@ -514,7 +514,15 @@ void SystemInfoScreen::CreateViews() {
 	const std::string apiNameKey = draw->GetInfoString(InfoField::APINAME);
 	const char *apiName = gr->T(apiNameKey);
 	deviceSpecs->Add(new InfoItem(si->T("3D API"), apiName));
-	deviceSpecs->Add(new InfoItem(si->T("Vendor"), draw->GetInfoString(InfoField::VENDORSTRING)));
+
+	// TODO: Not really vendor, on most APIs it's a device name (GL calls it vendor though).
+	std::string vendorString;
+	if (draw->GetDeviceCaps().deviceID != 0) {
+		vendorString = StringFromFormat("%s (%08x)", draw->GetInfoString(InfoField::VENDORSTRING).c_str(), draw->GetDeviceCaps().deviceID);
+	} else {
+		vendorString = draw->GetInfoString(InfoField::VENDORSTRING);
+	}
+	deviceSpecs->Add(new InfoItem(si->T("Vendor"), vendorString));
 	std::string vendor = draw->GetInfoString(InfoField::VENDOR);
 	if (vendor.size())
 		deviceSpecs->Add(new InfoItem(si->T("Vendor (detected)"), vendor));
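In the last hunk, StringFromFormat("%s (%08x)", ...) appends the device ID to the vendor string as eight zero-padded hex digits, so the info screen would show something like "SomeVendor (00001234)" (a made-up value for illustration). A standard-library equivalent of that formatting step:

#include <cstdint>
#include <cstdio>
#include <string>

// Rough equivalent of StringFromFormat("%s (%08x)", name.c_str(), id).
std::string VendorWithId(const std::string &name, uint32_t deviceID) {
	char buf[256];
	snprintf(buf, sizeof(buf), "%s (%08x)", name.c_str(), deviceID);
	return std::string(buf);
}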
Some files were not shown because too many files have changed in this diff.