Merge pull request #18813 from hrydgard/ufc-crash-work

Mali: Turn off any depth writes in the shader if depth test == NEVER
This commit is contained in:
Henrik Rydgård 2024-02-02 11:51:30 +01:00 committed by GitHub
commit 7d894b8283
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 36 additions and 16 deletions

View File

@ -973,7 +973,7 @@ VKContext::VKContext(VulkanContext *vulkan, bool useRenderThread)
// See: https://github.com/hrydgard/ppsspp/pull/11684
if (deviceProps.deviceID >= 0x05000000 && deviceProps.deviceID < 0x06000000) {
if (deviceProps.driverVersion < 0x80180000) {
bugs_.Infest(Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL);
bugs_.Infest(Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL_ADRENO);
}
}
// Color write mask not masking write in certain scenarios with a depth test, see #10421.
@ -1006,8 +1006,9 @@ VKContext::VKContext(VulkanContext *vulkan, bool useRenderThread)
bugs_.Infest(Bugs::EQUAL_WZ_CORRUPTS_DEPTH);
// Nearly identical to the Adreno bug, see #13833 (Midnight Club map broken) and other issues.
// It has the additional caveat that combining depth writes with NEVER depth tests crashes the driver.
// Reported fixed in major version 40 - let's add a check once confirmed.
bugs_.Infest(Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL);
bugs_.Infest(Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL_MALI);
// This started in driver 31 or 32, fixed in 40 - let's add a check once confirmed.
if (majorVersion >= 32) {
@ -1042,6 +1043,8 @@ VKContext::VKContext(VulkanContext *vulkan, bool useRenderThread)
INFO_LOG(G3D, "KHR_depth_stencil_resolve not supported, disabling multisampling");
}
bugs_.Infest(Draw::Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL_MALI);
// We limit multisampling functionality to reasonably recent and known-good tiling GPUs.
if (multisampleAllowed) {
// Check for depth stencil resolve. Without it, depth textures won't work, and we don't want that mess

View File

@ -751,7 +751,8 @@ void ConvertToD16(uint8_t *dst, const uint8_t *src, uint32_t dstStride, uint32_t
const char *Bugs::GetBugName(uint32_t bug) {
switch (bug) {
case NO_DEPTH_CANNOT_DISCARD_STENCIL: return "NO_DEPTH_CANNOT_DISCARD_STENCIL";
case NO_DEPTH_CANNOT_DISCARD_STENCIL_MALI: return "NO_DEPTH_CANNOT_DISCARD_STENCIL_MALI";
case NO_DEPTH_CANNOT_DISCARD_STENCIL_ADRENO: return "NO_DEPTH_CANNOT_DISCARD_STENCIL_ADRENO";
case DUAL_SOURCE_BLENDING_BROKEN: return "DUAL_SOURCE_BLENDING_BROKEN";
case ANY_MAP_BUFFER_RANGE_SLOW: return "ANY_MAP_BUFFER_RANGE_SLOW";
case PVR_GENMIPMAP_HEIGHT_GREATER: return "PVR_GENMIPMAP_HEIGHT_GREATER";

View File

@ -336,7 +336,7 @@ public:
const char *GetBugName(uint32_t bug);
enum : uint32_t {
NO_DEPTH_CANNOT_DISCARD_STENCIL = 0,
NO_DEPTH_CANNOT_DISCARD_STENCIL_ADRENO = 0,
DUAL_SOURCE_BLENDING_BROKEN = 1,
ANY_MAP_BUFFER_RANGE_SLOW = 2,
PVR_GENMIPMAP_HEIGHT_GREATER = 3,
@ -351,6 +351,7 @@ public:
ADRENO_RESOURCE_DEADLOCK = 12,
UNIFORM_INDEXING_BROKEN = 13, // not a properly diagnosed issue, a workaround attempt: #17386
PVR_BAD_16BIT_TEXFORMATS = 14,
NO_DEPTH_CANNOT_DISCARD_STENCIL_MALI = 15,
MAX_BUG,
};

View File

@ -625,7 +625,7 @@ bool GameManager::InstallMemstickGame(struct zip *z, const Path &zipfile, const
g_OSD.SetProgressBar("install", di->T("Installing..."), 0.0f, 1.0f, 0.1f + (i + 1) / (float)info.numFiles * 0.9f, 0.1f);
}
INFO_LOG(HLE, "Extracted %d files from zip (%d bytes / %d).", info.numFiles, (int)bytesCopied, (int)allBytes);
INFO_LOG(HLE, "Unzipped %d files (%d bytes / %d).", info.numFiles, (int)bytesCopied, (int)allBytes);
zip_close(z);
z = nullptr;
installProgress_ = 1.0f;
@ -733,7 +733,7 @@ bool GameManager::InstallZippedISO(struct zip *z, int isoFileIndex, const Path &
auto di = GetI18NCategory(I18NCat::DIALOG);
g_OSD.SetProgressBar("install", di->T("Installing..."), 0.0f, 0.0f, 0.0f, 0.1f);
if (ExtractFile(z, isoFileIndex, outputISOFilename, &bytesCopied, allBytes)) {
INFO_LOG(IO, "Successfully extracted ISO file to '%s'", outputISOFilename.c_str());
INFO_LOG(IO, "Successfully unzipped ISO file to '%s'", outputISOFilename.c_str());
success = true;
}
zip_close(z);

View File

@ -155,7 +155,9 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
shading = doFlatShading ? "flat" : "";
}
bool useDiscardStencilBugWorkaround = id.Bit(FS_BIT_NO_DEPTH_CANNOT_DISCARD_STENCIL);
bool forceDepthWritesOff = id.Bit(FS_BIT_DEPTH_TEST_NEVER);
bool useDiscardStencilBugWorkaround = id.Bit(FS_BIT_NO_DEPTH_CANNOT_DISCARD_STENCIL) && !forceDepthWritesOff;
GEBlendSrcFactor replaceBlendFuncA = (GEBlendSrcFactor)id.Bits(FS_BIT_BLENDFUNC_A, 4);
GEBlendDstFactor replaceBlendFuncB = (GEBlendDstFactor)id.Bits(FS_BIT_BLENDFUNC_B, 4);
@ -177,7 +179,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
}
bool needFragCoord = readFramebufferTex || gstate_c.Use(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT);
bool writeDepth = gstate_c.Use(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT);
bool writeDepth = gstate_c.Use(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT) && !forceDepthWritesOff;
// TODO: We could have a separate mechanism to support more ops using the shader blending mechanism,
// on hardware that can do proper bit math in fragment shaders.
@ -192,7 +194,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
std::vector<SamplerDef> samplers;
if (compat.shaderLanguage == ShaderLanguage::GLSL_VULKAN) {
if (useDiscardStencilBugWorkaround && !gstate_c.Use(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT)) {
if (useDiscardStencilBugWorkaround && !writeDepth) {
WRITE(p, "layout (depth_unchanged) out float gl_FragDepth;\n");
}

View File

@ -196,6 +196,7 @@ std::string FragmentShaderDesc(const FShaderID &id) {
if (id.Bit(FS_BIT_FLATSHADE)) desc << "Flat ";
if (id.Bit(FS_BIT_BGRA_TEXTURE)) desc << "BGRA ";
if (id.Bit(FS_BIT_UBERSHADER)) desc << "FragUber ";
if (id.Bit(FS_BIT_DEPTH_TEST_NEVER)) desc << "DepthNever ";
switch ((ShaderDepalMode)id.Bits(FS_BIT_SHADER_DEPAL_MODE, 2)) {
case ShaderDepalMode::OFF: break;
case ShaderDepalMode::NORMAL: desc << "Depal "; break;
@ -387,13 +388,24 @@ void ComputeFragmentShaderID(FShaderID *id_out, const ComputedPipelineState &pip
id.SetBit(FS_BIT_STEREO);
}
if (g_Config.bVendorBugChecksEnabled && bugs.Has(Draw::Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL)) {
bool stencilWithoutDepth = !IsStencilTestOutputDisabled() && (!gstate.isDepthTestEnabled() || !gstate.isDepthWriteEnabled());
if (stencilWithoutDepth) {
id.SetBit(FS_BIT_NO_DEPTH_CANNOT_DISCARD_STENCIL, stencilWithoutDepth);
if (g_Config.bVendorBugChecksEnabled) {
if (bugs.Has(Draw::Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL_ADRENO) || bugs.Has(Draw::Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL_MALI)) {
// On Adreno, the workaround is safe, so we do simple checks.
bool stencilWithoutDepth = (!gstate.isDepthTestEnabled() || !gstate.isDepthWriteEnabled()) && !IsStencilTestOutputDisabled();
if (stencilWithoutDepth) {
id.SetBit(FS_BIT_NO_DEPTH_CANNOT_DISCARD_STENCIL, stencilWithoutDepth);
}
}
}
// Forcibly disable NEVER + depth-write on Mali.
// TODO: Take this from computed depth test instead of directly from the gstate.
// That will take more refactoring though.
if (bugs.Has(Draw::Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL_MALI) &&
gstate.getDepthTestFunction() == GE_COMP_NEVER && gstate.isDepthTestEnabled()) {
id.SetBit(FS_BIT_DEPTH_TEST_NEVER);
}
// In case the USE flag changes (for example, in multisampling we might disable input attachments),
// we don't want to accidentally use the wrong cached shader here. So moved it to a bit.
if (FragmentIdNeedsFramebufferRead(id)) {

View File

@ -102,6 +102,7 @@ enum FShaderBit : uint8_t {
FS_BIT_STEREO = 58,
FS_BIT_USE_FRAMEBUFFER_FETCH = 59,
FS_BIT_UBERSHADER = 60,
FS_BIT_DEPTH_TEST_NEVER = 61, // Only used on Mali. Set when depth == NEVER. We forcibly avoid writing to depth in this case, since it crashes the driver.
};
static inline FShaderBit operator +(FShaderBit bit, int i) {

View File

@ -119,7 +119,7 @@ void GenerateStencilFs(char *buffer, const ShaderLanguageDesc &lang, const Draw:
writer.HighPrecisionFloat();
writer.DeclareSamplers(samplers);
if (bugs.Has(Draw::Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL)) {
if (bugs.Has(Draw::Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL_MALI) || bugs.Has(Draw::Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL_ADRENO)) {
writer.C("layout (depth_unchanged) out float gl_FragDepth;\n");
}
@ -137,7 +137,7 @@ void GenerateStencilFs(char *buffer, const ShaderLanguageDesc &lang, const Draw:
writer.C(" if (mod(floor(shifted), 2.0) < 0.99) DISCARD;\n");
}
if (bugs.Has(Draw::Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL)) {
if (bugs.Has(Draw::Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL_MALI) || bugs.Has(Draw::Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL_ADRENO)) {
writer.C(" gl_FragDepth = gl_FragCoord.z;\n");
}

View File

@ -124,7 +124,7 @@ const CommonCommandTableEntry commonCommandTable[] = {
{ GE_CMD_BLENDFIXEDB, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE },
{ GE_CMD_MASKRGB, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_COLORWRITEMASK },
{ GE_CMD_MASKALPHA, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_COLORWRITEMASK },
{ GE_CMD_ZTEST, FLAG_FLUSHBEFOREONCHANGE, DIRTY_DEPTHSTENCIL_STATE },
{ GE_CMD_ZTEST, FLAG_FLUSHBEFOREONCHANGE, DIRTY_DEPTHSTENCIL_STATE | DIRTY_FRAGMENTSHADER_STATE },
{ GE_CMD_ZTESTENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_DEPTHSTENCIL_STATE | DIRTY_FRAGMENTSHADER_STATE },
{ GE_CMD_ZWRITEDISABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_DEPTHSTENCIL_STATE | DIRTY_FRAGMENTSHADER_STATE },
{ GE_CMD_LOGICOP, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE },