Implement color-to-depth for Vulkan, start implementing for D3D11

This commit is contained in:
Henrik Rydgård 2022-07-31 13:01:50 +02:00
parent 93321f2d5e
commit 90a44579bf
11 changed files with 94 additions and 10 deletions

View File

@ -145,8 +145,9 @@ void GenerateDepalShader300(char *buffer, GEBufferFormat pixelFormat, ShaderLang
}
float texturePixels = 256;
if (clutFormat != GE_CMODE_32BIT_ABGR8888)
if (clutFormat != GE_CMODE_32BIT_ABGR8888) {
texturePixels = 512;
}
if (shift) {
WRITE(p, " index = (int(uint(index) >> uint(%i)) & 0x%02x)", shift, mask);

View File

@ -90,6 +90,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
bool shaderDepal = id.Bit(FS_BIT_SHADER_DEPAL) && !texture3D; // combination with texture3D not supported. Enforced elsewhere too.
bool bgraTexture = id.Bit(FS_BIT_BGRA_TEXTURE);
bool colorWriteMask = id.Bit(FS_BIT_COLOR_WRITEMASK) && compat.bitwiseOps;
bool colorToDepth = id.Bit(FS_BIT_COLOR_TO_DEPTH);
GEComparison alphaTestFunc = (GEComparison)id.Bits(FS_BIT_ALPHA_TEST_FUNC, 3);
GEComparison colorTestFunc = (GEComparison)id.Bits(FS_BIT_COLOR_TEST_FUNC, 2);
@ -122,7 +123,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
bool readFramebufferTex = readFramebuffer && !gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH);
bool needFragCoord = readFramebuffer || gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT);
bool writeDepth = gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT);
bool writeDepth = gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT) || colorToDepth;
if (shaderDepal && !doTexture) {
*errorString = "depal requires a texture";
@ -1034,7 +1035,9 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
// Final color computed - apply color write mask.
// TODO: Maybe optimize to only do math on the affected channels?
// Or .. meh.
// Or .. meh. That would require more shader bits. Though we could
// of course optimize for the common mask 0xF00000, though again, blue-to-alpha
// does a better job with that.
if (colorWriteMask) {
WRITE(p, " highp uint v32 = packUnorm4x8(%s);\n", compat.fragColor0);
WRITE(p, " highp uint d32 = packUnorm4x8(destColor);\n");
@ -1047,6 +1050,11 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
WRITE(p, " %s = vec4(0.0, 0.0, 0.0, %s.z); // blue to alpha\n", compat.fragColor0, compat.fragColor0);
}
if (colorToDepth) {
	// Pack the color into a 16-bit 5:6:5 value (x in bits 0-4, y in bits 5-10, z in bits 11-15)
	// and normalize it to produce the depth to write.
	// Each channel is scaled by its field maximum (31 or 63) and rounded. Scaling by 32/64 would
	// turn a full-intensity channel into 32/64, which does not fit in the 5/6-bit field and would
	// spill into the neighboring field (or past bit 15, giving a depth above 1.0).
	WRITE(p, " highp float depthValue = float(uint(%s.x * 31.0 + 0.5) | (uint(%s.y * 63.0 + 0.5) << 5) | (uint(%s.z * 31.0 + 0.5) << 11)) / 65535.0;\n", "v", "v", "v"); // compat.fragColor0, compat.fragColor0, compat.fragColor0);
	WRITE(p, " gl_FragDepth = depthValue;\n"); // TODO: Don't forget to apply accurate-depth kind of stuff
}
if (gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT)) {
const double scale = DepthSliceFactor() * 65535.0;
@ -1060,7 +1068,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
WRITE(p, " z = floor(z * %f) * (1.0 / %f);\n", scale, scale);
}
} else {
WRITE(p, " z = (1.0/65535.0) * floor(z * 65535.0);\n");
WRITE(p, " z = (1.0 / 65535.0) * floor(z * 65535.0);\n");
}
WRITE(p, " gl_FragDepth = z;\n");
} else if (useDiscardStencilBugWorkaround) {

View File

@ -281,6 +281,8 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame
WARN_LOG_ONCE(color_equal_z, G3D, "Framebuffer bound with color addr == z addr, likely will not use Z in this pass: %08x", params.fb_address);
}
FramebufferRenderMode mode = FB_MODE_NORMAL;
// Find a matching framebuffer
VirtualFramebuffer *vfb = nullptr;
for (size_t i = 0; i < vfbs_.size(); ++i) {
@ -288,7 +290,7 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame
const u32 bpp = v->format == GE_FORMAT_8888 ? 4 : 2;
if (v->fb_address == params.fb_address) {
if (params.fb_address == v->fb_address) {
vfb = v;
// Update fb stride in case it changed
if (vfb->fb_stride != params.fb_stride) {
@ -317,11 +319,19 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame
vfb->height = drawing_height;
}
break;
} else if (params.fb_address == v->z_address && params.fmt != GE_FORMAT_8888) {
} else if (params.fb_address == v->z_address && params.fmt != GE_FORMAT_8888 && params.fb_stride == v->z_stride) {
// Looks like the game might be intending to use color to write directly to a Z buffer.
// This is seen in Kuroyou 2.
// Ignore this in this loop, BUT, we do a lookup in the depth tracking afterwards to
// make sure we get the latest one.
WARN_LOG_ONCE(color_matches_z, G3D, "Color framebuffer bound at %08x with likely intent to write explicit Z values using color. fmt = %s", params.fb_address, GeBufferFormatToString(params.fmt));
// Skip this for now.
// Seems impractical to use the other 16-bit formats for this due to the limited control over alpha,
// so we'll simply only support 565.
if (params.fmt == GE_FORMAT_565) {
mode = FB_MODE_COLOR_TO_DEPTH;
break;
}
} else if (v->fb_stride == params.fb_stride && v->format == params.fmt) {
u32 v_fb_first_line_end_ptr = v->fb_address + v->fb_stride * 4; // This should be * bpp, but leaving like this until after 1.13 to be safe. The God of War games use this for shadows.
u32 v_fb_end_ptr = v->fb_address + v->fb_stride * v->height * bpp;
@ -356,6 +366,25 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame
}
}
if (mode == FB_MODE_COLOR_TO_DEPTH) {
// Lookup in the depth tracking to find which VFB has the latest version of this Z buffer.
// Then bind it in color-to-depth mode.
//
// We are going to do this by having a special render mode where we take color and move to
// depth in the fragment shader, and set color writes to off.
//
// We'll need a special fragment shader flag to convert color to depth.
for (auto &depth : this->trackedDepthBuffers_) {
if (depth->z_address == params.fb_address && depth->z_stride == params.fb_stride) {
// Found the matching depth buffer. Use this vfb.
vfb = depth->vfb;
}
}
}
gstate_c.SetFramebufferRenderMode(mode);
if (vfb) {
if ((drawing_width != vfb->bufferWidth || drawing_height != vfb->bufferHeight)) {
// Even if it's not newly wrong, if this is larger we need to resize up.

View File

@ -224,7 +224,7 @@ public:
void SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format);
void DestroyFramebuf(VirtualFramebuffer *v);
VirtualFramebuffer *DoSetRenderFrameBuffer(const FramebufferHeuristicParams &params, u32 skipDrawReason);
VirtualFramebuffer *DoSetRenderFrameBuffer(const FramebufferHeuristicParams &params, u32 skipDrawReason);
VirtualFramebuffer *SetRenderFrameBuffer(bool framebufChanged, int skipDrawReason) {
// Inlining this part since it's so frequent.
if (!framebufChanged && currentRenderVfb_) {

View File

@ -1018,6 +1018,16 @@ void ConvertMaskState(GenericMaskState &maskState, bool allowFramebufferRead) {
return;
}
if (gstate_c.renderMode == FB_MODE_COLOR_TO_DEPTH) {
// Suppress color writes entirely in this mode.
maskState.applyFramebufferRead = false;
maskState.rgba[0] = false;
maskState.rgba[1] = false;
maskState.rgba[2] = false;
maskState.rgba[3] = false;
return;
}
// Invert to convert masks from the PSP's format where 1 is don't draw to PC where 1 is draw.
uint32_t colorMask = ~((gstate.pmskc & 0xFFFFFF) | (gstate.pmska << 24));

View File

@ -7,6 +7,7 @@
#include "Core/Config.h"
#include "GPU/ge_constants.h"
#include "GPU/GPU.h"
#include "GPU/GPUState.h"
#include "GPU/Common/GPUStateUtils.h"
#include "GPU/Common/ShaderId.h"
@ -239,6 +240,8 @@ std::string FragmentShaderDesc(const FShaderID &id) {
if (id.Bit(FS_BIT_COLOR_AGAINST_ZERO)) desc << "ColorTest0 " << alphaTestFuncs[id.Bits(FS_BIT_COLOR_TEST_FUNC, 2)] << " "; // first 4 match;
else if (id.Bit(FS_BIT_COLOR_TEST)) desc << "ColorTest " << alphaTestFuncs[id.Bits(FS_BIT_COLOR_TEST_FUNC, 2)] << " "; // first 4 match
if (id.Bit(FS_BIT_COLOR_TO_DEPTH)) desc << "ColorToDepth ";
return desc.str();
}
@ -261,6 +264,7 @@ void ComputeFragmentShaderID(FShaderID *id_out, const Draw::Bugs &bugs) {
bool doFlatShading = gstate.getShadeMode() == GE_SHADE_FLAT;
bool useShaderDepal = gstate_c.useShaderDepal;
bool colorWriteMask = IsColorWriteMaskComplex(gstate_c.allowFramebufferRead);
bool colorToDepth = gstate_c.renderMode == FramebufferRenderMode::FB_MODE_COLOR_TO_DEPTH;
// Note how we here recompute some of the work already done in state mapping.
// Not ideal! At least we share the code.
@ -292,6 +296,8 @@ void ComputeFragmentShaderID(FShaderID *id_out, const Draw::Bugs &bugs) {
id.SetBit(FS_BIT_3D_TEXTURE, gstate_c.curTextureIs3D);
}
id.SetBit(FS_BIT_COLOR_TO_DEPTH, colorToDepth);
id.SetBit(FS_BIT_LMODE, lmode);
if (enableAlphaTest) {
// 5 bits total.

View File

@ -94,6 +94,7 @@ enum FShaderBit : uint8_t {
FS_BIT_NO_DEPTH_CANNOT_DISCARD_STENCIL = 49,
FS_BIT_COLOR_WRITEMASK = 50,
FS_BIT_3D_TEXTURE = 51,
FS_BIT_COLOR_TO_DEPTH = 52,
};
static inline FShaderBit operator +(FShaderBit bit, int i) {

View File

@ -283,7 +283,13 @@ void DrawEngineD3D11::ApplyDrawState(int prim) {
GenericStencilFuncState stencilState;
ConvertStencilFuncState(stencilState);
if (gstate.isModeClear()) {
if (gstate_c.renderMode == FB_MODE_COLOR_TO_DEPTH) {
// Enforce plain depth writing.
keys_.depthStencil.value = 0;
keys_.depthStencil.depthWriteEnable = true;
keys_.depthStencil.stencilTestEnable = false;
keys_.depthStencil.depthCompareOp = D3D11_COMPARISON_ALWAYS;
} else if (gstate.isModeClear()) {
keys_.depthStencil.value = 0;
keys_.depthStencil.depthTestEnable = true;
keys_.depthStencil.depthCompareOp = D3D11_COMPARISON_ALWAYS;

View File

@ -24,6 +24,11 @@ class GPUInterface;
class GPUDebugInterface;
class GraphicsContext;
// Selects how draws to the currently bound render target are handled.
enum FramebufferRenderMode {
// Regular rendering.
FB_MODE_NORMAL = 0,
// The game has bound a depth buffer's address as a color target to write Z values using color.
// Color is converted to depth in the fragment shader and color writes are turned off.
FB_MODE_COLOR_TO_DEPTH = 1,
};
enum SkipDrawReasonFlags {
SKIPDRAW_SKIPFRAME = 1,
SKIPDRAW_NON_DISPLAYED_FB = 2, // Skip drawing to FBO:s that have not been displayed.

View File

@ -557,6 +557,14 @@ struct GPUStateCache {
Dirty(DIRTY_FRAGMENTSHADER_STATE | (is3D ? DIRTY_MIPBIAS : 0));
}
}
// Switches the active framebuffer render mode.
// A request for the mode that is already active is a no-op, so no state gets dirtied needlessly.
void SetFramebufferRenderMode(FramebufferRenderMode mode) {
	if (renderMode == mode) {
		return;
	}
	// The render mode affects three pieces of state, all of which must be re-evaluated:
	//  - the fragment shader (it writes depth),
	//  - the depth state (written without testing),
	//  - the blend state (nothing is written to color).
	Dirty(DIRTY_FRAGMENTSHADER_STATE | DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE);
	renderMode = mode;
}
u32 featureFlags;
@ -607,6 +615,9 @@ struct GPUStateCache {
// We detect this case and go into a special drawing mode.
bool blueToAlpha;
// Some games try to write to the Z buffer using color. Catch that and actually do the writes to the Z buffer instead.
FramebufferRenderMode renderMode;
// TODO: These should be accessed from the current VFB object directly.
u32 curRTWidth;
u32 curRTHeight;

View File

@ -250,7 +250,14 @@ void DrawEngineVulkan::ConvertStateToVulkanKey(FramebufferManagerVulkan &fbManag
GenericStencilFuncState stencilState;
ConvertStencilFuncState(stencilState);
if (gstate.isModeClear()) {
if (gstate_c.renderMode == FB_MODE_COLOR_TO_DEPTH) {
// Enforce plain depth writing.
key.depthTestEnable = true;
key.depthWriteEnable = true;
key.stencilTestEnable = false;
key.depthCompareOp = VK_COMPARE_OP_ALWAYS;
key.depthClampEnable = false;
} else if (gstate.isModeClear()) {
key.depthTestEnable = true;
key.depthCompareOp = VK_COMPARE_OP_ALWAYS;
key.depthWriteEnable = gstate.isClearModeDepthMask();