GPU: Optimize clip distances needed.

We only need to write one clip distance to clip clamped depth, since we
don't clamp when depth needs clipping on both sides.
This commit is contained in:
Unknown W. Brackets 2022-10-05 21:17:17 -07:00
parent 14bf9d1923
commit bc3d3cf9fb
3 changed files with 13 additions and 24 deletions

View File

@ -276,7 +276,6 @@ bool GenerateGeometryShader(const GShaderID &id, char *buffer, const ShaderLangu
} else {
const char *clipSuffix0 = compat.shaderLanguage == HLSL_D3D11 ? ".x" : "[0]";
const char *clipSuffix1 = compat.shaderLanguage == HLSL_D3D11 ? ".y" : "[1]";
const char *clipSuffix2 = compat.shaderLanguage == HLSL_D3D11 ? ".z" : "[2]";
p.C(" for (int i = 0; i < 3; i++) {\n"); // TODO: 3 or gl_in.length()? which will be faster?
p.C(" vec4 outPos = gl_in[i].gl_Position;\n");
@ -285,8 +284,7 @@ bool GenerateGeometryShader(const GShaderID &id, char *buffer, const ShaderLangu
if (clipClampedDepth) {
// Copy the clip distance from the vertex shader.
p.F(" gl_ClipDistance%s = gl_in[i].gl_ClipDistance%s;\n", clipSuffix0, clipSuffix0);
p.F(" gl_ClipDistance%s = gl_in[i].gl_ClipDistance%s;\n", clipSuffix1, clipSuffix1);
p.F(" gl_ClipDistance%s = projZ * outPos.w + outPos.w;\n", clipSuffix2);
p.F(" gl_ClipDistance%s = projZ * outPos.w + outPos.w;\n", clipSuffix1);
} else {
// We shouldn't need to worry about rectangles-as-triangles here, since we don't use geometry shaders for that.
p.F(" gl_ClipDistance%s = projZ * outPos.w + outPos.w;\n", clipSuffix0);

View File

@ -231,9 +231,8 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
bool vertexRangeCulling = id.Bit(VS_BIT_VERTEX_RANGE_CULLING) && !isModeThrough;
bool clipClampedDepth = !isModeThrough && gstate_c.Supports(GPU_SUPPORTS_DEPTH_CLAMP) && gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE);
const char *vertexRangeClipSuffix = "[0]";
if (vertexRangeCulling && clipClampedDepth)
vertexRangeClipSuffix = "[2]";
const char *clipClampedDepthSuffix = "[0]";
const char *vertexRangeClipSuffix = clipClampedDepth ? "[1]" : "[0]";
if (compat.shaderLanguage == GLSL_VULKAN) {
WRITE(p, "\n");
@ -419,12 +418,12 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
WRITE(p, " vec4 gl_Position : SV_Position;\n");
bool clipRange = vertexRangeCulling && gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE);
if (clipClampedDepth && clipRange) {
WRITE(p, " float3 gl_ClipDistance : SV_ClipDistance;\n");
vertexRangeClipSuffix = ".z";
} else if (clipClampedDepth) {
WRITE(p, " float2 gl_ClipDistance : SV_ClipDistance;\n");
} else if (clipRange) {
clipClampedDepthSuffix = ".x";
vertexRangeClipSuffix = ".y";
} else if (clipClampedDepth || clipRange) {
WRITE(p, " float gl_ClipDistance : SV_ClipDistance;\n");
clipClampedDepthSuffix = "";
vertexRangeClipSuffix = "";
}
if (vertexRangeCulling && gstate_c.Supports(GPU_SUPPORTS_CULL_DISTANCE)) {
@ -1267,28 +1266,21 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
}
if (clipClampedDepth) {
const char *clip0 = compat.shaderLanguage == HLSL_D3D11 ? ".x" : "[0]";
const char *clip1 = compat.shaderLanguage == HLSL_D3D11 ? ".y" : "[1]";
// This should clip against minz, but only when it's above zero.
if (ShaderLanguageIsOpenGL(compat.shaderLanguage)) {
// On OpenGL/GLES, these values account for the -1 -> 1 range.
WRITE(p, " if (u_depthRange.y - u_depthRange.x >= 1.0) {\n");
WRITE(p, " %sgl_ClipDistance%s = outPos.w + outPos.z;\n", compat.vsOutPrefix, clip0);
WRITE(p, " %sgl_ClipDistance%s = outPos.w + outPos.z;\n", compat.vsOutPrefix, clipClampedDepthSuffix);
} else {
// Everywhere else, it's 0 -> 1, simpler.
WRITE(p, " if (u_depthRange.y >= 1.0) {\n");
WRITE(p, " %sgl_ClipDistance%s = outPos.z;\n", compat.vsOutPrefix, clip0);
WRITE(p, " %sgl_ClipDistance%s = outPos.z;\n", compat.vsOutPrefix, clipClampedDepthSuffix);
}
WRITE(p, " } else {\n");
WRITE(p, " %sgl_ClipDistance%s = 0.0;\n", compat.vsOutPrefix, clip0);
WRITE(p, " }\n");
// This is similar, but for maxz when it's below 65535.0. -1/0 don't matter here.
WRITE(p, " if (u_depthRange.x + u_depthRange.y <= 65534.0) {\n");
WRITE(p, " %sgl_ClipDistance%s = outPos.w - outPos.z;\n", compat.vsOutPrefix, clip1);
WRITE(p, " } else if (u_depthRange.x + u_depthRange.y <= 65534.0) {\n");
WRITE(p, " %sgl_ClipDistance%s = outPos.w - outPos.z;\n", compat.vsOutPrefix, clipClampedDepthSuffix);
WRITE(p, " } else {\n");
WRITE(p, " %sgl_ClipDistance%s = 0.0;\n", compat.vsOutPrefix, clip1);
WRITE(p, " %sgl_ClipDistance%s = 0.0;\n", compat.vsOutPrefix, clipClampedDepthSuffix);
WRITE(p, " }\n");
}

View File

@ -199,9 +199,8 @@ LinkedShader::LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs,
flags.supportDualSource = (gstate_c.featureFlags & GPU_SUPPORTS_DUALSOURCE_BLEND) != 0;
if (!VSID.Bit(VS_BIT_IS_THROUGH) && gstate_c.Supports(GPU_SUPPORTS_DEPTH_CLAMP)) {
flags.useClipDistance0 = true;
flags.useClipDistance1 = true;
if (VSID.Bit(VS_BIT_VERTEX_RANGE_CULLING) && gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE))
flags.useClipDistance2 = true;
flags.useClipDistance1 = true;
} else if (VSID.Bit(VS_BIT_VERTEX_RANGE_CULLING) && gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE)) {
flags.useClipDistance0 = true;
}