mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-02-21 06:33:22 +00:00
Vulkan: Clip clamped depth in geometry shader.
This corrects deformed geometry on Mali devices which don't support user-space clipping but do support depth clamp.
This commit is contained in:
parent
5d88e50201
commit
3e5c09d432
@ -48,7 +48,7 @@ bool GenerateGeometryShader(const GShaderID &id, char *buffer, const ShaderLangu
|
||||
|
||||
ShaderWriter p(buffer, compat, ShaderStage::Geometry, gl_exts.data(), gl_exts.size());
|
||||
p.C("layout(triangles) in;\n");
|
||||
p.C("layout(triangle_strip, max_vertices = 6) out;\n");
|
||||
p.C("layout(triangle_strip, max_vertices = 12) out;\n");
|
||||
|
||||
if (compat.shaderLanguage == GLSL_VULKAN) {
|
||||
WRITE(p, "\n");
|
||||
@ -58,6 +58,8 @@ bool GenerateGeometryShader(const GShaderID &id, char *buffer, const ShaderLangu
|
||||
}
|
||||
|
||||
std::vector<VaryingDef> varyings, outVaryings;
|
||||
bool vertexRangeCulling = !id.Bit(GS_BIT_CURVE);
|
||||
bool clipClampedDepth = gstate_c.Supports(GPU_SUPPORTS_DEPTH_CLAMP) && !gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE);
|
||||
|
||||
if (id.Bit(GS_BIT_DO_TEXTURE)) {
|
||||
varyings.push_back(VaryingDef{ "vec3", "v_texcoord", Draw::SEM_TEXCOORD0, 0, "highp" });
|
||||
@ -74,53 +76,87 @@ bool GenerateGeometryShader(const GShaderID &id, char *buffer, const ShaderLangu
|
||||
|
||||
p.BeginGSMain(varyings, outVaryings);
|
||||
|
||||
// Apply culling
|
||||
p.C(" bool anyInside = false;\n");
|
||||
// Apply culling.
|
||||
if (vertexRangeCulling) {
|
||||
p.C(" bool anyInside = false;\n");
|
||||
}
|
||||
// And apply manual clipping if necessary.
|
||||
if (!gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE)) {
|
||||
p.C(" float clip0[3];\n");
|
||||
if (clipClampedDepth) {
|
||||
p.C(" float clip1[3];\n");
|
||||
}
|
||||
}
|
||||
|
||||
p.C(" for (int i = 0; i < 3; i++) {\n"); // TODO: 3 or gl_in.length()? which will be faster?
|
||||
p.C(" vec4 outPos = gl_in[i].gl_Position;\n");
|
||||
p.C(" vec3 projPos = outPos.xyz / outPos.w;\n");
|
||||
p.C(" float projZ = (projPos.z - u_depthRange.z) * u_depthRange.w;\n");
|
||||
// Vertex range culling doesn't happen when Z clips, note sign of w is important.
|
||||
p.C(" if (u_cullRangeMin.w <= 0.0 || projZ * outPos.w > -outPos.w) {\n");
|
||||
const char *outMin = "projPos.x < u_cullRangeMin.x || projPos.y < u_cullRangeMin.y";
|
||||
const char *outMax = "projPos.x > u_cullRangeMax.x || projPos.y > u_cullRangeMax.y";
|
||||
p.F(" if ((%s) || (%s)) {\n", outMin, outMax);
|
||||
p.C(" return;\n"); // Cull!
|
||||
p.C(" }\n");
|
||||
p.C(" }\n");
|
||||
p.C(" if (u_cullRangeMin.w <= 0.0) {\n");
|
||||
p.C(" if (projPos.z < u_cullRangeMin.z || projPos.z > u_cullRangeMax.z) {\n");
|
||||
// When not clamping depth, cull the triangle if Z is outside the valid range (not based on clip Z.)
|
||||
p.C(" return;\n");
|
||||
p.C(" }\n");
|
||||
p.C(" } else {\n");
|
||||
p.C(" if (projPos.z >= u_cullRangeMin.z) { anyInside = true; }\n");
|
||||
p.C(" if (projPos.z <= u_cullRangeMax.z) { anyInside = true; }\n");
|
||||
p.C(" }\n");
|
||||
|
||||
if (vertexRangeCulling) {
|
||||
p.C(" float projZ = (projPos.z - u_depthRange.z) * u_depthRange.w;\n");
|
||||
// Vertex range culling doesn't happen when Z clips, note sign of w is important.
|
||||
p.C(" if (u_cullRangeMin.w <= 0.0 || projZ * outPos.w > -outPos.w) {\n");
|
||||
const char *outMin = "projPos.x < u_cullRangeMin.x || projPos.y < u_cullRangeMin.y";
|
||||
const char *outMax = "projPos.x > u_cullRangeMax.x || projPos.y > u_cullRangeMax.y";
|
||||
p.F(" if ((%s) || (%s)) {\n", outMin, outMax);
|
||||
p.C(" return;\n"); // Cull!
|
||||
p.C(" }\n");
|
||||
p.C(" }\n");
|
||||
p.C(" if (u_cullRangeMin.w <= 0.0) {\n");
|
||||
p.C(" if (projPos.z < u_cullRangeMin.z || projPos.z > u_cullRangeMax.z) {\n");
|
||||
// When not clamping depth, cull the triangle if Z is outside the valid range (not based on clip Z.)
|
||||
p.C(" return;\n");
|
||||
p.C(" }\n");
|
||||
p.C(" } else {\n");
|
||||
p.C(" if (projPos.z >= u_cullRangeMin.z) { anyInside = true; }\n");
|
||||
p.C(" if (projPos.z <= u_cullRangeMax.z) { anyInside = true; }\n");
|
||||
p.C(" }\n");
|
||||
}
|
||||
|
||||
if (!gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE)) {
|
||||
// This is basically the same value as gl_ClipDistance would take, z + w.
|
||||
// TODO: Ignore triangles from GE_PRIM_RECTANGLES in transform mode, which should not clip to neg z.
|
||||
p.F(" clip0[i] = projZ * outPos.w + outPos.w;\n");
|
||||
if (vertexRangeCulling) {
|
||||
p.C(" clip0[i] = projZ * outPos.w + outPos.w;\n");
|
||||
} else {
|
||||
// Let's not complicate the code overly for this case. We'll clipClampedDepth.
|
||||
p.C(" clip0[i] = 0.0;\n");
|
||||
}
|
||||
|
||||
// This one does happen for rectangles.
|
||||
if (clipClampedDepth) {
|
||||
if (ShaderLanguageIsOpenGL(compat.shaderLanguage)) {
|
||||
// On OpenGL/GLES, these values account for the -1 -> 1 range.
|
||||
p.C(" if (u_depthRange.y - u_depthRange.x >= 1.0) {\n");
|
||||
p.C(" clip1[i] = outPos.w + outPos.z;\n");
|
||||
} else {
|
||||
// Everywhere else, it's 0 -> 1, simpler.
|
||||
p.C(" if (u_depthRange.y >= 1.0) {\n");
|
||||
p.C(" clip1[i] = outPos.z;\n");
|
||||
}
|
||||
// This is similar, but for maxz when it's below 65535.0. -1/0 don't matter here.
|
||||
p.C(" } else if (u_depthRange.x + u_depthRange.y <= 65534.0) {\n");
|
||||
p.C(" clip1[i] = outPos.w - outPos.z;\n");
|
||||
p.C(" } else {\n");
|
||||
p.C(" clip1[i] = 0.0;\n");
|
||||
p.C(" }\n");
|
||||
}
|
||||
}
|
||||
|
||||
p.C(" } // for\n");
|
||||
|
||||
// Cull any triangle fully outside in the same direction when depth clamp enabled.
|
||||
// Basically simulate cull distances.
|
||||
p.C(" if (u_cullRangeMin.w > 0.0 && !anyInside) {\n");
|
||||
p.C(" return;\n");
|
||||
p.C(" }\n");
|
||||
if (vertexRangeCulling) {
|
||||
p.C(" if (u_cullRangeMin.w > 0.0 && !anyInside) {\n");
|
||||
p.C(" return;\n");
|
||||
p.C(" }\n");
|
||||
}
|
||||
|
||||
if (!gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE)) {
|
||||
// Clipping against one half-space cuts a triangle (17/27), culls (7/27), or creates two triangles (3/27).
|
||||
p.C(" int indices[4];\n");
|
||||
p.C(" float factors[4];\n");
|
||||
// We clip against two, so we can generate up to 4 triangles, a polygon with 6 points.
|
||||
p.C(" int indices[6];\n");
|
||||
p.C(" float factors[6];\n");
|
||||
p.C(" int ind = 0;\n");
|
||||
|
||||
// Pass 1 - clip against first half-space.
|
||||
@ -143,22 +179,80 @@ bool GenerateGeometryShader(const GShaderID &id, char *buffer, const ShaderLangu
|
||||
|
||||
p.C(" }\n");
|
||||
|
||||
// Pass 2 - further clip against clamped Z.
|
||||
if (clipClampedDepth) {
|
||||
p.C(" int count0 = ind;\n");
|
||||
p.C(" int indices1[6];\n");
|
||||
p.C(" float factors1[6];\n");
|
||||
p.C(" ind = 0;\n");
|
||||
|
||||
// Let's start by interpolating the clip values.
|
||||
p.C(" float clip1after[4];\n");
|
||||
p.C(" for (int i = 0; i < count0; i++) {\n");
|
||||
p.C(" int idx = indices[i];\n");
|
||||
p.C(" float factor = factors[i];\n");
|
||||
p.C(" int next = idx == 2 ? 0 : idx + 1;\n");
|
||||
p.C(" clip1after[i] = mix(clip1[idx], clip1[next], factor);\n");
|
||||
p.C(" }\n");
|
||||
|
||||
// Alright, now time to clip, again.
|
||||
p.C(" for (int i = 0; i < count0; i++) {\n");
|
||||
// First, use this vertex if it doesn't need clipping.
|
||||
p.C(" if (clip1after[i] >= 0.0) {\n");
|
||||
p.C(" indices1[ind] = i;\n");
|
||||
p.C(" factors1[ind] = 0.0;\n");
|
||||
p.C(" ind++;\n");
|
||||
p.C(" }\n");
|
||||
|
||||
// Next, we generate an interpolated vertex if signs differ.
|
||||
p.C(" int inext = i == count0 - 1 ? 0 : i + 1;\n");
|
||||
p.C(" if (clip1after[i] * clip1after[inext] < 0.0) {\n");
|
||||
p.C(" float t = clip1after[i] < 0.0 ? clip1after[i] / (clip1after[i] - clip1after[inext]) : 1.0 - (clip1after[inext] / (clip1after[inext] - clip1after[i]));\n");
|
||||
p.C(" indices1[ind] = i;\n");
|
||||
p.C(" factors1[ind] = t;\n");
|
||||
p.C(" ind++;\n");
|
||||
p.C(" }\n");
|
||||
|
||||
p.C(" }\n");
|
||||
}
|
||||
|
||||
p.C(" if (ind < 3) {\n");
|
||||
p.C(" return;\n");
|
||||
p.C(" }\n");
|
||||
|
||||
p.C(" int idx;\n");
|
||||
p.C(" int next;\n");
|
||||
p.C(" float factor;\n");
|
||||
|
||||
auto emitIndex = [&](const char *which) {
|
||||
if (clipClampedDepth) {
|
||||
// We have to interpolate between four vertices.
|
||||
p.F(" idx = indices1[%s];\n", which);
|
||||
p.F(" factor = factors1[%s];\n", which);
|
||||
p.C(" next = idx == count0 - 1 ? 0 : idx + 1;\n");
|
||||
p.C(" gl_Position = mix(mix(gl_in[indices[idx]].gl_Position, gl_in[(indices[idx] + 1) % 3].gl_Position, factors[idx]), mix(gl_in[indices[next]].gl_Position, gl_in[(indices[next] + 1) % 3].gl_Position, factors[next]), factor);\n");
|
||||
for (size_t i = 0; i < varyings.size(); i++) {
|
||||
const VaryingDef &in = varyings[i];
|
||||
const VaryingDef &out = outVaryings[i];
|
||||
p.F(" %s = mix(mix(%s[indices[idx]], %s[(indices[idx] + 1) % 3], factors[idx]), mix(%s[indices[next]], %s[(indices[next] + 1) % 3], factors[next]), factor);\n", out.name, in.name, in.name, in.name, in.name);
|
||||
}
|
||||
} else {
|
||||
p.F(" idx = indices[%s];\n", which);
|
||||
p.F(" factor = factors[%s];\n", which);
|
||||
p.C(" next = idx == 2 ? 0 : idx + 1;\n");
|
||||
p.C(" gl_Position = mix(gl_in[idx].gl_Position, gl_in[next].gl_Position, factor);\n");
|
||||
for (size_t i = 0; i < varyings.size(); i++) {
|
||||
const VaryingDef &in = varyings[i];
|
||||
const VaryingDef &out = outVaryings[i];
|
||||
p.F(" %s = mix(%s[idx], %s[next], factor);\n", out.name, in.name, in.name);
|
||||
}
|
||||
}
|
||||
p.C(" EmitVertex();\n");
|
||||
};
|
||||
|
||||
// Alright, time to actually emit the first triangle.
|
||||
p.C(" for (int i = 0; i < 3; i++) {\n");
|
||||
p.C(" int idx = indices[i];\n");
|
||||
p.C(" float factor = factors[i];\n");
|
||||
p.C(" int next = idx == 2 ? 0 : idx + 1;\n");
|
||||
p.C(" gl_Position = mix(gl_in[idx].gl_Position, gl_in[next].gl_Position, factor);\n");
|
||||
for (size_t i = 0; i < varyings.size(); i++) {
|
||||
VaryingDef &in = varyings[i];
|
||||
VaryingDef &out = outVaryings[i];
|
||||
p.F(" %s = mix(%s[idx], %s[next], factor);\n", outVaryings[i].name, varyings[i].name, varyings[i].name);
|
||||
}
|
||||
p.C(" EmitVertex();\n");
|
||||
emitIndex("i");
|
||||
p.C(" }\n");
|
||||
|
||||
// Did we end up with additional triangles? We'll do three points each for the rest.
|
||||
@ -166,40 +260,13 @@ bool GenerateGeometryShader(const GShaderID &id, char *buffer, const ShaderLangu
|
||||
p.C(" EndPrimitive();\n");
|
||||
|
||||
// Point one, always index zero.
|
||||
p.C(" int idx = indices[0];\n");
|
||||
p.C(" float factor = factors[0];\n");
|
||||
p.C(" int next = idx == 2 ? 0 : idx + 1;\n");
|
||||
p.C(" gl_Position = mix(gl_in[idx].gl_Position, gl_in[next].gl_Position, factor);\n");
|
||||
for (size_t i = 0; i < varyings.size(); i++) {
|
||||
VaryingDef &in = varyings[i];
|
||||
VaryingDef &out = outVaryings[i];
|
||||
p.F(" %s = mix(%s[idx], %s[next], factor);\n", outVaryings[i].name, varyings[i].name, varyings[i].name);
|
||||
}
|
||||
p.C(" EmitVertex();\n");
|
||||
emitIndex("0");
|
||||
|
||||
// After that, one less than i (basically a triangle fan.)
|
||||
p.C(" idx = indices[i - 1];\n");
|
||||
p.C(" factor = factors[i - 1];\n");
|
||||
p.C(" next = idx == 2 ? 0 : idx + 1;\n");
|
||||
p.C(" gl_Position = mix(gl_in[idx].gl_Position, gl_in[next].gl_Position, factor);\n");
|
||||
for (size_t i = 0; i < varyings.size(); i++) {
|
||||
VaryingDef &in = varyings[i];
|
||||
VaryingDef &out = outVaryings[i];
|
||||
p.F(" %s = mix(%s[idx], %s[next], factor);\n", outVaryings[i].name, varyings[i].name, varyings[i].name);
|
||||
}
|
||||
p.C(" EmitVertex();\n");
|
||||
emitIndex("(i - 1)");
|
||||
|
||||
// And the new vertex itself.
|
||||
p.C(" idx = indices[i];\n");
|
||||
p.C(" factor = factors[i];\n");
|
||||
p.C(" next = idx == 2 ? 0 : idx + 1;\n");
|
||||
p.C(" gl_Position = mix(gl_in[idx].gl_Position, gl_in[next].gl_Position, factor);\n");
|
||||
for (size_t i = 0; i < varyings.size(); i++) {
|
||||
VaryingDef &in = varyings[i];
|
||||
VaryingDef &out = outVaryings[i];
|
||||
p.F(" %s = mix(%s[idx], %s[next], factor);\n", outVaryings[i].name, varyings[i].name, varyings[i].name);
|
||||
}
|
||||
p.C(" EmitVertex();\n");
|
||||
emitIndex("i");
|
||||
|
||||
p.C(" }\n");
|
||||
} else {
|
||||
@ -209,16 +276,16 @@ bool GenerateGeometryShader(const GShaderID &id, char *buffer, const ShaderLangu
|
||||
p.C(" vec4 outPos = gl_in[i].gl_Position;\n");
|
||||
p.C(" vec3 projPos = outPos.xyz / outPos.w;\n");
|
||||
p.C(" float projZ = (projPos.z - u_depthRange.z) * u_depthRange.w;\n");
|
||||
// TODO: Ignore triangles from GE_PRIM_RECTANGLES in transform mode, which should not clip to neg z.
|
||||
// We shouldn't need to worry about rectangles-as-triangles here, since we don't use geometry shaders for that.
|
||||
p.F(" gl_ClipDistance%s = projZ * outPos.w + outPos.w;\n", clipSuffix0);
|
||||
p.C(" gl_Position = outPos;\n");
|
||||
if (gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE)) {
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < varyings.size(); i++) {
|
||||
VaryingDef &in = varyings[i];
|
||||
VaryingDef &out = outVaryings[i];
|
||||
p.F(" %s = %s[i];\n", outVaryings[i].name, varyings[i].name);
|
||||
const VaryingDef &in = varyings[i];
|
||||
const VaryingDef &out = outVaryings[i];
|
||||
p.F(" %s = %s[i];\n", out.name, in.name);
|
||||
}
|
||||
// Debug - null the red channel
|
||||
//p.C(" if (i == 0) v_color0Out.x = 0.0;\n");
|
||||
|
@ -379,22 +379,27 @@ std::string GeometryShaderDesc(const GShaderID &id) {
|
||||
void ComputeGeometryShaderID(GShaderID *id_out, const Draw::Bugs &bugs, int prim) {
|
||||
GShaderID id;
|
||||
|
||||
bool vertexRangeCulling =
|
||||
!gstate.isModeThrough() && gstate_c.submitType == SubmitType::DRAW; // neither hw nor sw spline/bezier. See #11692
|
||||
bool isModeThrough = gstate.isModeThrough();
|
||||
bool isCurve = gstate_c.submitType != SubmitType::DRAW;
|
||||
bool isTriangle = prim == GE_PRIM_TRIANGLES || prim == GE_PRIM_TRIANGLE_FAN || prim == GE_PRIM_TRIANGLE_STRIP;
|
||||
|
||||
bool vertexRangeCulling = !isCurve;
|
||||
bool clipClampedDepth = gstate_c.Supports(GPU_SUPPORTS_DEPTH_CLAMP) && !gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE);
|
||||
|
||||
// If we're not using GS culling, return a zero ID.
|
||||
// Also, only use this for triangle primitives.
|
||||
if (!vertexRangeCulling || !gstate_c.Supports(GPU_SUPPORTS_GS_CULLING) || (prim != GE_PRIM_TRIANGLES && prim != GE_PRIM_TRIANGLE_FAN && prim != GE_PRIM_TRIANGLE_STRIP)) {
|
||||
if ((!vertexRangeCulling && !clipClampedDepth) || isModeThrough || !isTriangle || !gstate_c.Supports(GPU_SUPPORTS_GS_CULLING)) {
|
||||
*id_out = id;
|
||||
return;
|
||||
}
|
||||
|
||||
id.SetBit(GS_BIT_ENABLED, true);
|
||||
// Vertex range culling doesn't seem to happen for spline/bezier, see #11692.
|
||||
id.SetBit(GS_BIT_CURVE, isCurve);
|
||||
|
||||
if (gstate.isModeClear()) {
|
||||
// No attribute bits.
|
||||
} else {
|
||||
bool isModeThrough = gstate.isModeThrough();
|
||||
bool lmode = gstate.isUsingSecondaryColor() && gstate.isLightingEnabled() && !isModeThrough;
|
||||
|
||||
id.SetBit(GS_BIT_LMODE, lmode);
|
||||
|
@ -109,6 +109,7 @@ enum GShaderBit : uint8_t {
|
||||
GS_BIT_ENABLED = 0, // If not set, we don't use a geo shader.
|
||||
GS_BIT_DO_TEXTURE = 1, // presence of texcoords
|
||||
GS_BIT_LMODE = 2, // presence of specular color (regular color always present)
|
||||
GS_BIT_CURVE = 3, // curve, which means don't do range culling.
|
||||
};
|
||||
|
||||
static inline GShaderBit operator +(GShaderBit bit, int i) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user