gsdx glsl: extend hw shader to sample depth texture

Uses integral coordinates to avoid any rescaling. Bilinear interpolation isn't supported; I don't think filtering a depth texture is allowed anyway.
2024-12-28 20:54:46 +00:00 · 2016-04-23 12:06:10 +02:00 · 2016-04-23 12:06:10 +02:00 · fda511a949
commit fda511a949
parent 583de1bf0b
4 changed files with 197 additions and 5 deletions
--- a/plugins/GSdx/GSDeviceOGL.cpp
+++ b/plugins/GSdx/GSDeviceOGL.cpp
@ -777,6 +777,7 @@ GLuint GSDeviceOGL::CompilePS(PSSelector sel)
 		+ format("#define PS_WMT %d\n", sel.wmt)
 		+ format("#define PS_TEX_FMT %d\n", sel.tex_fmt)
 		+ format("#define PS_DFMT %d\n", sel.dfmt)
+		+ format("#define PS_DEPTH_FMT %d\n", sel.depth_fmt)
 		+ format("#define PS_AEM %d\n", sel.aem)
 		+ format("#define PS_TFX %d\n", sel.tfx)
 		+ format("#define PS_TCC %d\n", sel.tcc)
--- a/plugins/GSdx/GSDeviceOGL.h
+++ b/plugins/GSdx/GSDeviceOGL.h
@ -247,6 +247,7 @@ class GSDeviceOGL final : public GSDevice
 				// Format
 				uint32 tex_fmt:4;
 				uint32 dfmt:2;
+				uint32 depth_fmt:2;
 				// Alpha extension/Correction
 				uint32 aem:1;
 				uint32 fba:1;
@ -270,7 +271,7 @@ class GSDeviceOGL final : public GSDevice
 				uint32 write_rg:1;
 				uint32 fbmask:1;

-				uint32 _free1:2;
+				//uint32 _free1:0;

 				// *** Word 2
 				// Blend and Colclip
--- a/plugins/GSdx/res/glsl/tfx_fs.glsl
+++ b/plugins/GSdx/res/glsl/tfx_fs.glsl
@ -175,6 +175,94 @@ mat4 sample_4p(vec4 u)
    return c;
 }

+//////////////////////////////////////////////////////////////////////
+// Depth sampling (texelFetch on integer texel coordinates, LOD 0)
+//////////////////////////////////////////////////////////////////////
+vec4 fetch_c(ivec2 uv)
+{
+    return texelFetch(TextureSampler, ivec2(uv), 0);
+}
+
+ivec2 clamp_wrap_uv_depth(ivec2 uv)
+{
+    ivec2 uv_out = uv;
+    // Applies the PS_WMS (x) / PS_WMT (y) mode: 2 clamps uv, 3 masks it, others pass through.
+    // MskFix is shifted by 4 to keep the full precision of the coordinate; this
+    // allows multiplying by ScalingFactor before the 1/16 coefficient is applied.
+    ivec4 mask = ivec4(MskFix) << 4;
+
+#if PS_WMS == PS_WMT
+    // Same mode on both axes: process x and y with a single vector operation
+#if PS_WMS == 2
+    uv_out = clamp(uv, mask.xy, mask.zw);
+#elif PS_WMS == 3
+    uv_out = (uv & mask.xy) | mask.zw;
+#endif
+
+#else // PS_WMS != PS_WMT
+    // Different modes: each axis works on its own scalar component (clamp(ivec2) can't be stored in an int)
+#if PS_WMS == 2
+    uv_out.x = clamp(uv.x, mask.x, mask.z);
+#elif PS_WMS == 3
+    uv_out.x = (uv.x & mask.x) | mask.z;
+#endif
+
+#if PS_WMT == 2
+    uv_out.y = clamp(uv.y, mask.y, mask.w);
+#elif PS_WMT == 3
+    uv_out.y = (uv.y & mask.y) | mask.w;
+#endif
+
+#endif
+
+    return uv_out;
+}
+
+vec4 sample_depth(vec2 st)
+{
+    vec2 uv_f = vec2(clamp_wrap_uv_depth(ivec2(st))) * vec2(ScalingFactor.xy) * vec2(1.0f/16.0f);
+    ivec2 uv = ivec2(uv_f);
+    // Fetch one texel at integer coordinate uv and unpack it according to PS_DEPTH_FMT (t is only set for 1, 2 or 3)
+    vec4 t;
+#if PS_DEPTH_FMT == 1
+    // Based on ps_main11 of convert
+
+    // Convert a GL_FLOAT32 depth texture into a RGBA color texture
+    const vec4 bitSh = vec4(exp2(24.0f), exp2(16.0f), exp2(8.0f), exp2(0.0f));
+    const vec4 bitMsk = vec4(0.0, 1.0/256.0, 1.0/256.0, 1.0/256.0);
+
+    vec4 res = fract(vec4(fetch_c(uv).r) * bitSh);
+    // Subtract from each channel the lower-order part (res.xxyz / 256, nothing for x), then scale by 256
+    t = (res - res.xxyz * bitMsk) * 256.0f;
+
+#elif PS_DEPTH_FMT == 2
+    // Based on ps_main12 of convert
+
+    // Convert a GL_FLOAT32 (only 16 lsb) depth into a RGB5A1 color texture
+    const vec4 bitSh = vec4(exp2(32.0f), exp2(27.0f), exp2(22.0f), exp2(17.0f));
+    const uvec4 bitMsk = uvec4(0x1F, 0x1F, 0x1F, 0x1);
+    uvec4 color = uvec4(vec4(fetch_c(uv).r) * bitSh) & bitMsk;
+    // Expand the 5-bit channels (x8) and the 1-bit alpha (x128)
+    t = vec4(color) * vec4(8.0f, 8.0f, 8.0f, 128.0f);
+
+#elif PS_DEPTH_FMT == 3
+    // Source is already a RGBA/RGB5A1 color texture: no bit unpacking, only the x255 rescale
+    t = fetch_c(uv) * 255.0f;
+
+#endif
+
+    // Warning: t ranges from 0 to 255 here, hence the 255.0f factor applied to TA below
+#if (PS_AEM_FMT == FMT_24)
+    t.a = ( (PS_AEM == 0) || any(bvec3(t.rgb))  ) ? 255.0f * TA.x : 0.0f;
+#elif (PS_AEM_FMT == FMT_16)
+    t.a = t.a >= 128.0f ? 255.0f * TA.y : ( (PS_AEM == 0) || any(bvec3(t.rgb)) ) ? 255.0f * TA.x : 0.0f;
+#endif
+
+
+    return t;
+}
+//////////////////////////////////////////////////////////////////////
+
 vec4 sample_color(vec2 st)
 {
 #if (PS_TCOFFSETHACK == 1)
@ -328,10 +416,17 @@ vec4 ps_color()
 {
    //FIXME: maybe we can set gl_Position.w = q in VS
 #if (PS_FST == 0)
-    vec4 T = sample_color(PSin.t_float.xy / vec2(PSin.t_float.w));
+    vec2 st = PSin.t_float.xy / vec2(PSin.t_float.w);
 #else
    // Note xy are normalized coordinate
-    vec4 T = sample_color(PSin.t_int.xy);
+    vec2 st = PSin.t_int.xy;
+#endif
+
+#if (PS_DEPTH_FMT > 0)
+    // Integral coordinate
+    vec4 T = sample_depth(PSin.t_int.zw);
+#else
+    vec4 T = sample_color(st);
 #endif

 #if PS_IIP == 1
--- a/plugins/GSdx/res/glsl_source.h
+++ b/plugins/GSdx/res/glsl_source.h
@ -1019,6 +1019,94 @@ static const char* const tfx_fs_all_glsl =
 	"    return c;\n"
 	"}\n"
 	"\n"
+	"//////////////////////////////////////////////////////////////////////\n"
+	"// Depth sampling\n"
+	"//////////////////////////////////////////////////////////////////////\n"
+	"vec4 fetch_c(ivec2 uv)\n"
+	"{\n"
+	"    return texelFetch(TextureSampler, ivec2(uv), 0);\n"
+	"}\n"
+	"\n"
+	"ivec2 clamp_wrap_uv_depth(ivec2 uv)\n"
+	"{\n"
+	"    ivec2 uv_out = uv;\n"
+	"\n"
+	"    // Keep the full precision\n"
+	"    // It allow to multiply the ScalingFactor before the 1/16 coeff\n"
+	"    ivec4 mask = ivec4(MskFix) << 4;\n"
+	"\n"
+	"#if PS_WMS == PS_WMT\n"
+	"\n"
+	"#if PS_WMS == 2\n"
+	"    uv_out = clamp(uv, mask.xy, mask.zw);\n"
+	"#elif PS_WMS == 3\n"
+	"    uv_out = (uv & mask.xy) | mask.zw;\n"
+	"#endif\n"
+	"\n"
+	"#else // PS_WMS != PS_WMT\n"
+	"\n"
+	"#if PS_WMS == 2\n"
+	"    uv_out.x = clamp(uv.x, mask.x, mask.z);\n"
+	"#elif PS_WMS == 3\n"
+	"    uv_out.x = (uv.x & mask.x) | mask.z;\n"
+	"#endif\n"
+	"\n"
+	"#if PS_WMT == 2\n"
+	"    uv_out.y = clamp(uv.y, mask.y, mask.w);\n"
+	"#elif PS_WMT == 3\n"
+	"    uv_out.y = (uv.y & mask.y) | mask.w;\n"
+	"#endif\n"
+	"\n"
+	"#endif\n"
+	"\n"
+	"    return uv_out;\n"
+	"}\n"
+	"\n"
+	"vec4 sample_depth(vec2 st)\n"
+	"{\n"
+	"    vec2 uv_f = vec2(clamp_wrap_uv_depth(ivec2(st))) * vec2(ScalingFactor.xy) * vec2(1.0f/16.0f);\n"
+	"    ivec2 uv = ivec2(uv_f);\n"
+	"\n"
+	"    vec4 t;\n"
+	"#if PS_DEPTH_FMT == 1\n"
+	"    // Based on ps_main11 of convert\n"
+	"\n"
+	"    // Convert a GL_FLOAT32 depth texture into a RGBA color texture\n"
+	"    const vec4 bitSh = vec4(exp2(24.0f), exp2(16.0f), exp2(8.0f), exp2(0.0f));\n"
+	"    const vec4 bitMsk = vec4(0.0, 1.0/256.0, 1.0/256.0, 1.0/256.0);\n"
+	"\n"
+	"    vec4 res = fract(vec4(fetch_c(uv).r) * bitSh);\n"
+	"\n"
+	"    t = (res - res.xxyz * bitMsk) * 256.0f;\n"
+	"\n"
+	"#elif PS_DEPTH_FMT == 2\n"
+	"    // Based on ps_main12 of convert\n"
+	"\n"
+	"    // Convert a GL_FLOAT32 (only 16 lsb) depth into a RGB5A1 color texture\n"
+	"    const vec4 bitSh = vec4(exp2(32.0f), exp2(27.0f), exp2(22.0f), exp2(17.0f));\n"
+	"    const uvec4 bitMsk = uvec4(0x1F, 0x1F, 0x1F, 0x1);\n"
+	"    uvec4 color = uvec4(vec4(fetch_c(uv).r) * bitSh) & bitMsk;\n"
+	"\n"
+	"    t = vec4(color) * vec4(8.0f, 8.0f, 8.0f, 128.0f);\n"
+	"\n"
+	"#elif PS_DEPTH_FMT == 3\n"
+	"    // Convert a RGBA/RGB5A1 color texture into a RGBA/RGB5A1 color texture\n"
+	"    t = fetch_c(uv) * 255.0f;\n"
+	"\n"
+	"#endif\n"
+	"\n"
+	"    // warning t ranges from 0 to 255\n"
+	"#if (PS_AEM_FMT == FMT_24)\n"
+	"    t.a = ( (PS_AEM == 0) || any(bvec3(t.rgb))  ) ? 255.0f * TA.x : 0.0f;\n"
+	"#elif (PS_AEM_FMT == FMT_16)\n"
+	"    t.a = t.a >= 128.0f ? 255.0f * TA.y : ( (PS_AEM == 0) || any(bvec3(t.rgb)) ) ? 255.0f * TA.x : 0.0f;\n"
+	"#endif\n"
+	"\n"
+	"\n"
+	"    return t;\n"
+	"}\n"
+	"//////////////////////////////////////////////////////////////////////\n"
+	"\n"
 	"vec4 sample_color(vec2 st)\n"
 	"{\n"
 	"#if (PS_TCOFFSETHACK == 1)\n"
@ -1172,10 +1260,17 @@ static const char* const tfx_fs_all_glsl =
 	"{\n"
 	"    //FIXME: maybe we can set gl_Position.w = q in VS\n"
 	"#if (PS_FST == 0)\n"
-	"    vec4 T = sample_color(PSin.t_float.xy / vec2(PSin.t_float.w));\n"
+	"    vec2 st = PSin.t_float.xy / vec2(PSin.t_float.w);\n"
 	"#else\n"
 	"    // Note xy are normalized coordinate\n"
-	"    vec4 T = sample_color(PSin.t_int.xy);\n"
+	"    vec2 st = PSin.t_int.xy;\n"
+	"#endif\n"
+	"\n"
+	"#if (PS_DEPTH_FMT > 0)\n"
+	"    // Integral coordinate\n"
+	"    vec4 T = sample_depth(PSin.t_int.zw);\n"
+	"#else\n"
+	"    vec4 T = sample_color(st);\n"
 	"#endif\n"
 	"\n"
 	"#if PS_IIP == 1\n"