GPU: Apply color test after doubling.

2025-01-31 13:52:21 +00:00 · 2018-09-09 20:09:48 -07:00 · 2018-09-09 20:09:48 -07:00 · 703181607e
commit 703181607e
parent 8cdead90f9
7 changed files with 70 additions and 35 deletions
--- a/GPU/Common/ShaderId.cpp
+++ b/GPU/Common/ShaderId.cpp
@@ -233,7 +233,7 @@ void ComputeFragmentShaderID(ShaderID *id_out) {
 		bool enableFog = gstate.isFogEnabled() && !isModeThrough;
 		bool enableAlphaTest = gstate.isAlphaTestEnabled() && !IsAlphaTestTriviallyTrue();
 		bool enableColorTest = gstate.isColorTestEnabled() && !IsColorTestTriviallyTrue();
-		bool enableColorDoubling = gstate.isColorDoublingEnabled() && gstate.isTextureMapEnabled();
+		bool enableColorDoubling = gstate.isColorDoublingEnabled() && gstate.isTextureMapEnabled() && gstate.getTextureFunction() == GE_TEXFUNC_MODULATE;
 		bool doTextureProjection = (gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX && MatrixNeedsProjection(gstate.tgenMatrix));
 		bool doTextureAlpha = gstate.isTextureAlphaUsed();
 		bool doFlatShading = gstate.getShadeMode() == GE_SHADE_FLAT;
--- a/GPU/Directx9/PixelShaderGeneratorDX9.cpp
+++ b/GPU/Directx9/PixelShaderGeneratorDX9.cpp
@@ -304,6 +304,11 @@ bool GenerateFragmentShaderHLSL(const FShaderID &id, char *buffer, ShaderLanguag
 			}
 		}
 		if (enableColorTest) {
+			// Color doubling happens before the color test, but we try to optimize doubling when test is off.
+			if (enableColorDoubling) {
+				WRITE(p, "  v.rgb = v.rgb * 2.0;\n");
+			}
+
 			if (colorTestAgainstZero) {
 				// When testing against 0 (common), we can avoid some math.
 				// 0.002 is approximately half of 1.0 / 255.0.
@@ -322,14 +327,14 @@ bool GenerateFragmentShaderHLSL(const FShaderID &id, char *buffer, ShaderLanguag
 				if (colorTestFuncs[colorTestFunc][0] != '#') {
 					const char *test = colorTestFuncs[colorTestFunc];
 					if (lang == HLSL_D3D11) {
-						WRITE(p, "  uint3 v_scaled = roundAndScaleTo255iv(v.rgb);\n");
+						WRITE(p, "  uint3 v_scaled = roundAndScaleTo255iv(clamp(v.rgb, 0.0, 1.0));\n");
 						WRITE(p, "  uint3 v_masked = v_scaled & u_alphacolormask.rgb;\n");
 						WRITE(p, "  uint3 colorTestRef = u_alphacolorref.rgb & u_alphacolormask.rgb;\n");
 						// We have to test the components separately, or we get incorrect results.  See #10629.
 						WRITE(p, "  if (v_masked.r %s colorTestRef.r && v_masked.g %s colorTestRef.g && v_masked.b %s colorTestRef.b) discard;\n", test, test, test);
 					} else {
 						// TODO: Use a texture to lookup bitwise ops instead?
-						WRITE(p, "  float3 colortest = roundAndScaleTo255v(v.rgb);\n");
+						WRITE(p, "  float3 colortest = roundAndScaleTo255v(clamp(v.rgb, 0.0, 1.0));\n");
 						WRITE(p, "  if ((colortest.r %s u_alphacolorref.r) && (colortest.g %s u_alphacolorref.g) && (colortest.b %s u_alphacolorref.b)) clip(-1);\n", test, test, test);
 					}
 				}
@@ -337,13 +342,17 @@ bool GenerateFragmentShaderHLSL(const FShaderID &id, char *buffer, ShaderLanguag
 					WRITE(p, lang == HLSL_DX9 ? "  clip(-1);\n" : "  discard;\n");
 				}
 			}
-		}

-		// Color doubling happens after the color test.
-		if (enableColorDoubling && replaceBlend == REPLACE_BLEND_2X_SRC) {
-			WRITE(p, "  v.rgb = v.rgb * 4.0;\n");
-		} else if (enableColorDoubling || replaceBlend == REPLACE_BLEND_2X_SRC) {
-			WRITE(p, "  v.rgb = v.rgb * 2.0;\n");
+			if (replaceBlend == REPLACE_BLEND_2X_SRC) {
+				WRITE(p, "  v.rgb = v.rgb * 2.0;\n");
+			}
+		} else {
+			// If there's no color test, we can potentially double and replace blend at once.
+			if (enableColorDoubling && replaceBlend == REPLACE_BLEND_2X_SRC) {
+				WRITE(p, "  v.rgb = v.rgb * 4.0;\n");
+			} else if (enableColorDoubling || replaceBlend == REPLACE_BLEND_2X_SRC) {
+				WRITE(p, "  v.rgb = v.rgb * 2.0;\n");
+			}
 		}

 		if (enableFog) {
--- a/GPU/GLES/FragmentShaderGeneratorGLES.cpp
+++ b/GPU/GLES/FragmentShaderGeneratorGLES.cpp
@@ -558,6 +558,14 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, uint64_t *uniform
 		}

 		if (enableColorTest) {
+			// Color doubling happens before the color test, but we try to optimize doubling when test is off.
+			if (enableColorDoubling) {
+				WRITE(p, "  v.rgb = v.rgb * 2.0;\n");
+				if (g_Config.bFragmentTestCache && !colorTestAgainstZero) {
+					WRITE(p, "  vScale256.rgb = vScale256.rgb * 2.0;\n");
+				}
+			}
+
 			if (colorTestAgainstZero) {
 				// When testing against 0 (common), we can avoid some math.
 				// 0.002 is approximately half of 1.0 / 255.0.
@@ -576,7 +584,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, uint64_t *uniform
 				WRITE(p, "  float gResult = %s(testtex, vec2(vScale256.g, 0)).g;\n", texture);
 				WRITE(p, "  float bResult = %s(testtex, vec2(vScale256.b, 0)).b;\n", texture);
 				if (colorTestFunc == GE_COMP_EQUAL) {
-					// Equal means all parts must be equal.
+					// Equal means all parts must be equal (so discard if any is not.)
 					WRITE(p, "  if (rResult < 0.5 || gResult < 0.5 || bResult < 0.5) %s\n", discardStatement);
 				} else {
 					// Not equal means any part must be not equal.
@@ -587,7 +595,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, uint64_t *uniform
 				if (colorTestFuncs[colorTestFunc][0] != '#') {
 					if (bitwiseOps) {
 						// Apparently GLES3 does not support vector bitwise ops.
-						WRITE(p, "  ivec3 v_scaled = roundAndScaleTo255iv(v.rgb);\n");
+						WRITE(p, "  ivec3 v_scaled = roundAndScaleTo255iv(clamp(v.rgb, 0.0, 1.0));\n");
 						const char *maskedFragColor = "ivec3(v_scaled.r & u_alphacolormask.r, v_scaled.g & u_alphacolormask.g, v_scaled.b & u_alphacolormask.b)";
 						const char *maskedColorRef = "ivec3(int(u_alphacolorref.r) & u_alphacolormask.r, int(u_alphacolorref.g) & u_alphacolormask.g, int(u_alphacolorref.b) & u_alphacolormask.b)";
 						WRITE(p, "  if (%s %s %s) %s\n", maskedFragColor, colorTestFuncs[colorTestFunc], maskedColorRef, discardStatement);
@@ -600,13 +608,17 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, uint64_t *uniform
 					WRITE(p, "  %s\n", discardStatement);
 				}
 			}
-		}

-		// Color doubling happens after the color test.
-		if (enableColorDoubling && replaceBlend == REPLACE_BLEND_2X_SRC) {
-			WRITE(p, "  v.rgb = v.rgb * 4.0;\n");
-		} else if (enableColorDoubling || replaceBlend == REPLACE_BLEND_2X_SRC) {
-			WRITE(p, "  v.rgb = v.rgb * 2.0;\n");
+			if (replaceBlend == REPLACE_BLEND_2X_SRC) {
+				WRITE(p, "  v.rgb = v.rgb * 2.0;\n");
+			}
+		} else {
+			// If there's no color test, we can potentially double and replace blend at once.
+			if (enableColorDoubling && replaceBlend == REPLACE_BLEND_2X_SRC) {
+				WRITE(p, "  v.rgb = v.rgb * 4.0;\n");
+			} else if (enableColorDoubling || replaceBlend == REPLACE_BLEND_2X_SRC) {
+				WRITE(p, "  v.rgb = v.rgb * 2.0;\n");
+			}
 		}

 		if (enableFog) {
--- a/GPU/GLES/FragmentTestCacheGLES.cpp
+++ b/GPU/GLES/FragmentTestCacheGLES.cpp
@@ -81,6 +81,8 @@ void FragmentTestCacheGLES::BindTestTexture(int slot) {
 	GLRTexture *tex = CreateTestTexture(funcs, refs, masks, valid);
 	lastTexture_ = tex;
 	render_->BindTexture(slot, tex);
+	// We only need to do this once for the texture.
+	render_->SetTextureSampler(slot, GL_CLAMP_TO_EDGE, GL_CLAMP_TO_EDGE, GL_NEAREST, GL_NEAREST, 0.0f);
 	FragmentTestTexture item;
 	item.lastFrame = gpuStats.numFlips;
 	item.texture = tex;
--- a/GPU/Software/Rasterizer.cpp
+++ b/GPU/Software/Rasterizer.cpp
@@ -622,7 +622,13 @@ static inline Vec4<int> GetTextureFunctionOutput(const Vec4<int>& prim_color, co
 		// We can be accurate up to 24 bit integers, should be enough.
 		const __m128 p = _mm_cvtepi32_ps(prim_color.ivec);
 		const __m128 t = _mm_cvtepi32_ps(texcolor.ivec);
-		out_rgb.ivec = _mm_cvtps_epi32(_mm_div_ps(_mm_mul_ps(p, t), _mm_set_ps1(255.0f)));
+		const __m128 b = _mm_mul_ps(p, t);
+		if (gstate.isColorDoublingEnabled()) {
+			// We double right here, only for modulate.  Other tex funcs do not color double.
+			out_rgb.ivec = _mm_cvtps_epi32(_mm_mul_ps(b, _mm_set_ps1(2.0f / 255.0f)));
+		} else {
+			out_rgb.ivec = _mm_cvtps_epi32(_mm_mul_ps(b, _mm_set_ps1(1.0f / 255.0f)));
+		}

 		if (rgba) {
 			return Vec4<int>(out_rgb.ivec);
@@ -630,7 +636,11 @@ static inline Vec4<int> GetTextureFunctionOutput(const Vec4<int>& prim_color, co
 			out_a = prim_color.a();
 		}
 #else
-		out_rgb = prim_color.rgb() * texcolor.rgb() / 255;
+		if (gstate.isColorDoublingEnabled()) {
+			out_rgb = (prim_color.rgb() * texcolor.rgb() * 2) / 255;
+		} else {
+			out_rgb = prim_color.rgb() * texcolor.rgb() / 255;
+		}
 		out_a = (rgba) ? (prim_color.a() * texcolor.a() / 255) : prim_color.a();
 #endif
 		break;
@@ -894,7 +904,7 @@ static inline Vec3<int> AlphaBlendingResult(const Vec4<int> &source, const Vec4<

 template <bool clearMode>
 inline void DrawSinglePixel(const DrawingCoords &p, u16 z, u8 fog, const Vec4<int> &color_in) {
-	Vec4<int> prim_color = color_in;
+	Vec4<int> prim_color = color_in.Clamp(0, 255);
 	// Depth range test - applied in clear mode, if not through mode.
 	if (!gstate.isModeThrough())
 		if (z < gstate.getDepthRangeMin() || z > gstate.getDepthRangeMax())
@@ -935,14 +945,6 @@ inline void DrawSinglePixel(const DrawingCoords &p, u16 z, u8 fog, const Vec4<in
 		SetPixelDepth(p.x, p.y, z);
 	}

-	// Doubling happens only when texturing is enabled, and after tests.
-	if (gstate.isTextureMapEnabled() && gstate.isColorDoublingEnabled() && !clearMode) {
-		// TODO: Does this need to be clamped before blending?
-		prim_color.r() <<= 1;
-		prim_color.g() <<= 1;
-		prim_color.b() <<= 1;
-	}
-
 	if (gstate.isFogEnabled() && !gstate.isModeThrough() && !clearMode) {
 		Vec3<int> fogColor = Vec3<int>::FromRGB(gstate.fogcolor);
 		fogColor = (prim_color.rgb() * (int)fog + fogColor * (255 - (int)fog)) / 255;
--- a/GPU/Vulkan/FragmentShaderGeneratorVulkan.cpp
+++ b/GPU/Vulkan/FragmentShaderGeneratorVulkan.cpp
@@ -375,6 +375,11 @@ bool GenerateVulkanGLSLFragmentShader(const FShaderID &id, char *buffer) {
 		}

 		if (enableColorTest) {
+			// Color doubling happens before the color test, but we try to optimize doubling when test is off.
+			if (enableColorDoubling) {
+				WRITE(p, "  v.rgb = v.rgb * 2.0;\n");
+			}
+
 			if (colorTestAgainstZero) {
 				// When testing against 0 (common), we can avoid some math.
 				// Have my doubts that this special case is actually worth it, but whatever.
@@ -392,19 +397,23 @@ bool GenerateVulkanGLSLFragmentShader(const FShaderID &id, char *buffer) {
 			} else {
 				const char *colorTestFuncs[] = { "#", "#", " != ", " == " };
 				if (colorTestFuncs[colorTestFunc][0] != '#') {
-					WRITE(p, "  ivec3 v_scaled = roundAndScaleTo255iv(v.rgb);\n");
+					WRITE(p, "  ivec3 v_scaled = roundAndScaleTo255iv(clamp(v.rgb, 0.0, 1.0));\n");
 					WRITE(p, "  if ((v_scaled & base.alphacolormask.rgb) %s (base.alphacolorref.rgb & base.alphacolormask.rgb)) %s\n", colorTestFuncs[colorTestFunc], discardStatement);
 				} else {
 					WRITE(p, "  %s\n", discardStatement);
 				}
 			}
-		}

-		// Color doubling happens after the color test.
-		if (enableColorDoubling && replaceBlend == REPLACE_BLEND_2X_SRC) {
-			WRITE(p, "  v.rgb = v.rgb * 4.0;\n");
-		} else if (enableColorDoubling || replaceBlend == REPLACE_BLEND_2X_SRC) {
-			WRITE(p, "  v.rgb = v.rgb * 2.0;\n");
+			if (replaceBlend == REPLACE_BLEND_2X_SRC) {
+				WRITE(p, "  v.rgb = v.rgb * 2.0;\n");
+			}
+		} else {
+			// If there's no color test, we can potentially double and replace blend at once.
+			if (enableColorDoubling && replaceBlend == REPLACE_BLEND_2X_SRC) {
+				WRITE(p, "  v.rgb = v.rgb * 4.0;\n");
+			} else if (enableColorDoubling || replaceBlend == REPLACE_BLEND_2X_SRC) {
+				WRITE(p, "  v.rgb = v.rgb * 2.0;\n");
+			}
 		}

 		if (enableFog) {
--- a/headless/Headless.cpp
+++ b/headless/Headless.cpp
@@ -373,6 +373,7 @@ int main(int argc, const char* argv[])
 	g_Config.iSplineBezierQuality = 2;
 	g_Config.bHighQualityDepth = true;
 	g_Config.bMemStickInserted = true;
+	g_Config.bFragmentTestCache = true;

 #ifdef _WIN32
 	InitSysDirectories();