Merge pull request #11379 from unknownbrackets/softgpu

Correct interactions between fog, doubling, and color testing
2024-11-27 15:30:35 +00:00 · 2018-09-10 09:23:11 +02:00 · 2018-09-10 09:23:11 +02:00 · e2a9f6acdd
commit e2a9f6acdd
parent ae73b8f45d 817b5d7c1c
8 changed files with 95 additions and 86 deletions
--- a/GPU/Common/ShaderId.cpp
+++ b/GPU/Common/ShaderId.cpp
@@ -233,7 +233,7 @@ void ComputeFragmentShaderID(ShaderID *id_out) {
 		bool enableFog = gstate.isFogEnabled() && !isModeThrough;
 		bool enableAlphaTest = gstate.isAlphaTestEnabled() && !IsAlphaTestTriviallyTrue();
 		bool enableColorTest = gstate.isColorTestEnabled() && !IsColorTestTriviallyTrue();
-		bool enableColorDoubling = gstate.isColorDoublingEnabled() && gstate.isTextureMapEnabled();
+		bool enableColorDoubling = gstate.isColorDoublingEnabled() && gstate.isTextureMapEnabled() && gstate.getTextureFunction() == GE_TEXFUNC_MODULATE;
 		bool doTextureProjection = (gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX && MatrixNeedsProjection(gstate.tgenMatrix));
 		bool doTextureAlpha = gstate.isTextureAlphaUsed();
 		bool doFlatShading = gstate.getShadeMode() == GE_SHADE_FLAT;
--- a/GPU/Directx9/PixelShaderGeneratorDX9.cpp
+++ b/GPU/Directx9/PixelShaderGeneratorDX9.cpp
@@ -268,6 +268,11 @@ bool GenerateFragmentShaderHLSL(const FShaderID &id, char *buffer, ShaderLanguag
 					WRITE(p, "  float4 v = p;\n"); break;
 				}
 			}
+
+			if (enableColorDoubling) {
+				// This happens before fog is applied.
+				WRITE(p, "  v.rgb = clamp(v.rgb * 2.0, 0.0, 1.0);\n");
+			}
 		} else {
 			// No texture mapping
 			WRITE(p, "  float4 v = In.v_color0 %s;\n", secondary);
@@ -303,6 +308,12 @@ bool GenerateFragmentShaderHLSL(const FShaderID &id, char *buffer, ShaderLanguag
 				}
 			}
 		}
+
+		if (enableFog) {
+			WRITE(p, "  float fogCoef = clamp(In.v_fogdepth, 0.0, 1.0);\n");
+			WRITE(p, "  v = lerp(float4(u_fogcolor, v.a), v, fogCoef);\n");
+		}
+
 		if (enableColorTest) {
 			if (colorTestAgainstZero) {
 				// When testing against 0 (common), we can avoid some math.
@@ -339,18 +350,10 @@ bool GenerateFragmentShaderHLSL(const FShaderID &id, char *buffer, ShaderLanguag
 			}
 		}

-		// Color doubling happens after the color test.
-		if (enableColorDoubling && replaceBlend == REPLACE_BLEND_2X_SRC) {
-			WRITE(p, "  v.rgb = v.rgb * 4.0;\n");
-		} else if (enableColorDoubling || replaceBlend == REPLACE_BLEND_2X_SRC) {
+		if (replaceBlend == REPLACE_BLEND_2X_SRC) {
 			WRITE(p, "  v.rgb = v.rgb * 2.0;\n");
 		}

-		if (enableFog) {
-			WRITE(p, "  float fogCoef = clamp(In.v_fogdepth, 0.0, 1.0);\n");
-			WRITE(p, "  v = lerp(float4(u_fogcolor, v.a), v, fogCoef);\n");
-		}
-
 		if (replaceBlend == REPLACE_BLEND_PRE_SRC || replaceBlend == REPLACE_BLEND_PRE_SRC_2X_ALPHA) {
 			const char *srcFactor = "ERROR";
 			switch (replaceBlendFuncA) {
--- a/GPU/GLES/FragmentShaderGeneratorGLES.cpp
+++ b/GPU/GLES/FragmentShaderGeneratorGLES.cpp
@@ -500,11 +500,22 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, uint64_t *uniform
 					WRITE(p, "  vec4 v = p;\n"); break;
 				}
 			}
+
+			if (enableColorDoubling) {
+				// This happens before fog is applied.
+				WRITE(p, "  v.rgb = clamp(v.rgb * 2.0, 0.0, 1.0);\n");
+			}
 		} else {
 			// No texture mapping
 			WRITE(p, "  vec4 v = v_color0 %s;\n", secondary);
 		}

+		if (enableFog) {
+			WRITE(p, "  float fogCoef = clamp(v_fogdepth, 0.0, 1.0);\n");
+			WRITE(p, "  v = mix(vec4(u_fogcolor, v.a), v, fogCoef);\n");
+			// WRITE(p, "  v.x = v_depth;\n");
+		}
+
 		// Texture access is at half texels [0.5/256, 255.5/256], but colors are normalized [0, 255].
 		// So we have to scale to account for the difference.
 		std::string alphaTestXCoord = "0";
@@ -576,7 +587,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, uint64_t *uniform
 				WRITE(p, "  float gResult = %s(testtex, vec2(vScale256.g, 0)).g;\n", texture);
 				WRITE(p, "  float bResult = %s(testtex, vec2(vScale256.b, 0)).b;\n", texture);
 				if (colorTestFunc == GE_COMP_EQUAL) {
-					// Equal means all parts must be equal.
+					// Equal means all parts must be equal (so discard if any is not.)
 					WRITE(p, "  if (rResult < 0.5 || gResult < 0.5 || bResult < 0.5) %s\n", discardStatement);
 				} else {
 					// Not equal means any part must be not equal.
@@ -602,19 +613,10 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, uint64_t *uniform
 			}
 		}

-		// Color doubling happens after the color test.
-		if (enableColorDoubling && replaceBlend == REPLACE_BLEND_2X_SRC) {
-			WRITE(p, "  v.rgb = v.rgb * 4.0;\n");
-		} else if (enableColorDoubling || replaceBlend == REPLACE_BLEND_2X_SRC) {
+		if (replaceBlend == REPLACE_BLEND_2X_SRC) {
 			WRITE(p, "  v.rgb = v.rgb * 2.0;\n");
 		}

-		if (enableFog) {
-			WRITE(p, "  float fogCoef = clamp(v_fogdepth, 0.0, 1.0);\n");
-			WRITE(p, "  v = mix(vec4(u_fogcolor, v.a), v, fogCoef);\n");
-			// WRITE(p, "  v.x = v_depth;\n");
-		}
-
 		if (replaceBlend == REPLACE_BLEND_PRE_SRC || replaceBlend == REPLACE_BLEND_PRE_SRC_2X_ALPHA) {
 			const char *srcFactor = "ERROR";
 			switch (replaceBlendFuncA) {
--- a/GPU/GLES/FragmentTestCacheGLES.cpp
+++ b/GPU/GLES/FragmentTestCacheGLES.cpp
@@ -81,6 +81,8 @@ void FragmentTestCacheGLES::BindTestTexture(int slot) {
 	GLRTexture *tex = CreateTestTexture(funcs, refs, masks, valid);
 	lastTexture_ = tex;
 	render_->BindTexture(slot, tex);
+	// We only need to do this once for the texture.
+	render_->SetTextureSampler(slot, GL_CLAMP_TO_EDGE, GL_CLAMP_TO_EDGE, GL_NEAREST, GL_NEAREST, 0.0f);
 	FragmentTestTexture item;
 	item.lastFrame = gpuStats.numFlips;
 	item.texture = tex;
--- a/GPU/Software/Clipper.cpp
+++ b/GPU/Software/Clipper.cpp
@@ -157,7 +157,7 @@ void ProcessRect(const VertexData& v0, const VertexData& v1)
 		// Color and depth values of second vertex are used for the whole rectangle
 		buf[0].color0 = buf[1].color0 = buf[2].color0 = buf[3].color0;
 		buf[0].color1 = buf[1].color1 = buf[2].color1 = buf[3].color1;
-		buf[0].fogdepth = buf[1].fogdepth = buf[2].fogdepth = buf[3].fogdepth = 1.0f;
+		buf[0].fogdepth = buf[1].fogdepth = buf[2].fogdepth = buf[3].fogdepth;

 		VertexData* topleft = &buf[0];
 		VertexData* topright = &buf[1];
--- a/GPU/Software/Rasterizer.cpp
+++ b/GPU/Software/Rasterizer.cpp
@@ -484,9 +484,8 @@ static inline bool StencilTestPassed(u8 stencil)
 	return true;
 }

-static inline u8 ApplyStencilOp(int op, int x, int y)
-{
-	u8 old_stencil = GetPixelStencil(x, y); // TODO: Apply mask?
+static inline u8 ApplyStencilOp(int op, u8 old_stencil) {
+	// TODO: Apply mask to reference or old stencil?
 	u8 reference_stencil = gstate.getStencilTestRef(); // TODO: Apply mask?

 	switch (op) {
@@ -538,71 +537,71 @@ static inline u8 ApplyStencilOp(int op, int x, int y)
 	return old_stencil;
 }

-static inline u32 ApplyLogicOp(GELogicOp op, u32 old_color, u32 new_color)
-{
+static inline u32 ApplyLogicOp(GELogicOp op, u32 old_color, u32 new_color) {
+	// All of the operations here intentionally preserve alpha/stencil.
 	switch (op) {
 	case GE_LOGIC_CLEAR:
-		new_color = 0;
+		new_color &= 0xFF000000;
 		break;

 	case GE_LOGIC_AND:
-		new_color = new_color & old_color;
+		new_color = new_color & (old_color | 0xFF000000);
 		break;

 	case GE_LOGIC_AND_REVERSE:
-		new_color = new_color & ~old_color;
+		new_color = new_color & (~old_color | 0xFF000000);
 		break;

 	case GE_LOGIC_COPY:
-		//new_color = new_color;
+		// No change to new_color.
 		break;

 	case GE_LOGIC_AND_INVERTED:
-		new_color = ~new_color & old_color;
+		new_color = (~new_color & (old_color & 0x00FFFFFF)) | (new_color & 0xFF000000);
 		break;

 	case GE_LOGIC_NOOP:
-		new_color = old_color;
+		new_color = (old_color & 0x00FFFFFF) | (new_color & 0xFF000000);
 		break;

 	case GE_LOGIC_XOR:
-		new_color = new_color ^ old_color;
+		new_color = new_color ^ (old_color & 0x00FFFFFF);
 		break;

 	case GE_LOGIC_OR:
-		new_color = new_color | old_color;
+		new_color = new_color | (old_color & 0x00FFFFFF);
 		break;

 	case GE_LOGIC_NOR:
-		new_color = ~(new_color | old_color);
+		new_color = (~(new_color | old_color) & 0x00FFFFFF) | (new_color & 0xFF000000);
 		break;

 	case GE_LOGIC_EQUIV:
-		new_color = ~(new_color ^ old_color);
+		new_color = (~(new_color ^ old_color) & 0x00FFFFFF) | (new_color & 0xFF000000);
 		break;

 	case GE_LOGIC_INVERTED:
-		new_color = ~old_color;
+		new_color = (~old_color & 0x00FFFFFF) | (new_color & 0xFF000000);
 		break;

 	case GE_LOGIC_OR_REVERSE:
-		new_color = new_color | ~old_color;
+		new_color = new_color | (~old_color & 0x00FFFFFF);
 		break;

 	case GE_LOGIC_COPY_INVERTED:
-		new_color = ~new_color;
+		new_color = (~new_color & 0x00FFFFFF) | (new_color & 0xFF000000);
 		break;

 	case GE_LOGIC_OR_INVERTED:
-		new_color = ~new_color | old_color;
+		new_color = ((~new_color | old_color) & 0x00FFFFFF) | (new_color & 0xFF000000);
 		break;

 	case GE_LOGIC_NAND:
-		new_color = ~(new_color & old_color);
+		new_color = (~(new_color & old_color) & 0x00FFFFFF) | (new_color & 0xFF000000);
 		break;

 	case GE_LOGIC_SET:
-		new_color = 0xFFFFFFFF;
+		new_color |= 0x00FFFFFF;
 		break;
 	}

@@ -623,7 +622,13 @@ static inline Vec4<int> GetTextureFunctionOutput(const Vec4<int>& prim_color, co
 		// We can be accurate up to 24 bit integers, should be enough.
 		const __m128 p = _mm_cvtepi32_ps(prim_color.ivec);
 		const __m128 t = _mm_cvtepi32_ps(texcolor.ivec);
-		out_rgb.ivec = _mm_cvtps_epi32(_mm_div_ps(_mm_mul_ps(p, t), _mm_set_ps1(255.0f)));
+		const __m128 b = _mm_mul_ps(p, t);
+		if (gstate.isColorDoublingEnabled()) {
+			// We double right here, only for modulate.  Other tex funcs do not color double.
+			out_rgb.ivec = _mm_cvtps_epi32(_mm_mul_ps(b, _mm_set_ps1(2.0f / 255.0f)));
+		} else {
+			out_rgb.ivec = _mm_cvtps_epi32(_mm_mul_ps(b, _mm_set_ps1(1.0f / 255.0f)));
+		}

 		if (rgba) {
 			return Vec4<int>(out_rgb.ivec);
@@ -631,7 +636,11 @@ static inline Vec4<int> GetTextureFunctionOutput(const Vec4<int>& prim_color, co
 			out_a = prim_color.a();
 		}
 #else
-		out_rgb = prim_color.rgb() * texcolor.rgb() / 255;
+		if (gstate.isColorDoublingEnabled()) {
+			out_rgb = (prim_color.rgb() * texcolor.rgb() * 2) / 255;
+		} else {
+			out_rgb = prim_color.rgb() * texcolor.rgb() / 255;
+		}
 		out_a = (rgba) ? (prim_color.a() * texcolor.a() / 255) : prim_color.a();
 #endif
 		break;
@@ -895,28 +904,34 @@ static inline Vec3<int> AlphaBlendingResult(const Vec4<int> &source, const Vec4<

 template <bool clearMode>
 inline void DrawSinglePixel(const DrawingCoords &p, u16 z, u8 fog, const Vec4<int> &color_in) {
-	Vec4<int> prim_color = color_in;
-	// Depth range test
-	// TODO: Clear mode?
+	Vec4<int> prim_color = color_in.Clamp(0, 255);
+	// Depth range test - applied in clear mode, if not through mode.
 	if (!gstate.isModeThrough())
 		if (z < gstate.getDepthRangeMin() || z > gstate.getDepthRangeMax())
 			return;

+	if (gstate.isAlphaTestEnabled() && !clearMode)
+		if (!AlphaTestPassed(prim_color.a()))
+			return;
+
+	// Fog is applied prior to color test.
+	if (gstate.isFogEnabled() && !gstate.isModeThrough() && !clearMode) {
+		Vec3<int> fogColor = Vec3<int>::FromRGB(gstate.fogcolor);
+		fogColor = (prim_color.rgb() * (int)fog + fogColor * (255 - (int)fog)) / 255;
+		prim_color.r() = fogColor.r();
+		prim_color.g() = fogColor.g();
+		prim_color.b() = fogColor.b();
+	}
+
 	if (gstate.isColorTestEnabled() && !clearMode)
 		if (!ColorTestPassed(prim_color.rgb()))
 			return;

-	// TODO: Does a need to be clamped?
-	if (gstate.isAlphaTestEnabled() && !clearMode)
-		if (!AlphaTestPassed(prim_color.a()))
-			return;
-
 	// In clear mode, it uses the alpha color as stencil.
 	u8 stencil = clearMode ? prim_color.a() : GetPixelStencil(p.x, p.y);
-	// TODO: Is it safe to ignore gstate.isDepthTestEnabled() when clear mode is enabled? Probably yes
 	if (!clearMode && (gstate.isStencilTestEnabled() || gstate.isDepthTestEnabled())) {
 		if (gstate.isStencilTestEnabled() && !StencilTestPassed(stencil)) {
-			stencil = ApplyStencilOp(gstate.getStencilOpSFail(), p.x, p.y);
+			stencil = ApplyStencilOp(gstate.getStencilOpSFail(), stencil);
 			SetPixelStencil(p.x, p.y, stencil);
 			return;
 		}
@@ -924,12 +939,12 @@ inline void DrawSinglePixel(const DrawingCoords &p, u16 z, u8 fog, const Vec4<in
 		// Also apply depth at the same time.  If disabled, same as passing.
 		if (gstate.isDepthTestEnabled() && !DepthTestPassed(p.x, p.y, z)) {
 			if (gstate.isStencilTestEnabled()) {
-				stencil = ApplyStencilOp(gstate.getStencilOpZFail(), p.x, p.y);
+				stencil = ApplyStencilOp(gstate.getStencilOpZFail(), stencil);
 				SetPixelStencil(p.x, p.y, stencil);
 			}
 			return;
 		} else if (gstate.isStencilTestEnabled()) {
-			stencil = ApplyStencilOp(gstate.getStencilOpZPass(), p.x, p.y);
+			stencil = ApplyStencilOp(gstate.getStencilOpZPass(), stencil);
 		}

 		if (gstate.isDepthTestEnabled() && gstate.isDepthWriteEnabled()) {
@@ -939,28 +954,12 @@ inline void DrawSinglePixel(const DrawingCoords &p, u16 z, u8 fog, const Vec4<in
 		SetPixelDepth(p.x, p.y, z);
 	}

-	// Doubling happens only when texturing is enabled, and after tests.
-	if (gstate.isTextureMapEnabled() && gstate.isColorDoublingEnabled() && !clearMode) {
-		// TODO: Does this need to be clamped before blending?
-		prim_color.r() <<= 1;
-		prim_color.g() <<= 1;
-		prim_color.b() <<= 1;
-	}
-
-	if (gstate.isFogEnabled() && !gstate.isModeThrough() && !clearMode) {
-		Vec3<int> fogColor = Vec3<int>::FromRGB(gstate.fogcolor);
-		fogColor = (prim_color.rgb() * (int)fog + fogColor * (255 - (int)fog)) / 255;
-		prim_color.r() = fogColor.r();
-		prim_color.g() = fogColor.g();
-		prim_color.b() = fogColor.b();
-	}
-
 	const u32 old_color = GetPixelColor(p.x, p.y);
 	u32 new_color;

 	if (gstate.isAlphaBlendEnabled() && !clearMode) {
 		const Vec4<int> dst = Vec4<int>::FromRGBA(old_color);
-		// ToRGBA() always automatically clamps.
+		// ToRGB() always automatically clamps.
 		new_color = AlphaBlendingResult(prim_color, dst).ToRGB();
 		new_color |= stencil << 24;
 	} else {
@@ -974,8 +973,8 @@ inline void DrawSinglePixel(const DrawingCoords &p, u16 z, u8 fog, const Vec4<in

 	// Logic ops are applied after blending (if blending is enabled.)
 	if (gstate.isLogicOpEnabled() && !clearMode) {
-		// Logic ops don't affect stencil.
-		new_color = (stencil << 24) | (ApplyLogicOp(gstate.getLogicOp(), old_color, new_color) & 0x00FFFFFF);
+		// Logic ops don't affect stencil, which happens inside ApplyLogicOp.
+		new_color = ApplyLogicOp(gstate.getLogicOp(), old_color, new_color);
 	}

 	if (clearMode) {
--- a/GPU/Vulkan/FragmentShaderGeneratorVulkan.cpp
+++ b/GPU/Vulkan/FragmentShaderGeneratorVulkan.cpp
@@ -339,6 +339,11 @@ bool GenerateVulkanGLSLFragmentShader(const FShaderID &id, char *buffer) {
 					WRITE(p, "  vec4 v = p;\n"); break;
 				}
 			}
+
+			if (enableColorDoubling) {
+				// This happens before fog is applied.
+				WRITE(p, "  v.rgb = clamp(v.rgb * 2.0, 0.0, 1.0);\n");
+			}
 		} else {
 			// No texture mapping
 			WRITE(p, "  vec4 v = v_color0 %s;\n", secondary);
@@ -374,6 +379,12 @@ bool GenerateVulkanGLSLFragmentShader(const FShaderID &id, char *buffer) {
 			}
 		}

+		if (enableFog) {
+			WRITE(p, "  float fogCoef = clamp(v_fogdepth, 0.0, 1.0);\n");
+			WRITE(p, "  v = mix(vec4(base.fogcolor, v.a), v, fogCoef);\n");
+			// WRITE(p, "  v.x = v_depth;\n");
+		}
+
 		if (enableColorTest) {
 			if (colorTestAgainstZero) {
 				// When testing against 0 (common), we can avoid some math.
@@ -400,19 +411,10 @@ bool GenerateVulkanGLSLFragmentShader(const FShaderID &id, char *buffer) {
 			}
 		}

-		// Color doubling happens after the color test.
-		if (enableColorDoubling && replaceBlend == REPLACE_BLEND_2X_SRC) {
-			WRITE(p, "  v.rgb = v.rgb * 4.0;\n");
-		} else if (enableColorDoubling || replaceBlend == REPLACE_BLEND_2X_SRC) {
+		if (replaceBlend == REPLACE_BLEND_2X_SRC) {
 			WRITE(p, "  v.rgb = v.rgb * 2.0;\n");
 		}

-		if (enableFog) {
-			WRITE(p, "  float fogCoef = clamp(v_fogdepth, 0.0, 1.0);\n");
-			WRITE(p, "  v = mix(vec4(base.fogcolor, v.a), v, fogCoef);\n");
-			// WRITE(p, "  v.x = v_depth;\n");
-		}
-
 		if (replaceBlend == REPLACE_BLEND_PRE_SRC || replaceBlend == REPLACE_BLEND_PRE_SRC_2X_ALPHA) {
 			const char *srcFactor = "ERROR";
 			switch (replaceBlendFuncA) {
--- a/headless/Headless.cpp
+++ b/headless/Headless.cpp
@@ -373,6 +373,7 @@ int main(int argc, const char* argv[])
 	g_Config.iSplineBezierQuality = 2;
 	g_Config.bHighQualityDepth = true;
 	g_Config.bMemStickInserted = true;
+	g_Config.bFragmentTestCache = true;

 #ifdef _WIN32
 	InitSysDirectories();