Initial implementation of 32/16-bit color reinterpret blits.

2025-02-17 04:39:34 +00:00 · 2022-08-26 12:16:56 +02:00 · 2022-08-26 12:16:56 +02:00 · 3c5ec25f61
commit 3c5ec25f61
parent afb90f6b04
8 changed files with 102 additions and 44 deletions
--- a/Common/GPU/Shader.h
+++ b/Common/GPU/Shader.h
@ -85,6 +85,12 @@ struct UniformBufferDesc {
 	std::vector<UniformDesc> uniforms;
 };

+struct UniformDef {  // Describes one shader uniform (type, name, index); moved here from ShaderWriter.h.
+	const char *type;  // Shader type name as a string; the visible initializers use "vec2" and "float".
+	const char *name;  // Uniform identifier as written in shader code, e.g. "texSize".
+	int index;  // Numbered 0, 1, ... in the visible table; presumably a slot or declaration order - TODO confirm.
+};
+
 struct SamplerDef {
 	const char *name;
 	// TODO: Might need unsigned samplers, 3d samplers, or other types in the future.
--- a/Common/GPU/ShaderWriter.h
+++ b/Common/GPU/ShaderWriter.h
@ -22,12 +22,6 @@ struct InputDef {
 	int semantic;
 };

-struct UniformDef {
-	const char *type;
-	const char *name;
-	int index;
-};
-
 struct VaryingDef {
 	const char *type;
 	const char *name;
--- a/GPU/Common/Draw2D.cpp
+++ b/GPU/Common/Draw2D.cpp
@ -40,7 +40,7 @@ static const SamplerDef samplers[1] = {
 	{ "tex" },
 };

-static const UniformDef uniforms[2] = {
+const UniformDef g_draw2Duniforms[2] = {  // Renamed and no longer static so other files can use it through the extern declaration in Draw2D.h.
 	{ "vec2", "texSize", 0 },
 	{ "float", "scaleFactor", 1},
 };
@ -102,7 +102,7 @@ Draw2DPipelineInfo GenerateDraw2D565ToDepthFs(ShaderWriter &writer) {

 Draw2DPipelineInfo GenerateDraw2D565ToDepthDeswizzleFs(ShaderWriter &writer) {
 	writer.DeclareSamplers(samplers);
-	writer.BeginFSMain(uniforms, varyings, FSFLAG_WRITEDEPTH);
+	writer.BeginFSMain(g_draw2Duniforms, varyings, FSFLAG_WRITEDEPTH);
 	writer.C("  vec4 outColor = vec4(0.0, 0.0, 0.0, 0.0);\n");
 	// Unlike when just copying a depth buffer, here we're generating new depth values so we'll
 	// have to apply the scaling.
--- a/GPU/Common/Draw2D.h
+++ b/GPU/Common/Draw2D.h
@ -36,6 +36,8 @@ struct Draw2DPipelineInfo {
 	Slice<SamplerDef> samplers;
 };

+extern const UniformDef g_draw2Duniforms[2];  // Defined in Draw2D.cpp; entries are texSize (vec2) and scaleFactor (float).
+
 struct Draw2DPipeline {
 	Draw::Pipeline *pipeline;
 	Draw2DPipelineInfo info;
--- a/GPU/Common/FramebufferManagerCommon.cpp
+++ b/GPU/Common/FramebufferManagerCommon.cpp
@ -725,33 +725,30 @@ void FramebufferManagerCommon::CopyToColorFromOverlappingFramebuffers(VirtualFra
 				}

 				if (IsBufferFormat16Bit(src->fb_format) && !IsBufferFormat16Bit(dst->fb_format)) {
-					WARN_LOG_N_TIMES(i16to32, 50, G3D, "16-bit to 32-bit reinterpret needed: %s to %s", GeBufferFormatToString(src->fb_format), GeBufferFormatToString(dst->fb_format));
 					// We halve the X coordinates in the destination framebuffer.
 					// The shader will collect two pixels worth of input data and merge into one.
 					dstX1 *= 0.5f;
 					dstX2 *= 0.5f;
 				} else if (!IsBufferFormat16Bit(src->fb_format) && IsBufferFormat16Bit(dst->fb_format)) {
-					WARN_LOG_N_TIMES(i32to16, 50, G3D, "32-bit to 16-bit reinterpret needed: %s to %s", GeBufferFormatToString(src->fb_format), GeBufferFormatToString(dst->fb_format));
 					// We double the X coordinates in the destination framebuffer.
 					// The shader will sample and depending on the X coordinate & 1, use the upper or lower bits.
 					dstX1 *= 2.0f;
 					dstX2 *= 2.0f;
 				}

-				if (IsBufferFormat16Bit(src->fb_format) && IsBufferFormat16Bit(dst->fb_format)) {
-					// Reinterpret!
-					WARN_LOG_N_TIMES(reint, 20, G3D, "Reinterpret detected from %08x_%s to %08x_%s",
-						src->fb_address, GeBufferFormatToString(src->fb_format),
-						dst->fb_address, GeBufferFormatToString(dst->fb_format));
-					pipeline = reinterpretFromTo_[(int)src->fb_format][(int)dst->fb_format];
-					pass_name = reinterpretStrings[(int)src->fb_format][(int)dst->fb_format];
-					if (!pipeline) {
-						pipeline = draw2D_.Create2DPipeline([=](ShaderWriter &shaderWriter) -> Draw2DPipelineInfo {
-							return GenerateReinterpretFragmentShader(shaderWriter, src->fb_format, dst->fb_format);
-						});
-						reinterpretFromTo_[(int)src->fb_format][(int)dst->fb_format] = pipeline;
-					}
+				// Reinterpret!
+				WARN_LOG_N_TIMES(reint, 20, G3D, "Reinterpret detected from %08x_%s to %08x_%s",
+					src->fb_address, GeBufferFormatToString(src->fb_format),
+					dst->fb_address, GeBufferFormatToString(dst->fb_format));
+				pipeline = reinterpretFromTo_[(int)src->fb_format][(int)dst->fb_format];
+				pass_name = reinterpretStrings[(int)src->fb_format][(int)dst->fb_format];
+				if (!pipeline) {
+					pipeline = draw2D_.Create2DPipeline([=](ShaderWriter &shaderWriter) -> Draw2DPipelineInfo {
+						return GenerateReinterpretFragmentShader(shaderWriter, src->fb_format, dst->fb_format);
+					});
+					reinterpretFromTo_[(int)src->fb_format][(int)dst->fb_format] = pipeline;
 				}
+
 				gpuStats.numReinterpretCopies++;
 			} else if (IsBufferFormat16Bit(src->fb_format) && IsBufferFormat16Bit(dst->fb_format)) {
 				// Fake reinterpret - just clear the way we always did on Vulkan. Just clear color and stencil.
@ -2575,8 +2572,8 @@ void FramebufferManagerCommon::DeviceLost() {

 	presentation_->DeviceLost();

-	for (int i = 0; i < 3; i++) {
-		for (int j = 0; j < 3; j++) {
+	for (int i = 0; i < ARRAY_SIZE(reinterpretFromTo_); i++) {
+		for (int j = 0; j < ARRAY_SIZE(reinterpretFromTo_); j++) {
 			DoRelease(reinterpretFromTo_[i][j]);
 		}
 	}
--- a/GPU/Common/FramebufferManagerCommon.h
+++ b/GPU/Common/FramebufferManagerCommon.h
@ -533,10 +533,10 @@ protected:
 		FBO_OLD_USAGE_FLAG = 15,
 	};

-	// Thin3D stuff for reinterpreting image data between the various 16-bit formats.
+	// Thin3D stuff for reinterpreting image data between the various color formats (the 16-bit ones and 8888).
 	// Safe, not optimal - there might be input attachment tricks, etc, but we can't use them
 	// since we don't want N different implementations.
-	Draw2DPipeline *reinterpretFromTo_[3][3]{};
+	Draw2DPipeline *reinterpretFromTo_[4][4]{};

 	// Common implementation of stencil buffer upload. Also not 100% optimal, but not performance
 	// critical either.
--- a/GPU/Common/ReinterpretFramebuffer.cpp
+++ b/GPU/Common/ReinterpretFramebuffer.cpp
@ -25,106 +25,161 @@ Draw2DPipelineInfo GenerateReinterpretFragmentShader(ShaderWriter &writer, GEBuf
 	writer.DeclareSamplers(samplers);

 	if (writer.Lang().bitwiseOps) {
-		writer.C("uint packColor(vec4 val) {\n");
 		switch (from) {
 		case GE_FORMAT_4444:
+			writer.C("uint packColor(vec4 val) {\n");
 			writer.C("  return uint(val.r * 15.99) | (uint(val.g * 15.99) << 4u) | (uint(val.b * 15.99) << 8u) | (uint(val.a * 15.99) << 12u);\n");
+			writer.C("}\n");
 			break;
 		case GE_FORMAT_5551:
+			writer.C("uint packColor(vec4 val) {\n");
 			writer.C("  uint color = uint(val.r * 31.99) | (uint(val.g * 31.99) << 5u) | (uint(val.b * 31.99) << 10u);\n");
 			writer.C("  if (val.a >= 0.5) color |= 0x8000U;\n");
 			writer.C("  return color;\n");
+			writer.C("}\n");
 			break;
 		case GE_FORMAT_565:
+			writer.C("uint packColor(vec4 val) {\n");
 			writer.C("  return uint(val.r * 31.99) | (uint(val.g * 63.99) << 5u) | (uint(val.b * 31.99) << 11u);\n");
+			writer.C("}\n");
+			break;
+		case GE_FORMAT_8888:
+			writer.C("uint packColor(vec2 val) {\n");
+			writer.C("  return uint(val.r * 255.99) | (uint(val.g * 255.99) << 8u);\n");
+			writer.C("}\n");
 			break;
 		default:
 			_assert_(false);
 			break;
 		}
-		writer.C("}\n");
 	} else {
 		// Floating point can comfortably represent integers up to 16 million, we only need 65536 since these textures are 16-bit.
-		writer.C("float packColor(vec4 val) {\n");
 		switch (from) {
 		case GE_FORMAT_4444:
+			writer.C("float packColor(vec4 val) {\n");
 			writer.C("  return (floor(val.r * 15.99) + floor(val.g * 15.99) * 16.0) + (floor(val.b * 15.99) * 256.0 + floor(val.a * 15.99) * 4096.0);\n");
+			writer.C("}\n");
 			break;
 		case GE_FORMAT_5551:
+			writer.C("float packColor(vec4 val) {\n");
 			writer.C("  float color = floor(val.r * 31.99) + floor(val.g * 31.99) * 32.0 + floor(val.b * 31.99) * 1024.0;\n");
 			writer.C("  if (val.a >= 0.5) color += 32768.0;\n");
 			writer.C("  return color;\n");
+			writer.C("}\n");
 			break;
 		case GE_FORMAT_565:
+			writer.C("float packColor(vec4 val) {\n");
 			writer.C("  return floor(val.r * 31.99) + floor(val.g * 63.99) * 32.0 + floor(val.b * 31.99) * 2048.0;\n");
+			writer.C("}\n");
+			break;
+		case GE_FORMAT_8888:
+			writer.C("float packColor(vec2 val) {\n");
+			writer.C("  return floor(val.r * 255.99) + floor(val.g * 255.99) * 256.0;\n");
+			writer.C("}\n");
 			break;
 		default:
 			_assert_(false);
 			break;
 		}
-		writer.C("}\n");
 	}

 	if (writer.Lang().bitwiseOps) {
-		writer.C("vec4 unpackColor(uint color) {\n");
 		switch (to) {
 		case GE_FORMAT_4444:
+			writer.C("vec4 unpackColor(uint color) {\n");
 			writer.C("  vec4 outColor = vec4(float(color & 0xFU), float((color >> 4u) & 0xFU), float((color >> 8u) & 0xFU), float((color >> 12u) & 0xFU));\n");
 			writer.C("  outColor *= 1.0 / 15.0;\n");
+			writer.C("  return outColor;\n");
+			writer.C("}\n");
 			break;
 		case GE_FORMAT_5551:
+			writer.C("vec4 unpackColor(uint color) {\n");
 			writer.C("  vec4 outColor = vec4(float(color & 0x1FU), float((color >> 5u) & 0x1FU), float((color >> 10u) & 0x1FU), 0.0);\n");
 			writer.C("  outColor.rgb *= 1.0 / 31.0;\n");
 			writer.C("  outColor.a = float(color >> 15);\n");
+			writer.C("  return outColor;\n");
+			writer.C("}\n");
 			break;
 		case GE_FORMAT_565:
+			writer.C("vec4 unpackColor(uint color) {\n");
 			writer.C("  vec4 outColor = vec4(float(color & 0x1FU), float((color >> 5u) & 0x3FU), float((color >> 11u) & 0x1FU), 1.0);\n");
 			writer.C("  outColor.rb *= 1.0 / 31.0;\n");
 			writer.C("  outColor.g *= 1.0 / 63.0;\n");
+			writer.C("  return outColor;\n");
+			writer.C("}\n");
+			break;
+		case GE_FORMAT_8888:
+			writer.C("vec4 unpackColor(uint colorLeft, uint colorRight) {\n");
+			writer.C("  vec4 outColor = vec4(float(colorLeft & 0xFFu),  float((colorLeft >> 8u)  & 0xFFu),\n");
+			writer.C("                       float(colorRight & 0xFFu), float((colorRight >> 8u) & 0xFFu));\n");
+			writer.C("  outColor *= 1.0 / 255.0;\n");
+			writer.C("  return outColor;\n");
+			writer.C("}\n");
 			break;
 		default:
 			_assert_(false);
 			break;
 		}
-		writer.C("  return outColor;\n");
-		writer.C("}\n");
 	} else {
-		writer.C("vec4 unpackColor(float val) {\n");
 		switch (to) {
 		case GE_FORMAT_4444:
+			writer.C("vec4 unpackColor(float color) {\n");  // Parameter is named 'color' because the emitted body below reads 'color'.
 			writer.C("  vec4 outColor = vec4(mod(floor(color), 16.0), mod(floor(color / 16.0), 16.0),");
 			writer.C("                       mod(floor(color / 256.0), 16.0), mod(floor(color / 4096.0), 16.0)); \n");
 			writer.C("  outColor *= 1.0 / 15.0;\n");
+			writer.C("  return outColor;\n");
+			writer.C("}\n");
 			break;
 		case GE_FORMAT_5551:
+			writer.C("vec4 unpackColor(float color) {\n");  // Same rename as the 4444 case: the body uses 'color', not 'val'.
 			writer.C("  vec4 outColor = vec4(mod(floor(color), 32.0), mod(floor(color / 32.0), 32.0), mod(floor(color / 1024.0), 32.0), 0.0);\n");
 			writer.C("  outColor.rgb *= 1.0 / 31.0;\n");
 			writer.C("  outColor.a = floor(color / 32768.0);\n");
+			writer.C("  return outColor;\n");
+			writer.C("}\n");
 			break;
 		case GE_FORMAT_565:
+			writer.C("vec4 unpackColor(float color) {\n");  // Same rename as the 4444 case: the body uses 'color', not 'val'.
 			writer.C("  vec4 outColor = vec4(mod(floor(color), 32.0), mod(floor(color / 32.0), 64.0), mod(floor(color / 2048.0), 32.0), 0.0);\n");
 			writer.C("  outColor.rb *= 1.0 / 31.0;\n");
 			writer.C("  outColor.g *= 1.0 / 63.0;\n");
 			writer.C("  outColor.a = 1.0;\n");
+			writer.C("  return outColor;\n");
+			writer.C("}\n");
+			break;
+		case GE_FORMAT_8888:  // 32-bit target: two packed 16-bit inputs supply (r,g) and (b,a) respectively.
+			writer.C("vec4 unpackColor(float colorLeft, float colorRight) {\n");
+			writer.C("  vec4 outColor = vec4(mod(floor(colorLeft), 256.0), mod(floor(colorLeft / 256.0), 256.0),\n");
+			writer.C("                       mod(floor(colorRight), 256.0), mod(floor(colorRight / 256.0), 256.0));\n");
+			writer.C("  outColor *= 1.0 / 255.0;\n");
+			writer.C("  return outColor;\n");
+			writer.C("}\n");
 			break;
 		default:
 			_assert_(false);
 			break;
 		}
-		writer.C("  return outColor;\n");
-		writer.C("}\n");
 	}

-	writer.BeginFSMain(Slice<UniformDef>::empty(), varyings, FSFLAG_NONE);
-	writer.C("  vec4 val = ").SampleTexture2D("tex", "v_texcoord.xy").C(";\n");
+	writer.BeginFSMain(g_draw2Duniforms, varyings, FSFLAG_NONE);

 	if (IsBufferFormat16Bit(from) && IsBufferFormat16Bit(to)) {
-		if (writer.Lang().bitwiseOps) {
-			writer.C("uint color = packColor(val);\n");
-			writer.C("vec4 outColor = unpackColor(color);\n");
-		}
-	} else {
+		writer.C("  vec4 val = ").SampleTexture2D("tex", "v_texcoord.xy").C(";\n");
+		writer.C("  vec4 outColor = unpackColor(packColor(val));\n");
+	} else if (IsBufferFormat16Bit(from) && !IsBufferFormat16Bit(to)) {
+		// 16-to-32-bit (two pixels, draw size is halved)
+
+		writer.C("  vec4 valLeft = ").SampleTexture2D("tex", "v_texcoord.xy").C(";\n");
+		writer.C("  vec4 valRight = ").SampleTexture2D("tex", "v_texcoord.xy + vec2(0.5 / texSize.x, 0.0)").C(";\n");
+		writer.C("  vec4 outColor = unpackColor(packColor(valLeft), packColor(valRight));\n");
+
 		_assert_("not yet implemented");
+	} else if (!IsBufferFormat16Bit(from) && IsBufferFormat16Bit(to)) {
+		// 32-to-16-bit (half of the pixel, draw size is doubled).
+
+		writer.C("  vec4 val = ").SampleTexture2D("tex", "v_texcoord.xy").C(";\n");
+		writer.C("  float u = mod(floor(v_texcoord.x * texSize.x * 2.0), 2.0);\n");
+		writer.C("  vec4 outColor = unpackColor(u == 0.0 ? packColor(val.rg) : packColor(val.ba));\n");
 	}

 	writer.EndFSMain("outColor", FSFLAG_NONE);
--- a/assets/compat.ini
+++ b/assets/compat.ini
@ -1149,6 +1149,10 @@ ULES01441 = true
 ULJM05600 = true
 ULJM05775 = true

+# Spongebob - The Yellow Avenger (see #15898)
+ULUS10092 = true
+ULES00280 = true
+
 [ShaderColorBitmask]
 # No users right now, but keeping it around as a more accurate option than BlueToAlpha, for debugging mainly Outrun.