Combined two uniforms to get the base UBO down to 512 bytes, so that no space is wasted (nVidia needs 256-byte UBO alignment)

2025-02-19 21:52:45 +00:00 · 2016-03-20 20:53:46 +01:00 · 2016-03-20 20:53:46 +01:00 · 827481d41d
commit 827481d41d
parent c33c3cf3d4
4 changed files with 24 additions and 19 deletions
--- a/GPU/Vulkan/FragmentShaderGeneratorVulkan.cpp
+++ b/GPU/Vulkan/FragmentShaderGeneratorVulkan.cpp
@ -398,7 +398,7 @@ bool GenerateVulkanGLSLFragmentShader(const ShaderID &id, char *buffer) {
 	if (stencilToAlpha != REPLACE_ALPHA_NO) {
 		switch (replaceAlphaWithStencilType) {
 		case STENCIL_VALUE_UNIFORM:
-			replacedAlpha = "base.stencilReplaceValue";
+			replacedAlpha = "base.fogcoef_stencilreplace.z";
 			break;

 		case STENCIL_VALUE_ZERO:
--- a/GPU/Vulkan/ShaderManagerVulkan.cpp
+++ b/GPU/Vulkan/ShaderManagerVulkan.cpp
@ -169,6 +169,10 @@ ShaderManagerVulkan::ShaderManagerVulkan(VulkanContext *vulkan)
 	memset(&ub_base, 0, sizeof(ub_base));
 	memset(&ub_lights, 0, sizeof(ub_lights));
 	memset(&ub_bones, 0, sizeof(ub_bones));
+
+	ILOG("sizeof(ub_base): %d", (int)sizeof(ub_base));
+	ILOG("sizeof(ub_lights): %d", (int)sizeof(ub_lights));
+	ILOG("sizeof(ub_bones): %d", (int)sizeof(ub_bones));
 }

 ShaderManagerVulkan::~ShaderManagerVulkan() {
@ -202,9 +206,6 @@ void ShaderManagerVulkan::BaseUpdateUniforms(int dirtyUniforms) {
 	if (dirtyUniforms & DIRTY_FOGCOLOR) {
 		Uint8x3ToFloat4(ub_base.fogColor, gstate.fogcolor);
 	}
-	if (dirtyUniforms & DIRTY_STENCILREPLACEVALUE) {
-		Uint8x1ToFloat4(ub_base.stencilReplace, gstate.getStencilTestRef());
-	}
 	if (dirtyUniforms & DIRTY_SHADERBLEND) {
 		Uint8x3ToFloat4(ub_base.blendFixA, gstate.getFixA());
 		Uint8x3ToFloat4(ub_base.blendFixB, gstate.getFixB());
@ -270,28 +271,31 @@ void ShaderManagerVulkan::BaseUpdateUniforms(int dirtyUniforms) {
 	if (dirtyUniforms & DIRTY_TEXMATRIX) {
 		ConvertMatrix4x3To4x4(ub_base.tex, gstate.tgenMatrix);
 	}
-	if (dirtyUniforms & DIRTY_FOGCOEF) {
-		float fogcoef[2] = {
+
+	// Two small uniforms combined: fog coefficients in x/y, stencil replace value in z.
+	if (dirtyUniforms & (DIRTY_FOGCOEF | DIRTY_STENCILREPLACEVALUE)) {
+		float fogcoef_stencil[3] = {
 			getFloat24(gstate.fog1),
 			getFloat24(gstate.fog2),
+			(float)gstate.getStencilTestRef()
 		};
-		if (my_isinf(fogcoef[1])) {
+		if (my_isinf(fogcoef_stencil[1])) {
 			// not really sure what a sensible value might be.
-			fogcoef[1] = fogcoef[1] < 0.0f ? -10000.0f : 10000.0f;
-		} else if (my_isnan(fogcoef[1])) {
+			fogcoef_stencil[1] = fogcoef_stencil[1] < 0.0f ? -10000.0f : 10000.0f;
+		} else if (my_isnan(fogcoef_stencil[1])) {
 			// Workaround for https://github.com/hrydgard/ppsspp/issues/5384#issuecomment-38365988
 			// Just put the fog far away at a large finite distance.
 			// Infinities and NaNs are rather unpredictable in shaders on many GPUs
 			// so it's best to just make it a sane calculation.
-			fogcoef[0] = 100000.0f;
-			fogcoef[1] = 1.0f;
+			fogcoef_stencil[0] = 100000.0f;
+			fogcoef_stencil[1] = 1.0f;
 		}
 #ifndef MOBILE_DEVICE
-		else if (my_isnanorinf(fogcoef[1]) || my_isnanorinf(fogcoef[0])) {
-			ERROR_LOG_REPORT_ONCE(fognan, G3D, "Unhandled fog NaN/INF combo: %f %f", fogcoef[0], fogcoef[1]);
+		else if (my_isnanorinf(fogcoef_stencil[1]) || my_isnanorinf(fogcoef_stencil[0])) {
+			ERROR_LOG_REPORT_ONCE(fognan, G3D, "Unhandled fog NaN/INF combo: %f %f", fogcoef_stencil[0], fogcoef_stencil[1]);
 		}
 #endif
-		CopyFloat2(ub_base.fogCoef, fogcoef);
+		CopyFloat3(ub_base.fogCoef_stencil, fogcoef_stencil);
 	}

 	// Texturing
--- a/GPU/Vulkan/ShaderManagerVulkan.h
+++ b/GPU/Vulkan/ShaderManagerVulkan.h
@ -82,6 +82,8 @@ enum {
 };

 // TODO: Split into two structs, one for software transform and one for hardware transform, to save space.
+// This used to be just a bit too big to fit in 512 bytes; merging two small uniforms fixed that.
+// Now 512 bytes. Probably can't get down to 256 (nVidia's UBO alignment).
 struct UB_VS_FS_Base {
 	float proj[16];
 	float proj_through[16];
@ -90,14 +92,13 @@ struct UB_VS_FS_Base {
 	float tex[16];  // not that common, may want to break out
 	float uvScaleOffset[4];
 	float depthRange[4];
-	float fogCoef[4];
+	float fogCoef_stencil[4];
 	float matAmbient[4];
 	// Fragment data
 	float fogColor[4];
 	float texEnvColor[4];
 	int alphaColorRef[4];
 	int colorTestMask[4];
-	float stencilReplace[4];  // only first float used
 	float blendFixA[4];
 	float blendFixB[4];
 	float texClamp[4];
@ -112,19 +113,19 @@ R"(  mat4 proj_mtx;
  mat4 tex_mtx;
  vec4 uvscaleoffset;
  vec4 depthRange;
-  vec2 fogcoef;
+  vec3 fogcoef_stencilreplace;
  vec4 matambientalpha;
  vec3 fogcolor;
  vec3 texenv;
  ivec4 alphacolorref;
  ivec4 alphacolormask;
-  float stencilReplaceValue;
  vec3 blendFixA;
  vec3 blendFixB;
  vec4 texclamp;
  vec2 texclampoff;
 )";

+// 576 bytes. Can we get down to 512?
 struct UB_VS_Lights {
 	float ambientColor[4];
 	float materialDiffuse[4];
--- a/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp
+++ b/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp
@ -505,7 +505,7 @@ bool GenerateVulkanGLSLVertexShader(const ShaderID &id, char *buffer, bool *uses

 		// Compute fogdepth
 		if (enableFog)
-			WRITE(p, "  v_fogdepth = (viewPos.z + base.fogcoef.x) * base.fogcoef.y;\n");
+			WRITE(p, "  v_fogdepth = (viewPos.z + base.fogcoef_stencilreplace.x) * base.fogcoef_stencilreplace.y;\n");
 	}
 	WRITE(p, "}\n");
 	return true;