Some shader and vertex format optimizations

2025-02-18 16:09:53 +00:00 · 2013-02-05 01:37:00 +01:00 · 2013-02-05 01:37:00 +01:00 · bdc467769e
commit bdc467769e
parent 3ee6ff01e1
6 changed files with 93 additions and 49 deletions
--- a/GPU/GLES/DisplayListInterpreter.cpp
+++ b/GPU/GLES/DisplayListInterpreter.cpp
@ -513,6 +513,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) {
 			// Throughmode changed, let's make the proj matrix dirty.
 			shaderManager_->DirtyUniform(DIRTY_PROJMATRIX);
 		}
+		shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
 		// This sets through-mode or not, as well.
 		break;

--- a/GPU/GLES/FragmentShaderGenerator.cpp
+++ b/GPU/GLES/FragmentShaderGenerator.cpp
@ -77,7 +77,7 @@ void GenerateFragmentShader(char *buffer)


 #if defined(GLSL_ES_1_0)
-	WRITE(p, "precision mediump float;\n");
+	WRITE(p, "precision lowp float;\n");
 #elif !defined(FORCE_OPENGL_2_0)
 	WRITE(p, "#version 110\n");
 #endif
@ -86,30 +86,38 @@ void GenerateFragmentShader(char *buffer)

 	int doTexture = (gstate.textureMapEnable & 1) && !(gstate.clearmode & 1);
 	bool enableFog = gstate.isFogEnabled() && !gstate.isModeThrough() && !gstate.isModeClear();
+	bool enableAlphaTest = (gstate.alphaTestEnable & 1) && !gstate.isModeClear();
+	bool enableColorTest = (gstate.colorTestEnable & 1) && !gstate.isModeClear();
+

 	if (doTexture)
 		WRITE(p, "uniform sampler2D tex;\n");
-	if ((gstate.alphaTestEnable & 1) || (gstate.colorTestEnable & 1)) {
+	if (enableAlphaTest || enableColorTest) {
 		WRITE(p, "uniform vec4 u_alphacolorref;\n");
 	}
-	WRITE(p, "uniform vec3 u_texenv;\n");
+	if (gstate.textureMapEnable & 1) {
+		WRITE(p, "uniform vec3 u_texenv;\n");
+	}
 	WRITE(p, "varying vec4 v_color0;\n");
 	if (lmode)
 		WRITE(p, "varying vec3 v_color1;\n");
 	if (enableFog) {
 		WRITE(p, "uniform vec3 u_fogcolor;\n");
+#if defined(GLSL_ES_1_0)
+		WRITE(p, "varying mediump float v_fogdepth;\n");
+#else
 		WRITE(p, "varying float v_fogdepth;\n");
+#endif
 	}
 	if (doTexture)
 		WRITE(p, "varying vec2 v_texcoord;\n");

 	WRITE(p, "void main() {\n");
-	WRITE(p, "  vec4 v;\n");

 	if (gstate.clearmode & 1)
 	{
 		// Clear mode does not allow any fancy shading.
-		WRITE(p, "  v = v_color0;\n");
+		WRITE(p, "  gl_FragColor = v_color0;\n");
 	}
 	else
 	{
@ -119,59 +127,58 @@ void GenerateFragmentShader(char *buffer)
 			WRITE(p, "  vec4 s = vec4(v_color1, 0.0);\n");
 			secondary = " + s";
 		} else {
-			WRITE(p, "  vec4 s = vec4(0.0, 0.0, 0.0, 0.0);\n");
 			secondary = "";
 		}

 		if (gstate.textureMapEnable & 1) {
 			WRITE(p, "  vec4 t = texture2D(tex, v_texcoord);\n");
-			WRITE(p, "  vec4 p = clamp(v_color0, 0.0, 1.0);\n");
+			WRITE(p, "  vec4 p = v_color0;\n");

 			if (gstate.texfunc & 0x100) { // texfmt == RGBA
 				switch (gstate.texfunc & 0x7) {
 				case GE_TEXFUNC_MODULATE:
-					WRITE(p, "  v = t * p%s;\n", secondary); break;
+					WRITE(p, "  vec4 v = t * p%s;\n", secondary); break;
 				case GE_TEXFUNC_DECAL:
-					WRITE(p, "  v = vec4(1.0 - t.a * p.rgb + t.a * u_texenv.rgb, p.a)%s;\n", secondary); break;
+					WRITE(p, "  vec4 v = vec4((1.0 - t.a) * p.rgb + t.a * t.rgb, p.a)%s;\n", secondary); break;	// decal: blend primary towards texel by texel alpha
 				case GE_TEXFUNC_BLEND:
-					WRITE(p, "  v = vec4((1.0 - t.rgb) * p.rgb + t.rgb * u_texenv.rgb, p.a * t.a)%s;\n", secondary); break;
+					WRITE(p, "  vec4 v = vec4((1.0 - t.rgb) * p.rgb + t.rgb * u_texenv.rgb, p.a * t.a)%s;\n", secondary); break;
 				case GE_TEXFUNC_REPLACE:
-					WRITE(p, "  v = t%s;\n", secondary); break;
+					WRITE(p, "  vec4 v = t%s;\n", secondary); break;
 				case GE_TEXFUNC_ADD:
-					WRITE(p, "  v = vec4(t.rgb + p.rgb, p.a * t.a)%s;\n", secondary); break;
+					WRITE(p, "  vec4 v = vec4(t.rgb + p.rgb, p.a * t.a)%s;\n", secondary); break;
 				default:
-					WRITE(p, "  v = p;\n"); break;
+					WRITE(p, "  vec4 v = p;\n"); break;
 				}
 			} else {	// texfmt == RGB
 				switch (gstate.texfunc & 0x7) {
 				case GE_TEXFUNC_MODULATE:
-					WRITE(p, "	v = vec4(t.rgb * p.rgb, p.a)%s;\n", secondary); break;
+					WRITE(p, "	vec4 v = vec4(t.rgb * p.rgb, p.a)%s;\n", secondary); break;
 				case GE_TEXFUNC_DECAL:
-					WRITE(p, "	v = vec4(t.rgb, p.a)%s;\n", secondary); break;
+					WRITE(p, "	vec4 v = vec4(t.rgb, p.a)%s;\n", secondary); break;
 				case GE_TEXFUNC_BLEND:
-					WRITE(p, "	v = vec4(1.0 - t.rgb) * p.rgb + t.rgb * u_texenv.rgb, p.a)%s;\n", secondary); break;
+					WRITE(p, "	vec4 v = vec4((1.0 - t.rgb) * p.rgb + t.rgb * u_texenv.rgb, p.a)%s;\n", secondary); break;	// parenthesized like the RGBA blend case so the GLSL compiles
 				case GE_TEXFUNC_REPLACE:
-					WRITE(p, "	v = vec4(t.rgb, p.a)%s;\n", secondary); break;
+					WRITE(p, "	vec4 v = vec4(t.rgb, p.a)%s;\n", secondary); break;
 				case GE_TEXFUNC_ADD:
-					WRITE(p, "	v = vec4(t.rgb + p.rgb, p.a)%s;\n", secondary); break;
+					WRITE(p, "	vec4 v = vec4(t.rgb + p.rgb, p.a)%s;\n", secondary); break;
 				default:
-					WRITE(p, "  v = p;\n"); break;
+					WRITE(p, "  vec4 v = p;\n"); break;
 				}
 			}
 		} else {
 			// No texture mapping
-			WRITE(p, "  v = clamp(v_color0, 0.0, 1.0)%s;\n", secondary);
+			WRITE(p, "  vec4 v = v_color0 %s;\n", secondary);
 		}
 		// Color doubling
 		if (gstate.texfunc & 0x10000) {
-			WRITE(p, "  v = v * vec4(2.0, 2.0, 2.0, 2.0);");
+			WRITE(p, "  v = v * 2.0;\n");
 		}

-		if (gstate.alphaTestEnable & 1) {
+		if (enableAlphaTest) {
 			int alphaTestFunc = gstate.alphatest & 7;
 			const char *alphaTestFuncs[] = { "#", "#", " == ", " != ", " < ", " <= ", " > ", " >= " };	// never/always don't make sense
 			if (alphaTestFuncs[alphaTestFunc][0] != '#')
-				WRITE(p, "if (!(v.a %s u_alphacolorref.a)) discard;", alphaTestFuncs[alphaTestFunc]);
+				WRITE(p, "  if (!(v.a %s u_alphacolorref.a)) discard;\n", alphaTestFuncs[alphaTestFunc]);
 		}

 		// Disabled for now until we actually find a need for it.
@ -187,20 +194,20 @@ void GenerateFragmentShader(char *buffer)

 		if (enableFog) {
 			WRITE(p, "  float fogCoef = clamp(v_fogdepth, 0.0, 1.0);\n");
-			WRITE(p, "  v = mix(vec4(u_fogcolor, v.a), v, fogCoef);\n");
+			WRITE(p, "  gl_FragColor = mix(vec4(u_fogcolor, v.a), v, fogCoef);\n");
 			// WRITE(p, "  v.x = v_depth;\n");
+		} else {
+			WRITE(p, "  gl_FragColor = v;\n");
 		}
-
 	}

 #ifdef DEBUG_SHADER
 	if (doTexture) {
-		WRITE(p, "  v = texture2D(tex, v_texcoord);\n");
+		WRITE(p, "  gl_FragColor = texture2D(tex, v_texcoord);\n");
 	} else {
-		WRITE(p, "  v = vec4(1,0,1,1);\n");
+		WRITE(p, "  gl_FragColor = vec4(1,0,1,1);\n");
 	}
 #endif
-	WRITE(p, "  gl_FragColor = v;\n");
 	WRITE(p, "}\n");
 }

--- a/GPU/GLES/ShaderManager.cpp
+++ b/GPU/GLES/ShaderManager.cpp
@ -39,7 +39,7 @@ Shader::Shader(const char *code, uint32_t shaderType) {
 	OutputDebugString(code);
 #endif
 	shader = glCreateShader(shaderType);
-	glShaderSource(shader, 1, &code, 0);
+ 	glShaderSource(shader, 1, &code, 0);
 	glCompileShader(shader);
 	GLint success;
 	glGetShaderiv(shader, GL_COMPILE_STATUS, &success);
@ -264,7 +264,13 @@ void LinkedShader::updateUniforms() {

 	// Texturing
 	if (u_uvscaleoffset != -1 && (dirtyUniforms & DIRTY_UVSCALEOFFSET)) {
-		const float uvscaleoff[4] = { gstate_c.uScale, gstate_c.vScale, gstate_c.uOff, gstate_c.vOff};
+		float uvscaleoff[4] = { gstate_c.uScale, gstate_c.vScale, gstate_c.uOff, gstate_c.vOff};
+		if (gstate.isModeThrough()) {
+			uvscaleoff[0] /= gstate_c.curTextureWidth;
+			uvscaleoff[1] /= gstate_c.curTextureHeight;
+			uvscaleoff[2] /= gstate_c.curTextureWidth;
+			uvscaleoff[3] /= gstate_c.curTextureHeight;
+		}
 		glUniform4fv(u_uvscaleoffset, 1, uvscaleoff);
 	}

--- a/GPU/GLES/TransformPipeline.cpp
+++ b/GPU/GLES/TransformPipeline.cpp
@ -315,6 +315,8 @@ static const GlTypeInfo GLComp[] = {
 	{GL_UNSIGNED_BYTE, 2, GL_TRUE},// 	DEC_U8_2,
 	{GL_UNSIGNED_BYTE, 3, GL_TRUE},// 	DEC_U8_3,
 	{GL_UNSIGNED_BYTE, 4, GL_TRUE},// 	DEC_U8_4,
+	{GL_UNSIGNED_SHORT, 2, GL_TRUE},// 	DEC_U16_2,
+	{GL_UNSIGNED_SHORT, 2, GL_FALSE},// 	DEC_U16A_2,
 };

 static inline void VertexAttribSetup(int attrib, int fmt, int stride, u8 *ptr) {
@ -399,6 +401,13 @@ void TransformDrawEngine::SoftwareTransformAndDraw(
 		vertexCount = 0x10000/3;
 #endif

+	float uscale = 1.0f;
+	float vscale = 1.0f;
+	if (throughmode) {
+		uscale /= gstate_c.curTextureWidth;
+		vscale /= gstate_c.curTextureHeight;
+	}
+
 	Lighter lighter;
 	float fog_end = getFloat24(gstate.fog1);
 	float fog_slope = getFloat24(gstate.fog2);
@ -430,6 +439,9 @@ void TransformDrawEngine::SoftwareTransformAndDraw(

 			if (reader.hasUV()) {
 				reader.ReadUV(uv);
+
+				uv[0] *= uscale;
+				uv[1] *= vscale;
 			}
 			fogCoef = 1.0f;
 			// Scale UV?
@ -529,8 +541,8 @@ void TransformDrawEngine::SoftwareTransformAndDraw(
 				{
 				case 0:	// UV mapping
 					// Texture scale/offset is only performed in this mode.
-					uv[0] = ruv[0]*gstate_c.uScale + gstate_c.uOff;
-					uv[1] = ruv[1]*gstate_c.vScale + gstate_c.vOff;
+					uv[0] = uscale * (ruv[0]*gstate_c.uScale + gstate_c.uOff);
+					uv[1] = vscale * (ruv[1]*gstate_c.vScale + gstate_c.vOff);
 					break;
 				case 1:
 					{
@ -580,8 +592,12 @@ void TransformDrawEngine::SoftwareTransformAndDraw(
 		memcpy(&transformed[index].x, v, 3 * sizeof(float));
 		transformed[index].fog = fogCoef;
 		memcpy(&transformed[index].u, uv, 2 * sizeof(float));
-		memcpy(&transformed[index].color0, c0, 4 * sizeof(float));
-		memcpy(&transformed[index].color1, c1, 3 * sizeof(float));
+		for (int i = 0; i < 4; i++) {
+			transformed[index].color0[i] = c0[i] * 255.0f;
+		}
+		for (int i = 0; i < 3; i++) {	// color1 has only 3 components (u8 color1[3]); a 4th write would overrun it
+			transformed[index].color1[i] = c1[i] * 255.0f;
+		}
 	}

 	// Step 2: expand rectangles.
@ -674,8 +690,8 @@ void TransformDrawEngine::SoftwareTransformAndDraw(
 	}
 	glVertexAttribPointer(program->a_position, 4, GL_FLOAT, GL_FALSE, vertexSize, drawBuffer);
 	if (program->a_texcoord != -1) glVertexAttribPointer(program->a_texcoord, 2, GL_FLOAT, GL_FALSE, vertexSize, ((uint8_t*)drawBuffer) + 4 * 4);
-	if (program->a_color0 != -1) glVertexAttribPointer(program->a_color0, 4, GL_FLOAT, GL_FALSE, vertexSize, ((uint8_t*)drawBuffer) + 6 * 4);
-	if (program->a_color1 != -1) glVertexAttribPointer(program->a_color1, 3, GL_FLOAT, GL_FALSE, vertexSize, ((uint8_t*)drawBuffer) + 10 * 4);
+	if (program->a_color0 != -1) glVertexAttribPointer(program->a_color0, 4, GL_UNSIGNED_BYTE, GL_TRUE, vertexSize, ((uint8_t*)drawBuffer) + 6 * 4);
+	if (program->a_color1 != -1) glVertexAttribPointer(program->a_color1, 3, GL_UNSIGNED_BYTE, GL_TRUE, vertexSize, ((uint8_t*)drawBuffer) + 7 * 4);
 	if (drawIndexed) {
 		if (useVBO) {
 			glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo_[curVbo_]);
--- a/GPU/GLES/VertexDecoder.cpp
+++ b/GPU/GLES/VertexDecoder.cpp
@ -73,6 +73,8 @@ int DecFmtSize(u8 fmt) {
 	case DEC_U8_2: return 4;
 	case DEC_U8_3: return 4;
 	case DEC_U8_4: return 4;
+	case DEC_U16_2: return 4;
+	case DEC_U16A_2: return 4;
 	default:
 		return 0;
 	}
@ -154,10 +156,10 @@ void VertexDecoder::Step_TcU16() const

 void VertexDecoder::Step_TcU16Through() const
 {
-	float *uv = (float *)(decoded_ + decFmt.uvoff);
+	u16 *uv = (u16 *)(decoded_ + decFmt.uvoff);
 	const u16 *uvdata = (const u16*)(ptr_ + tcoff);
-	uv[0] = (float)uvdata[0] / (float)(gstate_c.curTextureWidth);
-	uv[1] = (float)uvdata[1] / (float)(gstate_c.curTextureHeight);
+	uv[0] = uvdata[0];
+	uv[1] = uvdata[1];
 }

 void VertexDecoder::Step_TcFloat() const
@ -171,8 +173,8 @@ void VertexDecoder::Step_TcFloatThrough() const
 {
 	float *uv = (float *)(decoded_ + decFmt.uvoff);
 	const float *uvdata = (const float*)(ptr_ + tcoff);
-	uv[0] = uvdata[0] / (float)(gstate_c.curTextureWidth);
-	uv[1] = uvdata[1] / (float)(gstate_c.curTextureHeight);
+	uv[0] = uvdata[0];
+	uv[1] = uvdata[1];
 }

 void VertexDecoder::Step_Color565() const
@ -580,8 +582,11 @@ void VertexDecoder::SetVertexType(u32 fmt) {

 		steps_[numSteps_++] = throughmode ? tcstep_through[tc] : tcstep[tc];

-		// All UV decode to DEC_FLOAT2 currently.
-		decFmt.uvfmt = DEC_FLOAT_2;
+		// All UVs decode to DEC_FLOAT_2 currently, except 16-bit UVs in through mode, which stay as raw u16 (DEC_U16A_2).
+		if (throughmode && (tc == (GE_VTYPE_TC_16BIT >> GE_VTYPE_TC_SHIFT)))
+			decFmt.uvfmt = DEC_U16A_2;
+		else
+			decFmt.uvfmt = DEC_FLOAT_2;
 		decFmt.uvoff = decOff;
 		decOff += DecFmtSize(decFmt.uvfmt);
 	}
--- a/GPU/GLES/VertexDecoder.h
+++ b/GPU/GLES/VertexDecoder.h
@ -38,6 +38,8 @@ enum {
 	DEC_U8_2,
 	DEC_U8_3,
 	DEC_U8_4,
+	DEC_U16_2,
+	DEC_U16A_2,
 };

 int DecFmtSize(u8 fmt);
@ -58,8 +60,8 @@ struct TransformedVertex
 {
 	float x, y, z, fog;     // in case of morph, preblend during decode
 	float u; float v;      // scaled by uscale, vscale, if there
-	float color0[4];   // prelit
-	float color1[3];   // prelit
+	u8 color0[4];   // prelit
+	u8 color1[3];   // prelit
 };

 DecVtxFormat GetTransformedVtxFormat(const DecVtxFormat &fmt);
@ -189,14 +191,14 @@ public:
 			break;
 		case DEC_S16_3:
 			{
-				s16 *p = (s16 *)(data_ + decFmt_.posoff);
+				const s16 *p = (s16 *)(data_ + decFmt_.posoff);
 				for (int i = 0; i < 3; i++)
 					pos[i] = p[i] / 32767.0f;
 			}
 			break;
 		case DEC_S8_3:
 			{
-				s8 *p = (s8 *)(data_ + decFmt_.posoff);
+				const s8 *p = (s8 *)(data_ + decFmt_.posoff);
 				for (int i = 0; i < 3; i++)
 					pos[i] = p[i] / 127.0f;
 			}
@ -214,14 +216,14 @@ public:
 			break;
 		case DEC_S16_3:
 			{
-				s16 *p = (s16 *)(data_ + decFmt_.nrmoff);
+				const s16 *p = (s16 *)(data_ + decFmt_.nrmoff);
 				for (int i = 0; i < 3; i++)
 					nrm[i] = p[i] / 32767.0f;
 			}
 			break;
 		case DEC_S8_3:
 			{
-				s8 *p = (s8 *)(data_ + decFmt_.nrmoff);
+				const s8 *p = (s8 *)(data_ + decFmt_.nrmoff);
 				for (int i = 0; i < 3; i++)
 					nrm[i] = p[i] / 127.0f;
 			}
@ -236,6 +238,13 @@ public:
 		switch (decFmt_.uvfmt) {
 		case DEC_FLOAT_2:
 			memcpy(uv, data_ + decFmt_.uvoff, 8); break;
+		case DEC_U16A_2:
+			{
+				const u16 *p = (const u16 *)(data_ + decFmt_.uvoff);
+				uv[0] = (float)p[0];
+				uv[1] = (float)p[1];
+			}
+			break;
 		default:
 			ERROR_LOG(G3D, "Reader: Unsupported UV Format");
 			break;