From 2f0cf82f722787517a41a923fff12fef50c2f960 Mon Sep 17 00:00:00 2001
From: Henrik Rydgard <hrydgard@gmail.com>
Date: Mon, 21 Jan 2013 19:11:32 +0100
Subject: [PATCH] Don't expand normals and positions to floats unless
 necessary. Fix sign of Z component of position in through mode.

---
 GPU/GLES/VertexDecoder.cpp         | 88 ++++++++++++++++--------------
 GPU/GLES/VertexShaderGenerator.cpp |  5 +-
 2 files changed, 50 insertions(+), 43 deletions(-)

diff --git a/GPU/GLES/VertexDecoder.cpp b/GPU/GLES/VertexDecoder.cpp
index 6043a93b2..6e4764298 100644
--- a/GPU/GLES/VertexDecoder.cpp
+++ b/GPU/GLES/VertexDecoder.cpp
@@ -276,31 +276,31 @@ void VertexDecoder::Step_Color8888Morph() const
 
 void VertexDecoder::Step_NormalS8() const
 {
-	float *normal = (float *)(decoded_ + decFmt.nrmoff);
-	float multiplier = 1.0f;
-	if (gstate.reversenormals & 0xFFFFFF)
-		multiplier = -multiplier;
+	s8 *normal = (s8 *)(decoded_ + decFmt.nrmoff);  // Output stays s8; no expansion to float.
+	u8 flipMask = 0;  // Not named "xor": that is a reserved operator token in standard C++.
+	if (gstate.reversenormals & 1)
+		flipMask = 0xFF;  // XOR gives bitwise NOT (-x - 1) instead of -x, so -128 maps to 127 rather than overflowing.
 	const s8 *sv = (const s8*)(ptr_ + nrmoff);
 	for (int j = 0; j < 3; j++)
-		normal[j] = (sv[j] / 127.0f) * multiplier;
+		normal[j] = sv[j] ^ flipMask;
 }
 
 void VertexDecoder::Step_NormalS16() const
 {
-	float *normal = (float *)(decoded_ + decFmt.nrmoff);
-	float multiplier = 1.0f;
-	if (gstate.reversenormals & 0xFFFFFF)
-		multiplier = -multiplier;
-	const short *sv = (const short*)(ptr_ + nrmoff);
+	s16 *normal = (s16 *)(decoded_ + decFmt.nrmoff);  // Output stays s16; no expansion to float.
+	u16 flipMask = 0;  // Not named "xor": that is a reserved operator token in standard C++.
+	if (gstate.reversenormals & 1)
+		flipMask = 0xFFFF;  // XOR gives bitwise NOT (-x - 1) instead of -x, so -32768 maps to 32767 rather than overflowing.
+	const s16 *sv = (const s16*)(ptr_ + nrmoff);
 	for (int j = 0; j < 3; j++)
-		normal[j] = (sv[j] / 32767.0f) * multiplier;
+		normal[j] = sv[j] ^ flipMask;
 }
 
 void VertexDecoder::Step_NormalFloat() const
 {
 	float *normal = (float *)(decoded_ + decFmt.nrmoff);
 	float multiplier = 1.0f;
-	if (gstate.reversenormals & 0xFFFFFF)
+	if (gstate.reversenormals & 1)
 		multiplier = -multiplier;
 	const float *fv = (const float*)(ptr_ + nrmoff);
 	for (int j = 0; j < 3; j++)
@@ -314,7 +314,7 @@ void VertexDecoder::Step_NormalS8Morph() const
 	for (int n = 0; n < morphcount; n++)
 	{
 		float multiplier = gstate_c.morphWeights[n];
-		if (gstate.reversenormals & 0xFFFFFF) {
+		if (gstate.reversenormals & 1) {
 			multiplier = -multiplier;
 		}
 		const s8 *sv = (const s8*)(ptr_ + onesize_*n + nrmoff);
@@ -330,7 +330,7 @@ void VertexDecoder::Step_NormalS16Morph() const
 	for (int n = 0; n < morphcount; n++)
 	{
 		float multiplier = gstate_c.morphWeights[n];
-		if (gstate.reversenormals & 0xFFFFFF) {
+		if (gstate.reversenormals & 1) {
 			multiplier = -multiplier;
 		}
 		const float *fv = (const float*)(ptr_ + onesize_*n + nrmoff);
@@ -346,7 +346,7 @@ void VertexDecoder::Step_NormalFloatMorph() const
 	for (int n = 0; n < morphcount; n++)
 	{
 		float multiplier = gstate_c.morphWeights[n];
-		if (gstate.reversenormals & 0xFFFFFF) {
+		if (gstate.reversenormals & 1) {
 			multiplier = -multiplier;
 		}
 		const float *fv = (const float*)(ptr_ + onesize_*n + nrmoff);
@@ -357,20 +357,18 @@ void VertexDecoder::Step_NormalFloatMorph() const
 
 void VertexDecoder::Step_PosS8() const
 {
-	float *v = (float *)(decoded_ + decFmt.posoff);
-	float multiplier = 1.0f / 127.0f;
+	s8 *v = (s8 *)(decoded_ + decFmt.posoff);  // Destination is raw s8 now; the 1/127 float scaling is no longer applied here.
 	const s8 *sv = (const s8*)(ptr_ + posoff);
 	for (int j = 0; j < 3; j++)
-		v[j] = sv[j] * multiplier;
+		v[j] = sv[j];  // Copy the three position components unchanged.
 }
 
 void VertexDecoder::Step_PosS16() const
 {
-	float *v = (float *)(decoded_ + decFmt.posoff);
-	float multiplier = 1.0f / 32767.0f;
-	const short *sv = (const short*)(ptr_ + posoff);
+	s16 *v = (s16 *)(decoded_ + decFmt.posoff);  // Destination is raw s16 now; the 1/32767 float scaling is no longer applied here.
+	const s16 *sv = (const s16*)(ptr_ + posoff);  // Source components read as s16.
 	for (int j = 0; j < 3; j++)
-		v[j] = sv[j] * multiplier;
+		v[j] = sv[j];  // Copy the three position components unchanged.
 }
 
 void VertexDecoder::Step_PosFloat() const
@@ -385,16 +383,20 @@ void VertexDecoder::Step_PosS8Through() const
 {
 	float *v = (float *)(decoded_ + decFmt.posoff);
 	const s8 *sv = (const s8*)(ptr_ + posoff);
-	for (int j = 0; j < 3; j++)
-		v[j] = sv[j];
+	const u8 *uv = (const u8*)(ptr_ + posoff);  // Unsigned view of the same source bytes as sv.
+	v[0] = sv[0];  // X is converted from the signed value.
+	v[1] = sv[1];  // Y is converted from the signed value.
+	v[2] = uv[2];  // Z is converted from the unsigned value, so it is never negative.
 }
 
 void VertexDecoder::Step_PosS16Through() const
 {
 	float *v = (float *)(decoded_ + decFmt.posoff);
-	const short *sv = (const short*)(ptr_ + posoff);
-	for (int j = 0; j < 3; j++)
-		v[j] = sv[j];
+	const s16 *sv = (const s16*)(ptr_ + posoff);  // Signed view of the source components.
+	const u16 *uv = (const u16*)(ptr_ + posoff);  // Unsigned view of the same source data as sv.
+	v[0] = sv[0];  // X is converted from the signed value.
+	v[1] = sv[1];  // Y is converted from the signed value.
+	v[2] = uv[2];  // Z is converted from the unsigned value, so it is never negative.
 }
 
 void VertexDecoder::Step_PosFloatThrough() const
@@ -598,15 +600,18 @@ void VertexDecoder::SetVertexType(u32 fmt) {
 
 		steps_[numSteps_++] = morphcount == 1 ? nrmstep[nrm] : nrmstep_morph[nrm];
 
-		// The normal formats match the gl formats perfectly, let's use 'em.
-		switch (nrm) {
-		case GE_VTYPE_NRM_8BIT >> GE_VTYPE_NRM_SHIFT: decFmt.nrmfmt = DEC_S8_3; break;
-		case GE_VTYPE_NRM_16BIT >> GE_VTYPE_NRM_SHIFT: decFmt.nrmfmt = DEC_S16_3; break;
-		case GE_VTYPE_NRM_FLOAT >> GE_VTYPE_NRM_SHIFT: decFmt.nrmfmt = DEC_FLOAT_3; break;
+		if (morphcount == 1) {
+			// The normal formats match the gl formats perfectly, let's use 'em.
+			switch (nrm) {
+			case GE_VTYPE_NRM_8BIT >> GE_VTYPE_NRM_SHIFT: decFmt.nrmfmt = DEC_S8_3; break;
+			case GE_VTYPE_NRM_16BIT >> GE_VTYPE_NRM_SHIFT: decFmt.nrmfmt = DEC_S16_3; break;
+			case GE_VTYPE_NRM_FLOAT >> GE_VTYPE_NRM_SHIFT: decFmt.nrmfmt = DEC_FLOAT_3; break;
+			}
+		} else {
+			decFmt.nrmfmt = DEC_FLOAT_3;
 		}
 
 		// Actually, temporarily let's not.
-		decFmt.nrmfmt = DEC_FLOAT_3;
 		decFmt.nrmoff = decOff;
 		decOff += DecFmtSize(decFmt.nrmfmt);
 	}
@@ -625,14 +630,17 @@ void VertexDecoder::SetVertexType(u32 fmt) {
 		} else {
 			steps_[numSteps_++] = morphcount == 1 ? posstep[pos] : posstep_morph[pos];
 
-			// The non-through-mode position formats match the gl formats perfectly, let's use 'em.
-			switch (pos) {
-			case GE_VTYPE_POS_8BIT >> GE_VTYPE_POS_SHIFT: decFmt.posfmt = DEC_S8_3; break;
-			case GE_VTYPE_POS_16BIT >> GE_VTYPE_POS_SHIFT: decFmt.posfmt = DEC_S16_3; break;
-			case GE_VTYPE_POS_FLOAT >> GE_VTYPE_POS_SHIFT: decFmt.posfmt = DEC_FLOAT_3; break;
+			if (morphcount == 1) {
+				// The non-through-mode position formats match the gl formats perfectly, let's use 'em.
+				switch (pos) {
+				case GE_VTYPE_POS_8BIT >> GE_VTYPE_POS_SHIFT: decFmt.posfmt = DEC_S8_3; break;
+				case GE_VTYPE_POS_16BIT >> GE_VTYPE_POS_SHIFT: decFmt.posfmt = DEC_S16_3; break;
+				case GE_VTYPE_POS_FLOAT >> GE_VTYPE_POS_SHIFT: decFmt.posfmt = DEC_FLOAT_3; break;
+				}
+			} else {
+				// Morphed positions are still expanded to floats for now.
+				decFmt.posfmt = DEC_FLOAT_3;
 			}
-			// Actually, temporarily let's not.
-			decFmt.posfmt = DEC_FLOAT_3;
 		}
 		decFmt.posoff = decOff;
 		decOff += DecFmtSize(decFmt.posfmt);
diff --git a/GPU/GLES/VertexShaderGenerator.cpp b/GPU/GLES/VertexShaderGenerator.cpp
index 61e8aa811..b610c9b7e 100644
--- a/GPU/GLES/VertexShaderGenerator.cpp
+++ b/GPU/GLES/VertexShaderGenerator.cpp
@@ -46,7 +46,7 @@ bool CanUseHardwareTransform(int prim)
 // prim so we can special case for RECTANGLES :(
 void ComputeVertexShaderID(VertexShaderID *id, int prim)
 {
-	int doTexture = (gstate.textureMapEnable & 1) && !(gstate.clearmode & 1);
+	int doTexture = (gstate.textureMapEnable & 1) && !(gstate.isModeClear());
 
 	bool hasColor = (gstate.vertType & GE_VTYPE_COL_MASK) != 0;
 	bool hasNormal = (gstate.vertType & GE_VTYPE_NRM_MASK) != 0;
@@ -56,10 +56,9 @@ void ComputeVertexShaderID(VertexShaderID *id, int prim)
 	memset(id->d, 0, sizeof(id->d));
 	id->d[0] = gstate.lmode & 1;
 	id->d[0] |= ((int)gstate.isModeThrough()) << 1;
-	id->d[0] |= ((int)gstate.isFogEnabled()) << 2;
+	id->d[0] |= ((int)enableFog) << 2;
 	id->d[0] |= doTexture << 3;
 	id->d[0] |= (hasColor & 1) << 4;
-	id->d[0] |= ((int)enableFog) << 5;
 	if (CanUseHardwareTransform(prim)) {
 		id->d[0] |= 1 << 8;
 		id->d[0] |= (hasNormal & 1) << 9;