Move depalettize-shader generator to Common

2025-01-30 13:21:52 +00:00 · 2014-09-17 22:31:18 +02:00 · 2014-09-17 22:31:18 +02:00 · 0c9f541a43
commit 0c9f541a43
parent f45afb72c9
8 changed files with 304 additions and 215 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -1378,6 +1378,9 @@ if(ARMV7)
 	set(GPU_NEON GPU/Common/TextureDecoderNEON.cpp)
 endif()
 add_library(GPU OBJECT
+	GPU/Common/DepalettizeShaderCommon.h
+	GPU/Common/DepalettizeShaderCommon.cpp
+	GPU/Common/FramebufferCommon.h
 	GPU/Common/FramebufferCommon.cpp
 	GPU/Common/FramebufferCommon.h
 	GPU/Common/GPUDebugInterface.h
--- a/GPU/Common/DepalettizeShaderCommon.cpp
+++ b/GPU/Common/DepalettizeShaderCommon.cpp
@ -0,0 +1,261 @@
+// Copyright (c) 2014- PPSSPP Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0 or later versions.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official git repository and contact information can be found at
+// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
+
+// Generates the fragment shaders (GLSL and HLSL) used to depalettize textures through the CLUT.
+
+#include <stdio.h>
+
+#include "Common/Log.h"
+#include "Core/Reporting.h"
+#include "GPU/GPUState.h"
+#include "GPU/Common/DepalettizeShaderCommon.h"
+
+
+#define WRITE p+=sprintf
+
+// Writes the depalettize fragment shader for GLSL 3.x targets into `buffer`.
+// Uses integer instructions available since OpenGL 3.0. Suitable for ES 3.0 as well.
+//
+// buffer:      destination for the NUL-terminated shader source. It is written with
+//              unbounded sprintf calls, so the caller must provide enough room.
+// pixelFormat: format of the texture being sampled; selects how the sampled color is
+//              packed back into an integer index.
+//
+// The CLUT index mask, shift, start position and palette format are read from the global gstate.
+void GenerateDepalShader300(char *buffer, GEBufferFormat pixelFormat) {
+	char *p = buffer;
+#ifdef USING_GLES2
+	WRITE(p, "#version 300 es\n");
+	WRITE(p, "precision mediump float;\n");
+#else
+	WRITE(p, "#version 330\n");
+#endif
+	WRITE(p, "in vec2 v_texcoord0;\n");
+	WRITE(p, "out vec4 fragColor0;\n");
+	WRITE(p, "uniform sampler2D tex;\n");
+	WRITE(p, "uniform sampler2D pal;\n");
+
+	WRITE(p, "void main() {\n");
+	WRITE(p, "  vec4 color = texture(tex, v_texcoord0);\n");
+
+	int mask = gstate.getClutIndexMask();
+	int shift = gstate.getClutIndexShift();
+	int offset = gstate.getClutIndexStartPos();
+	const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat();
+	// Unfortunately sampling turned our texture into floating point. To avoid this, might be able
+	// to declare them as isampler2D objects, but these require integer textures, which needs more work.
+	// Anyhow, we simply work around this by converting back to integer. Hopefully there will be no loss of precision.
+	// Use the mask to skip reading some components.
+	// Shift as unsigned: "mask << shift" on a signed int can overflow for large shifts, which is
+	// undefined behavior. Only the low 32 bits matter for the per-component tests below.
+	const u32 shiftedMask = (u32)mask << shift;
+	switch (pixelFormat) {
+	case GE_FORMAT_8888:
+		if (shiftedMask & 0xFF) WRITE(p, "  int r = int(color.r * 255.99);\n"); else WRITE(p, "  int r = 0;\n");
+		if (shiftedMask & 0xFF00) WRITE(p, "  int g = int(color.g * 255.99);\n"); else WRITE(p, "  int g = 0;\n");
+		if (shiftedMask & 0xFF0000) WRITE(p, "  int b = int(color.b * 255.99);\n"); else WRITE(p, "  int b = 0;\n");
+		if (shiftedMask & 0xFF000000) WRITE(p, "  int a = int(color.a * 255.99);\n"); else WRITE(p, "  int a = 0;\n");
+		WRITE(p, "  int index = (a << 24) | (b << 16) | (g << 8) | (r);\n");
+		break;
+	case GE_FORMAT_4444:
+		if (shiftedMask & 0xF) WRITE(p, "  int r = int(color.r * 15.99);\n"); else WRITE(p, "  int r = 0;\n");
+		if (shiftedMask & 0xF0) WRITE(p, "  int g = int(color.g * 15.99);\n"); else WRITE(p, "  int g = 0;\n");
+		if (shiftedMask & 0xF00) WRITE(p, "  int b = int(color.b * 15.99);\n"); else WRITE(p, "  int b = 0;\n");
+		if (shiftedMask & 0xF000) WRITE(p, "  int a = int(color.a * 15.99);\n"); else WRITE(p, "  int a = 0;\n");
+		WRITE(p, "  int index = (a << 12) | (b << 8) | (g << 4) | (r);\n");
+		break;
+	case GE_FORMAT_565:
+		if (shiftedMask & 0x1F) WRITE(p, "  int r = int(color.r * 31.99);\n"); else WRITE(p, "  int r = 0;\n");
+		if (shiftedMask & 0x7E0) WRITE(p, "  int g = int(color.g * 63.99);\n"); else WRITE(p, "  int g = 0;\n");
+		if (shiftedMask & 0xF800) WRITE(p, "  int b = int(color.b * 31.99);\n"); else WRITE(p, "  int b = 0;\n");
+		WRITE(p, "  int index = (b << 11) | (g << 5) | (r);\n");
+		break;
+	case GE_FORMAT_5551:
+		if (shiftedMask & 0x1F) WRITE(p, "  int r = int(color.r * 31.99);\n"); else WRITE(p, "  int r = 0;\n");
+		if (shiftedMask & 0x3E0) WRITE(p, "  int g = int(color.g * 31.99);\n"); else WRITE(p, "  int g = 0;\n");
+		if (shiftedMask & 0x7C00) WRITE(p, "  int b = int(color.b * 31.99);\n"); else WRITE(p, "  int b = 0;\n");
+		if (shiftedMask & 0x8000) WRITE(p, "  int a = int(color.a);\n"); else WRITE(p, "  int a = 0;\n");
+		WRITE(p, "  int index = (a << 15) | (b << 10) | (g << 5) | (r);\n");
+		break;
+	default:
+		// NOTE(review): nothing is emitted for other formats, so the "index = ..." statement
+		// written below refers to a variable the shader never declared.
+		break;
+	}
+
+	// The index is normalized by 256 for ABGR8888 palettes and by 512 for every other palette format.
+	float texturePixels = 256;
+	if (clutFormat != GE_CMODE_32BIT_ABGR8888)
+		texturePixels = 512;
+
+	if (shift) {
+		WRITE(p, "  index = ((index >> %i) & 0x%02x)", shift, mask);
+	} else {
+		WRITE(p, "  index = (index & 0x%02x)", mask);
+	}
+	if (offset) {
+		WRITE(p, " | %i;\n", offset);  // '|' matches what we have in gstate.h
+	} else {
+		WRITE(p, ";\n");
+	}
+
+	// The +0.5 moves the lookup coordinate to the middle of the selected palette texel.
+	WRITE(p, "  fragColor0 = texture(pal, vec2((float(index) + 0.5) * (1.0 / %f), 0.0));\n", texturePixels);
+	WRITE(p, "}\n");
+}
+
+// Writes a floating-point-only depalettize fragment shader into `buffer`.
+// FP only, to suit GL(ES) 2.0; the same lookup math is also emitted as HLSL for HLSL_DX9.
+//
+// buffer:      destination for the NUL-terminated shader source. It is written with
+//              unbounded sprintf calls, so the caller must provide enough room.
+// pixelFormat: format of the texture being sampled.
+// lang:        GLSL_140 emits GLSL ("#version 100" under USING_GLES2, otherwise "#version 110"),
+//              HLSL_DX9 emits HLSL. For any other value nothing is written to buffer.
+//
+// The CLUT index mask, shift, start position and palette format are read from the global gstate.
+// Mask/shift combinations this float math cannot express are reported through
+// ERROR_LOG_REPORT_ONCE; a shader is still written in that case.
+void GenerateDepalShaderFloat(char *buffer, GEBufferFormat pixelFormat, ShaderLanguage lang) {
+	char *p = buffer;
+
+	// The modulo intrinsic is spelled differently in HLSL and GLSL.
+	const char *modFunc = lang == HLSL_DX9 ? "fmod" : "mod";
+
+	char lookupMethod[128] = "index.r";
+	char offset[128] = "";
+
+	const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat();
+	const u32 clutBase = gstate.getClutIndexStartPos();
+
+	const int shift = gstate.getClutIndexShift();
+	const int mask = gstate.getClutIndexMask();
+
+	float index_multiplier = 1.0f;
+	// pixelformat is the format of the texture we are sampling.
+	bool formatOK = true;
+	switch (pixelFormat) {
+	case GE_FORMAT_8888:
+		if ((mask & (mask + 1)) == 0) {
+			// If the value has all bits contiguous (bitmask check above), we can mod by it + 1.
+			// rgba[shift] names the color component that holds bit number `shift`.
+			const char *rgba = "rrrrrrrrggggggggbbbbbbbbaaaaaaaa";
+			const u8 rgba_shift = shift & 7;
+			if (rgba_shift == 0 && mask == 0xFF) {
+				sprintf(lookupMethod, "index.%c", rgba[shift]);
+			} else {
+				sprintf(lookupMethod, "%s(index.%c * %f, %d.0)", modFunc, rgba[shift], 255.99f / (1 << rgba_shift), mask + 1);
+				index_multiplier = 1.0f / 256.0f;
+				// Format was OK if there weren't bits from another component.
+				formatOK = mask <= 255 - (1 << rgba_shift);
+			}
+		} else {
+			formatOK = false;
+		}
+		break;
+	case GE_FORMAT_4444:
+		if ((mask & (mask + 1)) == 0 && shift < 16) {
+			const char *rgba = "rrrrggggbbbbaaaa";
+			const u8 rgba_shift = shift & 3;
+			if (rgba_shift == 0 && mask == 0xF) {
+				sprintf(lookupMethod, "index.%c", rgba[shift]);
+				index_multiplier = 15.0f / 256.0f;
+			} else {
+				// Let's divide and mod to get the right bits.  A common case is shift=0, mask=01.
+				sprintf(lookupMethod, "%s(index.%c * %f, %d.0)", modFunc, rgba[shift], 15.99f / (1 << rgba_shift), mask + 1);
+				index_multiplier = 1.0f / 256.0f;
+				formatOK = mask <= 15 - (1 << rgba_shift);
+			}
+		} else {
+			formatOK = false;
+		}
+		break;
+	case GE_FORMAT_565:
+		if ((mask & (mask + 1)) == 0 && shift < 16) {
+			// Per-bit tables: shift within the owning component, and that component's maximum value.
+			const u8 shifts[16] = { 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4 };
+			const int multipliers[16] = { 31, 31, 31, 31, 31, 63, 63, 63, 63, 63, 63, 31, 31, 31, 31, 31 };
+			const char *rgba = "rrrrrggggggbbbbb";
+			const u8 rgba_shift = shifts[shift];
+			if (rgba_shift == 0 && mask == multipliers[shift]) {
+				sprintf(lookupMethod, "index.%c", rgba[shift]);
+				index_multiplier = multipliers[shift] / 256.0f;
+			} else {
+				// We just need to divide the right component by the right value, and then mod against the mask.
+				// A common case is shift=1, mask=0f.
+				sprintf(lookupMethod, "%s(index.%c * %f, %d.0)", modFunc, rgba[shift], ((float)multipliers[shift] + 0.99f) / (1 << rgba_shift), mask + 1);
+				index_multiplier = 1.0f / 256.0f;
+				formatOK = mask <= multipliers[shift] - (1 << rgba_shift);
+			}
+		} else {
+			formatOK = false;
+		}
+		break;
+	case GE_FORMAT_5551:
+		if ((mask & (mask + 1)) == 0 && shift < 16) {
+			const char *rgba = "rrrrrgggggbbbbba";
+			const u8 rgba_shift = shift % 5;
+			if (rgba_shift == 0 && mask == 0x1F) {
+				sprintf(lookupMethod, "index.%c", rgba[shift]);
+				index_multiplier = 31.0f / 256.0f;
+			} else if (shift == 15 && mask == 1) {
+				sprintf(lookupMethod, "index.%c", rgba[shift]);
+				index_multiplier = 1.0f / 256.0f;
+			} else {
+				// A isn't possible here.
+				sprintf(lookupMethod, "%s(index.%c * %f, %d.0)", modFunc, rgba[shift], 31.99f / (1 << rgba_shift), mask + 1);
+				index_multiplier = 1.0f / 256.0f;
+				formatOK = mask <= 31 - (1 << rgba_shift);
+			}
+		} else {
+			formatOK = false;
+		}
+		break;
+	default:
+		// Other formats keep the initial "index.r" lookup and are not reported as unsupported.
+		break;
+	}
+
+	// Coordinates are normalized by 256 for ABGR8888 palettes and by 512 for every other format.
+	float texturePixels = 256.f;
+	if (clutFormat != GE_CMODE_32BIT_ABGR8888) {
+		texturePixels = 512.f;
+		index_multiplier *= 0.5f;
+	}
+
+	// Adjust index_multiplier, similar to the use of 15.99 instead of 16 in the ES 3 path.
+	// index_multiplier -= 0.01f / texturePixels;
+
+	if (!formatOK) {
+		ERROR_LOG_REPORT_ONCE(depal, G3D, "%i depal unsupported: shift=%i mask=%02x offset=%d", pixelFormat, shift, mask, clutBase);
+	}
+
+	// Offset by half a texel (plus clutBase) to turn NEAREST filtering into FLOOR.
+	float texel_offset = ((float)clutBase + 0.5f) / texturePixels;
+	sprintf(offset, " + %f", texel_offset);
+
+	if (lang == GLSL_140) {
+#ifdef USING_GLES2
+		WRITE(p, "#version 100\n");
+		WRITE(p, "precision mediump float;\n");
+#else
+		WRITE(p, "#version 110\n");
+#endif
+		WRITE(p, "varying vec2 v_texcoord0;\n");
+		WRITE(p, "uniform sampler2D tex;\n");
+		WRITE(p, "uniform sampler2D pal;\n");
+		WRITE(p, "void main() {\n");
+		WRITE(p, "  vec4 index = texture2D(tex, v_texcoord0);\n");
+		WRITE(p, "  float coord = (%s * %f)%s;\n", lookupMethod, index_multiplier, offset);
+		WRITE(p, "  gl_FragColor = texture2D(pal, vec2(coord, 0.0));\n");
+		WRITE(p, "}\n");
+	} else if (lang == HLSL_DX9) {
+		WRITE(p, "sampler tex: register(s0);\n");
+		WRITE(p, "sampler pal: register(s1);\n");
+		// HLSL has no gl_FragColor: the pixel shader returns its color, bound to the COLOR0 output.
+		WRITE(p, "float4 main(float2 v_texcoord0 : TEXCOORD0) : COLOR0 {\n");
+		WRITE(p, "  float4 index = tex2D(tex, v_texcoord0);\n");
+		WRITE(p, "  float coord = (%s * %f)%s;\n", lookupMethod, index_multiplier, offset);
+		WRITE(p, "  return tex2D(pal, float2(coord, 0.0));\n");
+		WRITE(p, "}\n");
+	}
+}
+
+// Writes the depalettize fragment shader for the requested shader language into `buffer`.
+// GLSL_300 uses the integer generator; GLSL_140 and HLSL_DX9 share the float generator.
+// Any other language value leaves buffer untouched.
+void GenerateDepalShader(char *buffer, GEBufferFormat pixelFormat, ShaderLanguage language) {
+	if (language == GLSL_300) {
+		GenerateDepalShader300(buffer, pixelFormat);
+	} else if (language == GLSL_140 || language == HLSL_DX9) {
+		GenerateDepalShaderFloat(buffer, pixelFormat, language);
+	}
+}
+
+#undef WRITE
--- a/GPU/Common/DepalettizeShaderCommon.h
+++ b/GPU/Common/DepalettizeShaderCommon.h
@ -0,0 +1,28 @@
+// Copyright (c) 2014- PPSSPP Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0 or later versions.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official git repository and contact information can be found at
+// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
+
+#pragma once
+
+#include "GPU/ge_constants.h"
+
+// Shader language/dialect that GenerateDepalShader should emit.
+enum ShaderLanguage {
+	// Float-only GLSL; the generator emits "#version 100" under USING_GLES2, otherwise "#version 110".
+	GLSL_140,
+	// Integer-capable GLSL; the generator emits "#version 300 es" under USING_GLES2, otherwise "#version 330".
+	GLSL_300,
+	// HLSL output, using the same float-only lookup math as GLSL_140.
+	HLSL_DX9,
+};
+
+// Writes the source of a depalettize fragment shader into `buffer` as a NUL-terminated string.
+//
+// buffer:      caller-provided destination. No size is passed and the implementation uses
+//              sprintf, so the caller must provide enough room for the whole shader.
+// pixelFormat: format of the texture being sampled.
+// language:    which shader language/dialect to emit.
+//
+// The CLUT index mask, shift, start position and palette format are read from the global gstate.
+void GenerateDepalShader(char *buffer, GEBufferFormat pixelFormat, ShaderLanguage language);
--- a/GPU/GLES/DepalettizeShader.cpp
+++ b/GPU/GLES/DepalettizeShader.cpp
@ -23,6 +23,7 @@
 #include "DepalettizeShader.h"
 #include "GPU/GPUState.h"
 #include "GPU/GLES/TextureCache.h"
+#include "GPU/Common/DepalettizeShaderCommon.h"

 static const int DEPAL_TEXTURE_OLD_AGE = 120;

@ -107,216 +108,6 @@ DepalShaderCache::~DepalShaderCache() {
 	glDeleteShader(vertexShader_);
 }

-#define WRITE p+=sprintf
-
-// Uses integer instructions available since OpenGL 3.0. Suitable for ES 3.0 as well.
-void GenerateDepalShader300(char *buffer, GEBufferFormat pixelFormat) {
-	char *p = buffer;
-#ifdef USING_GLES2
-	WRITE(p, "#version 300 es\n");
-	WRITE(p, "precision mediump float;\n");
-#else
-	WRITE(p, "#version 330\n");
-#endif
-	WRITE(p, "in vec2 v_texcoord0;\n");
-	WRITE(p, "out vec4 fragColor0;\n");
-	WRITE(p, "uniform sampler2D tex;\n");
-	WRITE(p, "uniform sampler2D pal;\n");
-
-	WRITE(p, "void main() {\n");
-	WRITE(p, "  vec4 color = texture(tex, v_texcoord0);\n");
-
-	int mask = gstate.getClutIndexMask();
-	int shift = gstate.getClutIndexShift();
-	int offset = gstate.getClutIndexStartPos();
-	const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat();
-	// Unfortunately sampling turned our texture into floating point. To avoid this, might be able
-	// to declare them as isampler2D objects, but these require integer textures, which needs more work.
-	// Anyhow, we simply work around this by converting back to integer. Hopefully there will be no loss of precision.
-	// Use the mask to skip reading some components.
-	int shiftedMask = mask << shift;
-	switch (pixelFormat) {
-	case GE_FORMAT_8888:
-		if (shiftedMask & 0xFF) WRITE(p, "  int r = int(color.r * 255.99);\n"); else WRITE(p, "  int r = 0;\n");
-		if (shiftedMask & 0xFF00) WRITE(p, "  int g = int(color.g * 255.99);\n"); else WRITE(p, "  int g = 0;\n");
-		if (shiftedMask & 0xFF0000) WRITE(p, "  int b = int(color.b * 255.99);\n"); else WRITE(p, "  int b = 0;\n");
-		if (shiftedMask & 0xFF000000) WRITE(p, "  int a = int(color.a * 255.99);\n"); else WRITE(p, "  int a = 0;\n");
-		WRITE(p, "  int index = (a << 24) | (b << 16) | (g << 8) | (r);\n");
-		break;
-	case GE_FORMAT_4444:
-		if (shiftedMask & 0xF) WRITE(p, "  int r = int(color.r * 15.99);\n"); else WRITE(p, "  int r = 0;\n");
-		if (shiftedMask & 0xF0) WRITE(p, "  int g = int(color.g * 15.99);\n"); else WRITE(p, "  int g = 0;\n");
-		if (shiftedMask & 0xF00) WRITE(p, "  int b = int(color.b * 15.99);\n"); else WRITE(p, "  int b = 0;\n");
-		if (shiftedMask & 0xF000) WRITE(p, "  int a = int(color.a * 15.99);\n"); else WRITE(p, "  int a = 0;\n");
-		WRITE(p, "  int index = (a << 12) | (b << 8) | (g << 4) | (r);\n");
-		break;
-	case GE_FORMAT_565:
-		if (shiftedMask & 0x1F) WRITE(p, "  int r = int(color.r * 31.99);\n"); else WRITE(p, "  int r = 0;\n");
-		if (shiftedMask & 0x7E0) WRITE(p, "  int g = int(color.g * 63.99);\n"); else WRITE(p, "  int g = 0;\n");
-		if (shiftedMask & 0xF800) WRITE(p, "  int b = int(color.b * 31.99);\n"); else WRITE(p, "  int b = 0;\n");
-		WRITE(p, "  int index = (b << 11) | (g << 5) | (r);\n");
-		break;
-	case GE_FORMAT_5551:
-		if (shiftedMask & 0x1F) WRITE(p, "  int r = int(color.r * 31.99);\n"); else WRITE(p, "  int r = 0;\n");
-		if (shiftedMask & 0x3E0) WRITE(p, "  int g = int(color.g * 31.99);\n"); else WRITE(p, "  int g = 0;\n");
-		if (shiftedMask & 0x7C00) WRITE(p, "  int b = int(color.b * 31.99);\n"); else WRITE(p, "  int b = 0;\n");
-		if (shiftedMask & 0x8000) WRITE(p, "  int a = int(color.a);\n"); else WRITE(p, "  int a = 0;\n");
-		WRITE(p, "  int index = (a << 15) | (b << 10) | (g << 5) | (r);\n");
-		break;
-	default:
-		break;
-	}
-
-	float texturePixels = 256;
-	if (clutFormat != GE_CMODE_32BIT_ABGR8888)
-		texturePixels = 512;
-
-	if (shift) {
-		WRITE(p, "  index = ((index >> %i) & 0x%02x)", shift, mask);
-	} else {
-		WRITE(p, "  index = (index & 0x%02x)", mask);
-	}
-	if (offset) {
-		WRITE(p, " | %i;\n", offset);  // '|' matches what we have in gstate.h
-	} else {
-		WRITE(p, ";\n");
-	}
-
-	WRITE(p, "  fragColor0 = texture(pal, vec2((float(index) + 0.5) * (1.0 / %f), 0.0));\n", texturePixels);
-	WRITE(p, "}\n");
-}
-
-// FP only, to suit GL(ES) 2.0
-void GenerateDepalShader100(char *buffer, GEBufferFormat pixelFormat) {
-	char *p = buffer;
-
-	char lookupMethod[128] = "index.r";
-	char offset[128] = "";
-
-	const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat();
-	const u32 clutBase = gstate.getClutIndexStartPos();
-
-	const int shift = gstate.getClutIndexShift();
-	const int mask = gstate.getClutIndexMask();
-
-	float index_multiplier = 1.0f;
-	// pixelformat is the format of the texture we are sampling.
-	bool formatOK = true;
-	switch (pixelFormat) {
-	case GE_FORMAT_8888:
-		if ((mask & (mask + 1)) == 0) {
-			// If the value has all bits contiguous (bitmask check above), we can mod by it + 1.
-			const char *rgba = "rrrrrrrrggggggggbbbbbbbbaaaaaaaa";
-			const u8 rgba_shift = shift & 7;
-			if (rgba_shift == 0 && mask == 0xFF) {
-				sprintf(lookupMethod, "index.%c", rgba[shift]);
-			} else {
-				sprintf(lookupMethod, "mod(index.%c * %f, %d.0)", rgba[shift], 255.99f / (1 << rgba_shift), mask + 1);
-				index_multiplier = 1.0f / 256.0f;
-				// Format was OK if there weren't bits from another component.
-				formatOK = mask <= 255 - (1 << rgba_shift);
-			}
-		} else {
-			formatOK = false;
-		}
-		break;
-	case GE_FORMAT_4444:
-		if ((mask & (mask + 1)) == 0 && shift < 16) {
-			const char *rgba = "rrrrggggbbbbaaaa";
-			const u8 rgba_shift = shift & 3;
-			if (rgba_shift == 0 && mask == 0xF) {
-				sprintf(lookupMethod, "index.%c", rgba[shift]);
-				index_multiplier = 15.0f / 256.0f;
-			} else {
-				// Let's divide and mod to get the right bits.  A common case is shift=0, mask=01.
-				sprintf(lookupMethod, "mod(index.%c * %f, %d.0)", rgba[shift], 15.99f / (1 << rgba_shift), mask + 1);
-				index_multiplier = 1.0f / 256.0f;
-				formatOK = mask <= 15 - (1 << rgba_shift);
-			}
-		} else {
-			formatOK = false;
-		}
-		break;
-	case GE_FORMAT_565:
-		if ((mask & (mask + 1)) == 0 && shift < 16) {
-			const u8 shifts[16] = {0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4};
-			const int multipliers[16] = {31, 31, 31, 31, 31, 63, 63, 63, 63, 63, 63, 31, 31, 31, 31, 31};
-			const char *rgba = "rrrrrggggggbbbbb";
-			const u8 rgba_shift = shifts[shift];
-			if (rgba_shift == 0 && mask == multipliers[shift]) {
-				sprintf(lookupMethod, "index.%c", rgba[shift]);
-				index_multiplier = multipliers[shift] / 256.0f;
-			} else {
-				// We just need to divide the right component by the right value, and then mod against the mask.
-				// A common case is shift=1, mask=0f.
-				sprintf(lookupMethod, "mod(index.%c * %f, %d.0)", rgba[shift], ((float)multipliers[shift] + 0.99f) / (1 << rgba_shift), mask + 1);
-				index_multiplier = 1.0f / 256.0f;
-				formatOK = mask <= multipliers[shift] - (1 << rgba_shift);
-			}
-		} else {
-			formatOK = false;
-		}
-		break;
-	case GE_FORMAT_5551:
-		if ((mask & (mask + 1)) == 0 && shift < 16) {
-			const char *rgba = "rrrrrgggggbbbbba";
-			const u8 rgba_shift = shift % 5;
-			if (rgba_shift == 0 && mask == 0x1F) {
-				sprintf(lookupMethod, "index.%c", rgba[shift]);
-				index_multiplier = 31.0f / 256.0f;
-			} else if (shift == 15 && mask == 1) {
-				sprintf(lookupMethod, "index.%c", rgba[shift]);
-				index_multiplier = 1.0f / 256.0f;
-			} else {
-				// A isn't possible here.
-				sprintf(lookupMethod, "mod(index.%c * %f, %d.0)", rgba[shift], 31.99f / (1 << rgba_shift), mask + 1);
-				index_multiplier = 1.0f / 256.0f;
-				formatOK = mask <= 31 - (1 << rgba_shift);
-			}
-		} else {
-			formatOK = false;
-		}
-		break;
-	default:
-		break;
-	}
-
-	float texturePixels = 256.f;
-	if (clutFormat != GE_CMODE_32BIT_ABGR8888) {
-		texturePixels = 512.f;
-		index_multiplier *= 0.5f;
-	}
-
-	// Adjust index_multiplier, similar to the use of 15.99 instead of 16 in the ES 3 path.
-	// index_multiplier -= 0.01f / texturePixels;
-
-	if (!formatOK) {
-		ERROR_LOG_REPORT_ONCE(depal, G3D, "%i depal unsupported: shift=%i mask=%02x offset=%d", pixelFormat, shift, mask, clutBase);
-	}
-
-	// Offset by half a texel (plus clutBase) to turn NEAREST filtering into FLOOR.
-	float texel_offset = ((float)clutBase + 0.5f) / texturePixels;
-	sprintf(offset, " + %f", texel_offset);
-
-#ifdef USING_GLES2
-	WRITE(p, "#version 100\n");
-	WRITE(p, "precision mediump float;\n");
-#else
-	WRITE(p, "#version 110\n");
-#endif
-	WRITE(p, "varying vec2 v_texcoord0;\n");
-	WRITE(p, "uniform sampler2D tex;\n");
-	WRITE(p, "uniform sampler2D pal;\n");
-	WRITE(p, "void main() {\n");
-	WRITE(p, "  vec4 index = texture2D(tex, v_texcoord0);\n");
-	WRITE(p, "  float coord = (%s * %f)%s;\n", lookupMethod, index_multiplier, offset);
-	WRITE(p, "  gl_FragColor = texture2D(pal, vec2(coord, 0.0));\n");
-	WRITE(p, "}\n");
-}
-
-#undef WRITE
-
-
 u32 DepalShaderCache::GenerateShaderID(GEBufferFormat pixelFormat) {
 	return (gstate.clutformat & 0xFFFFFF) | (pixelFormat << 24);
 }
@ -394,11 +185,7 @@ GLuint DepalShaderCache::GetDepalettizeShader(GEBufferFormat pixelFormat) {

 	char *buffer = new char[2048];

-	if (useGL3_) {
-		GenerateDepalShader300(buffer, pixelFormat);
-	} else {
-		GenerateDepalShader100(buffer, pixelFormat);
-	}
+	GenerateDepalShader(buffer, pixelFormat, useGL3_ ? GLSL_300 : GLSL_140);

 	GLuint fragShader = glCreateShader(GL_FRAGMENT_SHADER);

--- a/GPU/GPU.vcxproj
+++ b/GPU/GPU.vcxproj
@ -181,6 +181,7 @@
  </ItemDefinitionGroup>
  <ItemGroup>
    <ClInclude Include="..\ext\xbrz\xbrz.h" />
+    <ClInclude Include="Common\DepalettizeShaderCommon.h" />
    <ClInclude Include="Common\DrawEngineCommon.h" />
    <ClInclude Include="Common\FramebufferCommon.h" />
    <ClInclude Include="Common\GPUDebugInterface.h" />
@ -239,6 +240,7 @@
  </ItemGroup>
  <ItemGroup>
    <ClCompile Include="..\ext\xbrz\xbrz.cpp" />
+    <ClCompile Include="Common\DepalettizeShaderCommon.cpp" />
    <ClCompile Include="Common\DrawEngineCommon.cpp" />
    <ClCompile Include="Common\FramebufferCommon.cpp" />
    <ClCompile Include="Common\IndexGenerator.cpp" />
--- a/GPU/GPU.vcxproj.filters
+++ b/GPU/GPU.vcxproj.filters
@ -177,6 +177,9 @@
    <ClInclude Include="Common\DrawEngineCommon.h">
      <Filter>Common</Filter>
    </ClInclude>
+    <ClInclude Include="Common\DepalettizeShaderCommon.h">
+      <Filter>Common</Filter>
+    </ClInclude>
  </ItemGroup>
  <ItemGroup>
    <ClCompile Include="Math3D.cpp">
@ -338,6 +341,9 @@
    <ClCompile Include="Directx9\StencilBufferDX9.cpp">
      <Filter>DirectX9</Filter>
    </ClCompile>
+    <ClCompile Include="Common\DepalettizeShaderCommon.cpp">
+      <Filter>Common</Filter>
+    </ClCompile>
  </ItemGroup>
  <ItemGroup>
    <None Include="CMakeLists.txt" />
--- a/Qt/GPU.pro
+++ b/Qt/GPU.pro
@ -41,6 +41,7 @@ SOURCES += $$P/GPU/GeDisasm.cpp \ # GPU
 	$$P/GPU/GLES/VertexShaderGenerator.cpp \
 	$$P/GPU/Software/*.cpp \
 	$$P/GPU/Debugger/*.cpp \
+	$$P/GPU/Common/DepalettizeShaderCommon.cpp \
 	$$P/GPU/Common/IndexGenerator.cpp \
 	$$P/GPU/Common/TextureDecoder.cpp \
 	$$P/GPU/Common/VertexDecoderCommon.cpp \
--- a/android/jni/Android.mk
+++ b/android/jni/Android.mk
@ -148,6 +148,7 @@ EXEC_AND_LIB_FILES := \
  $(SRC)/GPU/GPUCommon.cpp \
  $(SRC)/GPU/GPUState.cpp \
  $(SRC)/GPU/GeDisasm.cpp \
+  $(SRC)/GPU/Common/DepalettizeShaderCommon.cpp \
  $(SRC)/GPU/Common/FramebufferCommon.cpp \
  $(SRC)/GPU/Common/IndexGenerator.cpp.arm \
  $(SRC)/GPU/Common/SoftwareTransformCommon.cpp.arm \