Some shader and vertex format optimizations

This commit is contained in:
Henrik Rydgard 2013-02-05 01:37:00 +01:00
parent 3ee6ff01e1
commit bdc467769e
6 changed files with 93 additions and 49 deletions

View File

@ -513,6 +513,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) {
// Throughmode changed, let's make the proj matrix dirty. // Throughmode changed, let's make the proj matrix dirty.
shaderManager_->DirtyUniform(DIRTY_PROJMATRIX); shaderManager_->DirtyUniform(DIRTY_PROJMATRIX);
} }
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
// This sets through-mode or not, as well. // This sets through-mode or not, as well.
break; break;

View File

@ -77,7 +77,7 @@ void GenerateFragmentShader(char *buffer)
#if defined(GLSL_ES_1_0) #if defined(GLSL_ES_1_0)
WRITE(p, "precision mediump float;\n"); WRITE(p, "precision lowp float;\n");
#elif !defined(FORCE_OPENGL_2_0) #elif !defined(FORCE_OPENGL_2_0)
WRITE(p, "#version 110\n"); WRITE(p, "#version 110\n");
#endif #endif
@ -86,30 +86,38 @@ void GenerateFragmentShader(char *buffer)
int doTexture = (gstate.textureMapEnable & 1) && !(gstate.clearmode & 1); int doTexture = (gstate.textureMapEnable & 1) && !(gstate.clearmode & 1);
bool enableFog = gstate.isFogEnabled() && !gstate.isModeThrough() && !gstate.isModeClear(); bool enableFog = gstate.isFogEnabled() && !gstate.isModeThrough() && !gstate.isModeClear();
bool enableAlphaTest = (gstate.alphaTestEnable & 1) && !gstate.isModeClear();
bool enableColorTest = (gstate.colorTestEnable & 1) && !gstate.isModeClear();
if (doTexture) if (doTexture)
WRITE(p, "uniform sampler2D tex;\n"); WRITE(p, "uniform sampler2D tex;\n");
if ((gstate.alphaTestEnable & 1) || (gstate.colorTestEnable & 1)) { if (enableAlphaTest || enableColorTest) {
WRITE(p, "uniform vec4 u_alphacolorref;\n"); WRITE(p, "uniform vec4 u_alphacolorref;\n");
} }
WRITE(p, "uniform vec3 u_texenv;\n"); if (gstate.textureMapEnable & 1) {
WRITE(p, "uniform vec3 u_texenv;\n");
}
WRITE(p, "varying vec4 v_color0;\n"); WRITE(p, "varying vec4 v_color0;\n");
if (lmode) if (lmode)
WRITE(p, "varying vec3 v_color1;\n"); WRITE(p, "varying vec3 v_color1;\n");
if (enableFog) { if (enableFog) {
WRITE(p, "uniform vec3 u_fogcolor;\n"); WRITE(p, "uniform vec3 u_fogcolor;\n");
#if defined(GLSL_ES_1_0)
WRITE(p, "varying mediump float v_fogdepth;\n");
#else
WRITE(p, "varying float v_fogdepth;\n"); WRITE(p, "varying float v_fogdepth;\n");
#endif
} }
if (doTexture) if (doTexture)
WRITE(p, "varying vec2 v_texcoord;\n"); WRITE(p, "varying vec2 v_texcoord;\n");
WRITE(p, "void main() {\n"); WRITE(p, "void main() {\n");
WRITE(p, " vec4 v;\n");
if (gstate.clearmode & 1) if (gstate.clearmode & 1)
{ {
// Clear mode does not allow any fancy shading. // Clear mode does not allow any fancy shading.
WRITE(p, " v = v_color0;\n"); WRITE(p, " gl_FragColor = v_color0;\n");
} }
else else
{ {
@ -119,59 +127,58 @@ void GenerateFragmentShader(char *buffer)
WRITE(p, " vec4 s = vec4(v_color1, 0.0);\n"); WRITE(p, " vec4 s = vec4(v_color1, 0.0);\n");
secondary = " + s"; secondary = " + s";
} else { } else {
WRITE(p, " vec4 s = vec4(0.0, 0.0, 0.0, 0.0);\n");
secondary = ""; secondary = "";
} }
if (gstate.textureMapEnable & 1) { if (gstate.textureMapEnable & 1) {
WRITE(p, " vec4 t = texture2D(tex, v_texcoord);\n"); WRITE(p, " vec4 t = texture2D(tex, v_texcoord);\n");
WRITE(p, " vec4 p = clamp(v_color0, 0.0, 1.0);\n"); WRITE(p, " vec4 p = v_color0;\n");
if (gstate.texfunc & 0x100) { // texfmt == RGBA if (gstate.texfunc & 0x100) { // texfmt == RGBA
switch (gstate.texfunc & 0x7) { switch (gstate.texfunc & 0x7) {
case GE_TEXFUNC_MODULATE: case GE_TEXFUNC_MODULATE:
WRITE(p, " v = t * p%s;\n", secondary); break; WRITE(p, " vec4 v = t * p%s;\n", secondary); break;
case GE_TEXFUNC_DECAL: case GE_TEXFUNC_DECAL:
WRITE(p, " v = vec4(1.0 - t.a * p.rgb + t.a * u_texenv.rgb, p.a)%s;\n", secondary); break; WRITE(p, " vec4 v = vec4(1.0 - t.a * p.rgb + t.a * u_texenv.rgb, p.a)%s;\n", secondary); break;
case GE_TEXFUNC_BLEND: case GE_TEXFUNC_BLEND:
WRITE(p, " v = vec4((1.0 - t.rgb) * p.rgb + t.rgb * u_texenv.rgb, p.a * t.a)%s;\n", secondary); break; WRITE(p, " vec4 v = vec4((1.0 - t.rgb) * p.rgb + t.rgb * u_texenv.rgb, p.a * t.a)%s;\n", secondary); break;
case GE_TEXFUNC_REPLACE: case GE_TEXFUNC_REPLACE:
WRITE(p, " v = t%s;\n", secondary); break; WRITE(p, " vec4 v = t%s;\n", secondary); break;
case GE_TEXFUNC_ADD: case GE_TEXFUNC_ADD:
WRITE(p, " v = vec4(t.rgb + p.rgb, p.a * t.a)%s;\n", secondary); break; WRITE(p, " vec4 v = vec4(t.rgb + p.rgb, p.a * t.a)%s;\n", secondary); break;
default: default:
WRITE(p, " v = p;\n"); break; WRITE(p, " vec4 v = p;\n"); break;
} }
} else { // texfmt == RGB } else { // texfmt == RGB
switch (gstate.texfunc & 0x7) { switch (gstate.texfunc & 0x7) {
case GE_TEXFUNC_MODULATE: case GE_TEXFUNC_MODULATE:
WRITE(p, " v = vec4(t.rgb * p.rgb, p.a)%s;\n", secondary); break; WRITE(p, " vec4 v = vec4(t.rgb * p.rgb, p.a)%s;\n", secondary); break;
case GE_TEXFUNC_DECAL: case GE_TEXFUNC_DECAL:
WRITE(p, " v = vec4(t.rgb, p.a)%s;\n", secondary); break; WRITE(p, " vec4 v = vec4(t.rgb, p.a)%s;\n", secondary); break;
case GE_TEXFUNC_BLEND: case GE_TEXFUNC_BLEND:
WRITE(p, " v = vec4(1.0 - t.rgb) * p.rgb + t.rgb * u_texenv.rgb, p.a)%s;\n", secondary); break; WRITE(p, " vec4 v = vec4(1.0 - t.rgb) * p.rgb + t.rgb * u_texenv.rgb, p.a)%s;\n", secondary); break;
case GE_TEXFUNC_REPLACE: case GE_TEXFUNC_REPLACE:
WRITE(p, " v = vec4(t.rgb, p.a)%s;\n", secondary); break; WRITE(p, " vec4 v = vec4(t.rgb, p.a)%s;\n", secondary); break;
case GE_TEXFUNC_ADD: case GE_TEXFUNC_ADD:
WRITE(p, " v = vec4(t.rgb + p.rgb, p.a)%s;\n", secondary); break; WRITE(p, " vec4 v = vec4(t.rgb + p.rgb, p.a)%s;\n", secondary); break;
default: default:
WRITE(p, " v = p;\n"); break; WRITE(p, " vec4 v = p;\n"); break;
} }
} }
} else { } else {
// No texture mapping // No texture mapping
WRITE(p, " v = clamp(v_color0, 0.0, 1.0)%s;\n", secondary); WRITE(p, " vec4 v = v_color0 %s;\n", secondary);
} }
// Color doubling // Color doubling
if (gstate.texfunc & 0x10000) { if (gstate.texfunc & 0x10000) {
WRITE(p, " v = v * vec4(2.0, 2.0, 2.0, 2.0);"); WRITE(p, " v = v * 2.0;\n");
} }
if (gstate.alphaTestEnable & 1) { if (enableAlphaTest) {
int alphaTestFunc = gstate.alphatest & 7; int alphaTestFunc = gstate.alphatest & 7;
const char *alphaTestFuncs[] = { "#", "#", " == ", " != ", " < ", " <= ", " > ", " >= " }; // never/always don't make sense const char *alphaTestFuncs[] = { "#", "#", " == ", " != ", " < ", " <= ", " > ", " >= " }; // never/always don't make sense
if (alphaTestFuncs[alphaTestFunc][0] != '#') if (alphaTestFuncs[alphaTestFunc][0] != '#')
WRITE(p, "if (!(v.a %s u_alphacolorref.a)) discard;", alphaTestFuncs[alphaTestFunc]); WRITE(p, " if (!(v.a %s u_alphacolorref.a)) discard;\n", alphaTestFuncs[alphaTestFunc]);
} }
// Disabled for now until we actually find a need for it. // Disabled for now until we actually find a need for it.
@ -187,20 +194,20 @@ void GenerateFragmentShader(char *buffer)
if (enableFog) { if (enableFog) {
WRITE(p, " float fogCoef = clamp(v_fogdepth, 0.0, 1.0);\n"); WRITE(p, " float fogCoef = clamp(v_fogdepth, 0.0, 1.0);\n");
WRITE(p, " v = mix(vec4(u_fogcolor, v.a), v, fogCoef);\n"); WRITE(p, " gl_FragColor = mix(vec4(u_fogcolor, v.a), v, fogCoef);\n");
// WRITE(p, " v.x = v_depth;\n"); // WRITE(p, " v.x = v_depth;\n");
} else {
WRITE(p, " gl_FragColor = v;\n");
} }
} }
#ifdef DEBUG_SHADER #ifdef DEBUG_SHADER
if (doTexture) { if (doTexture) {
WRITE(p, " v = texture2D(tex, v_texcoord);\n"); WRITE(p, " gl_FragColor = texture2D(tex, v_texcoord);\n");
} else { } else {
WRITE(p, " v = vec4(1,0,1,1);\n"); WRITE(p, " gl_FragColor = vec4(1,0,1,1);\n");
} }
#endif #endif
WRITE(p, " gl_FragColor = v;\n");
WRITE(p, "}\n"); WRITE(p, "}\n");
} }

View File

@ -39,7 +39,7 @@ Shader::Shader(const char *code, uint32_t shaderType) {
OutputDebugString(code); OutputDebugString(code);
#endif #endif
shader = glCreateShader(shaderType); shader = glCreateShader(shaderType);
glShaderSource(shader, 1, &code, 0); glShaderSource(shader, 1, &code, 0);
glCompileShader(shader); glCompileShader(shader);
GLint success; GLint success;
glGetShaderiv(shader, GL_COMPILE_STATUS, &success); glGetShaderiv(shader, GL_COMPILE_STATUS, &success);
@ -264,7 +264,13 @@ void LinkedShader::updateUniforms() {
// Texturing // Texturing
if (u_uvscaleoffset != -1 && (dirtyUniforms & DIRTY_UVSCALEOFFSET)) { if (u_uvscaleoffset != -1 && (dirtyUniforms & DIRTY_UVSCALEOFFSET)) {
const float uvscaleoff[4] = { gstate_c.uScale, gstate_c.vScale, gstate_c.uOff, gstate_c.vOff}; float uvscaleoff[4] = { gstate_c.uScale, gstate_c.vScale, gstate_c.uOff, gstate_c.vOff};
if (gstate.isModeThrough()) {
uvscaleoff[0] /= gstate_c.curTextureWidth;
uvscaleoff[1] /= gstate_c.curTextureHeight;
uvscaleoff[2] /= gstate_c.curTextureWidth;
uvscaleoff[3] /= gstate_c.curTextureHeight;
}
glUniform4fv(u_uvscaleoffset, 1, uvscaleoff); glUniform4fv(u_uvscaleoffset, 1, uvscaleoff);
} }

View File

@ -315,6 +315,8 @@ static const GlTypeInfo GLComp[] = {
{GL_UNSIGNED_BYTE, 2, GL_TRUE},// DEC_U8_2, {GL_UNSIGNED_BYTE, 2, GL_TRUE},// DEC_U8_2,
{GL_UNSIGNED_BYTE, 3, GL_TRUE},// DEC_U8_3, {GL_UNSIGNED_BYTE, 3, GL_TRUE},// DEC_U8_3,
{GL_UNSIGNED_BYTE, 4, GL_TRUE},// DEC_U8_4, {GL_UNSIGNED_BYTE, 4, GL_TRUE},// DEC_U8_4,
{GL_UNSIGNED_SHORT, 2, GL_TRUE},// DEC_U16_2,
{GL_UNSIGNED_SHORT, 2, GL_FALSE},// DEC_U16A_2,
}; };
static inline void VertexAttribSetup(int attrib, int fmt, int stride, u8 *ptr) { static inline void VertexAttribSetup(int attrib, int fmt, int stride, u8 *ptr) {
@ -399,6 +401,13 @@ void TransformDrawEngine::SoftwareTransformAndDraw(
vertexCount = 0x10000/3; vertexCount = 0x10000/3;
#endif #endif
float uscale = 1.0f;
float vscale = 1.0f;
if (throughmode) {
uscale /= gstate_c.curTextureWidth;
vscale /= gstate_c.curTextureHeight;
}
Lighter lighter; Lighter lighter;
float fog_end = getFloat24(gstate.fog1); float fog_end = getFloat24(gstate.fog1);
float fog_slope = getFloat24(gstate.fog2); float fog_slope = getFloat24(gstate.fog2);
@ -430,6 +439,9 @@ void TransformDrawEngine::SoftwareTransformAndDraw(
if (reader.hasUV()) { if (reader.hasUV()) {
reader.ReadUV(uv); reader.ReadUV(uv);
uv[0] *= uscale;
uv[1] *= vscale;
} }
fogCoef = 1.0f; fogCoef = 1.0f;
// Scale UV? // Scale UV?
@ -529,8 +541,8 @@ void TransformDrawEngine::SoftwareTransformAndDraw(
{ {
case 0: // UV mapping case 0: // UV mapping
// Texture scale/offset is only performed in this mode. // Texture scale/offset is only performed in this mode.
uv[0] = ruv[0]*gstate_c.uScale + gstate_c.uOff; uv[0] = uscale * (ruv[0]*gstate_c.uScale + gstate_c.uOff);
uv[1] = ruv[1]*gstate_c.vScale + gstate_c.vOff; uv[1] = vscale * (ruv[1]*gstate_c.vScale + gstate_c.vOff);
break; break;
case 1: case 1:
{ {
@ -580,8 +592,12 @@ void TransformDrawEngine::SoftwareTransformAndDraw(
memcpy(&transformed[index].x, v, 3 * sizeof(float)); memcpy(&transformed[index].x, v, 3 * sizeof(float));
transformed[index].fog = fogCoef; transformed[index].fog = fogCoef;
memcpy(&transformed[index].u, uv, 2 * sizeof(float)); memcpy(&transformed[index].u, uv, 2 * sizeof(float));
memcpy(&transformed[index].color0, c0, 4 * sizeof(float)); for (int i = 0; i < 4; i++) {
memcpy(&transformed[index].color1, c1, 3 * sizeof(float)); transformed[index].color0[i] = c0[i] * 255.0f;
}
for (int i = 0; i < 4; i++) {
transformed[index].color1[i] = c1[i] * 255.0f;
}
} }
// Step 2: expand rectangles. // Step 2: expand rectangles.
@ -674,8 +690,8 @@ void TransformDrawEngine::SoftwareTransformAndDraw(
} }
glVertexAttribPointer(program->a_position, 4, GL_FLOAT, GL_FALSE, vertexSize, drawBuffer); glVertexAttribPointer(program->a_position, 4, GL_FLOAT, GL_FALSE, vertexSize, drawBuffer);
if (program->a_texcoord != -1) glVertexAttribPointer(program->a_texcoord, 2, GL_FLOAT, GL_FALSE, vertexSize, ((uint8_t*)drawBuffer) + 4 * 4); if (program->a_texcoord != -1) glVertexAttribPointer(program->a_texcoord, 2, GL_FLOAT, GL_FALSE, vertexSize, ((uint8_t*)drawBuffer) + 4 * 4);
if (program->a_color0 != -1) glVertexAttribPointer(program->a_color0, 4, GL_FLOAT, GL_FALSE, vertexSize, ((uint8_t*)drawBuffer) + 6 * 4); if (program->a_color0 != -1) glVertexAttribPointer(program->a_color0, 4, GL_UNSIGNED_BYTE, GL_TRUE, vertexSize, ((uint8_t*)drawBuffer) + 6 * 4);
if (program->a_color1 != -1) glVertexAttribPointer(program->a_color1, 3, GL_FLOAT, GL_FALSE, vertexSize, ((uint8_t*)drawBuffer) + 10 * 4); if (program->a_color1 != -1) glVertexAttribPointer(program->a_color1, 3, GL_UNSIGNED_BYTE, GL_TRUE, vertexSize, ((uint8_t*)drawBuffer) + 7 * 4);
if (drawIndexed) { if (drawIndexed) {
if (useVBO) { if (useVBO) {
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo_[curVbo_]); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo_[curVbo_]);

View File

@ -73,6 +73,8 @@ int DecFmtSize(u8 fmt) {
case DEC_U8_2: return 4; case DEC_U8_2: return 4;
case DEC_U8_3: return 4; case DEC_U8_3: return 4;
case DEC_U8_4: return 4; case DEC_U8_4: return 4;
case DEC_U16_2: return 4;
case DEC_U16A_2: return 4;
default: default:
return 0; return 0;
} }
@ -154,10 +156,10 @@ void VertexDecoder::Step_TcU16() const
// Decode step for 16-bit texture coordinates in through mode.
// Each line below shows the pre-change text followed by the post-change text.
// Before: the two u16 values were converted to float and divided by
//         gstate_c.curTextureWidth / gstate_c.curTextureHeight.
// After:  the two u16 values are copied unchanged into the decoded vertex at
//         decFmt.uvoff, so the destination is written as u16 rather than float.
// NOTE(review): the divide by texture size appears to move to
// LinkedShader::updateUniforms and SoftwareTransformAndDraw in this same
// commit - confirm that every consumer of through-mode UVs applies it.
void VertexDecoder::Step_TcU16Through() const void VertexDecoder::Step_TcU16Through() const
{ {
float *uv = (float *)(decoded_ + decFmt.uvoff); u16 *uv = (u16 *)(decoded_ + decFmt.uvoff);
const u16 *uvdata = (const u16*)(ptr_ + tcoff); const u16 *uvdata = (const u16*)(ptr_ + tcoff);
uv[0] = (float)uvdata[0] / (float)(gstate_c.curTextureWidth); uv[0] = uvdata[0];
uv[1] = (float)uvdata[1] / (float)(gstate_c.curTextureHeight); uv[1] = uvdata[1];
} }
void VertexDecoder::Step_TcFloat() const void VertexDecoder::Step_TcFloat() const
@ -171,8 +173,8 @@ void VertexDecoder::Step_TcFloatThrough() const
// Body of the through-mode decode step for float texture coordinates.
// Each line below shows the pre-change text followed by the post-change text.
// Before: each float coordinate was divided by gstate_c.curTextureWidth /
//         gstate_c.curTextureHeight before being stored.
// After:  the two floats are copied unchanged from ptr_ + tcoff to
//         decoded_ + decFmt.uvoff.
{ {
float *uv = (float *)(decoded_ + decFmt.uvoff); float *uv = (float *)(decoded_ + decFmt.uvoff);
const float *uvdata = (const float*)(ptr_ + tcoff); const float *uvdata = (const float*)(ptr_ + tcoff);
uv[0] = uvdata[0] / (float)(gstate_c.curTextureWidth); uv[0] = uvdata[0];
uv[1] = uvdata[1] / (float)(gstate_c.curTextureHeight); uv[1] = uvdata[1];
} }
void VertexDecoder::Step_Color565() const void VertexDecoder::Step_Color565() const
@ -580,8 +582,11 @@ void VertexDecoder::SetVertexType(u32 fmt) {
steps_[numSteps_++] = throughmode ? tcstep_through[tc] : tcstep[tc]; steps_[numSteps_++] = throughmode ? tcstep_through[tc] : tcstep[tc];
// All UV decode to DEC_FLOAT2 currently. // All UV except through mode decode to DEC_FLOAT2 currently.
decFmt.uvfmt = DEC_FLOAT_2; if (throughmode && (tc == (GE_VTYPE_TC_16BIT >> GE_VTYPE_TC_SHIFT)))
decFmt.uvfmt = DEC_U16A_2;
else
decFmt.uvfmt = DEC_FLOAT_2;
decFmt.uvoff = decOff; decFmt.uvoff = decOff;
decOff += DecFmtSize(decFmt.uvfmt); decOff += DecFmtSize(decFmt.uvfmt);
} }

View File

@ -38,6 +38,8 @@ enum {
DEC_U8_2, DEC_U8_2,
DEC_U8_3, DEC_U8_3,
DEC_U8_4, DEC_U8_4,
DEC_U16_2,
DEC_U16A_2,
}; };
int DecFmtSize(u8 fmt); int DecFmtSize(u8 fmt);
@ -58,8 +60,8 @@ struct TransformedVertex
// Field list of the software-transformed vertex.
// Each line below shows the pre-change text followed by the post-change text.
// This commit changes color0 and color1 from float arrays to u8 arrays; the
// position, fog and UV fields stay float.
// NOTE(review): the new store loop in SoftwareTransformAndDraw iterates
// i < 4 when writing color1, but color1 has only 3 elements - confirm whether
// the loop bound should be 3 or the array should be widened to 4.
{ {
float x, y, z, fog; // in case of morph, preblend during decode float x, y, z, fog; // in case of morph, preblend during decode
float u; float v; // scaled by uscale, vscale, if there float u; float v; // scaled by uscale, vscale, if there
float color0[4]; // prelit u8 color0[4]; // prelit
float color1[3]; // prelit u8 color1[3]; // prelit
}; };
DecVtxFormat GetTransformedVtxFormat(const DecVtxFormat &fmt); DecVtxFormat GetTransformedVtxFormat(const DecVtxFormat &fmt);
@ -189,14 +191,14 @@ public:
break; break;
case DEC_S16_3: case DEC_S16_3:
{ {
s16 *p = (s16 *)(data_ + decFmt_.posoff); const s16 *p = (s16 *)(data_ + decFmt_.posoff);
for (int i = 0; i < 3; i++) for (int i = 0; i < 3; i++)
pos[i] = p[i] / 32767.0f; pos[i] = p[i] / 32767.0f;
} }
break; break;
case DEC_S8_3: case DEC_S8_3:
{ {
s8 *p = (s8 *)(data_ + decFmt_.posoff); const s8 *p = (s8 *)(data_ + decFmt_.posoff);
for (int i = 0; i < 3; i++) for (int i = 0; i < 3; i++)
pos[i] = p[i] / 127.0f; pos[i] = p[i] / 127.0f;
} }
@ -214,14 +216,14 @@ public:
break; break;
case DEC_S16_3: case DEC_S16_3:
{ {
s16 *p = (s16 *)(data_ + decFmt_.nrmoff); const s16 *p = (s16 *)(data_ + decFmt_.nrmoff);
for (int i = 0; i < 3; i++) for (int i = 0; i < 3; i++)
nrm[i] = p[i] / 32767.0f; nrm[i] = p[i] / 32767.0f;
} }
break; break;
case DEC_S8_3: case DEC_S8_3:
{ {
s8 *p = (s8 *)(data_ + decFmt_.nrmoff); const s8 *p = (s8 *)(data_ + decFmt_.nrmoff);
for (int i = 0; i < 3; i++) for (int i = 0; i < 3; i++)
nrm[i] = p[i] / 127.0f; nrm[i] = p[i] / 127.0f;
} }
@ -236,6 +238,13 @@ public:
switch (decFmt_.uvfmt) { switch (decFmt_.uvfmt) {
case DEC_FLOAT_2: case DEC_FLOAT_2:
memcpy(uv, data_ + decFmt_.uvoff, 8); break; memcpy(uv, data_ + decFmt_.uvoff, 8); break;
case DEC_U16A_2:
{
const u16 *p = (const u16 *)(data_ + decFmt_.uvoff);
uv[0] = (float)p[0];
uv[1] = (float)p[1];
}
break;
default: default:
ERROR_LOG(G3D, "Reader: Unsupported UV Format"); ERROR_LOG(G3D, "Reader: Unsupported UV Format");
break; break;