Some shader and vertex format optimizations

This commit is contained in:
Henrik Rydgard 2013-02-05 01:37:00 +01:00
parent 3ee6ff01e1
commit bdc467769e
6 changed files with 93 additions and 49 deletions

View File

@ -513,6 +513,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) {
// Throughmode changed, let's make the proj matrix dirty.
shaderManager_->DirtyUniform(DIRTY_PROJMATRIX);
}
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
// This sets through-mode or not, as well.
break;

View File

@ -77,7 +77,7 @@ void GenerateFragmentShader(char *buffer)
#if defined(GLSL_ES_1_0)
WRITE(p, "precision mediump float;\n");
WRITE(p, "precision lowp float;\n");
#elif !defined(FORCE_OPENGL_2_0)
WRITE(p, "#version 110\n");
#endif
@ -86,30 +86,38 @@ void GenerateFragmentShader(char *buffer)
int doTexture = (gstate.textureMapEnable & 1) && !(gstate.clearmode & 1);
bool enableFog = gstate.isFogEnabled() && !gstate.isModeThrough() && !gstate.isModeClear();
bool enableAlphaTest = (gstate.alphaTestEnable & 1) && !gstate.isModeClear();
bool enableColorTest = (gstate.colorTestEnable & 1) && !gstate.isModeClear();
if (doTexture)
WRITE(p, "uniform sampler2D tex;\n");
if ((gstate.alphaTestEnable & 1) || (gstate.colorTestEnable & 1)) {
if (enableAlphaTest || enableColorTest) {
WRITE(p, "uniform vec4 u_alphacolorref;\n");
}
WRITE(p, "uniform vec3 u_texenv;\n");
if (gstate.textureMapEnable & 1) {
WRITE(p, "uniform vec3 u_texenv;\n");
}
WRITE(p, "varying vec4 v_color0;\n");
if (lmode)
WRITE(p, "varying vec3 v_color1;\n");
if (enableFog) {
WRITE(p, "uniform vec3 u_fogcolor;\n");
#if defined(GLSL_ES_1_0)
WRITE(p, "varying mediump float v_fogdepth;\n");
#else
WRITE(p, "varying float v_fogdepth;\n");
#endif
}
if (doTexture)
WRITE(p, "varying vec2 v_texcoord;\n");
WRITE(p, "void main() {\n");
WRITE(p, " vec4 v;\n");
if (gstate.clearmode & 1)
{
// Clear mode does not allow any fancy shading.
WRITE(p, " v = v_color0;\n");
WRITE(p, " gl_FragColor = v_color0;\n");
}
else
{
@ -119,59 +127,58 @@ void GenerateFragmentShader(char *buffer)
WRITE(p, " vec4 s = vec4(v_color1, 0.0);\n");
secondary = " + s";
} else {
WRITE(p, " vec4 s = vec4(0.0, 0.0, 0.0, 0.0);\n");
secondary = "";
}
if (gstate.textureMapEnable & 1) {
WRITE(p, " vec4 t = texture2D(tex, v_texcoord);\n");
WRITE(p, " vec4 p = clamp(v_color0, 0.0, 1.0);\n");
WRITE(p, " vec4 p = v_color0;\n");
if (gstate.texfunc & 0x100) { // texfmt == RGBA
switch (gstate.texfunc & 0x7) {
case GE_TEXFUNC_MODULATE:
WRITE(p, " v = t * p%s;\n", secondary); break;
WRITE(p, " vec4 v = t * p%s;\n", secondary); break;
case GE_TEXFUNC_DECAL:
WRITE(p, " v = vec4(1.0 - t.a * p.rgb + t.a * u_texenv.rgb, p.a)%s;\n", secondary); break;
WRITE(p, " vec4 v = vec4(1.0 - t.a * p.rgb + t.a * u_texenv.rgb, p.a)%s;\n", secondary); break;
case GE_TEXFUNC_BLEND:
WRITE(p, " v = vec4((1.0 - t.rgb) * p.rgb + t.rgb * u_texenv.rgb, p.a * t.a)%s;\n", secondary); break;
WRITE(p, " vec4 v = vec4((1.0 - t.rgb) * p.rgb + t.rgb * u_texenv.rgb, p.a * t.a)%s;\n", secondary); break;
case GE_TEXFUNC_REPLACE:
WRITE(p, " v = t%s;\n", secondary); break;
WRITE(p, " vec4 v = t%s;\n", secondary); break;
case GE_TEXFUNC_ADD:
WRITE(p, " v = vec4(t.rgb + p.rgb, p.a * t.a)%s;\n", secondary); break;
WRITE(p, " vec4 v = vec4(t.rgb + p.rgb, p.a * t.a)%s;\n", secondary); break;
default:
WRITE(p, " v = p;\n"); break;
WRITE(p, " vec4 v = p;\n"); break;
}
} else { // texfmt == RGB
switch (gstate.texfunc & 0x7) {
case GE_TEXFUNC_MODULATE:
WRITE(p, " v = vec4(t.rgb * p.rgb, p.a)%s;\n", secondary); break;
WRITE(p, " vec4 v = vec4(t.rgb * p.rgb, p.a)%s;\n", secondary); break;
case GE_TEXFUNC_DECAL:
WRITE(p, " v = vec4(t.rgb, p.a)%s;\n", secondary); break;
WRITE(p, " vec4 v = vec4(t.rgb, p.a)%s;\n", secondary); break;
case GE_TEXFUNC_BLEND:
WRITE(p, " v = vec4(1.0 - t.rgb) * p.rgb + t.rgb * u_texenv.rgb, p.a)%s;\n", secondary); break;
WRITE(p, " vec4 v = vec4(1.0 - t.rgb) * p.rgb + t.rgb * u_texenv.rgb, p.a)%s;\n", secondary); break;
case GE_TEXFUNC_REPLACE:
WRITE(p, " v = vec4(t.rgb, p.a)%s;\n", secondary); break;
WRITE(p, " vec4 v = vec4(t.rgb, p.a)%s;\n", secondary); break;
case GE_TEXFUNC_ADD:
WRITE(p, " v = vec4(t.rgb + p.rgb, p.a)%s;\n", secondary); break;
WRITE(p, " vec4 v = vec4(t.rgb + p.rgb, p.a)%s;\n", secondary); break;
default:
WRITE(p, " v = p;\n"); break;
WRITE(p, " vec4 v = p;\n"); break;
}
}
} else {
// No texture mapping
WRITE(p, " v = clamp(v_color0, 0.0, 1.0)%s;\n", secondary);
WRITE(p, " vec4 v = v_color0 %s;\n", secondary);
}
// Color doubling
if (gstate.texfunc & 0x10000) {
WRITE(p, " v = v * vec4(2.0, 2.0, 2.0, 2.0);");
WRITE(p, " v = v * 2.0;\n");
}
if (gstate.alphaTestEnable & 1) {
if (enableAlphaTest) {
int alphaTestFunc = gstate.alphatest & 7;
const char *alphaTestFuncs[] = { "#", "#", " == ", " != ", " < ", " <= ", " > ", " >= " }; // never/always don't make sense
if (alphaTestFuncs[alphaTestFunc][0] != '#')
WRITE(p, "if (!(v.a %s u_alphacolorref.a)) discard;", alphaTestFuncs[alphaTestFunc]);
WRITE(p, " if (!(v.a %s u_alphacolorref.a)) discard;\n", alphaTestFuncs[alphaTestFunc]);
}
// Disabled for now until we actually find a need for it.
@ -187,20 +194,20 @@ void GenerateFragmentShader(char *buffer)
if (enableFog) {
WRITE(p, " float fogCoef = clamp(v_fogdepth, 0.0, 1.0);\n");
WRITE(p, " v = mix(vec4(u_fogcolor, v.a), v, fogCoef);\n");
WRITE(p, " gl_FragColor = mix(vec4(u_fogcolor, v.a), v, fogCoef);\n");
// WRITE(p, " v.x = v_depth;\n");
} else {
WRITE(p, " gl_FragColor = v;\n");
}
}
#ifdef DEBUG_SHADER
if (doTexture) {
WRITE(p, " v = texture2D(tex, v_texcoord);\n");
WRITE(p, " gl_FragColor = texture2D(tex, v_texcoord);\n");
} else {
WRITE(p, " v = vec4(1,0,1,1);\n");
WRITE(p, " gl_FragColor = vec4(1,0,1,1);\n");
}
#endif
WRITE(p, " gl_FragColor = v;\n");
WRITE(p, "}\n");
}

View File

@ -39,7 +39,7 @@ Shader::Shader(const char *code, uint32_t shaderType) {
OutputDebugString(code);
#endif
shader = glCreateShader(shaderType);
glShaderSource(shader, 1, &code, 0);
glShaderSource(shader, 1, &code, 0);
glCompileShader(shader);
GLint success;
glGetShaderiv(shader, GL_COMPILE_STATUS, &success);
@ -264,7 +264,13 @@ void LinkedShader::updateUniforms() {
// Texturing
if (u_uvscaleoffset != -1 && (dirtyUniforms & DIRTY_UVSCALEOFFSET)) {
const float uvscaleoff[4] = { gstate_c.uScale, gstate_c.vScale, gstate_c.uOff, gstate_c.vOff};
float uvscaleoff[4] = { gstate_c.uScale, gstate_c.vScale, gstate_c.uOff, gstate_c.vOff};
if (gstate.isModeThrough()) {
uvscaleoff[0] /= gstate_c.curTextureWidth;
uvscaleoff[1] /= gstate_c.curTextureHeight;
uvscaleoff[2] /= gstate_c.curTextureWidth;
uvscaleoff[3] /= gstate_c.curTextureHeight;
}
glUniform4fv(u_uvscaleoffset, 1, uvscaleoff);
}

View File

@ -315,6 +315,8 @@ static const GlTypeInfo GLComp[] = {
{GL_UNSIGNED_BYTE, 2, GL_TRUE},// DEC_U8_2,
{GL_UNSIGNED_BYTE, 3, GL_TRUE},// DEC_U8_3,
{GL_UNSIGNED_BYTE, 4, GL_TRUE},// DEC_U8_4,
{GL_UNSIGNED_SHORT, 2, GL_TRUE},// DEC_U16_2,
{GL_UNSIGNED_SHORT, 2, GL_FALSE},// DEC_U16A_2,
};
static inline void VertexAttribSetup(int attrib, int fmt, int stride, u8 *ptr) {
@ -399,6 +401,13 @@ void TransformDrawEngine::SoftwareTransformAndDraw(
vertexCount = 0x10000/3;
#endif
float uscale = 1.0f;
float vscale = 1.0f;
if (throughmode) {
uscale /= gstate_c.curTextureWidth;
vscale /= gstate_c.curTextureHeight;
}
Lighter lighter;
float fog_end = getFloat24(gstate.fog1);
float fog_slope = getFloat24(gstate.fog2);
@ -430,6 +439,9 @@ void TransformDrawEngine::SoftwareTransformAndDraw(
if (reader.hasUV()) {
reader.ReadUV(uv);
uv[0] *= uscale;
uv[1] *= vscale;
}
fogCoef = 1.0f;
// Scale UV?
@ -529,8 +541,8 @@ void TransformDrawEngine::SoftwareTransformAndDraw(
{
case 0: // UV mapping
// Texture scale/offset is only performed in this mode.
uv[0] = ruv[0]*gstate_c.uScale + gstate_c.uOff;
uv[1] = ruv[1]*gstate_c.vScale + gstate_c.vOff;
uv[0] = uscale * (ruv[0]*gstate_c.uScale + gstate_c.uOff);
uv[1] = vscale * (ruv[1]*gstate_c.vScale + gstate_c.vOff);
break;
case 1:
{
@ -580,8 +592,12 @@ void TransformDrawEngine::SoftwareTransformAndDraw(
memcpy(&transformed[index].x, v, 3 * sizeof(float));
transformed[index].fog = fogCoef;
memcpy(&transformed[index].u, uv, 2 * sizeof(float));
memcpy(&transformed[index].color0, c0, 4 * sizeof(float));
memcpy(&transformed[index].color1, c1, 3 * sizeof(float));
for (int i = 0; i < 4; i++) {
transformed[index].color0[i] = c0[i] * 255.0f;
}
for (int i = 0; i < 4; i++) {
transformed[index].color1[i] = c1[i] * 255.0f;
}
}
// Step 2: expand rectangles.
@ -674,8 +690,8 @@ void TransformDrawEngine::SoftwareTransformAndDraw(
}
glVertexAttribPointer(program->a_position, 4, GL_FLOAT, GL_FALSE, vertexSize, drawBuffer);
if (program->a_texcoord != -1) glVertexAttribPointer(program->a_texcoord, 2, GL_FLOAT, GL_FALSE, vertexSize, ((uint8_t*)drawBuffer) + 4 * 4);
if (program->a_color0 != -1) glVertexAttribPointer(program->a_color0, 4, GL_FLOAT, GL_FALSE, vertexSize, ((uint8_t*)drawBuffer) + 6 * 4);
if (program->a_color1 != -1) glVertexAttribPointer(program->a_color1, 3, GL_FLOAT, GL_FALSE, vertexSize, ((uint8_t*)drawBuffer) + 10 * 4);
if (program->a_color0 != -1) glVertexAttribPointer(program->a_color0, 4, GL_UNSIGNED_BYTE, GL_TRUE, vertexSize, ((uint8_t*)drawBuffer) + 6 * 4);
if (program->a_color1 != -1) glVertexAttribPointer(program->a_color1, 3, GL_UNSIGNED_BYTE, GL_TRUE, vertexSize, ((uint8_t*)drawBuffer) + 7 * 4);
if (drawIndexed) {
if (useVBO) {
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo_[curVbo_]);

View File

@ -73,6 +73,8 @@ int DecFmtSize(u8 fmt) {
case DEC_U8_2: return 4;
case DEC_U8_3: return 4;
case DEC_U8_4: return 4;
case DEC_U16_2: return 4;
case DEC_U16A_2: return 4;
default:
return 0;
}
@ -154,10 +156,10 @@ void VertexDecoder::Step_TcU16() const
// Decode step for 16-bit texture coordinates in through mode: reads two u16
// values from the source vertex at ptr_ + tcoff and stores a UV pair into the
// decoded vertex at decoded_ + decFmt.uvoff.
// NOTE(review): this text is a diff rendering whose +/- markers were stripped,
// so superseded and replacement statements both appear below. Which is which
// is inferred from the duplicated `uv` declaration -- confirm against the commit.
void VertexDecoder::Step_TcU16Through() const
{
// Presumably the removed form: destination viewed as two floats.
float *uv = (float *)(decoded_ + decFmt.uvoff);
// Presumably the added form: destination viewed as two u16 values.
u16 *uv = (u16 *)(decoded_ + decFmt.uvoff);
const u16 *uvdata = (const u16*)(ptr_ + tcoff);
// Presumably removed: coordinates normalised by the current texture size at decode time.
uv[0] = (float)uvdata[0] / (float)(gstate_c.curTextureWidth);
uv[1] = (float)uvdata[1] / (float)(gstate_c.curTextureHeight);
// Presumably added: raw copy with no scaling. Elsewhere in this change the
// division by curTextureWidth/curTextureHeight is applied when uploading the
// UV scale/offset uniform and in the software transform path.
uv[0] = uvdata[0];
uv[1] = uvdata[1];
}
void VertexDecoder::Step_TcFloat() const
@ -171,8 +173,8 @@ void VertexDecoder::Step_TcFloatThrough() const
{
// Body of the through-mode decode step for float texture coordinates: reads
// two floats from the source vertex at ptr_ + tcoff and writes them to the
// decoded vertex at decoded_ + decFmt.uvoff.
// NOTE(review): diff rendering with +/- markers stripped; the two pairs of
// assignments below are presumably the removed and the added versions
// respectively -- confirm against the commit.
float *uv = (float *)(decoded_ + decFmt.uvoff);
const float *uvdata = (const float*)(ptr_ + tcoff);
// Presumably removed: normalise by the current texture dimensions while decoding.
uv[0] = uvdata[0] / (float)(gstate_c.curTextureWidth);
uv[1] = uvdata[1] / (float)(gstate_c.curTextureHeight);
// Presumably added: pass the coordinates through unscaled.
uv[0] = uvdata[0];
uv[1] = uvdata[1];
}
void VertexDecoder::Step_Color565() const
@ -580,8 +582,11 @@ void VertexDecoder::SetVertexType(u32 fmt) {
steps_[numSteps_++] = throughmode ? tcstep_through[tc] : tcstep[tc];
// All UV decode to DEC_FLOAT2 currently.
decFmt.uvfmt = DEC_FLOAT_2;
// All UV except through mode decode to DEC_FLOAT2 currently.
if (throughmode && (tc == (GE_VTYPE_TC_16BIT >> GE_VTYPE_TC_SHIFT)))
decFmt.uvfmt = DEC_U16A_2;
else
decFmt.uvfmt = DEC_FLOAT_2;
decFmt.uvoff = decOff;
decOff += DecFmtSize(decFmt.uvfmt);
}

View File

@ -38,6 +38,8 @@ enum {
DEC_U8_2,
DEC_U8_3,
DEC_U8_4,
DEC_U16_2,
DEC_U16A_2,
};
int DecFmtSize(u8 fmt);
@ -58,8 +60,8 @@ struct TransformedVertex
{
// Per-vertex record filled by the software transform path and handed to
// glVertexAttribPointer as an interleaved buffer.
// NOTE(review): diff rendering with +/- markers stripped; the float colour
// members are presumably the removed declarations and the u8 members the added
// ones -- confirm against the commit.
float x, y, z, fog; // in case of morph, preblend during decode
float u; float v; // scaled by uscale, vscale, if there
float color0[4]; // prelit
float color1[3]; // prelit
// With the u8 layout, color0 starts at byte offset 6 * 4 and color1 at 7 * 4
// (assuming 4-byte floats and no padding before them), matching the offsets
// passed to glVertexAttribPointer in the draw code of this same change.
u8 color0[4]; // prelit
// NOTE(review): the fill loop in SoftwareTransformAndDraw iterates i < 4 over
// color1, but this array has 3 elements, so index 3 would be past its end --
// verify whether the loop bound or the array size is the intended one.
u8 color1[3]; // prelit
};
DecVtxFormat GetTransformedVtxFormat(const DecVtxFormat &fmt);
@ -189,14 +191,14 @@ public:
break;
case DEC_S16_3:
{
s16 *p = (s16 *)(data_ + decFmt_.posoff);
const s16 *p = (s16 *)(data_ + decFmt_.posoff);
for (int i = 0; i < 3; i++)
pos[i] = p[i] / 32767.0f;
}
break;
case DEC_S8_3:
{
s8 *p = (s8 *)(data_ + decFmt_.posoff);
const s8 *p = (s8 *)(data_ + decFmt_.posoff);
for (int i = 0; i < 3; i++)
pos[i] = p[i] / 127.0f;
}
@ -214,14 +216,14 @@ public:
break;
case DEC_S16_3:
{
s16 *p = (s16 *)(data_ + decFmt_.nrmoff);
const s16 *p = (s16 *)(data_ + decFmt_.nrmoff);
for (int i = 0; i < 3; i++)
nrm[i] = p[i] / 32767.0f;
}
break;
case DEC_S8_3:
{
s8 *p = (s8 *)(data_ + decFmt_.nrmoff);
const s8 *p = (s8 *)(data_ + decFmt_.nrmoff);
for (int i = 0; i < 3; i++)
nrm[i] = p[i] / 127.0f;
}
@ -236,6 +238,13 @@ public:
switch (decFmt_.uvfmt) {
case DEC_FLOAT_2:
memcpy(uv, data_ + decFmt_.uvoff, 8); break;
case DEC_U16A_2:
{
const u16 *p = (const u16 *)(data_ + decFmt_.uvoff);
uv[0] = (float)p[0];
uv[1] = (float)p[1];
}
break;
default:
ERROR_LOG(G3D, "Reader: Unsupported UV Format");
break;