ppsspp/GPU/GLES/FragmentShaderGenerator.cpp

600 lines
18 KiB
C++

// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#if !defined(USING_GLES2)
// SDL 1.2 on Apple does not have support for OpenGL 3 and hence needs
// special treatment in the shader generator.
#if defined(__APPLE__)
#define FORCE_OPENGL_2_0
#endif
#endif
#include <cstdio>
#include "gfx_es2/gpu_features.h"
#include "Core/Reporting.h"
#include "Core/Config.h"
#include "GPU/GLES/FragmentShaderGenerator.h"
#include "GPU/GLES/Framebuffer.h"
#include "GPU/ge_constants.h"
#include "GPU/GPUState.h"
#define WRITE p+=sprintf
// #define DEBUG_SHADER
// GL_NV_shader_framebuffer_fetch looks interesting....
// Dest factors where it's safe to eliminate the alpha test under certain conditions
const bool safeDestFactors[16] = {
true, // GE_DSTBLEND_SRCCOLOR,
true, // GE_DSTBLEND_INVSRCCOLOR,
false, // GE_DSTBLEND_SRCALPHA,
true, // GE_DSTBLEND_INVSRCALPHA,
true, // GE_DSTBLEND_DSTALPHA,
true, // GE_DSTBLEND_INVDSTALPHA,
false, // GE_DSTBLEND_DOUBLESRCALPHA,
false, // GE_DSTBLEND_DOUBLEINVSRCALPHA,
true, // GE_DSTBLEND_DOUBLEDSTALPHA,
true, // GE_DSTBLEND_DOUBLEINVDSTALPHA,
true, //GE_DSTBLEND_FIXB,
};
static bool IsAlphaTestTriviallyTrue() {
GEComparison alphaTestFunc = gstate.getAlphaTestFunction();
int alphaTestRef = gstate.getAlphaTestRef();
int alphaTestMask = gstate.getAlphaTestMask();
switch (alphaTestFunc) {
case GE_COMP_NEVER:
return false;
case GE_COMP_ALWAYS:
return true;
case GE_COMP_GEQUAL:
return alphaTestRef == 0;
// Non-zero check. If we have no depth testing (and thus no depth writing), and an alpha func that will result in no change if zero alpha, get rid of the alpha test.
// Speeds up Lumines by a LOT on PowerVR.
case GE_COMP_NOTEQUAL:
case GE_COMP_GREATER:
{
bool depthTest = gstate.isDepthTestEnabled();
bool stencilTest = gstate.isStencilTestEnabled();
GEBlendSrcFactor src = gstate.getBlendFuncA();
GEBlendDstFactor dst = gstate.getBlendFuncB();
if (!stencilTest && !depthTest && alphaTestRef == 0 && gstate.isAlphaBlendEnabled() && src == GE_SRCBLEND_SRCALPHA && safeDestFactors[(int)dst])
return true;
return false;
}
case GE_COMP_LEQUAL:
return alphaTestRef == 255;
case GE_COMP_EQUAL:
case GE_COMP_LESS:
return false;
default:
return false;
}
}
const bool nonAlphaSrcFactors[16] = {
true, // GE_SRCBLEND_DSTCOLOR,
true, // GE_SRCBLEND_INVDSTCOLOR,
false, // GE_SRCBLEND_SRCALPHA,
false, // GE_SRCBLEND_INVSRCALPHA,
true, // GE_SRCBLEND_DSTALPHA,
true, // GE_SRCBLEND_INVDSTALPHA,
false, // GE_SRCBLEND_DOUBLESRCALPHA,
false, // GE_SRCBLEND_DOUBLEINVSRCALPHA,
true, // GE_SRCBLEND_DOUBLEDSTALPHA,
true, // GE_SRCBLEND_DOUBLEINVDSTALPHA,
true, // GE_SRCBLEND_FIXA,
};
const bool nonAlphaDestFactors[16] = {
true, // GE_DSTBLEND_SRCCOLOR,
true, // GE_DSTBLEND_INVSRCCOLOR,
false, // GE_DSTBLEND_SRCALPHA,
false, // GE_DSTBLEND_INVSRCALPHA,
true, // GE_DSTBLEND_DSTALPHA,
true, // GE_DSTBLEND_INVDSTALPHA,
false, // GE_DSTBLEND_DOUBLESRCALPHA,
false, // GE_DSTBLEND_DOUBLEINVSRCALPHA,
true, // GE_DSTBLEND_DOUBLEDSTALPHA,
true, // GE_DSTBLEND_DOUBLEINVDSTALPHA,
true, // GE_DSTBLEND_FIXB,
};
ReplaceAlphaType ReplaceAlphaWithStencil() {
if (!gstate.isStencilTestEnabled() || gstate.isModeClear()) {
return REPLACE_ALPHA_NO;
}
if (gstate.isAlphaBlendEnabled()) {
if (nonAlphaSrcFactors[gstate.getBlendFuncA()] && nonAlphaDestFactors[gstate.getBlendFuncB()]) {
return REPLACE_ALPHA_YES;
} else {
if (gl_extensions.ARB_blend_func_extended) {
return REPLACE_ALPHA_DUALSOURCE;
} else {
return REPLACE_ALPHA_NO;
}
}
}
return REPLACE_ALPHA_YES;
}
StencilValueType ReplaceAlphaWithStencilType() {
switch (gstate.FrameBufFormat()) {
case GE_FORMAT_565:
// There's never a stencil value. Maybe the right alpha is 1?
return STENCIL_VALUE_ONE;
case GE_FORMAT_5551:
switch (gstate.getStencilOpZPass()) {
// Technically, this should only ever use zero/one.
case GE_STENCILOP_REPLACE:
return (gstate.getStencilTestRef() & 0x80) != 0 ? STENCIL_VALUE_ONE : STENCIL_VALUE_ZERO;
// Decrementing always zeros, since there's only one bit.
case GE_STENCILOP_DECR:
case GE_STENCILOP_ZERO:
return STENCIL_VALUE_ZERO;
// Incrementing always fills, since there's only one bit.
case GE_STENCILOP_INCR:
return STENCIL_VALUE_ONE;
case GE_STENCILOP_INVERT:
return STENCIL_VALUE_UNKNOWN;
case GE_STENCILOP_KEEP:
return STENCIL_VALUE_KEEP;
}
break;
case GE_FORMAT_4444:
case GE_FORMAT_8888:
case GE_FORMAT_INVALID:
switch (gstate.getStencilOpZPass()) {
case GE_STENCILOP_REPLACE:
return STENCIL_VALUE_UNIFORM;
case GE_STENCILOP_ZERO:
if (gstate.getStencilOpZFail() == GE_STENCILOP_ZERO) {
return STENCIL_VALUE_ZERO;
} else {
return STENCIL_VALUE_KEEP;
}
// Decrementing always zeros, since there's only one bit.
case GE_STENCILOP_DECR:
case GE_STENCILOP_INCR:
case GE_STENCILOP_INVERT:
return STENCIL_VALUE_UNKNOWN;
case GE_STENCILOP_KEEP:
return STENCIL_VALUE_KEEP;
}
break;
}
return STENCIL_VALUE_UNKNOWN;
}
static bool IsColorTestTriviallyTrue() {
GEComparison colorTestFunc = gstate.getColorTestFunction();
switch (colorTestFunc) {
case GE_COMP_NEVER:
return false;
case GE_COMP_ALWAYS:
return true;
case GE_COMP_EQUAL:
case GE_COMP_NOTEQUAL:
return false;
default:
return false;
}
}
static bool CanDoubleSrcBlendMode() {
if (!gstate.isAlphaBlendEnabled()) {
return false;
}
int funcA = gstate.getBlendFuncA();
int funcB = gstate.getBlendFuncB();
if (funcA != GE_SRCBLEND_DOUBLESRCALPHA) {
funcB = funcA;
funcA = gstate.getBlendFuncB();
}
if (funcA != GE_SRCBLEND_DOUBLESRCALPHA) {
return false;
}
// One side should be doubled. Let's check the other side.
// LittleBigPlanet, for example, uses 2.0 * src, 1.0 - src, which can't double.
switch (funcB) {
case GE_DSTBLEND_SRCALPHA:
case GE_DSTBLEND_INVSRCALPHA:
return false;
default:
return true;
}
}
// Here we must take all the bits of the gstate that determine what the fragment shader will
// look like, and concatenate them together into an ID.
void ComputeFragmentShaderID(FragmentShaderID *id) {
memset(&id->d[0], 0, sizeof(id->d));
if (gstate.isModeClear()) {
// We only need one clear shader, so let's ignore the rest of the bits.
id->d[0] = 1;
} else {
bool lmode = gstate.isUsingSecondaryColor() && gstate.isLightingEnabled();
bool enableFog = gstate.isFogEnabled() && !gstate.isModeThrough();
bool enableAlphaTest = gstate.isAlphaTestEnabled() && !IsAlphaTestTriviallyTrue() && !g_Config.bDisableAlphaTest;
bool enableColorTest = gstate.isColorTestEnabled() && !IsColorTestTriviallyTrue();
bool enableColorDoubling = gstate.isColorDoublingEnabled();
// This isn't really correct, but it's a hack to get doubled blend modes to work more correctly.
bool enableAlphaDoubling = CanDoubleSrcBlendMode();
bool doTextureProjection = gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX;
bool doTextureAlpha = gstate.isTextureAlphaUsed();
ReplaceAlphaType stencilToAlpha = ReplaceAlphaWithStencil();
// All texfuncs except replace are the same for RGB as for RGBA with full alpha.
if (gstate_c.textureFullAlpha && gstate.getTextureFunction() != GE_TEXFUNC_REPLACE)
doTextureAlpha = false;
// id->d[0] |= (gstate.isModeClear() & 1);
if (gstate.isTextureMapEnabled()) {
id->d[0] |= 1 << 1;
id->d[0] |= gstate.getTextureFunction() << 2;
id->d[0] |= (doTextureAlpha & 1) << 5; // rgb or rgba
}
id->d[0] |= (lmode & 1) << 7;
id->d[0] |= enableAlphaTest << 8;
if (enableAlphaTest)
id->d[0] |= gstate.getAlphaTestFunction() << 9;
id->d[0] |= enableColorTest << 12;
if (enableColorTest)
id->d[0] |= gstate.getColorTestFunction() << 13; // color test func
id->d[0] |= (enableFog & 1) << 15;
id->d[0] |= (doTextureProjection & 1) << 16;
id->d[0] |= (enableColorDoubling & 1) << 17;
id->d[0] |= (enableAlphaDoubling & 1) << 18;
id->d[0] |= (stencilToAlpha) << 19;
if (stencilToAlpha != REPLACE_ALPHA_NO) {
// 3 bits
id->d[0] |= ReplaceAlphaWithStencilType() << 21;
}
if (enableAlphaTest)
gpuStats.numAlphaTestedDraws++;
else
gpuStats.numNonAlphaTestedDraws++;
}
}
// Missing: Z depth range
void GenerateFragmentShader(char *buffer) {
char *p = buffer;
// In GLSL ES 3.0, you use "in" variables instead of varying.
bool glslES30 = false;
const char *varying = "varying";
const char *fragColor0 = "gl_FragColor";
const char *texture = "texture2D";
bool highpFog = false;
#if defined(USING_GLES2)
// Let's wait until we have a real use for this.
// ES doesn't support dual source alpha :(
if (gl_extensions.GLES3) {
WRITE(p, "#version 300 es\n"); // GLSL ES 1.0
fragColor0 = "fragColor0";
texture = "texture";
glslES30 = true;
} else {
WRITE(p, "#version 100\n"); // GLSL ES 1.0
}
WRITE(p, "precision lowp float;\n");
// PowerVR needs highp to do the fog in MHU correctly.
// Others don't, and some can't handle highp in the fragment shader.
highpFog = gl_extensions.gpuVendor == GPU_VENDOR_POWERVR;
#elif !defined(FORCE_OPENGL_2_0)
if (gl_extensions.VersionGEThan(3, 3, 0)) {
fragColor0 = "fragColor0";
texture = "texture";
glslES30 = true;
WRITE(p, "#version 330\n");
} else if (gl_extensions.VersionGEThan(3, 0, 0)) {
fragColor0 = "fragColor0";
WRITE(p, "#version 130\n");
// Remove lowp/mediump in non-mobile non-glsl 3 implementations
WRITE(p, "#define lowp\n");
WRITE(p, "#define mediump\n");
WRITE(p, "#define highp\n");
} else {
WRITE(p, "#version 110\n");
// Remove lowp/mediump in non-mobile non-glsl 3 implementations
WRITE(p, "#define lowp\n");
WRITE(p, "#define mediump\n");
WRITE(p, "#define highp\n");
}
#else
// Need to remove lowp/mediump for Mac
WRITE(p, "#define lowp\n");
WRITE(p, "#define mediump\n");
WRITE(p, "#define highp\n");
#endif
if (glslES30) {
varying = "in";
}
bool lmode = gstate.isUsingSecondaryColor() && gstate.isLightingEnabled();
bool doTexture = gstate.isTextureMapEnabled() && !gstate.isModeClear();
bool enableFog = gstate.isFogEnabled() && !gstate.isModeThrough() && !gstate.isModeClear();
bool enableAlphaTest = gstate.isAlphaTestEnabled() && !IsAlphaTestTriviallyTrue() && !gstate.isModeClear() && !g_Config.bDisableAlphaTest;
bool enableColorTest = gstate.isColorTestEnabled() && !IsColorTestTriviallyTrue() && !gstate.isModeClear();
bool enableColorDoubling = gstate.isColorDoublingEnabled();
// This isn't really correct, but it's a hack to get doubled blend modes to work more correctly.
bool enableAlphaDoubling = CanDoubleSrcBlendMode();
bool doTextureProjection = gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX;
bool doTextureAlpha = gstate.isTextureAlphaUsed();
ReplaceAlphaType stencilToAlpha = ReplaceAlphaWithStencil();
if (gstate_c.textureFullAlpha && gstate.getTextureFunction() != GE_TEXFUNC_REPLACE)
doTextureAlpha = false;
if (doTexture)
WRITE(p, "uniform sampler2D tex;\n");
if (enableAlphaTest || enableColorTest) {
WRITE(p, "uniform vec4 u_alphacolorref;\n");
}
if (stencilToAlpha && ReplaceAlphaWithStencilType() == STENCIL_VALUE_UNIFORM) {
WRITE(p, "uniform float u_stencilReplaceValue;\n");
}
if (gstate.isTextureMapEnabled() && gstate.getTextureFunction() == GE_TEXFUNC_BLEND)
WRITE(p, "uniform vec3 u_texenv;\n");
WRITE(p, "%s vec4 v_color0;\n", varying);
if (lmode)
WRITE(p, "%s vec3 v_color1;\n", varying);
if (enableFog) {
WRITE(p, "uniform vec3 u_fogcolor;\n");
WRITE(p, "%s %s float v_fogdepth;\n", varying, highpFog ? "highp" : "mediump");
}
if (doTexture) {
if (doTextureProjection)
WRITE(p, "%s mediump vec3 v_texcoord;\n", varying);
else
WRITE(p, "%s mediump vec2 v_texcoord;\n", varying);
}
if (enableAlphaTest) {
if (gl_extensions.gpuVendor == GPU_VENDOR_POWERVR)
WRITE(p, "float roundTo255thf(in mediump float x) { mediump float y = x + (0.5/255.0); return y - fract(y * 255.0) * (1.0 / 255.0); }\n");
else
WRITE(p, "float roundAndScaleTo255f(in float x) { return floor(x * 255.0 + 0.5); }\n");
}
if (enableColorTest) {
if (gl_extensions.gpuVendor == GPU_VENDOR_POWERVR)
WRITE(p, "vec3 roundTo255thv(in vec3 x) { vec3 y = x + (0.5/255.0); return y - fract(y * 255.0) * (1.0 / 255.0); }\n");
else
WRITE(p, "vec3 roundAndScaleTo255v(in vec3 x) { return floor(x * 255.0 + 0.5); }\n");
}
if (stencilToAlpha == REPLACE_ALPHA_DUALSOURCE) {
WRITE(p, "out vec4 fragColor0;\n");
WRITE(p, "out vec4 fragColor1;\n");
} else if (!strcmp(fragColor0, "fragColor0")) {
WRITE(p, "out vec4 fragColor0;\n");
}
WRITE(p, "void main() {\n");
if (gstate.isModeClear()) {
// Clear mode does not allow any fancy shading.
WRITE(p, " vec4 v = v_color0;\n");
} else {
const char *secondary = "";
// Secondary color for specular on top of texture
if (lmode) {
WRITE(p, " vec4 s = vec4(v_color1, 0.0);\n");
secondary = " + s";
} else {
secondary = "";
}
if (gstate.isTextureMapEnabled()) {
if (doTextureProjection) {
WRITE(p, " vec4 t = %sProj(tex, v_texcoord);\n", texture);
} else {
WRITE(p, " vec4 t = %s(tex, v_texcoord);\n", texture);
}
WRITE(p, " vec4 p = v_color0;\n");
if (doTextureAlpha) { // texfmt == RGBA
switch (gstate.getTextureFunction()) {
case GE_TEXFUNC_MODULATE:
WRITE(p, " vec4 v = p * t%s;\n", secondary);
break;
case GE_TEXFUNC_DECAL:
WRITE(p, " vec4 v = vec4(mix(p.rgb, t.rgb, t.a), p.a)%s;\n", secondary);
break;
case GE_TEXFUNC_BLEND:
WRITE(p, " vec4 v = vec4(mix(p.rgb, u_texenv.rgb, t.rgb), p.a * t.a)%s;\n", secondary);
break;
case GE_TEXFUNC_REPLACE:
WRITE(p, " vec4 v = t%s;\n", secondary);
break;
case GE_TEXFUNC_ADD:
case GE_TEXFUNC_UNKNOWN1:
case GE_TEXFUNC_UNKNOWN2:
case GE_TEXFUNC_UNKNOWN3:
WRITE(p, " vec4 v = vec4(p.rgb + t.rgb, p.a * t.a)%s;\n", secondary);
break;
default:
WRITE(p, " vec4 v = p;\n"); break;
}
} else { // texfmt == RGB
switch (gstate.getTextureFunction()) {
case GE_TEXFUNC_MODULATE:
WRITE(p, " vec4 v = vec4(t.rgb * p.rgb, p.a)%s;\n", secondary);
break;
case GE_TEXFUNC_DECAL:
WRITE(p, " vec4 v = vec4(t.rgb, p.a)%s;\n", secondary);
break;
case GE_TEXFUNC_BLEND:
WRITE(p, " vec4 v = vec4(mix(p.rgb, u_texenv.rgb, t.rgb), p.a)%s;\n", secondary);
break;
case GE_TEXFUNC_REPLACE:
WRITE(p, " vec4 v = vec4(t.rgb, p.a)%s;\n", secondary);
break;
case GE_TEXFUNC_ADD:
case GE_TEXFUNC_UNKNOWN1:
case GE_TEXFUNC_UNKNOWN2:
case GE_TEXFUNC_UNKNOWN3:
WRITE(p, " vec4 v = vec4(p.rgb + t.rgb, p.a)%s;\n", secondary); break;
default:
WRITE(p, " vec4 v = p;\n"); break;
}
}
} else {
// No texture mapping
WRITE(p, " vec4 v = v_color0 %s;\n", secondary);
}
if (enableAlphaTest) {
GEComparison alphaTestFunc = gstate.getAlphaTestFunction();
const char *alphaTestFuncs[] = { "#", "#", " != ", " == ", " >= ", " > ", " <= ", " < " }; // never/always don't make sense
if (alphaTestFuncs[alphaTestFunc][0] != '#') {
if (gl_extensions.gpuVendor == GPU_VENDOR_POWERVR) {
// Work around bad PVR driver problem where equality check + discard just doesn't work.
if (alphaTestFunc != 3)
WRITE(p, " if (roundTo255thf(v.a) %s u_alphacolorref.a) discard;\n", alphaTestFuncs[alphaTestFunc]);
} else {
WRITE(p, " if (roundAndScaleTo255f(v.a) %s u_alphacolorref.a) discard;\n", alphaTestFuncs[alphaTestFunc]);
}
}
}
// TODO: Before or after the color test?
if (enableColorDoubling && enableAlphaDoubling) {
WRITE(p, " v = v * 2.0;\n");
} else if (enableColorDoubling) {
WRITE(p, " v.rgb = v.rgb * 2.0;\n");
} else if (enableAlphaDoubling) {
WRITE(p, " v.a = v.a * 2.0;\n");
}
if (enableColorTest) {
GEComparison colorTestFunc = gstate.getColorTestFunction();
const char *colorTestFuncs[] = { "#", "#", " != ", " == " }; // never/always don't make sense
WARN_LOG_REPORT_ONCE(colortest, G3D, "Color test function : %s", colorTestFuncs[colorTestFunc]);
u32 colorTestMask = gstate.getColorTestMask();
if (colorTestFuncs[colorTestFunc][0] != '#') {
if (gl_extensions.gpuVendor == GPU_VENDOR_POWERVR)
WRITE(p, " if (roundTo255thv(v.rgb) %s u_alphacolorref.rgb) discard;\n", colorTestFuncs[colorTestFunc]);
else
WRITE(p, " if (roundAndScaleTo255v(v.rgb) %s u_alphacolorref.rgb) discard;\n", colorTestFuncs[colorTestFunc]);
}
}
if (enableFog) {
WRITE(p, " float fogCoef = clamp(v_fogdepth, 0.0, 1.0);\n");
WRITE(p, " v = mix(vec4(u_fogcolor, v.a), v, fogCoef);\n");
// WRITE(p, " v.x = v_depth;\n");
}
}
switch (stencilToAlpha) {
case REPLACE_ALPHA_DUALSOURCE:
WRITE(p, " fragColor0 = vec4(v.rgb, 0.0);\n");
WRITE(p, " fragColor1 = vec4(0.0, 0.0, 0.0, v.a);\n");
break;
case REPLACE_ALPHA_YES:
WRITE(p, " %s = vec4(v.rgb, 0.0);\n", fragColor0);
break;
case REPLACE_ALPHA_NO:
WRITE(p, " %s = v;\n", fragColor0);
break;
}
if (stencilToAlpha != REPLACE_ALPHA_NO) {
switch (ReplaceAlphaWithStencilType()) {
case STENCIL_VALUE_UNIFORM:
WRITE(p, " %s.a = u_stencilReplaceValue;\n", fragColor0);
break;
case STENCIL_VALUE_ZERO:
WRITE(p, " %s.a = 0.0;\n", fragColor0);
break;
case STENCIL_VALUE_ONE:
WRITE(p, " %s.a = 1.0;\n", fragColor0);
break;
case STENCIL_VALUE_UNKNOWN:
// Maybe we should even mask away alpha using glColorMask and not change it at all? We do get here
// if the stencil mode is KEEP for example.
WRITE(p, " %s.a = 0.0;\n", fragColor0);
break;
case STENCIL_VALUE_KEEP:
// Do nothing. We'll mask out the alpha using color mask.
break;
}
}
#ifdef DEBUG_SHADER
if (doTexture) {
WRITE(p, " %s = texture2D(tex, v_texcoord.xy);\n", fragColor0);
WRITE(p, " %s += vec4(0.3,0,0.3,0.3);\n", fragColor0);
} else {
WRITE(p, " %s = vec4(1,0,1,1);\n", fragColor0);
}
#endif
WRITE(p, "}\n");
}