Experiment: Generate "Ubershaders" that can handle all lighting configurations

This drastically reduces the shader compile stutter that happens when a lot of new
light setups are created, like on the first punch in Tekken 6.

There are other settings that might benefit from being made dynamic like this.
The resulting branches are very cheap on modern GPUs because they branch on
a uniform variable, so there is no divergence between shader invocations.

Only tested on Vulkan so far. I think we'll need to keep the old path as well,
for GPUs like the Mali-450...
This commit is contained in:
Henrik Rydgård 2022-09-25 13:59:52 +02:00
parent b1afeeaf43
commit 7adba20fac
9 changed files with 218 additions and 86 deletions

View File

@ -88,12 +88,13 @@ enum : uint64_t {
DIRTY_COLORWRITEMASK = 1ULL << 36, DIRTY_COLORWRITEMASK = 1ULL << 36,
DIRTY_MIPBIAS = 1ULL << 37, DIRTY_MIPBIAS = 1ULL << 37,
DIRTY_LIGHT_CONTROL = 1ULL << 38,
// space for 4 more uniform dirty flags. Remember to update DIRTY_ALL_UNIFORMS. // space for 1 more uniform dirty flags. Remember to update DIRTY_ALL_UNIFORMS.
DIRTY_BONE_UNIFORMS = 0xFF000000ULL, DIRTY_BONE_UNIFORMS = 0xFF000000ULL,
DIRTY_ALL_UNIFORMS = 0x3FFFFFFFFFULL, DIRTY_ALL_UNIFORMS = 0x7FFFFFFFFFULL,
DIRTY_ALL_LIGHTS = DIRTY_LIGHT0 | DIRTY_LIGHT1 | DIRTY_LIGHT2 | DIRTY_LIGHT3, DIRTY_ALL_LIGHTS = DIRTY_LIGHT0 | DIRTY_LIGHT1 | DIRTY_LIGHT2 | DIRTY_LIGHT3,
// Other dirty elements that aren't uniforms! // Other dirty elements that aren't uniforms!
@ -113,6 +114,8 @@ enum : uint64_t {
// TODO: Should we also add DIRTY_FRAMEBUF here? It kinda generally takes care of itself. // TODO: Should we also add DIRTY_FRAMEBUF here? It kinda generally takes care of itself.
DIRTY_ALL_RENDER_STATE = DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS, DIRTY_ALL_RENDER_STATE = DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS,
// Note that the top 8 bits (56-63) cannot be dirtied through the commonCommandTable due to packing of other flags.
DIRTY_ALL = 0xFFFFFFFFFFFFFFFF DIRTY_ALL = 0xFFFFFFFFFFFFFFFF
}; };

View File

@ -41,6 +41,9 @@ std::string VertexShaderDesc(const VShaderID &id) {
if (id.Bit(VS_BIT_LIGHTING_ENABLE)) { if (id.Bit(VS_BIT_LIGHTING_ENABLE)) {
desc << "Light: "; desc << "Light: ";
} }
if (id.Bit(VS_BIT_LIGHT_UBERSHADER)) {
desc << "LightUberShader ";
}
for (int i = 0; i < 4; i++) { for (int i = 0; i < 4; i++) {
bool enabled = id.Bit(VS_BIT_LIGHT0_ENABLE + i) && id.Bit(VS_BIT_LIGHTING_ENABLE); bool enabled = id.Bit(VS_BIT_LIGHT0_ENABLE + i) && id.Bit(VS_BIT_LIGHTING_ENABLE);
if (enabled || (uvgMode == GE_TEXMAP_ENVIRONMENT_MAP && (ls0 == i || ls1 == i))) { if (enabled || (uvgMode == GE_TEXMAP_ENVIRONMENT_MAP && (ls0 == i || ls1 == i))) {
@ -125,13 +128,17 @@ void ComputeVertexShaderID(VShaderID *id_out, u32 vertType, bool useHWTransform,
// doShadeMapping is stored as UVGenMode, and light type doesn't matter for shade mapping. // doShadeMapping is stored as UVGenMode, and light type doesn't matter for shade mapping.
id.SetBits(VS_BIT_MATERIAL_UPDATE, 3, gstate.getMaterialUpdate()); id.SetBits(VS_BIT_MATERIAL_UPDATE, 3, gstate.getMaterialUpdate());
id.SetBit(VS_BIT_LIGHTING_ENABLE); id.SetBit(VS_BIT_LIGHTING_ENABLE);
// Light bits if (gstate_c.Supports(GPU_USE_LIGHT_UBERSHADER)) {
for (int i = 0; i < 4; i++) { id.SetBit(VS_BIT_LIGHT_UBERSHADER);
bool chanEnabled = gstate.isLightChanEnabled(i) != 0; } else {
id.SetBit(VS_BIT_LIGHT0_ENABLE + i, chanEnabled); // Light bits
if (chanEnabled) { for (int i = 0; i < 4; i++) {
id.SetBits(VS_BIT_LIGHT0_COMP + 4 * i, 2, gstate.getLightComputation(i)); bool chanEnabled = gstate.isLightChanEnabled(i) != 0;
id.SetBits(VS_BIT_LIGHT0_TYPE + 4 * i, 2, gstate.getLightType(i)); id.SetBit(VS_BIT_LIGHT0_ENABLE + i, chanEnabled);
if (chanEnabled) {
id.SetBits(VS_BIT_LIGHT0_COMP + 4 * i, 2, gstate.getLightComputation(i));
id.SetBits(VS_BIT_LIGHT0_TYPE + 4 * i, 2, gstate.getLightType(i));
}
} }
} }
} }

View File

@ -33,7 +33,11 @@ enum VShaderBit : uint8_t {
VS_BIT_BONES = 22, // 3 should be enough, not 8 VS_BIT_BONES = 22, // 3 should be enough, not 8
// 25 - 29 are free. // 25 - 29 are free.
VS_BIT_ENABLE_BONES = 30, VS_BIT_ENABLE_BONES = 30,
// 31 is free.
// If this is set along with LIGHTING_ENABLE, the per-light enable/comp/type bits below
// are left out of the shader ID and passed to the shader at runtime (u_lightControl) instead.
VS_BIT_LIGHT_UBERSHADER = 31,
VS_BIT_LIGHT0_COMP = 32, // 2 bits VS_BIT_LIGHT0_COMP = 32, // 2 bits
VS_BIT_LIGHT0_TYPE = 34, // 2 bits VS_BIT_LIGHT0_TYPE = 34, // 2 bits
VS_BIT_LIGHT1_COMP = 36, // 2 bits VS_BIT_LIGHT1_COMP = 36, // 2 bits

View File

@ -279,8 +279,30 @@ void LightUpdateUniforms(UB_VS_Lights *ub, uint64_t dirtyUniforms) {
Uint8x3ToFloat4_Alpha(ub->materialSpecular, gstate.materialspecular, std::max(0.0f, getFloat24(gstate.materialspecularcoef))); Uint8x3ToFloat4_Alpha(ub->materialSpecular, gstate.materialspecular, std::max(0.0f, getFloat24(gstate.materialspecularcoef)));
} }
if (dirtyUniforms & DIRTY_MATEMISSIVE) { if (dirtyUniforms & DIRTY_MATEMISSIVE) {
Uint8x3ToFloat4(ub->materialEmissive, gstate.materialemissive); // We're not touching the fourth f32 here, because we store an u32 of control bits in it.
float temp[4];
Uint8x3ToFloat4(temp, gstate.materialemissive);
memcpy(ub->materialEmissive, temp, 12);
} }
if (dirtyUniforms & DIRTY_LIGHT_CONTROL) {
// Bit organization
// Bottom 4 bits are enable bits for each light.
// Then, for each light, comes 2 bits for "comp" and 2 bits for "type".
uint32_t lightControl = 0;
for (int i = 0; i < 4; i++) {
if (gstate.isLightChanEnabled(i)) {
lightControl |= 1 << i;
}
u32 computation = (u32)gstate.getLightComputation(i); // 2 bits
u32 type = (u32)gstate.getLightType(i); // 2 bits
lightControl |= computation << (4 + i * 4);
lightControl |= type << (4 + i * 4 + 2);
}
ub->lightControl = lightControl;
}
for (int i = 0; i < 4; i++) { for (int i = 0; i < 4; i++) {
if (dirtyUniforms & (DIRTY_LIGHT0 << i)) { if (dirtyUniforms & (DIRTY_LIGHT0 << i)) {
if (gstate.isDirectionalLight(i)) { if (gstate.isDirectionalLight(i)) {

View File

@ -80,7 +80,8 @@ struct UB_VS_Lights {
float ambientColor[4]; float ambientColor[4];
float materialDiffuse[4]; float materialDiffuse[4];
float materialSpecular[4]; float materialSpecular[4];
float materialEmissive[4]; float materialEmissive[3];
uint32_t lightControl;
float lpos[4][4]; float lpos[4][4];
float ldir[4][4]; float ldir[4][4];
float latt[4][4]; float latt[4][4];
@ -95,6 +96,7 @@ R"( vec4 u_ambient;
vec3 u_matdiffuse; vec3 u_matdiffuse;
vec4 u_matspecular; vec4 u_matspecular;
vec3 u_matemissive; vec3 u_matemissive;
uint u_lightControl; // light ubershader
vec3 u_lightpos0; vec3 u_lightpos0;
vec3 u_lightpos1; vec3 u_lightpos1;
vec3 u_lightpos2; vec3 u_lightpos2;

View File

@ -185,6 +185,11 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
bool enableLighting = id.Bit(VS_BIT_LIGHTING_ENABLE); bool enableLighting = id.Bit(VS_BIT_LIGHTING_ENABLE);
int matUpdate = id.Bits(VS_BIT_MATERIAL_UPDATE, 3); int matUpdate = id.Bits(VS_BIT_MATERIAL_UPDATE, 3);
bool lightUberShader = id.Bit(VS_BIT_LIGHT_UBERSHADER);
if (lightUberShader) {
_dbg_assert_(compat.bitwiseOps);
}
// Apparently we don't support bezier/spline together with bones. // Apparently we don't support bezier/spline together with bones.
bool doBezier = id.Bit(VS_BIT_BEZIER) && !enableBones && useHWTransform; bool doBezier = id.Bit(VS_BIT_BEZIER) && !enableBones && useHWTransform;
bool doSpline = id.Bit(VS_BIT_SPLINE) && !enableBones && useHWTransform; bool doSpline = id.Bit(VS_BIT_SPLINE) && !enableBones && useHWTransform;
@ -524,12 +529,12 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
*uniformMask |= DIRTY_UVSCALEOFFSET; *uniformMask |= DIRTY_UVSCALEOFFSET;
} }
for (int i = 0; i < 4; i++) { for (int i = 0; i < 4; i++) {
if (doLight[i] != LIGHT_OFF) { if (lightUberShader || doLight[i] != LIGHT_OFF) {
// This is needed for shade mapping // This is needed for shade mapping
WRITE(p, "uniform vec3 u_lightpos%i;\n", i); WRITE(p, "uniform vec3 u_lightpos%i;\n", i);
*uniformMask |= DIRTY_LIGHT0 << i; *uniformMask |= DIRTY_LIGHT0 << i;
} }
if (doLight[i] == LIGHT_FULL) { if (lightUberShader || doLight[i] == LIGHT_FULL) {
*uniformMask |= DIRTY_LIGHT0 << i; *uniformMask |= DIRTY_LIGHT0 << i;
GELightType type = static_cast<GELightType>(id.Bits(VS_BIT_LIGHT0_TYPE + 4 * i, 2)); GELightType type = static_cast<GELightType>(id.Bits(VS_BIT_LIGHT0_TYPE + 4 * i, 2));
GELightComputation comp = static_cast<GELightComputation>(id.Bits(VS_BIT_LIGHT0_COMP + 4 * i, 2)); GELightComputation comp = static_cast<GELightComputation>(id.Bits(VS_BIT_LIGHT0_COMP + 4 * i, 2));
@ -728,7 +733,6 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
WRITE(p, " vec4 basis_u = tess_weights_u[weight_idx.x].basis;\n"); WRITE(p, " vec4 basis_u = tess_weights_u[weight_idx.x].basis;\n");
WRITE(p, " vec4 basis_v = tess_weights_v[weight_idx.y].basis;\n"); WRITE(p, " vec4 basis_v = tess_weights_v[weight_idx.y].basis;\n");
WRITE(p, " mat4 basis = outerProduct(basis_u, basis_v);\n"); WRITE(p, " mat4 basis = outerProduct(basis_u, basis_v);\n");
} else { } else {
WRITE(p, " int index_u, index_v;\n"); WRITE(p, " int index_u, index_v;\n");
for (int i = 0; i < 4; i++) { for (int i = 0; i < 4; i++) {
@ -987,6 +991,13 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
anySpots = true; anySpots = true;
} }
if (lightUberShader) {
anySpots = true;
diffuseIsZero = false;
specularIsZero = false;
distanceNeeded = true;
}
if (!specularIsZero) { if (!specularIsZero) {
WRITE(p, " lowp vec3 lightSum1 = splat3(0.0);\n"); WRITE(p, " lowp vec3 lightSum1 = splat3(0.0);\n");
} }
@ -1004,76 +1015,131 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
} }
} }
// Calculate lights if needed. If shade mapping is enabled, lights may need to be if (lightUberShader) {
// at least partially calculated. // TODO: Actually loop in the shader. For now, we write it all out.
for (int i = 0; i < 4; i++) { for (int i = 0; i < 4; i++) {
if (doLight[i] != LIGHT_FULL) p.F("if ((u_lightControl & %d) != 0) {\n", 1 << i);
continue; p.F(" uint type = (u_lightControl >> %d) & 3;\n", 4 + 4 * i);
p.F(" uint comp = (u_lightControl >> %d) & 3;\n", 4 + 4 * i + 2);
GELightType type = static_cast<GELightType>(id.Bits(VS_BIT_LIGHT0_TYPE + 4*i, 2)); p.C(" if (type == 0) {\n"); // GE_LIGHTTYPE_DIRECTIONAL
GELightComputation comp = static_cast<GELightComputation>(id.Bits(VS_BIT_LIGHT0_COMP + 4*i, 2)); p.F(" toLight = u_lightpos%d;\n", i);
p.C(" } else {\n");
if (type == GE_LIGHTTYPE_DIRECTIONAL) { p.F(" toLight = u_lightpos%d - worldpos;\n", i);
// We prenormalize light positions for directional lights. p.F(" distance = length(toLight);\n", i);
WRITE(p, " toLight = u_lightpos%i;\n", i); p.F(" toLight /= distance;\n", i);
} else { p.C(" }\n");
WRITE(p, " toLight = u_lightpos%i - worldpos;\n", i); p.C(" ldot = dot(toLight, worldnormal);\n");
WRITE(p, " distance = length(toLight);\n"); p.C(" if (comp == 2) {\n"); // GE_LIGHTCOMP_ONLYPOWDIFFUSE
WRITE(p, " toLight /= distance;\n"); p.C(" if (u_matspecular.a <= 0.0) {\n");
p.C(" ldot = 1.0;\n");
p.C(" } else {\n");
p.C(" ldot = pow(max(ldot, 0.0), u_matspecular.a);\n");
p.C(" }\n");
p.C(" }\n");
p.C(" switch (type) {\n");// Attenuation
p.C(" case 1:\n"); // GE_LIGHTTYPE_POINT
p.F(" lightScale = clamp(1.0 / dot(u_lightatt%i, vec3(1.0, distance, distance*distance)), 0.0, 1.0);\n", i);
p.C(" break;\n");
p.C(" case 2:\n"); // GE_LIGHTTYPE_SPOT
p.F(" angle = length(u_lightdir%i) == 0.0 ? 0.0 : dot(normalize(u_lightdir%i), toLight);\n", i, i);
p.F(" if (angle >= u_lightangle_spotCoef%i.x) {\n", i);
p.F(" lightScale = clamp(1.0 / dot(u_lightatt%i, vec3(1.0, distance, distance*distance)), 0.0, 1.0) * (u_lightangle_spotCoef%i.y <= 0.0 ? 1.0 : pow(angle, u_lightangle_spotCoef%i.y));\n", i, i, i);
p.C(" } else {\n");
p.C(" lightScale = 0.0;\n");
p.C(" }\n");
p.C(" break;\n");
p.C(" default:\n"); // GE_LIGHTTYPE_DIRECTIONAL
p.C(" lightScale = 1.0;\n");
p.C(" break;\n");
p.C(" }\n");
p.F(" diffuse = (u_lightdiffuse%i * %s) * max(ldot, 0.0);\n", i, diffuseStr);
p.C(" if (comp == 1) {\n"); // do specular
p.C(" if (ldot >= 0.0) {\n");
p.C(" ldot = dot(normalize(toLight + vec3(0.0, 0.0, 1.0)), worldnormal);\n");
p.C(" if (u_matspecular.a <= 0.0) {\n");
p.C(" ldot = 1.0;\n");
p.C(" } else {\n");
p.C(" ldot = pow(max(ldot, 0.0), u_matspecular.a);\n");
p.C(" }\n");
p.C(" if (ldot > 0.0)\n");
p.F(" lightSum1 += u_lightspecular%i * %s * ldot * lightScale;\n", i, specularStr);
p.C(" }\n");
p.C(" }\n");
p.F(" lightSum0.rgb += (u_lightambient%i * %s.rgb + diffuse) * lightScale;\n", i, ambientStr);
p.C(" }\n");
} }
} else {
// Calculate lights if needed. If shade mapping is enabled, lights may need to be
// at least partially calculated.
for (int i = 0; i < 4; i++) {
if (doLight[i] != LIGHT_FULL)
continue;
bool doSpecular = comp == GE_LIGHTCOMP_BOTH; GELightType type = static_cast<GELightType>(id.Bits(VS_BIT_LIGHT0_TYPE + 4 * i, 2));
bool poweredDiffuse = comp == GE_LIGHTCOMP_ONLYPOWDIFFUSE; GELightComputation comp = static_cast<GELightComputation>(id.Bits(VS_BIT_LIGHT0_COMP + 4 * i, 2));
WRITE(p, " ldot = dot(toLight, worldnormal);\n"); if (type == GE_LIGHTTYPE_DIRECTIONAL) {
if (poweredDiffuse) { // We prenormalize light positions for directional lights.
// pow(0.0, 0.0) may be undefined, but the PSP seems to treat it as 1.0. p.F(" toLight = u_lightpos%i;\n", i);
// Seen in Tales of the World: Radiant Mythology (#2424.) } else {
WRITE(p, " if (u_matspecular.a <= 0.0) {\n"); p.F(" toLight = u_lightpos%i - worldpos;\n", i);
WRITE(p, " ldot = 1.0;\n"); p.C(" distance = length(toLight);\n");
WRITE(p, " } else {\n"); p.C(" toLight /= distance;\n");
WRITE(p, " ldot = pow(max(ldot, 0.0), u_matspecular.a);\n"); }
WRITE(p, " }\n");
bool doSpecular = comp == GE_LIGHTCOMP_BOTH;
bool poweredDiffuse = comp == GE_LIGHTCOMP_ONLYPOWDIFFUSE;
p.C(" ldot = dot(toLight, worldnormal);\n");
if (poweredDiffuse) {
// pow(0.0, 0.0) may be undefined, but the PSP seems to treat it as 1.0.
// Seen in Tales of the World: Radiant Mythology (#2424.)
p.C(" if (u_matspecular.a <= 0.0) {\n");
p.C(" ldot = 1.0;\n");
p.C(" } else {\n");
p.C(" ldot = pow(max(ldot, 0.0), u_matspecular.a);\n");
p.C(" }\n");
}
const char *timesLightScale = " * lightScale";
// Attenuation
switch (type) {
case GE_LIGHTTYPE_DIRECTIONAL:
timesLightScale = "";
break;
case GE_LIGHTTYPE_POINT:
p.F(" lightScale = clamp(1.0 / dot(u_lightatt%i, vec3(1.0, distance, distance*distance)), 0.0, 1.0);\n", i);
break;
case GE_LIGHTTYPE_SPOT:
case GE_LIGHTTYPE_UNKNOWN:
p.F(" angle = length(u_lightdir%i) == 0.0 ? 0.0 : dot(normalize(u_lightdir%i), toLight);\n", i, i);
p.F(" if (angle >= u_lightangle_spotCoef%i.x) {\n", i);
p.F(" lightScale = clamp(1.0 / dot(u_lightatt%i, vec3(1.0, distance, distance*distance)), 0.0, 1.0) * (u_lightangle_spotCoef%i.y <= 0.0 ? 1.0 : pow(angle, u_lightangle_spotCoef%i.y));\n", i, i, i);
p.C(" } else {\n");
p.C(" lightScale = 0.0;\n");
p.C(" }\n");
break;
default:
// ILLEGAL
break;
}
p.F(" diffuse = (u_lightdiffuse%i * %s) * max(ldot, 0.0);\n", i, diffuseStr);
if (doSpecular) {
p.C(" if (ldot >= 0.0) {\n");
p.C(" ldot = dot(normalize(toLight + vec3(0.0, 0.0, 1.0)), worldnormal);\n");
p.C(" if (u_matspecular.a <= 0.0) {\n");
p.C(" ldot = 1.0;\n");
p.C(" } else {\n");
p.C(" ldot = pow(max(ldot, 0.0), u_matspecular.a);\n");
p.C(" }\n");
p.C(" if (ldot > 0.0)\n");
p.F(" lightSum1 += u_lightspecular%i * %s * ldot %s;\n", i, specularStr, timesLightScale);
p.C(" }\n");
}
p.F(" lightSum0.rgb += (u_lightambient%i * %s.rgb + diffuse)%s;\n", i, ambientStr, timesLightScale);
} }
const char *timesLightScale = " * lightScale";
// Attenuation
switch (type) {
case GE_LIGHTTYPE_DIRECTIONAL:
timesLightScale = "";
break;
case GE_LIGHTTYPE_POINT:
WRITE(p, " lightScale = clamp(1.0 / dot(u_lightatt%i, vec3(1.0, distance, distance*distance)), 0.0, 1.0);\n", i);
break;
case GE_LIGHTTYPE_SPOT:
case GE_LIGHTTYPE_UNKNOWN:
WRITE(p, " angle = length(u_lightdir%i) == 0.0 ? 0.0 : dot(normalize(u_lightdir%i), toLight);\n", i, i);
WRITE(p, " if (angle >= u_lightangle_spotCoef%i.x) {\n", i);
WRITE(p, " lightScale = clamp(1.0 / dot(u_lightatt%i, vec3(1.0, distance, distance*distance)), 0.0, 1.0) * (u_lightangle_spotCoef%i.y <= 0.0 ? 1.0 : pow(angle, u_lightangle_spotCoef%i.y));\n", i, i, i);
WRITE(p, " } else {\n");
WRITE(p, " lightScale = 0.0;\n");
WRITE(p, " }\n");
break;
default:
// ILLEGAL
break;
}
WRITE(p, " diffuse = (u_lightdiffuse%i * %s) * max(ldot, 0.0);\n", i, diffuseStr);
if (doSpecular) {
WRITE(p, " if (ldot >= 0.0) {\n");
WRITE(p, " ldot = dot(normalize(toLight + vec3(0.0, 0.0, 1.0)), worldnormal);\n");
WRITE(p, " if (u_matspecular.a <= 0.0) {\n");
WRITE(p, " ldot = 1.0;\n");
WRITE(p, " } else {\n");
WRITE(p, " ldot = pow(max(ldot, 0.0), u_matspecular.a);\n");
WRITE(p, " }\n");
WRITE(p, " if (ldot > 0.0)\n");
WRITE(p, " lightSum1 += u_lightspecular%i * %s * ldot %s;\n", i, specularStr, timesLightScale);
WRITE(p, " }\n");
}
WRITE(p, " lightSum0.rgb += (u_lightambient%i * %s.rgb + diffuse)%s;\n", i, ambientStr, timesLightScale);
} }
if (enableLighting) { if (enableLighting) {

View File

@ -103,10 +103,10 @@ const CommonCommandTableEntry commonCommandTable[] = {
// These change the vertex shader so need flushing. // These change the vertex shader so need flushing.
{ GE_CMD_REVERSENORMAL, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE }, { GE_CMD_REVERSENORMAL, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE },
{ GE_CMD_LIGHTINGENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE }, { GE_CMD_LIGHTINGENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE },
{ GE_CMD_LIGHTENABLE0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE }, { GE_CMD_LIGHTENABLE0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE },
{ GE_CMD_LIGHTENABLE1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE }, { GE_CMD_LIGHTENABLE1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE },
{ GE_CMD_LIGHTENABLE2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE }, { GE_CMD_LIGHTENABLE2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE },
{ GE_CMD_LIGHTENABLE3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE }, { GE_CMD_LIGHTENABLE3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE },
{ GE_CMD_LIGHTTYPE0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_LIGHT0 }, { GE_CMD_LIGHTTYPE0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_LIGHT0 },
{ GE_CMD_LIGHTTYPE1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_LIGHT1 }, { GE_CMD_LIGHTTYPE1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_LIGHT1 },
{ GE_CMD_LIGHTTYPE2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_LIGHT2 }, { GE_CMD_LIGHTTYPE2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_LIGHT2 },
@ -450,6 +450,21 @@ void GPUCommon::UpdateCmdInfo() {
cmdInfo_[GE_CMD_JUMP].func = &GPUCommon::Execute_Jump; cmdInfo_[GE_CMD_JUMP].func = &GPUCommon::Execute_Jump;
cmdInfo_[GE_CMD_CALL].func = &GPUCommon::Execute_Call; cmdInfo_[GE_CMD_CALL].func = &GPUCommon::Execute_Call;
} }
// Reconfigure for light ubershader or not.
for (int i = 0; i < 4; i++) {
if (gstate_c.Supports(GPU_USE_LIGHT_UBERSHADER)) {
cmdInfo_[GE_CMD_LIGHTENABLE0 + i].RemoveDirty(DIRTY_VERTEXSHADER_STATE);
cmdInfo_[GE_CMD_LIGHTENABLE0 + i].AddDirty(DIRTY_LIGHT_CONTROL);
cmdInfo_[GE_CMD_LIGHTTYPE0 + i].RemoveDirty(DIRTY_VERTEXSHADER_STATE);
cmdInfo_[GE_CMD_LIGHTTYPE0 + i].AddDirty(DIRTY_LIGHT_CONTROL);
} else {
cmdInfo_[GE_CMD_LIGHTENABLE0 + i].RemoveDirty(DIRTY_LIGHT_CONTROL);
cmdInfo_[GE_CMD_LIGHTENABLE0 + i].AddDirty(DIRTY_VERTEXSHADER_STATE);
cmdInfo_[GE_CMD_LIGHTTYPE0 + i].RemoveDirty(DIRTY_LIGHT_CONTROL);
cmdInfo_[GE_CMD_LIGHTTYPE0 + i].AddDirty(DIRTY_VERTEXSHADER_STATE);
}
}
} }
void GPUCommon::BeginHostFrame() { void GPUCommon::BeginHostFrame() {
@ -3202,6 +3217,10 @@ u32 GPUCommon::CheckGPUFeatures() const {
features |= GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH; features |= GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH;
} }
if (draw_->GetDeviceCaps().fragmentShaderInt32Supported) {
features |= GPU_USE_LIGHT_UBERSHADER;
}
if (PSP_CoreParameter().compat.flags().ClearToRAM) { if (PSP_CoreParameter().compat.flags().ClearToRAM) {
features |= GPU_USE_CLEAR_RAM_HACK; features |= GPU_USE_CLEAR_RAM_HACK;
} }

View File

@ -314,6 +314,14 @@ protected:
// One entry of the cmdInfo_ table: a packed flags word plus the handler function for a command.
struct CommandInfo { struct CommandInfo {
// Packed word. Dirty bits are stored shifted left by 8 (see AddDirty/RemoveDirty below);
// the low 8 bits are presumably the per-command execution flags - TODO confirm against the table setup.
uint64_t flags; uint64_t flags;
// Handler invoked for this command.
GPUCommon::CmdFunc func; GPUCommon::CmdFunc func;
// Dirty flags are mashed into the regular flags by a left shift of 8.
// Because of that shift, the top 8 bits of `dirty` are shifted out and cannot be stored here
// (assuming u64 is a 64-bit type).
// Sets the given dirty bits on this command, leaving everything else in flags untouched.
void AddDirty(u64 dirty) {
flags |= dirty << 8;
}
// Clears the given dirty bits from this command, leaving everything else in flags untouched.
void RemoveDirty(u64 dirty) {
flags &= ~(dirty << 8);
}
}; };
static CommandInfo cmdInfo_[256]; static CommandInfo cmdInfo_[256];

View File

@ -469,7 +469,8 @@ struct UVScale {
// Might want to move this mechanism into the backend later. // Might want to move this mechanism into the backend later.
enum { enum {
GPU_SUPPORTS_DUALSOURCE_BLEND = FLAG_BIT(0), GPU_SUPPORTS_DUALSOURCE_BLEND = FLAG_BIT(0),
// Free bits: 1-2 GPU_USE_LIGHT_UBERSHADER = FLAG_BIT(1),
// Free bit: 2
GPU_SUPPORTS_VS_RANGE_CULLING = FLAG_BIT(3), GPU_SUPPORTS_VS_RANGE_CULLING = FLAG_BIT(3),
GPU_SUPPORTS_BLEND_MINMAX = FLAG_BIT(4), GPU_SUPPORTS_BLEND_MINMAX = FLAG_BIT(4),
GPU_SUPPORTS_LOGIC_OP = FLAG_BIT(5), GPU_SUPPORTS_LOGIC_OP = FLAG_BIT(5),