diff --git a/GPU/Common/ShaderCommon.h b/GPU/Common/ShaderCommon.h index a4adfb70ea..d6dfbbc3f8 100644 --- a/GPU/Common/ShaderCommon.h +++ b/GPU/Common/ShaderCommon.h @@ -88,12 +88,13 @@ enum : uint64_t { DIRTY_COLORWRITEMASK = 1ULL << 36, DIRTY_MIPBIAS = 1ULL << 37, + DIRTY_LIGHT_CONTROL = 1ULL << 38, - // space for 4 more uniform dirty flags. Remember to update DIRTY_ALL_UNIFORMS. + // space for 1 more uniform dirty flag. Remember to update DIRTY_ALL_UNIFORMS. DIRTY_BONE_UNIFORMS = 0xFF000000ULL, - DIRTY_ALL_UNIFORMS = 0x3FFFFFFFFFULL, + DIRTY_ALL_UNIFORMS = 0x7FFFFFFFFFULL, DIRTY_ALL_LIGHTS = DIRTY_LIGHT0 | DIRTY_LIGHT1 | DIRTY_LIGHT2 | DIRTY_LIGHT3, // Other dirty elements that aren't uniforms! @@ -113,6 +114,8 @@ enum : uint64_t { // TODO: Should we also add DIRTY_FRAMEBUF here? It kinda generally takes care of itself. DIRTY_ALL_RENDER_STATE = DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS, + // Note that the top 8 bits (56-63) cannot be dirtied through the commonCommandTable due to packing of other flags. + DIRTY_ALL = 0xFFFFFFFFFFFFFFFF }; diff --git a/GPU/Common/ShaderId.cpp b/GPU/Common/ShaderId.cpp index 6345816604..7530e74aaf 100644 --- a/GPU/Common/ShaderId.cpp +++ b/GPU/Common/ShaderId.cpp @@ -41,6 +41,9 @@ std::string VertexShaderDesc(const VShaderID &id) { if (id.Bit(VS_BIT_LIGHTING_ENABLE)) { desc << "Light: "; } + if (id.Bit(VS_BIT_LIGHT_UBERSHADER)) { + desc << "LightUberShader "; + } for (int i = 0; i < 4; i++) { bool enabled = id.Bit(VS_BIT_LIGHT0_ENABLE + i) && id.Bit(VS_BIT_LIGHTING_ENABLE); if (enabled || (uvgMode == GE_TEXMAP_ENVIRONMENT_MAP && (ls0 == i || ls1 == i))) { @@ -125,13 +128,17 @@ void ComputeVertexShaderID(VShaderID *id_out, u32 vertType, bool useHWTransform, // doShadeMapping is stored as UVGenMode, and light type doesn't matter for shade mapping.
id.SetBits(VS_BIT_MATERIAL_UPDATE, 3, gstate.getMaterialUpdate()); id.SetBit(VS_BIT_LIGHTING_ENABLE); - // Light bits - for (int i = 0; i < 4; i++) { - bool chanEnabled = gstate.isLightChanEnabled(i) != 0; - id.SetBit(VS_BIT_LIGHT0_ENABLE + i, chanEnabled); - if (chanEnabled) { - id.SetBits(VS_BIT_LIGHT0_COMP + 4 * i, 2, gstate.getLightComputation(i)); - id.SetBits(VS_BIT_LIGHT0_TYPE + 4 * i, 2, gstate.getLightType(i)); + if (gstate_c.Supports(GPU_USE_LIGHT_UBERSHADER)) { + id.SetBit(VS_BIT_LIGHT_UBERSHADER); + } else { + // Light bits + for (int i = 0; i < 4; i++) { + bool chanEnabled = gstate.isLightChanEnabled(i) != 0; + id.SetBit(VS_BIT_LIGHT0_ENABLE + i, chanEnabled); + if (chanEnabled) { + id.SetBits(VS_BIT_LIGHT0_COMP + 4 * i, 2, gstate.getLightComputation(i)); + id.SetBits(VS_BIT_LIGHT0_TYPE + 4 * i, 2, gstate.getLightType(i)); + } } } } diff --git a/GPU/Common/ShaderId.h b/GPU/Common/ShaderId.h index 999faf2e44..a27d7ad7fa 100644 --- a/GPU/Common/ShaderId.h +++ b/GPU/Common/ShaderId.h @@ -33,7 +33,11 @@ enum VShaderBit : uint8_t { VS_BIT_BONES = 22, // 3 should be enough, not 8 // 25 - 29 are free. VS_BIT_ENABLE_BONES = 30, - // 31 is free. + + // If this is set along with LIGHTING_ENABLE, all other lighting bits below + // are passed to the shader directly instead. + VS_BIT_LIGHT_UBERSHADER = 31, + VS_BIT_LIGHT0_COMP = 32, // 2 bits VS_BIT_LIGHT0_TYPE = 34, // 2 bits VS_BIT_LIGHT1_COMP = 36, // 2 bits diff --git a/GPU/Common/ShaderUniforms.cpp b/GPU/Common/ShaderUniforms.cpp index c3480e6fef..8bf1f69356 100644 --- a/GPU/Common/ShaderUniforms.cpp +++ b/GPU/Common/ShaderUniforms.cpp @@ -267,6 +267,24 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView } } +uint32_t PackLightControlBits() { + // Bit organization + // Bottom 4 bits are enable bits for each light. + // Then, for each light, comes 2 bits for "comp" and 2 bits for "type". 
+ uint32_t lightControl = 0; + for (int i = 0; i < 4; i++) { + if (gstate.isLightChanEnabled(i)) { + lightControl |= 1 << i; + } + + u32 computation = (u32)gstate.getLightComputation(i); // 2 bits + u32 type = (u32)gstate.getLightType(i); // 2 bits + lightControl |= computation << (4 + i * 4); + lightControl |= type << (4 + i * 4 + 2); + } + return lightControl; +} + void LightUpdateUniforms(UB_VS_Lights *ub, uint64_t dirtyUniforms) { // Lighting if (dirtyUniforms & DIRTY_AMBIENT) { @@ -279,7 +297,13 @@ void LightUpdateUniforms(UB_VS_Lights *ub, uint64_t dirtyUniforms) { Uint8x3ToFloat4_Alpha(ub->materialSpecular, gstate.materialspecular, std::max(0.0f, getFloat24(gstate.materialspecularcoef))); } if (dirtyUniforms & DIRTY_MATEMISSIVE) { - Uint8x3ToFloat4(ub->materialEmissive, gstate.materialemissive); + // We're not touching the fourth f32 here, because we store a u32 of control bits in it. + float temp[4]; + Uint8x3ToFloat4(temp, gstate.materialemissive); + memcpy(ub->materialEmissive, temp, 12); + } + if (dirtyUniforms & DIRTY_LIGHT_CONTROL) { + ub->lightControl = PackLightControlBits(); } for (int i = 0; i < 4; i++) { if (dirtyUniforms & (DIRTY_LIGHT0 << i)) { diff --git a/GPU/Common/ShaderUniforms.h b/GPU/Common/ShaderUniforms.h index f64fc3417b..ab71309e93 100644 --- a/GPU/Common/ShaderUniforms.h +++ b/GPU/Common/ShaderUniforms.h @@ -13,7 +13,7 @@ enum : uint64_t { DIRTY_ALPHACOLORMASK | DIRTY_SHADERBLEND | DIRTY_COLORWRITEMASK | DIRTY_UVSCALEOFFSET | DIRTY_TEXCLAMP | DIRTY_DEPTHRANGE | DIRTY_MATAMBIENTALPHA | DIRTY_BEZIERSPLINE | DIRTY_DEPAL, DIRTY_LIGHT_UNIFORMS = - DIRTY_LIGHT0 | DIRTY_LIGHT1 | DIRTY_LIGHT2 | DIRTY_LIGHT3 | + DIRTY_LIGHT_CONTROL | DIRTY_LIGHT0 | DIRTY_LIGHT1 | DIRTY_LIGHT2 | DIRTY_LIGHT3 | DIRTY_MATDIFFUSE | DIRTY_MATSPECULAR | DIRTY_MATEMISSIVE | DIRTY_AMBIENT, }; @@ -80,7 +80,8 @@ struct UB_VS_Lights { float ambientColor[4]; float materialDiffuse[4]; float materialSpecular[4]; - float materialEmissive[4]; + float materialEmissive[3]; +
uint32_t lightControl; float lpos[4][4]; float ldir[4][4]; float latt[4][4]; @@ -95,6 +96,7 @@ R"( vec4 u_ambient; vec3 u_matdiffuse; vec4 u_matspecular; vec3 u_matemissive; + uint u_lightControl; // light ubershader vec3 u_lightpos0; vec3 u_lightpos1; vec3 u_lightpos2; @@ -141,3 +143,4 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView void LightUpdateUniforms(UB_VS_Lights *ub, uint64_t dirtyUniforms); void BoneUpdateUniforms(UB_VS_Bones *ub, uint64_t dirtyUniforms); +uint32_t PackLightControlBits(); diff --git a/GPU/Common/VertexShaderGenerator.cpp b/GPU/Common/VertexShaderGenerator.cpp index 126d188500..8263457d60 100644 --- a/GPU/Common/VertexShaderGenerator.cpp +++ b/GPU/Common/VertexShaderGenerator.cpp @@ -185,6 +185,12 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag bool enableLighting = id.Bit(VS_BIT_LIGHTING_ENABLE); int matUpdate = id.Bits(VS_BIT_MATERIAL_UPDATE, 3); + bool lightUberShader = id.Bit(VS_BIT_LIGHT_UBERSHADER) && enableLighting; // checking lighting here for the shader test's benefit, in reality if ubershader is set, lighting is set. + if (lightUberShader && !compat.bitwiseOps) { + *errorString = "Light ubershader requires bitwise ops in shader language"; + return false; + } + // Apparently we don't support bezier/spline together with bones. bool doBezier = id.Bit(VS_BIT_BEZIER) && !enableBones && useHWTransform; bool doSpline = id.Bit(VS_BIT_SPLINE) && !enableBones && useHWTransform; @@ -324,6 +330,7 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag if (doTexture) { WRITE(p, "vec4 u_uvscaleoffset : register(c%i);\n", CONST_VS_UVSCALEOFFSET); } + // No need for light ubershader support here, D3D9 doesn't do it. 
for (int i = 0; i < 4; i++) { if (doLight[i] != LIGHT_OFF) { // This is needed for shade mapping @@ -418,7 +425,7 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag WRITE(p, " vec3 v_color1 : COLOR1;\n"); if (enableFog) { - WRITE(p, " float v_fogdepth: TEXCOORD1;\n"); + WRITE(p, " float v_fogdepth : TEXCOORD1;\n"); } if (compat.shaderLanguage == HLSL_D3D9) { WRITE(p, " vec4 gl_Position : POSITION;\n"); @@ -523,28 +530,32 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag WRITE(p, "uniform vec4 u_uvscaleoffset;\n"); *uniformMask |= DIRTY_UVSCALEOFFSET; } + if (lightUberShader) { + p.C("uniform uint u_lightControl;\n"); + *uniformMask |= DIRTY_LIGHT_CONTROL; + } for (int i = 0; i < 4; i++) { - if (doLight[i] != LIGHT_OFF) { + if (lightUberShader || doLight[i] != LIGHT_OFF) { // This is needed for shade mapping WRITE(p, "uniform vec3 u_lightpos%i;\n", i); *uniformMask |= DIRTY_LIGHT0 << i; } - if (doLight[i] == LIGHT_FULL) { + if (lightUberShader || doLight[i] == LIGHT_FULL) { *uniformMask |= DIRTY_LIGHT0 << i; GELightType type = static_cast(id.Bits(VS_BIT_LIGHT0_TYPE + 4 * i, 2)); GELightComputation comp = static_cast(id.Bits(VS_BIT_LIGHT0_COMP + 4 * i, 2)); - if (type != GE_LIGHTTYPE_DIRECTIONAL) + if (lightUberShader || type != GE_LIGHTTYPE_DIRECTIONAL) WRITE(p, "uniform mediump vec3 u_lightatt%i;\n", i); - if (type == GE_LIGHTTYPE_SPOT || type == GE_LIGHTTYPE_UNKNOWN) { + if (lightUberShader || type == GE_LIGHTTYPE_SPOT || type == GE_LIGHTTYPE_UNKNOWN) { WRITE(p, "uniform mediump vec3 u_lightdir%i;\n", i); WRITE(p, "uniform mediump vec2 u_lightangle_spotCoef%i;\n", i); } WRITE(p, "uniform lowp vec3 u_lightambient%i;\n", i); WRITE(p, "uniform lowp vec3 u_lightdiffuse%i;\n", i); - if (comp == GE_LIGHTCOMP_BOTH) { + if (lightUberShader || comp == GE_LIGHTCOMP_BOTH) { WRITE(p, "uniform lowp vec3 u_lightspecular%i;\n", i); } } @@ -728,7 +739,6 @@ bool GenerateVertexShader(const VShaderID &id, char 
*buffer, const ShaderLanguag WRITE(p, " vec4 basis_u = tess_weights_u[weight_idx.x].basis;\n"); WRITE(p, " vec4 basis_v = tess_weights_v[weight_idx.y].basis;\n"); WRITE(p, " mat4 basis = outerProduct(basis_u, basis_v);\n"); - } else { WRITE(p, " int index_u, index_v;\n"); for (int i = 0; i < 4; i++) { @@ -987,6 +997,13 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag anySpots = true; } + if (lightUberShader) { + anySpots = true; + diffuseIsZero = false; + specularIsZero = false; + distanceNeeded = true; + } + if (!specularIsZero) { WRITE(p, " lowp vec3 lightSum1 = splat3(0.0);\n"); } @@ -1004,76 +1021,132 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag } } - // Calculate lights if needed. If shade mapping is enabled, lights may need to be - // at least partially calculated. - for (int i = 0; i < 4; i++) { - if (doLight[i] != LIGHT_FULL) - continue; - - GELightType type = static_cast(id.Bits(VS_BIT_LIGHT0_TYPE + 4*i, 2)); - GELightComputation comp = static_cast(id.Bits(VS_BIT_LIGHT0_COMP + 4*i, 2)); - - if (type == GE_LIGHTTYPE_DIRECTIONAL) { - // We prenormalize light positions for directional lights. - WRITE(p, " toLight = u_lightpos%i;\n", i); - } else { - WRITE(p, " toLight = u_lightpos%i - worldpos;\n", i); - WRITE(p, " distance = length(toLight);\n"); - WRITE(p, " toLight /= distance;\n"); + if (lightUberShader) { + // TODO: Actually loop in the shader. For now, we write it all out. + // u_lightControl is computed in PackLightControlBits(). 
+ for (int i = 0; i < 4; i++) { + p.F(" if ((u_lightControl & %du) != 0u) { \n", 1 << i); + p.F(" uint comp = (u_lightControl >> %d) & 3u;\n", 4 + 4 * i); + p.F(" uint type = (u_lightControl >> %d) & 3u;\n", 4 + 4 * i + 2); + p.C(" if (type == 0u) {\n"); // GE_LIGHTTYPE_DIRECTIONAL + p.F(" toLight = u_lightpos%d;\n", i); + p.C(" } else {\n"); + p.F(" toLight = u_lightpos%d - worldpos;\n", i); + p.F(" distance = length(toLight);\n", i); + p.F(" toLight /= distance;\n", i); + p.C(" }\n"); + p.C(" ldot = dot(toLight, worldnormal);\n"); + p.C(" if (comp == 2u) {\n"); // GE_LIGHTCOMP_ONLYPOWDIFFUSE + p.C(" if (u_matspecular.a <= 0.0) {\n"); + p.C(" ldot = 1.0;\n"); + p.C(" } else {\n"); + p.C(" ldot = pow(max(ldot, 0.0), u_matspecular.a);\n"); + p.C(" }\n"); + p.C(" }\n"); + p.C(" switch (type) {\n"); // Attenuation + p.C(" case 1u:\n"); // GE_LIGHTTYPE_POINT + p.F(" lightScale = clamp(1.0 / dot(u_lightatt%i, vec3(1.0, distance, distance*distance)), 0.0, 1.0);\n", i); + p.C(" break;\n"); + p.C(" case 2u:\n"); // GE_LIGHTTYPE_SPOT + p.F(" angle = length(u_lightdir%i) == 0.0 ? 0.0 : dot(normalize(u_lightdir%i), toLight);\n", i, i); + p.F(" if (angle >= u_lightangle_spotCoef%i.x) {\n", i); + p.F(" lightScale = clamp(1.0 / dot(u_lightatt%i, vec3(1.0, distance, distance*distance)), 0.0, 1.0) * (u_lightangle_spotCoef%i.y <= 0.0 ? 
1.0 : pow(angle, u_lightangle_spotCoef%i.y));\n", i, i, i); + p.C(" } else {\n"); + p.C(" lightScale = 0.0;\n"); + p.C(" }\n"); + p.C(" break;\n"); + p.C(" default:\n"); // GE_LIGHTTYPE_DIRECTIONAL + p.C(" lightScale = 1.0;\n"); + p.C(" break;\n"); + p.C(" }\n"); + p.F(" diffuse = (u_lightdiffuse%i * %s) * max(ldot, 0.0);\n", i, diffuseStr); + p.C(" if (comp == 1u) {\n"); // do specular + p.C(" if (ldot >= 0.0) {\n"); + p.C(" ldot = dot(normalize(toLight + vec3(0.0, 0.0, 1.0)), worldnormal);\n"); + p.C(" if (u_matspecular.a <= 0.0) {\n"); + p.C(" ldot = 1.0;\n"); + p.C(" } else {\n"); + p.C(" ldot = pow(max(ldot, 0.0), u_matspecular.a);\n"); + p.C(" }\n"); + p.C(" if (ldot > 0.0)\n"); + p.F(" lightSum1 += u_lightspecular%i * %s * ldot * lightScale;\n", i, specularStr); + p.C(" }\n"); + p.C(" }\n"); + p.F(" lightSum0.rgb += (u_lightambient%i * %s.rgb + diffuse) * lightScale;\n", i, ambientStr); + p.C(" }\n"); } + } else { + // Calculate lights if needed. If shade mapping is enabled, lights may need to be + // at least partially calculated. + for (int i = 0; i < 4; i++) { + if (doLight[i] != LIGHT_FULL) + continue; - bool doSpecular = comp == GE_LIGHTCOMP_BOTH; - bool poweredDiffuse = comp == GE_LIGHTCOMP_ONLYPOWDIFFUSE; + GELightType type = static_cast(id.Bits(VS_BIT_LIGHT0_TYPE + 4 * i, 2)); + GELightComputation comp = static_cast(id.Bits(VS_BIT_LIGHT0_COMP + 4 * i, 2)); - WRITE(p, " ldot = dot(toLight, worldnormal);\n"); - if (poweredDiffuse) { - // pow(0.0, 0.0) may be undefined, but the PSP seems to treat it as 1.0. - // Seen in Tales of the World: Radiant Mythology (#2424.) - WRITE(p, " if (u_matspecular.a <= 0.0) {\n"); - WRITE(p, " ldot = 1.0;\n"); - WRITE(p, " } else {\n"); - WRITE(p, " ldot = pow(max(ldot, 0.0), u_matspecular.a);\n"); - WRITE(p, " }\n"); + if (type == GE_LIGHTTYPE_DIRECTIONAL) { + // We prenormalize light positions for directional lights. 
+ p.F(" toLight = u_lightpos%i;\n", i); + } else { + p.F(" toLight = u_lightpos%i - worldpos;\n", i); + p.C(" distance = length(toLight);\n"); + p.C(" toLight /= distance;\n"); + } + + bool doSpecular = comp == GE_LIGHTCOMP_BOTH; + bool poweredDiffuse = comp == GE_LIGHTCOMP_ONLYPOWDIFFUSE; + + p.C(" ldot = dot(toLight, worldnormal);\n"); + if (poweredDiffuse) { + // pow(0.0, 0.0) may be undefined, but the PSP seems to treat it as 1.0. + // Seen in Tales of the World: Radiant Mythology (#2424.) + p.C(" if (u_matspecular.a <= 0.0) {\n"); + p.C(" ldot = 1.0;\n"); + p.C(" } else {\n"); + p.C(" ldot = pow(max(ldot, 0.0), u_matspecular.a);\n"); + p.C(" }\n"); + } + + const char *timesLightScale = " * lightScale"; + + // Attenuation + switch (type) { + case GE_LIGHTTYPE_DIRECTIONAL: + timesLightScale = ""; + break; + case GE_LIGHTTYPE_POINT: + p.F(" lightScale = clamp(1.0 / dot(u_lightatt%i, vec3(1.0, distance, distance*distance)), 0.0, 1.0);\n", i); + break; + case GE_LIGHTTYPE_SPOT: + case GE_LIGHTTYPE_UNKNOWN: + p.F(" angle = length(u_lightdir%i) == 0.0 ? 0.0 : dot(normalize(u_lightdir%i), toLight);\n", i, i); + p.F(" if (angle >= u_lightangle_spotCoef%i.x) {\n", i); + p.F(" lightScale = clamp(1.0 / dot(u_lightatt%i, vec3(1.0, distance, distance*distance)), 0.0, 1.0) * (u_lightangle_spotCoef%i.y <= 0.0 ? 
1.0 : pow(angle, u_lightangle_spotCoef%i.y));\n", i, i, i); + p.C(" } else {\n"); + p.C(" lightScale = 0.0;\n"); + p.C(" }\n"); + break; + default: + // ILLEGAL + break; + } + + p.F(" diffuse = (u_lightdiffuse%i * %s) * max(ldot, 0.0);\n", i, diffuseStr); + if (doSpecular) { + p.C(" if (ldot >= 0.0) {\n"); + p.C(" ldot = dot(normalize(toLight + vec3(0.0, 0.0, 1.0)), worldnormal);\n"); + p.C(" if (u_matspecular.a <= 0.0) {\n"); + p.C(" ldot = 1.0;\n"); + p.C(" } else {\n"); + p.C(" ldot = pow(max(ldot, 0.0), u_matspecular.a);\n"); + p.C(" }\n"); + p.C(" if (ldot > 0.0)\n"); + p.F(" lightSum1 += u_lightspecular%i * %s * ldot %s;\n", i, specularStr, timesLightScale); + p.C(" }\n"); + } + p.F(" lightSum0.rgb += (u_lightambient%i * %s.rgb + diffuse)%s;\n", i, ambientStr, timesLightScale); } - - const char *timesLightScale = " * lightScale"; - - // Attenuation - switch (type) { - case GE_LIGHTTYPE_DIRECTIONAL: - timesLightScale = ""; - break; - case GE_LIGHTTYPE_POINT: - WRITE(p, " lightScale = clamp(1.0 / dot(u_lightatt%i, vec3(1.0, distance, distance*distance)), 0.0, 1.0);\n", i); - break; - case GE_LIGHTTYPE_SPOT: - case GE_LIGHTTYPE_UNKNOWN: - WRITE(p, " angle = length(u_lightdir%i) == 0.0 ? 0.0 : dot(normalize(u_lightdir%i), toLight);\n", i, i); - WRITE(p, " if (angle >= u_lightangle_spotCoef%i.x) {\n", i); - WRITE(p, " lightScale = clamp(1.0 / dot(u_lightatt%i, vec3(1.0, distance, distance*distance)), 0.0, 1.0) * (u_lightangle_spotCoef%i.y <= 0.0 ? 
1.0 : pow(angle, u_lightangle_spotCoef%i.y));\n", i, i, i); - WRITE(p, " } else {\n"); - WRITE(p, " lightScale = 0.0;\n"); - WRITE(p, " }\n"); - break; - default: - // ILLEGAL - break; - } - - WRITE(p, " diffuse = (u_lightdiffuse%i * %s) * max(ldot, 0.0);\n", i, diffuseStr); - if (doSpecular) { - WRITE(p, " if (ldot >= 0.0) {\n"); - WRITE(p, " ldot = dot(normalize(toLight + vec3(0.0, 0.0, 1.0)), worldnormal);\n"); - WRITE(p, " if (u_matspecular.a <= 0.0) {\n"); - WRITE(p, " ldot = 1.0;\n"); - WRITE(p, " } else {\n"); - WRITE(p, " ldot = pow(max(ldot, 0.0), u_matspecular.a);\n"); - WRITE(p, " }\n"); - WRITE(p, " if (ldot > 0.0)\n"); - WRITE(p, " lightSum1 += u_lightspecular%i * %s * ldot %s;\n", i, specularStr, timesLightScale); - WRITE(p, " }\n"); - } - WRITE(p, " lightSum0.rgb += (u_lightambient%i * %s.rgb + diffuse)%s;\n", i, ambientStr, timesLightScale); } if (enableLighting) { diff --git a/GPU/GLES/ShaderManagerGLES.cpp b/GPU/GLES/ShaderManagerGLES.cpp index 92dfa9488f..a0a8aac16d 100644 --- a/GPU/GLES/ShaderManagerGLES.cpp +++ b/GPU/GLES/ShaderManagerGLES.cpp @@ -152,6 +152,7 @@ LinkedShader::LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs, queries.push_back({ &u_uvscaleoffset, "u_uvscaleoffset" }); queries.push_back({ &u_texclamp, "u_texclamp" }); queries.push_back({ &u_texclampoff, "u_texclampoff" }); + queries.push_back({ &u_lightControl, "u_lightControl" }); for (int i = 0; i < 4; i++) { static const char * const lightPosNames[4] = { "u_lightpos0", "u_lightpos1", "u_lightpos2", "u_lightpos3", }; @@ -471,7 +472,6 @@ void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid, bool useBu } render_->SetUniformF(&u_fogcoef, 2, fogcoef); } - if (dirty & DIRTY_UVSCALEOFFSET) { const float invW = 1.0f / (float)gstate_c.curTextureWidth; const float invH = 1.0f / (float)gstate_c.curTextureHeight; @@ -605,6 +605,9 @@ void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid, bool useBu } // Lighting + if (dirty & 
DIRTY_LIGHT_CONTROL) { + render_->SetUniformUI1(&u_lightControl, PackLightControlBits()); + } if (dirty & DIRTY_AMBIENT) { SetColorUniform3Alpha(render_, &u_ambient, gstate.ambientcolor, gstate.getAmbientA()); } diff --git a/GPU/GLES/ShaderManagerGLES.h b/GPU/GLES/ShaderManagerGLES.h index 101326ee9b..2dd715cc0b 100644 --- a/GPU/GLES/ShaderManagerGLES.h +++ b/GPU/GLES/ShaderManagerGLES.h @@ -97,6 +97,7 @@ public: int u_texclampoff; // Lighting + int u_lightControl; int u_ambient; int u_matambientalpha; int u_matdiffuse; diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index a5fd84e696..bb78a5be96 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -103,10 +103,10 @@ const CommonCommandTableEntry commonCommandTable[] = { // These change the vertex shader so need flushing. { GE_CMD_REVERSENORMAL, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE }, { GE_CMD_LIGHTINGENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE }, - { GE_CMD_LIGHTENABLE0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE }, - { GE_CMD_LIGHTENABLE1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE }, - { GE_CMD_LIGHTENABLE2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE }, - { GE_CMD_LIGHTENABLE3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE }, + { GE_CMD_LIGHTENABLE0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE }, + { GE_CMD_LIGHTENABLE1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE }, + { GE_CMD_LIGHTENABLE2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE }, + { GE_CMD_LIGHTENABLE3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE }, { GE_CMD_LIGHTTYPE0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_LIGHT0 }, { GE_CMD_LIGHTTYPE1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_LIGHT1 }, { GE_CMD_LIGHTTYPE2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_LIGHT2 }, @@ -450,6 +450,21 @@ void GPUCommon::UpdateCmdInfo() { cmdInfo_[GE_CMD_JUMP].func = &GPUCommon::Execute_Jump; 
cmdInfo_[GE_CMD_CALL].func = &GPUCommon::Execute_Call; } + + // Reconfigure for light ubershader or not. + for (int i = 0; i < 4; i++) { + if (gstate_c.Supports(GPU_USE_LIGHT_UBERSHADER)) { + cmdInfo_[GE_CMD_LIGHTENABLE0 + i].RemoveDirty(DIRTY_VERTEXSHADER_STATE); + cmdInfo_[GE_CMD_LIGHTENABLE0 + i].AddDirty(DIRTY_LIGHT_CONTROL); + cmdInfo_[GE_CMD_LIGHTTYPE0 + i].RemoveDirty(DIRTY_VERTEXSHADER_STATE); + cmdInfo_[GE_CMD_LIGHTTYPE0 + i].AddDirty(DIRTY_LIGHT_CONTROL); + } else { + cmdInfo_[GE_CMD_LIGHTENABLE0 + i].RemoveDirty(DIRTY_LIGHT_CONTROL); + cmdInfo_[GE_CMD_LIGHTENABLE0 + i].AddDirty(DIRTY_VERTEXSHADER_STATE); + cmdInfo_[GE_CMD_LIGHTTYPE0 + i].RemoveDirty(DIRTY_LIGHT_CONTROL); + cmdInfo_[GE_CMD_LIGHTTYPE0 + i].AddDirty(DIRTY_VERTEXSHADER_STATE); + } + } } void GPUCommon::BeginHostFrame() { @@ -3202,6 +3217,10 @@ u32 GPUCommon::CheckGPUFeatures() const { features |= GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH; } + if (draw_->GetDeviceCaps().fragmentShaderInt32Supported) { + features |= GPU_USE_LIGHT_UBERSHADER; + } + if (PSP_CoreParameter().compat.flags().ClearToRAM) { features |= GPU_USE_CLEAR_RAM_HACK; } diff --git a/GPU/GPUCommon.h b/GPU/GPUCommon.h index 1127e38d91..36c27d72d7 100644 --- a/GPU/GPUCommon.h +++ b/GPU/GPUCommon.h @@ -314,6 +314,14 @@ protected: struct CommandInfo { uint64_t flags; GPUCommon::CmdFunc func; + + // Dirty flags are mashed into the regular flags by a left shift of 8. + void AddDirty(u64 dirty) { + flags |= dirty << 8; + } + void RemoveDirty(u64 dirty) { + flags &= ~(dirty << 8); + } }; static CommandInfo cmdInfo_[256]; diff --git a/GPU/GPUState.h b/GPU/GPUState.h index db4a04d3cd..a10cfe790e 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -469,7 +469,8 @@ struct UVScale { // Might want to move this mechanism into the backend later. 
enum { GPU_SUPPORTS_DUALSOURCE_BLEND = FLAG_BIT(0), - // Free bits: 1-2 + GPU_USE_LIGHT_UBERSHADER = FLAG_BIT(1), + // Free bit: 2 GPU_SUPPORTS_VS_RANGE_CULLING = FLAG_BIT(3), GPU_SUPPORTS_BLEND_MINMAX = FLAG_BIT(4), GPU_SUPPORTS_LOGIC_OP = FLAG_BIT(5),