diff --git a/Common/GPU/OpenGL/GLRenderManager.h b/Common/GPU/OpenGL/GLRenderManager.h index e2d132726f..0acd2b5793 100644 --- a/Common/GPU/OpenGL/GLRenderManager.h +++ b/Common/GPU/OpenGL/GLRenderManager.h @@ -760,6 +760,19 @@ public: curRenderStep_->commands.push_back(data); } + void SetUniformB(const GLint *loc, bool value) { + _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); +#ifdef _DEBUG + _dbg_assert_(curProgram_); +#endif + GLRRenderData data{ GLRRenderCommand::UNIFORM4I }; + data.uniform4.loc = loc; + data.uniform4.count = 1; + u32 udata = value ? 1 : 0; + memcpy(data.uniform4.v, &udata, sizeof(u32)); + curRenderStep_->commands.push_back(data); + } + void SetUniformM4x4(const GLint *loc, const float *udata) { _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); #ifdef _DEBUG diff --git a/GPU/Common/FragmentShaderGenerator.cpp b/GPU/Common/FragmentShaderGenerator.cpp index 64568fff86..15f4067b8b 100644 --- a/GPU/Common/FragmentShaderGenerator.cpp +++ b/GPU/Common/FragmentShaderGenerator.cpp @@ -105,7 +105,6 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu bool colorTestAgainstZero = id.Bit(FS_BIT_COLOR_AGAINST_ZERO); bool enableColorDoubling = id.Bit(FS_BIT_COLOR_DOUBLE); bool doTextureProjection = id.Bit(FS_BIT_DO_TEXTURE_PROJ); - bool doTextureAlpha = id.Bit(FS_BIT_TEXALPHA); if (texture3D && arrayTexture) { *errorString = "Invalid combination of 3D texture and array texture, shouldn't happen"; @@ -257,8 +256,11 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu if (stencilToAlpha && replaceAlphaWithStencilType == STENCIL_VALUE_UNIFORM) { WRITE(p, "float u_stencilReplaceValue : register(c%i);\n", CONST_PS_STENCILREPLACE); } - if (doTexture && texFunc == GE_TEXFUNC_BLEND) { - WRITE(p, "float3 u_texenv : register(c%i);\n", CONST_PS_TEXENV); + if (doTexture) { + if (texFunc == GE_TEXFUNC_BLEND) { + WRITE(p, "float3 u_texenv : 
register(c%i);\n", CONST_PS_TEXENV); + } + WRITE(p, "bool u_texAlpha : register(c%i);\n", CONST_PS_TEXALPHA); } WRITE(p, "float3 u_fogcolor : register(c%i);\n", CONST_PS_FOGCOLOR); if (texture3D) { @@ -351,6 +353,8 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu } else { WRITE(p, "uniform sampler2D tex;\n"); } + *uniformMask |= DIRTY_TEXALPHA; + WRITE(p, "uniform bool u_texAlpha;\n"); } if (readFramebufferTex) { @@ -817,64 +821,36 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu break; } - if (texFunc != GE_TEXFUNC_REPLACE || !doTextureAlpha) - WRITE(p, " vec4 p = v_color0;\n"); + WRITE(p, " vec4 p = v_color0;\n"); - if (doTextureAlpha) { // texfmt == RGBA - switch (texFunc) { - case GE_TEXFUNC_MODULATE: - WRITE(p, " vec4 v = p * t + s\n;"); - break; - - case GE_TEXFUNC_DECAL: - WRITE(p, " vec4 v = vec4(mix(p.rgb, t.rgb, t.a), p.a) + s;\n"); - break; - - case GE_TEXFUNC_BLEND: - WRITE(p, " vec4 v = vec4(mix(p.rgb, u_texenv.rgb, t.rgb), p.a * t.a) + s;\n"); - break; - - case GE_TEXFUNC_REPLACE: - WRITE(p, " vec4 v = t + s;\n"); - break; - - case GE_TEXFUNC_ADD: - case GE_TEXFUNC_UNKNOWN1: - case GE_TEXFUNC_UNKNOWN2: - case GE_TEXFUNC_UNKNOWN3: - WRITE(p, " vec4 v = vec4(p.rgb + t.rgb, p.a * t.a) + s;\n"); - break; - default: - WRITE(p, " vec4 v = p;\n"); break; - } - } else { // texfmt == RGB - switch (texFunc) { - case GE_TEXFUNC_MODULATE: - WRITE(p, " vec4 v = vec4(t.rgb * p.rgb, p.a) + s;\n"); - break; - - case GE_TEXFUNC_DECAL: - WRITE(p, " vec4 v = vec4(t.rgb, p.a) + s;\n"); - break; - - case GE_TEXFUNC_BLEND: - WRITE(p, " vec4 v = vec4(mix(p.rgb, u_texenv.rgb, t.rgb), p.a) + s;\n"); - break; - - case GE_TEXFUNC_REPLACE: - WRITE(p, " vec4 v = vec4(t.rgb, p.a) + s;\n"); - break; - - case GE_TEXFUNC_ADD: - case GE_TEXFUNC_UNKNOWN1: - case GE_TEXFUNC_UNKNOWN2: - case GE_TEXFUNC_UNKNOWN3: - WRITE(p, " vec4 v = vec4(p.rgb + t.rgb, p.a) + s;\n"); break; - default: - WRITE(p, " vec4 v = p;\n"); 
break; - } + if (texFunc != GE_TEXFUNC_REPLACE) { + WRITE(p, " if (!u_texAlpha) { t.a = 1.0; }\n"); } + switch (texFunc) { + case GE_TEXFUNC_MODULATE: + WRITE(p, " vec4 v = p * t + s;\n"); + break; + case GE_TEXFUNC_DECAL: + WRITE(p, " vec4 v = vec4(mix(p.rgb, t.rgb, t.a), p.a) + s;\n"); + break; + case GE_TEXFUNC_BLEND: + WRITE(p, " vec4 v = vec4(mix(p.rgb, u_texenv.rgb, t.rgb), p.a * t.a) + s;\n"); + break; + case GE_TEXFUNC_REPLACE: + WRITE(p, " vec4 v = (u_texAlpha ? t : vec4(t.rgb, p.a)) + s;\n"); + break; + case GE_TEXFUNC_ADD: + case GE_TEXFUNC_UNKNOWN1: + case GE_TEXFUNC_UNKNOWN2: + case GE_TEXFUNC_UNKNOWN3: + WRITE(p, " vec4 v = vec4(p.rgb + t.rgb, p.a * t.a) + s;\n"); + break; + default: + // Doesn't happen + WRITE(p, " vec4 v = p + s;\n"); + break; + } if (enableColorDoubling) { // This happens before fog is applied. WRITE(p, " v.rgb = clamp(v.rgb * 2.0, 0.0, 1.0);\n"); diff --git a/GPU/Common/FragmentShaderGenerator.h b/GPU/Common/FragmentShaderGenerator.h index 88c2c3f9d6..d69583452c 100644 --- a/GPU/Common/FragmentShaderGenerator.h +++ b/GPU/Common/FragmentShaderGenerator.h @@ -36,9 +36,10 @@ struct FShaderID; #define CONST_PS_TEXCLAMP 8 #define CONST_PS_TEXCLAMPOFF 9 #define CONST_PS_MIPBIAS 10 +#define CONST_PS_TEXALPHA 11 // For stencil upload -#define CONST_PS_STENCILVALUE 11 +#define CONST_PS_STENCILVALUE 12 // Can technically be deduced from the fragment shader ID, but this is safer. enum class FragmentShaderFlags : u32 { diff --git a/GPU/Common/ShaderCommon.h b/GPU/Common/ShaderCommon.h index e715936ddd..7743dac933 100644 --- a/GPU/Common/ShaderCommon.h +++ b/GPU/Common/ShaderCommon.h @@ -90,11 +90,11 @@ enum : uint64_t { DIRTY_MIPBIAS = 1ULL << 37, DIRTY_LIGHT_CONTROL = 1ULL << 38, - // space for 1 more uniform dirty flags. Remember to update DIRTY_ALL_UNIFORMS. 
+ DIRTY_TEXALPHA = 1ULL << 39, DIRTY_BONE_UNIFORMS = 0xFF000000ULL, - DIRTY_ALL_UNIFORMS = 0x7FFFFFFFFFULL, + DIRTY_ALL_UNIFORMS = 0xFFFFFFFFFFULL, DIRTY_ALL_LIGHTS = DIRTY_LIGHT0 | DIRTY_LIGHT1 | DIRTY_LIGHT2 | DIRTY_LIGHT3, // Other dirty elements that aren't uniforms! diff --git a/GPU/Common/ShaderId.cpp b/GPU/Common/ShaderId.cpp index d7da61c290..1ec1012f75 100644 --- a/GPU/Common/ShaderId.cpp +++ b/GPU/Common/ShaderId.cpp @@ -192,7 +192,6 @@ std::string FragmentShaderDesc(const FShaderID &id) { if (id.Bit(FS_BIT_CLEARMODE)) desc << "Clear "; if (id.Bit(FS_BIT_DO_TEXTURE)) desc << (id.Bit(FS_BIT_3D_TEXTURE) ? "Tex3D " : "Tex "); if (id.Bit(FS_BIT_DO_TEXTURE_PROJ)) desc << "TexProj "; - if (id.Bit(FS_BIT_TEXALPHA)) desc << "TexAlpha "; if (id.Bit(FS_BIT_TEXTURE_AT_OFFSET)) desc << "TexOffs "; if (id.Bit(FS_BIT_COLOR_DOUBLE)) desc << "2x "; if (id.Bit(FS_BIT_FLATSHADE)) desc << "Flat "; @@ -291,7 +290,6 @@ void ComputeFragmentShaderID(FShaderID *id_out, const ComputedPipelineState &pip bool enableColorTest = gstate.isColorTestEnabled() && !IsColorTestTriviallyTrue(); bool enableColorDoubling = gstate.isColorDoublingEnabled() && gstate.isTextureMapEnabled(); bool doTextureProjection = (gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX && MatrixNeedsProjection(gstate.tgenMatrix, gstate.getUVProjMode())); - bool doTextureAlpha = gstate.isTextureAlphaUsed(); bool doFlatShading = gstate.getShadeMode() == GE_SHADE_FLAT; ShaderDepalMode shaderDepalMode = gstate_c.shaderDepalMode; @@ -303,16 +301,9 @@ void ComputeFragmentShaderID(FShaderID *id_out, const ComputedPipelineState &pip SimulateLogicOpType simulateLogicOpType = pipelineState.blendState.simulateLogicOpType; ReplaceAlphaType stencilToAlpha = pipelineState.blendState.replaceAlphaWithStencil; - // All texfuncs except replace are the same for RGB as for RGBA with full alpha. - // Note that checking this means that we must dirty the fragment shader ID whenever textureFullAlpha changes. 
- if (gstate_c.textureFullAlpha && gstate.getTextureFunction() != GE_TEXFUNC_REPLACE) { - doTextureAlpha = false; - } - if (gstate.isTextureMapEnabled()) { id.SetBit(FS_BIT_DO_TEXTURE); id.SetBits(FS_BIT_TEXFUNC, 3, gstate.getTextureFunction()); - id.SetBit(FS_BIT_TEXALPHA, doTextureAlpha & 1); // rgb or rgba if (gstate_c.needShaderTexClamp) { bool textureAtOffset = gstate_c.curTextureXOffset != 0 || gstate_c.curTextureYOffset != 0; // 4 bits total. diff --git a/GPU/Common/ShaderId.h b/GPU/Common/ShaderId.h index e03b27f214..beab743309 100644 --- a/GPU/Common/ShaderId.h +++ b/GPU/Common/ShaderId.h @@ -68,7 +68,7 @@ enum FShaderBit : uint8_t { FS_BIT_CLEARMODE = 0, FS_BIT_DO_TEXTURE = 1, FS_BIT_TEXFUNC = 2, // 3 bits - FS_BIT_TEXALPHA = 5, + // 1 bit free at position 5 FS_BIT_3D_TEXTURE = 6, FS_BIT_SHADER_TEX_CLAMP = 7, FS_BIT_CLAMP_S = 8, diff --git a/GPU/Common/ShaderUniforms.cpp b/GPU/Common/ShaderUniforms.cpp index 92c22be0fe..5112efb8e6 100644 --- a/GPU/Common/ShaderUniforms.cpp +++ b/GPU/Common/ShaderUniforms.cpp @@ -198,8 +198,12 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView } } + if (dirtyUniforms & DIRTY_TEXALPHA) { + ub->texAlpha = gstate.isTextureAlphaUsed() ? 
1 : 0; + } + if (dirtyUniforms & DIRTY_STENCILREPLACEVALUE) { - ub->stencil = (float)gstate.getStencilTestRef() * (1.0 / 255.0); + ub->stencilReplaceValue = (float)gstate.getStencilTestRef() * (1.0 / 255.0); } // Note - this one is not in lighting but in transformCommon as it has uses beyond lighting diff --git a/GPU/Common/ShaderUniforms.h b/GPU/Common/ShaderUniforms.h index c09c231b86..7bf61dc005 100644 --- a/GPU/Common/ShaderUniforms.h +++ b/GPU/Common/ShaderUniforms.h @@ -9,7 +9,7 @@ enum : uint64_t { DIRTY_BASE_UNIFORMS = DIRTY_WORLDMATRIX | DIRTY_PROJTHROUGHMATRIX | DIRTY_VIEWMATRIX | DIRTY_TEXMATRIX | DIRTY_ALPHACOLORREF | - DIRTY_PROJMATRIX | DIRTY_FOGCOLOR | DIRTY_FOGCOEFENABLE | DIRTY_TEXENV | DIRTY_STENCILREPLACEVALUE | + DIRTY_PROJMATRIX | DIRTY_FOGCOLOR | DIRTY_FOGCOEFENABLE | DIRTY_TEXENV | DIRTY_TEXALPHA | DIRTY_STENCILREPLACEVALUE | DIRTY_ALPHACOLORMASK | DIRTY_SHADERBLEND | DIRTY_COLORWRITEMASK | DIRTY_UVSCALEOFFSET | DIRTY_TEXCLAMP | DIRTY_DEPTHRANGE | DIRTY_MATAMBIENTALPHA | DIRTY_BEZIERSPLINE | DIRTY_DEPAL, DIRTY_LIGHT_UNIFORMS = @@ -35,10 +35,11 @@ struct alignas(16) UB_VS_FS_Base { // Fragment data float fogColor[3]; uint32_t alphaColorRef; float texEnvColor[3]; uint32_t colorTestMask; - float blendFixA[3]; float stencil; + float blendFixA[3]; float stencilReplaceValue; float blendFixB[3]; float rotation; float texClamp[4]; float texClampOffset[2]; float fogCoef[2]; + uint32_t texAlpha; float pad[3]; // VR stuff is to go here, later. For normal drawing, we can then get away // with just uploading the first 448 bytes of the struct (up to and including fogCoef). }; @@ -65,6 +66,7 @@ R"( mat4 u_proj; vec4 u_texclamp; vec2 u_texclampoff; vec2 u_fogcoef; + bool u_texAlpha; float pad0; float pad1; float pad2; )"; // 512 bytes. Would like to shrink more. 
Some colors only have 8-bit precision and we expand diff --git a/GPU/Directx9/ShaderManagerDX9.cpp b/GPU/Directx9/ShaderManagerDX9.cpp index 1f820310bd..729e21c0c9 100644 --- a/GPU/Directx9/ShaderManagerDX9.cpp +++ b/GPU/Directx9/ShaderManagerDX9.cpp @@ -213,6 +213,11 @@ void ShaderManagerDX9::VSSetColorUniform3Alpha(int creg, u32 color, u8 alpha) { device_->SetVertexShaderConstantF(creg, f, 1); } +void ShaderManagerDX9::PSSetBool(int creg, bool value) { + const float f[4] = { value ? 1.0f : 0.0f, 0.0f, 0.0f, 0.0f }; // The shader binds the bool with register(c#), so upload 0.0/1.0 to the float bank, not the b# bank. + device_->SetPixelShaderConstantF(creg, f, 1); +} + void ShaderManagerDX9::VSSetColorUniform3ExtraFloat(int creg, u32 color, float extra) { const float col[4] = { ((color & 0xFF)) / 255.0f, @@ -279,7 +284,9 @@ void ShaderManagerDX9::PSUpdateUniforms(u64 dirtyUniforms) { if (dirtyUniforms & DIRTY_STENCILREPLACEVALUE) { PSSetFloat(CONST_PS_STENCILREPLACE, (float)gstate.getStencilTestRef() * (1.0f / 255.0f)); } - + if (dirtyUniforms & DIRTY_TEXALPHA) { + PSSetBool(CONST_PS_TEXALPHA, gstate.isTextureAlphaUsed()); + } if (dirtyUniforms & DIRTY_SHADERBLEND) { PSSetColorUniform3(CONST_PS_BLENDFIXA, gstate.getFixA()); PSSetColorUniform3(CONST_PS_BLENDFIXB, gstate.getFixB()); diff --git a/GPU/Directx9/ShaderManagerDX9.h b/GPU/Directx9/ShaderManagerDX9.h index 372fbf9659..eb62216fa1 100644 --- a/GPU/Directx9/ShaderManagerDX9.h +++ b/GPU/Directx9/ShaderManagerDX9.h @@ -95,6 +95,7 @@ private: inline void PSSetColorUniform3(int creg, u32 color); inline void PSSetFloat(int creg, float value); inline void PSSetFloatArray(int creg, const float *value, int count); + void PSSetBool(int creg, bool value); void VSSetMatrix4x3_3(int creg, const float *m4x3); inline void VSSetColorUniform3(int creg, u32 color); diff --git a/GPU/GLES/ShaderManagerGLES.cpp b/GPU/GLES/ShaderManagerGLES.cpp index 8a728d7db4..3ebae535bf 100644 --- a/GPU/GLES/ShaderManagerGLES.cpp +++ b/GPU/GLES/ShaderManagerGLES.cpp @@ -152,6 +152,7 @@ LinkedShader::LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs, 
queries.push_back({ &u_uvscaleoffset, "u_uvscaleoffset" }); queries.push_back({ &u_texclamp, "u_texclamp" }); queries.push_back({ &u_texclampoff, "u_texclampoff" }); + queries.push_back({ &u_texAlpha, "u_texAlpha" }); queries.push_back({ &u_lightControl, "u_lightControl" }); for (int i = 0; i < 4; i++) { @@ -229,6 +230,10 @@ static inline void SetColorUniform3(GLRenderManager *render, GLint *uniform, u32 render->SetUniformF(uniform, 3, f); } +static inline void SetBoolUniform(GLRenderManager *render, GLint *uniform, bool value) { + render->SetUniformB(uniform, value); +} + static void SetColorUniform3Alpha(GLRenderManager *render, GLint *uniform, u32 color, u8 alpha) { float f[4]; Uint8x3ToFloat4_AlphaUint8(f, color, alpha); @@ -440,6 +445,9 @@ void LinkedShader::UpdateUniforms(const ShaderID &vsid, bool useBufferedRenderin if (dirty & DIRTY_TEXENV) { SetColorUniform3(render_, &u_texenv, gstate.texenvcolor); } + if (dirty & DIRTY_TEXALPHA) { + SetBoolUniform(render_, &u_texAlpha, gstate.isTextureAlphaUsed()); + } if (dirty & DIRTY_ALPHACOLORREF) { if (shaderLanguage.bitwiseOps) { render_->SetUniformUI1(&u_alphacolorref, gstate.getColorTestRef() | ((gstate.getAlphaTestRef() & gstate.getAlphaTestMask()) << 24)); @@ -945,7 +953,7 @@ enum class CacheDetectFlags { }; #define CACHE_HEADER_MAGIC 0x83277592 -#define CACHE_VERSION 21 +#define CACHE_VERSION 22 struct CacheHeader { uint32_t magic; uint32_t version; diff --git a/GPU/GLES/ShaderManagerGLES.h b/GPU/GLES/ShaderManagerGLES.h index 6520b1e4ad..15a783084c 100644 --- a/GPU/GLES/ShaderManagerGLES.h +++ b/GPU/GLES/ShaderManagerGLES.h @@ -101,6 +101,7 @@ public: int u_uvscaleoffset; int u_texclamp; int u_texclampoff; + int u_texAlpha; // Lighting int u_lightControl; diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index 0718862eb0..f7a8dfa7e6 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -94,7 +94,7 @@ const CommonCommandTableEntry commonCommandTable[] = { { GE_CMD_TEXSHADELS, FLAG_FLUSHBEFOREONCHANGE, 
DIRTY_VERTEXSHADER_STATE }, // Raster state for Direct3D 9, uncommon. { GE_CMD_SHADEMODE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE }, - { GE_CMD_TEXFUNC, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAGMENTSHADER_STATE }, + { GE_CMD_TEXFUNC, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAGMENTSHADER_STATE | DIRTY_TEXALPHA }, { GE_CMD_COLORTEST, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAGMENTSHADER_STATE }, { GE_CMD_ALPHATESTENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAGMENTSHADER_STATE }, { GE_CMD_COLORTESTENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAGMENTSHADER_STATE }, diff --git a/GPU/Vulkan/ShaderManagerVulkan.cpp b/GPU/Vulkan/ShaderManagerVulkan.cpp index 955932550b..df2495c088 100644 --- a/GPU/Vulkan/ShaderManagerVulkan.cpp +++ b/GPU/Vulkan/ShaderManagerVulkan.cpp @@ -516,7 +516,7 @@ enum class VulkanCacheDetectFlags { }; #define CACHE_HEADER_MAGIC 0xff51f420 -#define CACHE_VERSION 35 +#define CACHE_VERSION 36 struct VulkanCacheHeader { uint32_t magic; uint32_t version;