diff --git a/GPU/Common/ShaderId.cpp b/GPU/Common/ShaderId.cpp index 9a6d9afb8..83bbd08e3 100644 --- a/GPU/Common/ShaderId.cpp +++ b/GPU/Common/ShaderId.cpp @@ -233,7 +233,7 @@ void ComputeFragmentShaderID(ShaderID *id_out) { bool enableFog = gstate.isFogEnabled() && !isModeThrough; bool enableAlphaTest = gstate.isAlphaTestEnabled() && !IsAlphaTestTriviallyTrue(); bool enableColorTest = gstate.isColorTestEnabled() && !IsColorTestTriviallyTrue(); - bool enableColorDoubling = gstate.isColorDoublingEnabled() && gstate.isTextureMapEnabled(); + bool enableColorDoubling = gstate.isColorDoublingEnabled() && gstate.isTextureMapEnabled() && gstate.getTextureFunction() == GE_TEXFUNC_MODULATE; bool doTextureProjection = (gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX && MatrixNeedsProjection(gstate.tgenMatrix)); bool doTextureAlpha = gstate.isTextureAlphaUsed(); bool doFlatShading = gstate.getShadeMode() == GE_SHADE_FLAT; diff --git a/GPU/Directx9/PixelShaderGeneratorDX9.cpp b/GPU/Directx9/PixelShaderGeneratorDX9.cpp index bd8e9d4e3..1d489296e 100644 --- a/GPU/Directx9/PixelShaderGeneratorDX9.cpp +++ b/GPU/Directx9/PixelShaderGeneratorDX9.cpp @@ -304,6 +304,11 @@ bool GenerateFragmentShaderHLSL(const FShaderID &id, char *buffer, ShaderLanguag } } if (enableColorTest) { + // Color doubling happens before the color test, but we try to optimize doubling when test is off. + if (enableColorDoubling) { + WRITE(p, " v.rgb = v.rgb * 2.0;\n"); + } + if (colorTestAgainstZero) { // When testing against 0 (common), we can avoid some math. // 0.002 is approximately half of 1.0 / 255.0. @@ -322,14 +327,14 @@ bool GenerateFragmentShaderHLSL(const FShaderID &id, char *buffer, ShaderLanguag if (colorTestFuncs[colorTestFunc][0] != '#') { const char *test = colorTestFuncs[colorTestFunc]; if (lang == HLSL_D3D11) { - WRITE(p, " uint3 v_scaled = roundAndScaleTo255iv(v.rgb);\n"); + WRITE(p, " uint3 v_scaled = roundAndScaleTo255iv(clamp(v.rgb, 0.0, 1.0));\n"); WRITE(p, " uint3 v_masked = v_scaled & u_alphacolormask.rgb;\n"); WRITE(p, " uint3 colorTestRef = u_alphacolorref.rgb & u_alphacolormask.rgb;\n"); // We have to test the components separately, or we get incorrect results. See #10629. WRITE(p, " if (v_masked.r %s colorTestRef.r && v_masked.g %s colorTestRef.g && v_masked.b %s colorTestRef.b) discard;\n", test, test, test); } else { // TODO: Use a texture to lookup bitwise ops instead? - WRITE(p, " float3 colortest = roundAndScaleTo255v(v.rgb);\n"); + WRITE(p, " float3 colortest = roundAndScaleTo255v(clamp(v.rgb, 0.0, 1.0));\n"); WRITE(p, " if ((colortest.r %s u_alphacolorref.r) && (colortest.g %s u_alphacolorref.g) && (colortest.b %s u_alphacolorref.b)) clip(-1);\n", test, test, test); } } @@ -337,13 +342,17 @@ bool GenerateFragmentShaderHLSL(const FShaderID &id, char *buffer, ShaderLanguag WRITE(p, lang == HLSL_DX9 ? " clip(-1);\n" : " discard;\n"); } } - } - // Color doubling happens after the color test. - if (enableColorDoubling && replaceBlend == REPLACE_BLEND_2X_SRC) { - WRITE(p, " v.rgb = v.rgb * 4.0;\n"); - } else if (enableColorDoubling || replaceBlend == REPLACE_BLEND_2X_SRC) { - WRITE(p, " v.rgb = v.rgb * 2.0;\n"); + if (replaceBlend == REPLACE_BLEND_2X_SRC) { + WRITE(p, " v.rgb = v.rgb * 2.0;\n"); + } + } else { + // If there's no color test, we can potentially double and replace blend at once. + if (enableColorDoubling && replaceBlend == REPLACE_BLEND_2X_SRC) { + WRITE(p, " v.rgb = v.rgb * 4.0;\n"); + } else if (enableColorDoubling || replaceBlend == REPLACE_BLEND_2X_SRC) { + WRITE(p, " v.rgb = v.rgb * 2.0;\n"); + } } if (enableFog) { diff --git a/GPU/GLES/FragmentShaderGeneratorGLES.cpp b/GPU/GLES/FragmentShaderGeneratorGLES.cpp index 19dc1ba37..64b43d35f 100644 --- a/GPU/GLES/FragmentShaderGeneratorGLES.cpp +++ b/GPU/GLES/FragmentShaderGeneratorGLES.cpp @@ -558,6 +558,14 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, uint64_t *uniform } if (enableColorTest) { + // Color doubling happens before the color test, but we try to optimize doubling when test is off. + if (enableColorDoubling) { + WRITE(p, " v.rgb = v.rgb * 2.0;\n"); + if (g_Config.bFragmentTestCache && !colorTestAgainstZero) { + WRITE(p, " vScale256.rgb = vScale256.rgb * 2.0;\n"); + } + } + if (colorTestAgainstZero) { // When testing against 0 (common), we can avoid some math. // 0.002 is approximately half of 1.0 / 255.0. @@ -576,7 +584,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, uint64_t *uniform WRITE(p, " float gResult = %s(testtex, vec2(vScale256.g, 0)).g;\n", texture); WRITE(p, " float bResult = %s(testtex, vec2(vScale256.b, 0)).b;\n", texture); if (colorTestFunc == GE_COMP_EQUAL) { - // Equal means all parts must be equal. + // Equal means all parts must be equal (so discard if any is not.) WRITE(p, " if (rResult < 0.5 || gResult < 0.5 || bResult < 0.5) %s\n", discardStatement); } else { // Not equal means any part must be not equal. @@ -587,7 +595,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, uint64_t *uniform if (colorTestFuncs[colorTestFunc][0] != '#') { if (bitwiseOps) { // Apparently GLES3 does not support vector bitwise ops. - WRITE(p, " ivec3 v_scaled = roundAndScaleTo255iv(v.rgb);\n"); + WRITE(p, " ivec3 v_scaled = roundAndScaleTo255iv(clamp(v.rgb, 0.0, 1.0));\n"); const char *maskedFragColor = "ivec3(v_scaled.r & u_alphacolormask.r, v_scaled.g & u_alphacolormask.g, v_scaled.b & u_alphacolormask.b)"; const char *maskedColorRef = "ivec3(int(u_alphacolorref.r) & u_alphacolormask.r, int(u_alphacolorref.g) & u_alphacolormask.g, int(u_alphacolorref.b) & u_alphacolormask.b)"; WRITE(p, " if (%s %s %s) %s\n", maskedFragColor, colorTestFuncs[colorTestFunc], maskedColorRef, discardStatement); @@ -600,13 +608,17 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, uint64_t *uniform WRITE(p, " %s\n", discardStatement); } } - } - // Color doubling happens after the color test. - if (enableColorDoubling && replaceBlend == REPLACE_BLEND_2X_SRC) { - WRITE(p, " v.rgb = v.rgb * 4.0;\n"); - } else if (enableColorDoubling || replaceBlend == REPLACE_BLEND_2X_SRC) { - WRITE(p, " v.rgb = v.rgb * 2.0;\n"); + if (replaceBlend == REPLACE_BLEND_2X_SRC) { + WRITE(p, " v.rgb = v.rgb * 2.0;\n"); + } + } else { + // If there's no color test, we can potentially double and replace blend at once. + if (enableColorDoubling && replaceBlend == REPLACE_BLEND_2X_SRC) { + WRITE(p, " v.rgb = v.rgb * 4.0;\n"); + } else if (enableColorDoubling || replaceBlend == REPLACE_BLEND_2X_SRC) { + WRITE(p, " v.rgb = v.rgb * 2.0;\n"); + } } if (enableFog) { diff --git a/GPU/GLES/FragmentTestCacheGLES.cpp b/GPU/GLES/FragmentTestCacheGLES.cpp index ea482896a..e870426ff 100644 --- a/GPU/GLES/FragmentTestCacheGLES.cpp +++ b/GPU/GLES/FragmentTestCacheGLES.cpp @@ -81,6 +81,8 @@ void FragmentTestCacheGLES::BindTestTexture(int slot) { GLRTexture *tex = CreateTestTexture(funcs, refs, masks, valid); lastTexture_ = tex; render_->BindTexture(slot, tex); + // We only need to do this once for the texture. + render_->SetTextureSampler(slot, GL_CLAMP_TO_EDGE, GL_CLAMP_TO_EDGE, GL_NEAREST, GL_NEAREST, 0.0f); FragmentTestTexture item; item.lastFrame = gpuStats.numFlips; item.texture = tex; diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index c1fa015d0..9fe9a9a6e 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -622,7 +622,13 @@ static inline Vec4 GetTextureFunctionOutput(const Vec4& prim_color, co // We can be accurate up to 24 bit integers, should be enough. const __m128 p = _mm_cvtepi32_ps(prim_color.ivec); const __m128 t = _mm_cvtepi32_ps(texcolor.ivec); - out_rgb.ivec = _mm_cvtps_epi32(_mm_div_ps(_mm_mul_ps(p, t), _mm_set_ps1(255.0f))); + const __m128 b = _mm_mul_ps(p, t); + if (gstate.isColorDoublingEnabled()) { + // We double right here, only for modulate. Other tex funcs do not color double. + out_rgb.ivec = _mm_cvtps_epi32(_mm_mul_ps(b, _mm_set_ps1(2.0f / 255.0f))); + } else { + out_rgb.ivec = _mm_cvtps_epi32(_mm_mul_ps(b, _mm_set_ps1(1.0f / 255.0f))); + } if (rgba) { return Vec4(out_rgb.ivec); @@ -630,7 +636,11 @@ static inline Vec4 GetTextureFunctionOutput(const Vec4& prim_color, co out_a = prim_color.a(); } #else - out_rgb = prim_color.rgb() * texcolor.rgb() / 255; + if (gstate.isColorDoublingEnabled()) { + out_rgb = (prim_color.rgb() * texcolor.rgb() * 2) / 255; + } else { + out_rgb = prim_color.rgb() * texcolor.rgb() / 255; + } out_a = (rgba) ? (prim_color.a() * texcolor.a() / 255) : prim_color.a(); #endif break; @@ -894,7 +904,7 @@ static inline Vec3 AlphaBlendingResult(const Vec4 &source, const Vec4< template inline void DrawSinglePixel(const DrawingCoords &p, u16 z, u8 fog, const Vec4 &color_in) { - Vec4 prim_color = color_in; + Vec4 prim_color = color_in.Clamp(0, 255); // Depth range test - applied in clear mode, if not through mode. if (!gstate.isModeThrough()) if (z < gstate.getDepthRangeMin() || z > gstate.getDepthRangeMax()) @@ -935,14 +945,6 @@ inline void DrawSinglePixel(const DrawingCoords &p, u16 z, u8 fog, const Vec4 fogColor = Vec3::FromRGB(gstate.fogcolor); fogColor = (prim_color.rgb() * (int)fog + fogColor * (255 - (int)fog)) / 255; diff --git a/GPU/Vulkan/FragmentShaderGeneratorVulkan.cpp b/GPU/Vulkan/FragmentShaderGeneratorVulkan.cpp index d35ade99e..13a111629 100644 --- a/GPU/Vulkan/FragmentShaderGeneratorVulkan.cpp +++ b/GPU/Vulkan/FragmentShaderGeneratorVulkan.cpp @@ -375,6 +375,11 @@ bool GenerateVulkanGLSLFragmentShader(const FShaderID &id, char *buffer) { } if (enableColorTest) { + // Color doubling happens before the color test, but we try to optimize doubling when test is off. + if (enableColorDoubling) { + WRITE(p, " v.rgb = v.rgb * 2.0;\n"); + } + if (colorTestAgainstZero) { // When testing against 0 (common), we can avoid some math. // Have my doubts that this special case is actually worth it, but whatever. @@ -392,19 +397,23 @@ bool GenerateVulkanGLSLFragmentShader(const FShaderID &id, char *buffer) { } else { const char *colorTestFuncs[] = { "#", "#", " != ", " == " }; if (colorTestFuncs[colorTestFunc][0] != '#') { - WRITE(p, " ivec3 v_scaled = roundAndScaleTo255iv(v.rgb);\n"); + WRITE(p, " ivec3 v_scaled = roundAndScaleTo255iv(clamp(v.rgb, 0.0, 1.0));\n"); WRITE(p, " if ((v_scaled & base.alphacolormask.rgb) %s (base.alphacolorref.rgb & base.alphacolormask.rgb)) %s\n", colorTestFuncs[colorTestFunc], discardStatement); } else { WRITE(p, " %s\n", discardStatement); } } - } - // Color doubling happens after the color test. - if (enableColorDoubling && replaceBlend == REPLACE_BLEND_2X_SRC) { - WRITE(p, " v.rgb = v.rgb * 4.0;\n"); - } else if (enableColorDoubling || replaceBlend == REPLACE_BLEND_2X_SRC) { - WRITE(p, " v.rgb = v.rgb * 2.0;\n"); + if (replaceBlend == REPLACE_BLEND_2X_SRC) { + WRITE(p, " v.rgb = v.rgb * 2.0;\n"); + } + } else { + // If there's no color test, we can potentially double and replace blend at once. + if (enableColorDoubling && replaceBlend == REPLACE_BLEND_2X_SRC) { + WRITE(p, " v.rgb = v.rgb * 4.0;\n"); + } else if (enableColorDoubling || replaceBlend == REPLACE_BLEND_2X_SRC) { + WRITE(p, " v.rgb = v.rgb * 2.0;\n"); + } } if (enableFog) { diff --git a/headless/Headless.cpp b/headless/Headless.cpp index e786e08c3..a099118f7 100644 --- a/headless/Headless.cpp +++ b/headless/Headless.cpp @@ -373,6 +373,7 @@ int main(int argc, const char* argv[]) g_Config.iSplineBezierQuality = 2; g_Config.bHighQualityDepth = true; g_Config.bMemStickInserted = true; + g_Config.bFragmentTestCache = true; #ifdef _WIN32 InitSysDirectories();