From 69b43ab7341ae7d73454d21640b5923d90fea16b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Tue, 3 Oct 2023 23:30:18 +0200 Subject: [PATCH] Extend the Test Drive color ramp smoother to detect up to 3 ramps in a texture. Note that we also offset the lookup slightly to miss the wrap-around points. The existing 31 scale factor instead of 32, together with that half-texel, are enough to avoid that problem. Fixes #18300 --- GPU/Common/FragmentShaderGenerator.cpp | 4 ++- GPU/Common/TextureCacheCommon.cpp | 42 ++++++++++++----------- GPU/Common/TextureShaderCommon.cpp | 46 ++++++++++++++++---------- GPU/Common/TextureShaderCommon.h | 5 ++- 4 files changed, 57 insertions(+), 40 deletions(-) diff --git a/GPU/Common/FragmentShaderGenerator.cpp b/GPU/Common/FragmentShaderGenerator.cpp index 2495051ac7..1bbdd19ace 100644 --- a/GPU/Common/FragmentShaderGenerator.cpp +++ b/GPU/Common/FragmentShaderGenerator.cpp @@ -717,6 +717,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu // lookup with the filtered value once. p.F(" vec4 t = ").SampleTexture2D("tex", "uv").C(";\n"); p.C(" uint depalShift = (u_depal_mask_shift_off_fmt >> 0x8u) & 0xFFu;\n"); + p.C(" uint depalOffset = ((u_depal_mask_shift_off_fmt >> 0x10u) & 0xFFu) << 0x4u;\n"); p.C(" uint depalFmt = (u_depal_mask_shift_off_fmt >> 0x18u) & 0x3u;\n"); p.C(" float index0 = t.r;\n"); p.C(" float factor = 31.0 / 256.0;\n"); @@ -727,7 +728,8 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu p.C(" if (depalShift == 0x5u) { index0 = t.g; }\n"); p.C(" else if (depalShift == 0xAu) { index0 = t.b; }\n"); p.C(" }\n"); - p.F(" t = ").SampleTexture2D("pal", "vec2(index0 * factor * 0.5, 0.0)").C(";\n"); // 0.5 for 512-entry CLUT. + p.C(" float offset = float(depalOffset) / 256.0;\n"); + p.F(" t = ").SampleTexture2D("pal", "vec2((index0 * factor + offset) * 0.5 + 0.5 / 512.0, 0.0)").C(";\n"); // 0.5 for 512-entry CLUT. break; case ShaderDepalMode::NORMAL: if (doTextureProjection) { diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index 4a06b47c98..14027f52a9 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -2194,26 +2194,28 @@ static bool CanDepalettize(GETextureFormat texFormat, GEBufferFormat bufferForma // If the palette is detected as a smooth ramp, we can interpolate for higher color precision. // But we only do it if the mask/shift exactly matches a color channel, else something different might be going // on and we definitely don't want to interpolate. -// Great enhancement for Test Drive. -static bool CanUseSmoothDepal(const GPUgstate &gstate, GEBufferFormat framebufferFormat, int rampLength) { - if (gstate.getClutIndexStartPos() == 0 && - gstate.getClutIndexMask() < rampLength) { - switch (framebufferFormat) { - case GE_FORMAT_565: - if (gstate.getClutIndexShift() == 0 || gstate.getClutIndexShift() == 11) { - return gstate.getClutIndexMask() == 0x1F; - } else if (gstate.getClutIndexShift() == 5) { - return gstate.getClutIndexMask() == 0x3F; +// Great enhancement for Test Drive and Manhunt 2. +static bool CanUseSmoothDepal(const GPUgstate &gstate, GEBufferFormat framebufferFormat, const ClutTexture &clutTexture) { + for (int i = 0; i < ClutTexture::MAX_RAMPS; i++) { + if (gstate.getClutIndexStartPos() == clutTexture.rampStarts[i] && + gstate.getClutIndexMask() < clutTexture.rampLengths[i]) { + switch (framebufferFormat) { + case GE_FORMAT_565: + if (gstate.getClutIndexShift() == 0 || gstate.getClutIndexShift() == 11) { + return gstate.getClutIndexMask() == 0x1F; + } else if (gstate.getClutIndexShift() == 5) { + return gstate.getClutIndexMask() == 0x3F; + } + break; + case GE_FORMAT_5551: + if (gstate.getClutIndexShift() == 0 || gstate.getClutIndexShift() == 5 || gstate.getClutIndexShift() == 10) { + return gstate.getClutIndexMask() == 0x1F; + } + break; + default: + // No uses for the other formats yet, add if needed. + break; } - break; - case GE_FORMAT_5551: - if (gstate.getClutIndexShift() == 0 || gstate.getClutIndexShift() == 5 || gstate.getClutIndexShift() == 10) { - return gstate.getClutIndexMask() == 0x1F; - } - break; - default: - // No uses for the other formats yet, add if needed. - break; } } return false; @@ -2253,7 +2255,7 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer if (need_depalettize) { if (clutRenderAddress_ == 0xFFFFFFFF) { clutTexture = textureShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBufRaw_); - smoothedDepal = CanUseSmoothDepal(gstate, framebuffer->fb_format, clutTexture.rampLength); + smoothedDepal = CanUseSmoothDepal(gstate, framebuffer->fb_format, clutTexture); } else { // The CLUT texture is dynamic, it's the framebuffer pointed to by clutRenderAddress. // Instead of texturing directly from that, we copy to a temporary CLUT texture. diff --git a/GPU/Common/TextureShaderCommon.cpp b/GPU/Common/TextureShaderCommon.cpp index e7faf67ac7..2e8b597802 100644 --- a/GPU/Common/TextureShaderCommon.cpp +++ b/GPU/Common/TextureShaderCommon.cpp @@ -95,33 +95,43 @@ ClutTexture TextureShaderCache::GetClutTexture(GEPaletteFormat clutFormat, const break; } - int lastR = 0; - int lastG = 0; - int lastB = 0; - int lastA = 0; - int rampLength = 0; + for (int i = 0; i < 3; i++) { + tex->rampLengths[i] = 0; + tex->rampStarts[i] = 0; + } // Quick check for how many continuously growing entries we have at the start. // Bilinearly filtering CLUTs only really makes sense for this kind of ramp. - for (int i = 0; i < maxClutEntries; i++) { - rampLength = i; - int r = desc.initData[0][i * 4]; - int g = desc.initData[0][i * 4 + 1]; - int b = desc.initData[0][i * 4 + 2]; - int a = desc.initData[0][i * 4 + 3]; - if (r < lastR || g < lastG || b < lastB || a < lastA) { + int i = 0; + for (int j = 0; j < ClutTexture::MAX_RAMPS; j++) { + tex->rampStarts[j] = i; + int lastR = 0; + int lastG = 0; + int lastB = 0; + int lastA = 0; + for (; i < maxClutEntries; i++) { + int r = desc.initData[0][i * 4]; + int g = desc.initData[0][i * 4 + 1]; + int b = desc.initData[0][i * 4 + 2]; + int a = desc.initData[0][i * 4 + 3]; + if (r < lastR || g < lastG || b < lastB || a < lastA) { + lastR = r; lastG = g; lastB = b; lastA = a; + break; + } else { + lastR = r; + lastG = g; + lastB = b; + lastA = a; + } + } + tex->rampLengths[j] = i - tex->rampStarts[j]; + if (i >= maxClutEntries) { break; - } else { - lastR = r; - lastG = g; - lastB = b; - lastA = a; } } tex->texture = draw_->CreateTexture(desc); tex->lastFrame = gpuStats.numFlips; - tex->rampLength = rampLength; texCache_[clutId] = tex; return *tex; diff --git a/GPU/Common/TextureShaderCommon.h b/GPU/Common/TextureShaderCommon.h index d6a6096f09..8aa881fdc8 100644 --- a/GPU/Common/TextureShaderCommon.h +++ b/GPU/Common/TextureShaderCommon.h @@ -29,11 +29,14 @@ #include "GPU/Common/ShaderCommon.h" #include "GPU/Common/DepalettizeShaderCommon.h" + class ClutTexture { public: + enum { MAX_RAMPS = 3 }; Draw::Texture *texture; int lastFrame; - int rampLength; + int rampLengths[MAX_RAMPS]; + int rampStarts[MAX_RAMPS]; }; // For CLUT depal shaders, and other pre-bind texture shaders.