Extend the Test Drive color ramp smoother to detect up to 3 ramps in a texture.

Note that we also offset the lookup by half a texel to miss the wrap-around
points. The existing scale factor of 31 (instead of 32), together with that
half-texel offset, is enough to avoid that problem.

Fixes #18300
This commit is contained in:
Henrik Rydgård 2023-10-03 23:30:18 +02:00
parent f94442d1b3
commit 69b43ab734
4 changed files with 57 additions and 40 deletions

View File

@ -717,6 +717,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
// lookup with the filtered value once.
p.F(" vec4 t = ").SampleTexture2D("tex", "uv").C(";\n");
p.C(" uint depalShift = (u_depal_mask_shift_off_fmt >> 0x8u) & 0xFFu;\n");
p.C(" uint depalOffset = ((u_depal_mask_shift_off_fmt >> 0x10u) & 0xFFu) << 0x4u;\n");
p.C(" uint depalFmt = (u_depal_mask_shift_off_fmt >> 0x18u) & 0x3u;\n");
p.C(" float index0 = t.r;\n");
p.C(" float factor = 31.0 / 256.0;\n");
@ -727,7 +728,8 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
p.C(" if (depalShift == 0x5u) { index0 = t.g; }\n");
p.C(" else if (depalShift == 0xAu) { index0 = t.b; }\n");
p.C(" }\n");
p.F(" t = ").SampleTexture2D("pal", "vec2(index0 * factor * 0.5, 0.0)").C(";\n"); // 0.5 for 512-entry CLUT.
p.C(" float offset = float(depalOffset) / 256.0;\n");
p.F(" t = ").SampleTexture2D("pal", "vec2((index0 * factor + offset) * 0.5 + 0.5 / 512.0, 0.0)").C(";\n"); // 0.5 for 512-entry CLUT.
break;
case ShaderDepalMode::NORMAL:
if (doTextureProjection) {

View File

@ -2194,26 +2194,28 @@ static bool CanDepalettize(GETextureFormat texFormat, GEBufferFormat bufferForma
// If the palette is detected as a smooth ramp, we can interpolate for higher color precision.
// But we only do it if the mask/shift exactly matches a color channel, else something different might be going
// on and we definitely don't want to interpolate.
// Great enhancement for Test Drive.
static bool CanUseSmoothDepal(const GPUgstate &gstate, GEBufferFormat framebufferFormat, int rampLength) {
if (gstate.getClutIndexStartPos() == 0 &&
gstate.getClutIndexMask() < rampLength) {
switch (framebufferFormat) {
case GE_FORMAT_565:
if (gstate.getClutIndexShift() == 0 || gstate.getClutIndexShift() == 11) {
return gstate.getClutIndexMask() == 0x1F;
} else if (gstate.getClutIndexShift() == 5) {
return gstate.getClutIndexMask() == 0x3F;
// Great enhancement for Test Drive and Manhunt 2.
static bool CanUseSmoothDepal(const GPUgstate &gstate, GEBufferFormat framebufferFormat, const ClutTexture &clutTexture) {
for (int i = 0; i < ClutTexture::MAX_RAMPS; i++) {
if (gstate.getClutIndexStartPos() == clutTexture.rampStarts[i] &&
gstate.getClutIndexMask() < clutTexture.rampLengths[i]) {
switch (framebufferFormat) {
case GE_FORMAT_565:
if (gstate.getClutIndexShift() == 0 || gstate.getClutIndexShift() == 11) {
return gstate.getClutIndexMask() == 0x1F;
} else if (gstate.getClutIndexShift() == 5) {
return gstate.getClutIndexMask() == 0x3F;
}
break;
case GE_FORMAT_5551:
if (gstate.getClutIndexShift() == 0 || gstate.getClutIndexShift() == 5 || gstate.getClutIndexShift() == 10) {
return gstate.getClutIndexMask() == 0x1F;
}
break;
default:
// No uses for the other formats yet, add if needed.
break;
}
break;
case GE_FORMAT_5551:
if (gstate.getClutIndexShift() == 0 || gstate.getClutIndexShift() == 5 || gstate.getClutIndexShift() == 10) {
return gstate.getClutIndexMask() == 0x1F;
}
break;
default:
// No uses for the other formats yet, add if needed.
break;
}
}
return false;
@ -2253,7 +2255,7 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer
if (need_depalettize) {
if (clutRenderAddress_ == 0xFFFFFFFF) {
clutTexture = textureShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBufRaw_);
smoothedDepal = CanUseSmoothDepal(gstate, framebuffer->fb_format, clutTexture.rampLength);
smoothedDepal = CanUseSmoothDepal(gstate, framebuffer->fb_format, clutTexture);
} else {
// The CLUT texture is dynamic, it's the framebuffer pointed to by clutRenderAddress.
// Instead of texturing directly from that, we copy to a temporary CLUT texture.

View File

@ -95,33 +95,43 @@ ClutTexture TextureShaderCache::GetClutTexture(GEPaletteFormat clutFormat, const
break;
}
int lastR = 0;
int lastG = 0;
int lastB = 0;
int lastA = 0;
int rampLength = 0;
for (int i = 0; i < 3; i++) {
tex->rampLengths[i] = 0;
tex->rampStarts[i] = 0;
}
// Quick check for how many continuously growing entries we have at the start.
// Bilinearly filtering CLUTs only really makes sense for this kind of ramp.
for (int i = 0; i < maxClutEntries; i++) {
rampLength = i;
int r = desc.initData[0][i * 4];
int g = desc.initData[0][i * 4 + 1];
int b = desc.initData[0][i * 4 + 2];
int a = desc.initData[0][i * 4 + 3];
if (r < lastR || g < lastG || b < lastB || a < lastA) {
int i = 0;
for (int j = 0; j < ClutTexture::MAX_RAMPS; j++) {
tex->rampStarts[j] = i;
int lastR = 0;
int lastG = 0;
int lastB = 0;
int lastA = 0;
for (; i < maxClutEntries; i++) {
int r = desc.initData[0][i * 4];
int g = desc.initData[0][i * 4 + 1];
int b = desc.initData[0][i * 4 + 2];
int a = desc.initData[0][i * 4 + 3];
if (r < lastR || g < lastG || b < lastB || a < lastA) {
lastR = r; lastG = g; lastB = b; lastA = a;
break;
} else {
lastR = r;
lastG = g;
lastB = b;
lastA = a;
}
}
tex->rampLengths[j] = i - tex->rampStarts[j];
if (i >= maxClutEntries) {
break;
} else {
lastR = r;
lastG = g;
lastB = b;
lastA = a;
}
}
tex->texture = draw_->CreateTexture(desc);
tex->lastFrame = gpuStats.numFlips;
tex->rampLength = rampLength;
texCache_[clutId] = tex;
return *tex;

View File

@ -29,11 +29,14 @@
#include "GPU/Common/ShaderCommon.h"
#include "GPU/Common/DepalettizeShaderCommon.h"
// A cached CLUT (palette) texture plus the color ramps detected in its entries.
// A "ramp" is a run of consecutive palette entries in which none of the
// R, G, B or A components decreases from one entry to the next.
class ClutTexture {
public:
// Maximum number of ramps recorded per palette.
enum { MAX_RAMPS = 3 };
// Texture object created from the palette data.
Draw::Texture *texture;
// Value of the flip counter when the texture was created.
// NOTE(review): presumably used to age out cache entries - confirm.
int lastFrame;
// Length of the single ramp at the start of the palette (single-ramp detection).
// NOTE(review): appears to be superseded by rampLengths/rampStarts - confirm whether still needed.
int rampLength;
// Number of entries in each detected ramp; 0 for slots where no ramp was found.
int rampLengths[MAX_RAMPS];
// Index of the first palette entry of each detected ramp.
int rampStarts[MAX_RAMPS];
};
// For CLUT depal shaders, and other pre-bind texture shaders.