diff --git a/GPU/Common/DepalettizeCommon.cpp b/GPU/Common/DepalettizeCommon.cpp index 70214fc3d5..185eecdbc4 100644 --- a/GPU/Common/DepalettizeCommon.cpp +++ b/GPU/Common/DepalettizeCommon.cpp @@ -147,10 +147,10 @@ Draw::SamplerState *DepalShaderCache::GetSampler() { return nearestSampler_; } -DepalShader *DepalShaderCache::GetDepalettizeShader(uint32_t clutMode, GEBufferFormat pixelFormat) { +DepalShader *DepalShaderCache::GetDepalettizeShader(uint32_t clutMode, GETextureFormat textureFormat, GEBufferFormat bufferFormat) { using namespace Draw; - u32 id = GenerateShaderID(clutMode, pixelFormat); + u32 id = GenerateShaderID(clutMode, textureFormat, bufferFormat); auto shader = cache_.find(id); if (shader != cache_.end()) { @@ -171,7 +171,8 @@ DepalShader *DepalShaderCache::GetDepalettizeShader(uint32_t clutMode, GEBufferF config.startPos = gstate.getClutIndexStartPos(); config.shift = gstate.getClutIndexShift(); config.mask = gstate.getClutIndexMask(); - config.pixelFormat = pixelFormat; + config.bufferFormat = bufferFormat; + config.textureFormat = textureFormat; GenerateDepalFs(buffer, config, draw_->GetShaderLanguageDesc()); diff --git a/GPU/Common/DepalettizeCommon.h b/GPU/Common/DepalettizeCommon.h index c394f47eb7..fbac86300d 100644 --- a/GPU/Common/DepalettizeCommon.h +++ b/GPU/Common/DepalettizeCommon.h @@ -49,7 +49,7 @@ public: ~DepalShaderCache(); // This also uploads the palette and binds the correct texture. 
- DepalShader *GetDepalettizeShader(uint32_t clutMode, GEBufferFormat pixelFormat); + DepalShader *GetDepalettizeShader(uint32_t clutMode, GETextureFormat textureFormat, GEBufferFormat bufferFormat); Draw::Texture *GetClutTexture(GEPaletteFormat clutFormat, const u32 clutHash, u32 *rawClut); Draw::SamplerState *GetSampler(); @@ -63,8 +63,9 @@ public: void DeviceRestore(Draw::DrawContext *draw); private: - static uint32_t GenerateShaderID(uint32_t clutMode, GEBufferFormat pixelFormat) { - return (clutMode & 0xFFFFFF) | (pixelFormat << 24); + // Cache key layout: clutMode in bits 0-23, buffer format in bits 24-27, texture format in bits 28-31. Shifts are done on uint32_t so no field can hit the sign bit or bleed into its neighbor. + static uint32_t GenerateShaderID(uint32_t clutMode, GETextureFormat textureFormat, GEBufferFormat bufferFormat) { + return (clutMode & 0xFFFFFF) | (((uint32_t)bufferFormat & 0xF) << 24) | (((uint32_t)textureFormat & 0xF) << 28); } static uint32_t GetClutID(GEPaletteFormat clutFormat, uint32_t clutHash) { diff --git a/GPU/Common/DepalettizeShaderCommon.cpp b/GPU/Common/DepalettizeShaderCommon.cpp index 9874e91d6e..11c3c50394 100644 --- a/GPU/Common/DepalettizeShaderCommon.cpp +++ b/GPU/Common/DepalettizeShaderCommon.cpp @@ -49,7 +49,7 @@ void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config, con const int shift = config.shift; const int mask = config.mask; - if (config.pixelFormat == GE_FORMAT_DEPTH16) { + if (config.bufferFormat == GE_FORMAT_DEPTH16) { DepthScaleFactors factors = GetDepthScaleFactors(); writer.ConstFloat("z_scale", factors.scale); writer.ConstFloat("z_offset", factors.offset); @@ -71,7 +71,7 @@ void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config, con writer.C(" vec4 color = ").SampleTexture2D("tex", "v_texcoord").C(";\n"); int shiftedMask = mask << shift; - switch (config.pixelFormat) { + switch (config.bufferFormat) { case GE_FORMAT_8888: if (shiftedMask & 0xFF) writer.C(" int r = int(color.r * 255.99);\n"); else writer.C(" int r = 0;\n"); if (shiftedMask & 0xFF00) writer.C(" int g = int(color.g * 255.99);\n"); else writer.C(" int g = 0;\n"); @@ -102,6 +102,17 @@ void GenerateDepalShader300(ShaderWriter &writer, 
const DepalConfig &config, con case GE_FORMAT_DEPTH16: // Remap depth buffer. writer.C(" float depth = (color.x - z_offset) * z_scale;\n"); + + if (config.bufferFormat == GE_FORMAT_DEPTH16 && config.textureFormat == GE_TFMT_5650) { + // Convert depth to 565, without going through a CLUT. The float() casts are required: GLSL ES has no implicit int-to-float conversion. + writer.C(" int idepth = int(clamp(depth, 0.0, 65535.0));\n"); + writer.C(" float r = float(idepth & 31) / 31.0f;\n"); + writer.C(" float g = float((idepth >> 5) & 63) / 63.0f;\n"); + writer.C(" float b = float((idepth >> 11) & 31) / 31.0f;\n"); + writer.C(" vec4 outColor = vec4(r, g, b, 1.0);\n"); + return; + } + writer.C(" int index = int(clamp(depth, 0.0, 65535.0));\n"); break; default: @@ -135,7 +146,7 @@ void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config, c const int shift = config.shift; const int mask = config.mask; - if (config.pixelFormat == GE_FORMAT_DEPTH16) { + if (config.bufferFormat == GE_FORMAT_DEPTH16) { DepthScaleFactors factors = GetDepthScaleFactors(); writer.ConstFloat("z_scale", factors.scale); writer.ConstFloat("z_offset", factors.offset); @@ -144,7 +155,7 @@ void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config, c float index_multiplier = 1.0f; // pixelformat is the format of the texture we are sampling. bool formatOK = true; - switch (config.pixelFormat) { + switch (config.bufferFormat) { case GE_FORMAT_8888: if ((mask & (mask + 1)) == 0) { // If the value has all bits contiguous (bitmask check above), we can mod by it + 1. 
@@ -249,7 +260,7 @@ void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config, c // index_multiplier -= 0.01f / texturePixels; if (!formatOK) { - ERROR_LOG_REPORT_ONCE(depal, G3D, "%s depal unsupported: shift=%d mask=%02x offset=%d", GeBufferFormatToString(config.pixelFormat), shift, mask, config.startPos); + ERROR_LOG_REPORT_ONCE(depal, G3D, "%s depal unsupported: shift=%d mask=%02x offset=%d", GeBufferFormatToString(config.bufferFormat), shift, mask, config.startPos); } // Offset by half a texel (plus clutBase) to turn NEAREST filtering into FLOOR. diff --git a/GPU/Common/DepalettizeShaderCommon.h b/GPU/Common/DepalettizeShaderCommon.h index 5ce5ef88a8..74bb38d196 100644 --- a/GPU/Common/DepalettizeShaderCommon.h +++ b/GPU/Common/DepalettizeShaderCommon.h @@ -29,7 +29,8 @@ struct DepalConfig { int shift; u32 startPos; GEPaletteFormat clutFormat; - GEBufferFormat pixelFormat; + GETextureFormat textureFormat; + GEBufferFormat bufferFormat; }; void GenerateDepalFs(char *buffer, const DepalConfig &config, const ShaderLanguageDesc &lang); diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index af642a7df1..64a48537b6 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -892,6 +892,7 @@ bool TextureCacheCommon::MatchFramebuffer( const bool noOffset = texaddr == addr; const bool exactMatch = noOffset && entry.format < 4 && channel == RASTER_COLOR; + const u32 w = 1 << ((entry.dim >> 0) & 0xf); const u32 h = 1 << ((entry.dim >> 8) & 0xf); // 512 on a 272 framebuffer is sane, so let's be lenient. 
@@ -927,6 +932,7 @@ bool TextureCacheCommon::MatchFramebuffer( // Check works for D16 too (???) const bool matchingClutFormat = (fb_format == GE_FORMAT_DEPTH16 && entry.format == GE_TFMT_CLUT16) || + (fb_format == GE_FORMAT_DEPTH16 && entry.format == GE_TFMT_5650) || (fb_format == GE_FORMAT_8888 && entry.format == GE_TFMT_CLUT32) || (fb_format != GE_FORMAT_8888 && entry.format == GE_TFMT_CLUT16); @@ -971,7 +977,9 @@ bool TextureCacheCommon::MatchFramebuffer( // 3rd Birthday (and a bunch of other games) render to a 16 bit clut texture. if (matchingClutFormat) { if (!noOffset) { - WARN_LOG_ONCE(subareaClut, G3D, "Texturing from framebuffer using CLUT with offset at %08x +%dx%d", fb_address, matchInfo->xOffset, matchInfo->yOffset); + WARN_LOG_ONCE(subareaClut, G3D, "Texturing from framebuffer (%s) using %s with offset at %08x +%dx%d", channel == RASTER_DEPTH ? "DEPTH" : "COLOR", GeTextureFormatToString(entry.format), fb_address, matchInfo->xOffset, matchInfo->yOffset); + } else { + WARN_LOG_ONCE(subareaClut, G3D, "Texturing from framebuffer (%s) using %s at %08x", channel == RASTER_DEPTH ? 
"DEPTH" : "COLOR", GeTextureFormatToString(entry.format), fb_address); } return true; } else if (IsClutFormat((GETextureFormat)(entry.format)) || IsDXTFormat((GETextureFormat)(entry.format))) { @@ -1823,12 +1831,41 @@ void TextureCacheCommon::ApplyTexture() { gstate_c.SetTextureIs3D((entry->status & TexCacheEntry::STATUS_3D) != 0); } +// Reports whether a framebuffer stored as bufferFormat can be read as texFormat through the depalettize path. +bool CanDepalettize(GETextureFormat texFormat, GEBufferFormat bufferFormat) { + if (IsClutFormat(texFormat)) { + switch (bufferFormat) { + case GE_FORMAT_4444: + case GE_FORMAT_565: + case GE_FORMAT_5551: + case GE_FORMAT_DEPTH16: + if (texFormat == GE_TFMT_CLUT16) { + return true; + } + break; + case GE_FORMAT_8888: + if (texFormat == GE_TFMT_CLUT32) { + return true; + } + break; + } + // Every mismatched CLUT/buffer pairing lands here, not only buffer formats missing from the switch. + WARN_LOG(G3D, "Invalid CLUT/framebuffer combination: %s vs %s", GeTextureFormatToString(texFormat), GeBufferFormatToString(bufferFormat)); + return false; + } else if (texFormat == GE_TFMT_5650 && bufferFormat == GE_FORMAT_DEPTH16) { + // We can also "depal" 565 format, this is used to read depth buffers as 565 on occasion (#15491). + return true; + } else { + return false; + } +} + void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer, GETextureFormat texFormat, RasterChannel channel) { DepalShader *depalShader = nullptr; uint32_t clutMode = gstate.clutformat & 0xFFFFFF; - bool need_depalettize = IsClutFormat(texFormat); bool depth = channel == RASTER_DEPTH; + bool need_depalettize = CanDepalettize(texFormat, depth ? GE_FORMAT_DEPTH16 : framebuffer->drawnFormat); bool useShaderDepal = framebufferManager_->GetCurrentRenderVFB() != framebuffer && !depth && !gstate_c.curTextureIs3D; // TODO: Implement shader depal in the fragment shader generator for D3D11 at least. @@ -1878,7 +1915,7 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer return; } - depalShader = depalShaderCache_->GetDepalettizeShader(clutMode, depth ? GE_FORMAT_DEPTH16 : framebuffer->drawnFormat); + depalShader = depalShaderCache_->GetDepalettizeShader(clutMode, texFormat, depth ? 
GE_FORMAT_DEPTH16 : framebuffer->drawnFormat); gstate_c.SetUseShaderDepal(false); } diff --git a/unittest/TestShaderGenerators.cpp b/unittest/TestShaderGenerators.cpp index 19b0b0d2e9..a96df6657c 100644 --- a/unittest/TestShaderGenerators.cpp +++ b/unittest/TestShaderGenerators.cpp @@ -309,7 +309,8 @@ bool TestDepalShaders() { config.shift = 8; config.startPos = 64; config.mask = 0xFF; - config.pixelFormat = GE_FORMAT_8888; + config.bufferFormat = GE_FORMAT_8888; + config.textureFormat = GE_TFMT_CLUT32; GenerateDepalFs(buffer, config, desc); if (!TestCompileShader(buffer, languages[k], ShaderStage::Fragment, &errorMessage)) {