diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index 3ab618fae6..5a1256f2e5 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -1033,7 +1033,7 @@ void TextureCacheCommon::DecodeTextureLevel(u8 *out, int outPitch, GETextureForm case GE_CMODE_16BIT_ABGR4444: { const u16 *clut = GetCurrentClut() + clutSharingOffset; - if (clutAlphaLinear_ && mipmapShareClut) { + if (clutAlphaLinear_ && mipmapShareClut && !expandTo32bit) { // Here, reverseColors means the CLUT is already reversed. if (reverseColors) { for (int y = 0; y < h; ++y) { @@ -1045,8 +1045,26 @@ void TextureCacheCommon::DecodeTextureLevel(u8 *out, int outPitch, GETextureForm } } } else { - for (int y = 0; y < h; ++y) { - DeIndexTexture4((u16 *)(out + outPitch * y), texptr + (bufw * y) / 2, w, clut); + if (expandTo32bit && !reverseColors) { + // We simply expand the CLUT to 32-bit, then we deindex as usual. Probably the fastest way. + switch (clutformat) { + case GE_CMODE_16BIT_ABGR4444: + ConvertRGBA4444ToRGBA8888(expandClut_, clut, 16); + break; + case GE_CMODE_16BIT_ABGR5551: + ConvertRGBA5551ToRGBA8888(expandClut_, clut, 16); + break; + case GE_CMODE_16BIT_BGR5650: + ConvertRGBA565ToRGBA8888(expandClut_, clut, 16); + break; + } + for (int y = 0; y < h; ++y) { + DeIndexTexture4((u32 *)(out + outPitch * y), texptr + (bufw * y) / 2, w, expandClut_); + } + } else { + for (int y = 0; y < h; ++y) { + DeIndexTexture4((u16 *)(out + outPitch * y), texptr + (bufw * y) / 2, w, clut); + } } } } @@ -1069,15 +1087,15 @@ void TextureCacheCommon::DecodeTextureLevel(u8 *out, int outPitch, GETextureForm break; case GE_TFMT_CLUT8: - ReadIndexedTex(out, outPitch, level, texptr, 1, bufw); + ReadIndexedTex(out, outPitch, level, texptr, 1, bufw, expandTo32bit); break; case GE_TFMT_CLUT16: - ReadIndexedTex(out, outPitch, level, texptr, 2, bufw); + ReadIndexedTex(out, outPitch, level, texptr, 2, bufw, expandTo32bit); break; case GE_TFMT_CLUT32: - ReadIndexedTex(out, outPitch, level, texptr, 4, bufw); + ReadIndexedTex(out, outPitch, level, texptr, 4, bufw, expandTo32bit); break; case GE_TFMT_4444: @@ -1089,6 +1107,20 @@ void TextureCacheCommon::DecodeTextureLevel(u8 *out, int outPitch, GETextureForm for (int y = 0; y < h; ++y) { ReverseColors(out + outPitch * y, texptr + bufw * sizeof(u16) * y, format, w, useBGRA); } + } else if (expandTo32bit) { + for (int y = 0; y < h; ++y) { + switch (format) { + case GE_CMODE_16BIT_ABGR4444: + ConvertRGBA4444ToRGBA8888((u32 *)(out + outPitch * y), (const u16 *)texptr + bufw * y, w); + break; + case GE_CMODE_16BIT_ABGR5551: + ConvertRGBA5551ToRGBA8888((u32 *)(out + outPitch * y), (const u16 *)texptr + bufw * y, w); + break; + case GE_CMODE_16BIT_BGR5650: + ConvertRGBA565ToRGBA8888((u32 *)(out + outPitch * y), (const u16 *)texptr + bufw * y, w); + break; + } + } } else { for (int y = 0; y < h; ++y) { memcpy(out + outPitch * y, texptr + bufw * sizeof(u16) * y, w * sizeof(u16)); @@ -1109,6 +1141,20 @@ void TextureCacheCommon::DecodeTextureLevel(u8 *out, int outPitch, GETextureForm for (int y = 0; y < h; ++y) { ReverseColors(out + outPitch * y, unswizzled + bufw * sizeof(u16) * y, format, w, useBGRA); } + } else if (expandTo32bit) { + for (int y = 0; y < h; ++y) { + switch (format) { + case GE_CMODE_16BIT_ABGR4444: + ConvertRGBA4444ToRGBA8888((u32 *)(out + outPitch * y), (const u16 *)unswizzled + bufw * y, w); + break; + case GE_CMODE_16BIT_ABGR5551: + ConvertRGBA5551ToRGBA8888((u32 *)(out + outPitch * y), (const u16 *)unswizzled + bufw * y, w); + break; + case GE_CMODE_16BIT_BGR5650: + ConvertRGBA565ToRGBA8888((u32 *)(out + outPitch * y), (const u16 *)unswizzled + bufw * y, w); + break; + } + } } else { for (int y = 0; y < h; ++y) { memcpy(out + outPitch * y, unswizzled + bufw * sizeof(u16) * y, w * sizeof(u16)); @@ -1223,7 +1269,7 @@ void TextureCacheCommon::DecodeTextureLevel(u8 *out, int outPitch, GETextureForm } } -void TextureCacheCommon::ReadIndexedTex(u8 *out, int outPitch, int level, const u8 *texptr, int bytesPerIndex, int bufw) { +void TextureCacheCommon::ReadIndexedTex(u8 *out, int outPitch, int level, const u8 *texptr, int bytesPerIndex, int bufw, bool expandTo32Bit) { int w = gstate.getTextureWidth(level); int h = gstate.getTextureHeight(level); @@ -1233,28 +1279,48 @@ void TextureCacheCommon::ReadIndexedTex(u8 *out, int outPitch, int level, const texptr = (u8 *)tmpTexBuf32_.data(); } - switch (gstate.getClutPaletteFormat()) { + int palFormat = gstate.getClutPaletteFormat(); + + const u16 *clut16 = (const u16 *)clutBuf_; + const u32 *clut32 = (const u32 *)clutBuf_; + + if (expandTo32Bit && palFormat != GE_CMODE_32BIT_ABGR8888) { + switch (palFormat) { + case GE_CMODE_16BIT_ABGR4444: + ConvertRGBA4444ToRGBA8888(expandClut_, clut16, 256); + break; + case GE_CMODE_16BIT_ABGR5551: + ConvertRGBA5551ToRGBA8888(expandClut_, clut16, 256); + break; + case GE_CMODE_16BIT_BGR5650: + ConvertRGBA565ToRGBA8888(expandClut_, clut16, 256); + break; + } + clut32 = expandClut_; + palFormat = GE_CMODE_32BIT_ABGR8888; + } + + switch (palFormat) { case GE_CMODE_16BIT_BGR5650: case GE_CMODE_16BIT_ABGR5551: case GE_CMODE_16BIT_ABGR4444: { - const u16 *clut = GetCurrentClut(); switch (bytesPerIndex) { case 1: for (int y = 0; y < h; ++y) { - DeIndexTexture((u16 *)(out + outPitch * y), (const u8 *)texptr + bufw * y, w, clut); + DeIndexTexture((u16 *)(out + outPitch * y), (const u8 *)texptr + bufw * y, w, clut16); } break; case 2: for (int y = 0; y < h; ++y) { - DeIndexTexture((u16 *)(out + outPitch * y), (const u16_le *)texptr + bufw * y, w, clut); + DeIndexTexture((u16 *)(out + outPitch * y), (const u16_le *)texptr + bufw * y, w, clut16); } break; case 4: for (int y = 0; y < h; ++y) { - DeIndexTexture((u16 *)(out + outPitch * y), (const u32_le *)texptr + bufw * y, w, clut); + DeIndexTexture((u16 *)(out + outPitch * y), (const u32_le *)texptr + bufw * y, w, clut16); } break; } @@ -1263,23 +1329,22 @@ void TextureCacheCommon::ReadIndexedTex(u8 *out, int outPitch, int level, const case GE_CMODE_32BIT_ABGR8888: { - const u32 *clut = GetCurrentClut(); switch (bytesPerIndex) { case 1: for (int y = 0; y < h; ++y) { - DeIndexTexture((u32 *)(out + outPitch * y), (const u8 *)texptr + bufw * y, w, clut); + DeIndexTexture((u32 *)(out + outPitch * y), (const u8 *)texptr + bufw * y, w, clut32); } break; case 2: for (int y = 0; y < h; ++y) { - DeIndexTexture((u32 *)(out + outPitch * y), (const u16_le *)texptr + bufw * y, w, clut); + DeIndexTexture((u32 *)(out + outPitch * y), (const u16_le *)texptr + bufw * y, w, clut32); } break; case 4: for (int y = 0; y < h; ++y) { - DeIndexTexture((u32 *)(out + outPitch * y), (const u32_le *)texptr + bufw * y, w, clut); + DeIndexTexture((u32 *)(out + outPitch * y), (const u32_le *)texptr + bufw * y, w, clut32); } break; } diff --git a/GPU/Common/TextureCacheCommon.h b/GPU/Common/TextureCacheCommon.h index 83409212dc..1443338f41 100644 --- a/GPU/Common/TextureCacheCommon.h +++ b/GPU/Common/TextureCacheCommon.h @@ -225,9 +225,9 @@ protected: u32 yOffset; }; - void DecodeTextureLevel(u8 *out, int outPitch, GETextureFormat format, GEPaletteFormat clutformat, uint32_t texaddr, int level, int bufw, bool reverseColors, bool useBGRA, bool expandTo32bit); + void DecodeTextureLevel(u8 *out, int outPitch, GETextureFormat format, GEPaletteFormat clutformat, uint32_t texaddr, int level, int bufw, bool reverseColors, bool useBGRA, bool expandTo32Bit); void UnswizzleFromMem(u32 *dest, u32 destPitch, const u8 *texptr, u32 bufw, u32 height, u32 bytesPerPixel); - void ReadIndexedTex(u8 *out, int outPitch, int level, const u8 *texptr, int bytesPerIndex, int bufw); + void ReadIndexedTex(u8 *out, int outPitch, int level, const u8 *texptr, int bytesPerIndex, int bufw, bool expandTo32Bit); template inline const T *GetCurrentClut() { @@ -322,6 +322,8 @@ protected: bool nextNeedsRebuild_; bool isBgraBackend_; + + u32 expandClut_[256]; }; inline bool TexCacheEntry::Matches(u16 dim2, u8 format2, u8 maxLevel2) const { diff --git a/GPU/D3D11/DepalettizeShaderD3D11.cpp b/GPU/D3D11/DepalettizeShaderD3D11.cpp index 12852b2d13..9d0df8ddad 100644 --- a/GPU/D3D11/DepalettizeShaderD3D11.cpp +++ b/GPU/D3D11/DepalettizeShaderD3D11.cpp @@ -81,7 +81,7 @@ ID3D11ShaderResourceView *DepalShaderCacheD3D11::GetClutTexture(GEPaletteFormat return oldtex->second->view; } - DXGI_FORMAT dstFmt = getClutDestFormatD3D11(clutFormat); + DXGI_FORMAT dstFmt = GetClutDestFormatD3D11(clutFormat); int texturePixels = clutFormat == GE_CMODE_32BIT_ABGR8888 ? 256 : 512; DepalTextureD3D11 *tex = new DepalTextureD3D11(); diff --git a/GPU/D3D11/TextureCacheD3D11.cpp b/GPU/D3D11/TextureCacheD3D11.cpp index fbad16e944..8ff98cc33f 100644 --- a/GPU/D3D11/TextureCacheD3D11.cpp +++ b/GPU/D3D11/TextureCacheD3D11.cpp @@ -146,21 +146,6 @@ void TextureCacheD3D11::InvalidateLastTexture(TexCacheEntry *entry) { } } -DXGI_FORMAT TextureCacheD3D11::GetClutDestFormatD3D11(GEPaletteFormat format) { - switch (format) { - case GE_CMODE_16BIT_ABGR4444: - return DXGI_FORMAT_B4G4R4A4_UNORM; - case GE_CMODE_16BIT_ABGR5551: - return DXGI_FORMAT_B5G5R5A1_UNORM; - case GE_CMODE_16BIT_BGR5650: - return DXGI_FORMAT_B5G6R5_UNORM; - case GE_CMODE_32BIT_ABGR8888: - return DXGI_FORMAT_B8G8R8A8_UNORM; - } - // Should never be here ! - return DXGI_FORMAT_B8G8R8A8_UNORM; -} - void TextureCacheD3D11::UpdateSamplingParams(TexCacheEntry &entry, SamplerCacheKey &key) { // TODO: Make GetSamplingParams write SamplerCacheKey directly int minFilt; @@ -572,13 +557,32 @@ void TextureCacheD3D11::BuildTexture(TexCacheEntry *const entry, bool replaceIma } } +DXGI_FORMAT GetClutDestFormatD3D11(GEPaletteFormat format) { + switch (format) { + case GE_CMODE_16BIT_ABGR4444: + return DXGI_FORMAT_B4G4R4A4_UNORM; + case GE_CMODE_16BIT_ABGR5551: + return DXGI_FORMAT_B5G5R5A1_UNORM; + case GE_CMODE_16BIT_BGR5650: + return DXGI_FORMAT_B5G6R5_UNORM; + case GE_CMODE_32BIT_ABGR8888: + return DXGI_FORMAT_B8G8R8A8_UNORM; + } + // Should never be here ! + return DXGI_FORMAT_B8G8R8A8_UNORM; +} + DXGI_FORMAT TextureCacheD3D11::GetDestFormat(GETextureFormat format, GEPaletteFormat clutFormat) const { + if (!gstate_c.Supports(GPU_SUPPORTS_16BIT_FORMATS)) { + return DXGI_FORMAT_B8G8R8A8_UNORM; + } + switch (format) { case GE_TFMT_CLUT4: case GE_TFMT_CLUT8: case GE_TFMT_CLUT16: case GE_TFMT_CLUT32: - return getClutDestFormatD3D11(clutFormat); + return GetClutDestFormatD3D11(clutFormat); case GE_TFMT_4444: return DXGI_FORMAT_B4G4R4A4_UNORM; case GE_TFMT_5551: @@ -633,7 +637,7 @@ DXGI_FORMAT ToDXGIFormat(ReplacedTextureFormat fmt) { } } -void TextureCacheD3D11::LoadTextureLevel(TexCacheEntry &entry, ReplacedTexture &replaced, int level, int maxLevel, bool replaceImages, int scaleFactor, u32 dstFmt) { +void TextureCacheD3D11::LoadTextureLevel(TexCacheEntry &entry, ReplacedTexture &replaced, int level, int maxLevel, bool replaceImages, int scaleFactor, DXGI_FORMAT dstFmt) { int w = gstate.getTextureWidth(level); int h = gstate.getTextureHeight(level); @@ -642,14 +646,13 @@ void TextureCacheD3D11::LoadTextureLevel(TexCacheEntry &entry, ReplacedTexture & // Create texture int levels = scaleFactor == 1 ? maxLevel + 1 : 1; int tw = w, th = h; - DXGI_FORMAT tfmt = (DXGI_FORMAT)(dstFmt); if (replaced.GetSize(level, tw, th)) { - tfmt = ToDXGIFormat(replaced.Format(level)); + dstFmt = ToDXGIFormat(replaced.Format(level)); } else { tw *= scaleFactor; th *= scaleFactor; if (scaleFactor > 1) { - tfmt = DXGI_FORMAT_B8G8R8A8_UNORM; + dstFmt = DXGI_FORMAT_B8G8R8A8_UNORM; } } @@ -661,7 +664,7 @@ void TextureCacheD3D11::LoadTextureLevel(TexCacheEntry &entry, ReplacedTexture & desc.SampleDesc.Count = 1; desc.Width = tw; desc.Height = th; - desc.Format = tfmt; + desc.Format = dstFmt; desc.MipLevels = levels; desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; @@ -708,10 +711,12 @@ void TextureCacheD3D11::LoadTextureLevel(TexCacheEntry &entry, ReplacedTexture & decPitch = mapRowPitch; } - DecodeTextureLevel((u8 *)pixelData, decPitch, tfmt, clutformat, texaddr, level, bufw, false, false, false); + bool expand32 = !gstate_c.Supports(GPU_SUPPORTS_16BIT_FORMATS); + DecodeTextureLevel((u8 *)pixelData, decPitch, tfmt, clutformat, texaddr, level, bufw, false, false, expand32); if (scaleFactor > 1) { - scaler.ScaleAlways((u32 *)mapData, pixelData, dstFmt, w, h, scaleFactor); + u32 scaleFmt = (u32)dstFmt; + scaler.ScaleAlways((u32 *)mapData, pixelData, scaleFmt, w, h, scaleFactor); pixelData = (u32 *)mapData; // We always end up at 8888. Other parts assume this. diff --git a/GPU/D3D11/TextureCacheD3D11.h b/GPU/D3D11/TextureCacheD3D11.h index 33d36a9be5..e9e4b0ed58 100644 --- a/GPU/D3D11/TextureCacheD3D11.h +++ b/GPU/D3D11/TextureCacheD3D11.h @@ -71,7 +71,7 @@ protected: private: void UpdateSamplingParams(TexCacheEntry &entry, SamplerCacheKey &key); - void LoadTextureLevel(TexCacheEntry &entry, ReplacedTexture &replaced, int level, int maxLevel, bool replaceImages, int scaleFactor, u32 dstFmt); + void LoadTextureLevel(TexCacheEntry &entry, ReplacedTexture &replaced, int level, int maxLevel, bool replaceImages, int scaleFactor, DXGI_FORMAT dstFmt); DXGI_FORMAT GetDestFormat(GETextureFormat format, GEPaletteFormat clutFormat) const; TexCacheEntry::Status CheckAlpha(const u32 *pixelData, u32 dstFmt, int stride, int w, int h); void UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBase, bool clutIndexIsSimple) override; @@ -79,8 +79,6 @@ private: void ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFramebuffer *framebuffer) override; void BuildTexture(TexCacheEntry *const entry, bool replaceImages) override; - DXGI_FORMAT GetClutDestFormatD3D11(GEPaletteFormat format); - ID3D11Device *device_; ID3D11DeviceContext *context_; @@ -106,4 +104,4 @@ private: ShaderManagerD3D11 *shaderManager_; }; -DXGI_FORMAT getClutDestFormatD3D11(GEPaletteFormat format); +DXGI_FORMAT GetClutDestFormatD3D11(GEPaletteFormat format);