mirror of
https://github.com/libretro/ppsspp.git
synced 2025-01-26 03:04:20 +00:00
Allow unswizzling with a dest pitch.
This commit is contained in:
parent
3593a7963e
commit
1300631e9a
@ -364,7 +364,7 @@ void TextureCacheCommon::LoadClut(u32 clutAddr, u32 loadBytes) {
|
||||
clutMaxBytes_ = std::max(clutMaxBytes_, loadBytes);
|
||||
}
|
||||
|
||||
void TextureCacheCommon::UnswizzleFromMem(u32 *dest, const u8 *texptr, u32 bufw, u32 height, u32 bytesPerPixel) {
|
||||
void TextureCacheCommon::UnswizzleFromMem(u32 *dest, u32 destPitch, const u8 *texptr, u32 bufw, u32 height, u32 bytesPerPixel) {
|
||||
// Note: bufw is always aligned to 16 bytes, so rowWidth is always >= 16.
|
||||
const u32 rowWidth = (bytesPerPixel > 0) ? (bufw * bytesPerPixel) : (bufw / 2);
|
||||
// A visual mapping of unswizzling, where each letter is 16 bytes and 8 letters make up a block:
|
||||
@ -381,8 +381,7 @@ void TextureCacheCommon::UnswizzleFromMem(u32 *dest, const u8 *texptr, u32 bufw,
|
||||
// The height is not always aligned to 8, but rounds up.
|
||||
int byc = (height + 7) / 8;
|
||||
|
||||
// TODO: Can change rowWidth param below (leave above) to adjust dest pitch.
|
||||
DoUnswizzleTex16(texptr, dest, bxc, byc, rowWidth);
|
||||
DoUnswizzleTex16(texptr, dest, bxc, byc, destPitch);
|
||||
}
|
||||
|
||||
void *TextureCacheCommon::RearrangeBuf(void *inBuf, u32 inRowBytes, u32 outRowBytes, int h, bool allowInPlace) {
|
||||
|
@ -143,7 +143,7 @@ protected:
|
||||
// Can't be unordered_map because we use lower_bound ... although for some reason that compiles on MSVC.
|
||||
typedef std::map<u64, TexCacheEntry> TexCache;
|
||||
|
||||
void UnswizzleFromMem(u32 *dest, const u8 *texptr, u32 bufw, u32 height, u32 bytesPerPixel);
|
||||
void UnswizzleFromMem(u32 *dest, u32 destPitch, const u8 *texptr, u32 bufw, u32 height, u32 bytesPerPixel);
|
||||
void *RearrangeBuf(void *inBuf, u32 inRowBytes, u32 outRowBytes, int h, bool allowInPlace = true);
|
||||
|
||||
u32 EstimateTexMemoryUsage(const TexCacheEntry *entry);
|
||||
|
@ -118,7 +118,10 @@ u32 QuickTexHashNEON(const void *checkp, u32 size) {
|
||||
return check;
|
||||
}
|
||||
|
||||
void DoUnswizzleTex16NEON(const u8 *texptr, u32 *ydestp, int bxc, int byc, u32 pitch, u32 rowWidth) {
|
||||
void DoUnswizzleTex16NEON(const u8 *texptr, u32 *ydestp, int bxc, int byc, u32 pitch) {
|
||||
// ydestp is a u32 pointer, so it's convenient to express the byte pitch in 32-bit units.
|
||||
const u32 pitchBy32 = pitch >> 2;
|
||||
|
||||
__builtin_prefetch(texptr, 0, 0);
|
||||
__builtin_prefetch(ydestp, 1, 1);
|
||||
|
||||
@ -134,18 +137,18 @@ void DoUnswizzleTex16NEON(const u8 *texptr, u32 *ydestp, int bxc, int byc, u32 p
|
||||
uint32x4_t temp3 = vld1q_u32(src + 8);
|
||||
uint32x4_t temp4 = vld1q_u32(src + 12);
|
||||
vst1q_u32(dest, temp1);
|
||||
dest += pitch;
|
||||
dest += pitchBy32;
|
||||
vst1q_u32(dest, temp2);
|
||||
dest += pitch;
|
||||
dest += pitchBy32;
|
||||
vst1q_u32(dest, temp3);
|
||||
dest += pitch;
|
||||
dest += pitchBy32;
|
||||
vst1q_u32(dest, temp4);
|
||||
dest += pitch;
|
||||
dest += pitchBy32;
|
||||
src += 16;
|
||||
}
|
||||
xdest += 4;
|
||||
}
|
||||
ydestp += (rowWidth * 8) / 4;
|
||||
ydestp += pitchBy32 * 8;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -18,7 +18,7 @@
|
||||
#include "GPU/Common/TextureDecoder.h"
|
||||
|
||||
u32 QuickTexHashNEON(const void *checkp, u32 size);
|
||||
void DoUnswizzleTex16NEON(const u8 *texptr, u32 *ydestp, int bxc, int byc, u32 pitch, u32 rowWidth);
|
||||
void DoUnswizzleTex16NEON(const u8 *texptr, u32 *ydestp, int bxc, int byc, u32 pitch);
|
||||
u32 ReliableHash32NEON(const void *input, size_t len, u32 seed);
|
||||
|
||||
CheckAlphaResult CheckAlphaRGBA8888NEON(const u32 *pixelData, int stride, int w, int h);
|
||||
|
@ -377,7 +377,7 @@ void *TextureCacheDX9::ReadIndexedTex(int level, const u8 *texptr, int bytesPerI
|
||||
}
|
||||
} else {
|
||||
tmpTexBuf32.resize(std::max(bufw, w) * h);
|
||||
UnswizzleFromMem(tmpTexBuf32.data(), texptr, bufw, h, bytesPerIndex);
|
||||
UnswizzleFromMem(tmpTexBuf32.data(), bufw * bytesPerIndex, texptr, bufw, h, bytesPerIndex);
|
||||
switch (bytesPerIndex) {
|
||||
case 1:
|
||||
DeIndexTexture(tmpTexBuf16.data(), (u8 *) tmpTexBuf32.data(), length, clut);
|
||||
@ -417,7 +417,7 @@ void *TextureCacheDX9::ReadIndexedTex(int level, const u8 *texptr, int bytesPerI
|
||||
}
|
||||
buf = tmpTexBuf32.data();
|
||||
} else {
|
||||
UnswizzleFromMem(tmpTexBuf32.data(), texptr, bufw, h, bytesPerIndex);
|
||||
UnswizzleFromMem(tmpTexBuf32.data(), bufw * bytesPerIndex, texptr, bufw, h, bytesPerIndex);
|
||||
// Since we had to unswizzle to tmpTexBuf32, let's output to tmpTexBuf16.
|
||||
tmpTexBuf16.resize(std::max(bufw, w) * h * 2);
|
||||
u32 *dest32 = (u32 *) tmpTexBuf16.data();
|
||||
@ -1366,7 +1366,7 @@ void *TextureCacheDX9::DecodeTextureLevel(GETextureFormat format, GEPaletteForma
|
||||
}
|
||||
} else {
|
||||
tmpTexBuf32.resize(std::max(bufw, w) * h);
|
||||
UnswizzleFromMem(tmpTexBuf32.data(), texptr, bufw, h, 0);
|
||||
UnswizzleFromMem(tmpTexBuf32.data(), bufw / 2, texptr, bufw, h, 0);
|
||||
if (clutAlphaLinear_ && mipmapShareClut) {
|
||||
DeIndexTexture4OptimalRev(tmpTexBuf16.data(), (const u8 *)tmpTexBuf32.data(), bufw * h, clutAlphaLinearColor_);
|
||||
} else {
|
||||
@ -1386,7 +1386,7 @@ void *TextureCacheDX9::DecodeTextureLevel(GETextureFormat format, GEPaletteForma
|
||||
DeIndexTexture4(tmpTexBuf32.data(), texptr, bufw * h, clut);
|
||||
finalBuf = tmpTexBuf32.data();
|
||||
} else {
|
||||
UnswizzleFromMem(tmpTexBuf32.data(), texptr, bufw, h, 0);
|
||||
UnswizzleFromMem(tmpTexBuf32.data(), bufw / 2, texptr, bufw, h, 0);
|
||||
// Let's reuse tmpTexBuf16, just need double the space.
|
||||
tmpTexBuf16.resize(std::max(bufw, w) * h * 2);
|
||||
DeIndexTexture4((u32 *)tmpTexBuf16.data(), (u8 *)tmpTexBuf32.data(), bufw * h, clut);
|
||||
@ -1431,7 +1431,7 @@ void *TextureCacheDX9::DecodeTextureLevel(GETextureFormat format, GEPaletteForma
|
||||
}
|
||||
else {
|
||||
tmpTexBuf32.resize(std::max(bufw, w) * h);
|
||||
UnswizzleFromMem(tmpTexBuf32.data(), texptr, bufw, h, 2);
|
||||
UnswizzleFromMem(tmpTexBuf32.data(), bufw * 2, texptr, bufw, h, 2);
|
||||
finalBuf = tmpTexBuf32.data();
|
||||
}
|
||||
break;
|
||||
@ -1451,7 +1451,7 @@ void *TextureCacheDX9::DecodeTextureLevel(GETextureFormat format, GEPaletteForma
|
||||
}
|
||||
} else {
|
||||
tmpTexBuf32.resize(std::max(bufw, w) * h);
|
||||
UnswizzleFromMem(tmpTexBuf32.data(), texptr, bufw, h, 4);
|
||||
UnswizzleFromMem(tmpTexBuf32.data(), bufw * 4, texptr, bufw, h, 4);
|
||||
finalBuf = tmpTexBuf32.data();
|
||||
}
|
||||
break;
|
||||
|
@ -375,7 +375,7 @@ void *TextureCache::ReadIndexedTex(int level, const u8 *texptr, int bytesPerInde
|
||||
}
|
||||
} else {
|
||||
tmpTexBuf32.resize(std::max(bufw, w) * h);
|
||||
UnswizzleFromMem(tmpTexBuf32.data(), texptr, bufw, h, bytesPerIndex);
|
||||
UnswizzleFromMem(tmpTexBuf32.data(), bufw * bytesPerIndex, texptr, bufw, h, bytesPerIndex);
|
||||
switch (bytesPerIndex) {
|
||||
case 1:
|
||||
DeIndexTexture(tmpTexBuf16.data(), (u8 *) tmpTexBuf32.data(), length, clut);
|
||||
@ -415,7 +415,7 @@ void *TextureCache::ReadIndexedTex(int level, const u8 *texptr, int bytesPerInde
|
||||
}
|
||||
buf = tmpTexBuf32.data();
|
||||
} else {
|
||||
UnswizzleFromMem(tmpTexBuf32.data(), texptr, bufw, h, bytesPerIndex);
|
||||
UnswizzleFromMem(tmpTexBuf32.data(), bufw * bytesPerIndex, texptr, bufw, h, bytesPerIndex);
|
||||
// Since we had to unswizzle to tmpTexBuf32, let's output to tmpTexBuf16.
|
||||
tmpTexBuf16.resize(std::max(bufw, w) * h * 2);
|
||||
u32 *dest32 = (u32 *) tmpTexBuf16.data();
|
||||
@ -1501,7 +1501,7 @@ void *TextureCache::DecodeTextureLevel(GETextureFormat format, GEPaletteFormat c
|
||||
}
|
||||
} else {
|
||||
tmpTexBuf32.resize(std::max(bufw, w) * h);
|
||||
UnswizzleFromMem(tmpTexBuf32.data(), texptr, bufw, h, 0);
|
||||
UnswizzleFromMem(tmpTexBuf32.data(), bufw / 2, texptr, bufw, h, 0);
|
||||
if (clutAlphaLinear_ && mipmapShareClut) {
|
||||
DeIndexTexture4Optimal(tmpTexBuf16.data(), (const u8 *)tmpTexBuf32.data(), bufw * h, clutAlphaLinearColor_);
|
||||
} else {
|
||||
@ -1521,7 +1521,7 @@ void *TextureCache::DecodeTextureLevel(GETextureFormat format, GEPaletteFormat c
|
||||
DeIndexTexture4(tmpTexBuf32.data(), texptr, bufw * h, clut);
|
||||
finalBuf = tmpTexBuf32.data();
|
||||
} else {
|
||||
UnswizzleFromMem(tmpTexBuf32.data(), texptr, bufw, h, 0);
|
||||
UnswizzleFromMem(tmpTexBuf32.data(), bufw / 2, texptr, bufw, h, 0);
|
||||
// Let's reuse tmpTexBuf16, just need double the space.
|
||||
tmpTexBuf16.resize(std::max(bufw, w) * h * 2);
|
||||
DeIndexTexture4((u32 *)tmpTexBuf16.data(), (u8 *)tmpTexBuf32.data(), bufw * h, clut);
|
||||
@ -1565,7 +1565,7 @@ void *TextureCache::DecodeTextureLevel(GETextureFormat format, GEPaletteFormat c
|
||||
ConvertColors(finalBuf, texptr, dstFmt, bufw * h);
|
||||
} else {
|
||||
tmpTexBuf32.resize(std::max(bufw, w) * h);
|
||||
UnswizzleFromMem(tmpTexBuf32.data(), texptr, bufw, h, 2);
|
||||
UnswizzleFromMem(tmpTexBuf32.data(), bufw * 2, texptr, bufw, h, 2);
|
||||
finalBuf = tmpTexBuf32.data();
|
||||
ConvertColors(finalBuf, finalBuf, dstFmt, bufw * h);
|
||||
}
|
||||
@ -1590,7 +1590,7 @@ void *TextureCache::DecodeTextureLevel(GETextureFormat format, GEPaletteFormat c
|
||||
}
|
||||
} else {
|
||||
tmpTexBuf32.resize(std::max(bufw, w) * h);
|
||||
UnswizzleFromMem(tmpTexBuf32.data(), texptr, bufw, h, 4);
|
||||
UnswizzleFromMem(tmpTexBuf32.data(), bufw * 4, texptr, bufw, h, 4);
|
||||
finalBuf = tmpTexBuf32.data();
|
||||
ConvertColors(finalBuf, finalBuf, dstFmt, bufw * h);
|
||||
}
|
||||
|
@ -407,8 +407,8 @@ bool TextureCacheVulkan::ReadIndexedTex(u8 *out, int outPitch, int level, const
|
||||
int h = gstate.getTextureHeight(level);
|
||||
|
||||
if (gstate.isTextureSwizzled()) {
|
||||
tmpTexBuf32.resize(std::max(bufw, w) * h);
|
||||
UnswizzleFromMem(tmpTexBuf32.data(), texptr, bufw, h, bytesPerIndex);
|
||||
tmpTexBuf32.resize(bufw * ((h + 7) & ~7));
|
||||
UnswizzleFromMem(tmpTexBuf32.data(), bufw * bytesPerIndex, texptr, bufw, h, bytesPerIndex);
|
||||
texptr = (u8 *)tmpTexBuf32.data();
|
||||
}
|
||||
|
||||
@ -1409,8 +1409,8 @@ bool TextureCacheVulkan::DecodeTextureLevel(u8 *out, int outPitch, GETextureForm
|
||||
const int clutSharingOffset = mipmapShareClut ? 0 : level * 16;
|
||||
|
||||
if (swizzled) {
|
||||
tmpTexBuf32.resize(std::max(bufw, w) * h);
|
||||
UnswizzleFromMem(tmpTexBuf32.data(), texptr, bufw, h, 0);
|
||||
tmpTexBuf32.resize(bufw * ((h + 7) & ~7));
|
||||
UnswizzleFromMem(tmpTexBuf32.data(), bufw / 2, texptr, bufw, h, 0);
|
||||
texptr = (u8 *)tmpTexBuf32.data();
|
||||
}
|
||||
|
||||
@ -1475,8 +1475,8 @@ bool TextureCacheVulkan::DecodeTextureLevel(u8 *out, int outPitch, GETextureForm
|
||||
memcpy(out + outPitch * y, texptr + bufw * sizeof(u16) * y, w * sizeof(u16));
|
||||
}
|
||||
} else {
|
||||
tmpTexBuf32.resize(std::max(bufw, w) * h);
|
||||
UnswizzleFromMem(tmpTexBuf32.data(), texptr, bufw, h, 2);
|
||||
tmpTexBuf32.resize(bufw * ((h + 7) & ~7));
|
||||
UnswizzleFromMem(tmpTexBuf32.data(), bufw * 2, texptr, bufw, h, 2);
|
||||
const u8 *unswizzled = (u8 *)tmpTexBuf32.data();
|
||||
for (int y = 0; y < h; ++y) {
|
||||
memcpy(out + outPitch * y, unswizzled + bufw * sizeof(u16) * y, w * sizeof(u16));
|
||||
@ -1490,8 +1490,8 @@ bool TextureCacheVulkan::DecodeTextureLevel(u8 *out, int outPitch, GETextureForm
|
||||
memcpy(out + outPitch * y, texptr + bufw * sizeof(u32) * y, w * sizeof(u32));
|
||||
}
|
||||
} else {
|
||||
tmpTexBuf32.resize(std::max(bufw, w) * h);
|
||||
UnswizzleFromMem(tmpTexBuf32.data(), texptr, bufw, h, 4);
|
||||
tmpTexBuf32.resize(bufw * ((h + 7) & ~7));
|
||||
UnswizzleFromMem(tmpTexBuf32.data(), bufw * 4, texptr, bufw, h, 4);
|
||||
const u8 *unswizzled = (u8 *)tmpTexBuf32.data();
|
||||
for (int y = 0; y < h; ++y) {
|
||||
memcpy(out + outPitch * y, unswizzled + bufw * sizeof(u32) * y, w * sizeof(u32));
|
||||
|
Loading…
x
Reference in New Issue
Block a user