Allow unswizzling with a destination pitch that is passed separately from the source row width.

This commit is contained in:
Unknown W. Brackets 2016-03-26 21:50:49 -07:00
parent 3593a7963e
commit 1300631e9a
7 changed files with 33 additions and 31 deletions

View File

@ -364,7 +364,7 @@ void TextureCacheCommon::LoadClut(u32 clutAddr, u32 loadBytes) {
clutMaxBytes_ = std::max(clutMaxBytes_, loadBytes);
}
void TextureCacheCommon::UnswizzleFromMem(u32 *dest, const u8 *texptr, u32 bufw, u32 height, u32 bytesPerPixel) {
void TextureCacheCommon::UnswizzleFromMem(u32 *dest, u32 destPitch, const u8 *texptr, u32 bufw, u32 height, u32 bytesPerPixel) {
// Note: bufw is always aligned to 16 bytes, so rowWidth is always >= 16.
const u32 rowWidth = (bytesPerPixel > 0) ? (bufw * bytesPerPixel) : (bufw / 2);
// A visual mapping of unswizzling, where each letter is 16-byte and 8 letters is a block:
@ -381,8 +381,7 @@ void TextureCacheCommon::UnswizzleFromMem(u32 *dest, const u8 *texptr, u32 bufw,
// The height is not always aligned to 8, but rounds up.
int byc = (height + 7) / 8;
// TODO: Can change rowWidth param below (leave above) to adjust dest pitch.
DoUnswizzleTex16(texptr, dest, bxc, byc, rowWidth);
DoUnswizzleTex16(texptr, dest, bxc, byc, destPitch);
}
void *TextureCacheCommon::RearrangeBuf(void *inBuf, u32 inRowBytes, u32 outRowBytes, int h, bool allowInPlace) {

View File

@ -143,7 +143,7 @@ protected:
// Can't be unordered_map, we use lower_bound ... although for some reason that compiles on MSVC.
typedef std::map<u64, TexCacheEntry> TexCache;
void UnswizzleFromMem(u32 *dest, const u8 *texptr, u32 bufw, u32 height, u32 bytesPerPixel);
void UnswizzleFromMem(u32 *dest, u32 destPitch, const u8 *texptr, u32 bufw, u32 height, u32 bytesPerPixel);
void *RearrangeBuf(void *inBuf, u32 inRowBytes, u32 outRowBytes, int h, bool allowInPlace = true);
u32 EstimateTexMemoryUsage(const TexCacheEntry *entry);

View File

@ -118,7 +118,10 @@ u32 QuickTexHashNEON(const void *checkp, u32 size) {
return check;
}
void DoUnswizzleTex16NEON(const u8 *texptr, u32 *ydestp, int bxc, int byc, u32 pitch, u32 rowWidth) {
void DoUnswizzleTex16NEON(const u8 *texptr, u32 *ydestp, int bxc, int byc, u32 pitch) {
// ydestp is a u32 pointer, so convert the pitch (in bytes) to a count of 32-bit words for pointer arithmetic.
const u32 pitchBy32 = pitch >> 2;
__builtin_prefetch(texptr, 0, 0);
__builtin_prefetch(ydestp, 1, 1);
@ -134,18 +137,18 @@ void DoUnswizzleTex16NEON(const u8 *texptr, u32 *ydestp, int bxc, int byc, u32 p
uint32x4_t temp3 = vld1q_u32(src + 8);
uint32x4_t temp4 = vld1q_u32(src + 12);
vst1q_u32(dest, temp1);
dest += pitch;
dest += pitchBy32;
vst1q_u32(dest, temp2);
dest += pitch;
dest += pitchBy32;
vst1q_u32(dest, temp3);
dest += pitch;
dest += pitchBy32;
vst1q_u32(dest, temp4);
dest += pitch;
dest += pitchBy32;
src += 16;
}
xdest += 4;
}
ydestp += (rowWidth * 8) / 4;
ydestp += pitchBy32 * 8;
}
}

View File

@ -18,7 +18,7 @@
#include "GPU/Common/TextureDecoder.h"
u32 QuickTexHashNEON(const void *checkp, u32 size);
void DoUnswizzleTex16NEON(const u8 *texptr, u32 *ydestp, int bxc, int byc, u32 pitch, u32 rowWidth);
void DoUnswizzleTex16NEON(const u8 *texptr, u32 *ydestp, int bxc, int byc, u32 pitch);
u32 ReliableHash32NEON(const void *input, size_t len, u32 seed);
CheckAlphaResult CheckAlphaRGBA8888NEON(const u32 *pixelData, int stride, int w, int h);

View File

@ -377,7 +377,7 @@ void *TextureCacheDX9::ReadIndexedTex(int level, const u8 *texptr, int bytesPerI
}
} else {
tmpTexBuf32.resize(std::max(bufw, w) * h);
UnswizzleFromMem(tmpTexBuf32.data(), texptr, bufw, h, bytesPerIndex);
UnswizzleFromMem(tmpTexBuf32.data(), bufw * bytesPerIndex, texptr, bufw, h, bytesPerIndex);
switch (bytesPerIndex) {
case 1:
DeIndexTexture(tmpTexBuf16.data(), (u8 *) tmpTexBuf32.data(), length, clut);
@ -417,7 +417,7 @@ void *TextureCacheDX9::ReadIndexedTex(int level, const u8 *texptr, int bytesPerI
}
buf = tmpTexBuf32.data();
} else {
UnswizzleFromMem(tmpTexBuf32.data(), texptr, bufw, h, bytesPerIndex);
UnswizzleFromMem(tmpTexBuf32.data(), bufw * bytesPerIndex, texptr, bufw, h, bytesPerIndex);
// Since we had to unswizzle to tmpTexBuf32, let's output to tmpTexBuf16.
tmpTexBuf16.resize(std::max(bufw, w) * h * 2);
u32 *dest32 = (u32 *) tmpTexBuf16.data();
@ -1366,7 +1366,7 @@ void *TextureCacheDX9::DecodeTextureLevel(GETextureFormat format, GEPaletteForma
}
} else {
tmpTexBuf32.resize(std::max(bufw, w) * h);
UnswizzleFromMem(tmpTexBuf32.data(), texptr, bufw, h, 0);
UnswizzleFromMem(tmpTexBuf32.data(), bufw / 2, texptr, bufw, h, 0);
if (clutAlphaLinear_ && mipmapShareClut) {
DeIndexTexture4OptimalRev(tmpTexBuf16.data(), (const u8 *)tmpTexBuf32.data(), bufw * h, clutAlphaLinearColor_);
} else {
@ -1386,7 +1386,7 @@ void *TextureCacheDX9::DecodeTextureLevel(GETextureFormat format, GEPaletteForma
DeIndexTexture4(tmpTexBuf32.data(), texptr, bufw * h, clut);
finalBuf = tmpTexBuf32.data();
} else {
UnswizzleFromMem(tmpTexBuf32.data(), texptr, bufw, h, 0);
UnswizzleFromMem(tmpTexBuf32.data(), bufw / 2, texptr, bufw, h, 0);
// Let's reuse tmpTexBuf16, just need double the space.
tmpTexBuf16.resize(std::max(bufw, w) * h * 2);
DeIndexTexture4((u32 *)tmpTexBuf16.data(), (u8 *)tmpTexBuf32.data(), bufw * h, clut);
@ -1431,7 +1431,7 @@ void *TextureCacheDX9::DecodeTextureLevel(GETextureFormat format, GEPaletteForma
}
else {
tmpTexBuf32.resize(std::max(bufw, w) * h);
UnswizzleFromMem(tmpTexBuf32.data(), texptr, bufw, h, 2);
UnswizzleFromMem(tmpTexBuf32.data(), bufw * 2, texptr, bufw, h, 2);
finalBuf = tmpTexBuf32.data();
}
break;
@ -1451,7 +1451,7 @@ void *TextureCacheDX9::DecodeTextureLevel(GETextureFormat format, GEPaletteForma
}
} else {
tmpTexBuf32.resize(std::max(bufw, w) * h);
UnswizzleFromMem(tmpTexBuf32.data(), texptr, bufw, h, 4);
UnswizzleFromMem(tmpTexBuf32.data(), bufw * 4, texptr, bufw, h, 4);
finalBuf = tmpTexBuf32.data();
}
break;

View File

@ -375,7 +375,7 @@ void *TextureCache::ReadIndexedTex(int level, const u8 *texptr, int bytesPerInde
}
} else {
tmpTexBuf32.resize(std::max(bufw, w) * h);
UnswizzleFromMem(tmpTexBuf32.data(), texptr, bufw, h, bytesPerIndex);
UnswizzleFromMem(tmpTexBuf32.data(), bufw * bytesPerIndex, texptr, bufw, h, bytesPerIndex);
switch (bytesPerIndex) {
case 1:
DeIndexTexture(tmpTexBuf16.data(), (u8 *) tmpTexBuf32.data(), length, clut);
@ -415,7 +415,7 @@ void *TextureCache::ReadIndexedTex(int level, const u8 *texptr, int bytesPerInde
}
buf = tmpTexBuf32.data();
} else {
UnswizzleFromMem(tmpTexBuf32.data(), texptr, bufw, h, bytesPerIndex);
UnswizzleFromMem(tmpTexBuf32.data(), bufw * bytesPerIndex, texptr, bufw, h, bytesPerIndex);
// Since we had to unswizzle to tmpTexBuf32, let's output to tmpTexBuf16.
tmpTexBuf16.resize(std::max(bufw, w) * h * 2);
u32 *dest32 = (u32 *) tmpTexBuf16.data();
@ -1501,7 +1501,7 @@ void *TextureCache::DecodeTextureLevel(GETextureFormat format, GEPaletteFormat c
}
} else {
tmpTexBuf32.resize(std::max(bufw, w) * h);
UnswizzleFromMem(tmpTexBuf32.data(), texptr, bufw, h, 0);
UnswizzleFromMem(tmpTexBuf32.data(), bufw / 2, texptr, bufw, h, 0);
if (clutAlphaLinear_ && mipmapShareClut) {
DeIndexTexture4Optimal(tmpTexBuf16.data(), (const u8 *)tmpTexBuf32.data(), bufw * h, clutAlphaLinearColor_);
} else {
@ -1521,7 +1521,7 @@ void *TextureCache::DecodeTextureLevel(GETextureFormat format, GEPaletteFormat c
DeIndexTexture4(tmpTexBuf32.data(), texptr, bufw * h, clut);
finalBuf = tmpTexBuf32.data();
} else {
UnswizzleFromMem(tmpTexBuf32.data(), texptr, bufw, h, 0);
UnswizzleFromMem(tmpTexBuf32.data(), bufw / 2, texptr, bufw, h, 0);
// Let's reuse tmpTexBuf16, just need double the space.
tmpTexBuf16.resize(std::max(bufw, w) * h * 2);
DeIndexTexture4((u32 *)tmpTexBuf16.data(), (u8 *)tmpTexBuf32.data(), bufw * h, clut);
@ -1565,7 +1565,7 @@ void *TextureCache::DecodeTextureLevel(GETextureFormat format, GEPaletteFormat c
ConvertColors(finalBuf, texptr, dstFmt, bufw * h);
} else {
tmpTexBuf32.resize(std::max(bufw, w) * h);
UnswizzleFromMem(tmpTexBuf32.data(), texptr, bufw, h, 2);
UnswizzleFromMem(tmpTexBuf32.data(), bufw * 2, texptr, bufw, h, 2);
finalBuf = tmpTexBuf32.data();
ConvertColors(finalBuf, finalBuf, dstFmt, bufw * h);
}
@ -1590,7 +1590,7 @@ void *TextureCache::DecodeTextureLevel(GETextureFormat format, GEPaletteFormat c
}
} else {
tmpTexBuf32.resize(std::max(bufw, w) * h);
UnswizzleFromMem(tmpTexBuf32.data(), texptr, bufw, h, 4);
UnswizzleFromMem(tmpTexBuf32.data(), bufw * 4, texptr, bufw, h, 4);
finalBuf = tmpTexBuf32.data();
ConvertColors(finalBuf, finalBuf, dstFmt, bufw * h);
}

View File

@ -407,8 +407,8 @@ bool TextureCacheVulkan::ReadIndexedTex(u8 *out, int outPitch, int level, const
int h = gstate.getTextureHeight(level);
if (gstate.isTextureSwizzled()) {
tmpTexBuf32.resize(std::max(bufw, w) * h);
UnswizzleFromMem(tmpTexBuf32.data(), texptr, bufw, h, bytesPerIndex);
tmpTexBuf32.resize(bufw * ((h + 7) & ~7));
UnswizzleFromMem(tmpTexBuf32.data(), bufw * bytesPerIndex, texptr, bufw, h, bytesPerIndex);
texptr = (u8 *)tmpTexBuf32.data();
}
@ -1409,8 +1409,8 @@ bool TextureCacheVulkan::DecodeTextureLevel(u8 *out, int outPitch, GETextureForm
const int clutSharingOffset = mipmapShareClut ? 0 : level * 16;
if (swizzled) {
tmpTexBuf32.resize(std::max(bufw, w) * h);
UnswizzleFromMem(tmpTexBuf32.data(), texptr, bufw, h, 0);
tmpTexBuf32.resize(bufw * ((h + 7) & ~7));
UnswizzleFromMem(tmpTexBuf32.data(), bufw / 2, texptr, bufw, h, 0);
texptr = (u8 *)tmpTexBuf32.data();
}
@ -1475,8 +1475,8 @@ bool TextureCacheVulkan::DecodeTextureLevel(u8 *out, int outPitch, GETextureForm
memcpy(out + outPitch * y, texptr + bufw * sizeof(u16) * y, w * sizeof(u16));
}
} else {
tmpTexBuf32.resize(std::max(bufw, w) * h);
UnswizzleFromMem(tmpTexBuf32.data(), texptr, bufw, h, 2);
tmpTexBuf32.resize(bufw * ((h + 7) & ~7));
UnswizzleFromMem(tmpTexBuf32.data(), bufw * 2, texptr, bufw, h, 2);
const u8 *unswizzled = (u8 *)tmpTexBuf32.data();
for (int y = 0; y < h; ++y) {
memcpy(out + outPitch * y, unswizzled + bufw * sizeof(u16) * y, w * sizeof(u16));
@ -1490,8 +1490,8 @@ bool TextureCacheVulkan::DecodeTextureLevel(u8 *out, int outPitch, GETextureForm
memcpy(out + outPitch * y, texptr + bufw * sizeof(u32) * y, w * sizeof(u32));
}
} else {
tmpTexBuf32.resize(std::max(bufw, w) * h);
UnswizzleFromMem(tmpTexBuf32.data(), texptr, bufw, h, 4);
tmpTexBuf32.resize(bufw * ((h + 7) & ~7));
UnswizzleFromMem(tmpTexBuf32.data(), bufw * 4, texptr, bufw, h, 4);
const u8 *unswizzled = (u8 *)tmpTexBuf32.data();
for (int y = 0; y < h; ++y) {
memcpy(out + outPitch * y, unswizzled + bufw * sizeof(u32) * y, w * sizeof(u32));