diff --git a/GPU/GLES/TextureCache.cpp b/GPU/GLES/TextureCache.cpp index e4f0568a33..43eaa86519 100644 --- a/GPU/GLES/TextureCache.cpp +++ b/GPU/GLES/TextureCache.cpp @@ -170,57 +170,67 @@ static void ReadClut32(u32 *clutBuf32) { } void *TextureCache::UnswizzleFromMem(u32 texaddr, u32 bytesPerPixel, u32 level) { - u32 addr = texaddr; - u32 rowWidth = (bytesPerPixel > 0) ? ((gstate.texbufwidth[level] & 0x3FF) * bytesPerPixel) : ((gstate.texbufwidth[level] & 0x3FF) / 2); - u32 pitch = rowWidth / 4; - int bxc = rowWidth / 16; + const u32 rowWidth = (bytesPerPixel > 0) ? ((gstate.texbufwidth[level] & 0x3FF) * bytesPerPixel) : ((gstate.texbufwidth[level] & 0x3FF) / 2); + const u32 pitch = rowWidth / 4; + const int bxc = rowWidth / 16; int byc = ((1 << ((gstate.texsize[level] >> 8) & 0xf)) + 7) / 8; if (byc == 0) byc = 1; u32 ydest = 0; - for (int by = 0; by < byc; by++) { - if (rowWidth >= 16) { - u32 xdest = ydest; + if (rowWidth >= 16) { + const u32 *src = (u32 *) Memory::GetPointer(texaddr); + u32 *ydest = tmpTexBuf32.data(); + for (int by = 0; by < byc; by++) { + u32 *xdest = ydest; for (int bx = 0; bx < bxc; bx++) { - u32 dest = xdest; + u32 *dest = xdest; for (int n = 0; n < 8; n++) { - for (int k = 0; k < 4; k++) { - tmpTexBuf32[dest + k] = Memory::ReadUnchecked_U32(addr); - addr += 4; - } + memcpy(dest, src, 16); dest += pitch; + src += 4; } xdest += 4; } ydest += (rowWidth * 8) / 4; - } else if (rowWidth == 8) { + } + } else if (rowWidth == 8) { + const u32 *src = (u32 *) Memory::GetPointer(texaddr); + for (int by = 0; by < byc; by++) { for (int n = 0; n < 8; n++, ydest += 2) { - tmpTexBuf32[ydest + 0] = Memory::ReadUnchecked_U32(addr + 0); - tmpTexBuf32[ydest + 1] = Memory::ReadUnchecked_U32(addr + 4); - addr += 16; // skip two u32 - } - } else if (rowWidth == 4) { - for (int n = 0; n < 8; n++, ydest++) { - tmpTexBuf32[ydest] = Memory::ReadUnchecked_U32(addr); - addr += 16; - } - } else if (rowWidth == 2) { - for (int n = 0; n < 4; n++, ydest++) { - u16 n1 = Memory::ReadUnchecked_U16(addr + 0); - u16 n2 = Memory::ReadUnchecked_U16(addr + 16); - tmpTexBuf32[ydest] = (u32)n1 | ((u32)n2 << 16); - addr += 32; + tmpTexBuf32[ydest + 0] = *src++; + tmpTexBuf32[ydest + 1] = *src++; + src += 2; // skip two u32 } } - else if (rowWidth == 1) { + } else if (rowWidth == 4) { + const u32 *src = (u32 *) Memory::GetPointer(texaddr); + for (int by = 0; by < byc; by++) { + for (int n = 0; n < 8; n++, ydest++) { + tmpTexBuf32[ydest] = *src++; + src += 3; + } + } + } else if (rowWidth == 2) { + const u16 *src = (u16 *) Memory::GetPointer(texaddr); + for (int by = 0; by < byc; by++) { + for (int n = 0; n < 4; n++, ydest++) { + u16 n1 = src[0]; + u16 n2 = src[8]; + tmpTexBuf32[ydest] = (u32)n1 | ((u32)n2 << 16); + src += 16; + } + } + } else if (rowWidth == 1) { + const u8 *src = (u8 *) Memory::GetPointer(texaddr); + for (int by = 0; by < byc; by++) { for (int n = 0; n < 2; n++, ydest++) { - // This looks wrong, shouldn't it be & 0xFF (that is no mask at all?) - u8 n1 = Memory::ReadUnchecked_U8(addr + 0) & 0xf; - u8 n2 = Memory::ReadUnchecked_U8(addr + 16) & 0xf; - u8 n3 = Memory::ReadUnchecked_U8(addr + 32) & 0xf; - u8 n4 = Memory::ReadUnchecked_U8(addr + 48) & 0xf; + u8 n1 = src[ 0]; + u8 n2 = src[16]; + u8 n3 = src[32]; + u8 n4 = src[48]; tmpTexBuf32[ydest] = (u32)n1 | ((u32)n2 << 8) | ((u32)n3 << 16) | ((u32)n4 << 24); + src += 64; } } } @@ -228,10 +238,9 @@ void *TextureCache::UnswizzleFromMem(u32 texaddr, u32 bytesPerPixel, u32 level) } template -inline void DeIndexTextureNoSwizzle(ClutT *dest, const u32 texaddr, int length, const ClutT *clut) { +inline void DeIndexTexture(ClutT *dest, const IndexT *indexed, int length, const ClutT *clut) { // Usually, there is no special offset, mask, or shift. const bool nakedIndex = (gstate.clutformat & ~3) == 0xC500FF00; - const IndexT *indexed = (const IndexT *) Memory::GetPointer(texaddr); if (nakedIndex) { for (int i = 0; i < length; ++i) { @@ -244,6 +253,12 @@ inline void DeIndexTextureNoSwizzle(ClutT *dest, const u32 texaddr, int length, } } +template +inline void DeIndexTextureNoSwizzle(ClutT *dest, const u32 texaddr, int length, const ClutT *clut) { + const IndexT *indexed = (const IndexT *) Memory::GetPointer(texaddr); + DeIndexTexture(dest, indexed, length, clut); +} + void *TextureCache::readIndexedTex(int level, u32 texaddr, int bytesPerIndex) { int length = (gstate.texbufwidth[level] & 0x3FF) * (1 << ((gstate.texsize[level] >> 8) & 0xf)); void *buf = NULL; @@ -274,29 +289,15 @@ void *TextureCache::readIndexedTex(int level, u32 texaddr, int bytesPerIndex) { UnswizzleFromMem(texaddr, bytesPerIndex, level); switch (bytesPerIndex) { case 1: - for (int i = 0, j = 0; i < length; i += 4, j++) { - u32 n = tmpTexBuf32[j]; - u32 k; - for (k = 0; k < 4; k++) { - u8 index = (n >> (k * 8)) & 0xff; - tmpTexBuf16[i + k] = clut[GetClutIndex(index)]; - } - } + DeIndexTexture(tmpTexBuf16.data(), (u8 *) tmpTexBuf32.data(), length, clut); break; case 2: - for (int i = 0, j = 0; i < length; i += 2, j++) { - u32 n = tmpTexBuf32[j]; - tmpTexBuf16[i + 0] = clut[GetClutIndex(n & 0xffff)]; - tmpTexBuf16[i + 1] = clut[GetClutIndex(n >> 16)]; - } + DeIndexTexture(tmpTexBuf16.data(), (u16 *) tmpTexBuf32.data(), length, clut); break; case 4: - for (int i = 0; i < length; i++) { - u32 n = tmpTexBuf32[i]; - tmpTexBuf16[i] = clut[GetClutIndex(n)]; - } + DeIndexTexture(tmpTexBuf16.data(), (u32 *) tmpTexBuf32.data(), length, clut); break; } } @@ -323,38 +324,31 @@ void *TextureCache::readIndexedTex(int level, u32 texaddr, int bytesPerIndex) { DeIndexTextureNoSwizzle(tmpTexBuf32.data(), texaddr, length, clutBuf32); break; } + buf = tmpTexBuf32.data(); } else { const u32 *clut = clutBuf32; UnswizzleFromMem(texaddr, bytesPerIndex, level); + // Since we had to unswizzle to tmpTexBuf32, let's output to tmpTexBuf16. + tmpTexBuf16.resize(length * 2); + u32 *dest32 = (u32 *) tmpTexBuf16.data(); switch (bytesPerIndex) { case 1: - for (int i = length - 4, j = (length / 4) - 1; i >= 0; i -= 4, j--) { - u32 n = tmpTexBuf32[j]; - u32 k; - for (k = 0; k < 4; k++) { - u32 index = (n >> (k * 8)) & 0xff; - tmpTexBuf32[i + k] = clut[GetClutIndex(index)]; - } - } + DeIndexTexture(dest32, (u8 *) tmpTexBuf32.data(), length, clut); + buf = dest32; break; case 2: - for (int i = length - 2, j = (length / 2) - 1; i >= 0; i -= 2, j--) { - u32 n = tmpTexBuf32[j]; - tmpTexBuf32[i + 0] = clut[GetClutIndex(n & 0xffff)]; - tmpTexBuf32[i + 1] = clut[GetClutIndex(n >> 16)]; - } + DeIndexTexture(dest32, (u16 *) tmpTexBuf32.data(), length, clut); + buf = dest32; break; case 4: - for (int i = 0; i < length; i++) { - u32 n = tmpTexBuf32[i]; - tmpTexBuf32[i] = clut[GetClutIndex(n)]; - } + // TODO: If a game actually uses this crazy mode, check if using dest32 or tmpTexBuf32 is faster. + DeIndexTexture(tmpTexBuf32.data(), tmpTexBuf32.data(), length, clut); + buf = tmpTexBuf32.data(); break; } } - buf = tmpTexBuf32.data(); } break;