mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-11-23 21:39:52 +00:00
Move more common code around in texcache.
This commit is contained in:
parent
256c7b6271
commit
69daa75228
@ -21,6 +21,7 @@
|
||||
#include "Core/Reporting.h"
|
||||
#include "GPU/Common/FramebufferCommon.h"
|
||||
#include "GPU/Common/TextureCacheCommon.h"
|
||||
#include "GPU/Common/TextureDecoder.h"
|
||||
#include "GPU/Common/ShaderId.h"
|
||||
#include "GPU/Common/GPUStateUtils.h"
|
||||
#include "GPU/GPUState.h"
|
||||
@ -39,6 +40,11 @@ TextureCacheCommon::TextureCacheCommon()
|
||||
// Zap so we get consistent behavior if the game fails to load some of the CLUT.
|
||||
memset(clutBufRaw_, 0, 1024 * sizeof(u32));
|
||||
memset(clutBufConverted_, 0, 1024 * sizeof(u32));
|
||||
|
||||
// This is 5MB of temporary storage. Might be possible to shrink it.
|
||||
tmpTexBuf32.resize(1024 * 512); // 2MB
|
||||
tmpTexBuf16.resize(1024 * 512); // 1MB
|
||||
tmpTexBufRearrange.resize(1024 * 512); // 2MB
|
||||
}
|
||||
|
||||
TextureCacheCommon::~TextureCacheCommon() {
|
||||
@ -213,3 +219,76 @@ void TextureCacheCommon::LoadClut(u32 clutAddr, u32 loadBytes) {
|
||||
clutLastFormat_ = 0xFFFFFFFF;
|
||||
clutMaxBytes_ = std::max(clutMaxBytes_, loadBytes);
|
||||
}
|
||||
|
||||
void *TextureCacheCommon::UnswizzleFromMem(const u8 *texptr, u32 bufw, u32 height, u32 bytesPerPixel) {
|
||||
const u32 rowWidth = (bytesPerPixel > 0) ? (bufw * bytesPerPixel) : (bufw / 2);
|
||||
const u32 pitch = rowWidth / 4;
|
||||
const int bxc = rowWidth / 16;
|
||||
int byc = (height + 7) / 8;
|
||||
if (byc == 0)
|
||||
byc = 1;
|
||||
|
||||
u32 ydest = 0;
|
||||
if (rowWidth >= 16) {
|
||||
u32 *ydestp = tmpTexBuf32.data();
|
||||
// The most common one, so it gets an optimized implementation.
|
||||
DoUnswizzleTex16(texptr, ydestp, bxc, byc, pitch, rowWidth);
|
||||
} else if (rowWidth == 8) {
|
||||
const u32 *src = (const u32 *) texptr;
|
||||
for (int by = 0; by < byc; by++) {
|
||||
for (int n = 0; n < 8; n++, ydest += 2) {
|
||||
tmpTexBuf32[ydest + 0] = *src++;
|
||||
tmpTexBuf32[ydest + 1] = *src++;
|
||||
src += 2; // skip two u32
|
||||
}
|
||||
}
|
||||
} else if (rowWidth == 4) {
|
||||
const u32 *src = (const u32 *) texptr;
|
||||
for (int by = 0; by < byc; by++) {
|
||||
for (int n = 0; n < 8; n++, ydest++) {
|
||||
tmpTexBuf32[ydest] = *src++;
|
||||
src += 3;
|
||||
}
|
||||
}
|
||||
} else if (rowWidth == 2) {
|
||||
const u16 *src = (const u16 *) texptr;
|
||||
for (int by = 0; by < byc; by++) {
|
||||
for (int n = 0; n < 4; n++, ydest++) {
|
||||
u16 n1 = src[0];
|
||||
u16 n2 = src[8];
|
||||
tmpTexBuf32[ydest] = (u32)n1 | ((u32)n2 << 16);
|
||||
src += 16;
|
||||
}
|
||||
}
|
||||
} else if (rowWidth == 1) {
|
||||
const u8 *src = (const u8 *) texptr;
|
||||
for (int by = 0; by < byc; by++) {
|
||||
for (int n = 0; n < 2; n++, ydest++) {
|
||||
u8 n1 = src[ 0];
|
||||
u8 n2 = src[16];
|
||||
u8 n3 = src[32];
|
||||
u8 n4 = src[48];
|
||||
tmpTexBuf32[ydest] = (u32)n1 | ((u32)n2 << 8) | ((u32)n3 << 16) | ((u32)n4 << 24);
|
||||
src += 64;
|
||||
}
|
||||
}
|
||||
}
|
||||
return tmpTexBuf32.data();
|
||||
}
|
||||
|
||||
void *TextureCacheCommon::RearrangeBuf(void *inBuf, u32 inRowBytes, u32 outRowBytes, int h, bool allowInPlace) {
|
||||
const u8 *read = (const u8 *)inBuf;
|
||||
void *outBuf = inBuf;
|
||||
u8 *write = (u8 *)inBuf;
|
||||
if (outRowBytes > inRowBytes || !allowInPlace) {
|
||||
write = (u8 *)tmpTexBufRearrange.data();
|
||||
outBuf = tmpTexBufRearrange.data();
|
||||
}
|
||||
for (int y = 0; y < h; y++) {
|
||||
memmove(write, read, outRowBytes);
|
||||
read += inRowBytes;
|
||||
write += outRowBytes;
|
||||
}
|
||||
|
||||
return outBuf;
|
||||
}
|
||||
|
@ -130,6 +130,9 @@ protected:
|
||||
// Can't be unordered_map, we use lower_bound ... although for some reason that compiles on MSVC.
|
||||
typedef std::map<u64, TexCacheEntry> TexCache;
|
||||
|
||||
void *UnswizzleFromMem(const u8 *texptr, u32 bufw, u32 height, u32 bytesPerPixel);
|
||||
void *RearrangeBuf(void *inBuf, u32 inRowBytes, u32 outRowBytes, int h, bool allowInPlace = true);
|
||||
|
||||
void GetSamplingParams(int &minFilt, int &magFilt, bool &sClamp, bool &tClamp, float &lodBias, u8 maxLevel);
|
||||
|
||||
virtual bool AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, u32 texaddrOffset = 0) = 0;
|
||||
@ -138,6 +141,10 @@ protected:
|
||||
TexCache cache;
|
||||
std::vector<VirtualFramebuffer *> fbCache_;
|
||||
|
||||
SimpleBuf<u32> tmpTexBuf32;
|
||||
SimpleBuf<u16> tmpTexBuf16;
|
||||
SimpleBuf<u32> tmpTexBufRearrange;
|
||||
|
||||
TexCacheEntry *nextTexture_;
|
||||
|
||||
// Raw is where we keep the original bytes. Converted is where we swap colors if necessary.
|
||||
|
@ -28,6 +28,7 @@ enum CheckAlphaResult {
|
||||
#include "Core/MemMap.h"
|
||||
#include "GPU/ge_constants.h"
|
||||
#include "GPU/Common/TextureDecoderNEON.h"
|
||||
#include "GPU/GPUState.h"
|
||||
|
||||
void SetupTextureDecoder();
|
||||
|
||||
|
@ -67,10 +67,6 @@ TextureCacheDX9::TextureCacheDX9() : cacheSizeEstimate_(0), secondCacheSizeEstim
|
||||
timesInvalidatedAllThisFrame_ = 0;
|
||||
lastBoundTexture = INVALID_TEX;
|
||||
decimationCounter_ = TEXCACHE_DECIMATION_INTERVAL;
|
||||
// This is 5MB of temporary storage. Might be possible to shrink it.
|
||||
tmpTexBuf32.resize(1024 * 512); // 2MB
|
||||
tmpTexBuf16.resize(1024 * 512); // 1MB
|
||||
tmpTexBufRearrange.resize(1024 * 512); // 2MB
|
||||
|
||||
D3DCAPS9 pCaps;
|
||||
ZeroMemory(&pCaps, sizeof(pCaps));
|
||||
@ -432,62 +428,6 @@ inline void TextureCacheDX9::DetachFramebuffer(TexCacheEntry *entry, u32 address
|
||||
}
|
||||
}
|
||||
|
||||
void *TextureCacheDX9::UnswizzleFromMem(const u8 *texptr, u32 bufw, u32 height, u32 bytesPerPixel) {
|
||||
const u32 rowWidth = (bytesPerPixel > 0) ? (bufw * bytesPerPixel) : (bufw / 2);
|
||||
const u32 pitch = rowWidth / 4;
|
||||
const int bxc = rowWidth / 16;
|
||||
int byc = (height + 7) / 8;
|
||||
if (byc == 0)
|
||||
byc = 1;
|
||||
|
||||
u32 ydest = 0;
|
||||
if (rowWidth >= 16) {
|
||||
u32 *ydestp = tmpTexBuf32.data();
|
||||
// The most common one, so it gets an optimized implementation.
|
||||
DoUnswizzleTex16(texptr, ydestp, bxc, byc, pitch, rowWidth);
|
||||
} else if (rowWidth == 8) {
|
||||
const u32 *src = (const u32 *) texptr;
|
||||
for (int by = 0; by < byc; by++) {
|
||||
for (int n = 0; n < 8; n++, ydest += 2) {
|
||||
tmpTexBuf32[ydest + 0] = *src++;
|
||||
tmpTexBuf32[ydest + 1] = *src++;
|
||||
src += 2; // skip two u32
|
||||
}
|
||||
}
|
||||
} else if (rowWidth == 4) {
|
||||
const u32 *src = (const u32 *) texptr;
|
||||
for (int by = 0; by < byc; by++) {
|
||||
for (int n = 0; n < 8; n++, ydest++) {
|
||||
tmpTexBuf32[ydest] = *src++;
|
||||
src += 3;
|
||||
}
|
||||
}
|
||||
} else if (rowWidth == 2) {
|
||||
const u16 *src = (const u16 *) texptr;
|
||||
for (int by = 0; by < byc; by++) {
|
||||
for (int n = 0; n < 4; n++, ydest++) {
|
||||
u16 n1 = src[0];
|
||||
u16 n2 = src[8];
|
||||
tmpTexBuf32[ydest] = (u32)n1 | ((u32)n2 << 16);
|
||||
src += 16;
|
||||
}
|
||||
}
|
||||
} else if (rowWidth == 1) {
|
||||
const u8 *src = (const u8 *) texptr;
|
||||
for (int by = 0; by < byc; by++) {
|
||||
for (int n = 0; n < 2; n++, ydest++) {
|
||||
u8 n1 = src[ 0];
|
||||
u8 n2 = src[16];
|
||||
u8 n3 = src[32];
|
||||
u8 n4 = src[48];
|
||||
tmpTexBuf32[ydest] = (u32)n1 | ((u32)n2 << 8) | ((u32)n3 << 16) | ((u32)n4 << 24);
|
||||
src += 64;
|
||||
}
|
||||
}
|
||||
}
|
||||
return tmpTexBuf32.data();
|
||||
}
|
||||
|
||||
void *TextureCacheDX9::ReadIndexedTex(int level, const u8 *texptr, int bytesPerIndex, u32 dstFmt, int bufw) {
|
||||
int w = gstate.getTextureWidth(level);
|
||||
int h = gstate.getTextureHeight(level);
|
||||
@ -1650,7 +1590,7 @@ void *TextureCacheDX9::DecodeTextureLevel(GETextureFormat format, GEPaletteForma
|
||||
ERROR_LOG_REPORT(G3D, "NO finalbuf! Will crash!");
|
||||
}
|
||||
|
||||
if (w != bufw) {
|
||||
if (!(g_Config.iTexScalingLevel == 1 && gstate_c.Supports(GPU_SUPPORTS_UNPACK_SUBIMAGE)) && w != bufw) {
|
||||
int pixelSize;
|
||||
switch (dstFmt) {
|
||||
case D3DFMT_A4R4G4B4:
|
||||
@ -1663,21 +1603,7 @@ void *TextureCacheDX9::DecodeTextureLevel(GETextureFormat format, GEPaletteForma
|
||||
break;
|
||||
}
|
||||
// Need to rearrange the buffer to simulate GL_UNPACK_ROW_LENGTH etc.
|
||||
int inRowBytes = bufw * pixelSize;
|
||||
int outRowBytes = w * pixelSize;
|
||||
const u8 *read = (const u8 *)finalBuf;
|
||||
u8 *write = 0;
|
||||
if (w > bufw) {
|
||||
write = (u8 *)tmpTexBufRearrange.data();
|
||||
finalBuf = tmpTexBufRearrange.data();
|
||||
} else {
|
||||
write = (u8 *)finalBuf;
|
||||
}
|
||||
for (int y = 0; y < h; y++) {
|
||||
memmove(write, read, outRowBytes);
|
||||
read += inRowBytes;
|
||||
write += outRowBytes;
|
||||
}
|
||||
finalBuf = RearrangeBuf(finalBuf, bufw * pixelSize, w * pixelSize, h);
|
||||
}
|
||||
|
||||
return finalBuf;
|
||||
|
@ -75,7 +75,6 @@ public:
|
||||
private:
|
||||
void Decimate(); // Run this once per frame to get rid of old textures.
|
||||
void DeleteTexture(TexCache::iterator it);
|
||||
void *UnswizzleFromMem(const u8 *texptr, u32 bufw, u32 height, u32 bytesPerPixel);
|
||||
void *ReadIndexedTex(int level, const u8 *texptr, int bytesPerIndex, u32 dstFmt, int bufw);
|
||||
void UpdateSamplingParams(TexCacheEntry &entry, bool force);
|
||||
void LoadTextureLevel(TexCacheEntry &entry, int level, int maxLevel, bool replaceImages, int scaleFactor, u32 dstFmt);
|
||||
@ -119,11 +118,6 @@ private:
|
||||
bool lowMemoryMode_;
|
||||
TextureScalerDX9 scaler;
|
||||
|
||||
SimpleBuf<u32> tmpTexBuf32;
|
||||
SimpleBuf<u16> tmpTexBuf16;
|
||||
|
||||
SimpleBuf<u32> tmpTexBufRearrange;
|
||||
|
||||
u32 *clutBuf_;
|
||||
u32 clutHash_;
|
||||
// True if the clut is just alpha values in the same order (RGBA4444-bit only.)
|
||||
|
@ -78,10 +78,6 @@ TextureCache::TextureCache() : cacheSizeEstimate_(0), secondCacheSizeEstimate_(0
|
||||
timesInvalidatedAllThisFrame_ = 0;
|
||||
lastBoundTexture = INVALID_TEX;
|
||||
decimationCounter_ = TEXCACHE_DECIMATION_INTERVAL;
|
||||
// This is 5MB of temporary storage. Might be possible to shrink it.
|
||||
tmpTexBuf32.resize(1024 * 512); // 2MB
|
||||
tmpTexBuf16.resize(1024 * 512); // 1MB
|
||||
tmpTexBufRearrange.resize(1024 * 512); // 2MB
|
||||
|
||||
glGetFloatv(GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT, &maxAnisotropyLevel);
|
||||
SetupTextureDecoder();
|
||||
@ -432,62 +428,6 @@ inline void TextureCache::DetachFramebuffer(TexCacheEntry *entry, u32 address, V
|
||||
}
|
||||
}
|
||||
|
||||
void *TextureCache::UnswizzleFromMem(const u8 *texptr, u32 bufw, u32 height, u32 bytesPerPixel) {
|
||||
const u32 rowWidth = (bytesPerPixel > 0) ? (bufw * bytesPerPixel) : (bufw / 2);
|
||||
const u32 pitch = rowWidth / 4;
|
||||
const int bxc = rowWidth / 16;
|
||||
int byc = (height + 7) / 8;
|
||||
if (byc == 0)
|
||||
byc = 1;
|
||||
|
||||
u32 ydest = 0;
|
||||
if (rowWidth >= 16) {
|
||||
u32 *ydestp = tmpTexBuf32.data();
|
||||
// The most common one, so it gets an optimized implementation.
|
||||
DoUnswizzleTex16(texptr, ydestp, bxc, byc, pitch, rowWidth);
|
||||
} else if (rowWidth == 8) {
|
||||
const u32 *src = (const u32 *) texptr;
|
||||
for (int by = 0; by < byc; by++) {
|
||||
for (int n = 0; n < 8; n++, ydest += 2) {
|
||||
tmpTexBuf32[ydest + 0] = *src++;
|
||||
tmpTexBuf32[ydest + 1] = *src++;
|
||||
src += 2; // skip two u32
|
||||
}
|
||||
}
|
||||
} else if (rowWidth == 4) {
|
||||
const u32 *src = (const u32 *) texptr;
|
||||
for (int by = 0; by < byc; by++) {
|
||||
for (int n = 0; n < 8; n++, ydest++) {
|
||||
tmpTexBuf32[ydest] = *src++;
|
||||
src += 3;
|
||||
}
|
||||
}
|
||||
} else if (rowWidth == 2) {
|
||||
const u16 *src = (const u16 *) texptr;
|
||||
for (int by = 0; by < byc; by++) {
|
||||
for (int n = 0; n < 4; n++, ydest++) {
|
||||
u16 n1 = src[0];
|
||||
u16 n2 = src[8];
|
||||
tmpTexBuf32[ydest] = (u32)n1 | ((u32)n2 << 16);
|
||||
src += 16;
|
||||
}
|
||||
}
|
||||
} else if (rowWidth == 1) {
|
||||
const u8 *src = (const u8 *) texptr;
|
||||
for (int by = 0; by < byc; by++) {
|
||||
for (int n = 0; n < 2; n++, ydest++) {
|
||||
u8 n1 = src[ 0];
|
||||
u8 n2 = src[16];
|
||||
u8 n3 = src[32];
|
||||
u8 n4 = src[48];
|
||||
tmpTexBuf32[ydest] = (u32)n1 | ((u32)n2 << 8) | ((u32)n3 << 16) | ((u32)n4 << 24);
|
||||
src += 64;
|
||||
}
|
||||
}
|
||||
}
|
||||
return tmpTexBuf32.data();
|
||||
}
|
||||
|
||||
void *TextureCache::ReadIndexedTex(int level, const u8 *texptr, int bytesPerIndex, GLuint dstFmt, int bufw) {
|
||||
int w = gstate.getTextureWidth(level);
|
||||
int h = gstate.getTextureHeight(level);
|
||||
@ -1820,21 +1760,7 @@ void *TextureCache::DecodeTextureLevel(GETextureFormat format, GEPaletteFormat c
|
||||
}
|
||||
|
||||
// Need to rearrange the buffer to simulate GL_UNPACK_ROW_LENGTH etc.
|
||||
int inRowBytes = bufw * pixelSize;
|
||||
int outRowBytes = w * pixelSize;
|
||||
const u8 *read = (const u8 *)finalBuf;
|
||||
u8 *write = 0;
|
||||
if (w > bufw) {
|
||||
write = (u8 *)tmpTexBufRearrange.data();
|
||||
finalBuf = tmpTexBufRearrange.data();
|
||||
} else {
|
||||
write = (u8 *)finalBuf;
|
||||
}
|
||||
for (int y = 0; y < h; y++) {
|
||||
memmove(write, read, outRowBytes);
|
||||
read += inRowBytes;
|
||||
write += outRowBytes;
|
||||
}
|
||||
finalBuf = RearrangeBuf(finalBuf, bufw * pixelSize, w * pixelSize, h);
|
||||
}
|
||||
|
||||
return finalBuf;
|
||||
|
@ -90,7 +90,6 @@ public:
|
||||
private:
|
||||
void Decimate(); // Run this once per frame to get rid of old textures.
|
||||
void DeleteTexture(TexCache::iterator it);
|
||||
void *UnswizzleFromMem(const u8 *texptr, u32 bufw, u32 height, u32 bytesPerPixel);
|
||||
void *ReadIndexedTex(int level, const u8 *texptr, int bytesPerIndex, GLuint dstFmt, int bufw);
|
||||
void UpdateSamplingParams(TexCacheEntry &entry, bool force);
|
||||
void LoadTextureLevel(TexCacheEntry &entry, int level, bool replaceImages, int scaleFactor, GLenum dstFmt);
|
||||
@ -125,11 +124,6 @@ private:
|
||||
|
||||
TextureScalerGL scaler;
|
||||
|
||||
SimpleBuf<u32> tmpTexBuf32;
|
||||
SimpleBuf<u16> tmpTexBuf16;
|
||||
|
||||
SimpleBuf<u32> tmpTexBufRearrange;
|
||||
|
||||
u32 *clutBuf_;
|
||||
u32 clutHash_;
|
||||
// True if the clut is just alpha values in the same order (RGBA4444-bit only.)
|
||||
|
Loading…
Reference in New Issue
Block a user