mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-12-02 10:36:22 +00:00
Merge pull request #8348 from unknownbrackets/texcache
Centralize code in the texture cache, minor cleanups
This commit is contained in:
commit
8be22f47cd
@ -15,17 +15,42 @@
|
||||
// Official git repository and contact information can be found at
|
||||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
|
||||
#include <algorithm>
|
||||
#include "Common/MemoryUtil.h"
|
||||
#include "Core/Config.h"
|
||||
#include "Core/Reporting.h"
|
||||
#include "GPU/Common/FramebufferCommon.h"
|
||||
#include "GPU/Common/TextureCacheCommon.h"
|
||||
#include "GPU/Common/TextureDecoder.h"
|
||||
#include "GPU/Common/ShaderId.h"
|
||||
#include "GPU/Common/GPUStateUtils.h"
|
||||
#include "GPU/GPUState.h"
|
||||
#include "GPU/GPUInterface.h"
|
||||
|
||||
// Ugly.
|
||||
extern int g_iNumVideos;
|
||||
|
||||
TextureCacheCommon::~TextureCacheCommon() {}
|
||||
// Initializes the shared CLUT bookkeeping, allocates the two 16-byte aligned
// CLUT buffers, and sizes the temporary decode buffers.
TextureCacheCommon::TextureCacheCommon()
	: nextTexture_(nullptr),
	clutLastFormat_(0xFFFFFFFF), clutTotalBytes_(0), clutMaxBytes_(0), clutRenderAddress_(0xFFFFFFFF) {
	// TODO: Clamp down to 256/1KB?  Need to check mipmapShareClut and clamp loadclut.
	// Each CLUT buffer holds 1024 u32 entries (4KB).
	const size_t clutBufBytes = 1024 * sizeof(u32);
	clutBufRaw_ = (u32 *)AllocateAlignedMemory(clutBufBytes, 16);
	clutBufConverted_ = (u32 *)AllocateAlignedMemory(clutBufBytes, 16);

	// Zero both buffers so behavior is consistent if the game fails to load some of the CLUT.
	memset(clutBufRaw_, 0, clutBufBytes);
	memset(clutBufConverted_, 0, clutBufBytes);

	// This is 5MB of temporary storage.  Might be possible to shrink it.
	const size_t tmpTexels = 1024 * 512;
	tmpTexBuf32.resize(tmpTexels);         // 2MB
	tmpTexBuf16.resize(tmpTexels);         // 1MB
	tmpTexBufRearrange.resize(tmpTexels);  // 2MB
}
|
||||
|
||||
// Releases the two aligned CLUT buffers allocated by the constructor.
TextureCacheCommon::~TextureCacheCommon() {
	FreeAlignedMemory(clutBufRaw_);
	FreeAlignedMemory(clutBufConverted_);
}
|
||||
|
||||
bool TextureCacheCommon::SetOffsetTexture(u32 offset) {
|
||||
return false;
|
||||
@ -88,3 +113,182 @@ void TextureCacheCommon::GetSamplingParams(int &minFilt, int &magFilt, bool &sCl
|
||||
minFilt &= 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Keeps fbCache_ and the texture cache entries in sync with a framebuffer event.
//
// NOTIFY_FB_CREATED and NOTIFY_FB_UPDATED make sure the framebuffer is tracked
// in fbCache_ and offer it to every cached texture whose key falls inside the
// framebuffer's address range.  NOTIFY_FB_DESTROYED removes it from fbCache_
// and detaches it from the same set of entries.  Any other message is ignored.
void TextureCacheCommon::NotifyFramebuffer(u32 address, VirtualFramebuffer *framebuffer, FramebufferNotification msg) {
	// Must be in VRAM so | 0x04000000 it is.  Also, ignore memory mirrors.
	// These checks are mainly to reduce scanning all textures.
	const u32 addr = (address | 0x04000000) & 0x3F9FFFFF;
	const u32 bpp = framebuffer->format == GE_FORMAT_8888 ? 4 : 2;

	// The address lives in the high 32 bits of the key.  If the texture has a
	// clut, those are the low 32 bits, so it'll be inside this range.
	// Also, if it's a subsample of the buffer, it'll also be within the FBO.
	const u64 firstKey = (u64)addr << 32;
	const u64 lastKey = firstKey + ((u64)(framebuffer->fb_stride * framebuffer->height * bpp) << 32);

	// The first mirror starts at 0x04200000 and there are 3.  We search all for framebuffers.
	const u64 mirrorFirstKey = (u64)0x04200000 << 32;
	const u64 mirrorLastKey = (u64)0x04800000 << 32;
	// Address bits that select a mirror, shifted into key position.
	const u64 mirrorBits = 0x0060000000000000ULL;

	const bool attaching = msg == NOTIFY_FB_CREATED || msg == NOTIFY_FB_UPDATED;
	if (!attaching && msg != NOTIFY_FB_DESTROYED) {
		return;
	}

	if (attaching) {
		// Ensure it's in the framebuffer cache.
		if (std::find(fbCache_.begin(), fbCache_.end(), framebuffer) == fbCache_.end()) {
			fbCache_.push_back(framebuffer);
		}
	} else {
		fbCache_.erase(std::remove(fbCache_.begin(), fbCache_.end(), framebuffer), fbCache_.end());
	}

	auto notifyEntry = [&](TexCacheEntry *entry) {
		if (attaching) {
			AttachFramebuffer(entry, addr, framebuffer);
		} else {
			DetachFramebuffer(entry, addr, framebuffer);
		}
	};

	// Entries addressed directly inside the framebuffer's range.
	auto directEnd = cache.upper_bound(lastKey);
	for (auto it = cache.lower_bound(firstKey); it != directEnd; ++it) {
		notifyEntry(&it->second);
	}

	// Let's assume anything in mirrors is fair game to check, but still make
	// sure it maps back into the framebuffer's range once the mirror bits are removed.
	auto mirrorEnd = cache.upper_bound(mirrorLastKey);
	for (auto it = cache.lower_bound(mirrorFirstKey); it != mirrorEnd; ++it) {
		const u64 mirrorlessKey = it->first & ~mirrorBits;
		if (mirrorlessKey < firstKey || mirrorlessKey > lastKey) {
			continue;
		}
		notifyEntry(&it->second);
	}
}
|
||||
|
||||
// Loads loadBytes bytes of CLUT data from clutAddr into clutBufRaw_.
//
// If clutAddr is in VRAM and matches a tracked framebuffer, that framebuffer
// is flagged as a CLUT source and clutRenderAddress_ records its address
// (0xFFFFFFFF means "no rendered CLUT").  Bytes that cannot be read from valid
// memory are zero filled.  The converted CLUT is invalidated so it gets
// rebuilt on next use.
void TextureCacheCommon::LoadClut(u32 clutAddr, u32 loadBytes) {
	clutTotalBytes_ = loadBytes;
	clutRenderAddress_ = 0xFFFFFFFF;

	if (Memory::IsValidAddress(clutAddr)) {
		if (Memory::IsVRAMAddress(clutAddr)) {
			// Clear the uncached bit, etc. to match framebuffers.
			const u32 clutFramebufAddr = clutAddr & 0x3FFFFFFF;

			for (VirtualFramebuffer *framebuffer : fbCache_) {
				if ((framebuffer->fb_address | 0x04000000) == clutFramebufAddr) {
					framebuffer->last_frame_clut = gpuStats.numFlips;
					framebuffer->usageFlags |= FB_USAGE_CLUT;
					clutRenderAddress_ = framebuffer->fb_address;
				}
			}
		}

		// It's possible for a game to (successfully) access outside valid memory.
		const u32 bytes = Memory::ValidSize(clutAddr, loadBytes);
		if (clutRenderAddress_ != 0xFFFFFFFF && !g_Config.bDisableSlowFramebufEffects) {
			gpu->PerformMemoryDownload(clutAddr, bytes);
		}

#ifdef _M_SSE
		if (bytes == loadBytes) {
			const __m128i *source = (const __m128i *)Memory::GetPointerUnchecked(clutAddr);
			__m128i *dest = (__m128i *)clutBufRaw_;
			// Each iteration moves two 16-byte vectors, so a block is 32 bytes.
			// Counting 16-byte blocks here would copy twice the requested size,
			// reading past the validated range and clobbering later CLUT entries.
			const u32 numBlocks = bytes / 32;
			for (u32 i = 0; i < numBlocks; i++, source += 2, dest += 2) {
				__m128i data1 = _mm_loadu_si128(source);
				__m128i data2 = _mm_loadu_si128(source + 1);
				_mm_store_si128(dest, data1);
				_mm_store_si128(dest + 1, data2);
			}
			// Copy whatever is left if the size is not a multiple of 32.
			const u32 tailBytes = bytes - numBlocks * 32;
			if (tailBytes != 0) {
				memcpy(dest, source, tailBytes);
			}
		} else {
			Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
			if (bytes < loadBytes) {
				memset((u8 *)clutBufRaw_ + bytes, 0x00, loadBytes - bytes);
			}
		}
#else
		Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
		if (bytes < clutTotalBytes_) {
			memset((u8 *)clutBufRaw_ + bytes, 0x00, clutTotalBytes_ - bytes);
		}
#endif
	} else {
		// Nothing readable at that address: the loaded range reads as zeros.
		memset(clutBufRaw_, 0x00, loadBytes);
	}

	// Reload the clut next time.
	clutLastFormat_ = 0xFFFFFFFF;
	clutMaxBytes_ = std::max(clutMaxBytes_, loadBytes);
}
|
||||
|
||||
void *TextureCacheCommon::UnswizzleFromMem(const u8 *texptr, u32 bufw, u32 height, u32 bytesPerPixel) {
|
||||
const u32 rowWidth = (bytesPerPixel > 0) ? (bufw * bytesPerPixel) : (bufw / 2);
|
||||
const u32 pitch = rowWidth / 4;
|
||||
const int bxc = rowWidth / 16;
|
||||
int byc = (height + 7) / 8;
|
||||
if (byc == 0)
|
||||
byc = 1;
|
||||
|
||||
u32 ydest = 0;
|
||||
if (rowWidth >= 16) {
|
||||
u32 *ydestp = tmpTexBuf32.data();
|
||||
// The most common one, so it gets an optimized implementation.
|
||||
DoUnswizzleTex16(texptr, ydestp, bxc, byc, pitch, rowWidth);
|
||||
} else if (rowWidth == 8) {
|
||||
const u32 *src = (const u32 *) texptr;
|
||||
for (int by = 0; by < byc; by++) {
|
||||
for (int n = 0; n < 8; n++, ydest += 2) {
|
||||
tmpTexBuf32[ydest + 0] = *src++;
|
||||
tmpTexBuf32[ydest + 1] = *src++;
|
||||
src += 2; // skip two u32
|
||||
}
|
||||
}
|
||||
} else if (rowWidth == 4) {
|
||||
const u32 *src = (const u32 *) texptr;
|
||||
for (int by = 0; by < byc; by++) {
|
||||
for (int n = 0; n < 8; n++, ydest++) {
|
||||
tmpTexBuf32[ydest] = *src++;
|
||||
src += 3;
|
||||
}
|
||||
}
|
||||
} else if (rowWidth == 2) {
|
||||
const u16 *src = (const u16 *) texptr;
|
||||
for (int by = 0; by < byc; by++) {
|
||||
for (int n = 0; n < 4; n++, ydest++) {
|
||||
u16 n1 = src[0];
|
||||
u16 n2 = src[8];
|
||||
tmpTexBuf32[ydest] = (u32)n1 | ((u32)n2 << 16);
|
||||
src += 16;
|
||||
}
|
||||
}
|
||||
} else if (rowWidth == 1) {
|
||||
const u8 *src = (const u8 *) texptr;
|
||||
for (int by = 0; by < byc; by++) {
|
||||
for (int n = 0; n < 2; n++, ydest++) {
|
||||
u8 n1 = src[ 0];
|
||||
u8 n2 = src[16];
|
||||
u8 n3 = src[32];
|
||||
u8 n4 = src[48];
|
||||
tmpTexBuf32[ydest] = (u32)n1 | ((u32)n2 << 8) | ((u32)n3 << 16) | ((u32)n4 << 24);
|
||||
src += 64;
|
||||
}
|
||||
}
|
||||
}
|
||||
return tmpTexBuf32.data();
|
||||
}
|
||||
|
||||
void *TextureCacheCommon::RearrangeBuf(void *inBuf, u32 inRowBytes, u32 outRowBytes, int h, bool allowInPlace) {
|
||||
const u8 *read = (const u8 *)inBuf;
|
||||
void *outBuf = inBuf;
|
||||
u8 *write = (u8 *)inBuf;
|
||||
if (outRowBytes > inRowBytes || !allowInPlace) {
|
||||
write = (u8 *)tmpTexBufRearrange.data();
|
||||
outBuf = tmpTexBufRearrange.data();
|
||||
}
|
||||
for (int y = 0; y < h; y++) {
|
||||
memmove(write, read, outRowBytes);
|
||||
read += inRowBytes;
|
||||
write += outRowBytes;
|
||||
}
|
||||
|
||||
return outBuf;
|
||||
}
|
||||
|
@ -26,14 +26,26 @@ enum TextureFiltering {
|
||||
TEX_FILTER_LINEAR_VIDEO = 4,
|
||||
};
|
||||
|
||||
// Events passed to the texture cache's NotifyFramebuffer().
enum FramebufferNotification {
	// A framebuffer now exists at the given address.
	NOTIFY_FB_CREATED = 0,
	// An existing framebuffer changed; handled the same way as creation.
	NOTIFY_FB_UPDATED = 1,
	// The framebuffer is going away and must be detached from textures.
	NOTIFY_FB_DESTROYED = 2,
};
|
||||
|
||||
struct VirtualFramebuffer;
|
||||
|
||||
class TextureCacheCommon {
|
||||
public:
|
||||
TextureCacheCommon();
|
||||
virtual ~TextureCacheCommon();
|
||||
|
||||
void LoadClut(u32 clutAddr, u32 loadBytes);
|
||||
|
||||
virtual bool SetOffsetTexture(u32 offset);
|
||||
|
||||
// FramebufferManager keeps TextureCache updated about what regions of memory are being rendered to.
|
||||
void NotifyFramebuffer(u32 address, VirtualFramebuffer *framebuffer, FramebufferNotification msg);
|
||||
|
||||
int AttachedDrawingHeight();
|
||||
|
||||
// Wow this is starting to grow big. Soon need to start looking at resizing it.
|
||||
@ -115,9 +127,33 @@ public:
|
||||
};
|
||||
|
||||
protected:
|
||||
// Can't be unordered_map, we use lower_bound ... although for some reason that compiles on MSVC.
|
||||
typedef std::map<u64, TexCacheEntry> TexCache;
|
||||
|
||||
void *UnswizzleFromMem(const u8 *texptr, u32 bufw, u32 height, u32 bytesPerPixel);
|
||||
void *RearrangeBuf(void *inBuf, u32 inRowBytes, u32 outRowBytes, int h, bool allowInPlace = true);
|
||||
|
||||
void GetSamplingParams(int &minFilt, int &magFilt, bool &sClamp, bool &tClamp, float &lodBias, u8 maxLevel);
|
||||
|
||||
virtual bool AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, u32 texaddrOffset = 0) = 0;
|
||||
virtual void DetachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer) = 0;
|
||||
|
||||
TexCache cache;
|
||||
std::vector<VirtualFramebuffer *> fbCache_;
|
||||
|
||||
SimpleBuf<u32> tmpTexBuf32;
|
||||
SimpleBuf<u16> tmpTexBuf16;
|
||||
SimpleBuf<u32> tmpTexBufRearrange;
|
||||
|
||||
TexCacheEntry *nextTexture_;
|
||||
|
||||
// Raw is where we keep the original bytes. Converted is where we swap colors if necessary.
|
||||
u32 *clutBufRaw_;
|
||||
u32 *clutBufConverted_;
|
||||
u32 clutLastFormat_;
|
||||
u32 clutTotalBytes_;
|
||||
u32 clutMaxBytes_;
|
||||
u32 clutRenderAddress_;
|
||||
};
|
||||
|
||||
inline bool TextureCacheCommon::TexCacheEntry::Matches(u16 dim2, u8 format2, u8 maxLevel2) {
|
||||
|
@ -28,6 +28,7 @@ enum CheckAlphaResult {
|
||||
#include "Core/MemMap.h"
|
||||
#include "GPU/ge_constants.h"
|
||||
#include "GPU/Common/TextureDecoderNEON.h"
|
||||
#include "GPU/GPUState.h"
|
||||
|
||||
void SetupTextureDecoder();
|
||||
|
||||
|
@ -63,23 +63,10 @@ namespace DX9 {
|
||||
#define TEXCACHE_MIN_PRESSURE 16 * 1024 * 1024 // Total in VRAM
|
||||
#define TEXCACHE_SECOND_MIN_PRESSURE 4 * 1024 * 1024
|
||||
|
||||
TextureCacheDX9::TextureCacheDX9() : cacheSizeEstimate_(0), secondCacheSizeEstimate_(0), clearCacheNextFrame_(false), lowMemoryMode_(false), clutBuf_(NULL), clutMaxBytes_(0), clutRenderAddress_(0), texelsScaledThisFrame_(0) {
|
||||
TextureCacheDX9::TextureCacheDX9() : cacheSizeEstimate_(0), secondCacheSizeEstimate_(0), clearCacheNextFrame_(false), lowMemoryMode_(false), clutBuf_(NULL), texelsScaledThisFrame_(0) {
|
||||
timesInvalidatedAllThisFrame_ = 0;
|
||||
lastBoundTexture = INVALID_TEX;
|
||||
decimationCounter_ = TEXCACHE_DECIMATION_INTERVAL;
|
||||
// This is 5MB of temporary storage. Might be possible to shrink it.
|
||||
tmpTexBuf32.resize(1024 * 512); // 2MB
|
||||
tmpTexBuf16.resize(1024 * 512); // 1MB
|
||||
tmpTexBufRearrange.resize(1024 * 512); // 2MB
|
||||
|
||||
// TODO: Clamp down to 256/1KB? Need to check mipmapShareClut and clamp loadclut.
|
||||
clutBufConverted_ = (u32 *)AllocateAlignedMemory(1024 * sizeof(u32), 16); // 4KB
|
||||
clutBufRaw_ = (u32 *)AllocateAlignedMemory(1024 * sizeof(u32), 16); // 4KB
|
||||
|
||||
// Zap these so that reads from uninitialized parts of the CLUT look the same in
|
||||
// release and debug
|
||||
memset(clutBufConverted_, 0, 1024 * sizeof(u32));
|
||||
memset(clutBufRaw_, 0, 1024 * sizeof(u32));
|
||||
|
||||
D3DCAPS9 pCaps;
|
||||
ZeroMemory(&pCaps, sizeof(pCaps));
|
||||
@ -102,8 +89,6 @@ TextureCacheDX9::TextureCacheDX9() : cacheSizeEstimate_(0), secondCacheSizeEstim
|
||||
|
||||
// Clears the cache on destruction.
//
// clutBufConverted_ and clutBufRaw_ are allocated by the TextureCacheCommon
// constructor and released by its destructor, so they must not be freed here:
// doing so would free the same buffers twice.
TextureCacheDX9::~TextureCacheDX9() {
	Clear(true);
}
|
||||
|
||||
static u32 EstimateTexMemoryUsage(const TextureCacheDX9::TexCacheEntry *entry) {
|
||||
@ -443,104 +428,6 @@ inline void TextureCacheDX9::DetachFramebuffer(TexCacheEntry *entry, u32 address
|
||||
}
|
||||
}
|
||||
|
||||
void TextureCacheDX9::NotifyFramebuffer(u32 address, VirtualFramebuffer *framebuffer, FramebufferNotification msg) {
|
||||
// Must be in VRAM so | 0x04000000 it is. Also, ignore memory mirrors.
|
||||
// These checks are mainly to reduce scanning all textures.
|
||||
const u32 addr = (address | 0x04000000) & 0x3F9FFFFF;
|
||||
const u32 bpp = framebuffer->format == GE_FORMAT_8888 ? 4 : 2;
|
||||
const u64 cacheKey = (u64)addr << 32;
|
||||
// If it has a clut, those are the low 32 bits, so it'll be inside this range.
|
||||
// Also, if it's a subsample of the buffer, it'll also be within the FBO.
|
||||
const u64 cacheKeyEnd = cacheKey + ((u64)(framebuffer->fb_stride * framebuffer->height * bpp) << 32);
|
||||
|
||||
// The first mirror starts at 0x04200000 and there are 3. We search all for framebuffers.
|
||||
const u64 mirrorCacheKey = (u64)0x04200000 << 32;
|
||||
const u64 mirrorCacheKeyEnd = (u64)0x04800000 << 32;
|
||||
|
||||
switch (msg) {
|
||||
case NOTIFY_FB_CREATED:
|
||||
case NOTIFY_FB_UPDATED:
|
||||
// Ensure it's in the framebuffer cache.
|
||||
if (std::find(fbCache_.begin(), fbCache_.end(), framebuffer) == fbCache_.end()) {
|
||||
fbCache_.push_back(framebuffer);
|
||||
}
|
||||
for (auto it = cache.lower_bound(cacheKey), end = cache.upper_bound(cacheKeyEnd); it != end; ++it) {
|
||||
AttachFramebuffer(&it->second, addr, framebuffer);
|
||||
}
|
||||
// Let's assume anything in mirrors is fair game to check.
|
||||
for (auto it = cache.lower_bound(mirrorCacheKey), end = cache.upper_bound(mirrorCacheKeyEnd); it != end; ++it) {
|
||||
AttachFramebuffer(&it->second, addr, framebuffer);
|
||||
}
|
||||
break;
|
||||
|
||||
case NOTIFY_FB_DESTROYED:
|
||||
fbCache_.erase(std::remove(fbCache_.begin(), fbCache_.end(), framebuffer), fbCache_.end());
|
||||
for (auto it = cache.lower_bound(cacheKey), end = cache.upper_bound(cacheKeyEnd); it != end; ++it) {
|
||||
DetachFramebuffer(&it->second, addr, framebuffer);
|
||||
}
|
||||
for (auto it = cache.lower_bound(mirrorCacheKey), end = cache.upper_bound(mirrorCacheKeyEnd); it != end; ++it) {
|
||||
DetachFramebuffer(&it->second, addr, framebuffer);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void *TextureCacheDX9::UnswizzleFromMem(const u8 *texptr, u32 bufw, u32 height, u32 bytesPerPixel) {
|
||||
const u32 rowWidth = (bytesPerPixel > 0) ? (bufw * bytesPerPixel) : (bufw / 2);
|
||||
const u32 pitch = rowWidth / 4;
|
||||
const int bxc = rowWidth / 16;
|
||||
int byc = (height + 7) / 8;
|
||||
if (byc == 0)
|
||||
byc = 1;
|
||||
|
||||
u32 ydest = 0;
|
||||
if (rowWidth >= 16) {
|
||||
u32 *ydestp = tmpTexBuf32.data();
|
||||
// The most common one, so it gets an optimized implementation.
|
||||
DoUnswizzleTex16(texptr, ydestp, bxc, byc, pitch, rowWidth);
|
||||
} else if (rowWidth == 8) {
|
||||
const u32 *src = (const u32 *) texptr;
|
||||
for (int by = 0; by < byc; by++) {
|
||||
for (int n = 0; n < 8; n++, ydest += 2) {
|
||||
tmpTexBuf32[ydest + 0] = *src++;
|
||||
tmpTexBuf32[ydest + 1] = *src++;
|
||||
src += 2; // skip two u32
|
||||
}
|
||||
}
|
||||
} else if (rowWidth == 4) {
|
||||
const u32 *src = (const u32 *) texptr;
|
||||
for (int by = 0; by < byc; by++) {
|
||||
for (int n = 0; n < 8; n++, ydest++) {
|
||||
tmpTexBuf32[ydest] = *src++;
|
||||
src += 3;
|
||||
}
|
||||
}
|
||||
} else if (rowWidth == 2) {
|
||||
const u16 *src = (const u16 *) texptr;
|
||||
for (int by = 0; by < byc; by++) {
|
||||
for (int n = 0; n < 4; n++, ydest++) {
|
||||
u16 n1 = src[0];
|
||||
u16 n2 = src[8];
|
||||
tmpTexBuf32[ydest] = (u32)n1 | ((u32)n2 << 16);
|
||||
src += 16;
|
||||
}
|
||||
}
|
||||
} else if (rowWidth == 1) {
|
||||
const u8 *src = (const u8 *) texptr;
|
||||
for (int by = 0; by < byc; by++) {
|
||||
for (int n = 0; n < 2; n++, ydest++) {
|
||||
u8 n1 = src[ 0];
|
||||
u8 n2 = src[16];
|
||||
u8 n3 = src[32];
|
||||
u8 n4 = src[48];
|
||||
tmpTexBuf32[ydest] = (u32)n1 | ((u32)n2 << 8) | ((u32)n3 << 16) | ((u32)n4 << 24);
|
||||
src += 64;
|
||||
}
|
||||
}
|
||||
}
|
||||
return tmpTexBuf32.data();
|
||||
}
|
||||
|
||||
void *TextureCacheDX9::ReadIndexedTex(int level, const u8 *texptr, int bytesPerIndex, u32 dstFmt, int bufw) {
|
||||
int w = gstate.getTextureWidth(level);
|
||||
int h = gstate.getTextureHeight(level);
|
||||
@ -786,62 +673,6 @@ static inline u32 QuickTexHash(u32 addr, int bufw, int w, int h, GETextureFormat
|
||||
return DoQuickTexHash(checkp, sizeInRAM);
|
||||
}
|
||||
|
||||
void TextureCacheDX9::LoadClut(u32 clutAddr, u32 loadBytes) {
|
||||
// Clear the uncached bit, etc. to match framebuffers.
|
||||
clutAddr = clutAddr & 0x3FFFFFFF;
|
||||
bool foundFramebuffer = false;
|
||||
|
||||
clutRenderAddress_ = 0;
|
||||
for (size_t i = 0, n = fbCache_.size(); i < n; ++i) {
|
||||
auto framebuffer = fbCache_[i];
|
||||
if ((framebuffer->fb_address | 0x04000000) == clutAddr) {
|
||||
framebuffer->last_frame_clut = gpuStats.numFlips;
|
||||
framebuffer->usageFlags |= FB_USAGE_CLUT;
|
||||
foundFramebuffer = true;
|
||||
WARN_LOG_REPORT_ONCE(clutrenderdx9, G3D, "Using rendered CLUT for texture decode at %08x (%dx%dx%d)", clutAddr, framebuffer->width, framebuffer->height, framebuffer->colorDepth);
|
||||
clutRenderAddress_ = framebuffer->fb_address;
|
||||
}
|
||||
}
|
||||
|
||||
clutTotalBytes_ = loadBytes;
|
||||
if (Memory::IsValidAddress(clutAddr)) {
|
||||
// It's possible for a game to (successfully) access outside valid memory.
|
||||
u32 bytes = Memory::ValidSize(clutAddr, loadBytes);
|
||||
if (foundFramebuffer && !g_Config.bDisableSlowFramebufEffects) {
|
||||
gpu->PerformMemoryDownload(clutAddr, bytes);
|
||||
}
|
||||
|
||||
#ifdef _M_SSE
|
||||
int numBlocks = bytes / 16;
|
||||
if (bytes == loadBytes) {
|
||||
const __m128i *source = (const __m128i *)Memory::GetPointerUnchecked(clutAddr);
|
||||
__m128i *dest = (__m128i *)clutBufRaw_;
|
||||
for (int i = 0; i < numBlocks; i++, source += 2, dest += 2) {
|
||||
__m128i data1 = _mm_loadu_si128(source);
|
||||
__m128i data2 = _mm_loadu_si128(source + 1);
|
||||
_mm_store_si128(dest, data1);
|
||||
_mm_store_si128(dest + 1, data2);
|
||||
}
|
||||
} else {
|
||||
Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
|
||||
if (bytes < loadBytes) {
|
||||
memset(clutBufRaw_ + bytes, 0x00, loadBytes - bytes);
|
||||
}
|
||||
}
|
||||
#else
|
||||
Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
|
||||
if (bytes < clutTotalBytes_) {
|
||||
memset(clutBufRaw_ + bytes, 0x00, loadBytes - bytes);
|
||||
}
|
||||
#endif
|
||||
} else {
|
||||
memset(clutBufRaw_, 0x00, loadBytes);
|
||||
}
|
||||
// Reload the clut next time.
|
||||
clutLastFormat_ = 0xFFFFFFFF;
|
||||
clutMaxBytes_ = std::max(clutMaxBytes_, loadBytes);
|
||||
}
|
||||
|
||||
void TextureCacheDX9::UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBase, bool clutIndexIsSimple) {
|
||||
const u32 clutBaseBytes = clutBase * (clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16));
|
||||
// Technically, these extra bytes weren't loaded, but hopefully it was loaded earlier.
|
||||
@ -1202,7 +1033,7 @@ void TextureCacheDX9::SetTexture(bool force) {
|
||||
// Check for FBO - slow!
|
||||
if (entry->framebuffer) {
|
||||
if (match) {
|
||||
if (hasClut && clutRenderAddress_ != 0) {
|
||||
if (hasClut && clutRenderAddress_ != 0xFFFFFFFF) {
|
||||
WARN_LOG_REPORT_ONCE(clutAndTexRender, G3D, "Using rendered texture with rendered CLUT: texfmt=%d, clutfmt=%d", gstate.getTextureFormat(), gstate.getClutPaletteFormat());
|
||||
}
|
||||
|
||||
@ -1378,7 +1209,7 @@ void TextureCacheDX9::SetTexture(bool force) {
|
||||
TexCacheEntry entryNew = {0};
|
||||
cache[cachekey] = entryNew;
|
||||
|
||||
if (hasClut && clutRenderAddress_ != 0) {
|
||||
if (hasClut && clutRenderAddress_ != 0xFFFFFFFF) {
|
||||
WARN_LOG_REPORT_ONCE(clutUseRender, G3D, "Using texture with rendered CLUT: texfmt=%d, clutfmt=%d", gstate.getTextureFormat(), gstate.getClutPaletteFormat());
|
||||
}
|
||||
|
||||
@ -1759,7 +1590,7 @@ void *TextureCacheDX9::DecodeTextureLevel(GETextureFormat format, GEPaletteForma
|
||||
ERROR_LOG_REPORT(G3D, "NO finalbuf! Will crash!");
|
||||
}
|
||||
|
||||
if (w != bufw) {
|
||||
if (!(g_Config.iTexScalingLevel == 1 && gstate_c.Supports(GPU_SUPPORTS_UNPACK_SUBIMAGE)) && w != bufw) {
|
||||
int pixelSize;
|
||||
switch (dstFmt) {
|
||||
case D3DFMT_A4R4G4B4:
|
||||
@ -1772,21 +1603,7 @@ void *TextureCacheDX9::DecodeTextureLevel(GETextureFormat format, GEPaletteForma
|
||||
break;
|
||||
}
|
||||
// Need to rearrange the buffer to simulate GL_UNPACK_ROW_LENGTH etc.
|
||||
int inRowBytes = bufw * pixelSize;
|
||||
int outRowBytes = w * pixelSize;
|
||||
const u8 *read = (const u8 *)finalBuf;
|
||||
u8 *write = 0;
|
||||
if (w > bufw) {
|
||||
write = (u8 *)tmpTexBufRearrange.data();
|
||||
finalBuf = tmpTexBufRearrange.data();
|
||||
} else {
|
||||
write = (u8 *)finalBuf;
|
||||
}
|
||||
for (int y = 0; y < h; y++) {
|
||||
memmove(write, read, outRowBytes);
|
||||
read += inRowBytes;
|
||||
write += outRowBytes;
|
||||
}
|
||||
finalBuf = RearrangeBuf(finalBuf, bufw * pixelSize, w * pixelSize, h);
|
||||
}
|
||||
|
||||
return finalBuf;
|
||||
|
@ -35,12 +35,6 @@ class FramebufferManagerDX9;
|
||||
class DepalShaderCacheDX9;
|
||||
class ShaderManagerDX9;
|
||||
|
||||
enum FramebufferNotification {
|
||||
NOTIFY_FB_CREATED,
|
||||
NOTIFY_FB_UPDATED,
|
||||
NOTIFY_FB_DESTROYED,
|
||||
};
|
||||
|
||||
class TextureCacheDX9 : public TextureCacheCommon {
|
||||
public:
|
||||
TextureCacheDX9();
|
||||
@ -54,11 +48,6 @@ public:
|
||||
void Invalidate(u32 addr, int size, GPUInvalidationType type);
|
||||
void InvalidateAll(GPUInvalidationType type);
|
||||
void ClearNextFrame();
|
||||
void LoadClut(u32 clutAddr, u32 loadBytes);
|
||||
|
||||
// FramebufferManager keeps TextureCache updated about what regions of memory
|
||||
// are being rendered to. This is barebones so far.
|
||||
void NotifyFramebuffer(u32 address, VirtualFramebuffer *framebuffer, FramebufferNotification msg);
|
||||
|
||||
void SetFramebufferManager(FramebufferManagerDX9 *fbManager) {
|
||||
framebufferManager_ = fbManager;
|
||||
@ -84,12 +73,8 @@ public:
|
||||
void ApplyTexture();
|
||||
|
||||
private:
|
||||
// Can't be unordered_map, we use lower_bound ... although for some reason that compiles on MSVC.
|
||||
typedef std::map<u64, TexCacheEntry> TexCache;
|
||||
|
||||
void Decimate(); // Run this once per frame to get rid of old textures.
|
||||
void DeleteTexture(TexCache::iterator it);
|
||||
void *UnswizzleFromMem(const u8 *texptr, u32 bufw, u32 height, u32 bytesPerPixel);
|
||||
void *ReadIndexedTex(int level, const u8 *texptr, int bytesPerIndex, u32 dstFmt, int bufw);
|
||||
void UpdateSamplingParams(TexCacheEntry &entry, bool force);
|
||||
void LoadTextureLevel(TexCacheEntry &entry, int level, int maxLevel, bool replaceImages, int scaleFactor, u32 dstFmt);
|
||||
@ -100,8 +85,8 @@ private:
|
||||
const T *GetCurrentClut();
|
||||
u32 GetCurrentClutHash();
|
||||
void UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBase, bool clutIndexIsSimple);
|
||||
bool AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, u32 texaddrOffset = 0);
|
||||
void DetachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer);
|
||||
bool AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, u32 texaddrOffset = 0) override;
|
||||
void DetachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer) override;
|
||||
void SetTextureFramebuffer(TexCacheEntry *entry, VirtualFramebuffer *framebuffer);
|
||||
void ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFramebuffer *framebuffer);
|
||||
|
||||
@ -116,9 +101,7 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
TexCache cache;
|
||||
TexCache secondCache;
|
||||
std::vector<VirtualFramebuffer *> fbCache_;
|
||||
u32 cacheSizeEstimate_;
|
||||
u32 secondCacheSizeEstimate_;
|
||||
|
||||
@ -135,22 +118,11 @@ private:
|
||||
bool lowMemoryMode_;
|
||||
TextureScalerDX9 scaler;
|
||||
|
||||
SimpleBuf<u32> tmpTexBuf32;
|
||||
SimpleBuf<u16> tmpTexBuf16;
|
||||
|
||||
SimpleBuf<u32> tmpTexBufRearrange;
|
||||
|
||||
u32 clutLastFormat_;
|
||||
u32 *clutBufRaw_;
|
||||
u32 *clutBufConverted_;
|
||||
u32 *clutBuf_;
|
||||
u32 clutHash_;
|
||||
u32 clutTotalBytes_;
|
||||
u32 clutMaxBytes_;
|
||||
// True if the clut is just alpha values in the same order (RGBA4444-bit only.)
|
||||
bool clutAlphaLinear_;
|
||||
u16 clutAlphaLinearColor_;
|
||||
u32 clutRenderAddress_;
|
||||
|
||||
LPDIRECT3DTEXTURE9 lastBoundTexture;
|
||||
float maxAnisotropyLevel;
|
||||
|
@ -69,26 +69,15 @@
|
||||
#define GL_UNPACK_ROW_LENGTH 0x0CF2
|
||||
#endif
|
||||
|
||||
#define INVALID_TEX -1
|
||||
|
||||
// Hack!
|
||||
extern int g_iNumVideos;
|
||||
|
||||
TextureCache::TextureCache() : cacheSizeEstimate_(0), secondCacheSizeEstimate_(0), clearCacheNextFrame_(false), lowMemoryMode_(false), clutBuf_(NULL), clutMaxBytes_(0), clutRenderAddress_(0), texelsScaledThisFrame_(0) {
|
||||
TextureCache::TextureCache() : cacheSizeEstimate_(0), secondCacheSizeEstimate_(0), clearCacheNextFrame_(false), lowMemoryMode_(false), clutBuf_(NULL), texelsScaledThisFrame_(0) {
|
||||
timesInvalidatedAllThisFrame_ = 0;
|
||||
lastBoundTexture = -1;
|
||||
lastBoundTexture = INVALID_TEX;
|
||||
decimationCounter_ = TEXCACHE_DECIMATION_INTERVAL;
|
||||
// This is 5MB of temporary storage. Might be possible to shrink it.
|
||||
tmpTexBuf32.resize(1024 * 512); // 2MB
|
||||
tmpTexBuf16.resize(1024 * 512); // 1MB
|
||||
tmpTexBufRearrange.resize(1024 * 512); // 2MB
|
||||
|
||||
// TODO: Clamp down to 256/1KB? Need to check mipmapShareClut and clamp loadclut.
|
||||
clutBufConverted_ = (u32 *)AllocateAlignedMemory(1024 * sizeof(u32), 16); // 4KB
|
||||
clutBufRaw_ = (u32 *)AllocateAlignedMemory(1024 * sizeof(u32), 16); // 4KB
|
||||
|
||||
// Zap these so that reads from uninitialized parts of the CLUT look the same in
|
||||
// release and debug
|
||||
memset(clutBufConverted_, 0, 1024 * sizeof(u32));
|
||||
memset(clutBufRaw_, 0, 1024 * sizeof(u32));
|
||||
|
||||
glGetFloatv(GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT, &maxAnisotropyLevel);
|
||||
SetupTextureDecoder();
|
||||
@ -98,8 +87,6 @@ TextureCache::TextureCache() : cacheSizeEstimate_(0), secondCacheSizeEstimate_(0
|
||||
|
||||
// Clears the cache (deleting the cached textures) on destruction.
//
// clutBufConverted_ and clutBufRaw_ are allocated by the TextureCacheCommon
// constructor and released by its destructor, so they must not be freed here:
// doing so would free the same buffers twice.
TextureCache::~TextureCache() {
	Clear(true);
}
|
||||
|
||||
static u32 EstimateTexMemoryUsage(const TextureCache::TexCacheEntry *entry) {
|
||||
@ -136,7 +123,7 @@ static u32 EstimateTexMemoryUsage(const TextureCache::TexCacheEntry *entry) {
|
||||
|
||||
void TextureCache::Clear(bool delete_them) {
|
||||
glBindTexture(GL_TEXTURE_2D, 0);
|
||||
lastBoundTexture = -1;
|
||||
lastBoundTexture = INVALID_TEX;
|
||||
if (delete_them) {
|
||||
for (TexCache::iterator iter = cache.begin(); iter != cache.end(); ++iter) {
|
||||
DEBUG_LOG(G3D, "Deleting texture %i", iter->second.textureName);
|
||||
@ -184,7 +171,7 @@ void TextureCache::Decimate() {
|
||||
const u32 had = cacheSizeEstimate_;
|
||||
|
||||
glBindTexture(GL_TEXTURE_2D, 0);
|
||||
lastBoundTexture = -1;
|
||||
lastBoundTexture = INVALID_TEX;
|
||||
int killAge = lowMemoryMode_ ? TEXTURE_KILL_AGE_LOWMEM : TEXTURE_KILL_AGE;
|
||||
for (TexCache::iterator iter = cache.begin(); iter != cache.end(); ) {
|
||||
if (iter->second.lastFrame + killAge < gpuStats.numFlips) {
|
||||
@ -441,104 +428,6 @@ inline void TextureCache::DetachFramebuffer(TexCacheEntry *entry, u32 address, V
|
||||
}
|
||||
}
|
||||
|
||||
void TextureCache::NotifyFramebuffer(u32 address, VirtualFramebuffer *framebuffer, FramebufferNotification msg) {
|
||||
// Must be in VRAM so | 0x04000000 it is. Also, ignore memory mirrors.
|
||||
// These checks are mainly to reduce scanning all textures.
|
||||
const u32 addr = (address | 0x04000000) & 0x3F9FFFFF;
|
||||
const u32 bpp = framebuffer->format == GE_FORMAT_8888 ? 4 : 2;
|
||||
const u64 cacheKey = (u64)addr << 32;
|
||||
// If it has a clut, those are the low 32 bits, so it'll be inside this range.
|
||||
// Also, if it's a subsample of the buffer, it'll also be within the FBO.
|
||||
const u64 cacheKeyEnd = cacheKey + ((u64)(framebuffer->fb_stride * framebuffer->height * bpp) << 32);
|
||||
|
||||
// The first mirror starts at 0x04200000 and there are 3. We search all for framebuffers.
|
||||
const u64 mirrorCacheKey = (u64)0x04200000 << 32;
|
||||
const u64 mirrorCacheKeyEnd = (u64)0x04800000 << 32;
|
||||
|
||||
switch (msg) {
|
||||
case NOTIFY_FB_CREATED:
|
||||
case NOTIFY_FB_UPDATED:
|
||||
// Ensure it's in the framebuffer cache.
|
||||
if (std::find(fbCache_.begin(), fbCache_.end(), framebuffer) == fbCache_.end()) {
|
||||
fbCache_.push_back(framebuffer);
|
||||
}
|
||||
for (auto it = cache.lower_bound(cacheKey), end = cache.upper_bound(cacheKeyEnd); it != end; ++it) {
|
||||
AttachFramebuffer(&it->second, addr, framebuffer);
|
||||
}
|
||||
// Let's assume anything in mirrors is fair game to check.
|
||||
for (auto it = cache.lower_bound(mirrorCacheKey), end = cache.upper_bound(mirrorCacheKeyEnd); it != end; ++it) {
|
||||
AttachFramebuffer(&it->second, addr, framebuffer);
|
||||
}
|
||||
break;
|
||||
|
||||
case NOTIFY_FB_DESTROYED:
|
||||
fbCache_.erase(std::remove(fbCache_.begin(), fbCache_.end(), framebuffer), fbCache_.end());
|
||||
for (auto it = cache.lower_bound(cacheKey), end = cache.upper_bound(cacheKeyEnd); it != end; ++it) {
|
||||
DetachFramebuffer(&it->second, addr, framebuffer);
|
||||
}
|
||||
for (auto it = cache.lower_bound(mirrorCacheKey), end = cache.upper_bound(mirrorCacheKeyEnd); it != end; ++it) {
|
||||
DetachFramebuffer(&it->second, addr, framebuffer);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void *TextureCache::UnswizzleFromMem(const u8 *texptr, u32 bufw, u32 height, u32 bytesPerPixel) {
|
||||
const u32 rowWidth = (bytesPerPixel > 0) ? (bufw * bytesPerPixel) : (bufw / 2);
|
||||
const u32 pitch = rowWidth / 4;
|
||||
const int bxc = rowWidth / 16;
|
||||
int byc = (height + 7) / 8;
|
||||
if (byc == 0)
|
||||
byc = 1;
|
||||
|
||||
u32 ydest = 0;
|
||||
if (rowWidth >= 16) {
|
||||
u32 *ydestp = tmpTexBuf32.data();
|
||||
// The most common one, so it gets an optimized implementation.
|
||||
DoUnswizzleTex16(texptr, ydestp, bxc, byc, pitch, rowWidth);
|
||||
} else if (rowWidth == 8) {
|
||||
const u32 *src = (const u32 *) texptr;
|
||||
for (int by = 0; by < byc; by++) {
|
||||
for (int n = 0; n < 8; n++, ydest += 2) {
|
||||
tmpTexBuf32[ydest + 0] = *src++;
|
||||
tmpTexBuf32[ydest + 1] = *src++;
|
||||
src += 2; // skip two u32
|
||||
}
|
||||
}
|
||||
} else if (rowWidth == 4) {
|
||||
const u32 *src = (const u32 *) texptr;
|
||||
for (int by = 0; by < byc; by++) {
|
||||
for (int n = 0; n < 8; n++, ydest++) {
|
||||
tmpTexBuf32[ydest] = *src++;
|
||||
src += 3;
|
||||
}
|
||||
}
|
||||
} else if (rowWidth == 2) {
|
||||
const u16 *src = (const u16 *) texptr;
|
||||
for (int by = 0; by < byc; by++) {
|
||||
for (int n = 0; n < 4; n++, ydest++) {
|
||||
u16 n1 = src[0];
|
||||
u16 n2 = src[8];
|
||||
tmpTexBuf32[ydest] = (u32)n1 | ((u32)n2 << 16);
|
||||
src += 16;
|
||||
}
|
||||
}
|
||||
} else if (rowWidth == 1) {
|
||||
const u8 *src = (const u8 *) texptr;
|
||||
for (int by = 0; by < byc; by++) {
|
||||
for (int n = 0; n < 2; n++, ydest++) {
|
||||
u8 n1 = src[ 0];
|
||||
u8 n2 = src[16];
|
||||
u8 n3 = src[32];
|
||||
u8 n4 = src[48];
|
||||
tmpTexBuf32[ydest] = (u32)n1 | ((u32)n2 << 8) | ((u32)n3 << 16) | ((u32)n4 << 24);
|
||||
src += 64;
|
||||
}
|
||||
}
|
||||
}
|
||||
return tmpTexBuf32.data();
|
||||
}
|
||||
|
||||
void *TextureCache::ReadIndexedTex(int level, const u8 *texptr, int bytesPerIndex, GLuint dstFmt, int bufw) {
|
||||
int w = gstate.getTextureWidth(level);
|
||||
int h = gstate.getTextureHeight(level);
|
||||
@ -780,7 +669,7 @@ static void ConvertColors(void *dstBuf, const void *srcBuf, GLuint dstFmt, int n
|
||||
}
|
||||
|
||||
void TextureCache::StartFrame() {
|
||||
lastBoundTexture = -1;
|
||||
lastBoundTexture = INVALID_TEX;
|
||||
timesInvalidatedAllThisFrame_ = 0;
|
||||
|
||||
if (texelsScaledThisFrame_) {
|
||||
@ -810,62 +699,6 @@ static inline u32 QuickTexHash(u32 addr, int bufw, int w, int h, GETextureFormat
|
||||
return DoQuickTexHash(checkp, sizeInRAM);
|
||||
}
|
||||
|
||||
// Loads loadBytes of raw CLUT (palette) data from emulated memory at clutAddr
// into clutBufRaw_.
//
// - If a tracked framebuffer lives at the same address, it is flagged as a
//   CLUT source and its address is remembered in clutRenderAddress_; unless
//   slow framebuffer effects are disabled, its contents are downloaded to
//   memory first so the copy below sees the rendered data.
// - Any part of the requested range that is not valid memory is zero-filled.
// - clutLastFormat_ is reset so the CLUT gets re-converted on next use, and
//   clutMaxBytes_ keeps track of the largest load seen.
void TextureCache::LoadClut(u32 clutAddr, u32 loadBytes) {
	// Clear the uncached bit, etc. to match framebuffers.
	clutAddr = clutAddr & 0x3FFFFFFF;
	bool foundFramebuffer = false;

	clutRenderAddress_ = 0;
	for (size_t i = 0, n = fbCache_.size(); i < n; ++i) {
		auto framebuffer = fbCache_[i];
		if ((framebuffer->fb_address | 0x04000000) == clutAddr) {
			framebuffer->last_frame_clut = gpuStats.numFlips;
			framebuffer->usageFlags |= FB_USAGE_CLUT;
			foundFramebuffer = true;
			WARN_LOG_REPORT_ONCE(clutrender, G3D, "Using rendered CLUT for texture decode at %08x (%dx%dx%d)", clutAddr, framebuffer->width, framebuffer->height, framebuffer->colorDepth);
			clutRenderAddress_ = framebuffer->fb_address;
		}
	}

	clutTotalBytes_ = loadBytes;
	if (Memory::IsValidAddress(clutAddr)) {
		// It's possible for a game to (successfully) access outside valid memory.
		// Only the valid part is copied; the remainder is zeroed below.
		const u32 bytes = Memory::ValidSize(clutAddr, loadBytes);
		if (foundFramebuffer && !g_Config.bDisableSlowFramebufEffects) {
			gpu->PerformMemoryDownload(clutAddr, bytes);
		}

#ifdef _M_SSE
		if (bytes == loadBytes) {
			const __m128i *source = (const __m128i *)Memory::GetPointerUnchecked(clutAddr);
			__m128i *dest = (__m128i *)clutBufRaw_;
			// Each iteration moves two 16-byte vectors, so one block is 32 bytes.
			// Counting 16-byte blocks here would copy twice the requested size,
			// reading past the validated range and clobbering later CLUT data.
			const u32 numBlocks = bytes / 32;
			for (u32 i = 0; i < numBlocks; i++, source += 2, dest += 2) {
				__m128i data1 = _mm_loadu_si128(source);
				__m128i data2 = _mm_loadu_si128(source + 1);
				_mm_store_si128(dest, data1);
				_mm_store_si128(dest + 1, data2);
			}
			// Copy whatever does not fill a whole 32-byte block.
			const u32 copied = numBlocks * 32;
			if (copied < bytes) {
				Memory::MemcpyUnchecked((u8 *)clutBufRaw_ + copied, clutAddr + copied, bytes - copied);
			}
		} else {
			Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
			if (bytes < loadBytes) {
				memset((u8 *)clutBufRaw_ + bytes, 0x00, loadBytes - bytes);
			}
		}
#else
		Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
		if (bytes < loadBytes) {
			memset((u8 *)clutBufRaw_ + bytes, 0x00, loadBytes - bytes);
		}
#endif
	} else {
		// Nothing readable at this address: behave as if the CLUT were all zero.
		memset(clutBufRaw_, 0x00, loadBytes);
	}

	// Reload the clut next time.
	clutLastFormat_ = 0xFFFFFFFF;
	clutMaxBytes_ = std::max(clutMaxBytes_, loadBytes);
}
|
||||
|
||||
void TextureCache::UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBase, bool clutIndexIsSimple) {
|
||||
const u32 clutBaseBytes = clutFormat == GE_CMODE_32BIT_ABGR8888 ? (clutBase * sizeof(u32)) : (clutBase * sizeof(u16));
|
||||
// Technically, these extra bytes weren't loaded, but hopefully it was loaded earlier.
|
||||
@ -1179,7 +1012,7 @@ void TextureCache::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFramebuf
|
||||
framebufferManager_->RebindFramebuffer();
|
||||
SetFramebufferSamplingParams(framebuffer->bufferWidth, framebuffer->bufferHeight);
|
||||
|
||||
lastBoundTexture = -1;
|
||||
lastBoundTexture = INVALID_TEX;
|
||||
}
|
||||
|
||||
bool TextureCache::SetOffsetTexture(u32 offset) {
|
||||
@ -1221,20 +1054,20 @@ void TextureCache::SetTexture(bool force) {
|
||||
#ifdef DEBUG_TEXTURES
|
||||
if (SetDebugTexture()) {
|
||||
// A different texture was bound, let's rebind next time.
|
||||
lastBoundTexture = -1;
|
||||
lastBoundTexture = INVALID_TEX;
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (force) {
|
||||
lastBoundTexture = -1;
|
||||
lastBoundTexture = INVALID_TEX;
|
||||
}
|
||||
|
||||
u32 texaddr = gstate.getTextureAddress(0);
|
||||
if (!Memory::IsValidAddress(texaddr)) {
|
||||
// Bind a null texture and return.
|
||||
glBindTexture(GL_TEXTURE_2D, 0);
|
||||
lastBoundTexture = -1;
|
||||
lastBoundTexture = INVALID_TEX;
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1285,7 +1118,7 @@ void TextureCache::SetTexture(bool force) {
|
||||
// Check for FBO - slow!
|
||||
if (entry->framebuffer) {
|
||||
if (match) {
|
||||
if (hasClut && clutRenderAddress_ != 0) {
|
||||
if (hasClut && clutRenderAddress_ != 0xFFFFFFFF) {
|
||||
WARN_LOG_REPORT_ONCE(clutAndTexRender, G3D, "Using rendered texture with rendered CLUT: texfmt=%d, clutfmt=%d", gstate.getTextureFormat(), gstate.getClutPaletteFormat());
|
||||
}
|
||||
|
||||
@ -1433,7 +1266,7 @@ void TextureCache::SetTexture(bool force) {
|
||||
replaceImages = true;
|
||||
} else {
|
||||
if (entry->textureName == lastBoundTexture) {
|
||||
lastBoundTexture = -1;
|
||||
lastBoundTexture = INVALID_TEX;
|
||||
}
|
||||
glDeleteTextures(1, &entry->textureName);
|
||||
}
|
||||
@ -1459,7 +1292,7 @@ void TextureCache::SetTexture(bool force) {
|
||||
TexCacheEntry entryNew = {0};
|
||||
cache[cachekey] = entryNew;
|
||||
|
||||
if (hasClut && clutRenderAddress_ != 0) {
|
||||
if (hasClut && clutRenderAddress_ != 0xFFFFFFFF) {
|
||||
WARN_LOG_REPORT_ONCE(clutUseRender, G3D, "Using texture with rendered CLUT: texfmt=%d, clutfmt=%d", gstate.getTextureFormat(), gstate.getClutPaletteFormat());
|
||||
}
|
||||
|
||||
@ -1927,21 +1760,7 @@ void *TextureCache::DecodeTextureLevel(GETextureFormat format, GEPaletteFormat c
|
||||
}
|
||||
|
||||
// Need to rearrange the buffer to simulate GL_UNPACK_ROW_LENGTH etc.
|
||||
int inRowBytes = bufw * pixelSize;
|
||||
int outRowBytes = w * pixelSize;
|
||||
const u8 *read = (const u8 *)finalBuf;
|
||||
u8 *write = 0;
|
||||
if (w > bufw) {
|
||||
write = (u8 *)tmpTexBufRearrange.data();
|
||||
finalBuf = tmpTexBufRearrange.data();
|
||||
} else {
|
||||
write = (u8 *)finalBuf;
|
||||
}
|
||||
for (int y = 0; y < h; y++) {
|
||||
memmove(write, read, outRowBytes);
|
||||
read += inRowBytes;
|
||||
write += outRowBytes;
|
||||
}
|
||||
finalBuf = RearrangeBuf(finalBuf, bufw * pixelSize, w * pixelSize, h);
|
||||
}
|
||||
|
||||
return finalBuf;
|
||||
|
@ -34,12 +34,6 @@ class DepalShaderCache;
|
||||
class ShaderManager;
|
||||
class TransformDrawEngine;
|
||||
|
||||
// Message kinds passed to NotifyFramebuffer() to describe what happened to a
// framebuffer. CREATED and UPDATED are handled the same way there (the
// framebuffer is tracked and attached to matching texture cache entries);
// DESTROYED untracks it and detaches it again.
enum FramebufferNotification {
	NOTIFY_FB_CREATED = 0,
	NOTIFY_FB_UPDATED = 1,
	NOTIFY_FB_DESTROYED = 2,
};
|
||||
|
||||
inline bool UseBGRA8888() {
|
||||
// TODO: Other platforms? May depend on vendor which is faster?
|
||||
#ifdef _WIN32
|
||||
@ -61,11 +55,6 @@ public:
|
||||
void Invalidate(u32 addr, int size, GPUInvalidationType type);
|
||||
void InvalidateAll(GPUInvalidationType type);
|
||||
void ClearNextFrame();
|
||||
void LoadClut(u32 clutAddr, u32 loadBytes);
|
||||
|
||||
// FramebufferManager keeps TextureCache updated about what regions of memory
|
||||
// are being rendered to. This is barebones so far.
|
||||
void NotifyFramebuffer(u32 address, VirtualFramebuffer *framebuffer, FramebufferNotification msg);
|
||||
|
||||
void SetFramebufferManager(FramebufferManager *fbManager) {
|
||||
framebufferManager_ = fbManager;
|
||||
@ -99,12 +88,8 @@ public:
|
||||
void ApplyTexture();
|
||||
|
||||
private:
|
||||
// Can't be unordered_map, we use lower_bound ... although for some reason that compiles on MSVC.
|
||||
typedef std::map<u64, TexCacheEntry> TexCache;
|
||||
|
||||
void Decimate(); // Run this once per frame to get rid of old textures.
|
||||
void DeleteTexture(TexCache::iterator it);
|
||||
void *UnswizzleFromMem(const u8 *texptr, u32 bufw, u32 height, u32 bytesPerPixel);
|
||||
void *ReadIndexedTex(int level, const u8 *texptr, int bytesPerIndex, GLuint dstFmt, int bufw);
|
||||
void UpdateSamplingParams(TexCacheEntry &entry, bool force);
|
||||
void LoadTextureLevel(TexCacheEntry &entry, int level, bool replaceImages, int scaleFactor, GLenum dstFmt);
|
||||
@ -115,14 +100,12 @@ private:
|
||||
const T *GetCurrentClut();
|
||||
u32 GetCurrentClutHash();
|
||||
void UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBase, bool clutIndexIsSimple);
|
||||
bool AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, u32 texaddrOffset = 0);
|
||||
void DetachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer);
|
||||
bool AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, u32 texaddrOffset = 0) override;
|
||||
void DetachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer) override;
|
||||
void SetTextureFramebuffer(TexCacheEntry *entry, VirtualFramebuffer *framebuffer);
|
||||
void ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFramebuffer *framebuffer);
|
||||
|
||||
TexCache cache;
|
||||
TexCache secondCache;
|
||||
std::vector<VirtualFramebuffer *> fbCache_;
|
||||
std::vector<u32> nameCache_;
|
||||
u32 cacheSizeEstimate_;
|
||||
u32 secondCacheSizeEstimate_;
|
||||
@ -141,22 +124,11 @@ private:
|
||||
|
||||
TextureScalerGL scaler;
|
||||
|
||||
SimpleBuf<u32> tmpTexBuf32;
|
||||
SimpleBuf<u16> tmpTexBuf16;
|
||||
|
||||
SimpleBuf<u32> tmpTexBufRearrange;
|
||||
|
||||
u32 clutLastFormat_;
|
||||
u32 *clutBufRaw_;
|
||||
u32 *clutBufConverted_;
|
||||
u32 *clutBuf_;
|
||||
u32 clutHash_;
|
||||
u32 clutTotalBytes_;
|
||||
u32 clutMaxBytes_;
|
||||
// True if the clut is just alpha values in the same order (RGBA4444-bit only.)
|
||||
bool clutAlphaLinear_;
|
||||
u16 clutAlphaLinearColor_;
|
||||
u32 clutRenderAddress_;
|
||||
|
||||
u32 lastBoundTexture;
|
||||
float maxAnisotropyLevel;
|
||||
|
Loading…
Reference in New Issue
Block a user