Merge pull request #2570 from unknownbrackets/perf

Small performance improvements (textures, shader lookup)
This commit is contained in:
Henrik Rydgård 2013-06-30 23:46:03 -07:00
commit 7c681973f5
3 changed files with 40 additions and 27 deletions

View File

@ -433,13 +433,13 @@ void ShaderManager::DirtyUniform(u32 what) {
}
void ShaderManager::Clear() {
for (LinkedShaderCache::iterator iter = linkedShaderCache.begin(); iter != linkedShaderCache.end(); ++iter) {
for (auto iter = linkedShaderCache.begin(); iter != linkedShaderCache.end(); ++iter) {
delete iter->ls;
}
for (auto iter = fsCache.begin(); iter != fsCache.end(); ++iter) {
delete iter->second;
}
for (FSCache::iterator iter = fsCache.begin(); iter != fsCache.end(); ++iter) {
delete iter->second;
}
for (VSCache::iterator iter = vsCache.begin(); iter != vsCache.end(); ++iter) {
for (auto iter = vsCache.begin(); iter != vsCache.end(); ++iter) {
delete iter->second;
}
linkedShaderCache.clear();
@ -494,12 +494,6 @@ LinkedShader *ShaderManager::ApplyShader(int prim) {
lastShader->stop();
}
// Deferred dirtying! Let's see if we can make this even more clever later.
for (LinkedShaderCache::iterator iter = linkedShaderCache.begin(); iter != linkedShaderCache.end(); ++iter) {
iter->second->dirtyUniforms |= shaderSwitchDirty;
}
shaderSwitchDirty = 0;
lastVSID = VSID;
lastFSID = FSID;
@ -541,15 +535,23 @@ LinkedShader *ShaderManager::ApplyShader(int prim) {
}
// Okay, we have both shaders. Let's see if there's a linked one.
std::pair<Shader*, Shader*> linkedID(vs, fs);
LinkedShader *ls = NULL;
LinkedShaderCache::iterator iter = linkedShaderCache.find(linkedID);
LinkedShader *ls;
if (iter == linkedShaderCache.end()) {
for (auto iter = linkedShaderCache.begin(); iter != linkedShaderCache.end(); ++iter) {
// Deferred dirtying! Let's see if we can make this even more clever later.
iter->ls->dirtyUniforms |= shaderSwitchDirty;
if (iter->vs == vs && iter->fs == fs) {
ls = iter->ls;
}
}
shaderSwitchDirty = 0;
if (ls == NULL) {
ls = new LinkedShader(vs, fs, vs->UseHWTransform()); // This does "use" automatically
linkedShaderCache[linkedID] = ls;
const LinkedShaderCacheEntry entry(vs, fs, ls);
linkedShaderCache.push_back(entry);
} else {
ls = iter->second;
ls->use();
}

View File

@ -166,7 +166,15 @@ public:
private:
void Clear();
typedef std::map<std::pair<Shader *, Shader *>, LinkedShader *> LinkedShaderCache;
// One slot of the linked-shader cache: a (vs, fs) shader pair together with
// the LinkedShader stored for that pair. Holds plain pointers only; the entry
// defines no destructor, so nothing is released when an entry goes away.
struct LinkedShaderCacheEntry {
	Shader *vs;
	Shader *fs;
	LinkedShader *ls;

	// Stores the three pointers as given; no validation is performed.
	LinkedShaderCacheEntry(Shader *vs_, Shader *fs_, LinkedShader *ls_)
		: vs(vs_), fs(fs_), ls(ls_) {
	}
};
typedef std::vector<LinkedShaderCacheEntry> LinkedShaderCache;
LinkedShaderCache linkedShaderCache;
FragmentShaderID lastFSID;

View File

@ -122,7 +122,11 @@ void TextureCache::Invalidate(u32 addr, int size, GPUInvalidationType type) {
addr &= 0xFFFFFFF;
u32 addr_end = addr + size;
for (TexCache::iterator iter = cache.begin(), end = cache.end(); iter != end; ++iter) {
// They could invalidate inside the texture, let's just give a bit of leeway.
const int LARGEST_TEXTURE_SIZE = 512 * 512 * 4;
u64 startKey = addr - LARGEST_TEXTURE_SIZE;
u64 endKey = addr + size + LARGEST_TEXTURE_SIZE;
for (TexCache::iterator iter = cache.lower_bound(startKey), end = cache.upper_bound(endKey); iter != end; ++iter) {
u32 texAddr = iter->second.addr;
u32 texEnd = iter->second.addr + iter->second.sizeInRAM;
@ -159,8 +163,8 @@ void TextureCache::ClearNextFrame() {
TextureCache::TexCacheEntry *TextureCache::GetEntryAt(u32 texaddr) {
// If no CLUT, as in framebuffer textures, cache key is simply texaddr.
auto iter = cache.find(texaddr);
// If no CLUT, as in framebuffer textures, cache key is simply texaddr shifted up.
auto iter = cache.find((u64)texaddr << 32);
if (iter != cache.end() && iter->second.addr == texaddr)
return &iter->second;
else
@ -940,7 +944,7 @@ void TextureCache::SetTexture() {
// GE_TFMT_CLUT4 - GE_TFMT_CLUT32 are 0b1xx.
bool hasClut = (format & 4) != 0;
u64 cachekey = texaddr;
u64 cachekey = (u64)texaddr << 32;
u32 clutformat, cluthash;
if (hasClut) {
@ -950,7 +954,7 @@ void TextureCache::SetTexture() {
UpdateCurrentClut();
}
cluthash = GetCurrentClutHash() ^ gstate.clutformat;
cachekey |= (u64)cluthash << 32;
cachekey |= cluthash;
} else {
clutformat = 0;
cluthash = 0;
@ -1302,14 +1306,13 @@ void *TextureCache::DecodeTextureLevel(u8 format, u8 clutformat, int level, u32
int len = std::max(bufw, w) * h;
tmpTexBuf16.resize(len);
tmpTexBufRearrange.resize(len);
Memory::Memcpy(tmpTexBuf16.data(), texaddr, len * sizeof(u16));
finalBuf = tmpTexBuf16.data();
}
else {
ConvertColors(finalBuf, Memory::GetPointer(texaddr), dstFmt, bufw * h);
} else {
tmpTexBuf32.resize(std::max(bufw, w) * h);
finalBuf = UnswizzleFromMem(texaddr, bufw, 2, level);
ConvertColors(finalBuf, finalBuf, dstFmt, bufw * h);
}
ConvertColors(finalBuf, finalBuf, dstFmt, bufw * h);
break;
case GE_TFMT_8888: