Merge pull request #4487 from unknownbrackets/perf

Texture cache hashing tweaks, disable second cache on mobile
This commit is contained in:
Henrik Rydgård 2013-11-09 17:30:40 -08:00
commit 345a4ccf0e
3 changed files with 94 additions and 2 deletions

View File

@ -64,10 +64,33 @@ static u32 QuickTexHashSSE2(const void *checkp, u32 size) {
}
static u32 QuickTexHashBasic(const void *checkp, u32 size) {
#ifdef __GNUC__
#if defined(ARM) && defined(__GNUC__)
__builtin_prefetch(checkp, 0, 0);
#endif
u32 check;
asm volatile (
// Let's change size to the end address.
"add %1, %1, %2\n"
"mov r6, #0\n"
// If we have zero sized input, we'll return garbage. Oh well, shouldn't happen.
"QuickTexHashBasic_next:\n"
"ldmia %2!, {r2-r5}\n"
"add r6, r6, r2\n"
"eor r6, r6, r3\n"
"cmp %2, %1\n"
"add r6, r6, r4\n"
"eor r6, r6, r5\n"
"blo QuickTexHashBasic_next\n"
"QuickTexHashBasic_done:\n"
"mov %0, r6\n"
: "=r"(check)
: "r"(size), "r"(checkp)
: "r2", "r3", "r4", "r5", "r6"
);
#else
u32 check = 0;
const u32 size_u32 = size / 4;
const u32 *p = (const u32 *)checkp;
@ -77,6 +100,7 @@ static u32 QuickTexHashBasic(const void *checkp, u32 size) {
check += p[i + 2];
check ^= p[i + 3];
}
#endif
return check;
}

View File

@ -29,6 +29,7 @@ u32 QuickTexHashNEON(const void *checkp, u32 size) {
__builtin_prefetch(checkp, 0, 0);
if (((intptr_t)checkp & 0xf) == 0 && (size & 0x3f) == 0) {
#if 0
uint32x4_t cursor = vdupq_n_u32(0);
uint32x4_t cursor2 = vld1q_u32((const u32 *)QuickTexHashInitial);
uint32x4_t update = vdupq_n_u32(0x24552455U);
@ -46,6 +47,61 @@ u32 QuickTexHashNEON(const void *checkp, u32 size) {
cursor = vaddq_u32(cursor, cursor2);
check = vgetq_lane_u32(cursor, 0) + vgetq_lane_u32(cursor, 1) + vgetq_lane_u32(cursor, 2) + vgetq_lane_u32(cursor, 3);
#else
// d0/d1 (q0) - cursor
// d2/d3 (q1) - cursor2
// d4/d5 (q2) - update
// d6-d13 (q3-q6) - memory transfer
asm volatile (
// Initialize cursor.
"vmov.i32 q0, #0\n"
// Initialize cursor2.
"movw r0, 0x0001\n"
"movt r0, 0x0083\n"
"movw r1, 0x4309\n"
"movt r1, 0x4d9b\n"
"vmov d2, r0, r1\n"
"movw r0, 0xb651\n"
"movt r0, 0x4b73\n"
"movw r1, 0x9bd9\n"
"movt r1, 0xc00b\n"
"vmov d2, r0, r1\n"
// Initialize update.
"movw r0, 0x2455\n"
"movt r0, 0x2455\n"
"mov r1, r0\n"
"vmov d4, r0, r1\n"
"vmov d5, r0, r1\n"
// This is where we end.
"add r0, %1, %2\n"
// Okay, do the memory hashing.
"QuickTexHashNEON_next:\n"
"pld [%2, #0xc0]\n"
"vldmia %2!, {d6-d13}\n"
"vmla.i32 q0, q1, q3\n"
"veor.i32 q0, q0, q4\n"
"vmul.i32 q6, q6, q1\n"
"cmp %2, r0\n"
"vadd.i32 q0, q0, q5\n"
"veor.i32 q0, q0, q6\n"
"vadd.i32 q1, q1, q2\n"
"blo QuickTexHashNEON_next\n"
// Now let's get the result.
"vadd.i32 q0, q0, q1\n"
"vadd.i32 d0, d0, d1\n"
"vmov r0, r1, s0, s1\n"
"add %0, r0, r1\n"
: "=r"(check)
: "r"(size), "r"(checkp)
: "r0", "r1", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11", "d12", "d13"
);
#endif
} else {
const u32 size_u32 = size / 4;
const u32 *p = (const u32 *)checkp;

View File

@ -48,6 +48,14 @@
#define GL_UNPACK_ROW_LENGTH 0x0CF2
#endif
// TODO: This helps when you have plenty of VRAM, sometimes quite a bit.
// But on Android, it sometimes causes out of memory that isn't recovered from.
#if !defined(USING_GLES2) && !defined(_XBOX)
#define USE_SECONDARY_CACHE 1
#else
#define USE_SECONDARY_CACHE 0
#endif
extern int g_iNumVideos;
TextureCache::TextureCache() : clearCacheNextFrame_(false), lowMemoryMode_(false), clutBuf_(NULL) {
@ -107,6 +115,7 @@ void TextureCache::Decimate() {
else
++iter;
}
#if USE_SECONDARY_CACHE
for (TexCache::iterator iter = secondCache.begin(); iter != secondCache.end(); ) {
if (lowMemoryMode_ || iter->second.lastFrame + TEXTURE_KILL_AGE < gpuStats.numFlips) {
glDeleteTextures(1, &iter->second.texture);
@ -115,6 +124,7 @@ void TextureCache::Decimate() {
else
++iter;
}
#endif
}
void TextureCache::Invalidate(u32 addr, int size, GPUInvalidationType type) {
@ -955,6 +965,7 @@ void TextureCache::SetTexture(bool force) {
// Don't give up just yet. Let's try the secondary cache if it's been invalidated before.
// If it's failed a bunch of times, then the second cache is just wasting time and VRAM.
#if USE_SECONDARY_CACHE
if (entry->numInvalidated > 2 && entry->numInvalidated < 128 && !lowMemoryMode_) {
u64 secondKey = fullhash | (u64)cluthash << 32;
TexCache::iterator secondIter = secondCache.find(secondKey);
@ -974,6 +985,7 @@ void TextureCache::SetTexture(bool force) {
doDelete = false;
}
}
#endif
}
}