Mix add and xor in the texcache for better results.

2025-02-18 16:09:53 +00:00 · 2013-05-11 14:20:53 -07:00 · 2013-05-11 14:20:53 -07:00 · 724495cbb3
commit 724495cbb3
parent dd1077d9cd
1 changed file with 6 additions and 3 deletions
--- a/GPU/GLES/TextureCache.cpp
+++ b/GPU/GLES/TextureCache.cpp
@@ -756,11 +756,12 @@ static inline u32 QuickTexHash(u32 addr, int bufw, int w, int h, u32 format) {

 #ifdef _M_SSE
 	// Make sure both the size and start are aligned, OR will get either.
-	if ((((u32)(intptr_t)checkp | sizeInRAM) & 0xf) == 0) {
+	if ((((u32)(intptr_t)checkp | sizeInRAM) & 0x1f) == 0) {
 		__m128i cursor = _mm_set1_epi32(0);
 		const __m128i *p = (const __m128i *)checkp;
-		for (u32 i = 0; i < sizeInRAM / 16; ++i) {
+		for (u32 i = 0; i < sizeInRAM / 16; i += 2) {
 			cursor = _mm_add_epi32(cursor, _mm_load_si128(&p[i]));
+			cursor = _mm_xor_si128(cursor, _mm_load_si128(&p[i + 1]));
 		}
 		// Add the four parts into the low i32.
 		cursor = _mm_add_epi32(cursor, _mm_srli_si128(cursor, 8));
@@ -771,8 +772,10 @@ static inline u32 QuickTexHash(u32 addr, int bufw, int w, int h, u32 format) {
 	// TODO: ARM NEON implementation (using CPUDetect to be sure it has NEON.)
 	{
 #endif
-		for (u32 i = 0; i < sizeInRAM / 4; ++i)
+		for (u32 i = 0; i < sizeInRAM / 8; ++i) {
 			check += *checkp++;
+			check ^= *checkp++;
+		}
 	}

 	return check;