Mix add and xor in the texcache for better results.

This commit is contained in:
Unknown W. Brackets 2013-05-11 14:20:53 -07:00
parent dd1077d9cd
commit 724495cbb3

View File

@ -756,11 +756,12 @@ static inline u32 QuickTexHash(u32 addr, int bufw, int w, int h, u32 format) {
#ifdef _M_SSE
// Make sure both the size and start are aligned; OR-ing them together catches misalignment in either.
if ((((u32)(intptr_t)checkp | sizeInRAM) & 0xf) == 0) {
if ((((u32)(intptr_t)checkp | sizeInRAM) & 0x1f) == 0) {
__m128i cursor = _mm_set1_epi32(0);
const __m128i *p = (const __m128i *)checkp;
for (u32 i = 0; i < sizeInRAM / 16; ++i) {
for (u32 i = 0; i < sizeInRAM / 16; i += 2) {
cursor = _mm_add_epi32(cursor, _mm_load_si128(&p[i]));
cursor = _mm_xor_si128(cursor, _mm_load_si128(&p[i + 1]));
}
// Add the four parts into the low i32.
cursor = _mm_add_epi32(cursor, _mm_srli_si128(cursor, 8));
@ -771,8 +772,10 @@ static inline u32 QuickTexHash(u32 addr, int bufw, int w, int h, u32 format) {
// TODO: ARM NEON implementation (using CPUDetect to be sure it has NEON.)
{
#endif
for (u32 i = 0; i < sizeInRAM / 4; ++i)
for (u32 i = 0; i < sizeInRAM / 8; ++i) {
check += *checkp++;
check ^= *checkp++;
}
}
return check;