Merge pull request #7029 from unknownbrackets/texhash

Attempt to make the texture hash match(able) - needs testing
This commit is contained in:
Henrik Rydgård 2014-10-30 00:58:00 +01:00
commit a4c8bb03a2
2 changed files with 70 additions and 19 deletions

View File

@ -39,7 +39,7 @@ u32 QuickTexHashSSE2(const void *checkp, u32 size) {
const __m128i *p = (const __m128i *)checkp;
for (u32 i = 0; i < size / 16; i += 4) {
__m128i chunk = _mm_mullo_epi16(_mm_load_si128(&p[i]), cursor2);
cursor = _mm_add_epi32(cursor, chunk);
cursor = _mm_add_epi16(cursor, chunk);
cursor = _mm_xor_si128(cursor, _mm_load_si128(&p[i + 1]));
cursor = _mm_add_epi32(cursor, _mm_load_si128(&p[i + 2]));
chunk = _mm_mullo_epi16(_mm_load_si128(&p[i + 3]), cursor2);
@ -63,6 +63,57 @@ u32 QuickTexHashSSE2(const void *checkp, u32 size) {
}
#endif
// Scalar (non-SIMD) quick texture hash.
//
// checkp: start of the data to hash.
// size:   number of bytes to hash.
// Returns a 32-bit checksum of the data.
//
// When checkp is 16-byte aligned and size is a multiple of 64, the data is
// consumed in 64-byte groups treated as four 128-bit "vectors", each viewed
// either as 8 x u16 or 4 x u32 lanes.  The per-lane steps follow the same
// order as the SSE2 loop (16-bit multiply + 16-bit add, 32-bit xor, 32-bit
// add, 16-bit multiply + xor), so the result is intended to match the SIMD
// implementations.
// NOTE(review): the u16/u32 lane overlay goes through a union, so matching
// the SIMD results presumably requires a little-endian target - confirm.
//
// Otherwise a simple add/xor checksum over pairs of 32-bit words is used;
// any bytes past the last full 8-byte pair are not hashed.
u32 QuickTexHashNonSSE(const void *checkp, u32 size) {
	u32 check = 0;

	if (((intptr_t)checkp & 0xf) == 0 && (size & 0x3f) == 0) {
		static const u16 cursor2_initial[8] = {0xc00bU, 0x9bd9U, 0x4b73U, 0xb651U, 0x4d9bU, 0x4309U, 0x0083U, 0x0001U};
		static const u16 update[8] = {0x2455U, 0x2455U, 0x2455U, 0x2455U, 0x2455U, 0x2455U, 0x2455U, 0x2455U};

		// One 128-bit "register", addressable as 32-bit or 16-bit lanes.
		union u32x4_u16x8 {
			u32 x32[4];
			u16 x16[8];
		};

		u32x4_u16x8 cursor = {0, 0, 0, 0};
		u32x4_u16x8 cursor2;
		for (u32 j = 0; j < 8; ++j) {
			cursor2.x16[j] = cursor2_initial[j];
		}

		const u32x4_u16x8 *p = (const u32x4_u16x8 *)checkp;
		// i counts 16-byte vectors; each iteration consumes four of them (64 bytes).
		for (u32 i = 0; i < size / 16; i += 4) {
			for (u32 j = 0; j < 8; ++j) {
				// Multiply as u32: u16 * u16 would be promoted to signed int, and
				// products above INT_MAX (e.g. 0xFFFF * 0xFFFF) are undefined
				// behavior.  Only the low 16 bits of the product are kept.
				const u16 temp = (u16)((u32)p[i + 0].x16[j] * (u32)cursor2.x16[j]);
				cursor.x16[j] += temp;
			}
			for (u32 j = 0; j < 4; ++j) {
				cursor.x32[j] ^= p[i + 1].x32[j];
				cursor.x32[j] += p[i + 2].x32[j];
			}
			for (u32 j = 0; j < 8; ++j) {
				// Same unsigned widening as above to avoid signed overflow.
				const u16 temp = (u16)((u32)p[i + 3].x16[j] * (u32)cursor2.x16[j]);
				cursor.x16[j] ^= temp;
			}
			for (u32 j = 0; j < 8; ++j) {
				cursor2.x16[j] += update[j];
			}
		}

		// Fold the multiplier state into the accumulator, then sum the four
		// 32-bit lanes into the final hash.
		for (u32 j = 0; j < 4; ++j) {
			cursor.x32[j] += cursor2.x32[j];
		}
		check = cursor.x32[0] + cursor.x32[1] + cursor.x32[2] + cursor.x32[3];
	} else {
		// Fallback for unaligned pointers or sizes that are not a multiple of 64.
		const u32 *p = (const u32 *)checkp;
		for (u32 i = 0; i < size / 8; ++i) {
			check += *p++;
			check ^= *p++;
		}
	}

	return check;
}
static u32 QuickTexHashBasic(const void *checkp, u32 size) {
#if defined(ARM) && defined(__GNUC__)
__builtin_prefetch(checkp, 0, 0);

View File

@ -22,7 +22,7 @@
#error Should not be compiled on non-ARM.
#endif
static const u16 MEMORY_ALIGNED16(QuickTexHashInitial[8]) = {0x0001U, 0x0083U, 0x4309U, 0x4d9bU, 0xb651U, 0x4b73U, 0x9bd9U, 0xc00bU};
static const u16 MEMORY_ALIGNED16(QuickTexHashInitial[8]) = {0xc00bU, 0x9bd9U, 0x4b73U, 0xb651U, 0x4d9bU, 0x4309U, 0x0083U, 0x0001U};
u32 QuickTexHashNEON(const void *checkp, u32 size) {
u32 check = 0;
@ -31,21 +31,21 @@ u32 QuickTexHashNEON(const void *checkp, u32 size) {
if (((intptr_t)checkp & 0xf) == 0 && (size & 0x3f) == 0) {
#ifdef IOS
uint32x4_t cursor = vdupq_n_u32(0);
uint32x4_t cursor2 = vld1q_u32((const u32 *)QuickTexHashInitial);
uint32x4_t update = vdupq_n_u32(0x24552455U);
uint16x8_t cursor2 = vld1q_u16(QuickTexHashInitial);
uint16x8_t update = vdupq_n_u16(0x2455U);
const u32 *p = (const u32 *)checkp;
for (u32 i = 0; i < size / 16; i += 4) {
cursor = vmlaq_u32(cursor, vld1q_u32(&p[4 * 0]), cursor2);
cursor = vreinterpretq_u32_u16(vmlaq_u16(vreinterpretq_u16_u32(cursor), vreinterpretq_u16_u32(vld1q_u32(&p[4 * 0])), cursor2));
cursor = veorq_u32(cursor, vld1q_u32(&p[4 * 1]));
cursor = vaddq_u32(cursor, vld1q_u32(&p[4 * 2]));
cursor = veorq_u32(cursor, vmulq_u32(vld1q_u32(&p[4 * 3]), cursor2));
cursor2 = vaddq_u32(cursor2, update);
cursor = veorq_u32(cursor, vreinterpretq_u32_u16(vmulq_u16(vreinterpretq_u16_u32(vld1q_u32(&p[4 * 3])), cursor2)));
cursor2 = vaddq_u16(cursor2, update);
p += 4 * 4;
}
cursor = vaddq_u32(cursor, cursor2);
cursor = vaddq_u32(cursor, vreinterpretq_u32_u16(cursor2));
check = vgetq_lane_u32(cursor, 0) + vgetq_lane_u32(cursor, 1) + vgetq_lane_u32(cursor, 2) + vgetq_lane_u32(cursor, 3);
#else
// TODO: Why does this crash on iOS, but only certain devices?
@ -60,15 +60,15 @@ u32 QuickTexHashNEON(const void *checkp, u32 size) {
"vmov.i32 q0, #0\n"
// Initialize cursor2.
"movw r0, 0x0001\n"
"movt r0, 0x0083\n"
"movw r1, 0x4309\n"
"movt r1, 0x4d9b\n"
"movw r0, 0xc00b\n"
"movt r0, 0x9bd9\n"
"movw r1, 0x4b73\n"
"movt r1, 0xb651\n"
"vmov d2, r0, r1\n"
"movw r0, 0xb651\n"
"movt r0, 0x4b73\n"
"movw r1, 0x9bd9\n"
"movt r1, 0xc00b\n"
"movw r0, 0x4d9b\n"
"movt r0, 0x4309\n"
"movw r1, 0x0083\n"
"movt r1, 0x0001\n"
"vmov d3, r0, r1\n"
// Initialize update.
@ -82,12 +82,12 @@ u32 QuickTexHashNEON(const void *checkp, u32 size) {
"QuickTexHashNEON_next:\n"
"pld [%2, #0xc0]\n"
"vldmia %2!, {d16-d23}\n"
"vmla.i32 q0, q1, q8\n"
"vmul.i32 q11, q11, q1\n"
"vmla.i16 q0, q1, q8\n"
"vmul.i16 q11, q11, q1\n"
"veor.i32 q0, q0, q9\n"
"cmp %2, r0\n"
"vadd.i32 q0, q0, q10\n"
"vadd.i32 q1, q1, q2\n"
"vadd.i16 q1, q1, q2\n"
"veor.i32 q0, q0, q11\n"
"blo QuickTexHashNEON_next\n"