mirror of
https://github.com/libretro/ppsspp.git
synced 2024-11-25 09:09:49 +00:00
Rewrite QuickTexHashNEON directly in asm.
It seems gcc could not optimize the intrinsics version down to this instruction sequence.
This commit is contained in:
parent
3f57f1f447
commit
2dfa2379f4
@ -29,6 +29,7 @@ u32 QuickTexHashNEON(const void *checkp, u32 size) {
|
||||
__builtin_prefetch(checkp, 0, 0);
|
||||
|
||||
if (((intptr_t)checkp & 0xf) == 0 && (size & 0x3f) == 0) {
|
||||
#if 0
|
||||
uint32x4_t cursor = vdupq_n_u32(0);
|
||||
uint32x4_t cursor2 = vld1q_u32((const u32 *)QuickTexHashInitial);
|
||||
uint32x4_t update = vdupq_n_u32(0x24552455U);
|
||||
@ -46,6 +47,61 @@ u32 QuickTexHashNEON(const void *checkp, u32 size) {
|
||||
|
||||
cursor = vaddq_u32(cursor, cursor2);
|
||||
check = vgetq_lane_u32(cursor, 0) + vgetq_lane_u32(cursor, 1) + vgetq_lane_u32(cursor, 2) + vgetq_lane_u32(cursor, 3);
|
||||
#else
|
||||
// d0/d1 (q0) - cursor
|
||||
// d2/d3 (q1) - cursor2
|
||||
// d4/d5 (q2) - update
|
||||
// d6-d13 (q3-q6) - memory transfer
|
||||
asm volatile (
|
||||
// Initialize cursor.
|
||||
"vmov.i32 q0, #0\n"
|
||||
|
||||
// Initialize cursor2.
|
||||
"movw r0, 0x0001\n"
|
||||
"movt r0, 0x0083\n"
|
||||
"movw r1, 0x4309\n"
|
||||
"movt r1, 0x4d9b\n"
|
||||
"vmov d2, r0, r1\n"
|
||||
"movw r0, 0xb651\n"
|
||||
"movt r0, 0x4b73\n"
|
||||
"movw r1, 0x9bd9\n"
|
||||
"movt r1, 0xc00b\n"
|
||||
"vmov d2, r0, r1\n"
|
||||
|
||||
// Initialize update.
|
||||
"movw r0, 0x2455\n"
|
||||
"movt r0, 0x2455\n"
|
||||
"mov r1, r0\n"
|
||||
"vmov d4, r0, r1\n"
|
||||
"vmov d5, r0, r1\n"
|
||||
|
||||
// This is where we end.
|
||||
"add r0, %1, %2\n"
|
||||
|
||||
// Okay, do the memory hashing.
|
||||
"QuickTexHashNEON_next:\n"
|
||||
"pld [%2, #0xc0]\n"
|
||||
"vldmia %2!, {d6-d13}\n"
|
||||
"vmla.i32 q0, q1, q3\n"
|
||||
"veor.i32 q0, q0, q4\n"
|
||||
"vmul.i32 q6, q6, q1\n"
|
||||
"cmp %2, r0\n"
|
||||
"vadd.i32 q0, q0, q5\n"
|
||||
"veor.i32 q0, q0, q6\n"
|
||||
"vadd.i32 q1, q1, q2\n"
|
||||
"blo QuickTexHashNEON_next\n"
|
||||
|
||||
// Now let's get the result.
|
||||
"vadd.i32 q0, q0, q1\n"
|
||||
"vadd.i32 d0, d0, d1\n"
|
||||
"vmov r0, r1, s0, s1\n"
|
||||
"add %0, r0, r1\n"
|
||||
|
||||
: "=r"(check)
|
||||
: "r"(size), "r"(checkp)
|
||||
: "r0", "r1"
|
||||
);
|
||||
#endif
|
||||
} else {
|
||||
const u32 size_u32 = size / 4;
|
||||
const u32 *p = (const u32 *)checkp;
|
||||
|
Loading…
Reference in New Issue
Block a user