Make number of sinc taps variable in NEON.

This commit is contained in:
Themaister 2013-02-08 14:27:51 +01:00
parent a38293caad
commit e19555d394
3 changed files with 27 additions and 27 deletions

View File

@ -224,15 +224,13 @@ static void process_sinc(rarch_sinc_resampler_t *resamp, float *out_buffer)
}
#elif defined(HAVE_NEON)
#if TAPS != 16
#error "NEON sinc is for now only implemented with 16 taps. Cannot continue."
#endif
// Need to make this function pointer as Android doesn't have built-in targets
// for NEON and plain ARMv7a.
static void (*process_sinc_func)(rarch_sinc_resampler_t *resamp, float *out_buffer);
void process_sinc_neon_asm(float *out, const float *left, const float *right, const float *coeff);
// Assumes that taps >= 8, and that taps is a multiple of 8.
void process_sinc_neon_asm(float *out, const float *left, const float *right, const float *coeff, unsigned taps);
static void process_sinc_neon(rarch_sinc_resampler_t *resamp, float *out_buffer)
{
const float *buffer_l = resamp->buffer_l + resamp->ptr;
@ -241,7 +239,7 @@ static void process_sinc_neon(rarch_sinc_resampler_t *resamp, float *out_buffer)
unsigned phase = resamp->time >> SUBPHASE_BITS;
const float *phase_table = resamp->phase_table[phase];
process_sinc_neon_asm(out_buffer, buffer_l, buffer_r, phase_table);
process_sinc_neon_asm(out_buffer, buffer_l, buffer_r, phase_table, TAPS);
}
#else // Plain ol' C99
#define process_sinc_func process_sinc_C

View File

@ -1,5 +1,5 @@
/* RetroArch - A frontend for libretro.
* Copyright (C) 2010-2012 - Hans-Kristian Arntzen
* Copyright (C) 2010-2013 - Hans-Kristian Arntzen
*
* RetroArch is free software: you can redistribute it and/or modify it under the terms
* of the GNU General Public License as published by the Free Software Found-
@ -16,37 +16,39 @@
.arm
.align 4
.global process_sinc_neon_asm
# void process_sinc_neon(float *out, const float *left, const float *right, const float *coeff)
# Hardcoded to 16 taps.
# void process_sinc_neon_asm(float *out, const float *left, const float *right, const float *coeff, unsigned taps)
# Assumes taps is >= 8, and a multiple of 8.
process_sinc_neon_asm:
push {r4, lr}
vmov.f32 q0, #0.0
vmov.f32 q8, #0.0
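# The fifth argument (taps) is passed on the stack in armeabi; load it into r4.
# It is found at [sp, #8] because the push above stored two registers (8 bytes).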
ldr r4, [sp, #8]
1:
# Left
vld1.f32 {q0-q1}, [r1]!
vld1.f32 {q2-q3}, [r1]!
vld1.f32 {q2-q3}, [r1]!
# Right
vld1.f32 {q8-q9}, [r2]!
vld1.f32 {q10-q11}, [r2]!
# Coeff
vld1.f32 {q12-q13}, [r3, :128]!
vld1.f32 {q14-q15}, [r3, :128]!
# Left
vmul.f32 q0, q0, q12
vmul.f32 q1, q1, q13
vmla.f32 q0, q2, q14
vmla.f32 q1, q3, q15
# Left / Right
vmla.f32 q0, q2, q12
vmla.f32 q8, q10, q12
vmla.f32 q0, q3, q13
vmla.f32 q8, q11, q13
# Right
vmul.f32 q8, q8, q12
vmul.f32 q9, q9, q13
vmla.f32 q8, q10, q14
vmla.f32 q9, q11, q15
subs r4, r4, #8
bne 1b
# Add everything together
vadd.f32 q0, q0, q1
vadd.f32 q8, q8, q9
vadd.f32 d0, d0, d1
vadd.f32 d16, d16, d17
vpadd.f32 d0, d0, d16
vst1.f32 d0, [r0]
bx lr
pop {r4, pc}

View File

@ -1,5 +1,5 @@
/* RetroArch - A frontend for libretro.
* Copyright (C) 2010-2012 - Hans-Kristian Arntzen
* Copyright (C) 2010-2013 - Hans-Kristian Arntzen
*
* RetroArch is free software: you can redistribute it and/or modify it under the terms
* of the GNU General Public License as published by the Free Software Found-