Simplify sinc NEON code

This commit is contained in:
twinaphex 2021-09-04 00:35:45 +02:00
parent 9a5f4602cc
commit 6d1c0e96a3

View File

@@ -88,21 +88,22 @@ typedef struct rarch_sinc_resampler
#if (defined(__ARM_NEON__) || defined(HAVE_NEON))
#ifdef HAVE_ARM_NEON_ASM_OPTIMIZATIONS
/* Externally defined NEON routine; this prototype is only declared when
 * HAVE_ARM_NEON_ASM_OPTIMIZATIONS is set.
 *
 * The resampler calls it once per output frame with the current output
 * cursor, the left and right sample buffers, the coefficient table for the
 * current phase, and the tap count, then advances the output cursor by two
 * floats. NOTE(review): presumably it writes one left/right output pair to
 * `out` -- confirm against the assembly implementation.
 *
 * Assumes that taps >= 8, and that taps is a multiple of 8. */
void process_sinc_neon_asm(float *out, const float *left,
const float *right, const float *coeff, unsigned taps);
#else
#include <arm_neon.h>
#endif
/* Assumes that taps >= 8, and that taps is a multiple of 8. */
static void resampler_sinc_process_neon(void *re_, struct resampler_data *data)
{
rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_;
unsigned phases = 1 << (resamp->phase_bits + resamp->subphase_bits);
uint32_t ratio = phases / data->ratio;
const float *input = data->data_in;
float *output = data->data_out;
size_t frames = data->input_frames;
size_t out_frames = 0;
while (frames)
{
while (frames && resamp->time >= phases)
@@ -130,62 +131,10 @@ static void resampler_sinc_process_neon(void *re_, struct resampler_data *data)
{
unsigned phase = resamp->time >> resamp->subphase_bits;
const float *phase_table = resamp->phase_table + phase * taps;
#ifdef HAVE_ARM_NEON_ASM_OPTIMIZATIONS
process_sinc_neon_asm(output, buffer_l, buffer_r, phase_table, taps);
output += 2;
out_frames++;
resamp->time += ratio;
}
}
}
data->output_frames = out_frames;
}
#else
#include <arm_neon.h>
/* Assumes that taps >= 8, and that taps is a multiple of 8. */
static void resampler_sinc_process_neon_intrin(void *re_, struct resampler_data *data)
{
rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_;
unsigned phases = 1 << (resamp->phase_bits + resamp->subphase_bits);
uint32_t ratio = phases / data->ratio;
const float *input = data->data_in;
float *output = data->data_out;
size_t frames = data->input_frames;
size_t out_frames = 0;
while (frames)
{
while (resamp->time >= phases)
{
/* Push in reverse to make filter more obvious. */
if (!resamp->ptr)
resamp->ptr = resamp->taps;
resamp->ptr--;
resamp->buffer_l[resamp->ptr + resamp->taps] =
resamp->buffer_l[resamp->ptr] = *input++;
resamp->buffer_r[resamp->ptr + resamp->taps] =
resamp->buffer_r[resamp->ptr] = *input++;
resamp->time -= phases;
frames--;
}
{
const float *buffer_l = resamp->buffer_l + resamp->ptr;
const float *buffer_r = resamp->buffer_r + resamp->ptr;
unsigned taps = resamp->taps;
while (resamp->time < phases)
{
int i;
unsigned phase = resamp->time >> resamp->subphase_bits;
const float *phase_table = resamp->phase_table + phase * taps;
unsigned i;
float32x4_t p1 = {0, 0, 0, 0}, p2 = {0, 0, 0, 0};
float32x2_t p3, p4;
@@ -204,8 +153,7 @@ static void resampler_sinc_process_neon_intrin(void *re_, struct resampler_data
p3 = vadd_f32(vget_low_f32(p1), vget_high_f32(p1));
p4 = vadd_f32(vget_low_f32(p2), vget_high_f32(p2));
vst1_f32(output, vpadd_f32(p3, p4));
#endif
output += 2;
out_frames++;
resamp->time += ratio;
@@ -217,8 +165,6 @@ static void resampler_sinc_process_neon_intrin(void *re_, struct resampler_data
}
#endif
#endif
#if defined(__AVX__)
static void resampler_sinc_process_avx_kaiser(void *re_, struct resampler_data *data)
{
@@ -971,11 +917,7 @@ static void *resampler_sinc_new(const struct resampler_config *config,
else if (mask & RESAMPLER_SIMD_NEON && window_type != SINC_WINDOW_KAISER)
{
#if (defined(__ARM_NEON__) || defined(HAVE_NEON))
#ifdef HAVE_ARM_NEON_ASM_OPTIMIZATIONS
sinc_resampler.process = resampler_sinc_process_neon;
#else
sinc_resampler.process = resampler_sinc_process_neon_intrin;
#endif
#endif
}