(sinc resampler) Simplifications: call besseli0 directly instead of going
through the kaiser_window_function wrapper function, get rid of some
unnecessary sqrtf calls and such, and apply some other small optimizations.
This commit is contained in:
LibretroAdmin 2022-08-02 11:29:05 +02:00
parent 605c4608d9
commit a974cef001
3 changed files with 103 additions and 88 deletions

View File

@ -314,12 +314,12 @@ static void fft_init(fft_t *fft)
window = (GLushort*)calloc(fft->size, sizeof(GLushort));
window_mod = 1.0 / kaiser_window_function(0.0, KAISER_BETA);
window_mod = 1.0 / besseli0(KAISER_BETA);
for (i = 0; i < fft->size; i++)
{
double phase = (double)(i - (int)(fft->size) / 2) / ((int)(fft->size) / 2);
double w = kaiser_window_function(phase, KAISER_BETA);
double w = besseli0(KAISER_BETA * sqrtf(1 - phase * phase));
window[i] = round(0xffff * w * window_mod);
}
glBindTexture(GL_TEXTURE_2D, fft->window_tex);

View File

@ -105,36 +105,36 @@ static void resampler_sinc_process_neon_kaiser(void *re_, struct resampler_data
float *output = data->data_out;
size_t frames = data->input_frames;
size_t out_frames = 0;
unsigned taps = resamp->taps;
while (frames)
{
while (frames && resamp->time >= phases)
{
/* Push in reverse to make filter more obvious. */
if (!resamp->ptr)
resamp->ptr = resamp->taps;
resamp->ptr = taps;
resamp->ptr--;
resamp->buffer_l[resamp->ptr + resamp->taps] =
resamp->buffer_l[resamp->ptr] = *input++;
resamp->buffer_l[resamp->ptr + taps] =
resamp->buffer_l[resamp->ptr] = *input++;
resamp->buffer_r[resamp->ptr + resamp->taps] =
resamp->buffer_r[resamp->ptr] = *input++;
resamp->buffer_r[resamp->ptr + taps] =
resamp->buffer_r[resamp->ptr] = *input++;
resamp->time -= phases;
resamp->time -= phases;
frames--;
}
{
const float *buffer_l = resamp->buffer_l + resamp->ptr;
const float *buffer_r = resamp->buffer_r + resamp->ptr;
unsigned taps = resamp->taps;
while (resamp->time < phases)
{
unsigned phase = resamp->time >> resamp->subphase_bits;
const float *phase_table = resamp->phase_table + phase * taps * 2;
const float *delta_table = phase_table + taps;
float32x4_t delta = vdupq_n_f32((resamp->time & resamp->subphase_mask) * resamp->subphase_mod);
unsigned i;
int i;
float32x4_t p1 = {0, 0, 0, 0}, p2 = {0, 0, 0, 0};
float32x2_t p3, p4;
@ -178,29 +178,30 @@ static void resampler_sinc_process_neon(void *re_, struct resampler_data *data)
float *output = data->data_out;
size_t frames = data->input_frames;
size_t out_frames = 0;
unsigned taps = resamp->taps;
while (frames)
{
while (frames && resamp->time >= phases)
{
/* Push in reverse to make filter more obvious. */
if (!resamp->ptr)
resamp->ptr = resamp->taps;
resamp->ptr = taps;
resamp->ptr--;
resamp->buffer_l[resamp->ptr + resamp->taps] =
resamp->buffer_l[resamp->ptr] = *input++;
resamp->buffer_l[resamp->ptr + taps] =
resamp->buffer_l[resamp->ptr] = *input++;
resamp->buffer_r[resamp->ptr + resamp->taps] =
resamp->buffer_r[resamp->ptr] = *input++;
resamp->buffer_r[resamp->ptr + taps] =
resamp->buffer_r[resamp->ptr] = *input++;
resamp->time -= phases;
resamp->time -= phases;
frames--;
}
{
const float *buffer_l = resamp->buffer_l + resamp->ptr;
const float *buffer_r = resamp->buffer_r + resamp->ptr;
unsigned taps = resamp->taps;
while (resamp->time < phases)
{
unsigned phase = resamp->time >> resamp->subphase_bits;
@ -208,7 +209,7 @@ static void resampler_sinc_process_neon(void *re_, struct resampler_data *data)
#ifdef HAVE_ARM_NEON_ASM_OPTIMIZATIONS
process_sinc_neon_asm(output, buffer_l, buffer_r, phase_table, taps);
#else
unsigned i;
int i;
float32x4_t p1 = {0, 0, 0, 0}, p2 = {0, 0, 0, 0};
float32x2_t p3, p4;
@ -250,6 +251,7 @@ static void resampler_sinc_process_avx_kaiser(void *re_, struct resampler_data *
float *output = data->data_out;
size_t frames = data->input_frames;
size_t out_frames = 0;
unsigned taps = resamp->taps;
{
while (frames)
@ -258,14 +260,14 @@ static void resampler_sinc_process_avx_kaiser(void *re_, struct resampler_data *
{
/* Push in reverse to make filter more obvious. */
if (!resamp->ptr)
resamp->ptr = resamp->taps;
resamp->ptr = taps;
resamp->ptr--;
resamp->buffer_l[resamp->ptr + resamp->taps] =
resamp->buffer_l[resamp->ptr] = *input++;
resamp->buffer_l[resamp->ptr + taps] =
resamp->buffer_l[resamp->ptr] = *input++;
resamp->buffer_r[resamp->ptr + resamp->taps] =
resamp->buffer_r[resamp->ptr] = *input++;
resamp->buffer_r[resamp->ptr + taps] =
resamp->buffer_r[resamp->ptr] = *input++;
resamp->time -= phases;
frames--;
@ -274,10 +276,9 @@ static void resampler_sinc_process_avx_kaiser(void *re_, struct resampler_data *
{
const float *buffer_l = resamp->buffer_l + resamp->ptr;
const float *buffer_r = resamp->buffer_r + resamp->ptr;
unsigned taps = resamp->taps;
while (resamp->time < phases)
{
unsigned i;
int i;
unsigned phase = resamp->time >> resamp->subphase_bits;
float *phase_table = resamp->phase_table + phase * taps * 2;
@ -335,6 +336,7 @@ static void resampler_sinc_process_avx(void *re_, struct resampler_data *data)
float *output = data->data_out;
size_t frames = data->input_frames;
size_t out_frames = 0;
unsigned taps = resamp->taps;
{
while (frames)
@ -343,26 +345,25 @@ static void resampler_sinc_process_avx(void *re_, struct resampler_data *data)
{
/* Push in reverse to make filter more obvious. */
if (!resamp->ptr)
resamp->ptr = resamp->taps;
resamp->ptr = taps;
resamp->ptr--;
resamp->buffer_l[resamp->ptr + resamp->taps] =
resamp->buffer_l[resamp->ptr] = *input++;
resamp->buffer_l[resamp->ptr + taps] =
resamp->buffer_l[resamp->ptr] = *input++;
resamp->buffer_r[resamp->ptr + resamp->taps] =
resamp->buffer_r[resamp->ptr] = *input++;
resamp->buffer_r[resamp->ptr + taps] =
resamp->buffer_r[resamp->ptr] = *input++;
resamp->time -= phases;
resamp->time -= phases;
frames--;
}
{
const float *buffer_l = resamp->buffer_l + resamp->ptr;
const float *buffer_r = resamp->buffer_r + resamp->ptr;
unsigned taps = resamp->taps;
while (resamp->time < phases)
{
unsigned i;
int i;
__m256 delta;
unsigned phase = resamp->time >> resamp->subphase_bits;
float *phase_table = resamp->phase_table + phase * taps;
@ -417,6 +418,7 @@ static void resampler_sinc_process_sse_kaiser(void *re_, struct resampler_data *
float *output = data->data_out;
size_t frames = data->input_frames;
size_t out_frames = 0;
unsigned taps = resamp->taps;
{
while (frames)
@ -425,26 +427,25 @@ static void resampler_sinc_process_sse_kaiser(void *re_, struct resampler_data *
{
/* Push in reverse to make filter more obvious. */
if (!resamp->ptr)
resamp->ptr = resamp->taps;
resamp->ptr = taps;
resamp->ptr--;
resamp->buffer_l[resamp->ptr + resamp->taps] =
resamp->buffer_l[resamp->ptr] = *input++;
resamp->buffer_l[resamp->ptr + taps] =
resamp->buffer_l[resamp->ptr] = *input++;
resamp->buffer_r[resamp->ptr + resamp->taps] =
resamp->buffer_r[resamp->ptr] = *input++;
resamp->buffer_r[resamp->ptr + taps] =
resamp->buffer_r[resamp->ptr] = *input++;
resamp->time -= phases;
resamp->time -= phases;
frames--;
}
{
const float *buffer_l = resamp->buffer_l + resamp->ptr;
const float *buffer_r = resamp->buffer_r + resamp->ptr;
unsigned taps = resamp->taps;
while (resamp->time < phases)
{
unsigned i;
int i;
__m128 sum;
unsigned phase = resamp->time >> resamp->subphase_bits;
float *phase_table = resamp->phase_table + phase * taps * 2;
@ -512,6 +513,7 @@ static void resampler_sinc_process_sse(void *re_, struct resampler_data *data)
float *output = data->data_out;
size_t frames = data->input_frames;
size_t out_frames = 0;
unsigned taps = resamp->taps;
{
while (frames)
@ -520,26 +522,25 @@ static void resampler_sinc_process_sse(void *re_, struct resampler_data *data)
{
/* Push in reverse to make filter more obvious. */
if (!resamp->ptr)
resamp->ptr = resamp->taps;
resamp->ptr = taps;
resamp->ptr--;
resamp->buffer_l[resamp->ptr + resamp->taps] =
resamp->buffer_l[resamp->ptr] = *input++;
resamp->buffer_l[resamp->ptr + taps] =
resamp->buffer_l[resamp->ptr] = *input++;
resamp->buffer_r[resamp->ptr + resamp->taps] =
resamp->buffer_r[resamp->ptr] = *input++;
resamp->buffer_r[resamp->ptr + taps] =
resamp->buffer_r[resamp->ptr] = *input++;
resamp->time -= phases;
resamp->time -= phases;
frames--;
}
{
const float *buffer_l = resamp->buffer_l + resamp->ptr;
const float *buffer_r = resamp->buffer_r + resamp->ptr;
unsigned taps = resamp->taps;
while (resamp->time < phases)
{
unsigned i;
int i;
__m128 sum;
unsigned phase = resamp->time >> resamp->subphase_bits;
float *phase_table = resamp->phase_table + phase * taps;
@ -603,6 +604,7 @@ static void resampler_sinc_process_c_kaiser(void *re_, struct resampler_data *da
float *output = data->data_out;
size_t frames = data->input_frames;
size_t out_frames = 0;
unsigned taps = resamp->taps;
{
while (frames)
@ -611,26 +613,25 @@ static void resampler_sinc_process_c_kaiser(void *re_, struct resampler_data *da
{
/* Push in reverse to make filter more obvious. */
if (!resamp->ptr)
resamp->ptr = resamp->taps;
resamp->ptr = taps;
resamp->ptr--;
resamp->buffer_l[resamp->ptr + resamp->taps] =
resamp->buffer_l[resamp->ptr] = *input++;
resamp->buffer_l[resamp->ptr + taps] =
resamp->buffer_l[resamp->ptr] = *input++;
resamp->buffer_r[resamp->ptr + resamp->taps] =
resamp->buffer_r[resamp->ptr] = *input++;
resamp->buffer_r[resamp->ptr + taps] =
resamp->buffer_r[resamp->ptr] = *input++;
resamp->time -= phases;
resamp->time -= phases;
frames--;
}
{
const float *buffer_l = resamp->buffer_l + resamp->ptr;
const float *buffer_r = resamp->buffer_r + resamp->ptr;
unsigned taps = resamp->taps;
while (resamp->time < phases)
{
unsigned i;
int i;
float sum_l = 0.0f;
float sum_r = 0.0f;
unsigned phase = resamp->time >> resamp->subphase_bits;
@ -672,6 +673,7 @@ static void resampler_sinc_process_c(void *re_, struct resampler_data *data)
float *output = data->data_out;
size_t frames = data->input_frames;
size_t out_frames = 0;
unsigned taps = resamp->taps;
{
while (frames)
@ -680,26 +682,25 @@ static void resampler_sinc_process_c(void *re_, struct resampler_data *data)
{
/* Push in reverse to make filter more obvious. */
if (!resamp->ptr)
resamp->ptr = resamp->taps;
resamp->ptr = taps;
resamp->ptr--;
resamp->buffer_l[resamp->ptr + resamp->taps] =
resamp->buffer_l[resamp->ptr] = *input++;
resamp->buffer_l[resamp->ptr + taps] =
resamp->buffer_l[resamp->ptr] = *input++;
resamp->buffer_r[resamp->ptr + resamp->taps] =
resamp->buffer_r[resamp->ptr] = *input++;
resamp->buffer_r[resamp->ptr + taps] =
resamp->buffer_r[resamp->ptr] = *input++;
resamp->time -= phases;
resamp->time -= phases;
frames--;
}
{
const float *buffer_l = resamp->buffer_l + resamp->ptr;
const float *buffer_r = resamp->buffer_r + resamp->ptr;
unsigned taps = resamp->taps;
while (resamp->time < phases)
{
unsigned i;
int i;
float sum_l = 0.0f;
float sum_r = 0.0f;
unsigned phase = resamp->time >> resamp->subphase_bits;
@ -741,7 +742,9 @@ static void sinc_init_table_kaiser(rarch_sinc_resampler_t *resamp,
float *phase_table, int phases, int taps, bool calculate_delta)
{
int i, j;
double window_mod = kaiser_window_function(0.0, resamp->kaiser_beta); /* Need to normalize w(0) to 1.0. */
/* Kaiser window function - need to normalize w(0) to 1.0f */
float kaiser_beta = resamp->kaiser_beta;
double window_mod = besseli0(kaiser_beta);
int stride = calculate_delta ? 2 : 1;
double sidelobes = taps / 2.0;
@ -750,13 +753,13 @@ static void sinc_init_table_kaiser(rarch_sinc_resampler_t *resamp,
for (j = 0; j < taps; j++)
{
double sinc_phase;
float val;
int n = j * phases + i;
double window_phase = (double)n / (phases * taps); /* [0, 1). */
window_phase = 2.0 * window_phase - 1.0; /* [-1, 1) */
sinc_phase = sidelobes * window_phase;
val = cutoff * sinc(M_PI * sinc_phase * cutoff) *
kaiser_window_function(window_phase, resamp->kaiser_beta) / window_mod;
float val = cutoff * sinc(M_PI * sinc_phase * cutoff) *
besseli0(kaiser_beta * sqrtf(1 - window_phase * window_phase))
/ window_mod;
phase_table[i * stride * taps + j] = val;
}
}
@ -787,7 +790,8 @@ static void sinc_init_table_kaiser(rarch_sinc_resampler_t *resamp,
sinc_phase = sidelobes * window_phase;
val = cutoff * sinc(M_PI * sinc_phase * cutoff) *
kaiser_window_function(window_phase, resamp->kaiser_beta) / window_mod;
besseli0(resamp->kaiser_beta * sqrtf(1 - window_phase *
window_phase)) / window_mod;
delta = (val - phase_table[phase * stride * taps + j]);
phase_table[(phase * stride + 1) * taps + j] = delta;
}
@ -799,7 +803,8 @@ static void sinc_init_table_lanczos(
float *phase_table, int phases, int taps, bool calculate_delta)
{
int i, j;
double window_mod = lanzcos_window_function(0.0); /* Need to normalize w(0) to 1.0. */
/* Lanczos window function - need to normalize w(0) to 1.0f */
double window_mod = 1.0;
int stride = calculate_delta ? 2 : 1;
double sidelobes = taps / 2.0;
@ -814,7 +819,7 @@ static void sinc_init_table_lanczos(
window_phase = 2.0 * window_phase - 1.0; /* [-1, 1) */
sinc_phase = sidelobes * window_phase;
val = cutoff * sinc(M_PI * sinc_phase * cutoff) *
lanzcos_window_function(window_phase) / window_mod;
sinc(M_PI * window_phase) / window_mod;
phase_table[i * stride * taps + j] = val;
}
}
@ -845,7 +850,7 @@ static void sinc_init_table_lanczos(
sinc_phase = sidelobes * window_phase;
val = cutoff * sinc(M_PI * sinc_phase * cutoff) *
lanzcos_window_function(window_phase) / window_mod;
sinc(M_PI * window_phase) / window_mod;
delta = (val - phase_table[phase * stride * taps + j]);
phase_table[(phase * stride + 1) * taps + j] = delta;
}
@ -921,7 +926,7 @@ static void *resampler_sinc_new(const struct resampler_config *config,
* taps accordingly to keep same stopband attenuation. */
if (bandwidth_mod < 1.0)
{
cutoff *= bandwidth_mod;
cutoff *= bandwidth_mod;
re->taps = (unsigned)ceil(re->taps / bandwidth_mod);
}

View File

@ -31,6 +31,11 @@
#include <retro_inline.h>
#include <retro_math.h>
/**
* sinc:
*
* Pure function.
**/
static INLINE double sinc(double val)
{
if (fabs(val) < 0.00001)
@ -38,7 +43,12 @@ static INLINE double sinc(double val)
return sin(val) / val;
}
/* Paeth prediction filter. */
/**
* paeth:
*
* Pure function.
* Paeth prediction filter.
**/
static INLINE int paeth(int a, int b, int c)
{
int p = a + b - c;
@ -53,11 +63,17 @@ static INLINE int paeth(int a, int b, int c)
return c;
}
/* Modified Bessel function of first order.
* Check Wiki for mathematical definition ... */
/**
* besseli0:
*
* Pure function.
*
* Modified Bessel function of the first kind, order zero (I0).
* See Wikipedia for the mathematical definition.
**/
static INLINE double besseli0(double x)
{
unsigned i;
int i;
double sum = 0.0;
double factorial = 1.0;
double factorial_mult = 0.0;
@ -69,12 +85,11 @@ static INLINE double besseli0(double x)
* Luckily, it converges rather fast. */
for (i = 0; i < 18; i++)
{
sum += x_pow * two_div_pow / (factorial * factorial);
sum += x_pow * two_div_pow / (factorial * factorial);
factorial_mult += 1.0;
x_pow *= x_sqr;
two_div_pow *= 0.25;
factorial *= factorial_mult;
x_pow *= x_sqr;
two_div_pow *= 0.25;
factorial *= factorial_mult;
}
return sum;
@ -85,9 +100,4 @@ static INLINE double kaiser_window_function(double index, double beta)
return besseli0(beta * sqrtf(1 - index * index));
}
/**
 * lanzcos_window_function:
 * @index : position within the window; the table-building code that
 *          calls this passes a window phase in [-1, 1).
 *
 * Pure function.
 *
 * Lanczos window: returns sinc(M_PI * index).
 **/
static INLINE double lanzcos_window_function(double index)
{
return sinc(M_PI * index);
}
#endif