mirror of
https://github.com/libretro/RetroArch.git
synced 2025-02-21 10:11:18 +00:00
(sinc resampler) Simplifications - call besseli0 directly instead of
through kaiser_window_function wrapper function - get rid of some unnecessary sqrtf calls and such, some other small opts
This commit is contained in:
parent
605c4608d9
commit
a974cef001
@ -314,12 +314,12 @@ static void fft_init(fft_t *fft)
|
||||
|
||||
window = (GLushort*)calloc(fft->size, sizeof(GLushort));
|
||||
|
||||
window_mod = 1.0 / kaiser_window_function(0.0, KAISER_BETA);
|
||||
window_mod = 1.0 / besseli0(KAISER_BETA);
|
||||
|
||||
for (i = 0; i < fft->size; i++)
|
||||
{
|
||||
double phase = (double)(i - (int)(fft->size) / 2) / ((int)(fft->size) / 2);
|
||||
double w = kaiser_window_function(phase, KAISER_BETA);
|
||||
double w = besseli0(KAISER_BETA * sqrtf(1 - phase * phase));
|
||||
window[i] = round(0xffff * w * window_mod);
|
||||
}
|
||||
glBindTexture(GL_TEXTURE_2D, fft->window_tex);
|
||||
|
@ -105,36 +105,36 @@ static void resampler_sinc_process_neon_kaiser(void *re_, struct resampler_data
|
||||
float *output = data->data_out;
|
||||
size_t frames = data->input_frames;
|
||||
size_t out_frames = 0;
|
||||
unsigned taps = resamp->taps;
|
||||
while (frames)
|
||||
{
|
||||
while (frames && resamp->time >= phases)
|
||||
{
|
||||
/* Push in reverse to make filter more obvious. */
|
||||
if (!resamp->ptr)
|
||||
resamp->ptr = resamp->taps;
|
||||
resamp->ptr = taps;
|
||||
resamp->ptr--;
|
||||
|
||||
resamp->buffer_l[resamp->ptr + resamp->taps] =
|
||||
resamp->buffer_l[resamp->ptr] = *input++;
|
||||
resamp->buffer_l[resamp->ptr + taps] =
|
||||
resamp->buffer_l[resamp->ptr] = *input++;
|
||||
|
||||
resamp->buffer_r[resamp->ptr + resamp->taps] =
|
||||
resamp->buffer_r[resamp->ptr] = *input++;
|
||||
resamp->buffer_r[resamp->ptr + taps] =
|
||||
resamp->buffer_r[resamp->ptr] = *input++;
|
||||
|
||||
resamp->time -= phases;
|
||||
resamp->time -= phases;
|
||||
frames--;
|
||||
}
|
||||
|
||||
{
|
||||
const float *buffer_l = resamp->buffer_l + resamp->ptr;
|
||||
const float *buffer_r = resamp->buffer_r + resamp->ptr;
|
||||
unsigned taps = resamp->taps;
|
||||
while (resamp->time < phases)
|
||||
{
|
||||
unsigned phase = resamp->time >> resamp->subphase_bits;
|
||||
const float *phase_table = resamp->phase_table + phase * taps * 2;
|
||||
const float *delta_table = phase_table + taps;
|
||||
float32x4_t delta = vdupq_n_f32((resamp->time & resamp->subphase_mask) * resamp->subphase_mod);
|
||||
unsigned i;
|
||||
int i;
|
||||
float32x4_t p1 = {0, 0, 0, 0}, p2 = {0, 0, 0, 0};
|
||||
float32x2_t p3, p4;
|
||||
|
||||
@ -178,29 +178,30 @@ static void resampler_sinc_process_neon(void *re_, struct resampler_data *data)
|
||||
float *output = data->data_out;
|
||||
size_t frames = data->input_frames;
|
||||
size_t out_frames = 0;
|
||||
unsigned taps = resamp->taps;
|
||||
|
||||
while (frames)
|
||||
{
|
||||
while (frames && resamp->time >= phases)
|
||||
{
|
||||
/* Push in reverse to make filter more obvious. */
|
||||
if (!resamp->ptr)
|
||||
resamp->ptr = resamp->taps;
|
||||
resamp->ptr = taps;
|
||||
resamp->ptr--;
|
||||
|
||||
resamp->buffer_l[resamp->ptr + resamp->taps] =
|
||||
resamp->buffer_l[resamp->ptr] = *input++;
|
||||
resamp->buffer_l[resamp->ptr + taps] =
|
||||
resamp->buffer_l[resamp->ptr] = *input++;
|
||||
|
||||
resamp->buffer_r[resamp->ptr + resamp->taps] =
|
||||
resamp->buffer_r[resamp->ptr] = *input++;
|
||||
resamp->buffer_r[resamp->ptr + taps] =
|
||||
resamp->buffer_r[resamp->ptr] = *input++;
|
||||
|
||||
resamp->time -= phases;
|
||||
resamp->time -= phases;
|
||||
frames--;
|
||||
}
|
||||
|
||||
{
|
||||
const float *buffer_l = resamp->buffer_l + resamp->ptr;
|
||||
const float *buffer_r = resamp->buffer_r + resamp->ptr;
|
||||
unsigned taps = resamp->taps;
|
||||
while (resamp->time < phases)
|
||||
{
|
||||
unsigned phase = resamp->time >> resamp->subphase_bits;
|
||||
@ -208,7 +209,7 @@ static void resampler_sinc_process_neon(void *re_, struct resampler_data *data)
|
||||
#ifdef HAVE_ARM_NEON_ASM_OPTIMIZATIONS
|
||||
process_sinc_neon_asm(output, buffer_l, buffer_r, phase_table, taps);
|
||||
#else
|
||||
unsigned i;
|
||||
int i;
|
||||
float32x4_t p1 = {0, 0, 0, 0}, p2 = {0, 0, 0, 0};
|
||||
float32x2_t p3, p4;
|
||||
|
||||
@ -250,6 +251,7 @@ static void resampler_sinc_process_avx_kaiser(void *re_, struct resampler_data *
|
||||
float *output = data->data_out;
|
||||
size_t frames = data->input_frames;
|
||||
size_t out_frames = 0;
|
||||
unsigned taps = resamp->taps;
|
||||
|
||||
{
|
||||
while (frames)
|
||||
@ -258,14 +260,14 @@ static void resampler_sinc_process_avx_kaiser(void *re_, struct resampler_data *
|
||||
{
|
||||
/* Push in reverse to make filter more obvious. */
|
||||
if (!resamp->ptr)
|
||||
resamp->ptr = resamp->taps;
|
||||
resamp->ptr = taps;
|
||||
resamp->ptr--;
|
||||
|
||||
resamp->buffer_l[resamp->ptr + resamp->taps] =
|
||||
resamp->buffer_l[resamp->ptr] = *input++;
|
||||
resamp->buffer_l[resamp->ptr + taps] =
|
||||
resamp->buffer_l[resamp->ptr] = *input++;
|
||||
|
||||
resamp->buffer_r[resamp->ptr + resamp->taps] =
|
||||
resamp->buffer_r[resamp->ptr] = *input++;
|
||||
resamp->buffer_r[resamp->ptr + taps] =
|
||||
resamp->buffer_r[resamp->ptr] = *input++;
|
||||
|
||||
resamp->time -= phases;
|
||||
frames--;
|
||||
@ -274,10 +276,9 @@ static void resampler_sinc_process_avx_kaiser(void *re_, struct resampler_data *
|
||||
{
|
||||
const float *buffer_l = resamp->buffer_l + resamp->ptr;
|
||||
const float *buffer_r = resamp->buffer_r + resamp->ptr;
|
||||
unsigned taps = resamp->taps;
|
||||
while (resamp->time < phases)
|
||||
{
|
||||
unsigned i;
|
||||
int i;
|
||||
unsigned phase = resamp->time >> resamp->subphase_bits;
|
||||
|
||||
float *phase_table = resamp->phase_table + phase * taps * 2;
|
||||
@ -335,6 +336,7 @@ static void resampler_sinc_process_avx(void *re_, struct resampler_data *data)
|
||||
float *output = data->data_out;
|
||||
size_t frames = data->input_frames;
|
||||
size_t out_frames = 0;
|
||||
unsigned taps = resamp->taps;
|
||||
|
||||
{
|
||||
while (frames)
|
||||
@ -343,26 +345,25 @@ static void resampler_sinc_process_avx(void *re_, struct resampler_data *data)
|
||||
{
|
||||
/* Push in reverse to make filter more obvious. */
|
||||
if (!resamp->ptr)
|
||||
resamp->ptr = resamp->taps;
|
||||
resamp->ptr = taps;
|
||||
resamp->ptr--;
|
||||
|
||||
resamp->buffer_l[resamp->ptr + resamp->taps] =
|
||||
resamp->buffer_l[resamp->ptr] = *input++;
|
||||
resamp->buffer_l[resamp->ptr + taps] =
|
||||
resamp->buffer_l[resamp->ptr] = *input++;
|
||||
|
||||
resamp->buffer_r[resamp->ptr + resamp->taps] =
|
||||
resamp->buffer_r[resamp->ptr] = *input++;
|
||||
resamp->buffer_r[resamp->ptr + taps] =
|
||||
resamp->buffer_r[resamp->ptr] = *input++;
|
||||
|
||||
resamp->time -= phases;
|
||||
resamp->time -= phases;
|
||||
frames--;
|
||||
}
|
||||
|
||||
{
|
||||
const float *buffer_l = resamp->buffer_l + resamp->ptr;
|
||||
const float *buffer_r = resamp->buffer_r + resamp->ptr;
|
||||
unsigned taps = resamp->taps;
|
||||
while (resamp->time < phases)
|
||||
{
|
||||
unsigned i;
|
||||
int i;
|
||||
__m256 delta;
|
||||
unsigned phase = resamp->time >> resamp->subphase_bits;
|
||||
float *phase_table = resamp->phase_table + phase * taps;
|
||||
@ -417,6 +418,7 @@ static void resampler_sinc_process_sse_kaiser(void *re_, struct resampler_data *
|
||||
float *output = data->data_out;
|
||||
size_t frames = data->input_frames;
|
||||
size_t out_frames = 0;
|
||||
unsigned taps = resamp->taps;
|
||||
|
||||
{
|
||||
while (frames)
|
||||
@ -425,26 +427,25 @@ static void resampler_sinc_process_sse_kaiser(void *re_, struct resampler_data *
|
||||
{
|
||||
/* Push in reverse to make filter more obvious. */
|
||||
if (!resamp->ptr)
|
||||
resamp->ptr = resamp->taps;
|
||||
resamp->ptr = taps;
|
||||
resamp->ptr--;
|
||||
|
||||
resamp->buffer_l[resamp->ptr + resamp->taps] =
|
||||
resamp->buffer_l[resamp->ptr] = *input++;
|
||||
resamp->buffer_l[resamp->ptr + taps] =
|
||||
resamp->buffer_l[resamp->ptr] = *input++;
|
||||
|
||||
resamp->buffer_r[resamp->ptr + resamp->taps] =
|
||||
resamp->buffer_r[resamp->ptr] = *input++;
|
||||
resamp->buffer_r[resamp->ptr + taps] =
|
||||
resamp->buffer_r[resamp->ptr] = *input++;
|
||||
|
||||
resamp->time -= phases;
|
||||
resamp->time -= phases;
|
||||
frames--;
|
||||
}
|
||||
|
||||
{
|
||||
const float *buffer_l = resamp->buffer_l + resamp->ptr;
|
||||
const float *buffer_r = resamp->buffer_r + resamp->ptr;
|
||||
unsigned taps = resamp->taps;
|
||||
while (resamp->time < phases)
|
||||
{
|
||||
unsigned i;
|
||||
int i;
|
||||
__m128 sum;
|
||||
unsigned phase = resamp->time >> resamp->subphase_bits;
|
||||
float *phase_table = resamp->phase_table + phase * taps * 2;
|
||||
@ -512,6 +513,7 @@ static void resampler_sinc_process_sse(void *re_, struct resampler_data *data)
|
||||
float *output = data->data_out;
|
||||
size_t frames = data->input_frames;
|
||||
size_t out_frames = 0;
|
||||
unsigned taps = resamp->taps;
|
||||
|
||||
{
|
||||
while (frames)
|
||||
@ -520,26 +522,25 @@ static void resampler_sinc_process_sse(void *re_, struct resampler_data *data)
|
||||
{
|
||||
/* Push in reverse to make filter more obvious. */
|
||||
if (!resamp->ptr)
|
||||
resamp->ptr = resamp->taps;
|
||||
resamp->ptr = taps;
|
||||
resamp->ptr--;
|
||||
|
||||
resamp->buffer_l[resamp->ptr + resamp->taps] =
|
||||
resamp->buffer_l[resamp->ptr] = *input++;
|
||||
resamp->buffer_l[resamp->ptr + taps] =
|
||||
resamp->buffer_l[resamp->ptr] = *input++;
|
||||
|
||||
resamp->buffer_r[resamp->ptr + resamp->taps] =
|
||||
resamp->buffer_r[resamp->ptr] = *input++;
|
||||
resamp->buffer_r[resamp->ptr + taps] =
|
||||
resamp->buffer_r[resamp->ptr] = *input++;
|
||||
|
||||
resamp->time -= phases;
|
||||
resamp->time -= phases;
|
||||
frames--;
|
||||
}
|
||||
|
||||
{
|
||||
const float *buffer_l = resamp->buffer_l + resamp->ptr;
|
||||
const float *buffer_r = resamp->buffer_r + resamp->ptr;
|
||||
unsigned taps = resamp->taps;
|
||||
while (resamp->time < phases)
|
||||
{
|
||||
unsigned i;
|
||||
int i;
|
||||
__m128 sum;
|
||||
unsigned phase = resamp->time >> resamp->subphase_bits;
|
||||
float *phase_table = resamp->phase_table + phase * taps;
|
||||
@ -603,6 +604,7 @@ static void resampler_sinc_process_c_kaiser(void *re_, struct resampler_data *da
|
||||
float *output = data->data_out;
|
||||
size_t frames = data->input_frames;
|
||||
size_t out_frames = 0;
|
||||
unsigned taps = resamp->taps;
|
||||
|
||||
{
|
||||
while (frames)
|
||||
@ -611,26 +613,25 @@ static void resampler_sinc_process_c_kaiser(void *re_, struct resampler_data *da
|
||||
{
|
||||
/* Push in reverse to make filter more obvious. */
|
||||
if (!resamp->ptr)
|
||||
resamp->ptr = resamp->taps;
|
||||
resamp->ptr = taps;
|
||||
resamp->ptr--;
|
||||
|
||||
resamp->buffer_l[resamp->ptr + resamp->taps] =
|
||||
resamp->buffer_l[resamp->ptr] = *input++;
|
||||
resamp->buffer_l[resamp->ptr + taps] =
|
||||
resamp->buffer_l[resamp->ptr] = *input++;
|
||||
|
||||
resamp->buffer_r[resamp->ptr + resamp->taps] =
|
||||
resamp->buffer_r[resamp->ptr] = *input++;
|
||||
resamp->buffer_r[resamp->ptr + taps] =
|
||||
resamp->buffer_r[resamp->ptr] = *input++;
|
||||
|
||||
resamp->time -= phases;
|
||||
resamp->time -= phases;
|
||||
frames--;
|
||||
}
|
||||
|
||||
{
|
||||
const float *buffer_l = resamp->buffer_l + resamp->ptr;
|
||||
const float *buffer_r = resamp->buffer_r + resamp->ptr;
|
||||
unsigned taps = resamp->taps;
|
||||
while (resamp->time < phases)
|
||||
{
|
||||
unsigned i;
|
||||
int i;
|
||||
float sum_l = 0.0f;
|
||||
float sum_r = 0.0f;
|
||||
unsigned phase = resamp->time >> resamp->subphase_bits;
|
||||
@ -672,6 +673,7 @@ static void resampler_sinc_process_c(void *re_, struct resampler_data *data)
|
||||
float *output = data->data_out;
|
||||
size_t frames = data->input_frames;
|
||||
size_t out_frames = 0;
|
||||
unsigned taps = resamp->taps;
|
||||
|
||||
{
|
||||
while (frames)
|
||||
@ -680,26 +682,25 @@ static void resampler_sinc_process_c(void *re_, struct resampler_data *data)
|
||||
{
|
||||
/* Push in reverse to make filter more obvious. */
|
||||
if (!resamp->ptr)
|
||||
resamp->ptr = resamp->taps;
|
||||
resamp->ptr = taps;
|
||||
resamp->ptr--;
|
||||
|
||||
resamp->buffer_l[resamp->ptr + resamp->taps] =
|
||||
resamp->buffer_l[resamp->ptr] = *input++;
|
||||
resamp->buffer_l[resamp->ptr + taps] =
|
||||
resamp->buffer_l[resamp->ptr] = *input++;
|
||||
|
||||
resamp->buffer_r[resamp->ptr + resamp->taps] =
|
||||
resamp->buffer_r[resamp->ptr] = *input++;
|
||||
resamp->buffer_r[resamp->ptr + taps] =
|
||||
resamp->buffer_r[resamp->ptr] = *input++;
|
||||
|
||||
resamp->time -= phases;
|
||||
resamp->time -= phases;
|
||||
frames--;
|
||||
}
|
||||
|
||||
{
|
||||
const float *buffer_l = resamp->buffer_l + resamp->ptr;
|
||||
const float *buffer_r = resamp->buffer_r + resamp->ptr;
|
||||
unsigned taps = resamp->taps;
|
||||
while (resamp->time < phases)
|
||||
{
|
||||
unsigned i;
|
||||
int i;
|
||||
float sum_l = 0.0f;
|
||||
float sum_r = 0.0f;
|
||||
unsigned phase = resamp->time >> resamp->subphase_bits;
|
||||
@ -741,7 +742,9 @@ static void sinc_init_table_kaiser(rarch_sinc_resampler_t *resamp,
|
||||
float *phase_table, int phases, int taps, bool calculate_delta)
|
||||
{
|
||||
int i, j;
|
||||
double window_mod = kaiser_window_function(0.0, resamp->kaiser_beta); /* Need to normalize w(0) to 1.0. */
|
||||
/* Kaiser window function - need to normalize w(0) to 1.0f */
|
||||
float kaiser_beta = resamp->kaiser_beta;
|
||||
double window_mod = besseli0(kaiser_beta);
|
||||
int stride = calculate_delta ? 2 : 1;
|
||||
double sidelobes = taps / 2.0;
|
||||
|
||||
@ -750,13 +753,13 @@ static void sinc_init_table_kaiser(rarch_sinc_resampler_t *resamp,
|
||||
for (j = 0; j < taps; j++)
|
||||
{
|
||||
double sinc_phase;
|
||||
float val;
|
||||
int n = j * phases + i;
|
||||
double window_phase = (double)n / (phases * taps); /* [0, 1). */
|
||||
window_phase = 2.0 * window_phase - 1.0; /* [-1, 1) */
|
||||
sinc_phase = sidelobes * window_phase;
|
||||
val = cutoff * sinc(M_PI * sinc_phase * cutoff) *
|
||||
kaiser_window_function(window_phase, resamp->kaiser_beta) / window_mod;
|
||||
float val = cutoff * sinc(M_PI * sinc_phase * cutoff) *
|
||||
besseli0(kaiser_beta * sqrtf(1 - window_phase * window_phase))
|
||||
/ window_mod;
|
||||
phase_table[i * stride * taps + j] = val;
|
||||
}
|
||||
}
|
||||
@ -787,7 +790,8 @@ static void sinc_init_table_kaiser(rarch_sinc_resampler_t *resamp,
|
||||
sinc_phase = sidelobes * window_phase;
|
||||
|
||||
val = cutoff * sinc(M_PI * sinc_phase * cutoff) *
|
||||
kaiser_window_function(window_phase, resamp->kaiser_beta) / window_mod;
|
||||
besseli0(resamp->kaiser_beta * sqrtf(1 - window_phase *
|
||||
window_phase)) / window_mod;
|
||||
delta = (val - phase_table[phase * stride * taps + j]);
|
||||
phase_table[(phase * stride + 1) * taps + j] = delta;
|
||||
}
|
||||
@ -799,7 +803,8 @@ static void sinc_init_table_lanczos(
|
||||
float *phase_table, int phases, int taps, bool calculate_delta)
|
||||
{
|
||||
int i, j;
|
||||
double window_mod = lanzcos_window_function(0.0); /* Need to normalize w(0) to 1.0. */
|
||||
/* Lanczos window function - need to normalize w(0) to 1.0f */
|
||||
double window_mod = 1.0;
|
||||
int stride = calculate_delta ? 2 : 1;
|
||||
double sidelobes = taps / 2.0;
|
||||
|
||||
@ -814,7 +819,7 @@ static void sinc_init_table_lanczos(
|
||||
window_phase = 2.0 * window_phase - 1.0; /* [-1, 1) */
|
||||
sinc_phase = sidelobes * window_phase;
|
||||
val = cutoff * sinc(M_PI * sinc_phase * cutoff) *
|
||||
lanzcos_window_function(window_phase) / window_mod;
|
||||
sinc(M_PI * window_phase) / window_mod;
|
||||
phase_table[i * stride * taps + j] = val;
|
||||
}
|
||||
}
|
||||
@ -845,7 +850,7 @@ static void sinc_init_table_lanczos(
|
||||
sinc_phase = sidelobes * window_phase;
|
||||
|
||||
val = cutoff * sinc(M_PI * sinc_phase * cutoff) *
|
||||
lanzcos_window_function(window_phase) / window_mod;
|
||||
sinc(M_PI * window_phase) / window_mod;
|
||||
delta = (val - phase_table[phase * stride * taps + j]);
|
||||
phase_table[(phase * stride + 1) * taps + j] = delta;
|
||||
}
|
||||
@ -921,7 +926,7 @@ static void *resampler_sinc_new(const struct resampler_config *config,
|
||||
* taps accordingly to keep same stopband attenuation. */
|
||||
if (bandwidth_mod < 1.0)
|
||||
{
|
||||
cutoff *= bandwidth_mod;
|
||||
cutoff *= bandwidth_mod;
|
||||
re->taps = (unsigned)ceil(re->taps / bandwidth_mod);
|
||||
}
|
||||
|
||||
|
@ -31,6 +31,11 @@
|
||||
#include <retro_inline.h>
|
||||
#include <retro_math.h>
|
||||
|
||||
/**
|
||||
* sinc:
|
||||
*
|
||||
* Pure function.
|
||||
**/
|
||||
static INLINE double sinc(double val)
|
||||
{
|
||||
if (fabs(val) < 0.00001)
|
||||
@ -38,7 +43,12 @@ static INLINE double sinc(double val)
|
||||
return sin(val) / val;
|
||||
}
|
||||
|
||||
/* Paeth prediction filter. */
|
||||
/**
|
||||
* paeth:
|
||||
*
|
||||
* Pure function.
|
||||
* Paeth prediction filter.
|
||||
**/
|
||||
static INLINE int paeth(int a, int b, int c)
|
||||
{
|
||||
int p = a + b - c;
|
||||
@ -53,11 +63,17 @@ static INLINE int paeth(int a, int b, int c)
|
||||
return c;
|
||||
}
|
||||
|
||||
/* Modified Bessel function of first order.
|
||||
* Check Wiki for mathematical definition ... */
|
||||
/**
|
||||
* besseli0:
|
||||
*
|
||||
* Pure function.
|
||||
*
|
||||
* Modified Bessel function of the first kind, order zero (I0).
|
||||
* Check Wiki for mathematical definition ...
|
||||
**/
|
||||
static INLINE double besseli0(double x)
|
||||
{
|
||||
unsigned i;
|
||||
int i;
|
||||
double sum = 0.0;
|
||||
double factorial = 1.0;
|
||||
double factorial_mult = 0.0;
|
||||
@ -69,12 +85,11 @@ static INLINE double besseli0(double x)
|
||||
* Luckily, it converges rather fast. */
|
||||
for (i = 0; i < 18; i++)
|
||||
{
|
||||
sum += x_pow * two_div_pow / (factorial * factorial);
|
||||
|
||||
sum += x_pow * two_div_pow / (factorial * factorial);
|
||||
factorial_mult += 1.0;
|
||||
x_pow *= x_sqr;
|
||||
two_div_pow *= 0.25;
|
||||
factorial *= factorial_mult;
|
||||
x_pow *= x_sqr;
|
||||
two_div_pow *= 0.25;
|
||||
factorial *= factorial_mult;
|
||||
}
|
||||
|
||||
return sum;
|
||||
@ -85,9 +100,4 @@ static INLINE double kaiser_window_function(double index, double beta)
|
||||
return besseli0(beta * sqrtf(1 - index * index));
|
||||
}
|
||||
|
||||
static INLINE double lanzcos_window_function(double index)
|
||||
{
|
||||
return sinc(M_PI * index);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
Loading…
x
Reference in New Issue
Block a user