(DSP Filters) Move to pre-SSE2 version of echo filter - SSE2 optimizations could

come later when 'simd' toggling gets added to the spec
This commit is contained in:
twinaphex 2014-04-29 17:03:12 +02:00
parent 47f6cc40ba
commit 752099808d
3 changed files with 191 additions and 214 deletions

View File

@ -68,7 +68,7 @@ ASMFLAGS := -INEON/asm
asflags += -mfpu=neon
endif
objects += eq.$(DYLIB) iir.$(DYLIB) phaser.$(DYLIB) reverb.$(DYLIB) volume.$(DYLIB) wah.$(DYLIB)
objects += echo.$(DYLIB) eq.$(DYLIB) iir.$(DYLIB) phaser.$(DYLIB) reverb.$(DYLIB) volume.$(DYLIB) wah.$(DYLIB)
all: build;

190
audio/filters/echo.c Normal file
View File

@ -0,0 +1,190 @@
/* RetroArch - A frontend for libretro.
* Copyright (C) 2010-2014 - Hans-Kristian Arntzen
* Copyright (C) 2011-2014 - Daniel De Matteis
*
* RetroArch is free software: you can redistribute it and/or modify it under the terms
* of the GNU General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* RetroArch is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with RetroArch.
* If not, see <http://www.gnu.org/licenses/>.
*
*/
#include <math.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include "rarch_dsp.h"
// Echo filter built from one feedback delay line per stereo channel.
// ALIGNED requests 16-byte alignment on GCC-compatible compilers and expands
// to nothing on other compilers.
#ifdef __GNUC__
#define ALIGNED __attribute__((aligned(16)))
#else
#define ALIGNED
#endif
// Smaller of two values; only defined when no `min` macro exists already.
// Either argument may be evaluated twice, so pass side-effect-free expressions.
#ifndef min
#define min(a, b) (((a) < (b)) ? (a) : (b))
#endif
// State of a single feedback delay line (the filter keeps one per channel).
struct echo_filter
{
float *history; // heap-allocated ring buffer holding `delay` samples
int pos; // current read/write index into history, wraps at `delay`
int amp; // amplification of echoes (0-256)
int delay; // delay in number of samples (length of history)
int ms; // delay in milliseconds, as last passed to echo_set_delay()
int rate; // sample rate; NOTE(review): not referenced by the visible code
float f_amp; // amplification (0-1), set from amp / 256
float input_rate; // input sample rate, used to convert ms into samples
};
// Per-instance plugin state: one delay line per channel plus the output buffer.
struct echo_filter_data
{
struct echo_filter echo_l; // applied to the first sample of every interleaved pair
struct echo_filter echo_r; // applied to the second sample of every interleaved pair
float buf[4096]; // processed samples, exposed to the caller via output->samples
};
// When RARCH_INTERNAL is defined, every later use of the name
// rarch_dsp_plugin_init in this file is replaced by echo_dsp_plugin_init
// (presumably so several filters can be linked into one binary - confirm).
#ifdef RARCH_INTERNAL
#define rarch_dsp_plugin_init echo_dsp_plugin_init
#endif
/* Pushes one sample through a delay line.
 *
 * data - pointer to the struct echo_filter to advance.
 * in   - incoming sample.
 *
 * The sample stored `delay` steps ago is scaled by f_amp, added to `in`,
 * written back into the same slot (which is what makes the echo feed back
 * into itself) and returned. The position then advances, wrapping at `delay`.
 */
static float echo_process(void *data, float in)
{
   struct echo_filter *line = (struct echo_filter*)data;
   float *slot              = &line->history[line->pos];
   float mixed              = *slot;
   int next                 = line->pos + 1;

   // Kept as two separate steps so the arithmetic matches sample for sample.
   mixed *= line->f_amp;
   mixed += in;
   *slot  = mixed;

   next     %= line->delay;
   line->pos = next;

   return mixed;
}
/* Applies the echo to one block of interleaved two-channel samples.
 *
 * data   - struct echo_filter_data created by echo_dsp_init().
 * output - receives a pointer to the internal result buffer and the number
 *          of frames stored in it.
 * input  - block to process; two floats are read per frame.
 *
 * The first sample of each frame goes through echo_l, the second through
 * echo_r. The result buffer is fixed-size, so at most
 * sizeof(buf) / sizeof(float) / 2 frames are processed per call; any frames
 * beyond that are not processed and are not reported in output->frames.
 */
static void echo_dsp_process(void *data, rarch_dsp_output_t *output,
      const rarch_dsp_input_t *input)
{
   struct echo_filter_data *echo = (struct echo_filter_data*)data;
   const unsigned max_frames     =
      (unsigned)(sizeof(echo->buf) / sizeof(echo->buf[0]) / 2);
   unsigned frames               = input->frames;
   unsigned i;

   // Never write past the end of buf, whatever block size the caller sends.
   if (frames > max_frames)
      frames = max_frames;

   for (i = 0; i < frames; i++)
   {
      const unsigned l = 2 * i;
      const unsigned r = l + 1;

      echo->buf[l] = echo_process(&echo->echo_l, input->samples[l]);
      echo->buf[r] = echo_process(&echo->echo_r, input->samples[r]);
   }

   output->samples = echo->buf;
   output->frames  = frames;
}
/* Releases a filter instance created by echo_dsp_init().
 *
 * data - struct echo_filter_data to destroy; NULL is accepted and ignored.
 *
 * Frees both delay-line history buffers and then the instance itself.
 */
static void echo_dsp_free(void *data)
{
   struct echo_filter_data *echo = (struct echo_filter_data*)data;

   // Must be checked before any member access below.
   if (!echo)
      return;

   // free(NULL) is a no-op, so the histories need no individual guards.
   free(echo->echo_l.history);
   free(echo->echo_r.history);
   free(echo);
}
/* Resizes a delay line to `ms` milliseconds.
 *
 * data - pointer to the struct echo_filter to resize; its input_rate must
 *        already be set, since it converts milliseconds into samples.
 * ms   - requested delay in milliseconds. Values that would give less than
 *        one sample (including zero and negative values) give a one-sample
 *        delay.
 *
 * A new zero-filled history is allocated. If an old history exists, its
 * samples are carried over in ring order starting at the current position,
 * as far as they fit, and the old buffer is freed. The position is reset
 * to 0.
 *
 * If the allocation fails the delay line is left exactly as it was.
 */
static void echo_set_delay(void *data, int ms)
{
   struct echo_filter *echo = (struct echo_filter*)data;
   float *new_history;
   int new_delay = ms * echo->input_rate / 1000;

   // A non-positive length would turn into a bogus allocation size and a
   // modulo by zero (or by a negative value) in echo_process().
   if (new_delay < 1)
      new_delay = 1;

   new_history = (float*)calloc((size_t)new_delay, sizeof(float));
   if (!new_history)
      return;

   if (echo->history)
   {
      // Tail of the ring: from the current position to the end of the buffer.
      int tail = echo->delay - echo->pos;
      tail     = min(tail, new_delay);
      memcpy(new_history, echo->history + echo->pos, tail * sizeof(float));

      if (tail < new_delay)
      {
         // Head of the ring: from the buffer start up to the current position.
         int head = new_delay - tail;
         head     = min(head, echo->delay);
         head     = min(head, echo->pos);
         memcpy(new_history + tail, echo->history, head * sizeof(float));
      }

      free(echo->history);
   }

   echo->history = new_history;
   echo->pos     = 0;
   echo->delay   = new_delay;
   echo->ms      = ms;
}
static void *echo_dsp_init(const rarch_dsp_info_t *info)
{
struct echo_filter_data *echo = (struct echo_filter_data*)calloc(1, sizeof(*echo));;
if (!echo)
return NULL;
echo->echo_l.history = NULL;
echo->echo_l.input_rate = info->input_rate;
echo_set_delay(&echo->echo_l, 200);
echo->echo_l.amp = 128;
echo->echo_l.f_amp = (float)echo->echo_l.amp / 256.0f;
echo->echo_l.pos = 0;
echo->echo_r.history = NULL;
echo->echo_r.input_rate = info->input_rate;
echo_set_delay(&echo->echo_r, 200);
echo->echo_r.amp = 128;
echo->echo_r.f_amp = (float)echo->echo_r.amp / 256.0f;
echo->echo_r.pos = 0;
fprintf(stderr, "[Echo] loaded!\n");
return echo;
}
/* Configuration callback listed in the plugin descriptor.
 *
 * data - filter instance; not used.
 *
 * The echo filter exposes nothing to configure, so this does nothing.
 */
static void echo_dsp_config(void *data)
{
   // Intentionally empty apart from marking the parameter as unused.
   (void)data;
}
// Plugin descriptor returned by rarch_dsp_plugin_init(). The initializer is
// positional, so the entry order has to match the field order of
// rarch_dsp_plugin_t (declared outside this file - presumably in rarch_dsp.h).
static const rarch_dsp_plugin_t dsp_plug = {
echo_dsp_init, // creates a filter instance
echo_dsp_process, // filters one block of samples
echo_dsp_free, // destroys a filter instance
RARCH_DSP_API_VERSION,
echo_dsp_config, // configuration callback (does nothing for this filter)
"Echo", // NOTE(review): presumably the plugin's display name - confirm against rarch_dsp_plugin_t
NULL // NOTE(review): meaning of this trailing field is not visible from here
};
/* Plugin entry point.
 *
 * Returns a pointer to this file's static plugin descriptor. The descriptor
 * has static storage, so the caller must not free it.
 */
const rarch_dsp_plugin_t *rarch_dsp_plugin_init(void)
{
   const rarch_dsp_plugin_t *descriptor = &dsp_plug;
   return descriptor;
}
// Remove the rarch_dsp_plugin_init rename again once the entry point above
// has been defined.
#ifdef RARCH_INTERNAL
#undef rarch_dsp_plugin_init
#endif

View File

@ -1,213 +0,0 @@
/* RetroArch - A frontend for libretro.
* Copyright (C) 2010-2014 - Hans-Kristian Arntzen
* Copyright (C) 2011-2014 - Daniel De Matteis
*
* RetroArch is free software: you can redistribute it and/or modify it under the terms
* of the GNU General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* RetroArch is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with RetroArch.
* If not, see <http://www.gnu.org/licenses/>.
*
*/
#include <math.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include "rarch_dsp.h"
#include <emmintrin.h>
// 4 source echo: four delay lines are mixed into every output sample.
// ALIGNED requests 16-byte alignment on GCC-compatible compilers and expands
// to nothing on other compilers.
#ifdef __GNUC__
#define ALIGNED __attribute__((aligned(16)))
#else
#define ALIGNED
#endif
// Echo delay in milliseconds; used to size every delay line at init time.
#define ECHO_MS 150
// Gain loaded into every amp[] entry by echo_init(). With 0.0 the delay line
// outputs are multiplied by zero before being mixed in.
#define AMP 0.0
// State of one SSE2 echo filter instance.
struct echo_filter_data
{
float echo_buffer[4][0x10000] ALIGNED; // four delay lines, accessed four floats at a time
float buffer[4096] ALIGNED; // processed samples, exposed to the caller via output->samples
float scratch_buf[4] ALIGNED; // input held back until a full group of four floats exists
unsigned buf_size[4]; // wrap-around length, in floats, of each delay line
unsigned ptr[4]; // current offset, in floats, into each delay line
unsigned scratch_ptr; // number of floats currently held in scratch_buf
__m128 amp[4] ALIGNED; // gain applied to each delay line's output
__m128 feedback ALIGNED; // gain applied to the summed echo before it is written back
float input_rate; // input sample rate
};
// When RARCH_INTERNAL is defined, every later use of the name
// rarch_dsp_plugin_init in this file is replaced by echo_dsp_plugin_init
// (presumably so several filters can be linked into one binary - confirm).
#ifdef RARCH_INTERNAL
#define rarch_dsp_plugin_init echo_dsp_plugin_init
#endif
/* Resets a filter instance to its initial state.
 *
 * data - struct echo_filter_data to reset.
 *
 * Clears all four delay lines, the scratch and output buffers and the
 * positions, loads AMP into every gain, sets the feedback gain to zero and
 * sets input_rate to 32000. buf_size is left untouched.
 */
static void echo_init(void *data)
{
   struct echo_filter_data *state = (struct echo_filter_data*)data;
   unsigned tap, n;

   state->scratch_ptr = 0;
   state->feedback    = _mm_set1_ps(0.0f);
   state->input_rate  = 32000.0;

   for (tap = 0; tap < 4; tap++)
   {
      float *line = state->echo_buffer[tap];

      state->ptr[tap]         = 0;
      state->amp[tap]         = _mm_set1_ps(AMP);
      state->scratch_buf[tap] = 0.0f;

      for (n = 0; n < 0x10000; n++)
         line[n] = 0.0f;
   }

   for (n = 0; n < 4096; n++)
      state->buffer[n] = 0.0f;
}
/* Runs interleaved samples through the four delay lines, four floats
 * (two frames) at a time.
 *
 * data   - struct echo_filter_data holding the delay lines and output buffer.
 * input  - input samples; two floats are read per frame.
 * frames - number of frames available in `input`.
 *
 * Results are written to echo->buffer from its start. Input that does not
 * fill a complete group of four floats is stored in scratch_buf and is
 * completed and flushed by a later call, so the return value can differ from
 * `frames`.
 *
 * Returns the number of frames written to echo->buffer.
 *
 * NOTE(review): writes to echo->buffer are not bounded here; the caller is
 * presumably expected to keep blocks within its 4096 floats - confirm.
 */
static unsigned echo_sse2_process(void *data, const float *input, unsigned frames)
{
   unsigned frames_out, i;
   float *buffer_out;
   struct echo_filter_data *echo = (struct echo_filter_data*)data;
   frames_out = 0;
   buffer_out = echo->buffer;
   __m128 amp[4] = {
      echo->amp[0],
      echo->amp[1],
      echo->amp[2],
      echo->amp[3],
   };
   __m128 feedback = echo->feedback;

   // Filters the four floats in `reg` (which must be in scope at the point of
   // use): mixes in the scaled output of all four delay lines, stores the
   // result at buffer_out, writes input + feedback * echo back into every
   // delay line and advances each line's position by four, wrapping at its
   // buf_size.
#define DO_FILTER() \
   __m128 result[4]; \
   __m128 echo_[4]; \
   for (i = 0; i < 4; i++) \
   { \
      echo_[i] = _mm_load_ps(echo->echo_buffer[i] + echo->ptr[i]); \
      result[i] = _mm_mul_ps(amp[i], echo_[i]); \
   } \
   __m128 final_result = _mm_add_ps(_mm_add_ps(result[0], result[1]), _mm_add_ps(result[2], result[3])); \
   __m128 feedback_result = _mm_mul_ps(feedback, final_result); \
   final_result = _mm_add_ps(reg, final_result); \
   feedback_result = _mm_add_ps(reg, feedback_result); \
   for (i = 0; i < 4; i++) \
      _mm_store_ps(echo->echo_buffer[i] + echo->ptr[i], feedback_result); \
   _mm_store_ps(buffer_out, final_result); \
   for (i = 0; i < 4; i++) \
      echo->ptr[i] = (echo->ptr[i] + 4) % echo->buf_size[i]

   // Fill up scratch buffer and flush.
   // Only possible when this call brings input: with frames == 0 the fill
   // loop would read past `input` and `frames--` would wrap around, sending
   // the main loop far beyond every buffer. The pending samples simply stay
   // in scratch_buf until the next call.
   if (echo->scratch_ptr && frames)
   {
      for (i = echo->scratch_ptr; i < 4; i += 2)
      {
         echo->scratch_buf[i] = *input++;
         echo->scratch_buf[i + 1] = *input++;
         frames--;
      }
      echo->scratch_ptr = 0;
      __m128 reg = _mm_load_ps(echo->scratch_buf);
      DO_FILTER();
      frames_out += 2;
      buffer_out += 4;
   }

   // Main processing.
   for (i = 0; (i + 4) <= (frames * 2); i += 4, input += 4, buffer_out += 4, frames_out += 2)
   {
      __m128 reg = _mm_loadu_ps(input); // Might not be aligned.
      DO_FILTER();
   }

   // Flush rest to scratch buffer.
   for (; i < (frames * 2); i++)
      echo->scratch_buf[echo->scratch_ptr++] = *input++;

   return frames_out;
}
/* Plugin process callback: filters one block through the SSE2 echo.
 *
 * data   - struct echo_filter_data created by echo_sse_dsp_init().
 * output - receives a pointer to the instance's result buffer and the number
 *          of frames echo_sse2_process() reported writing to it.
 * input  - samples and frame count forwarded to echo_sse2_process().
 */
static void echo_sse_dsp_process(void *data, rarch_dsp_output_t *output,
      const rarch_dsp_input_t *input)
{
   struct echo_filter_data *state = (struct echo_filter_data*)data;
   unsigned produced;

   output->samples = state->buffer;

   produced       = echo_sse2_process(state, input->samples, input->frames);
   output->frames = produced;
}
/* Releases a filter instance.
 *
 * data - instance to free; NULL is accepted and ignored.
 *
 * The instance is a single allocation, so one free() releases everything.
 */
static void echo_sse_dsp_free(void *data)
{
   // free() ignores NULL by itself, so no separate guard is needed.
   free(data);
}
/* Creates an SSE2 echo filter instance.
 *
 * info - supplies input_rate, from which the length of the delay lines
 *        (ECHO_MS milliseconds of two-channel audio) is derived.
 *
 * The delay-line length is rounded down to a multiple of four floats and
 * kept between 4 and the capacity of one delay line. Positions advance four
 * floats at a time and wrap at this length, so a length that is not a
 * multiple of four would wrap to an offset that is no longer 16-byte
 * aligned, which the aligned loads and stores in the filter require; a
 * length of zero would be a modulo by zero.
 *
 * Returns the new instance (release it with echo_sse_dsp_free()), or NULL if
 * memory could not be allocated.
 */
static void *echo_sse_dsp_init(const rarch_dsp_info_t *info)
{
   unsigned i, capacity, buf_size;
   float wanted;
   struct echo_filter_data *echo = (struct echo_filter_data*)calloc(1, sizeof(*echo));
   if (!echo)
      return NULL;

   // Number of floats in one delay line.
   capacity = (unsigned)(sizeof(echo->echo_buffer[0]) / sizeof(echo->echo_buffer[0][0]));

   // Range-check as a float first: converting an out-of-range float to
   // unsigned is undefined. The negated comparison also catches NaN.
   wanted = ECHO_MS * (info->input_rate * 2) / 1000;
   if (!(wanted >= 4.0f))
      buf_size = 4;
   else if (wanted >= (float)capacity)
      buf_size = capacity;
   else
      buf_size = (unsigned)wanted & ~3u;

   for (i = 0; i < 4; i++)
      echo->buf_size[i] = buf_size;

   echo_init(echo);
   // echo_init() stores a default rate; replace it with the real one.
   echo->input_rate = info->input_rate;

   fprintf(stderr, "[Echo] loaded!\n");
   return echo;
}
/* Configuration callback listed in the plugin descriptor.
 *
 * data - filter instance; not used.
 *
 * The filter exposes nothing to configure, so this does nothing.
 */
static void echo_sse_dsp_config(void *data)
{
   // Intentionally empty apart from marking the parameter as unused.
   (void)data;
}
// Plugin descriptor returned by rarch_dsp_plugin_init(). The initializer is
// positional, so the entry order has to match the field order of
// rarch_dsp_plugin_t (declared outside this file - presumably in rarch_dsp.h).
static const rarch_dsp_plugin_t dsp_plug = {
echo_sse_dsp_init, // creates a filter instance
echo_sse_dsp_process, // filters one block of samples
echo_sse_dsp_free, // destroys a filter instance
RARCH_DSP_API_VERSION,
echo_sse_dsp_config, // configuration callback (does nothing for this filter)
"Echo (SSE2)", // NOTE(review): presumably the plugin's display name - confirm against rarch_dsp_plugin_t
NULL // NOTE(review): meaning of this trailing field is not visible from here
};
/* Plugin entry point.
 *
 * Returns a pointer to this file's static plugin descriptor. The descriptor
 * has static storage, so the caller must not free it.
 */
const rarch_dsp_plugin_t *rarch_dsp_plugin_init(void)
{
   const rarch_dsp_plugin_t *descriptor = &dsp_plug;
   return descriptor;
}
// Remove the rarch_dsp_plugin_init rename again once the entry point above
// has been defined.
#ifdef RARCH_INTERNAL
#undef rarch_dsp_plugin_init
#endif