From 37ed625babeaa2204b71ad0012822d22e08c1afc Mon Sep 17 00:00:00 2001 From: Themaister Date: Sat, 25 Feb 2012 21:47:40 +0100 Subject: [PATCH] Use shuffles/movehl. --- audio/sinc.c | 42 ++++++++++++++++++++---------------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/audio/sinc.c b/audio/sinc.c index 4013a9e6ce..d1c3130843 100644 --- a/audio/sinc.c +++ b/audio/sinc.c @@ -35,10 +35,6 @@ #include #endif -#if __SSE3__ -#include -#endif - #define PHASE_BITS 8 #define SUBPHASE_BITS 16 @@ -136,9 +132,7 @@ ssnes_resampler_t *resampler_new(void) init_sinc_table(re); -#if __SSE3__ - SSNES_LOG("Sinc resampler [SSE3]\n"); -#elif __SSE__ +#if __SSE__ SSNES_LOG("Sinc resampler [SSE]\n"); #else SSNES_LOG("Sinc resampler [C]\n"); @@ -177,22 +171,26 @@ static void process_sinc(ssnes_resampler_t *resamp, float *out_buffer) sum_r = _mm_add_ps(sum_r, _mm_mul_ps(buf_r, sinc)); } -#if __SSE3__ - __m128 res = _mm_hadd_ps(_mm_hadd_ps(sum_l, sum_r), _mm_setzero_ps()); - _mm_storeu_ps(out_buffer, res); // Overwriting, but this is safe. -#else // Meh, compiler should optimize this to something sane. - union - { - float f[4]; - __m128 v; - } u[2] = { - [0] = { .v = sum_l }, - [1] = { .v = sum_r }, - }; + // Them annoying shuffles :V + // sum_l = { l3, l2, l1, l0 } + // sum_r = { r3, r2, r1, r0 } - out_buffer[0] = (u[0].f[0] + u[0].f[1]) + (u[0].f[2] + u[0].f[3]); - out_buffer[1] = (u[1].f[0] + u[1].f[1]) + (u[1].f[2] + u[1].f[3]); -#endif + __m128 sum = _mm_add_ps(_mm_shuffle_ps(sum_l, sum_r, _MM_SHUFFLE(1, 0, 1, 0)), + _mm_shuffle_ps(sum_l, sum_r, _MM_SHUFFLE(3, 2, 3, 2))); + + // sum = { r1, r0, l1, l0 } + { r3, r2, l3, l2 } + // sum = { R1, R0, L1, L0 } + + sum = _mm_add_ps(_mm_shuffle_ps(sum, sum, _MM_SHUFFLE(3, 3, 1, 1)), sum); + + // sum = {R1, R1, L1, L1 } + { R1, R0, L1, L0 } + // sum = { X, R, X, L } + + // Store L + _mm_store_ss(out_buffer + 0, sum); + + // movehl { X, R, X, L } == { X, R, X, R } + _mm_store_ss(out_buffer + 1, _mm_movehl_ps(sum, sum)); } #else // Plain ol' C99 static void process_sinc(ssnes_resampler_t *resamp, float *out_buffer)