From 5a1e783dae584a25683e43fddac3bc9e0d575a17 Mon Sep 17 00:00:00 2001 From: Karl Tomlinson Date: Wed, 23 Jul 2014 21:49:04 +1200 Subject: [PATCH] b=1042508 move resampler simd optimizations to separate translation units r=padenot This ensures that code in resample.c will run on Intel x86 cpus even when SSE support has been compiled, and will provide similarly for neon support when enabled. --HG-- rename : media/libspeex_resampler/sse-detect-runtime.patch => media/libspeex_resampler/simd-detect-runtime.patch rename : media/libspeex_resampler/src/resample_neon.h => media/libspeex_resampler/src/resample_neon.c rename : media/libspeex_resampler/src/resample_sse.h => media/libspeex_resampler/src/resample_sse.c rename : media/libspeex_resampler/src/sse_detect.cpp => media/libspeex_resampler/src/simd_detect.cpp rename : media/libspeex_resampler/src/sse_detect.h => media/libspeex_resampler/src/simd_detect.h extra : rebase_source : bdf1ef791129dedeadce7369354a5992729a99b7 --- .../simd-detect-runtime.patch | 331 ++++++++++++++++++ media/libspeex_resampler/src/moz.build | 10 +- media/libspeex_resampler/src/resample.c | 26 +- .../src/{resample_neon.h => resample_neon.c} | 6 +- .../src/{resample_sse.h => resample_sse.c} | 10 +- .../src/{sse_detect.cpp => simd_detect.cpp} | 20 +- media/libspeex_resampler/src/simd_detect.h | 43 +++ media/libspeex_resampler/src/sse_detect.h | 20 -- .../sse-detect-runtime.patch | 192 ---------- media/libspeex_resampler/update.sh | 6 +- 10 files changed, 413 insertions(+), 251 deletions(-) create mode 100644 media/libspeex_resampler/simd-detect-runtime.patch rename media/libspeex_resampler/src/{resample_neon.h => resample_neon.c} (96%) rename media/libspeex_resampler/src/{resample_sse.h => resample_sse.c} (89%) rename media/libspeex_resampler/src/{sse_detect.cpp => simd_detect.cpp} (58%) create mode 100644 media/libspeex_resampler/src/simd_detect.h delete mode 100644 media/libspeex_resampler/src/sse_detect.h delete mode 100644 media/libspeex_resampler/sse-detect-runtime.patch diff --git a/media/libspeex_resampler/simd-detect-runtime.patch b/media/libspeex_resampler/simd-detect-runtime.patch new file mode 100644 index 000000000000..c8b182ddadda --- /dev/null +++ b/media/libspeex_resampler/simd-detect-runtime.patch @@ -0,0 +1,331 @@ +diff --git a/media/libspeex_resampler/src/resample.c b/media/libspeex_resampler/src/resample.c +--- a/media/libspeex_resampler/src/resample.c ++++ b/media/libspeex_resampler/src/resample.c +@@ -92,23 +92,17 @@ static void speex_free (void *ptr) {free + + #define IMAX(a,b) ((a) > (b) ? (a) : (b)) + #define IMIN(a,b) ((a) < (b) ? (a) : (b)) + + #ifndef NULL + #define NULL 0 + #endif + +-#ifdef _USE_SSE +-#include "resample_sse.h" +-#endif +- +-#ifdef _USE_NEON +-#include "resample_neon.h" +-#endif ++#include "simd_detect.h" + + /* Numer of elements to allocate on the stack */ + #ifdef VAR_ARRAYS + #define FIXED_STACK_ALLOC 8192 + #else + #define FIXED_STACK_ALLOC 1024 + #endif + +@@ -344,17 +338,19 @@ static int resampler_basic_direct_single + const spx_uint32_t den_rate = st->den_rate; + spx_word32_t sum; + + while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len)) + { + const spx_word16_t *sinct = & sinc_table[samp_frac_num*N]; + const spx_word16_t *iptr = & in[last_sample]; + +-#ifndef OVERRIDE_INNER_PRODUCT_SINGLE ++#ifdef OVERRIDE_INNER_PRODUCT_SINGLE ++ if (!moz_speex_have_single_simd()) { ++#endif + int j; + sum = 0; + for(j=0;j= den_rate) + { + samp_frac_num -= den_rate; +@@ -402,29 +400,33 @@ static int resampler_basic_direct_double + const spx_uint32_t den_rate = st->den_rate; + double sum; + + while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len)) + { + const spx_word16_t *sinct = & sinc_table[samp_frac_num*N]; + const spx_word16_t *iptr = & in[last_sample]; + +-#ifndef OVERRIDE_INNER_PRODUCT_DOUBLE ++#ifdef OVERRIDE_INNER_PRODUCT_DOUBLE ++ if(moz_speex_have_double_simd()) { ++#endif + int j; + double accum[4] = {0,0,0,0}; + + for(j=0;j= den_rate) + { + samp_frac_num -= den_rate; +@@ -458,34 +460,38 @@ static int resampler_basic_interpolate_s + #ifdef FIXED_POINT + const spx_word16_t frac = PDIV32(SHL32((samp_frac_num*st->oversample) % st->den_rate,15),st->den_rate); + #else + const spx_word16_t frac = ((float)((samp_frac_num*st->oversample) % st->den_rate))/st->den_rate; + #endif + spx_word16_t interp[4]; + + +-#ifndef OVERRIDE_INTERPOLATE_PRODUCT_SINGLE ++#ifdef OVERRIDE_INTERPOLATE_PRODUCT_SINGLE ++ if (!moz_speex_have_single_simd()) { ++#endif + int j; + spx_word32_t accum[4] = {0,0,0,0}; + + for(j=0;jsinc_table[4+(j+1)*st->oversample-offset-2]); + accum[1] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-1]); + accum[2] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset]); + accum[3] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset+1]); + } + + cubic_coef(frac, interp); + sum = MULT16_32_Q15(interp[0],SHR32(accum[0], 1)) + MULT16_32_Q15(interp[1],SHR32(accum[1], 1)) + MULT16_32_Q15(interp[2],SHR32(accum[2], 1)) + MULT16_32_Q15(interp[3],SHR32(accum[3], 1)); + sum = SATURATE32PSHR(sum, 15, 32767); +-#else ++#ifdef OVERRIDE_INTERPOLATE_PRODUCT_SINGLE ++ } else { + cubic_coef(frac, interp); + sum = interpolate_product_single(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp); ++ } + #endif + + out[out_stride * out_sample++] = sum; + last_sample += int_advance; + samp_frac_num += frac_advance; + if (samp_frac_num >= den_rate) + { + samp_frac_num -= den_rate; +@@ -521,33 +527,37 @@ static int resampler_basic_interpolate_d + #ifdef FIXED_POINT + const spx_word16_t frac = PDIV32(SHL32((samp_frac_num*st->oversample) % st->den_rate,15),st->den_rate); + #else + const spx_word16_t frac = ((float)((samp_frac_num*st->oversample) % st->den_rate))/st->den_rate; + #endif + spx_word16_t interp[4]; + + +-#ifndef OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE ++#ifdef OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE ++ if (!moz_speex_have_double_simd()) { ++#endif + int j; + double accum[4] = {0,0,0,0}; + + for(j=0;jsinc_table[4+(j+1)*st->oversample-offset-2]); + accum[1] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-1]); + accum[2] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset]); + accum[3] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset+1]); + } + + cubic_coef(frac, interp); + sum = MULT16_32_Q15(interp[0],accum[0]) + MULT16_32_Q15(interp[1],accum[1]) + MULT16_32_Q15(interp[2],accum[2]) + MULT16_32_Q15(interp[3],accum[3]); +-#else ++#ifdef OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE ++ } else { + cubic_coef(frac, interp); + sum = interpolate_product_double(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp); ++ } + #endif + + out[out_stride * out_sample++] = PSHR32(sum,15); + last_sample += int_advance; + samp_frac_num += frac_advance; + if (samp_frac_num >= den_rate) + { + samp_frac_num -= den_rate; +diff --git a/media/libspeex_resampler/src/resample_neon.c b/media/libspeex_resampler/src/resample_neon.c +--- a/media/libspeex_resampler/src/resample_neon.c ++++ b/media/libspeex_resampler/src/resample_neon.c +@@ -31,16 +31,18 @@ + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + ++#include "simd_detect.h" ++ + #include + + #ifdef FIXED_POINT + #ifdef __thumb2__ + static inline int32_t saturate_32bit_to_16bit(int32_t a) { + int32_t ret; + asm ("ssat %[ret], #16, %[a]" + : [ret] "=&r" (ret) +@@ -60,17 +62,17 @@ static inline int32_t saturate_32bit_to_ + return ret; + } + #endif + #undef WORD2INT + #define WORD2INT(x) (saturate_32bit_to_16bit(x)) + + #define OVERRIDE_INNER_PRODUCT_SINGLE + /* Only works when len % 4 == 0 */ +-static inline int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len) ++int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len) + { + int32_t ret; + uint32_t remainder = len % 16; + len = len - remainder; + + asm volatile (" cmp %[len], #0\n" + " bne 1f\n" + " vld1.16 {d16}, [%[b]]!\n" +@@ -134,17 +136,17 @@ static inline int32_t saturate_float_to_ + : "q0"); + return ret; + } + #undef WORD2INT + #define WORD2INT(x) (saturate_float_to_16bit(x)) + + #define OVERRIDE_INNER_PRODUCT_SINGLE + /* Only works when len % 4 == 0 */ +-static inline float inner_product_single(const float *a, const float *b, unsigned int len) ++float inner_product_single(const float *a, const float *b, unsigned int len) + { + float ret; + uint32_t remainder = len % 16; + len = len - remainder; + + asm volatile (" cmp %[len], #0\n" + " bne 1f\n" + " vld1.32 {q4}, [%[b]]!\n" +diff --git a/media/libspeex_resampler/src/resample_sse.c b/media/libspeex_resampler/src/resample_sse.c +--- a/media/libspeex_resampler/src/resample_sse.c ++++ b/media/libspeex_resampler/src/resample_sse.c +@@ -29,37 +29,39 @@ + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + ++#include "simd_detect.h" ++ + #include + + #define OVERRIDE_INNER_PRODUCT_SINGLE +-static inline float inner_product_single(const float *a, const float *b, unsigned int len) ++float inner_product_single(const float *a, const float *b, unsigned int len) + { + int i; + float ret; + __m128 sum = _mm_setzero_ps(); + for (i=0;i + #define OVERRIDE_INNER_PRODUCT_DOUBLE + +-static inline double inner_product_double(const float *a, const float *b, unsigned int len) ++double inner_product_double(const float *a, const float *b, unsigned int len) + { + int i; + double ret; + __m128d sum = _mm_setzero_pd(); + __m128 t; + for (i=0;i #ifdef FIXED_POINT @@ -65,7 +67,7 @@ static inline int32_t saturate_32bit_to_16bit(int32_t a) { #define OVERRIDE_INNER_PRODUCT_SINGLE /* Only works when len % 4 == 0 */ -static inline int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len) +int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len) { int32_t ret; uint32_t remainder = len % 16; @@ -139,7 +141,7 @@ static inline int32_t saturate_float_to_16bit(float a) { #define OVERRIDE_INNER_PRODUCT_SINGLE /* Only works when len % 4 == 0 */ -static inline float inner_product_single(const float *a, const float *b, unsigned int len) +float inner_product_single(const float *a, const float *b, unsigned int len) { float ret; uint32_t remainder = len % 16; diff --git a/media/libspeex_resampler/src/resample_sse.h b/media/libspeex_resampler/src/resample_sse.c similarity index 89% rename from media/libspeex_resampler/src/resample_sse.h rename to media/libspeex_resampler/src/resample_sse.c index 64be8a161612..2eb7929ce21b 100644 --- a/media/libspeex_resampler/src/resample_sse.h +++ b/media/libspeex_resampler/src/resample_sse.c @@ -34,10 +34,12 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include "simd_detect.h" + #include #define OVERRIDE_INNER_PRODUCT_SINGLE -static inline float inner_product_single(const float *a, const float *b, unsigned int len) +float inner_product_single(const float *a, const float *b, unsigned int len) { int i; float ret; @@ -54,7 +56,7 @@ static inline float inner_product_single(const float *a, const float *b, unsigne } #define OVERRIDE_INTERPOLATE_PRODUCT_SINGLE -static inline float interpolate_product_single(const float *a, const float *b, unsigned int len, const spx_uint32_t oversample, float *frac) { +float interpolate_product_single(const float *a, const float *b, unsigned int len, const spx_uint32_t oversample, float *frac) { int i; float ret; __m128 sum = _mm_setzero_ps(); @@ -75,7 +77,7 @@ static inline float interpolate_product_single(const float *a, const float *b, u #include #define OVERRIDE_INNER_PRODUCT_DOUBLE -static inline double inner_product_double(const float *a, const float *b, unsigned int len) +double inner_product_double(const float *a, const float *b, unsigned int len) { int i; double ret; @@ -97,7 +99,7 @@ static inline double inner_product_double(const float *a, const float *b, unsign } #define OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE -static inline double interpolate_product_double(const float *a, const float *b, unsigned int len, const spx_uint32_t oversample, float *frac) { +double interpolate_product_double(const float *a, const float *b, unsigned int len, const spx_uint32_t oversample, float *frac) { int i; double ret; __m128d sum; diff --git a/media/libspeex_resampler/src/sse_detect.cpp b/media/libspeex_resampler/src/simd_detect.cpp similarity index 58% rename from media/libspeex_resampler/src/sse_detect.cpp rename to media/libspeex_resampler/src/simd_detect.cpp index b37112b66075..50111273b591 100644 --- a/media/libspeex_resampler/src/sse_detect.cpp +++ b/media/libspeex_resampler/src/simd_detect.cpp @@ -3,13 +3,25 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -#include "mozilla/SSE.h" -#include "sse_detect.h" +#include "simd_detect.h" -int moz_has_sse2() { +#include "mozilla/SSE.h" +#include "mozilla/arm.h" + +#ifdef _USE_SSE2 +int moz_speex_have_double_simd() { return mozilla::supports_sse2() ? 1 : 0; } +#endif -int moz_has_sse() { +#ifdef _USE_SSE +int moz_speex_have_single_simd() { return mozilla::supports_sse() ? 1 : 0; } +#endif + +#ifdef _USE_NEON +int moz_speex_have_single_simd() { + return mozilla::supports_neon() ? 1 : 0; +} +#endif diff --git a/media/libspeex_resampler/src/simd_detect.h b/media/libspeex_resampler/src/simd_detect.h new file mode 100644 index 000000000000..f563b82b9e3c --- /dev/null +++ b/media/libspeex_resampler/src/simd_detect.h @@ -0,0 +1,43 @@ +/* vim: set shiftwidth=2 tabstop=8 autoindent cindent expandtab: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef simd_detect_h +#define simd_detect_h + +#include "speex_resampler.h" +#include "arch.h" + +#ifdef __cplusplus +extern "C" { +#endif + +int moz_speex_have_single_simd(); +int moz_speex_have_double_simd(); + +#if defined(_USE_SSE) || defined(_USE_NEON) +#define OVERRIDE_INNER_PRODUCT_SINGLE +#define inner_product_single CAT_PREFIX(RANDOM_PREFIX,_inner_product_single) +spx_word32_t inner_product_single(const spx_word16_t *a, const spx_word16_t *b, unsigned int len); +#endif +#if defined(_USE_SSE) +#define OVERRIDE_INTERPOLATE_PRODUCT_SINGLE +#define interpolate_product_single CAT_PREFIX(RANDOM_PREFIX,_interpolate_product_single) +spx_word32_t interpolate_product_single(const spx_word16_t *a, const spx_word16_t *b, unsigned int len, const spx_uint32_t oversample, float *frac); +#endif + +#if defined(_USE_SSE2) +#define OVERRIDE_INNER_PRODUCT_DOUBLE +#define inner_product_double CAT_PREFIX(RANDOM_PREFIX,_inner_product_double) +double inner_product_double(const float *a, const float *b, unsigned int len); +#define OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE +#define interpolate_product_double CAT_PREFIX(RANDOM_PREFIX,_interpolate_product_double) +double interpolate_product_double(const float *a, const float *b, unsigned int len, const spx_uint32_t oversample, float *frac); +#endif + +#ifdef __cplusplus +} +#endif + +#endif // simd_detect_h diff --git a/media/libspeex_resampler/src/sse_detect.h b/media/libspeex_resampler/src/sse_detect.h deleted file mode 100644 index b246bb5c7c24..000000000000 --- a/media/libspeex_resampler/src/sse_detect.h +++ /dev/null @@ -1,20 +0,0 @@ -/* vim: set shiftwidth=2 tabstop=8 autoindent cindent expandtab: */ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -#ifndef SSE_DETECT -#define SSE_DETECT - -#ifdef __cplusplus -extern "C" { -#endif - - int moz_has_sse2(); - int moz_has_sse(); - -#ifdef __cplusplus -} -#endif - -#endif // SSE_DETECT diff --git a/media/libspeex_resampler/sse-detect-runtime.patch b/media/libspeex_resampler/sse-detect-runtime.patch deleted file mode 100644 index f24f07ee0608..000000000000 --- a/media/libspeex_resampler/sse-detect-runtime.patch +++ /dev/null @@ -1,192 +0,0 @@ -diff --git a/media/libspeex_resampler/src/resample.c b/media/libspeex_resampler/src/resample.c ---- a/media/libspeex_resampler/src/resample.c -+++ b/media/libspeex_resampler/src/resample.c -@@ -90,18 +90,28 @@ static void speex_free (void *ptr) {free - - #define IMAX(a,b) ((a) > (b) ? (a) : (b)) - #define IMIN(a,b) ((a) < (b) ? (a) : (b)) - - #ifndef NULL - #define NULL 0 - #endif - -+#include "sse_detect.h" -+ -+/* We compile SSE code on x86 all the time, but we only use it if we find at -+ * runtime that the CPU supports it. */ - #ifdef _USE_SSE -+#ifdef _MSC_VER -+#define inline __inline -+#endif - #include "resample_sse.h" -+#ifdef _MSC_VER -+#undef inline -+#endif - #endif - - #ifdef _USE_NEON - #include "resample_neon.h" - #endif - - /* Numer of elements to allocate on the stack */ - #ifdef VAR_ARRAYS -@@ -342,17 +352,19 @@ static int resampler_basic_direct_single - const spx_uint32_t den_rate = st->den_rate; - spx_word32_t sum; - - while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len)) - { - const spx_word16_t *sinct = & sinc_table[samp_frac_num*N]; - const spx_word16_t *iptr = & in[last_sample]; - --#ifndef OVERRIDE_INNER_PRODUCT_SINGLE -+#ifdef OVERRIDE_INNER_PRODUCT_SINGLE -+ if (!moz_has_sse()) { -+#endif - int j; - sum = 0; - for(j=0;j= den_rate) - { - samp_frac_num -= den_rate; -@@ -400,29 +414,33 @@ static int resampler_basic_direct_double - const spx_uint32_t den_rate = st->den_rate; - double sum; - - while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len)) - { - const spx_word16_t *sinct = & sinc_table[samp_frac_num*N]; - const spx_word16_t *iptr = & in[last_sample]; - --#ifndef OVERRIDE_INNER_PRODUCT_DOUBLE -+#ifdef OVERRIDE_INNER_PRODUCT_DOUBLE -+ if(moz_has_sse2()) { -+#endif - int j; - double accum[4] = {0,0,0,0}; - - for(j=0;j= den_rate) - { - samp_frac_num -= den_rate; -@@ -456,34 +474,38 @@ static int resampler_basic_interpolate_s - #ifdef FIXED_POINT - const spx_word16_t frac = PDIV32(SHL32((samp_frac_num*st->oversample) % st->den_rate,15),st->den_rate); - #else - const spx_word16_t frac = ((float)((samp_frac_num*st->oversample) % st->den_rate))/st->den_rate; - #endif - spx_word16_t interp[4]; - - --#ifndef OVERRIDE_INTERPOLATE_PRODUCT_SINGLE -+#ifdef OVERRIDE_INTERPOLATE_PRODUCT_SINGLE -+ if (!moz_has_sse()) { -+#endif - int j; - spx_word32_t accum[4] = {0,0,0,0}; - - for(j=0;jsinc_table[4+(j+1)*st->oversample-offset-2]); - accum[1] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-1]); - accum[2] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset]); - accum[3] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset+1]); - } - - cubic_coef(frac, interp); - sum = MULT16_32_Q15(interp[0],SHR32(accum[0], 1)) + MULT16_32_Q15(interp[1],SHR32(accum[1], 1)) + MULT16_32_Q15(interp[2],SHR32(accum[2], 1)) + MULT16_32_Q15(interp[3],SHR32(accum[3], 1)); - sum = SATURATE32PSHR(sum, 15, 32767); --#else -+#ifdef OVERRIDE_INTERPOLATE_PRODUCT_SINGLE -+ } else { - cubic_coef(frac, interp); - sum = interpolate_product_single(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp); -+ } - #endif - - out[out_stride * out_sample++] = sum; - last_sample += int_advance; - samp_frac_num += frac_advance; - if (samp_frac_num >= den_rate) - { - samp_frac_num -= den_rate; -@@ -519,33 +541,37 @@ static int resampler_basic_interpolate_d - #ifdef FIXED_POINT - const spx_word16_t frac = PDIV32(SHL32((samp_frac_num*st->oversample) % st->den_rate,15),st->den_rate); - #else - const spx_word16_t frac = ((float)((samp_frac_num*st->oversample) % st->den_rate))/st->den_rate; - #endif - spx_word16_t interp[4]; - - --#ifndef OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE -+#ifdef OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE -+ if (!moz_has_sse2()) { -+#endif - int j; - double accum[4] = {0,0,0,0}; - - for(j=0;jsinc_table[4+(j+1)*st->oversample-offset-2]); - accum[1] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-1]); - accum[2] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset]); - accum[3] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset+1]); - } - - cubic_coef(frac, interp); - sum = MULT16_32_Q15(interp[0],accum[0]) + MULT16_32_Q15(interp[1],accum[1]) + MULT16_32_Q15(interp[2],accum[2]) + MULT16_32_Q15(interp[3],accum[3]); --#else -+#ifdef OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE -+ } else { - cubic_coef(frac, interp); - sum = interpolate_product_double(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp); -+ } - #endif - - out[out_stride * out_sample++] = PSHR32(sum,15); - last_sample += int_advance; - samp_frac_num += frac_advance; - if (samp_frac_num >= den_rate) - { - samp_frac_num -= den_rate; diff --git a/media/libspeex_resampler/update.sh b/media/libspeex_resampler/update.sh index 721ce41b435f..6f1f5007b7e6 100644 --- a/media/libspeex_resampler/update.sh +++ b/media/libspeex_resampler/update.sh @@ -10,8 +10,8 @@ set -e -x cp $1/libspeexdsp/resample.c src -cp $1/libspeexdsp/resample_sse.h src -cp $1/libspeexdsp/resample_neon.h src +cp $1/libspeexdsp/resample_sse.h src/resample_sse.c +cp $1/libspeexdsp/resample_neon.h src/resample_neon.c cp $1/libspeexdsp/arch.h src cp $1/libspeexdsp/stack_alloc.h src cp $1/libspeexdsp/fixed_generic.h src @@ -21,6 +21,6 @@ cp $1/COPYING . # apply outstanding local patches patch -p3 < outside-speex.patch -patch -p3 < sse-detect-runtime.patch +patch -p3 < simd-detect-runtime.patch patch -p3 < set-skip-frac.patch patch -p3 < hugemem.patch